diff --git a/main.working b/main.working
deleted file mode 100644
index 5254d38..0000000
--- a/main.working
+++ /dev/null
@@ -1,145 +0,0 @@
-package main
-
-import (
-	"crypto/tls"
-	"encoding/csv"
-	"encoding/xml"
-	"flag"
-	"fmt"
-	"io/ioutil"
-	"net/http"
-	"net/url"
-	"os"
-	"path"
-	"strings"
-)
-
-// Sitemap represents the structure of the XML sitemap
-type Sitemap struct {
-	URLs []URL `xml:"url"`
-}
-
-// URL represents a single URL entry in the sitemap
-type URL struct {
-	Loc string `xml:"loc"`
-}
-
-func main() {
-	// Define command line flags
-	csvOutput := flag.Bool("csv", false, "Output URLs as CSV to sitemap.csv")
-	sitemapURL := flag.String("url", "", "URL of the sitemap")
-	flag.Parse()
-
-	// List of known paths for sitemap.xml
-	knownPaths := []string{
-		"/sitemap.xml",
-		"/sitemap_index.xml",
-		"/sitemap/sitemap.xml",
-		"/sitemap/sitemap-index.xml",
-	}
-
-	// If no URL is provided, check common paths
-	if *sitemapURL == "" {
-		fmt.Println("No URL provided, checking common paths for sitemap.xml")
-		baseURL := "https://example.com" // Replace with your base URL if needed
-		for _, p := range knownPaths {
-			u, err := url.Parse(baseURL)
-			if err != nil {
-				fmt.Printf("Invalid base URL: %v\n", err)
-				return
-			}
-			u.Path = path.Join(u.Path, p)
-			if checkURL(u.String()) {
-				*sitemapURL = u.String()
-				break
-			}
-		}
-		if *sitemapURL == "" {
-			fmt.Println("Sitemap not found in common paths")
-			return
-		}
-	}
-
-	// Create an HTTP client with TLS certificate verification disabled
-	client := &http.Client{
-		Transport: &http.Transport{
-			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
-		},
-	}
-
-	// Fetch the sitemap XML
-	resp, err := client.Get(*sitemapURL)
-	if err != nil {
-		fmt.Printf("Failed to fetch sitemap: %v\n", err)
-		return
-	}
-	defer resp.Body.Close()
-
-	// Read the XML response
-	data, err := ioutil.ReadAll(resp.Body)
-	if err != nil {
-		fmt.Printf("Failed to read response body: %v\n", err)
-		return
-	}
-
-	// Parse the XML data
-	var sitemap Sitemap
-	err = xml.Unmarshal(data, &sitemap)
-	if err != nil {
-		fmt.Printf("Failed to parse XML: %v\n", err)
-		return
-	}
-
-	// Extract URLs
-	urls := make([]string, len(sitemap.URLs))
-	for i, url := range sitemap.URLs {
-		urls[i] = url.Loc
-	}
-
-	if *csvOutput {
-		// Output URLs to a CSV file
-		err := writeURLsToCSV("sitemap.csv", urls)
-		if err != nil {
-			fmt.Printf("Failed to write CSV: %v\n", err)
-			return
-		}
-		fmt.Println("URLs written to sitemap.csv")
-	} else {
-		// Print URLs as a raw comma-separated string
-		fmt.Println(strings.Join(urls, ","))
-	}
-}
-
-// checkURL checks if a given URL is accessible
-func checkURL(sitemapURL string) bool {
-	client := &http.Client{
-		Transport: &http.Transport{
-			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
-		},
-	}
-	resp, err := client.Head(sitemapURL)
-	if err != nil {
-		return false
-	}
-	return resp.StatusCode == http.StatusOK
-}
-
-// writeURLsToCSV writes a slice of URLs to a CSV file
-func writeURLsToCSV(filename string, urls []string) error {
-	file, err := os.Create(filename)
-	if err != nil {
-		return err
-	}
-	defer file.Close()
-
-	writer := csv.NewWriter(file)
-	defer writer.Flush()
-
-	for _, url := range urls {
-		if err := writer.Write([]string{url}); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-