// Command sitemap fetches an XML sitemap and prints the URLs it lists, either
// as a single comma-separated line on stdout or as rows in sitemap.csv.
package main

import (
	"crypto/tls"
	"encoding/csv"
	"encoding/xml"
	"flag"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"time"
)

// requestTimeout bounds every HTTP request (connect, headers and body) so a
// stalled server cannot hang the tool indefinitely.
const requestTimeout = 30 * time.Second

// httpClient is the single client shared by every request so connections are
// reused instead of building a new transport per call.
//
// SECURITY: TLS certificate verification is disabled, which permits
// man-in-the-middle tampering. This mirrors the tool's original, deliberate
// behavior; do not point it at anything where authenticity matters.
var httpClient = &http.Client{
	Timeout: requestTimeout,
	Transport: &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	},
}

// Sitemap represents the structure of the XML sitemap.
//
// Only <url> children are captured; <sitemap> children (as used by sitemap
// index documents) are not mapped to any field and are therefore ignored.
type Sitemap struct {
	URLs []URL `xml:"url"`
}

// URL represents a single URL entry in the sitemap.
type URL struct {
	Loc string `xml:"loc"`
}

func main() {
	// Define command line flags.
	csvOutput := flag.Bool("csv", false, "Output URLs as CSV to sitemap.csv")
	sitemapURL := flag.String("url", "", "URL of the sitemap")
	flag.Parse()

	// List of known paths for sitemap.xml.
	knownPaths := []string{
		"/sitemap.xml",
		"/sitemap_index.xml",
		"/sitemap/sitemap.xml",
		"/sitemap/sitemap-index.xml",
	}

	// If no URL is provided, probe the common paths on the base URL.
	target := *sitemapURL
	if target == "" {
		fmt.Println("No URL provided, checking common paths for sitemap.xml")
		baseURL := "https://example.com" // Replace with your base URL if needed
		found, err := findSitemap(baseURL, knownPaths)
		if err != nil {
			fmt.Printf("Invalid base URL: %v\n", err)
			return
		}
		if found == "" {
			fmt.Println("Sitemap not found in common paths")
			return
		}
		target = found
	}

	data, ok := fetchSitemap(target)
	if !ok {
		return
	}

	urls, err := parseSitemap(data)
	if err != nil {
		fmt.Printf("Failed to parse XML: %v\n", err)
		return
	}

	if !*csvOutput {
		// Print URLs as a raw comma-separated string.
		fmt.Println(strings.Join(urls, ","))
		return
	}

	// Output URLs to a CSV file.
	if err := writeURLsToCSV("sitemap.csv", urls); err != nil {
		fmt.Printf("Failed to write CSV: %v\n", err)
		return
	}
	fmt.Println("URLs written to sitemap.csv")
}

// findSitemap probes baseURL joined with each of paths, in order, and returns
// the first candidate for which checkURL reports success. It returns an empty
// string and a nil error when no candidate responds, and a non-nil error only
// when baseURL itself cannot be parsed.
func findSitemap(baseURL string, paths []string) (string, error) {
	// Parse once: the base does not change between candidates.
	base, err := url.Parse(baseURL)
	if err != nil {
		return "", err
	}
	for _, p := range paths {
		candidate := *base // copy so every probe starts from the unmodified base
		candidate.Path = path.Join(base.Path, p)
		if s := candidate.String(); checkURL(s) {
			return s, nil
		}
	}
	return "", nil
}

// fetchSitemap downloads the document at rawURL and returns its body. On any
// failure it prints a diagnostic to stdout and returns ok == false.
func fetchSitemap(rawURL string) (data []byte, ok bool) {
	resp, err := httpClient.Get(rawURL)
	if err != nil {
		fmt.Printf("Failed to fetch sitemap: %v\n", err)
		return nil, false
	}
	defer resp.Body.Close()

	// Without this check an error page (404, 500, ...) would be handed to the
	// XML parser and reported as a parse failure or an empty sitemap.
	if resp.StatusCode != http.StatusOK {
		fmt.Printf("Failed to fetch sitemap: unexpected status %s\n", resp.Status)
		return nil, false
	}

	data, err = ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Printf("Failed to read response body: %v\n", err)
		return nil, false
	}
	return data, true
}

// parseSitemap decodes sitemap XML and returns the <loc> value of every <url>
// entry in document order. The values are returned exactly as decoded.
func parseSitemap(data []byte) ([]string, error) {
	var sitemap Sitemap
	if err := xml.Unmarshal(data, &sitemap); err != nil {
		return nil, err
	}
	locs := make([]string, len(sitemap.URLs))
	for i, entry := range sitemap.URLs {
		locs[i] = entry.Loc
	}
	return locs, nil
}

// checkURL reports whether a HEAD request to sitemapURL succeeds with status
// 200 OK. Any transport error (including a timeout) is reported as false.
func checkURL(sitemapURL string) bool {
	resp, err := httpClient.Head(sitemapURL)
	if err != nil {
		return false
	}
	// The body must be closed even for HEAD responses, otherwise the
	// underlying connection is leaked.
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}

// writeURLsToCSV writes urls to filename as CSV, one URL per row, creating or
// truncating the file. It returns the first error encountered while creating,
// writing, flushing or closing the file.
func writeURLsToCSV(filename string, urls []string) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean buffered data never reached disk, so report
		// it unless an earlier error is already being returned.
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	writer := csv.NewWriter(file)
	for _, u := range urls {
		if err := writer.Write([]string{u}); err != nil {
			return err
		}
	}

	// Flush has no return value; Error reports any failure from Write or Flush.
	writer.Flush()
	return writer.Error()
}