// sitemap URL extractor (146 lines, 3.6 KiB)
package main
|
|
|
|
import (
	"crypto/tls"
	"encoding/csv"
	"encoding/xml"
	"flag"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"time"
)
|
|
|
|
// Sitemap represents the structure of the XML sitemap.
// Only the <url> entries are decoded; all other sitemap elements
// (lastmod, changefreq, ...) are ignored by the unmarshaler.
type Sitemap struct {
	URLs []URL `xml:"url"`
}
|
|
|
|
// URL represents a single URL entry in the sitemap.
// Loc holds the page address from the <loc> element.
type URL struct {
	Loc string `xml:"loc"`
}
|
|
|
|
func main() {
|
|
// Define command line flags
|
|
csvOutput := flag.Bool("csv", false, "Output URLs as CSV to sitemap.csv")
|
|
sitemapURL := flag.String("url", "", "URL of the sitemap")
|
|
flag.Parse()
|
|
|
|
// List of known paths for sitemap.xml
|
|
knownPaths := []string{
|
|
"/sitemap.xml",
|
|
"/sitemap_index.xml",
|
|
"/sitemap/sitemap.xml",
|
|
"/sitemap/sitemap-index.xml",
|
|
}
|
|
|
|
// If no URL is provided, check common paths
|
|
if *sitemapURL == "" {
|
|
fmt.Println("No URL provided, checking common paths for sitemap.xml")
|
|
baseURL := "https://example.com" // Replace with your base URL if needed
|
|
for _, p := range knownPaths {
|
|
u, err := url.Parse(baseURL)
|
|
if err != nil {
|
|
fmt.Printf("Invalid base URL: %v\n", err)
|
|
return
|
|
}
|
|
u.Path = path.Join(u.Path, p)
|
|
if checkURL(u.String()) {
|
|
*sitemapURL = u.String()
|
|
break
|
|
}
|
|
}
|
|
if *sitemapURL == "" {
|
|
fmt.Println("Sitemap not found in common paths")
|
|
return
|
|
}
|
|
}
|
|
|
|
// Create an HTTP client with TLS certificate verification disabled
|
|
client := &http.Client{
|
|
Transport: &http.Transport{
|
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
|
},
|
|
}
|
|
|
|
// Fetch the sitemap XML
|
|
resp, err := client.Get(*sitemapURL)
|
|
if err != nil {
|
|
fmt.Printf("Failed to fetch sitemap: %v\n", err)
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
// Read the XML response
|
|
data, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
fmt.Printf("Failed to read response body: %v\n", err)
|
|
return
|
|
}
|
|
|
|
// Parse the XML data
|
|
var sitemap Sitemap
|
|
err = xml.Unmarshal(data, &sitemap)
|
|
if err != nil {
|
|
fmt.Printf("Failed to parse XML: %v\n", err)
|
|
return
|
|
}
|
|
|
|
// Extract URLs
|
|
urls := make([]string, len(sitemap.URLs))
|
|
for i, url := range sitemap.URLs {
|
|
urls[i] = url.Loc
|
|
}
|
|
|
|
if *csvOutput {
|
|
// Output URLs to a CSV file
|
|
err := writeURLsToCSV("sitemap.csv", urls)
|
|
if err != nil {
|
|
fmt.Printf("Failed to write CSV: %v\n", err)
|
|
return
|
|
}
|
|
fmt.Println("URLs written to sitemap.csv")
|
|
} else {
|
|
// Print URLs as a raw comma-separated string
|
|
fmt.Println(strings.Join(urls, ","))
|
|
}
|
|
}
|
|
|
|
// checkURL reports whether sitemapURL answers a HEAD request with 200 OK.
// TLS certificate verification is skipped so self-signed hosts can be
// probed; a timeout bounds how long a dead host can stall the probe.
func checkURL(sitemapURL string) bool {
	client := &http.Client{
		Timeout: 10 * time.Second,
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}
	resp, err := client.Head(sitemapURL)
	if err != nil {
		return false
	}
	// Close the (typically empty) HEAD response body so the underlying
	// connection can be reused instead of leaking.
	defer resp.Body.Close()
	return resp.StatusCode == http.StatusOK
}
|
|
|
|
// writeURLsToCSV writes each URL as its own single-column CSV record to
// filename, creating or truncating the file. It returns any error from
// creating the file, writing a record, or flushing the CSV buffer.
func writeURLsToCSV(filename string, urls []string) error {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	defer file.Close()

	writer := csv.NewWriter(file)
	for _, u := range urls {
		if err := writer.Write([]string{u}); err != nil {
			return err
		}
	}
	// Flush explicitly rather than via defer: a deferred Flush runs after
	// the return value is fixed, silently discarding buffered write errors.
	writer.Flush()
	return writer.Error()
}
|
|
|