sitemapper/main.working

146 lines
3.6 KiB
Plaintext

package main
import (
"crypto/tls"
"encoding/csv"
"encoding/xml"
"flag"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"os"
"path"
"strings"
)
// Sitemap is the decoding target for a sitemap XML document.
//
// Only <url> child elements of the document root are captured; each one is
// decoded into an entry of URLs.
type Sitemap struct {
	// URLs holds one entry per <url> element, in document order.
	URLs []URL `xml:"url"`
}
// URL is one <url> entry of a sitemap document.
type URL struct {
	// Loc is the text content of the entry's <loc> element.
	Loc string `xml:"loc"`
}
func main() {
// Define command line flags
csvOutput := flag.Bool("csv", false, "Output URLs as CSV to sitemap.csv")
sitemapURL := flag.String("url", "", "URL of the sitemap")
flag.Parse()
// List of known paths for sitemap.xml
knownPaths := []string{
"/sitemap.xml",
"/sitemap_index.xml",
"/sitemap/sitemap.xml",
"/sitemap/sitemap-index.xml",
}
// If no URL is provided, check common paths
if *sitemapURL == "" {
fmt.Println("No URL provided, checking common paths for sitemap.xml")
baseURL := "https://example.com" // Replace with your base URL if needed
for _, p := range knownPaths {
u, err := url.Parse(baseURL)
if err != nil {
fmt.Printf("Invalid base URL: %v\n", err)
return
}
u.Path = path.Join(u.Path, p)
if checkURL(u.String()) {
*sitemapURL = u.String()
break
}
}
if *sitemapURL == "" {
fmt.Println("Sitemap not found in common paths")
return
}
}
// Create an HTTP client with TLS certificate verification disabled
client := &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
},
}
// Fetch the sitemap XML
resp, err := client.Get(*sitemapURL)
if err != nil {
fmt.Printf("Failed to fetch sitemap: %v\n", err)
return
}
defer resp.Body.Close()
// Read the XML response
data, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Printf("Failed to read response body: %v\n", err)
return
}
// Parse the XML data
var sitemap Sitemap
err = xml.Unmarshal(data, &sitemap)
if err != nil {
fmt.Printf("Failed to parse XML: %v\n", err)
return
}
// Extract URLs
urls := make([]string, len(sitemap.URLs))
for i, url := range sitemap.URLs {
urls[i] = url.Loc
}
if *csvOutput {
// Output URLs to a CSV file
err := writeURLsToCSV("sitemap.csv", urls)
if err != nil {
fmt.Printf("Failed to write CSV: %v\n", err)
return
}
fmt.Println("URLs written to sitemap.csv")
} else {
// Print URLs as a raw comma-separated string
fmt.Println(strings.Join(urls, ","))
}
}
// checkURL reports whether a HEAD request to sitemapURL succeeds with HTTP
// status 200 OK. Any request error (malformed URL, connection failure, ...)
// yields false.
//
// NOTE(review): TLS certificate verification is disabled for this probe, so
// the server's identity is not authenticated — confirm this is intended.
func checkURL(sitemapURL string) bool {
	client := &http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}
	// The transport is private to this call; release its idle connections
	// on the way out instead of leaving them open.
	defer client.CloseIdleConnections()

	resp, err := client.Head(sitemapURL)
	if err != nil {
		return false
	}
	// The body must be closed even though a HEAD response carries no payload.
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}
// writeURLsToCSV creates (or truncates) filename and writes each element of
// urls as a single-column CSV record, one record per line.
//
// It returns the first error encountered while creating the file, writing a
// record, flushing buffered data, or closing the file. A nil return therefore
// means the data was handed to the operating system in full.
func writeURLsToCSV(filename string, urls []string) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	// Surface a Close failure unless an earlier error is already being
	// returned.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	writer := csv.NewWriter(file)
	for _, u := range urls {
		if err := writer.Write([]string{u}); err != nil {
			return err
		}
	}

	// csv.Writer buffers its output; Flush itself returns nothing, so the
	// outcome of the underlying writes has to be read back through Error.
	writer.Flush()
	return writer.Error()
}