Delete main.working
This commit is contained in:
parent
05c98aa4ee
commit
0e5619799a
145
main.working
145
main.working
|
@@ -1,145 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
	"crypto/tls"
	"encoding/csv"
	"encoding/xml"
	"flag"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"time"
)
|
||||
|
||||
// Sitemap is the decode target for a sitemap XML document. It collects every
// <url> child of the document's root element. No XMLName field is declared,
// so the name of the root element itself is not checked during decoding.
type Sitemap struct {
	// URLs holds one entry per <url> element, in document order.
	URLs []URL `xml:"url"`
}

// URL is a single <url> entry of a sitemap. Only the <loc> child is decoded;
// any other child elements are ignored.
type URL struct {
	// Loc is the character data of the entry's <loc> element.
	Loc string `xml:"loc"`
}
|
||||
|
||||
func main() {
|
||||
// Define command line flags
|
||||
csvOutput := flag.Bool("csv", false, "Output URLs as CSV to sitemap.csv")
|
||||
sitemapURL := flag.String("url", "", "URL of the sitemap")
|
||||
flag.Parse()
|
||||
|
||||
// List of known paths for sitemap.xml
|
||||
knownPaths := []string{
|
||||
"/sitemap.xml",
|
||||
"/sitemap_index.xml",
|
||||
"/sitemap/sitemap.xml",
|
||||
"/sitemap/sitemap-index.xml",
|
||||
}
|
||||
|
||||
// If no URL is provided, check common paths
|
||||
if *sitemapURL == "" {
|
||||
fmt.Println("No URL provided, checking common paths for sitemap.xml")
|
||||
baseURL := "https://example.com" // Replace with your base URL if needed
|
||||
for _, p := range knownPaths {
|
||||
u, err := url.Parse(baseURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Invalid base URL: %v\n", err)
|
||||
return
|
||||
}
|
||||
u.Path = path.Join(u.Path, p)
|
||||
if checkURL(u.String()) {
|
||||
*sitemapURL = u.String()
|
||||
break
|
||||
}
|
||||
}
|
||||
if *sitemapURL == "" {
|
||||
fmt.Println("Sitemap not found in common paths")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Create an HTTP client with TLS certificate verification disabled
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
},
|
||||
}
|
||||
|
||||
// Fetch the sitemap XML
|
||||
resp, err := client.Get(*sitemapURL)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to fetch sitemap: %v\n", err)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Read the XML response
|
||||
data, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to read response body: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse the XML data
|
||||
var sitemap Sitemap
|
||||
err = xml.Unmarshal(data, &sitemap)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to parse XML: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Extract URLs
|
||||
urls := make([]string, len(sitemap.URLs))
|
||||
for i, url := range sitemap.URLs {
|
||||
urls[i] = url.Loc
|
||||
}
|
||||
|
||||
if *csvOutput {
|
||||
// Output URLs to a CSV file
|
||||
err := writeURLsToCSV("sitemap.csv", urls)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to write CSV: %v\n", err)
|
||||
return
|
||||
}
|
||||
fmt.Println("URLs written to sitemap.csv")
|
||||
} else {
|
||||
// Print URLs as a raw comma-separated string
|
||||
fmt.Println(strings.Join(urls, ","))
|
||||
}
|
||||
}
|
||||
|
||||
// checkURL reports whether an HTTP HEAD request to sitemapURL completes with
// status 200 OK. Any error from building or sending the request (malformed
// URL, connection failure, timeout) yields false, as does any other status.
//
// NOTE(review): TLS certificate verification is disabled for the probe, so a
// true result does not prove the responding server is authentic.
func checkURL(sitemapURL string) bool {
	client := &http.Client{
		// Bound the probe so an unresponsive host cannot block the caller.
		Timeout: 10 * time.Second,
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}
	// The transport is private to this call; drop its pooled connections on
	// the way out instead of leaving them idle.
	defer client.CloseIdleConnections()

	resp, err := client.Head(sitemapURL)
	if err != nil {
		return false
	}
	// The body must be closed even for HEAD responses, otherwise the
	// underlying connection is leaked.
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}
|
||||
|
||||
// writeURLsToCSV creates (or truncates) filename and writes each element of
// urls as its own single-column CSV record.
//
// It returns the first error encountered while creating the file, writing a
// record, flushing the buffered writer, or closing the file; nil means all
// records reached the file.
func writeURLsToCSV(filename string, urls []string) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}
	// Surface a failed Close, but never mask an earlier write error with it.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	writer := csv.NewWriter(file)
	for _, u := range urls {
		if err := writer.Write([]string{u}); err != nil {
			return err
		}
	}

	// csv.Writer buffers its output, so a write failure may only show up at
	// Flush time; Flush returns nothing, and Error reports what it hit.
	writer.Flush()
	return writer.Error()
}
|
||||
|
Loading…
Reference in New Issue