feat: default-on JSON reports and exports; set export-dir default to exports
parent a3d277488f
commit bbb7808d1f

main.go | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 155 insertions(+)
@@ -2,12 +2,15 @@ package main

 import (
 	"context"
+	"encoding/csv"
 	"encoding/json"
 	"errors"
 	"flag"
 	"fmt"
 	"net/http"
+	"net/url"
 	"os"
+	"path/filepath"
 	"strings"
 	"sync/atomic"
 	"time"
@@ -28,6 +31,7 @@ func main() {
 	var sameHostOnly bool
 	var output string
 	var quiet bool
+	var exportDir string

 	flag.StringVar(&target, "target", "", "Target site URL (e.g., https://example.com)")
 	flag.IntVar(&concurrency, "concurrency", 10, "Number of concurrent workers")
@@ -37,6 +41,7 @@ func main() {
 	flag.BoolVar(&sameHostOnly, "same-host-only", true, "Limit crawl to the same host as target")
 	flag.StringVar(&output, "output", "text", "Output format: text|json")
 	flag.BoolVar(&quiet, "quiet", false, "Suppress progress output")
+	flag.StringVar(&exportDir, "export-dir", "exports", "Directory to write CSV/NDJSON exports into (set empty to disable)")
 	flag.Parse()

 	if strings.TrimSpace(target) == "" {
@@ -184,6 +189,17 @@ func main() {

 	reports := report.Build(target, visited, smURLs, crawlErrs, checkResults, outlinks, meta, params, pages, robots)

+	if exportDir != "" {
+		if err := exportAll(exportDir, reports); err != nil {
+			fmt.Fprintf(os.Stderr, "export error: %v\n", err)
+		}
+	}
+
+	// Save JSON report to ./reports/<host>.json by default (ignored by git)
+	if err := saveReportJSON("reports", reports); err != nil {
+		fmt.Fprintf(os.Stderr, "save report error: %v\n", err)
+	}
+
 	switch output {
 	case "json":
 		enc := json.NewEncoder(os.Stdout)
@@ -204,3 +220,142 @@ func truncateForTTY(s string, max int) string {
 	}
 	return s[:max-1] + "…"
 }
+
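+// exportAll writes CSV and NDJSON exports for report r into baseDir/<host>/.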
+func exportAll(baseDir string, r report.Report) error {
+	u, err := url.Parse(r.Target)
+	if err != nil || u.Host == "" {
+		return fmt.Errorf("invalid target for export: %s", r.Target)
+	}
+	dir := filepath.Join(baseDir, u.Host)
+	if err := os.MkdirAll(dir, 0o755); err != nil {
+		return err
+	}
+	if err := exportCSVPages(filepath.Join(dir, "pages.csv"), r); err != nil {
+		return err
+	}
+	if err := exportCSVLinks(filepath.Join(dir, "links.csv"), r); err != nil {
+		return err
+	}
+	if err := exportNDJSON(filepath.Join(dir, "pages.ndjson"), pagesToNDJSON(r)); err != nil {
+		return err
+	}
+	if err := exportNDJSON(filepath.Join(dir, "links.ndjson"), linksToNDJSON(r)); err != nil {
+		return err
+	}
+	if err := exportNDJSON(filepath.Join(dir, "link_statuses.ndjson"), linkStatusesToNDJSON(r)); err != nil {
+		return err
+	}
+	return nil
+}
+
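+// exportCSVPages writes one CSV row per crawled page in r.Pages.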
+func exportCSVPages(path string, r report.Report) error {
+	f, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	w := csv.NewWriter(f)
+	_ = w.Write([]string{"url", "title", "responseTimeMs", "contentLength", "depth"})
+	for u, pm := range r.Pages {
+		rec := []string{u, pm.Title, fmt.Sprintf("%d", pm.ResponseTimeMs), fmt.Sprintf("%d", pm.ContentLength), fmt.Sprintf("%d", pm.Depth)}
+		_ = w.Write(rec)
+	}
+	w.Flush() // flush before Error so flush-time write errors are reported
+	return w.Error()
+}
+
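+// exportCSVLinks writes one CSV row per (source, target) outlink pair.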
+func exportCSVLinks(path string, r report.Report) error {
+	f, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	w := csv.NewWriter(f)
+	_ = w.Write([]string{"sourceUrl", "targetUrl"})
+	for src, lst := range r.PageOutlinks {
+		for _, dst := range lst {
+			_ = w.Write([]string{src, dst})
+		}
+	}
+	w.Flush() // flush before Error so flush-time write errors are reported
+	return w.Error()
+}
+
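+// ndjsonItem is a single JSON-encodable record in an NDJSON export.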
+type ndjsonItem interface{}
+
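+// exportNDJSON encodes each item as one JSON object per line (NDJSON).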
+func exportNDJSON(path string, items []ndjsonItem) error {
+	f, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	enc := json.NewEncoder(f)
+	for _, it := range items {
+		if err := enc.Encode(it); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
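+// pagesToNDJSON flattens r.Pages into per-page NDJSON records.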
+func pagesToNDJSON(r report.Report) []ndjsonItem {
+	res := make([]ndjsonItem, 0, len(r.Pages))
+	for u, pm := range r.Pages {
+		res = append(res, map[string]any{
+			"type":           "page",
+			"url":            u,
+			"title":          pm.Title,
+			"responseTimeMs": pm.ResponseTimeMs,
+			"contentLength":  pm.ContentLength,
+			"depth":          pm.Depth,
+		})
+	}
+	return res
+}
+
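+// linksToNDJSON flattens r.PageOutlinks into per-link NDJSON records.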
+func linksToNDJSON(r report.Report) []ndjsonItem {
+	var res []ndjsonItem
+	for src, lst := range r.PageOutlinks {
+		for _, dst := range lst {
+			res = append(res, map[string]any{
+				"type": "link",
+				"src":  src,
+				"dest": dst,
+			})
+		}
+	}
+	return res
+}
+
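+// linkStatusesToNDJSON converts r.LinkStatuses into NDJSON status records.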
+func linkStatusesToNDJSON(r report.Report) []ndjsonItem {
+	res := make([]ndjsonItem, 0, len(r.LinkStatuses))
+	for _, ls := range r.LinkStatuses {
+		res = append(res, map[string]any{
+			"type":       "link_status",
+			"url":        ls.URL,
+			"statusCode": ls.StatusCode,
+			"ok":         ls.OK,
+			"error":      ls.Err,
+		})
+	}
+	return res
+}
+
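+// saveReportJSON writes the full report, pretty-printed, to baseDir/<host>.json.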
+func saveReportJSON(baseDir string, r report.Report) error {
+	u, err := url.Parse(r.Target)
+	if err != nil || u.Host == "" {
+		return fmt.Errorf("invalid target for save: %s", r.Target)
+	}
+	if err := os.MkdirAll(baseDir, 0o755); err != nil {
+		return err
+	}
+	path := filepath.Join(baseDir, u.Host+".json")
+	f, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	enc := json.NewEncoder(f)
+	enc.SetIndent("", " ")
+	return enc.Encode(r)
+}
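
For reference, the NDJSON exports introduced here can be consumed line by line with the standard library. Below is a minimal sketch of a reader for pages.ndjson; the pageRecord struct, the example path, and the host directory are illustrative assumptions, with only the JSON field names taken from pagesToNDJSON above:

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"log"
	"os"
)

// pageRecord mirrors the map emitted by pagesToNDJSON; the struct itself
// is hypothetical, only the JSON field names come from the commit.
type pageRecord struct {
	Type           string `json:"type"`
	URL            string `json:"url"`
	Title          string `json:"title"`
	ResponseTimeMs int64  `json:"responseTimeMs"`
	ContentLength  int64  `json:"contentLength"`
	Depth          int    `json:"depth"`
}

func main() {
	// Hypothetical export path produced by a crawl of https://example.com
	// with the new defaults (-export-dir=exports).
	f, err := os.Open("exports/example.com/pages.ndjson")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Each line is one self-contained JSON object, so a plain line
	// scanner plus json.Unmarshal is enough.
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		var p pageRecord
		if err := json.Unmarshal(sc.Bytes(), &p); err != nil {
			log.Fatal(err)
		}
		fmt.Printf("depth=%d\t%s\t%q\n", p.Depth, p.URL, p.Title)
	}
	if err := sc.Err(); err != nil {
		log.Fatal(err)
	}
}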