97 lines
2.8 KiB
Go
97 lines
2.8 KiB
Go
package report
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"sort"
|
|
|
|
"urlcrawler/internal/linkcheck"
|
|
)
|
|
|
|
type Report struct {
|
|
Target string `json:"target"`
|
|
CrawledURLs []string `json:"crawledUrls"`
|
|
SitemapURLs []string `json:"sitemapUrls,omitempty"`
|
|
CrawlErrors map[string]string `json:"crawlErrors,omitempty"`
|
|
LinkStatuses []linkcheck.LinkStatus `json:"linkStatuses"`
|
|
PageOutlinks map[string][]string `json:"pageOutlinks"`
|
|
LinkSources map[string][]string `json:"linkSources"`
|
|
MissingInSitemap []string `json:"missingInSitemap,omitempty"`
|
|
InSitemapNotCrawled []string `json:"inSitemapNotCrawled,omitempty"`
|
|
}
|
|
|
|
func Build(target string, crawled map[string]struct{}, sitemap map[string]struct{}, crawlErrs map[string]error, check linkcheck.Results, outlinks map[string]map[string]struct{}) Report {
|
|
crawledList := keys(crawled)
|
|
sitemapList := keys(sitemap)
|
|
crawlErrMap := make(map[string]string, len(crawlErrs))
|
|
for k, v := range crawlErrs {
|
|
crawlErrMap[k] = v.Error()
|
|
}
|
|
|
|
missing := difference(crawled, sitemap)
|
|
missingList := keys(missing)
|
|
inSmNotCrawled := difference(sitemap, crawled)
|
|
inSmNotCrawledList := keys(inSmNotCrawled)
|
|
|
|
pageOut := make(map[string][]string, len(outlinks))
|
|
linkSrc := make(map[string][]string)
|
|
for page, set := range outlinks {
|
|
lst := keys(set)
|
|
pageOut[page] = lst
|
|
for _, u := range lst {
|
|
linkSrc[u] = append(linkSrc[u], page)
|
|
}
|
|
}
|
|
|
|
return Report{
|
|
Target: target,
|
|
CrawledURLs: crawledList,
|
|
SitemapURLs: sitemapList,
|
|
CrawlErrors: crawlErrMap,
|
|
LinkStatuses: check.Statuses,
|
|
PageOutlinks: pageOut,
|
|
LinkSources: linkSrc,
|
|
MissingInSitemap: missingList,
|
|
InSitemapNotCrawled: inSmNotCrawledList,
|
|
}
|
|
}
|
|
|
|
func PrintText(w io.Writer, r Report) {
|
|
fmt.Fprintf(w, "Target: %s\n\n", r.Target)
|
|
fmt.Fprintf(w, "Crawled URLs: %d\n", len(r.CrawledURLs))
|
|
fmt.Fprintf(w, "Sitemap URLs: %d\n", len(r.SitemapURLs))
|
|
fmt.Fprintf(w, "Links checked: %d\n", len(r.LinkStatuses))
|
|
fmt.Fprintf(w, "Missing in sitemap: %d\n", len(r.MissingInSitemap))
|
|
fmt.Fprintf(w, "In sitemap not crawled: %d\n\n", len(r.InSitemapNotCrawled))
|
|
|
|
// Keep text output concise; details available in JSON
|
|
}
|
|
|
|
func keys[T comparable](m map[T]struct{}) []T {
|
|
res := make([]T, 0, len(m))
|
|
for k := range m {
|
|
res = append(res, k)
|
|
}
|
|
sort.Slice(res, func(i, j int) bool { return asString(res[i]) < asString(res[j]) })
|
|
return res
|
|
}
|
|
|
|
// asString renders v as text. A value whose dynamic type is exactly string
// is returned unchanged; anything else is formatted with fmt's %v verb.
func asString[T any](v T) string {
	if text, isString := any(v).(string); isString {
		return text
	}
	return fmt.Sprintf("%v", v)
}
|
|
|
|
// difference returns a new set holding every key of a that is not a key of
// b. Neither input is modified, and the result is never nil.
func difference(a, b map[string]struct{}) map[string]struct{} {
	onlyInA := map[string]struct{}{}
	for key := range a {
		if _, shared := b[key]; shared {
			continue
		}
		onlyInA[key] = struct{}{}
	}
	return onlyInA
}
|