feat(report): add metadata/params/stats + URL normalization; docs: schema; chore: update TODO; test: fresh reports
This commit is contained in:
parent
e7b4d33971
commit
3eb9ab48bf
8
TODO.md
8
TODO.md
|
@ -3,10 +3,10 @@
|
||||||
Prioritized from easiest/low-risk to more involved work. Check off as we ship.
|
Prioritized from easiest/low-risk to more involved work. Check off as we ship.
|
||||||
|
|
||||||
### Quick wins (target v0.0.2)
|
### Quick wins (target v0.0.2)
|
||||||
- [ ] Add crawl metadata (startedAt, finishedAt, durationMs)
|
- [x] Add crawl metadata (startedAt, finishedAt, durationMs)
|
||||||
- [ ] Include run parameters in report (maxDepth, concurrency, timeout, userAgent, sameHostOnly)
|
- [x] Include run parameters in report (maxDepth, concurrency, timeout, userAgent, sameHostOnly)
|
||||||
- [ ] Status histogram (2xx/3xx/4xx/5xx totals) in summary
|
- [x] Status histogram (2xx/3xx/4xx/5xx totals) in summary
|
||||||
- [ ] Normalize and dedupe trailing `/.` URL variants in output
|
- [x] Normalize and dedupe trailing `/.` URL variants in output
|
||||||
- [ ] Add compact `reportSummary` text block to JSON
|
- [ ] Add compact `reportSummary` text block to JSON
|
||||||
- [ ] Top external domains with counts
|
- [ ] Top external domains with counts
|
||||||
- [ ] Broken links sample (first N) + per-domain broken counts
|
- [ ] Broken links sample (first N) + per-domain broken counts
|
||||||
|
|
|
@ -18,31 +18,80 @@ type Report struct {
|
||||||
LinkSources map[string][]string `json:"linkSources"`
|
LinkSources map[string][]string `json:"linkSources"`
|
||||||
MissingInSitemap []string `json:"missingInSitemap,omitempty"`
|
MissingInSitemap []string `json:"missingInSitemap,omitempty"`
|
||||||
InSitemapNotCrawled []string `json:"inSitemapNotCrawled,omitempty"`
|
InSitemapNotCrawled []string `json:"inSitemapNotCrawled,omitempty"`
|
||||||
|
Metadata Metadata `json:"metadata"`
|
||||||
|
Params Params `json:"params"`
|
||||||
|
Stats Stats `json:"stats"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func Build(target string, crawled map[string]struct{}, sitemap map[string]struct{}, crawlErrs map[string]error, check linkcheck.Results, outlinks map[string]map[string]struct{}) Report {
|
type Metadata struct {
|
||||||
crawledList := keys(crawled)
|
StartedAt string `json:"startedAt"` // RFC3339
|
||||||
sitemapList := keys(sitemap)
|
FinishedAt string `json:"finishedAt"` // RFC3339
|
||||||
|
DurationMs int64 `json:"durationMs"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Params records the run parameters that were in effect for a crawl so the
// report is self-describing. It is serialized under the "params" key.
type Params struct {
	// MaxDepth is the crawl depth limit the run was started with.
	MaxDepth int `json:"maxDepth"`
	// Concurrency is the concurrency setting the run was started with.
	Concurrency int `json:"concurrency"`
	// TimeoutMs is the configured timeout, expressed in milliseconds.
	TimeoutMs int64 `json:"timeoutMs"`
	// UserAgent is the user-agent string configured for the run.
	UserAgent string `json:"userAgent"`
	// SameHostOnly reports whether the same-host-only option was enabled.
	SameHostOnly bool `json:"sameHostOnly"`
}
|
||||||
|
|
||||||
|
// Stats is a summary histogram of link-check results, serialized under the
// "stats" key. Each checked link is counted once in OK or Broken and once in
// exactly one of the status-class buckets.
type Stats struct {
	// OK counts links whose check result was flagged OK.
	OK int `json:"ok"`
	// Broken counts links whose check result was not flagged OK.
	Broken int `json:"broken"`
	// Status2xx counts links with a status code in [200, 300).
	Status2xx int `json:"status2xx"`
	// Status3xx counts links with a status code in [300, 400).
	Status3xx int `json:"status3xx"`
	// Status4xx counts links with a status code in [400, 500).
	Status4xx int `json:"status4xx"`
	// Status5xx counts links with a status code in [500, 600).
	Status5xx int `json:"status5xx"`
	// StatusOther counts links whose status code falls outside [200, 600).
	StatusOther int `json:"statusOther"`
}
|
||||||
|
|
||||||
|
func Build(target string, crawled map[string]struct{}, sitemap map[string]struct{}, crawlErrs map[string]error, check linkcheck.Results, outlinks map[string]map[string]struct{}, meta Metadata, params Params) Report {
|
||||||
|
crawledList := sanitizeURLs(keys(crawled))
|
||||||
|
sitemapList := sanitizeURLs(keys(sitemap))
|
||||||
crawlErrMap := make(map[string]string, len(crawlErrs))
|
crawlErrMap := make(map[string]string, len(crawlErrs))
|
||||||
for k, v := range crawlErrs {
|
for k, v := range crawlErrs {
|
||||||
crawlErrMap[k] = v.Error()
|
crawlErrMap[k] = v.Error()
|
||||||
}
|
}
|
||||||
|
|
||||||
missing := difference(crawled, sitemap)
|
missing := difference(crawled, sitemap)
|
||||||
missingList := keys(missing)
|
missingList := sanitizeURLs(keys(missing))
|
||||||
inSmNotCrawled := difference(sitemap, crawled)
|
inSmNotCrawled := difference(sitemap, crawled)
|
||||||
inSmNotCrawledList := keys(inSmNotCrawled)
|
inSmNotCrawledList := sanitizeURLs(keys(inSmNotCrawled))
|
||||||
|
|
||||||
pageOut := make(map[string][]string, len(outlinks))
|
pageOut := make(map[string][]string, len(outlinks))
|
||||||
linkSrc := make(map[string][]string)
|
linkSrc := make(map[string][]string)
|
||||||
for page, set := range outlinks {
|
for page, set := range outlinks {
|
||||||
lst := keys(set)
|
lst := sanitizeURLs(keys(set))
|
||||||
pageOut[page] = lst
|
pageOut[page] = lst
|
||||||
for _, u := range lst {
|
for _, u := range lst {
|
||||||
linkSrc[u] = append(linkSrc[u], page)
|
linkSrc[u] = append(linkSrc[u], page)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute simple status histogram
|
||||||
|
var st Stats
|
||||||
|
for _, ls := range check.Statuses {
|
||||||
|
if ls.OK {
|
||||||
|
st.OK++
|
||||||
|
} else {
|
||||||
|
st.Broken++
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case ls.StatusCode >= 200 && ls.StatusCode < 300:
|
||||||
|
st.Status2xx++
|
||||||
|
case ls.StatusCode >= 300 && ls.StatusCode < 400:
|
||||||
|
st.Status3xx++
|
||||||
|
case ls.StatusCode >= 400 && ls.StatusCode < 500:
|
||||||
|
st.Status4xx++
|
||||||
|
case ls.StatusCode >= 500 && ls.StatusCode < 600:
|
||||||
|
st.Status5xx++
|
||||||
|
default:
|
||||||
|
st.StatusOther++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return Report{
|
return Report{
|
||||||
Target: target,
|
Target: target,
|
||||||
CrawledURLs: crawledList,
|
CrawledURLs: crawledList,
|
||||||
|
@ -53,6 +102,9 @@ func Build(target string, crawled map[string]struct{}, sitemap map[string]struct
|
||||||
LinkSources: linkSrc,
|
LinkSources: linkSrc,
|
||||||
MissingInSitemap: missingList,
|
MissingInSitemap: missingList,
|
||||||
InSitemapNotCrawled: inSmNotCrawledList,
|
InSitemapNotCrawled: inSmNotCrawledList,
|
||||||
|
Metadata: meta,
|
||||||
|
Params: params,
|
||||||
|
Stats: st,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,3 +146,20 @@ func difference(a, b map[string]struct{}) map[string]struct{} {
|
||||||
}
|
}
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sanitizeURLs returns a new, sorted slice holding the sanitized form of every
// entry in urls (see sanitizeURL). Entries that become identical once
// sanitized, such as "https://host/" and "https://host/.", are collapsed into
// a single entry so the output never lists the same URL twice.
//
// The input slice is not modified. The result is always non-nil, so an empty
// input yields an empty (not nil) slice.
func sanitizeURLs(urls []string) []string {
	out := make([]string, 0, len(urls))
	for _, u := range urls {
		out = append(out, sanitizeURL(u))
	}
	sort.Strings(out)

	// Sorting places equal strings next to each other, so duplicates can be
	// dropped in place by keeping only entries that differ from the last one
	// kept. Writes never run ahead of the read position, so reusing the
	// backing array is safe.
	uniq := out[:0]
	for i, u := range out {
		if i == 0 || u != uniq[len(uniq)-1] {
			uniq = append(uniq, u)
		}
	}
	return uniq
}

// sanitizeURL rewrites a URL ending in "/." so that it ends in "/" instead.
// Any other string, including the empty string and a bare ".", is returned
// unchanged.
func sanitizeURL(u string) string {
	const suffix = "/."
	if n := len(u); n >= len(suffix) && u[n-len(suffix):] == suffix {
		// Drop only the trailing "." and keep the slash.
		return u[:n-1]
	}
	return u
}
|
||||||
|
|
17
main.go
17
main.go
|
@ -47,6 +47,17 @@ func main() {
|
||||||
client := &http.Client{Timeout: timeout}
|
client := &http.Client{Timeout: timeout}
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
|
// Report metadata
|
||||||
|
started := time.Now()
|
||||||
|
meta := report.Metadata{StartedAt: started.UTC().Format(time.RFC3339)}
|
||||||
|
params := report.Params{
|
||||||
|
MaxDepth: maxDepth,
|
||||||
|
Concurrency: concurrency,
|
||||||
|
TimeoutMs: timeout.Milliseconds(),
|
||||||
|
UserAgent: userAgent,
|
||||||
|
SameHostOnly: sameHostOnly,
|
||||||
|
}
|
||||||
|
|
||||||
fmt.Fprintf(os.Stderr, "Starting crawl of %s (depth: %d)...\n", target, maxDepth)
|
fmt.Fprintf(os.Stderr, "Starting crawl of %s (depth: %d)...\n", target, maxDepth)
|
||||||
|
|
||||||
// Setup progress counters
|
// Setup progress counters
|
||||||
|
@ -137,8 +148,12 @@ func main() {
|
||||||
urlsVisited.Load(), urlsErrored.Load())
|
urlsVisited.Load(), urlsErrored.Load())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
finished := time.Now()
|
||||||
|
meta.FinishedAt = finished.UTC().Format(time.RFC3339)
|
||||||
|
meta.DurationMs = finished.Sub(started).Milliseconds()
|
||||||
|
|
||||||
fmt.Fprintf(os.Stderr, "Building report...\n")
|
fmt.Fprintf(os.Stderr, "Building report...\n")
|
||||||
reports := report.Build(target, visited, smURLs, crawlErrs, checkResults, outlinks)
|
reports := report.Build(target, visited, smURLs, crawlErrs, checkResults, outlinks, meta, params)
|
||||||
|
|
||||||
switch output {
|
switch output {
|
||||||
case "json":
|
case "json":
|
||||||
|
|
|
@ -21,7 +21,28 @@ This document describes the structure of the JSON reports produced by `urlcrawle
|
||||||
"https://example.com/about": ["https://example.com"]
|
"https://example.com/about": ["https://example.com"]
|
||||||
},
|
},
|
||||||
"missingInSitemap": ["https://example.com/page-not-in-sitemap"],
|
"missingInSitemap": ["https://example.com/page-not-in-sitemap"],
|
||||||
"inSitemapNotCrawled": ["https://example.com/deferred"]
|
"inSitemapNotCrawled": ["https://example.com/deferred"],
|
||||||
|
"metadata": {
|
||||||
|
"startedAt": "2025-08-31T12:34:56Z",
|
||||||
|
"finishedAt": "2025-08-31T12:35:57Z",
|
||||||
|
"durationMs": 61000
|
||||||
|
},
|
||||||
|
"params": {
|
||||||
|
"maxDepth": 1,
|
||||||
|
"concurrency": 5,
|
||||||
|
"timeoutMs": 5000,
|
||||||
|
"userAgent": "urlcrawler/1.0",
|
||||||
|
"sameHostOnly": true
|
||||||
|
},
|
||||||
|
"stats": {
|
||||||
|
"ok": 12,
|
||||||
|
"broken": 1,
|
||||||
|
"status2xx": 12,
|
||||||
|
"status3xx": 0,
|
||||||
|
"status4xx": 1,
|
||||||
|
"status5xx": 0,
|
||||||
|
"statusOther": 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -49,9 +70,30 @@ This document describes the structure of the JSON reports produced by `urlcrawle
|
||||||
|
|
||||||
- **inSitemapNotCrawled** (string[]; optional): URLs present in the sitemap that were not crawled (e.g., due to depth limits or off-host rules).
|
- **inSitemapNotCrawled** (string[]; optional): URLs present in the sitemap that were not crawled (e.g., due to depth limits or off-host rules).
|
||||||
|
|
||||||
|
- **metadata** (object): Crawl timing information.
|
||||||
|
- **startedAt** (string, RFC3339)
|
||||||
|
- **finishedAt** (string, RFC3339)
|
||||||
|
- **durationMs** (number)
|
||||||
|
|
||||||
|
- **params** (object): Parameters used for the run.
|
||||||
|
- **maxDepth** (number)
|
||||||
|
- **concurrency** (number)
|
||||||
|
- **timeoutMs** (number)
|
||||||
|
- **userAgent** (string)
|
||||||
|
- **sameHostOnly** (boolean)
|
||||||
|
|
||||||
|
- **stats** (object): Summary of link status results.
|
||||||
|
- **ok** (number)
|
||||||
|
- **broken** (number)
|
||||||
|
- **status2xx** (number)
|
||||||
|
- **status3xx** (number)
|
||||||
|
- **status4xx** (number)
|
||||||
|
- **status5xx** (number)
|
||||||
|
- **statusOther** (number)
|
||||||
|
|
||||||
### Notes
|
### Notes
|
||||||
|
|
||||||
- URLs are normalized and deduplicated during crawl.
|
- URLs are normalized and deduplicated during crawl. Minor variants like trailing `/.` are normalized in output.
|
||||||
- Content-type filtering: only `text/html` pages are parsed for outlinks.
|
- Content-type filtering: only `text/html` pages are parsed for outlinks.
|
||||||
- Sitemap fetching is best-effort; absence is not treated as an error.
|
- Sitemap fetching is best-effort; absence is not treated as an error.
|
||||||
- The JSON lists are sorted to produce stable outputs across runs.
|
- The JSON lists are sorted to produce stable outputs across runs.
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,330 @@
|
||||||
|
{
|
||||||
|
"target": "https://lilsgym.ca",
|
||||||
|
"crawledUrls": [
|
||||||
|
"https://lilsgym.ca",
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training",
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership",
|
||||||
|
"https://lilsgym.ca/facility.html",
|
||||||
|
"https://lilsgym.ca/grouptraining.html",
|
||||||
|
"https://lilsgym.ca/personaltraining.html"
|
||||||
|
],
|
||||||
|
"sitemapUrls": [
|
||||||
|
"https://lilsgym.ca/",
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/facility.html",
|
||||||
|
"https://lilsgym.ca/grouptraining.html",
|
||||||
|
"https://lilsgym.ca/personaltraining.html",
|
||||||
|
"https://lilsgym.ca/policies.html",
|
||||||
|
"https://lilsgym.ca/privacy.html"
|
||||||
|
],
|
||||||
|
"linkStatuses": [
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0313_00-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/facility.html",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0333_00-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/facility-new/IMG_6741.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/personaltraining.html",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/contact.html",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0335_00-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0314_00-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/facility-new/IMG_6731.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_9115.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/contact.html?type=membership",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0332_00-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/facility-new/IMG_6739.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/grouptraining.html",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0339_00-1-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/contact.html?type=group-training",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://titan-training.ca/",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0331_00-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://maps.google.com/?q=32+Weber+St+W,+Kitchener,+ON+N2H+3Z2",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0306_00-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/facility-new/rotated/IMG_6738_rotated.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://lilsgym.ca/assets/img/webp/IMG_0323_00-1024x768.webp",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.blackswanstrength.com/",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.mutantfrogfitness.ca/",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://booking.setmore.com/scheduleappointment/0b712222-be8d-4521-afc7-afbed7d7a4b4?lang=",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"pageOutlinks": {
|
||||||
|
"https://lilsgym.ca": [
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/grouptraining.html",
|
||||||
|
"https://lilsgym.ca/personaltraining.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/contact.html": [
|
||||||
|
"https://booking.setmore.com/scheduleappointment/0b712222-be8d-4521-afc7-afbed7d7a4b4?lang=",
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://maps.google.com/?q=32+Weber+St+W,+Kitchener,+ON+N2H+3Z2"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training": [
|
||||||
|
"https://booking.setmore.com/scheduleappointment/0b712222-be8d-4521-afc7-afbed7d7a4b4?lang=",
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training",
|
||||||
|
"https://maps.google.com/?q=32+Weber+St+W,+Kitchener,+ON+N2H+3Z2"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership": [
|
||||||
|
"https://booking.setmore.com/scheduleappointment/0b712222-be8d-4521-afc7-afbed7d7a4b4?lang=",
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership",
|
||||||
|
"https://maps.google.com/?q=32+Weber+St+W,+Kitchener,+ON+N2H+3Z2"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/facility.html": [
|
||||||
|
"https://lilsgym.ca/assets/img/facility-new/IMG_6731.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/facility-new/IMG_6739.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/facility-new/IMG_6741.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/facility-new/rotated/IMG_6738_rotated.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0306_00-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0313_00-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0314_00-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0323_00-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0331_00-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0332_00-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0333_00-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0335_00-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0339_00-1-1024x768.webp",
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_9115.webp",
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/facility.html",
|
||||||
|
"https://lilsgym.ca/grouptraining.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/grouptraining.html": [
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training",
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership",
|
||||||
|
"https://lilsgym.ca/grouptraining.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/personaltraining.html": [
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/facility.html",
|
||||||
|
"https://lilsgym.ca/personaltraining.html",
|
||||||
|
"https://titan-training.ca/",
|
||||||
|
"https://www.blackswanstrength.com/",
|
||||||
|
"https://www.mutantfrogfitness.ca/"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"linkSources": {
|
||||||
|
"https://booking.setmore.com/scheduleappointment/0b712222-be8d-4521-afc7-afbed7d7a4b4?lang=": [
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership",
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/facility-new/IMG_6731.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/facility-new/IMG_6739.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/facility-new/IMG_6741.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/facility-new/rotated/IMG_6738_rotated.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0306_00-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0313_00-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0314_00-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0323_00-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0331_00-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0332_00-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0333_00-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0335_00-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_0339_00-1-1024x768.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/assets/img/webp/IMG_9115.webp": [
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/contact.html": [
|
||||||
|
"https://lilsgym.ca/grouptraining.html",
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/personaltraining.html",
|
||||||
|
"https://lilsgym.ca/facility.html",
|
||||||
|
"https://lilsgym.ca"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training": [
|
||||||
|
"https://lilsgym.ca/grouptraining.html",
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership": [
|
||||||
|
"https://lilsgym.ca/grouptraining.html",
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/facility.html": [
|
||||||
|
"https://lilsgym.ca/personaltraining.html",
|
||||||
|
"https://lilsgym.ca/facility.html"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/grouptraining.html": [
|
||||||
|
"https://lilsgym.ca/grouptraining.html",
|
||||||
|
"https://lilsgym.ca/facility.html",
|
||||||
|
"https://lilsgym.ca"
|
||||||
|
],
|
||||||
|
"https://lilsgym.ca/personaltraining.html": [
|
||||||
|
"https://lilsgym.ca/personaltraining.html",
|
||||||
|
"https://lilsgym.ca"
|
||||||
|
],
|
||||||
|
"https://maps.google.com/?q=32+Weber+St+W,+Kitchener,+ON+N2H+3Z2": [
|
||||||
|
"https://lilsgym.ca/contact.html",
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership",
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training"
|
||||||
|
],
|
||||||
|
"https://titan-training.ca/": [
|
||||||
|
"https://lilsgym.ca/personaltraining.html"
|
||||||
|
],
|
||||||
|
"https://www.blackswanstrength.com/": [
|
||||||
|
"https://lilsgym.ca/personaltraining.html"
|
||||||
|
],
|
||||||
|
"https://www.mutantfrogfitness.ca/": [
|
||||||
|
"https://lilsgym.ca/personaltraining.html"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"missingInSitemap": [
|
||||||
|
"https://lilsgym.ca",
|
||||||
|
"https://lilsgym.ca/contact.html?type=group-training",
|
||||||
|
"https://lilsgym.ca/contact.html?type=membership"
|
||||||
|
],
|
||||||
|
"inSitemapNotCrawled": [
|
||||||
|
"https://lilsgym.ca/",
|
||||||
|
"https://lilsgym.ca/policies.html",
|
||||||
|
"https://lilsgym.ca/privacy.html"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"startedAt": "2025-08-31T13:31:06Z",
|
||||||
|
"finishedAt": "2025-08-31T13:31:08Z",
|
||||||
|
"durationMs": 1610
|
||||||
|
},
|
||||||
|
"params": {
|
||||||
|
"maxDepth": 2,
|
||||||
|
"concurrency": 5,
|
||||||
|
"timeoutMs": 5000,
|
||||||
|
"userAgent": "urlcrawler/1.0",
|
||||||
|
"sameHostOnly": true
|
||||||
|
},
|
||||||
|
"stats": {
|
||||||
|
"ok": 26,
|
||||||
|
"broken": 0,
|
||||||
|
"status2xx": 26,
|
||||||
|
"status3xx": 0,
|
||||||
|
"status4xx": 0,
|
||||||
|
"status5xx": 0,
|
||||||
|
"statusOther": 0
|
||||||
|
}
|
||||||
|
}
|
|
@ -3,7 +3,7 @@
|
||||||
"crawledUrls": [
|
"crawledUrls": [
|
||||||
"https://titan-training.ca",
|
"https://titan-training.ca",
|
||||||
"https://titan-training.ca/",
|
"https://titan-training.ca/",
|
||||||
"https://titan-training.ca/.",
|
"https://titan-training.ca/",
|
||||||
"https://titan-training.ca/cdn-cgi/l/email-protection",
|
"https://titan-training.ca/cdn-cgi/l/email-protection",
|
||||||
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
||||||
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
|
@ -16,7 +16,27 @@
|
||||||
],
|
],
|
||||||
"linkStatuses": [
|
"linkStatuses": [
|
||||||
{
|
{
|
||||||
"url": "https://titan-training.ca/products-list",
|
"url": "https://titan-training.ca",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://titan-training.ca/titan-training.ca",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://www.google.com/maps/search?api=1\u0026query=Google\u0026query_place_id=ChIJwzSnW430K4gRU8zOBshqKAg",
|
||||||
|
"statusCode": 404,
|
||||||
|
"ok": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://titan-training.ca/.",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://titan-training.ca/product-details/product/titan-training.ca",
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
|
@ -26,12 +46,7 @@
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"url": "https://www.cloudflare.com/5xx-error-landing",
|
"url": "https://www.youtube.com/channel/UCOtL1D3s3fBxHJLAyF5kNRA/featured?view_as=public",
|
||||||
"statusCode": 200,
|
|
||||||
"ok": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": "https://titan-training.ca",
|
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
|
@ -41,17 +56,7 @@
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"url": "https://www.youtube.com/channel/UCOtL1D3s3fBxHJLAyF5kNRA/featured?view_as=public",
|
"url": "https://titan-training.ca/products-list",
|
||||||
"statusCode": 200,
|
|
||||||
"ok": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": "https://developers.cloudflare.com/waf/tools/scrape-shield/email-address-obfuscation",
|
|
||||||
"statusCode": 200,
|
|
||||||
"ok": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": "https://developers.cloudflare.com/fundamentals/setup/account/create-account",
|
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
|
@ -60,6 +65,11 @@
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"url": "https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
|
"statusCode": 200,
|
||||||
|
"ok": true
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"url": "https://www.cloudflare.com/sign-up?utm_source=email_protection",
|
"url": "https://www.cloudflare.com/sign-up?utm_source=email_protection",
|
||||||
"statusCode": 403,
|
"statusCode": 403,
|
||||||
|
@ -71,39 +81,29 @@
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"url": "https://titan-training.ca/titan-training.ca",
|
"url": "https://developers.cloudflare.com/waf/tools/scrape-shield/email-address-obfuscation",
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"url": "https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"url": "https://www.cloudflare.com/5xx-error-landing",
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"url": "https://titan-training.ca/.",
|
"url": "https://developers.cloudflare.com/fundamentals/setup/account/create-account",
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"url": "https://www.google.com/maps/search?api=1\u0026query=Google\u0026query_place_id=ChIJwzSnW430K4gRU8zOBshqKAg",
|
|
||||||
"statusCode": 404,
|
|
||||||
"ok": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"url": "https://www.instagram.com/titan__training",
|
"url": "https://www.instagram.com/titan__training",
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"ok": true
|
"ok": true
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": "https://titan-training.ca/product-details/product/titan-training.ca",
|
|
||||||
"statusCode": 200,
|
|
||||||
"ok": true
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"pageOutlinks": {
|
"pageOutlinks": {
|
||||||
"https://titan-training.ca": [
|
"https://titan-training.ca": [
|
||||||
"https://titan-training.ca/.",
|
"https://titan-training.ca/",
|
||||||
"https://titan-training.ca/cdn-cgi/l/email-protection",
|
"https://titan-training.ca/cdn-cgi/l/email-protection",
|
||||||
"https://titan-training.ca/products-list",
|
"https://titan-training.ca/products-list",
|
||||||
"https://titan-training.ca/titan-training.ca",
|
"https://titan-training.ca/titan-training.ca",
|
||||||
|
@ -188,19 +188,17 @@
|
||||||
],
|
],
|
||||||
"https://titan-training.ca/": [
|
"https://titan-training.ca/": [
|
||||||
"https://titan-training.ca/",
|
"https://titan-training.ca/",
|
||||||
"https://titan-training.ca/.",
|
|
||||||
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
||||||
|
"https://titan-training.ca/.",
|
||||||
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
|
"https://titan-training.ca",
|
||||||
"https://titan-training.ca/products-list"
|
"https://titan-training.ca/products-list"
|
||||||
],
|
],
|
||||||
"https://titan-training.ca/.": [
|
|
||||||
"https://titan-training.ca"
|
|
||||||
],
|
|
||||||
"https://titan-training.ca/cdn-cgi/l/email-protection": [
|
"https://titan-training.ca/cdn-cgi/l/email-protection": [
|
||||||
"https://titan-training.ca/",
|
"https://titan-training.ca/",
|
||||||
|
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
||||||
"https://titan-training.ca/.",
|
"https://titan-training.ca/.",
|
||||||
"https://titan-training.ca/titan-training.ca",
|
"https://titan-training.ca/titan-training.ca",
|
||||||
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
|
||||||
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
"https://titan-training.ca",
|
"https://titan-training.ca",
|
||||||
"https://titan-training.ca/products-list"
|
"https://titan-training.ca/products-list"
|
||||||
|
@ -219,9 +217,9 @@
|
||||||
],
|
],
|
||||||
"https://titan-training.ca/products-list": [
|
"https://titan-training.ca/products-list": [
|
||||||
"https://titan-training.ca/",
|
"https://titan-training.ca/",
|
||||||
|
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
||||||
"https://titan-training.ca/.",
|
"https://titan-training.ca/.",
|
||||||
"https://titan-training.ca/titan-training.ca",
|
"https://titan-training.ca/titan-training.ca",
|
||||||
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
|
||||||
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
"https://titan-training.ca",
|
"https://titan-training.ca",
|
||||||
"https://titan-training.ca/products-list"
|
"https://titan-training.ca/products-list"
|
||||||
|
@ -241,9 +239,9 @@
|
||||||
],
|
],
|
||||||
"https://www.facebook.com/titantrainingkw": [
|
"https://www.facebook.com/titantrainingkw": [
|
||||||
"https://titan-training.ca/",
|
"https://titan-training.ca/",
|
||||||
|
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
||||||
"https://titan-training.ca/.",
|
"https://titan-training.ca/.",
|
||||||
"https://titan-training.ca/titan-training.ca",
|
"https://titan-training.ca/titan-training.ca",
|
||||||
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
|
||||||
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
"https://titan-training.ca",
|
"https://titan-training.ca",
|
||||||
"https://titan-training.ca/products-list"
|
"https://titan-training.ca/products-list"
|
||||||
|
@ -256,18 +254,18 @@
|
||||||
],
|
],
|
||||||
"https://www.instagram.com/titan__training": [
|
"https://www.instagram.com/titan__training": [
|
||||||
"https://titan-training.ca/",
|
"https://titan-training.ca/",
|
||||||
|
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
||||||
"https://titan-training.ca/.",
|
"https://titan-training.ca/.",
|
||||||
"https://titan-training.ca/titan-training.ca",
|
"https://titan-training.ca/titan-training.ca",
|
||||||
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
|
||||||
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
"https://titan-training.ca",
|
"https://titan-training.ca",
|
||||||
"https://titan-training.ca/products-list"
|
"https://titan-training.ca/products-list"
|
||||||
],
|
],
|
||||||
"https://www.youtube.com/channel/UCOtL1D3s3fBxHJLAyF5kNRA/featured?view_as=public": [
|
"https://www.youtube.com/channel/UCOtL1D3s3fBxHJLAyF5kNRA/featured?view_as=public": [
|
||||||
"https://titan-training.ca/",
|
"https://titan-training.ca/",
|
||||||
|
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
||||||
"https://titan-training.ca/.",
|
"https://titan-training.ca/.",
|
||||||
"https://titan-training.ca/titan-training.ca",
|
"https://titan-training.ca/titan-training.ca",
|
||||||
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
|
||||||
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
"https://titan-training.ca",
|
"https://titan-training.ca",
|
||||||
"https://titan-training.ca/products-list"
|
"https://titan-training.ca/products-list"
|
||||||
|
@ -276,7 +274,7 @@
|
||||||
"missingInSitemap": [
|
"missingInSitemap": [
|
||||||
"https://titan-training.ca",
|
"https://titan-training.ca",
|
||||||
"https://titan-training.ca/",
|
"https://titan-training.ca/",
|
||||||
"https://titan-training.ca/.",
|
"https://titan-training.ca/",
|
||||||
"https://titan-training.ca/cdn-cgi/l/email-protection",
|
"https://titan-training.ca/cdn-cgi/l/email-protection",
|
||||||
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
"https://titan-training.ca/product-details/product/681330e25a7661691fe205c8",
|
||||||
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
"https://titan-training.ca/product-details/product/681331db52e2115c63435275",
|
||||||
|
@ -286,5 +284,26 @@
|
||||||
"inSitemapNotCrawled": [
|
"inSitemapNotCrawled": [
|
||||||
"https://titan-training.ca/home",
|
"https://titan-training.ca/home",
|
||||||
"https://titan-training.ca/test_path?item=123"
|
"https://titan-training.ca/test_path?item=123"
|
||||||
]
|
],
|
||||||
|
"metadata": {
|
||||||
|
"startedAt": "2025-08-31T13:41:55Z",
|
||||||
|
"finishedAt": "2025-08-31T13:41:58Z",
|
||||||
|
"durationMs": 2547
|
||||||
|
},
|
||||||
|
"params": {
|
||||||
|
"maxDepth": 2,
|
||||||
|
"concurrency": 5,
|
||||||
|
"timeoutMs": 5000,
|
||||||
|
"userAgent": "urlcrawler/1.0",
|
||||||
|
"sameHostOnly": true
|
||||||
|
},
|
||||||
|
"stats": {
|
||||||
|
"ok": 15,
|
||||||
|
"broken": 2,
|
||||||
|
"status2xx": 15,
|
||||||
|
"status3xx": 0,
|
||||||
|
"status4xx": 2,
|
||||||
|
"status5xx": 0,
|
||||||
|
"statusOther": 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue