diff --git a/knip.json b/knip.json
index b7fc9e9..5318986 100644
--- a/knip.json
+++ b/knip.json
@@ -2,7 +2,8 @@
   "$schema": "https://unpkg.com/knip@latest/schema.json",
   "entry": [
     "lib/document_stores/*.js",
-    "lib/key_generators/*.js"
+    "lib/key_generators/*.js",
+    "scripts/generate-sitemap.js"
   ],
   "ignore": [
     "static/**",
diff --git a/package.json b/package.json
index 342947d..22dd352 100644
--- a/package.json
+++ b/package.json
@@ -68,7 +68,8 @@
     "test:security:csp": "node test/security/security_spec.js --test=csp",
     "test:security:cors": "node test/security/security_spec.js --test=cors",
     "test:security:combined": "node test/security/security_spec.js --test=combinedSecurity",
-    "build": "node update-js.js",
+    "build": "node update-js.js && node scripts/generate-sitemap.js",
+    "build:sitemap": "node scripts/generate-sitemap.js",
     "scan:sbom": "./scripts/scan-sbom.sh",
     "scan:trivy": "./scripts/scan-trivy-fs.sh",
     "scan:trivy:image": "./scripts/scan-trivy-image.sh",
diff --git a/scripts/generate-sitemap.js b/scripts/generate-sitemap.js
new file mode 100644
--- /dev/null
+++ b/scripts/generate-sitemap.js
@@ -0,0 +1,136 @@
+#!/usr/bin/env node
+/**
+ * Sitemap generator for Hastebin
+ * Generates static/sitemap.xml (static pages and the MCP discovery endpoint)
+ * and static/robots.txt.
+ */
+
+const fs = require('fs');
+const path = require('path');
+
+// Base URL: HASTEBIN_BASE_URL, then HASTEBIN_SITEMAP_URL, then the default.
+// Trailing slashes are stripped so baseUrl + page.loc never contains "//".
+const baseUrl = (process.env.HASTEBIN_BASE_URL ||
+  process.env.HASTEBIN_SITEMAP_URL ||
+  'https://haste.nixc.us').replace(/\/+$/, '');
+
+// Static pages to include in the sitemap. Each description is emitted as an
+// XML comment above its <url> entry.
+const pages = [
+  {
+    loc: '/',
+    changefreq: 'weekly',
+    priority: '1.0',
+    description: 'Homepage - create and share code snippets'
+  },
+  {
+    loc: '/about',
+    changefreq: 'monthly',
+    priority: '0.8',
+    description: 'About page with documentation and usage instructions'
+  },
+  {
+    loc: '/.well-known/mcp.json',
+    changefreq: 'monthly',
+    priority: '0.7',
+    description: 'MCP (Model Context Protocol) discovery endpoint for AI assistants'
+  }
+];
+
+/**
+ * Escape XML special characters so a value is safe inside element content.
+ * @param {string} value - Raw text.
+ * @returns {string} Escaped text.
+ */
+function escapeXml(value) {
+  return String(value)
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&apos;');
+}
+
+/**
+ * Build sitemap.xml for every entry in pages (sitemaps.org 0.9 protocol).
+ * @returns {string} The sitemap document.
+ */
+function generateSitemap() {
+  // lastmod uses the W3C date form YYYY-MM-DD (UTC build date)
+  const today = new Date().toISOString().split('T')[0];
+
+  let xml = '<?xml version="1.0" encoding="UTF-8"?>\n';
+  xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n';
+
+  for (const page of pages) {
+    xml += '  <!-- ' + page.description + ' -->\n';
+    xml += '  <url>\n';
+    xml += '    <loc>' + escapeXml(baseUrl + page.loc) + '</loc>\n';
+    xml += '    <lastmod>' + today + '</lastmod>\n';
+    xml += '    <changefreq>' + page.changefreq + '</changefreq>\n';
+    xml += '    <priority>' + page.priority + '</priority>\n';
+    xml += '  </url>\n';
+  }
+
+  xml += '</urlset>\n';
+
+  return xml;
+}
+
+/**
+ * Build robots.txt: allow the static pages, MCP discovery and the sitemap,
+ * disallow everything else.
+ * @returns {string} The robots.txt document.
+ */
+function generateRobotsTxt() {
+  const lines = [
+    '# Hastebin robots.txt',
+    '# Updated: ' + new Date().toISOString().split('T')[0],
+    '',
+    'User-agent: *',
+    '',
+    '# Allow static pages',
+    'Allow: /$',
+    'Allow: /about',
+    '',
+    '# Allow MCP discovery for AI assistants and tools',
+    'Allow: /.well-known/',
+    'Allow: /.well-known/mcp.json',
+    '',
+    '# Allow the sitemap referenced below',
+    'Allow: /sitemap.xml',
+    '',
+    '# Disallow individual pastes (ephemeral content)',
+    'Disallow: /raw/',
+    'Disallow: /documents/',
+    '',
+    '# Disallow everything else, including paste URLs (random keys)',
+    '# Pastes are identified by 10-character alphanumeric keys',
+    // A bare key has no ".", so "Disallow: /*.*" would not match it.
+    'Disallow: /',
+    '',
+    '# Sitemap location',
+    'Sitemap: ' + baseUrl + '/sitemap.xml'
+  ];
+
+  return lines.join('\n') + '\n';
+}
+
+// Main execution
+const staticDir = path.join(__dirname, '..', 'static');
+
+// Generate and write sitemap.xml
+const sitemapContent = generateSitemap();
+const sitemapPath = path.join(staticDir, 'sitemap.xml');
+fs.writeFileSync(sitemapPath, sitemapContent, 'utf8');
+console.log('Generated sitemap.xml at', sitemapPath);
+console.log('Base URL:', baseUrl);
+console.log('Pages included:', pages.length);
+
+// Generate and write robots.txt
+const robotsContent = generateRobotsTxt();
+const robotsPath = path.join(staticDir, 'robots.txt');
+fs.writeFileSync(robotsPath, robotsContent, 'utf8');
+console.log('Generated robots.txt at', robotsPath);
+
+console.log('\nSitemap generation complete!');
diff --git a/static/robots.txt b/static/robots.txt
--- a/static/robots.txt
+++ b/static/robots.txt
@@ -1,4 +1,26 @@
+# Hastebin robots.txt
+# Updated: 2026-01-23
+
 User-agent: *
-Disallow: /*
-Allow: /?okparam=
+
+# Allow static pages
 Allow: /$
+Allow: /about
+
+# Allow MCP discovery for AI assistants and tools
+Allow: /.well-known/
+Allow: /.well-known/mcp.json
+
+# Allow the sitemap referenced below
+Allow: /sitemap.xml
+
+# Disallow individual pastes (ephemeral content)
+Disallow: /raw/
+Disallow: /documents/
+
+# Disallow everything else, including paste URLs (random keys)
+# Pastes are identified by 10-character alphanumeric keys
+Disallow: /
+
+# Sitemap location
+Sitemap: https://haste.nixc.us/sitemap.xml
diff --git a/static/sitemap.xml b/static/sitemap.xml
new file mode 100644
--- /dev/null
+++ b/static/sitemap.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <!-- Homepage - create and share code snippets -->
+  <url>
+    <loc>https://haste.nixc.us/</loc>
+    <lastmod>2026-01-23</lastmod>
+    <changefreq>weekly</changefreq>
+    <priority>1.0</priority>
+  </url>
+  <!-- About page with documentation and usage instructions -->
+  <url>
+    <loc>https://haste.nixc.us/about</loc>
+    <lastmod>2026-01-23</lastmod>
+    <changefreq>monthly</changefreq>
+    <priority>0.8</priority>
+  </url>
+  <!-- MCP (Model Context Protocol) discovery endpoint for AI assistants -->
+  <url>
+    <loc>https://haste.nixc.us/.well-known/mcp.json</loc>
+    <lastmod>2026-01-23</lastmod>
+    <changefreq>monthly</changefreq>
+    <priority>0.7</priority>
+  </url>
+</urlset>