Add sitemap.xml generation with MCP discovery endpoint
ci/woodpecker/push/woodpecker Pipeline failed Details

- New build script generates sitemap.xml at build time
- Includes homepage, about page, and /.well-known/mcp.json
- Updated robots.txt to allow crawling of about page and MCP endpoints
- Sitemap encourages search engines to index the MCP discovery path
This commit is contained in:
Colin 2026-01-23 09:27:09 -05:00
parent edc1680f1c
commit ebcb90ca18
Signed by: colin
SSH Key Fingerprint: SHA256:nRPCQTeMFLdGytxRQmPVK9VXY3/ePKQ5lGRyJhT5DY8
5 changed files with 174 additions and 4 deletions

View File

@ -2,7 +2,8 @@
"$schema": "https://unpkg.com/knip@latest/schema.json",
"entry": [
"lib/document_stores/*.js",
"lib/key_generators/*.js"
"lib/key_generators/*.js",
"scripts/generate-sitemap.js"
],
"ignore": [
"static/**",

View File

@ -68,7 +68,8 @@
"test:security:csp": "node test/security/security_spec.js --test=csp",
"test:security:cors": "node test/security/security_spec.js --test=cors",
"test:security:combined": "node test/security/security_spec.js --test=combinedSecurity",
"build": "node update-js.js",
"build": "node update-js.js && node scripts/generate-sitemap.js",
"build:sitemap": "node scripts/generate-sitemap.js",
"scan:sbom": "./scripts/scan-sbom.sh",
"scan:trivy": "./scripts/scan-trivy-fs.sh",
"scan:trivy:image": "./scripts/scan-trivy-image.sh",

114
scripts/generate-sitemap.js Normal file
View File

@ -0,0 +1,114 @@
#!/usr/bin/env node
/**
* Sitemap generator for Hastebin
* Generates sitemap.xml with static pages and MCP discovery endpoint
*/
const fs = require('fs');
const path = require('path');
// Public site origin used to build absolute URLs.
// Precedence: HASTEBIN_BASE_URL, then HASTEBIN_SITEMAP_URL, then the
// built-in default. Unset or empty variables are skipped.
const baseUrl = [
  process.env.HASTEBIN_BASE_URL,
  process.env.HASTEBIN_SITEMAP_URL,
].find(Boolean) || 'https://haste.nixc.us';
// Static pages listed in the sitemap, in output order.
// Each row is [loc, changefreq, priority, description]; rows are expanded
// into objects with those four keys.
const pages = [
  ['/', 'weekly', '1.0', 'Homepage - create and share code snippets'],
  ['/about', 'monthly', '0.8', 'About page with documentation and usage instructions'],
  [
    '/.well-known/mcp.json',
    'monthly',
    '0.7',
    'MCP (Model Context Protocol) discovery endpoint for AI assistants',
  ],
].map(([loc, changefreq, priority, description]) => ({
  loc,
  changefreq,
  priority,
  description,
}));
// Generate sitemap XML
/**
 * Build the sitemap.xml document as a string.
 *
 * Every page path is appended to the site origin to form an absolute URL, and
 * today's UTC date (YYYY-MM-DD) is used as <lastmod> for every entry.
 *
 * @param {string} [siteUrl=baseUrl] - Site origin. Trailing slashes are
 *   stripped so that joining with a "/..." path never produces "//".
 * @param {Array<{loc: string, changefreq: string, priority: string, description: string}>} [entries=pages]
 *   Pages to list, in output order.
 * @returns {string} The complete XML document, terminated by a newline.
 */
function generateSitemap(siteUrl = baseUrl, entries = pages) {
  // Element text must be entity-escaped or a URL containing "&" yields invalid XML.
  const escapeXml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');

  // "--" is not permitted inside an XML comment; collapse runs of hyphens.
  const commentSafe = (value) => String(value).replace(/-{2,}/g, '-');

  const root = String(siteUrl).replace(/\/+$/, '');
  const today = new Date().toISOString().split('T')[0];

  const lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
    ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9',
    ' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">',
    '',
    ' <!-- Hastebin Sitemap -->',
    ' <!-- Generated: ' + today + ' -->',
    ' <!-- Base URL: ' + commentSafe(root) + ' -->',
    '',
  ];

  for (const page of entries) {
    lines.push(
      ' <!-- ' + commentSafe(page.description) + ' -->',
      ' <url>',
      ' <loc>' + escapeXml(root + page.loc) + '</loc>',
      ' <lastmod>' + today + '</lastmod>',
      ' <changefreq>' + escapeXml(page.changefreq) + '</changefreq>',
      ' <priority>' + escapeXml(page.priority) + '</priority>',
      ' </url>',
      ''
    );
  }

  lines.push('</urlset>');
  return lines.join('\n') + '\n';
}
// Generate robots.txt content
/**
 * Build the robots.txt file as a string.
 *
 * The rule set is fixed; only the "Updated" date (today's UTC date,
 * YYYY-MM-DD) and the Sitemap URL vary.
 *
 * @param {string} [siteUrl=baseUrl] - Site origin used for the Sitemap line.
 *   Trailing slashes are stripped so the result never contains "//sitemap.xml".
 * @returns {string} The robots.txt content, terminated by a newline.
 */
function generateRobotsTxt(siteUrl = baseUrl) {
  const root = String(siteUrl).replace(/\/+$/, '');
  const today = new Date().toISOString().split('T')[0];

  // NOTE(review): "Disallow: /*.*" only matches paths that contain a dot. It
  // therefore looks like it does not cover extensionless paste keys, and it
  // does match /sitemap.xml, which has no Allow rule — confirm this is intended.
  const lines = [
    '# Hastebin robots.txt',
    '# Updated: ' + today,
    '',
    'User-agent: *',
    '',
    '# Allow static pages',
    'Allow: /$',
    'Allow: /about',
    '',
    '# Allow MCP discovery for AI assistants and tools',
    'Allow: /.well-known/',
    'Allow: /.well-known/mcp.json',
    '',
    '# Disallow individual pastes (ephemeral content)',
    'Disallow: /raw/',
    'Disallow: /documents/',
    '',
    '# Disallow paste URLs (random keys)',
    '# Pastes are identified by 10-character alphanumeric keys',
    'Disallow: /*.*',
    '',
    '# Sitemap location',
    'Sitemap: ' + root + '/sitemap.xml',
  ];

  return lines.join('\n') + '\n';
}
// Script entry point: render both files and write them into ../static
// (relative to this script), logging what was produced.
const staticDir = path.join(__dirname, '..', 'static');

/**
 * Write `contents` as UTF-8 to `fileName` inside the static directory.
 * @param {string} fileName - File name relative to the static directory.
 * @param {string} contents - Text to write.
 * @returns {string} Full path of the file that was written.
 */
const writeStaticFile = (fileName, contents) => {
  const target = path.join(staticDir, fileName);
  fs.writeFileSync(target, contents, 'utf8');
  return target;
};

// sitemap.xml first, followed by a short summary of what went into it
console.log('Generated sitemap.xml at', writeStaticFile('sitemap.xml', generateSitemap()));
console.log('Base URL:', baseUrl);
console.log('Pages included:', pages.length);

// then robots.txt, which points crawlers at the sitemap
console.log('Generated robots.txt at', writeStaticFile('robots.txt', generateRobotsTxt()));
console.log('\nSitemap generation complete!');

View File

@ -1,4 +1,23 @@
# Hastebin robots.txt
# Updated: 2026-01-23
User-agent: *
Disallow: /*
Allow: /?okparam=
# Allow static pages
Allow: /$
Allow: /about
# Allow MCP discovery for AI assistants and tools
Allow: /.well-known/
Allow: /.well-known/mcp.json
# Disallow individual pastes (ephemeral content)
Disallow: /raw/
Disallow: /documents/
# Disallow paste URLs (random keys)
# Pastes are identified by 10-character alphanumeric keys
Disallow: /*.*
# Sitemap location
Sitemap: https://haste.nixc.us/sitemap.xml

35
static/sitemap.xml Normal file
View File

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
<!-- Hastebin Sitemap -->
<!-- Generated: 2026-01-23 -->
<!-- Base URL: https://haste.nixc.us -->
<!-- Homepage - create and share code snippets -->
<url>
<loc>https://haste.nixc.us/</loc>
<lastmod>2026-01-23</lastmod>
<changefreq>weekly</changefreq>
<priority>1.0</priority>
</url>
<!-- About page with documentation and usage instructions -->
<url>
<loc>https://haste.nixc.us/about</loc>
<lastmod>2026-01-23</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<!-- MCP (Model Context Protocol) discovery endpoint for AI assistants -->
<url>
<loc>https://haste.nixc.us/.well-known/mcp.json</loc>
<lastmod>2026-01-23</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
</urlset>