Add sitemap.xml generation with MCP discovery endpoint
ci/woodpecker/push/woodpecker Pipeline failed
Details
ci/woodpecker/push/woodpecker Pipeline failed
Details
- New build script generates sitemap.xml at build time
- Includes the homepage, the about page, and /.well-known/mcp.json
- Updated robots.txt to allow crawling of the about page and MCP endpoints
- Sitemap encourages search engines to index the MCP discovery path
This commit is contained in:
parent
edc1680f1c
commit
ebcb90ca18
|
|
@ -2,7 +2,8 @@
|
||||||
"$schema": "https://unpkg.com/knip@latest/schema.json",
|
"$schema": "https://unpkg.com/knip@latest/schema.json",
|
||||||
"entry": [
|
"entry": [
|
||||||
"lib/document_stores/*.js",
|
"lib/document_stores/*.js",
|
||||||
"lib/key_generators/*.js"
|
"lib/key_generators/*.js",
|
||||||
|
"scripts/generate-sitemap.js"
|
||||||
],
|
],
|
||||||
"ignore": [
|
"ignore": [
|
||||||
"static/**",
|
"static/**",
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,8 @@
|
||||||
"test:security:csp": "node test/security/security_spec.js --test=csp",
|
"test:security:csp": "node test/security/security_spec.js --test=csp",
|
||||||
"test:security:cors": "node test/security/security_spec.js --test=cors",
|
"test:security:cors": "node test/security/security_spec.js --test=cors",
|
||||||
"test:security:combined": "node test/security/security_spec.js --test=combinedSecurity",
|
"test:security:combined": "node test/security/security_spec.js --test=combinedSecurity",
|
||||||
"build": "node update-js.js",
|
"build": "node update-js.js && node scripts/generate-sitemap.js",
|
||||||
|
"build:sitemap": "node scripts/generate-sitemap.js",
|
||||||
"scan:sbom": "./scripts/scan-sbom.sh",
|
"scan:sbom": "./scripts/scan-sbom.sh",
|
||||||
"scan:trivy": "./scripts/scan-trivy-fs.sh",
|
"scan:trivy": "./scripts/scan-trivy-fs.sh",
|
||||||
"scan:trivy:image": "./scripts/scan-trivy-image.sh",
|
"scan:trivy:image": "./scripts/scan-trivy-image.sh",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,114 @@
|
||||||
|
#!/usr/bin/env node
|
||||||
|
/**
|
||||||
|
* Sitemap generator for Hastebin
|
||||||
|
* Generates sitemap.xml with static pages and MCP discovery endpoint
|
||||||
|
*/
|
||||||
|
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
// Get base URL from environment or use default
|
||||||
|
/**
 * Resolve the public base URL used to build absolute links.
 *
 * Checks HASTEBIN_BASE_URL first, then HASTEBIN_SITEMAP_URL, and falls back
 * to the hard-coded default. Surrounding whitespace and trailing slashes are
 * removed so that appending a path such as "/about" never yields "//about".
 *
 * @param {Object<string, string|undefined>} [env=process.env] - Environment to read from.
 * @returns {string} Base URL without a trailing slash.
 */
function resolveBaseUrl(env = process.env) {
  const candidates = [env.HASTEBIN_BASE_URL, env.HASTEBIN_SITEMAP_URL];
  for (const candidate of candidates) {
    const cleaned = (candidate || '').trim().replace(/\/+$/, '');
    // An unset, empty, or slash-only value falls through to the next source.
    if (cleaned) {
      return cleaned;
    }
  }
  return 'https://haste.nixc.us';
}

const baseUrl = resolveBaseUrl();
|
||||||
|
|
||||||
|
// Static pages to include in sitemap
|
||||||
|
// Each row is [loc, changefreq, priority, description]; rows are expanded
// into the object form consumed by the sitemap generator.
const pages = [
  ['/', 'weekly', '1.0', 'Homepage - create and share code snippets'],
  ['/about', 'monthly', '0.8', 'About page with documentation and usage instructions'],
  [
    '/.well-known/mcp.json',
    'monthly',
    '0.7',
    'MCP (Model Context Protocol) discovery endpoint for AI assistants'
  ]
].map(([loc, changefreq, priority, description]) => ({
  loc,
  changefreq,
  priority,
  description
}));
|
||||||
|
|
||||||
|
// Generate sitemap XML
|
||||||
|
/**
 * Build the sitemap.xml document.
 *
 * Every value written into an element is XML-escaped, text placed inside
 * XML comments has runs of "-" collapsed (a literal "--" is not allowed in
 * a comment), and trailing slashes on the base URL are dropped so that
 * base + "/path" never contains "//".
 *
 * @param {string} [siteUrl=baseUrl] - Absolute base URL of the site.
 * @param {Array<{loc: string, changefreq: string, priority: string, description: string}>} [entries=pages]
 *   Pages to list; `loc` is a path appended to the base URL.
 * @returns {string} The complete XML document, ending with a newline.
 */
function generateSitemap(siteUrl = baseUrl, entries = pages) {
  const escapeXml = (value) =>
    String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  const commentSafe = (value) => String(value).replace(/-{2,}/g, '-');

  const origin = String(siteUrl).replace(/\/+$/, '');
  // lastmod uses the date-only (YYYY-MM-DD) part of the current UTC timestamp.
  const today = new Date().toISOString().split('T')[0];

  const lines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
    ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9',
    ' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">',
    '',
    ' <!-- Hastebin Sitemap -->',
    ` <!-- Generated: ${today} -->`,
    ` <!-- Base URL: ${commentSafe(origin)} -->`,
    ''
  ];

  for (const page of entries) {
    lines.push(
      ` <!-- ${commentSafe(page.description)} -->`,
      ' <url>',
      ` <loc>${escapeXml(origin + page.loc)}</loc>`,
      ` <lastmod>${today}</lastmod>`,
      ` <changefreq>${escapeXml(page.changefreq)}</changefreq>`,
      ` <priority>${escapeXml(page.priority)}</priority>`,
      ' </url>',
      ''
    );
  }

  // The trailing empty entry makes the joined text end with a newline.
  lines.push('</urlset>', '');

  return lines.join('\n');
}
|
||||||
|
|
||||||
|
// Generate robots.txt content
|
||||||
|
/**
 * Build the robots.txt content.
 *
 * All rules for "User-agent: *" are emitted as one contiguous record with
 * no blank lines inside it: parsers that follow the original robots
 * exclusion standard treat a blank line as the end of a record and would
 * otherwise drop every rule. Comment lines are used as separators instead.
 *
 * The sitemap path is explicitly allowed because "Disallow: /*.*" matches
 * "/sitemap.xml" and would otherwise block the file the Sitemap line
 * advertises.
 *
 * @param {string} [siteUrl=baseUrl] - Absolute base URL of the site;
 *   trailing slashes are ignored.
 * @returns {string} robots.txt text, ending with a newline.
 */
function generateRobotsTxt(siteUrl = baseUrl) {
  const origin = String(siteUrl).replace(/\/+$/, '');
  const today = new Date().toISOString().split('T')[0];

  const lines = [
    '# Hastebin robots.txt',
    `# Updated: ${today}`,
    '',
    'User-agent: *',
    '# Allow static pages',
    'Allow: /$',
    'Allow: /about',
    '# Allow MCP discovery for AI assistants and tools',
    'Allow: /.well-known/',
    'Allow: /.well-known/mcp.json',
    '# Allow the sitemap itself (it would otherwise match "Disallow: /*.*")',
    'Allow: /sitemap.xml',
    '# Disallow individual pastes (ephemeral content)',
    'Disallow: /raw/',
    'Disallow: /documents/',
    '# Disallow paste URLs (random keys)',
    '# Pastes are identified by 10-character alphanumeric keys',
    'Disallow: /*.*',
    '',
    '# Sitemap location',
    `Sitemap: ${origin}/sitemap.xml`,
    // The trailing empty entry makes the joined text end with a newline.
    ''
  ];

  return lines.join('\n');
}
|
||||||
|
|
||||||
|
// Main execution
|
||||||
|
// Output goes to the "static" directory that sits next to this script's folder.
const staticDir = path.join(__dirname, '..', 'static');

// Write `contents` as UTF-8 to `fileName` inside staticDir and return the full path.
const writeStatic = (fileName, contents) => {
  const target = path.join(staticDir, fileName);
  fs.writeFileSync(target, contents, 'utf8');
  return target;
};

// sitemap.xml first, followed by a short summary of what went into it.
const sitemapPath = writeStatic('sitemap.xml', generateSitemap());
console.log('Generated sitemap.xml at', sitemapPath);
console.log('Base URL:', baseUrl);
console.log('Pages included:', pages.length);

// robots.txt is regenerated in the same run.
const robotsPath = writeStatic('robots.txt', generateRobotsTxt());
console.log('Generated robots.txt at', robotsPath);

console.log('\nSitemap generation complete!');
|
||||||
|
|
@ -1,4 +1,23 @@
|
||||||
|
# Hastebin robots.txt
|
||||||
|
# Updated: 2026-01-23
|
||||||
|
|
||||||
User-agent: *
|
User-agent: *
|
||||||
Disallow: /*
|
|
||||||
Allow: /?okparam=
|
# Allow static pages
|
||||||
Allow: /$
|
Allow: /$
|
||||||
|
Allow: /about
|
||||||
|
|
||||||
|
# Allow MCP discovery for AI assistants and tools
|
||||||
|
Allow: /.well-known/
|
||||||
|
Allow: /.well-known/mcp.json
|
||||||
|
|
||||||
|
# Disallow individual pastes (ephemeral content)
|
||||||
|
Disallow: /raw/
|
||||||
|
Disallow: /documents/
|
||||||
|
|
||||||
|
# Disallow paste URLs (random keys)
|
||||||
|
# Pastes are identified by 10-character alphanumeric keys
|
||||||
|
Disallow: /*.*
|
||||||
|
|
||||||
|
# Sitemap location
|
||||||
|
Sitemap: https://haste.nixc.us/sitemap.xml
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,35 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
|
||||||
|
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
|
||||||
|
|
||||||
|
<!-- Hastebin Sitemap -->
|
||||||
|
<!-- Generated: 2026-01-23 -->
|
||||||
|
<!-- Base URL: https://haste.nixc.us -->
|
||||||
|
|
||||||
|
<!-- Homepage - create and share code snippets -->
|
||||||
|
<url>
|
||||||
|
<loc>https://haste.nixc.us/</loc>
|
||||||
|
<lastmod>2026-01-23</lastmod>
|
||||||
|
<changefreq>weekly</changefreq>
|
||||||
|
<priority>1.0</priority>
|
||||||
|
</url>
|
||||||
|
|
||||||
|
<!-- About page with documentation and usage instructions -->
|
||||||
|
<url>
|
||||||
|
<loc>https://haste.nixc.us/about</loc>
|
||||||
|
<lastmod>2026-01-23</lastmod>
|
||||||
|
<changefreq>monthly</changefreq>
|
||||||
|
<priority>0.8</priority>
|
||||||
|
</url>
|
||||||
|
|
||||||
|
<!-- MCP (Model Context Protocol) discovery endpoint for AI assistants -->
|
||||||
|
<url>
|
||||||
|
<loc>https://haste.nixc.us/.well-known/mcp.json</loc>
|
||||||
|
<lastmod>2026-01-23</lastmod>
|
||||||
|
<changefreq>monthly</changefreq>
|
||||||
|
<priority>0.7</priority>
|
||||||
|
</url>
|
||||||
|
|
||||||
|
</urlset>
|
||||||
Loading…
Reference in New Issue