Add sitemap.xml generation with MCP discovery endpoint
ci/woodpecker/push/woodpecker Pipeline failed
Details
ci/woodpecker/push/woodpecker Pipeline failed
Details
- New build script generates sitemap.xml at build time
- Includes homepage, about page, and /.well-known/mcp.json
- Updated robots.txt to allow crawling of about page and MCP endpoints
- Sitemap encourages search engines to index the MCP discovery path
This commit is contained in:
parent
edc1680f1c
commit
ebcb90ca18
|
|
@ -2,7 +2,8 @@
|
|||
"$schema": "https://unpkg.com/knip@latest/schema.json",
|
||||
"entry": [
|
||||
"lib/document_stores/*.js",
|
||||
"lib/key_generators/*.js"
|
||||
"lib/key_generators/*.js",
|
||||
"scripts/generate-sitemap.js"
|
||||
],
|
||||
"ignore": [
|
||||
"static/**",
|
||||
|
|
|
|||
|
|
@ -68,7 +68,8 @@
|
|||
"test:security:csp": "node test/security/security_spec.js --test=csp",
|
||||
"test:security:cors": "node test/security/security_spec.js --test=cors",
|
||||
"test:security:combined": "node test/security/security_spec.js --test=combinedSecurity",
|
||||
"build": "node update-js.js",
|
||||
"build": "node update-js.js && node scripts/generate-sitemap.js",
|
||||
"build:sitemap": "node scripts/generate-sitemap.js",
|
||||
"scan:sbom": "./scripts/scan-sbom.sh",
|
||||
"scan:trivy": "./scripts/scan-trivy-fs.sh",
|
||||
"scan:trivy:image": "./scripts/scan-trivy-image.sh",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,114 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* Sitemap generator for Hastebin
|
||||
* Generates sitemap.xml with static pages and MCP discovery endpoint
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
/**
 * Normalize a configured base URL so it can be prefixed to root-relative
 * paths such as '/about'.
 *
 * Surrounding whitespace and trailing slashes are removed; without this,
 * 'https://example.com/' + '/about' would produce 'https://example.com//about'.
 *
 * @param {string|undefined|null} url - Raw base URL (may be unset).
 * @returns {string} The trimmed URL without trailing slashes ('' when unset).
 */
function normalizeBaseUrl(url) {
  const raw = url == null ? '' : String(url);
  return raw.trim().replace(/\/+$/, '');
}

// Base URL precedence: HASTEBIN_BASE_URL, then HASTEBIN_SITEMAP_URL, then the
// built-in default. `||` is intentional: an empty (or slash-only) value falls
// through to the next candidate instead of yielding an unusable base URL.
const baseUrl =
  normalizeBaseUrl(process.env.HASTEBIN_BASE_URL) ||
  normalizeBaseUrl(process.env.HASTEBIN_SITEMAP_URL) ||
  'https://haste.nixc.us';
|
||||
|
||||
/**
 * Static pages to include in the sitemap.
 *
 * Each entry has:
 *   loc         - root-relative path, appended to the base URL
 *   changefreq  - value written to the <changefreq> element
 *   priority    - value written to the <priority> element (kept as a string)
 *   description - human-readable note, emitted as an XML comment above the entry
 */
const pages = [
  {
    loc: '/',
    changefreq: 'weekly',
    priority: '1.0',
    description: 'Homepage - create and share code snippets'
  },
  {
    loc: '/about',
    changefreq: 'monthly',
    priority: '0.8',
    description: 'About page with documentation and usage instructions'
  },
  {
    loc: '/.well-known/mcp.json',
    changefreq: 'monthly',
    priority: '0.7',
    description: 'MCP (Model Context Protocol) discovery endpoint for AI assistants'
  }
];
|
||||
|
||||
/**
 * Escape the five XML special characters so a value can be placed inside
 * element content. The sitemap protocol requires URLs in <loc> to be
 * entity-escaped (e.g. '&' must be written as '&amp;').
 *
 * @param {string} value - Raw text.
 * @returns {string} Entity-escaped text.
 */
function escapeXml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Make text safe for use inside an XML comment. The sequence '--' is not
 * allowed within a comment (and occurs in every punycode host, 'xn--...'),
 * so each hyphen that is followed by another hyphen gets a space after it.
 *
 * @param {string} value - Raw comment text.
 * @returns {string} Text that contains no '--'.
 */
function toXmlCommentText(value) {
  return String(value).replace(/-(?=-)/g, '- ');
}

/**
 * Build the sitemap.xml document for the entries in `pages`.
 *
 * Reads the module-level `baseUrl` and `pages`. Every entry gets today's
 * date (UTC, YYYY-MM-DD) as its <lastmod>.
 *
 * @returns {string} The complete XML document, ending with a newline.
 */
function generateSitemap() {
  // ISO timestamp is 'YYYY-MM-DDTHH:mm:ss.sssZ'; keep only the date part.
  const today = new Date().toISOString().split('T')[0];

  let xml = '<?xml version="1.0" encoding="UTF-8"?>\n';
  xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n';
  xml += ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n';
  xml += ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9\n';
  xml += ' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n';
  xml += '\n';
  xml += ' <!-- Hastebin Sitemap -->\n';
  xml += ' <!-- Generated: ' + today + ' -->\n';
  xml += ' <!-- Base URL: ' + toXmlCommentText(baseUrl) + ' -->\n';
  xml += '\n';

  for (const page of pages) {
    xml += ' <!-- ' + toXmlCommentText(page.description) + ' -->\n';
    xml += ' <url>\n';
    xml += ' <loc>' + escapeXml(baseUrl + page.loc) + '</loc>\n';
    xml += ' <lastmod>' + today + '</lastmod>\n';
    xml += ' <changefreq>' + escapeXml(page.changefreq) + '</changefreq>\n';
    xml += ' <priority>' + escapeXml(page.priority) + '</priority>\n';
    xml += ' </url>\n';
    xml += '\n';
  }

  xml += '</urlset>\n';

  return xml;
}
|
||||
|
||||
/**
 * Build the robots.txt content.
 *
 * Reads the module-level `baseUrl` for the Sitemap line and stamps the file
 * with today's date (UTC, YYYY-MM-DD).
 *
 * @returns {string} The robots.txt text, ending with a newline.
 */
function generateRobotsTxt() {
  // ISO timestamp is 'YYYY-MM-DDTHH:mm:ss.sssZ'; keep only the date part.
  const today = new Date().toISOString().split('T')[0];

  const lines = [
    '# Hastebin robots.txt',
    '# Updated: ' + today,
    '',
    'User-agent: *',
    '',
    '# Allow static pages',
    'Allow: /$',
    'Allow: /about',
    '',
    '# Allow MCP discovery for AI assistants and tools',
    'Allow: /.well-known/',
    'Allow: /.well-known/mcp.json',
    '',
    '# Disallow individual pastes (ephemeral content)',
    'Disallow: /raw/',
    'Disallow: /documents/',
    '',
    '# Disallow paste URLs (random keys)',
    '# Pastes are identified by 10-character alphanumeric keys',
    'Disallow: /*.*',
    '',
    '# Sitemap location',
    'Sitemap: ' + baseUrl + '/sitemap.xml'
  ];

  // Every line, including the last, is newline-terminated.
  return lines.join('\n') + '\n';
}
|
||||
|
||||
// Main execution: write both generated files into ../static relative to
// this script's directory.
const staticDir = path.join(__dirname, '..', 'static');

// Generate and write sitemap.xml
const sitemapContent = generateSitemap();
const sitemapPath = path.join(staticDir, 'sitemap.xml');
fs.writeFileSync(sitemapPath, sitemapContent, 'utf8');
console.log('Generated sitemap.xml at', sitemapPath);
console.log('Base URL:', baseUrl);
console.log('Pages included:', pages.length);

// Generate and write robots.txt
const robotsContent = generateRobotsTxt();
const robotsPath = path.join(staticDir, 'robots.txt');
fs.writeFileSync(robotsPath, robotsContent, 'utf8');
console.log('Generated robots.txt at', robotsPath);

console.log('\nSitemap generation complete!');
|
||||
|
|
@ -1,4 +1,23 @@
|
|||
# Hastebin robots.txt
|
||||
# Updated: 2026-01-23
|
||||
|
||||
User-agent: *
|
||||
Disallow: /*
|
||||
Allow: /?okparam=
|
||||
|
||||
# Allow static pages
|
||||
Allow: /$
|
||||
Allow: /about
|
||||
|
||||
# Allow MCP discovery for AI assistants and tools
|
||||
Allow: /.well-known/
|
||||
Allow: /.well-known/mcp.json
|
||||
|
||||
# Disallow individual pastes (ephemeral content)
|
||||
Disallow: /raw/
|
||||
Disallow: /documents/
|
||||
|
||||
# Disallow paste URLs (random keys)
|
||||
# Pastes are identified by 10-character alphanumeric keys
|
||||
Disallow: /*.*
|
||||
|
||||
# Sitemap location
|
||||
Sitemap: https://haste.nixc.us/sitemap.xml
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
|
||||
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
|
||||
|
||||
<!-- Hastebin Sitemap -->
|
||||
<!-- Generated: 2026-01-23 -->
|
||||
<!-- Base URL: https://haste.nixc.us -->
|
||||
|
||||
<!-- Homepage - create and share code snippets -->
|
||||
<url>
|
||||
<loc>https://haste.nixc.us/</loc>
|
||||
<lastmod>2026-01-23</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>1.0</priority>
|
||||
</url>
|
||||
|
||||
<!-- About page with documentation and usage instructions -->
|
||||
<url>
|
||||
<loc>https://haste.nixc.us/about</loc>
|
||||
<lastmod>2026-01-23</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
|
||||
<!-- MCP (Model Context Protocol) discovery endpoint for AI assistants -->
|
||||
<url>
|
||||
<loc>https://haste.nixc.us/.well-known/mcp.json</loc>
|
||||
<lastmod>2026-01-23</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
|
||||
</urlset>
|
||||
Loading…
Reference in New Issue