hastebin/lib/rich_snippet.js

161 lines
4.9 KiB
JavaScript

/**
* Rich snippet generation for paste content.
* Produces Open Graph / Twitter meta + Schema.org JSON-LD payload.
*/
/* eslint-disable no-control-regex */
/**
 * Shorten a string to at most `max` UTF-16 code units, marking the cut with '…'.
 *
 * @param {string} str - Text to shorten; any falsy value yields ''.
 * @param {number} max - Maximum length of the result, ellipsis included.
 * @returns {string} `str` itself when it already fits, otherwise a
 *   right-trimmed prefix followed by '…'. Returns '' when `max` is below 1,
 *   because not even the ellipsis would fit.
 */
function clamp(str, max) {
  if (!str) return '';
  if (str.length <= max) return str;
  if (max < 1) return '';

  let head = str.slice(0, max - 1);
  // The cut is made on code units, so it can land in the middle of a
  // surrogate pair. Drop a trailing high surrogate rather than emitting
  // half of a character.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head.trimEnd()}…`;
}
/**
 * Escape a value for use inside an HTML attribute.
 *
 * Replaces `&`, `"`, `'`, `<` and `>` with character references. The single
 * quote is included so the result is safe in single-quoted attributes as
 * well as double-quoted ones.
 *
 * @param {*} str - Value to escape; any falsy value yields ''.
 * @returns {string} The escaped text.
 */
function escapeHtmlAttr(str) {
  const entities = {
    '&': '&amp;',
    '"': '&quot;',
    "'": '&#39;',
    '<': '&lt;',
    '>': '&gt;'
  };
  // Single pass, so the '&' of an inserted entity is never escaped again.
  return String(str || '').replace(/[&"'<>]/g, (ch) => entities[ch]);
}
/**
 * Prepare raw paste text for line-based inspection.
 *
 * Converts CRLF and bare CR line endings to LF, then strips the control
 * characters U+0000–U+0008, U+000B, U+000C and U+000E–U+001F (tab and
 * newline are kept).
 *
 * @param {*} text - Raw content; any falsy value is treated as ''.
 * @returns {string} The normalized text.
 */
function normalizeForSummary(text) {
  const raw = String(text || '');
  const withUnixNewlines = raw.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  return withUnixNewlines.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F]/g, '');
}
/**
 * Find the first line of the normalized text that is not blank.
 *
 * @param {*} text - Raw content.
 * @returns {string} That line with surrounding whitespace removed, or ''
 *   when every line is blank.
 */
function firstNonEmptyLine(text) {
  const hit = normalizeForSummary(text)
    .split('\n')
    .find((candidate) => candidate.trim() !== '');
  return hit === undefined ? '' : hit.trim();
}
/**
 * Heuristic: does the text look like a list of http(s) URLs, one per line?
 *
 * Only the first 10 non-blank lines are sampled. The answer is true when at
 * least 60% of the sample (rounded up), and never fewer than 2 lines,
 * consist solely of an http:// or https:// URL.
 *
 * @param {*} text - Raw content.
 * @returns {boolean} Whether the sample meets the URL threshold.
 */
function looksLikeUrlList(text) {
  const nonBlank = [];
  for (const rawLine of normalizeForSummary(text).split('\n')) {
    const trimmed = rawLine.trim();
    if (trimmed) nonBlank.push(trimmed);
  }
  if (nonBlank.length === 0) return false;

  const sample = nonBlank.slice(0, 10);
  let urlCount = 0;
  for (const entry of sample) {
    if (/^https?:\/\/\S+$/i.test(entry)) urlCount += 1;
  }
  const required = Math.max(2, Math.ceil(sample.length * 0.6));
  return urlCount >= required;
}
/**
 * Parse the text as JSON if it plausibly is a JSON object or array.
 *
 * The trimmed text must begin with `{` or `[`; anything else is rejected
 * without attempting a parse.
 *
 * @param {*} text - Raw content; any falsy value is treated as ''.
 * @returns {*} The parsed value, or null when the text is not a candidate
 *   or fails to parse.
 */
function tryParseJson(text) {
  const candidate = String(text || '').trim();
  const opener = candidate.charAt(0);
  if (opener !== '{' && opener !== '[') return null;

  let parsed = null;
  try {
    parsed = JSON.parse(candidate);
  } catch (_) {
    // Not valid JSON: reported to the caller as null.
    parsed = null;
  }
  return parsed;
}
/**
 * Guess a short human-readable label for the kind of content in a paste.
 *
 * Checks run in a fixed priority order and the first match wins:
 * shebang script, parseable JSON, error traces, URL list, sensitive
 * material, HTML, brace-wrapped structured data, SQL, then code.
 * Pattern checks other than the JSON parse, the URL-list test and the
 * whole-text brace test look only at the first 2000 characters.
 *
 * @param {*} text - Raw content.
 * @returns {string} A label such as 'Script (/bin/sh)', 'JSON array',
 *   'Error trace' or the fallback 'Paste'.
 */
function guessKind(text) {
  const normalized = normalizeForSummary(text);
  const head = normalized.slice(0, 2000);
  const firstLine = firstNonEmptyLine(normalized);

  if (/^#!/.test(firstLine)) {
    const shebang = firstLine.match(/^#!\s*(\S+)/);
    if (shebang && shebang[1]) return `Script (${shebang[1]})`;
    return 'Script';
  }

  const parsed = tryParseJson(normalized);
  if (parsed) {
    if (Array.isArray(parsed)) return 'JSON array';
    if (typeof parsed !== 'object') return 'JSON';
    const shownKeys = Object.keys(parsed).slice(0, 3);
    return shownKeys.length ? `JSON object (${shownKeys.join(', ')})` : 'JSON object';
  }

  // Ordered rules; each predicate is evaluated lazily so later (and more
  // expensive) checks only run when every earlier one has failed.
  const rules = [
    [
      'Error trace',
      () => /^Traceback \(most recent call last\):/m.test(head) || /\bException\b/.test(head)
    ],
    [
      'Error trace',
      () => /\n\s*at\s+\S+\s+\(.*:\d+:\d+\)/.test(head) || /\bUnhandledPromiseRejection\b/.test(head)
    ],
    ['URL list', () => looksLikeUrlList(normalized)],
    [
      'Sensitive content',
      () => /BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY/.test(head) || /AKIA[0-9A-Z]{16}/.test(head)
    ],
    ['HTML snippet', () => /<html[\s>]/i.test(head) || /<!doctype html>/i.test(head)],
    [
      'Structured data',
      () => /^\s*\{[\s\S]*\}\s*$/.test(normalized) && /:\s*["{\[]/.test(head)
    ],
    ['SQL snippet', () => /\b(select|insert|update|delete)\b[\s\S]*\bfrom\b/i.test(head)],
    ['Code snippet', () => /\b(class|def)\s+\w+\b/.test(head) && /\bimport\b/.test(head)],
    ['Code snippet', () => /\b(function|const|let|var)\b/.test(head) || /\bimport\b/.test(head)]
  ];

  for (const [label, matches] of rules) {
    if (matches()) return label;
  }
  return 'Paste';
}
/**
 * Build a short one-line description from the first lines of a paste.
 *
 * Takes up to three non-blank lines, collapses internal whitespace, limits
 * each to 140 characters, joins them with ' · ' and limits the whole result
 * to 220 characters.
 *
 * @param {*} text - Raw content.
 * @returns {string} The summary, or '' when the text has no non-blank lines.
 */
function buildSummary(text) {
  const nonBlank = normalizeForSummary(text)
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter(Boolean);
  if (nonBlank.length === 0) return '';

  // Collect a handful of short fragments instead of one huge line.
  const fragments = [];
  for (let i = 0; i < nonBlank.length; i += 1) {
    const collapsed = nonBlank[i].replace(/\s+/g, ' ').trim();
    if (collapsed) {
      fragments.push(clamp(collapsed, 140));
      const enoughLines = fragments.length >= 3;
      const enoughText = fragments.join(' ').length >= 220;
      if (enoughText || enoughLines) break;
    }
  }
  return clamp(fragments.join(' · '), 220);
}
/**
 * Derive the site's base URL (`scheme://host`) for the current request.
 *
 * Scheme: the first entry of the `X-Forwarded-Proto` header is used only
 * when it is exactly `http` or `https` (case-insensitive). That header is
 * client-controllable, so any other value is ignored instead of being
 * copied into the URL. Otherwise the scheme is `https` when the request's
 * socket reports `encrypted`, else `http`.
 *
 * Host: the request's `Host` header, falling back to
 * `config.host:config.port` when both are set, else an empty string.
 * NOTE(review): the Host header is also client-supplied; confirm whether
 * callers need an allow-list.
 *
 * @param {object|null} req - Incoming request (reads `headers`, `socket`
 *   and `connection`); may be null.
 * @param {object|null} config - Server config (reads `host` and `port`);
 *   may be null.
 * @returns {string} The base URL without a trailing slash.
 */
function computeBaseUrl(req, config) {
  const headers = (req && req.headers) || {};

  const forwarded = headers['x-forwarded-proto'];
  const claimed = forwarded ? String(forwarded).split(',')[0].trim().toLowerCase() : '';

  let protocol;
  if (claimed === 'http' || claimed === 'https') {
    protocol = claimed;
  } else {
    // `req.connection` is the deprecated alias of `req.socket`; check both
    // so either kind of request object works.
    const socket = req && (req.socket || req.connection);
    protocol = socket && socket.encrypted ? 'https' : 'http';
  }

  const configured = config && config.host && config.port ? `${config.host}:${config.port}` : '';
  const host = headers.host || configured;
  return `${protocol}://${host}`;
}
/**
 * Build the rich-snippet data (title, description, canonical URL and a
 * Schema.org JSON-LD object) for one paste.
 *
 * The returned strings are NOT HTML-escaped; `escapeHtmlAttr` is included
 * in the result so the caller can escape them when rendering.
 *
 * @param {object} [options]
 * @param {string} [options.key] - Paste key; used in the title and as the
 *   URL path segment. When absent, the title is just the content kind and
 *   the URL is the base URL.
 * @param {string} [options.content] - Paste body to classify and summarize.
 * @param {object} [options.req] - Incoming request, used to derive the base URL.
 * @param {object} [options.config] - Server config, used as a host fallback.
 * @returns {{title: string, description: string, url: string, ogType: string,
 *   jsonLd: object, escapeHtmlAttr: Function}} Snippet fields.
 */
function generateRichSnippet(options) {
  const opts = options || {};
  const key = opts.key ? String(opts.key) : '';
  const content = opts.content ? String(opts.content) : '';
  const req = opts.req ? opts.req : null;
  const config = opts.config ? opts.config : null;

  const kind = guessKind(content);
  const summary = buildSummary(content) || `A ${kind.toLowerCase()} on hastebin.`;

  const baseUrl = computeBaseUrl(req, config);
  const url = key ? `${baseUrl}/${encodeURIComponent(key)}` : baseUrl;

  // Without a key there is nothing to put after the separator; emitting
  // "<kind> · " would leave a dangling " · " in the title.
  const title = clamp(key ? `${kind} · ${key}` : kind, 70);
  const description = clamp(summary, 200);

  const jsonLd = {
    '@context': 'https://schema.org',
    '@type': 'CreativeWork',
    name: title,
    description,
    url
  };

  return {
    title,
    description,
    url,
    ogType: 'article',
    jsonLd,
    escapeHtmlAttr
  };
}
// Public API of this module: the snippet builder and the HTML attribute
// escaper (which is also returned on every snippet object).
module.exports = {
generateRichSnippet,
escapeHtmlAttr
};