/**
 * Rich snippet generation for paste content.
 * Produces Open Graph / Twitter meta + Schema.org JSON-LD payload.
 */
|
|
/* eslint-disable no-control-regex */
|
|
|
|
/**
 * Shorten a string to at most `max` UTF-16 code units, marking any cut with a
 * trailing ellipsis.
 *
 * @param {string} str - Text to shorten; falsy values yield ''.
 * @param {number} max - Maximum length of the result, ellipsis included.
 * @returns {string} `str` itself when it already fits, otherwise a shortened
 *   copy ending in '…'.
 */
function clamp(str, max) {
  if (!str) return '';
  if (str.length <= max) return str;
  // There is no room for even the ellipsis, so return nothing rather than
  // exceed the requested limit.
  if (max < 1) return '';

  let kept = str.slice(0, Math.max(0, max - 1));

  // Slicing by code units can cut an astral character (e.g. an emoji) in half.
  // Drop a dangling high surrogate so the result stays well-formed text.
  const tail = kept.charCodeAt(kept.length - 1);
  if (tail >= 0xd800 && tail <= 0xdbff) {
    kept = kept.slice(0, -1);
  }

  return `${kept.trimEnd()}…`;
}
|
|
|
|
/**
 * Escape a value so it can be placed inside a quoted HTML attribute.
 *
 * The ampersand is replaced first; otherwise the entities produced by the
 * later replacements would themselves be escaped a second time.
 *
 * @param {*} str - Value to escape; falsy values are treated as ''.
 * @returns {string} The text with `&`, `"`, `'`, `<` and `>` replaced by HTML
 *   entities.
 */
function escapeHtmlAttr(str) {
  return String(str || '')
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    // Also cover attributes delimited with single quotes.
    .replace(/'/g, '&#39;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
|
|
|
|
/**
 * Prepare raw paste text for summarising and classification.
 *
 * Line endings are normalised to "\n" and ASCII control characters are
 * removed, keeping only tab and newline.
 *
 * @param {*} text - Raw content; falsy values are treated as ''.
 * @returns {string} The cleaned text.
 */
function normalizeForSummary(text) {
  return String(text || '')
    // CRLF and lone CR both become LF.
    .replace(/\r\n?/g, '\n')
    // Strip C0 controls other than \t and \n, plus DEL (U+007F), which is an
    // ASCII control character as well.
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, '');
}
|
|
|
|
/**
 * Return the first line of the text that contains something other than
 * whitespace, with surrounding whitespace removed.
 *
 * @param {*} text - Raw content; it is normalised before being split.
 * @returns {string} The trimmed line, or '' when every line is blank.
 */
function firstNonEmptyLine(text) {
  const rows = normalizeForSummary(text).split('\n');
  const hit = rows.find((row) => row.trim() !== '');
  return hit === undefined ? '' : hit.trim();
}
|
|
|
|
/**
 * Decide whether the text is mostly a list of http(s) URLs, one per line.
 *
 * Only the first 10 non-blank lines are inspected. At least 60% of them, and
 * never fewer than two, must consist of a single URL.
 *
 * @param {*} text - Raw content; it is normalised before being split.
 * @returns {boolean} True when the sampled lines look like a URL list.
 */
function looksLikeUrlList(text) {
  const urlOnly = /^https?:\/\/\S+$/i;
  const probe = [];

  // Collect up to 10 trimmed, non-blank lines; later lines never affect the result.
  for (const raw of normalizeForSummary(text).split('\n')) {
    const entry = raw.trim();
    if (entry === '') continue;
    probe.push(entry);
    if (probe.length === 10) break;
  }
  if (probe.length === 0) return false;

  let hits = 0;
  for (const entry of probe) {
    if (urlOnly.test(entry)) hits += 1;
  }

  const needed = Math.max(2, Math.ceil(probe.length * 0.6));
  return hits >= needed;
}
|
|
|
|
/**
 * Parse the text as JSON when it looks like an object or array literal.
 *
 * Anything that does not start with "{" or "[" after trimming is rejected
 * without attempting a parse, so bare strings and numbers are never returned.
 *
 * @param {*} text - Candidate JSON; falsy values are treated as ''.
 * @returns {*} The parsed object or array, or null when the text is not valid
 *   JSON of that shape.
 */
function tryParseJson(text) {
  const candidate = String(text || '').trim();
  const opener = candidate.charAt(0);
  if (opener !== '{' && opener !== '[') return null;

  let parsed = null;
  try {
    parsed = JSON.parse(candidate);
  } catch (ignored) {
    // Malformed input is expected here; the caller only needs a yes/no answer.
    parsed = null;
  }
  return parsed;
}
|
|
|
|
/**
 * Guess a short, human-readable label for a paste from its content.
 *
 * Checks run in a fixed order and the first match wins: shebang, valid JSON,
 * error traces, URL lists, secrets, HTML, JSON-like objects, SQL, then code.
 * Most pattern checks look only at the first 2000 characters of the text.
 *
 * @param {*} text - Raw paste content.
 * @returns {string} A label such as 'JSON array' or 'Code snippet', or
 *   'Paste' when nothing matches.
 */
function guessKind(text) {
  const body = normalizeForSummary(text);
  const head = body.slice(0, 2000);
  const opening = firstNonEmptyLine(body);

  // Shebang: report the first token after "#!" when there is one.
  if (/^#!/.test(opening)) {
    const interpreter = opening.match(/^#!\s*(\S+)/);
    if (interpreter && interpreter[1]) return `Script (${interpreter[1]})`;
    return 'Script';
  }

  // Strictly valid JSON, parsed from the whole text rather than just the head.
  const parsed = tryParseJson(body);
  if (parsed) {
    if (Array.isArray(parsed)) return 'JSON array';
    if (typeof parsed !== 'object') return 'JSON';
    const names = Object.keys(parsed).slice(0, 3);
    return names.length ? `JSON object (${names.join(', ')})` : 'JSON object';
  }

  // Remaining heuristics as [label, predicate] pairs, in priority order.
  // Predicates are evaluated lazily, so nothing runs after the first match.
  const heuristics = [
    ['Error trace', () => /^Traceback \(most recent call last\):/m.test(head) || /\bException\b/.test(head)],
    ['Error trace', () => /\n\s*at\s+\S+\s+\(.*:\d+:\d+\)/.test(head) || /\bUnhandledPromiseRejection\b/.test(head)],
    ['URL list', () => looksLikeUrlList(body)],
    ['Sensitive content', () => /BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY/.test(head) || /AKIA[0-9A-Z]{16}/.test(head)],
    ['HTML snippet', () => /<html[\s>]/i.test(head) || /<!doctype html>/i.test(head)],
    // Brace-wrapped text that failed strict parsing but still has key/value pairs.
    ['Structured data', () => /^\s*\{[\s\S]*\}\s*$/.test(body) && /:\s*["{\[]/.test(head)],
    ['SQL snippet', () => /\b(select|insert|update|delete)\b[\s\S]*\bfrom\b/i.test(head)],
    ['Code snippet', () => /\b(class|def)\s+\w+\b/.test(head) && /\bimport\b/.test(head)],
    ['Code snippet', () => /\b(function|const|let|var)\b/.test(head) || /\bimport\b/.test(head)]
  ];

  const match = heuristics.find((rule) => rule[1]());
  return match ? match[0] : 'Paste';
}
|
|
|
|
/**
 * Build a one-line summary from the leading non-blank lines of the text.
 *
 * Up to three lines are taken, each with whitespace collapsed and clamped to
 * 140 characters. Collection stops early once the collected text reaches 220
 * characters. The pieces are joined with " · " and clamped to 220.
 *
 * @param {*} text - Raw paste content.
 * @returns {string} The summary, or '' when the text has no non-blank line.
 */
function buildSummary(text) {
  const rows = normalizeForSummary(text)
    .split('\n')
    .map((row) => row.trim())
    .filter((row) => row !== '');
  if (rows.length === 0) return '';

  // Favour a handful of short lines over one enormous line.
  const chosen = [];
  let index = 0;
  while (index < rows.length) {
    const flat = rows[index].replace(/\s+/g, ' ').trim();
    index += 1;
    if (flat === '') continue;

    chosen.push(clamp(flat, 140));
    if (chosen.length >= 3 || chosen.join(' ').length >= 220) break;
  }

  return clamp(chosen.join(' · '), 220);
}
|
|
|
|
/**
 * Work out the public base URL ("scheme://host") for the current request.
 *
 * Scheme: the first entry of the X-Forwarded-Proto header when it is exactly
 * "http" or "https"; otherwise "https" for an encrypted socket, else "http".
 * Host: the Host header, falling back to the configured host and port.
 *
 * @param {object|null} req - Incoming request; only `headers` and
 *   `socket`/`connection` are read. May be null.
 * @param {object|null} config - Optional settings; only `host` and `port`
 *   are read.
 * @returns {string} The base URL without a trailing slash. The host part is
 *   empty when neither the request nor the config provides one.
 */
function computeBaseUrl(req, config) {
  const headers = (req && req.headers) || {};

  // The forwarded header is client-controllable unless a proxy overwrites it,
  // so only the two schemes that make sense for a page URL are accepted.
  const forwarded = headers['x-forwarded-proto'];
  const claimed = forwarded ? String(forwarded).split(',')[0].trim().toLowerCase() : '';

  let protocol;
  if (claimed === 'http' || claimed === 'https') {
    protocol = claimed;
  } else {
    // `req.connection` is the deprecated alias of `req.socket`; keep it as a
    // fallback for callers that pass request-like objects with only that field.
    const socket = req && (req.socket || req.connection);
    protocol = socket && socket.encrypted ? 'https' : 'http';
  }

  let host = headers.host || '';
  if (!host && config && config.host) {
    // A configured host is still usable when no port is set.
    host = config.port ? `${config.host}:${config.port}` : String(config.host);
  }

  return `${protocol}://${host}`;
}
|
|
|
|
/**
 * Build the metadata used to render a rich link preview for a paste.
 *
 * @param {object} [options] - Inputs; every field is optional.
 * @param {string} [options.key] - Paste identifier, appended to the base URL.
 * @param {string} [options.content] - Paste text to classify and summarise.
 * @param {object} [options.req] - Incoming request, used to derive the base URL.
 * @param {object} [options.config] - Settings used as a host fallback.
 * @returns {{title: string, description: string, url: string, ogType: string,
 *   jsonLd: object, escapeHtmlAttr: function}} Preview fields, a Schema.org
 *   JSON-LD object, and the attribute escaper for use when rendering them.
 */
function generateRichSnippet(options) {
  const opts = options || {};
  const key = opts.key ? String(opts.key) : '';
  const content = opts.content ? String(opts.content) : '';
  const req = opts.req || null;
  const config = opts.config || null;

  const kind = guessKind(content);
  const summary = buildSummary(content) || `A ${kind.toLowerCase()} on hastebin.`;
  const baseUrl = computeBaseUrl(req, config);
  const url = key ? `${baseUrl}/${encodeURIComponent(key)}` : baseUrl;

  // Without a key there is nothing to put after the separator, so the title
  // is just the kind instead of ending in a dangling " · ".
  const title = clamp(key ? `${kind} · ${key}` : kind, 70);
  const description = clamp(summary, 200);

  // NOTE(review): values here are raw text. Whoever serialises this into a
  // <script> tag needs to guard against "</script>" in paste content; the
  // rendering code is not visible from this file.
  const jsonLd = {
    '@context': 'https://schema.org',
    '@type': 'CreativeWork',
    name: title,
    description: description,
    url: url
  };

  return {
    title,
    description,
    url,
    ogType: 'article',
    jsonLd,
    escapeHtmlAttr
  };
}
|
|
|
|
module.exports = {
|
|
generateRichSnippet,
|
|
escapeHtmlAttr
|
|
};
|
|
|