resume/docker/generate-pdfs.js

282 lines
9.2 KiB
JavaScript

#!/usr/bin/env node
/**
* PDF Generation Script
*
* Uses Puppeteer to render each HTML page to PDF.
* Run with: node generate-pdfs.js
*
* Prerequisites: npm install puppeteer
*/
const puppeteer = require('puppeteer');
const http = require('http');
const fs = require('fs');
const path = require('path');
// Configuration
// SITE_DIR: root of the static site; a `resume` folder located next to this script.
const SITE_DIR = path.join(__dirname, 'resume');
// PDF_DIR: output folder for the generated PDFs; lives inside the site root.
const PDF_DIR = path.join(SITE_DIR, 'pdfs');
// PORT: TCP port used by the temporary local static file server.
const PORT = 8765;
// MIME types for static file serving.
// Keys are file extensions in the form returned by path.extname (leading dot
// included); values are the Content-Type sent for that extension. The lookup is
// case-sensitive, and extensions missing from this table are served as
// 'application/octet-stream' by the static server.
const MIME_TYPES = {
'.html': 'text/html',
'.css': 'text/css',
'.js': 'application/javascript',
'.json': 'application/json',
'.png': 'image/png',
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.gif': 'image/gif',
'.svg': 'image/svg+xml',
'.ico': 'image/x-icon',
'.woff': 'font/woff',
'.woff2': 'font/woff2',
'.ttf': 'font/ttf',
'.eot': 'application/vnd.ms-fontobject',
};
/**
 * Recursively collect the HTML pages below a directory.
 *
 * Directories named `pdfs` or `node_modules`, and any directory whose name
 * starts with a dot, are not descended into. Files whose name contains
 * `template` or `with-includes` are treated as templates and left out.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [baseDir=dir] - Directory the returned paths are relative to.
 * @returns {string[]} Paths of the matching `.html` files, relative to `baseDir`,
 *   in the order the directory entries were read.
 */
function findHtmlFiles(dir, baseDir = dir) {
  const isSkippedDirectory = (name) =>
    name === 'pdfs' || name === 'node_modules' || name.startsWith('.');
  const isTemplateFile = (name) =>
    name.includes('template') || name.includes('with-includes');

  return fs.readdirSync(dir, { withFileTypes: true }).flatMap((item) => {
    const itemPath = path.join(dir, item.name);

    if (item.isDirectory()) {
      return isSkippedDirectory(item.name) ? [] : findHtmlFiles(itemPath, baseDir);
    }

    const isPage =
      item.isFile() && item.name.endsWith('.html') && !isTemplateFile(item.name);
    return isPage ? [path.relative(baseDir, itemPath)] : [];
  });
}
/**
 * Create a simple static file server rooted at SITE_DIR.
 *
 * Request handling:
 * - the query string is dropped and the path is percent-decoded, so files whose
 *   names contain spaces or non-ASCII characters can be found on disk;
 * - a path ending in `/` is mapped to the `index.html` inside that directory;
 * - a path that resolves outside SITE_DIR is rejected with 403;
 * - any read error (missing file, directory, permissions) is answered with 404.
 *
 * The Content-Type comes from MIME_TYPES, falling back to
 * `application/octet-stream` for unknown extensions.
 *
 * @returns {http.Server} A server that has not been started; call `listen` on it.
 */
function createServer() {
  const rootDir = path.resolve(SITE_DIR);

  return http.createServer((req, res) => {
    const rawPath = req.url.split('?')[0];

    let urlPath;
    try {
      urlPath = decodeURIComponent(rawPath);
    } catch (decodeError) {
      // Malformed escape (e.g. a literal "%" in a file name): use the path as sent.
      urlPath = rawPath;
    }

    // fs functions throw synchronously on NUL bytes; reject them up front.
    if (urlPath.includes('\0')) {
      res.writeHead(400);
      res.end('Bad request');
      return;
    }

    if (urlPath.endsWith('/')) urlPath += 'index.html';

    // path.join collapses ".." segments, so a prefix check is enough to detect
    // attempts to escape the site root.
    const filePath = path.join(rootDir, urlPath);
    const isInsideRoot = filePath === rootDir || filePath.startsWith(rootDir + path.sep);
    if (!isInsideRoot) {
      res.writeHead(403);
      res.end('Forbidden');
      return;
    }

    const contentType = MIME_TYPES[path.extname(filePath)] || 'application/octet-stream';

    fs.readFile(filePath, (err, data) => {
      if (err) {
        res.writeHead(404);
        res.end('Not found');
        return;
      }
      res.writeHead(200, { 'Content-Type': contentType });
      res.end(data);
    });
  });
}
/**
 * Make sure a directory is present, creating it (and any missing parents)
 * when nothing exists at that path yet.
 *
 * @param {string} dirPath - Directory that must exist.
 */
function ensureDir(dirPath) {
  if (fs.existsSync(dirPath)) {
    return;
  }
  fs.mkdirSync(dirPath, { recursive: true });
}
/**
 * Resolve after `ms` milliseconds.
 * Used instead of `page.waitForTimeout`, which is deprecated and not available
 * in newer Puppeteer releases.
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Wait until includes.js has filled the header/footer placeholders.
 * Never throws: on timeout (or any other wait error) a warning is logged and
 * processing continues, since some pages may not use includes at all.
 */
async function waitForIncludes(page, htmlFile) {
  try {
    // NOTE: this callback is evaluated inside the browser page, so it must not
    // reference anything from the surrounding Node.js scope.
    await page.waitForFunction(() => {
      const headerInclude = document.getElementById('header-include');
      const footerInclude = document.getElementById('footer-include');
      // If neither element exists, page doesn't use includes - that's fine
      if (!headerInclude && !footerInclude) return true;
      // If header exists but is empty, wait
      if (headerInclude && headerInclude.innerHTML.trim() === '') return false;
      // If footer exists but is empty, wait
      if (footerInclude && footerInclude.innerHTML.trim() === '') return false;
      // If header exists, check if nav is loaded (indicates includes.js finished)
      if (headerInclude) {
        const nav = headerInclude.querySelector('nav');
        if (!nav) return false;
      }
      return true;
    }, {
      timeout: 15000,
      polling: 100 // Check every 100ms
    });
  } catch (waitError) {
    console.warn(`Warning: Includes may not have fully loaded for ${htmlFile}, continuing anyway...`);
  }
}

/**
 * Navigate to `url`, verify the server answered successfully, then wait for
 * the includes and give remaining page scripts `settleDelayMs` to finish.
 * Throws when navigation fails or the response status is not 2xx.
 */
async function preparePage(page, url, htmlFile, settleDelayMs) {
  const response = await page.goto(url, {
    waitUntil: 'networkidle0',
    timeout: 30000
  });
  // Without this check an error body (e.g. "Not found") would be printed to PDF.
  if (response && !response.ok()) {
    throw new Error(`HTTP ${response.status()} while loading ${url}`);
  }
  await waitForIncludes(page, htmlFile);
  await delay(settleDelayMs);
}

/**
 * Generate PDF for a single HTML file.
 *
 * The page is loaded from the local server on PORT and written below PDF_DIR
 * under the same relative path with a `.pdf` extension. Failures are logged,
 * not thrown, so one bad page does not stop the caller's loop.
 *
 * @param {object} browser - Puppeteer browser used to open a new page.
 * @param {string} htmlFile - HTML file path relative to the site root.
 * @param {object} [options]
 * @param {number} [options.settleDelayMs=1000] - Extra wait after each page load.
 * @param {number} [options.retryDelayMs=2000] - Pause before a retry attempt.
 * @returns {Promise<boolean>} `true` when the PDF was written, `false` when
 *   generation failed (the error has already been logged).
 */
async function generatePdf(browser, htmlFile, options = {}) {
  const { settleDelayMs = 1000, retryDelayMs = 2000 } = options;
  const page = await browser.newPage();

  // Convert file path to URL path (Windows separators become slashes)
  const urlPath = '/' + htmlFile.replace(/\\/g, '/');
  const url = `http://localhost:${PORT}${urlPath}`;

  // Determine output PDF path
  const pdfRelativePath = htmlFile.replace(/\.html$/, '.pdf');
  const pdfPath = path.join(PDF_DIR, pdfRelativePath);
  ensureDir(path.dirname(pdfPath));

  try {
    await preparePage(page, url, htmlFile, settleDelayMs);

    // Generate PDF with retry logic for transient errors
    let retries = 2;
    let lastError = null;
    while (retries > 0) {
      try {
        await page.pdf({
          path: pdfPath,
          format: 'A4',
          printBackground: true,
          margin: {
            top: '20mm',
            right: '15mm',
            bottom: '20mm',
            left: '15mm'
          }
        });
        console.log(`✓ Generated: ${pdfRelativePath}`);
        return true;
      } catch (pdfError) {
        lastError = pdfError;
        const message = String(pdfError && pdfError.message);
        // "Target closed" is covered by the 'closed' check.
        const isRecoverable = message.includes('detached') || message.includes('closed');
        if (!isRecoverable) {
          throw pdfError;
        }
        retries--;
        if (retries > 0) {
          console.warn(`Retrying PDF generation for ${htmlFile} (${retries} attempts remaining)...`);
          await delay(retryDelayMs);
          try {
            // Reload so the next attempt prints a fully prepared page.
            await preparePage(page, url, htmlFile, settleDelayMs);
          } catch (navError) {
            // The page cannot be reloaded; give up and report the PDF error.
            break;
          }
        }
      }
    }
    // If we get here, all retries failed
    throw lastError || new Error('PDF generation failed after retries');
  } catch (error) {
    console.error(`✗ Failed: ${htmlFile} - ${error.message}`);
    return false;
  } finally {
    try {
      await page.close();
    } catch (closeError) {
      // The page may already be gone ("Connection closed" / "Target closed");
      // only report close errors that are not of that kind.
      if (!String(closeError && closeError.message).includes('closed')) {
        console.warn(`Warning closing page for ${htmlFile}: ${closeError.message}`);
      }
    }
  }
}
/**
 * Main function.
 *
 * Finds the HTML pages under SITE_DIR, recreates PDF_DIR from scratch, starts
 * the local static server on PORT, launches a headless browser and renders
 * every page to PDF one after another. The browser and the server are always
 * shut down, including when the browser fails to launch.
 *
 * @returns {Promise<void>} Rejects on fatal setup errors (unreadable site
 *   directory, port already in use, browser launch failure). Failures of
 *   individual pages do not reject; they are counted and reported in the
 *   final summary.
 */
async function main() {
  console.log('PDF Generation Script');
  console.log('=====================\n');

  // Find all HTML files
  const htmlFiles = findHtmlFiles(SITE_DIR);
  console.log(`Found ${htmlFiles.length} HTML files to process\n`);
  if (htmlFiles.length === 0) {
    console.log('No HTML files found. Exiting.');
    return;
  }

  // Clean and create PDF directory
  if (fs.existsSync(PDF_DIR)) {
    fs.rmSync(PDF_DIR, { recursive: true });
  }
  ensureDir(PDF_DIR);

  // Start local server. A listen failure (e.g. port already in use) is
  // reported through the 'error' event, so turn it into a rejection instead
  // of letting it surface as an unhandled event.
  const server = createServer();
  await new Promise((resolve, reject) => {
    server.once('error', reject);
    server.listen(PORT, () => {
      server.removeListener('error', reject);
      resolve();
    });
  });
  console.log(`Local server started on port ${PORT}\n`);

  let browser = null;
  try {
    // Launch inside the try block so the server is closed if launch fails.
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });

    // Generate PDFs one page at a time. A result of exactly `false` marks a
    // failed page; any other result is treated as success.
    let failedCount = 0;
    for (const htmlFile of htmlFiles) {
      const result = await generatePdf(browser, htmlFile);
      if (result === false) {
        failedCount += 1;
      }
    }

    if (failedCount > 0) {
      console.warn(`\n⚠ PDF generation finished with ${failedCount} of ${htmlFiles.length} pages failed. Files saved to: ${PDF_DIR}`);
    } else {
      console.log(`\n✓ PDF generation complete! Files saved to: ${PDF_DIR}`);
    }
  } finally {
    if (browser) {
      await browser.close();
    }
    server.close();
  }
}
// Run the script.
// main() returns a promise; if it rejects, the error is logged and the process
// exits with status 1.
main().catch(error => {
console.error('Fatal error:', error);
process.exit(1);
});