282 lines
9.2 KiB
JavaScript
282 lines
9.2 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * PDF Generation Script
 *
 * Uses Puppeteer to render each HTML page to PDF.
 * Run with: node generate-pdfs.js
 *
 * Prerequisites: npm install puppeteer
 */
|
|
|
|
const puppeteer = require('puppeteer');
|
|
const http = require('http');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Configuration
// Port the temporary local static server listens on while pages are rendered.
const PORT = 8765;
// Root of the site whose HTML pages are converted.
const SITE_DIR = path.resolve(__dirname, 'resume');
// Output directory for the generated PDFs (lives inside the site root).
const PDF_DIR = path.resolve(SITE_DIR, 'pdfs');
|
|
|
|
// Lookup table: file extension (with leading dot) -> Content-Type header value
// sent by the static file server.
const MIME_TYPES = Object.fromEntries([
  // Documents, styles and scripts
  ['.html', 'text/html'],
  ['.css', 'text/css'],
  ['.js', 'application/javascript'],
  ['.json', 'application/json'],
  // Images
  ['.png', 'image/png'],
  ['.jpg', 'image/jpeg'],
  ['.jpeg', 'image/jpeg'],
  ['.gif', 'image/gif'],
  ['.svg', 'image/svg+xml'],
  ['.ico', 'image/x-icon'],
  // Fonts
  ['.woff', 'font/woff'],
  ['.woff2', 'font/woff2'],
  ['.ttf', 'font/ttf'],
  ['.eot', 'application/vnd.ms-fontobject'],
]);
|
|
|
|
/**
 * Recursively collect the HTML pages below a directory.
 *
 * Directories named `pdfs` or `node_modules`, and any directory whose name
 * starts with a dot, are not descended into. Files whose name contains
 * `template` or `with-includes` are left out.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} [baseDir=dir] - Directory the returned paths are relative to.
 * @returns {string[]} Paths of the matching `.html` files, relative to
 *   `baseDir`, in the order the directory entries are returned by the
 *   file system.
 */
function findHtmlFiles(dir, baseDir = dir) {
  const isSkippedDir = (name) =>
    name === 'pdfs' || name === 'node_modules' || name.startsWith('.');
  const isTemplate = (name) =>
    name.includes('template') || name.includes('with-includes');

  return fs.readdirSync(dir, { withFileTypes: true }).flatMap((item) => {
    const itemPath = path.join(dir, item.name);

    if (item.isDirectory()) {
      // A sub-directory contributes its own pages in place of its entry.
      return isSkippedDir(item.name) ? [] : findHtmlFiles(itemPath, baseDir);
    }

    const isPage =
      item.isFile() && item.name.endsWith('.html') && !isTemplate(item.name);
    return isPage ? [path.relative(baseDir, itemPath)] : [];
  });
}
|
|
|
|
/**
 * Create a simple static file server
 *
 * Serves files from SITE_DIR. The query string is ignored, `/` maps to
 * `/index.html`, and the Content-Type is chosen from MIME_TYPES by file
 * extension (falling back to `application/octet-stream`).
 *
 * Responses:
 *   200 - file found and sent
 *   400 - the request path contains a malformed percent-escape
 *   403 - the request path contains a NUL byte or resolves outside SITE_DIR
 *   404 - the file could not be read
 *
 * @returns {http.Server} A server that has not been started; the caller
 *   calls `listen()` and `close()` on it.
 */
function createServer() {
  // Normalize once so the containment check below compares like with like.
  const rootDir = path.resolve(SITE_DIR);

  return http.createServer((req, res) => {
    let urlPath;
    try {
      // Browsers percent-encode spaces and non-ASCII characters in the path;
      // decode so "my%20page.html" finds "my page.html" on disk.
      urlPath = decodeURIComponent(req.url.split('?')[0]);
    } catch (decodeError) {
      // decodeURIComponent throws URIError on sequences such as "%E0%A4%A".
      res.writeHead(400);
      res.end('Bad request');
      return;
    }
    if (urlPath === '/') urlPath = '/index.html';

    const filePath = path.join(rootDir, urlPath);

    // Refuse anything that climbs out of the site root ("/../secret") and
    // NUL bytes, which fs rejects with a synchronous throw.
    const relativePath = path.relative(rootDir, filePath);
    const escapesRoot =
      relativePath === '..' ||
      relativePath.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativePath);
    if (escapesRoot || urlPath.includes('\0')) {
      res.writeHead(403);
      res.end('Forbidden');
      return;
    }

    const ext = path.extname(filePath);
    const contentType = MIME_TYPES[ext] || 'application/octet-stream';

    fs.readFile(filePath, (err, data) => {
      if (err) {
        // Missing file, directory, or permission problem: all reported as 404.
        res.writeHead(404);
        res.end('Not found');
        return;
      }
      res.writeHead(200, { 'Content-Type': contentType });
      res.end(data);
    });
  });
}
|
|
|
|
/**
 * Make sure a directory is present, creating it (and any missing parents)
 * when nothing exists at that path yet.
 *
 * @param {string} dirPath - Directory that must exist afterwards.
 * @returns {void}
 */
function ensureDir(dirPath) {
  const alreadyPresent = fs.existsSync(dirPath);
  if (alreadyPresent) {
    return;
  }

  fs.mkdirSync(dirPath, { recursive: true });
}
|
|
|
|
/**
 * Resolve after the given delay.
 *
 * Used instead of `page.waitForTimeout`, which newer Puppeteer releases no
 * longer provide, and which cannot run at all once the page has been closed.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Navigate to a page and wait until it is ready to be printed.
 *
 * Waits for the network to go idle, then for the `#header-include` /
 * `#footer-include` placeholders (when present) to be filled in, then one
 * extra second for any remaining JavaScript.
 *
 * @param {object} page - Puppeteer page to drive.
 * @param {string} url - Absolute URL to load.
 * @param {string} htmlFile - Source file name, used in log messages only.
 * @returns {Promise<void>}
 * @throws {Error} When navigation fails or the server answers with a
 *   non-2xx status.
 */
async function loadPage(page, url, htmlFile) {
  const response = await page.goto(url, {
    waitUntil: 'networkidle0',
    timeout: 30000
  });

  // goto() resolves on 404/500 as well; without this check the error page
  // itself would be printed to PDF and reported as a success.
  if (response && !response.ok()) {
    throw new Error(`HTTP ${response.status()} while loading ${url}`);
  }

  // Wait for includes.js to finish loading header and footer
  try {
    await page.waitForFunction(() => {
      const headerInclude = document.getElementById('header-include');
      const footerInclude = document.getElementById('footer-include');

      // If neither element exists, page doesn't use includes - that's fine
      if (!headerInclude && !footerInclude) return true;

      // If header exists but is empty, wait
      if (headerInclude && headerInclude.innerHTML.trim() === '') return false;

      // If footer exists but is empty, wait
      if (footerInclude && footerInclude.innerHTML.trim() === '') return false;

      // If header exists, check if nav is loaded (indicates includes.js finished)
      if (headerInclude) {
        const nav = headerInclude.querySelector('nav');
        if (!nav) return false;
      }

      return true;
    }, {
      timeout: 15000,
      polling: 100 // Check every 100ms
    });
  } catch (waitError) {
    // Deliberately best-effort: some pages might not use includes or might
    // load differently, so a timeout here is only a warning.
    console.warn(`Warning: Includes may not have fully loaded for ${htmlFile}, continuing anyway...`);
  }

  // Additional wait for any remaining JavaScript to finish
  await sleep(1000);
}

/**
 * Generate PDF for a single HTML file
 *
 * Loads the page from the local server on PORT and writes an A4 PDF to the
 * same relative location under PDF_DIR (with `.html` replaced by `.pdf`).
 * Transient "detached"/"closed" errors from `page.pdf()` are retried once
 * after reloading the page. Rendering failures are logged, not thrown.
 *
 * @param {object} browser - Puppeteer browser used to open a new page.
 * @param {string} htmlFile - Path of the HTML file relative to the site root.
 * @returns {Promise<boolean>} `true` when the PDF was written, `false` when
 *   rendering failed (the failure has already been logged).
 */
async function generatePdf(browser, htmlFile) {
  const MAX_ATTEMPTS = 2;
  const page = await browser.newPage();

  // Convert file path to URL path (Windows separators become slashes)
  const urlPath = '/' + htmlFile.replace(/\\/g, '/');
  const url = `http://localhost:${PORT}${urlPath}`;

  // Determine output PDF path
  const pdfRelativePath = htmlFile.replace(/\.html$/, '.pdf');
  const pdfPath = path.join(PDF_DIR, pdfRelativePath);

  // Ensure output directory exists
  ensureDir(path.dirname(pdfPath));

  // Errors Puppeteer raises when the frame or target went away mid-print.
  const isRecoverable = (error) => {
    const message = String(error && error.message);
    return message.includes('detached') || message.includes('closed');
  };

  try {
    await loadPage(page, url, htmlFile);

    let lastError = null;

    for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
      try {
        await page.pdf({
          path: pdfPath,
          format: 'A4',
          printBackground: true,
          margin: {
            top: '20mm',
            right: '15mm',
            bottom: '20mm',
            left: '15mm'
          }
        });

        console.log(`✓ Generated: ${pdfRelativePath}`);
        return true;
      } catch (pdfError) {
        // Non-recoverable error, don't retry
        if (!isRecoverable(pdfError)) throw pdfError;

        lastError = pdfError;
        const remaining = MAX_ATTEMPTS - attempt;
        if (remaining === 0) break;

        console.warn(`Retrying PDF generation for ${htmlFile} (${remaining} attempts remaining)...`);
        // Wait a bit before retrying
        await sleep(2000);

        try {
          // Reload fully, including the header/footer wait, so the retried
          // PDF is not printed from a half-populated page.
          await loadPage(page, url, htmlFile);
        } catch (navError) {
          // Page cannot be reloaded; give up and report the original error.
          break;
        }
      }
    }

    // If we get here, all retries failed
    throw lastError || new Error('PDF generation failed after retries');
  } catch (error) {
    console.error(`✗ Failed: ${htmlFile} - ${error.message}`);
    return false;
  } finally {
    try {
      await page.close();
    } catch (closeError) {
      // The page may already be gone ("Connection closed" / "Target closed");
      // only report close errors that are something else.
      if (!String(closeError && closeError.message).includes('closed')) {
        console.warn(`Warning closing page for ${htmlFile}: ${closeError.message}`);
      }
    }
  }
}
|
|
|
|
/**
 * Main function
 *
 * Finds the HTML pages under SITE_DIR, recreates PDF_DIR from scratch, serves
 * the site on PORT, and renders the pages to PDF one after another in a
 * single headless browser. The browser and the server are shut down whether
 * or not rendering completes.
 *
 * @returns {Promise<void>} Rejects when the port cannot be bound, the
 *   browser cannot be launched, or an unexpected error escapes rendering.
 */
async function main() {
  console.log('PDF Generation Script');
  console.log('=====================\n');

  // Find all HTML files
  const htmlFiles = findHtmlFiles(SITE_DIR);
  console.log(`Found ${htmlFiles.length} HTML files to process\n`);

  if (htmlFiles.length === 0) {
    console.log('No HTML files found. Exiting.');
    return;
  }

  // Clean and create PDF directory (force: no error when it does not exist)
  fs.rmSync(PDF_DIR, { recursive: true, force: true });
  ensureDir(PDF_DIR);

  // Start local server
  const server = createServer();
  await new Promise((resolve, reject) => {
    // listen() reports failures such as EADDRINUSE through the 'error'
    // event; without a listener that becomes an uncaught exception instead
    // of a rejection the caller can handle.
    server.once('error', reject);
    server.listen(PORT, () => {
      server.off('error', reject);
      resolve();
    });
  });
  console.log(`Local server started on port ${PORT}\n`);

  let browser = null;
  try {
    // Launch browser (inside the try so a failed launch still stops the server)
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });

    // Generate PDFs for each HTML file, sequentially
    for (const htmlFile of htmlFiles) {
      await generatePdf(browser, htmlFile);
    }

    console.log(`\n✓ PDF generation complete! Files saved to: ${PDF_DIR}`);
  } finally {
    try {
      if (browser) await browser.close();
    } finally {
      // Runs even if closing the browser throws.
      server.close();
    }
  }
}
|
|
|
|
// Script entry point: run main() and turn any rejection into a logged
// error plus a non-zero exit status.
const reportFatalAndExit = (error) => {
  console.error('Fatal error:', error);
  process.exit(1);
};

main().catch(reportFatalAndExit);
|
|
|