UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

447 lines (420 loc) 14.7 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.renderChartsFromReport = renderChartsFromReport; exports.createHtmlReport = createHtmlReport; const child_process_1 = require("child_process"); const fs_1 = __importDefault(require("fs")); const path_1 = __importDefault(require("path")); const util_1 = require("util"); const execAsync = (0, util_1.promisify)(child_process_1.exec); /** * Renders Mermaid charts from a markdown report to static PNG images * Requires @mermaid-js/mermaid-cli to be installed */ async function renderChartsFromReport(reportPath, outputDir) { // Ensure output directory exists if (!fs_1.default.existsSync(outputDir)) { fs_1.default.mkdirSync(outputDir, { recursive: true }); } // Read the markdown report const markdown = fs_1.default.readFileSync(reportPath, 'utf-8'); // Extract all Mermaid diagram blocks const diagramRegex = /```mermaid\n([\s\S]*?)```/g; const diagrams = []; let match; while ((match = diagramRegex.exec(markdown)) !== null) { diagrams.push({ content: match[1], index: diagrams.length, }); } console.log(`Found ${diagrams.length} Mermaid diagrams in the report`); if (diagrams.length === 0) { return []; } // Create temporary files for each diagram and render them const renderedImages = []; for (const diagram of diagrams) { const tempFilePath = path_1.default.join(outputDir, `temp_diagram_${diagram.index}.mmd`); const outputPath = path_1.default.join(outputDir, `chart_${diagram.index}.png`); // Write diagram to temp file fs_1.default.writeFileSync(tempFilePath, diagram.content); try { // Render the diagram using Mermaid CLI await execAsync(`npx mmdc -i ${tempFilePath} -o ${outputPath} -b transparent -w 800`); console.log(`Rendered diagram ${diagram.index} to ${outputPath}`); renderedImages.push(outputPath); } catch (error) { console.error(`Error rendering diagram ${diagram.index}:`, error); } // Clean up temp file fs_1.default.unlinkSync(tempFilePath); } return renderedImages; } /** * Creates an HTML version of the report with embedded charts */ async function createHtmlReport(markdownPath, chartOutputDir) { // First render the charts const chartPaths = await renderChartsFromReport(markdownPath, chartOutputDir); // Read the markdown report const markdown = fs_1.default.readFileSync(markdownPath, 'utf-8'); // Create HTML template const htmlPath = markdownPath.replace('.md', '.html'); // Parse the markdown to extract sections const sections = []; // Extract all section headings const lines = markdown.split('\n'); let currentSection = null; for (let i = 0; i < lines.length; i++) { const line = lines[i]; // Check for headings const h1Match = line.match(/^# (.+)$/); const h2Match = line.match(/^## (.+)$/); const h3Match = line.match(/^### (.+)$/); if (h1Match || h2Match || h3Match) { // Save previous section if exists if (currentSection) { sections.push(currentSection); } // Start new section const title = h1Match ? h1Match[1] : h2Match ? h2Match[1] : h3Match[1]; const level = h1Match ? 1 : h2Match ? 2 : 3; currentSection = { title, level, content: line + '\n', }; } else if (currentSection) { // Add line to current section currentSection.content += line + '\n'; // Check if this section contains a mermaid chart if (line.includes('```mermaid')) { const chartIndexMatch = currentSection.content.match(/```mermaid[\s\S]*?```/g); if (chartIndexMatch) { const chartCount = chartIndexMatch.length; const sectionChartIndex = sections.filter((s) => s.chartIndex !== undefined).length; currentSection.chartIndex = sectionChartIndex; } } } } // Add the last section if (currentSection) { sections.push(currentSection); } // Replace Mermaid code blocks with image references let chartIndex = 0; // Process content for each section for (let i = 0; i < sections.length; i++) { if (sections[i].chartIndex !== undefined) { // This section has a chart if (chartIndex < chartPaths.length) { const imgPath = chartPaths[chartIndex]; const imgTag = `<img src="${path_1.default.relative(path_1.default.dirname(htmlPath), imgPath)}" alt="Chart ${chartIndex}" class="chart-image" />`; // Replace the mermaid code block with the image sections[i].content = sections[i].content.replace(/```mermaid[\s\S]*?```/, imgTag); chartIndex++; } } // Convert tables to HTML tables sections[i].content = convertMarkdownTablesToHTML(sections[i].content); } // Create HTML with improved styling for presentation const html = `<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>CV Processing Report</title> <style> :root { --primary-color: #2c3e50; --secondary-color: #3498db; --accent-color: #e74c3c; --background-color: #f8f9fa; --card-background: #ffffff; --text-color: #333333; --border-color: #e0e0e0; } body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; line-height: 1.6; color: var(--text-color); background-color: var(--background-color); margin: 0; padding: 0; } .container { max-width: 1200px; margin: 0 auto; padding: 20px; } header { background-color: var(--primary-color); color: white; padding: 20px 0; margin-bottom: 30px; text-align: center; } h1 { margin: 0; padding: 0; font-size: 2.5rem; } h2 { color: var(--primary-color); border-bottom: 2px solid var(--secondary-color); padding-bottom: 10px; margin-top: 40px; } h3 { color: var(--primary-color); margin-top: 30px; } .section { background-color: var(--card-background); border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); margin-bottom: 30px; padding: 20px; overflow: hidden; } .section-header { margin-top: 0; color: var(--primary-color); } .flex-container { display: flex; flex-wrap: wrap; gap: 20px; margin: 20px 0; } .chart-container { flex: 1; min-width: 300px; text-align: center; } .table-container { flex: 1; min-width: 300px; overflow-x: auto; } table { border-collapse: collapse; width: 100%; margin: 20px 0; } th, td { border: 1px solid var(--border-color); padding: 12px 15px; text-align: left; } th { background-color: var(--secondary-color); color: white; font-weight: 600; } tr:nth-child(even) { background-color: rgba(0,0,0,0.02); } tr:hover { background-color: rgba(0,0,0,0.05); } .chart-image { max-width: 100%; height: auto; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.1); } code { background-color: #f5f5f5; padding: 2px 4px; border-radius: 3px; } .meta-info { display: flex; justify-content: space-around; background-color: var(--card-background); border-radius: 8px; padding: 15px; margin-bottom: 30px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); } .meta-item { text-align: center; } .meta-item strong { display: block; font-size: 1.2rem; color: var(--secondary-color); } .recommendations { background-color: #f8f5e6; border-left: 4px solid #f1c40f; padding: 15px; margin: 20px 0; } .recommendation-item { margin-bottom: 15px; } @media (max-width: 768px) { .flex-container { flex-direction: column; } .chart-container, .table-container { width: 100%; } } </style> </head> <body> <header> <div class="container"> <h1>CV Processing Report</h1> </div> </header> <div class="container"> ${generateHTMLContent(sections)} </div> </body> </html>`; // Write HTML file fs_1.default.writeFileSync(htmlPath, html); console.log(`Created HTML report at ${htmlPath}`); return htmlPath; } /** * Generates structured HTML content from markdown sections */ function generateHTMLContent(sections) { let html = ''; // Find and process summary section const summarySection = sections.find((s) => s.title === 'Merged CV Processing Report'); if (summarySection) { // Extract metadata like date, samples, runs const dateMatch = summarySection.content.match(/\*\*Date\*\*: (.*)/); const samplesMatch = summarySection.content.match(/\*\*Total CV Samples\*\*: (.*)/); const runsMatch = summarySection.content.match(/\*\*Total Runs Analyzed\*\*: (.*)/); html += '<div class="meta-info">'; if (dateMatch) { html += `<div class="meta-item"><span>Date</span><strong>${dateMatch[1]}</strong></div>`; } if (samplesMatch) { html += `<div class="meta-item"><span>Total CV Samples</span><strong>${samplesMatch[1]}</strong></div>`; } if (runsMatch) { html += `<div class="meta-item"><span>Total Runs Analyzed</span><strong>${runsMatch[1]}</strong></div>`; } html += '</div>'; } // Process each section for (let i = 0; i < sections.length; i++) { const section = sections[i]; // Skip the already processed summary section if (section.title === 'Merged CV Processing Report') continue; // Skip "All Runs" section as it's usually too large if (section.title === 'All Runs') continue; // Create section html += `<div class="section">`; html += `<h${section.level} class="section-header">${section.title}</h${section.level}>`; // Special handling for sections with charts if (section.chartIndex !== undefined) { // Extract table from content const tableMatch = section.content.match(/<table>[\s\S]*?<\/table>/); if (tableMatch) { html += `<div class="flex-container">`; html += `<div class="chart-container">`; // Find and extract the img tag const imgMatch = section.content.match(/<img[^>]+>/); if (imgMatch) { html += imgMatch[0]; } html += `</div>`; html += `<div class="table-container">`; html += tableMatch[0]; html += `</div>`; html += `</div>`; // Remove the table and img from content to avoid duplication let remainingContent = section.content .replace(/<img[^>]+>/, '') .replace(/<table>[\s\S]*?<\/table>/, ''); // Add any remaining content if (remainingContent.trim()) { html += remainingContent; } } else { // Just add the content with the chart html += section.content; } } else if (section.title === 'Recommendations') { // Special handling for recommendations section html += `<div class="recommendations">`; // Extract recommendation items const recommendations = section.content.split(/###\s+/); for (let j = 1; j < recommendations.length; j++) { const recContent = recommendations[j]; const recTitle = recContent.split('\n')[0]; const recDetails = recContent.replace(recTitle, '').trim(); html += `<div class="recommendation-item">`; html += `<h4>${recTitle}</h4>`; html += `<p>${recDetails}</p>`; html += `</div>`; } html += `</div>`; } else { // Regular section, just add the content html += section.content; } html += `</div>`; } return html; } /** * Converts markdown tables to HTML tables */ function convertMarkdownTablesToHTML(content) { let html = content; // Find all markdown tables const tableRegex = /\|.*\|\n\|[-:| ]+\|\n(?:\|.*\|\n)+/g; const tables = content.match(tableRegex); if (tables) { for (const table of tables) { const rows = table.trim().split('\n'); // Start HTML table let htmlTable = '<table>\n<thead>\n<tr>\n'; // Process header const headerCells = rows[0] .split('|') .filter((cell) => cell.trim() !== ''); for (const cell of headerCells) { htmlTable += `<th>${cell.trim()}</th>\n`; } htmlTable += '</tr>\n</thead>\n<tbody>\n'; // Skip header and separator rows, process data rows for (let i = 2; i < rows.length; i++) { htmlTable += '<tr>\n'; const cells = rows[i].split('|').filter((cell) => cell.trim() !== ''); for (const cell of cells) { htmlTable += `<td>${cell.trim()}</td>\n`; } htmlTable += '</tr>\n'; } htmlTable += '</tbody>\n</table>'; // Replace the markdown table with the HTML table html = html.replace(table, htmlTable); } } return html; }