@thecodingwhale/cv-processor
Version:
CV Processor to extract structured data from PDF resumes using TypeScript
447 lines (420 loc) • 14.7 kB
JavaScript
"use strict";
// Interop helper: reuses an existing `this.__importDefault` when one is present; otherwise
// returns ES-module-flagged modules unchanged and wraps any other module as `{ default: mod }`,
// so callers can always read the module through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
// Flag this module's exports object as an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API. The function declarations referenced here are hoisted, so they can be
// assigned before their definitions appear further down the file.
exports.renderChartsFromReport = renderChartsFromReport;
exports.createHtmlReport = createHtmlReport;
const child_process_1 = require("child_process");
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const util_1 = require("util");
// Promise-returning wrapper around `child_process.exec`, so shell commands can be awaited.
const execAsync = (0, util_1.promisify)(child_process_1.exec);
/**
 * Renders every Mermaid diagram found in a markdown report to a static PNG image.
 * Requires @mermaid-js/mermaid-cli to be installed (it is invoked as `npx mmdc`).
 *
 * @param {string} reportPath - Path of the markdown report to scan for ```mermaid blocks.
 * @param {string} outputDir - Directory that receives `chart_<index>.png`; created when missing.
 * @returns {Promise<string[]>} Paths of the images that rendered successfully, in diagram
 *   order. A diagram whose rendering fails is logged and left out of the result.
 */
async function renderChartsFromReport(reportPath, outputDir) {
    // Quote a path for the shell so spaces and metacharacters cannot split or alter the command.
    const quoteForShell = (value) => process.platform === 'win32'
        ? `"${value}"`
        : `'${value.replace(/'/g, `'\\''`)}'`;
    // `recursive: true` already makes this a no-op when the directory exists
    fs_1.default.mkdirSync(outputDir, { recursive: true });
    // Read the markdown report
    const markdown = fs_1.default.readFileSync(reportPath, 'utf-8');
    // Extract all Mermaid diagram blocks (`\r?\n` also accepts Windows line endings)
    const diagramRegex = /```mermaid\r?\n([\s\S]*?)```/g;
    const diagrams = [];
    let match;
    while ((match = diagramRegex.exec(markdown)) !== null) {
        diagrams.push({
            content: match[1],
            index: diagrams.length,
        });
    }
    console.log(`Found ${diagrams.length} Mermaid diagrams in the report`);
    if (diagrams.length === 0) {
        return [];
    }
    // Write each diagram to a temporary .mmd file and render it
    const renderedImages = [];
    for (const diagram of diagrams) {
        const tempFilePath = path_1.default.join(outputDir, `temp_diagram_${diagram.index}.mmd`);
        const outputPath = path_1.default.join(outputDir, `chart_${diagram.index}.png`);
        fs_1.default.writeFileSync(tempFilePath, diagram.content);
        try {
            // Render the diagram using Mermaid CLI
            await execAsync(`npx mmdc -i ${quoteForShell(tempFilePath)} -o ${quoteForShell(outputPath)} -b transparent -w 800`);
            console.log(`Rendered diagram ${diagram.index} to ${outputPath}`);
            renderedImages.push(outputPath);
        }
        catch (error) {
            // Best effort: one broken diagram must not prevent the others from rendering
            console.error(`Error rendering diagram ${diagram.index}:`, error);
        }
        finally {
            // Always remove the temp file; tolerate it having vanished already
            if (fs_1.default.existsSync(tempFilePath)) {
                fs_1.default.unlinkSync(tempFilePath);
            }
        }
    }
    return renderedImages;
}
/**
 * Creates an HTML version of a markdown report with its Mermaid charts embedded as images.
 *
 * The report is split into sections at `#`, `##` and `###` headings (text before the first
 * heading is not part of any section). Mermaid code blocks are replaced by `<img>` tags that
 * point at the rendered charts, and markdown tables are converted to HTML tables.
 *
 * @param {string} markdownPath - Path of the markdown report.
 * @param {string} chartOutputDir - Directory where the rendered chart images are written.
 * @returns {Promise<string>} Path of the HTML file that was written: the markdown path with
 *   its `.md` extension swapped for `.html`, or with `.html` appended when the extension
 *   is anything else (so the source file is never overwritten).
 */
async function createHtmlReport(markdownPath, chartOutputDir) {
    // First render the charts
    const chartPaths = await renderChartsFromReport(markdownPath, chartOutputDir);
    // Read the markdown report
    const markdown = fs_1.default.readFileSync(markdownPath, 'utf-8');
    // Only touch the real extension: a plain `.replace('.md', ...)` would rewrite the first
    // ".md" anywhere in the path and leave paths without ".md" pointing at the source file.
    const extension = path_1.default.extname(markdownPath);
    const htmlPath = extension.toLowerCase() === '.md'
        ? `${markdownPath.slice(0, -extension.length)}.html`
        : `${markdownPath}.html`;
    // Parse the markdown into heading-delimited sections
    const sections = [];
    const headingRegex = /^(#{1,3}) (.+)$/;
    let currentSection = null;
    let chartSectionCount = 0;
    // Split on \r?\n: a trailing "\r" would stop the `$`-anchored heading regex from matching
    for (const line of markdown.split(/\r?\n/)) {
        const headingMatch = line.match(headingRegex);
        if (headingMatch) {
            // Save previous section if exists
            if (currentSection) {
                sections.push(currentSection);
            }
            // Start new section; the heading line stays part of the section content
            currentSection = {
                title: headingMatch[2],
                level: headingMatch[1].length,
                content: line + '\n',
            };
        }
        else if (currentSection) {
            currentSection.content += line + '\n';
            // Mark the section as soon as an opening fence is seen. Waiting for a complete
            // ```mermaid ... ``` match here would never succeed, because the closing fence
            // has not been appended yet when the opening line is processed.
            if (line.includes('```mermaid') && currentSection.chartIndex === undefined) {
                currentSection.chartIndex = chartSectionCount;
                chartSectionCount++;
            }
        }
    }
    // Add the last section
    if (currentSection) {
        sections.push(currentSection);
    }
    // Replace Mermaid code blocks with image references, consuming charts in document order.
    // NOTE: chartPaths only holds diagrams that rendered successfully, so a failed render
    // shifts the charts that follow it.
    const htmlDir = path_1.default.dirname(htmlPath);
    let chartIndex = 0;
    for (const section of sections) {
        if (section.chartIndex !== undefined) {
            // Replace every diagram in the section (not just the first) so later sections
            // keep receiving the chart that belongs to them.
            section.content = section.content.replace(/```mermaid[\s\S]*?```/g, (block) => {
                if (chartIndex >= chartPaths.length) {
                    // No rendered image available: keep the original code block
                    return block;
                }
                // URLs use forward slashes even when the platform separator is a backslash
                const relativeSrc = path_1.default
                    .relative(htmlDir, chartPaths[chartIndex])
                    .split(path_1.default.sep)
                    .join('/');
                const imgTag = `<img src="${relativeSrc}" alt="Chart ${chartIndex}" class="chart-image" />`;
                chartIndex++;
                return imgTag;
            });
        }
        // Convert tables to HTML tables
        section.content = convertMarkdownTablesToHTML(section.content);
    }
    // Create HTML with improved styling for presentation.
    // The doctype keeps browsers out of quirks mode.
    const html = `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>CV Processing Report</title>
<style>
:root {
--primary-color: #2c3e50;
--secondary-color: #3498db;
--accent-color: #e74c3c;
--background-color: #f8f9fa;
--card-background: #ffffff;
--text-color: #333333;
--border-color: #e0e0e0;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
line-height: 1.6;
color: var(--text-color);
background-color: var(--background-color);
margin: 0;
padding: 0;
}
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
header {
background-color: var(--primary-color);
color: white;
padding: 20px 0;
margin-bottom: 30px;
text-align: center;
}
h1 {
margin: 0;
padding: 0;
font-size: 2.5rem;
}
h2 {
color: var(--primary-color);
border-bottom: 2px solid var(--secondary-color);
padding-bottom: 10px;
margin-top: 40px;
}
h3 {
color: var(--primary-color);
margin-top: 30px;
}
.section {
background-color: var(--card-background);
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
margin-bottom: 30px;
padding: 20px;
overflow: hidden;
}
.section-header {
margin-top: 0;
color: var(--primary-color);
}
.flex-container {
display: flex;
flex-wrap: wrap;
gap: 20px;
margin: 20px 0;
}
.chart-container {
flex: 1;
min-width: 300px;
text-align: center;
}
.table-container {
flex: 1;
min-width: 300px;
overflow-x: auto;
}
table {
border-collapse: collapse;
width: 100%;
margin: 20px 0;
}
th, td {
border: 1px solid var(--border-color);
padding: 12px 15px;
text-align: left;
}
th {
background-color: var(--secondary-color);
color: white;
font-weight: 600;
}
tr:nth-child(even) {
background-color: rgba(0,0,0,0.02);
}
tr:hover {
background-color: rgba(0,0,0,0.05);
}
.chart-image {
max-width: 100%;
height: auto;
border-radius: 5px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
code {
background-color: #f5f5f5;
padding: 2px 4px;
border-radius: 3px;
}
.meta-info {
display: flex;
justify-content: space-around;
background-color: var(--card-background);
border-radius: 8px;
padding: 15px;
margin-bottom: 30px;
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
}
.meta-item {
text-align: center;
}
.meta-item strong {
display: block;
font-size: 1.2rem;
color: var(--secondary-color);
}
.recommendations {
background-color: #f8f5e6;
border-left: 4px solid #f1c40f;
padding: 15px;
margin: 20px 0;
}
.recommendation-item {
margin-bottom: 15px;
}
@media (max-width: 768px) {
.flex-container {
flex-direction: column;
}
.chart-container, .table-container {
width: 100%;
}
}
</style>
</head>
<body>
<header>
<div class="container">
<h1>CV Processing Report</h1>
</div>
</header>
<div class="container">
${generateHTMLContent(sections)}
</div>
</body>
</html>`;
    // Write HTML file
    fs_1.default.writeFileSync(htmlPath, html);
    console.log(`Created HTML report at ${htmlPath}`);
    return htmlPath;
}
/**
 * Generates structured HTML content from markdown sections.
 *
 * - The section titled "Merged CV Processing Report" is turned into a metadata strip
 *   (Date / Total CV Samples / Total Runs Analyzed) instead of a regular section.
 * - The section titled "All Runs" is skipped because it is usually too large.
 * - Sections with a `chartIndex` show their first image and first table side by side.
 * - The "Recommendations" section renders each `###` item as its own block.
 *
 * @param {Array<{title: string, level: number, content: string, chartIndex?: number}>} sections
 *   Sections whose `content` may still begin with the section's own markdown heading line.
 * @returns {string} HTML fragment for the report body.
 */
function generateHTMLContent(sections) {
    const SUMMARY_TITLE = 'Merged CV Processing Report';
    // The heading is emitted as an <hN> element, so drop the raw markdown heading line
    // from the content to avoid showing the title twice.
    const stripHeadingLine = (content) => content.replace(/^#{1,3} .+(?:\r?\n|$)/, '');
    let html = '';
    // Find and process summary section
    const summarySection = sections.find((s) => s.title === SUMMARY_TITLE);
    if (summarySection) {
        // Extract metadata like date, samples, runs
        const dateMatch = summarySection.content.match(/\*\*Date\*\*: (.*)/);
        const samplesMatch = summarySection.content.match(/\*\*Total CV Samples\*\*: (.*)/);
        const runsMatch = summarySection.content.match(/\*\*Total Runs Analyzed\*\*: (.*)/);
        html += '<div class="meta-info">';
        if (dateMatch) {
            html += `<div class="meta-item"><span>Date</span><strong>${dateMatch[1]}</strong></div>`;
        }
        if (samplesMatch) {
            html += `<div class="meta-item"><span>Total CV Samples</span><strong>${samplesMatch[1]}</strong></div>`;
        }
        if (runsMatch) {
            html += `<div class="meta-item"><span>Total Runs Analyzed</span><strong>${runsMatch[1]}</strong></div>`;
        }
        html += '</div>';
    }
    // Process each section
    for (const section of sections) {
        // Skip the already processed summary and the oversized "All Runs" listing
        if (section.title === SUMMARY_TITLE || section.title === 'All Runs') {
            continue;
        }
        const body = stripHeadingLine(section.content);
        html += `<div class="section">`;
        html += `<h${section.level} class="section-header">${section.title}</h${section.level}>`;
        if (section.chartIndex !== undefined) {
            // Special handling for sections with charts
            const tableMatch = body.match(/<table>[\s\S]*?<\/table>/);
            if (tableMatch) {
                // Chart and table side by side
                html += `<div class="flex-container">`;
                html += `<div class="chart-container">`;
                const imgMatch = body.match(/<img[^>]+>/);
                if (imgMatch) {
                    html += imgMatch[0];
                }
                html += `</div>`;
                html += `<div class="table-container">`;
                html += tableMatch[0];
                html += `</div>`;
                html += `</div>`;
                // Remove the table and img from content to avoid duplication
                const remainingContent = body
                    .replace(/<img[^>]+>/, '')
                    .replace(/<table>[\s\S]*?<\/table>/, '');
                if (remainingContent.trim()) {
                    html += remainingContent;
                }
            }
            else {
                // Just add the content with the chart
                html += body;
            }
        }
        else if (section.title === 'Recommendations') {
            html += `<div class="recommendations">`;
            // Items are delimited by "###" markers; index 0 is whatever precedes the first one
            const recommendations = section.content.split(/###\s+/);
            if (recommendations.length > 1) {
                for (let j = 1; j < recommendations.length; j++) {
                    const recContent = recommendations[j];
                    const recTitle = recContent.split('\n')[0];
                    const recDetails = recContent.slice(recTitle.length).trim();
                    html += `<div class="recommendation-item">`;
                    html += `<h4>${recTitle}</h4>`;
                    html += `<p>${recDetails}</p>`;
                    html += `</div>`;
                }
            }
            else if (body.trim()) {
                // No "###" items found: keep the text instead of silently dropping it
                html += body;
            }
            html += `</div>`;
        }
        else {
            // Regular section, just add the content
            html += body;
        }
        html += `</div>`;
    }
    return html;
}
/**
 * Converts markdown pipe tables to HTML tables.
 *
 * A table is a header row, a separator row made of `-`, `:`, `|` and spaces, and one or
 * more data rows, each starting and ending with `|`. The newline that terminates the last
 * row is consumed together with the table. Text outside tables is returned unchanged.
 *
 * @param {string} content - Text that may contain markdown tables.
 * @returns {string} The text with every markdown table replaced by `<table>` markup.
 */
function convertMarkdownTablesToHTML(content) {
    // `\r?\n` accepts Windows line endings; `$` lets a table end at the end of the input
    const tableRegex = /\|.*\|\r?\n\|[-:| ]+\|\r?\n(?:\|.*\|(?:\r?\n|$))+/g;
    // Strip only the outer pipes so that empty cells keep their column position
    const splitRow = (row) => row
        .trim()
        .replace(/^\|/, '')
        .replace(/\|$/, '')
        .split('|')
        .map((cell) => cell.trim());
    // A function replacer keeps "$&", "$$" etc. inside cells from being read as
    // replacement patterns.
    return content.replace(tableRegex, (table) => {
        const rows = table.trim().split(/\r?\n/);
        const headerHtml = splitRow(rows[0])
            .map((cell) => `<th>${cell}</th>\n`)
            .join('');
        // rows[1] is the separator row; data rows start at index 2
        const bodyHtml = rows
            .slice(2)
            .map((row) => {
                const cellsHtml = splitRow(row)
                    .map((cell) => `<td>${cell}</td>\n`)
                    .join('');
                return `<tr>\n${cellsHtml}</tr>\n`;
            })
            .join('');
        return `<table>\n<thead>\n<tr>\n${headerHtml}</tr>\n</thead>\n<tbody>\n${bodyHtml}</tbody>\n</table>`;
    });
}