/*
 * @thecodingwhale/cv-processor
 * Version:
 * CV Processor to extract structured data from PDF resumes using TypeScript
 * 270 lines (269 loc) • 13.9 kB
 * JavaScript
 */
;
/**
 * TypeScript interop helper: re-exposes property `key` of `source` on `target`
 * under the name `alias` (defaults to `key`).
 *
 * When the source property is missing, is an accessor on a non-ES-module object,
 * or is a writable/configurable data property, a live getter is installed so the
 * target always reflects the current source value. Otherwise the source's own
 * property descriptor is reused as-is.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    const exportedName = alias === undefined ? key : alias;
    let descriptor = Object.getOwnPropertyDescriptor(source, key);
    let needsLiveGetter;
    if (!descriptor) {
        needsLiveGetter = true;
    }
    else if ("get" in descriptor) {
        needsLiveGetter = !source.__esModule;
    }
    else {
        needsLiveGetter = descriptor.writable || descriptor.configurable;
    }
    if (needsLiveGetter) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, exportedName, descriptor);
}) : (function (target, source, key, alias) {
    // Fallback for environments without Object.create: plain value copy.
    const exportedName = alias === undefined ? key : alias;
    target[exportedName] = source[key];
}));
/**
 * TypeScript interop helper: attaches `value` to `target` as its `default` export.
 * Uses an enumerable defined property when Object.create is available, and a
 * plain assignment otherwise.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    return function (target, value) {
        target["default"] = value;
    };
})();
/**
 * TypeScript interop helper backing `import * as ns from "mod"`.
 *
 * Modules already flagged with `__esModule` are returned unchanged. Anything else
 * is wrapped in a fresh namespace object: every own property except "default" is
 * bound through __createBinding, and the original module becomes `default`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Own property names, with a for-in fallback when getOwnPropertyNames is unavailable.
    const listOwnKeys = function (obj) {
        if (Object.getOwnPropertyNames) {
            return Object.getOwnPropertyNames(obj);
        }
        const names = [];
        for (const name in obj) {
            if (Object.prototype.hasOwnProperty.call(obj, name)) {
                names.push(name);
            }
        }
        return names;
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            for (const name of listOwnKeys(mod)) {
                if (name !== "default") {
                    __createBinding(namespace, mod, name);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// Flag this module as transpiled from ES-module syntax; the __importStar helper
// above returns modules carrying this flag unchanged.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front; the class itself is assigned to it at the
// end of the file.
exports.ReportGenerator = void 0;
const fs = __importStar(require("fs"));
const glob_1 = require("glob");
const path = __importStar(require("path"));
/**
 * Format a token-usage cost estimate for display in the report.
 *
 * @param {*} cost - Value read from `metadata.tokenUsage.estimatedCost`.
 * @returns {string} `$` followed by the cost with four decimals, or 'N/A' when
 *   the value is not a finite number. A genuine cost of 0 renders as "$0.0000"
 *   rather than being treated as missing.
 */
function formatEstimatedCost(cost) {
    return typeof cost === 'number' && Number.isFinite(cost)
        ? `$${cost.toFixed(4)}`
        : 'N/A';
}
/**
 * Total token count of an execution, used as a sort key.
 *
 * @param {object} execution - Entry built by ReportGenerator.generateReport.
 * @returns {number} The numeric `totalTokens`, or Infinity when it is absent so
 *   that such entries sort last.
 */
function totalTokensOf(execution) {
    const total = execution.cvData.metadata?.tokenUsage?.totalTokens;
    return typeof total === 'number' && !Number.isNaN(total) ? total : Infinity;
}
/**
 * ReportGenerator class is responsible for generating markdown reports
 * from CV processing results, including token usage metrics.
 */
class ReportGenerator {
    /**
     * Generate a markdown report for a specific output directory.
     *
     * Reads every `*.json` file directly inside `outputDir`, keeps those that
     * carry a `metadata` object, and renders summary, performance, accuracy,
     * token-usage and field-emptiness sections.
     *
     * @param {string} outputDir - Directory containing the per-provider JSON results.
     * @param {boolean} [verbose=false] - Log a progress line before generating.
     * @returns {Promise<string>} The markdown report. Returns a short plain-text
     *   message instead when there are no JSON files, no files with metadata,
     *   or an unexpected error occurs (errors are logged, never thrown).
     */
    static async generateReport(outputDir, verbose = false) {
        if (verbose) {
            console.log(`Generating report for ${outputDir}`);
        }
        try {
            // Find all JSON files in the directory
            const jsonFiles = await (0, glob_1.glob)(`${outputDir}/*.json`);
            if (jsonFiles.length === 0) {
                return 'No JSON files found';
            }
            // Load all CV data files; an unreadable or malformed file is logged and skipped
            const allData = [];
            for (const file of jsonFiles) {
                try {
                    const data = JSON.parse(fs.readFileSync(file, 'utf8'));
                    // Skip files without proper metadata
                    if (!data?.metadata) {
                        continue;
                    }
                    // Coerce so a missing or non-numeric time cannot break the
                    // arithmetic and .toFixed() calls further down.
                    const processingTime = Number(data.metadata.processingTime);
                    allData.push({
                        cvData: data,
                        file: path.basename(file),
                        provider: data.metadata.provider || 'unknown',
                        model: data.metadata.model || 'default',
                        time: Number.isFinite(processingTime) ? processingTime : 0,
                    });
                }
                catch (error) {
                    console.error(`Error loading data file ${file}:`, error);
                }
            }
            if (allData.length === 0) {
                return 'No valid data files found';
            }
            const dirName = path.basename(outputDir);
            // The CV name is the part of the directory name before the first underscore
            const cvName = dirName.split('_')[0];
            // Use the date embedded in the directory name, falling back to today
            const dateMatch = dirName.match(/(\d{4}-\d{2}-\d{2})/);
            const date = dateMatch
                ? dateMatch[1]
                : new Date().toISOString().split('T')[0];
            // Total execution time covers every loaded file
            const totalTime = allData.reduce((sum, data) => sum + data.time, 0);
            // An execution counts as successful when it carries accuracy metadata
            const successfulExecutions = allData.filter((data) => data.cvData.metadata && data.cvData.metadata.accuracy);
            // For now we don't have failed executions in our data structure
            // If needed, we can detect them based on missing accuracy or other criteria
            const failedExecutions = [];
            // Calculate success rate
            const totalProviders = successfulExecutions.length + failedExecutions.length;
            const successRate = totalProviders > 0
                ? (successfulExecutions.length / totalProviders) * 100
                : 0;
            // Start building the markdown report
            let report = `# CV Processing Report\n\n`;
            report += `**CV**: ${cvName}.pdf\n`;
            report += `**Date**: ${date}\n`;
            report += `**Total Execution Time**: ${totalTime.toFixed(2)} seconds\n\n`;
            // Summary section
            report += `## Summary\n\n`;
            report += `- **Total Providers**: ${totalProviders}\n`;
            report += `- **Successful**: ${successfulExecutions.length}\n`;
            report += `- **Failed**: ${failedExecutions.length}\n`;
            // Trailing blank line keeps the next heading separated from the list
            report += `- **Success Rate**: ${successRate.toFixed(1)}%\n\n`;
            // Successful executions section
            report += `## Successful Executions\n\n`;
            report += `| Provider | Model | Time (s) | Accuracy | Token Usage | Est. Cost | Output File |\n`;
            report += `|----------|-------|----------|----------|-------------|-----------|-------------|\n`;
            for (const execution of successfulExecutions) {
                const accuracy = execution.cvData.metadata?.accuracy?.overall ?? 0;
                // Token usage information
                const tokenUsage = execution.cvData.metadata?.tokenUsage;
                const tokenCount = tokenUsage?.totalTokens ?? 'N/A';
                const estCost = formatEstimatedCost(tokenUsage?.estimatedCost);
                report += `| ${execution.provider} | ${execution.model} | ${execution.time.toFixed(2)} | ${accuracy}% | ${tokenCount} | ${estCost} | [View](./${execution.file}) |\n`;
            }
            report += '\n';
            // Failed executions section (if any)
            if (failedExecutions.length > 0) {
                report += `## Failed Executions\n\n`;
                report += `| Provider | Model | Error |\n`;
                report += `|----------|-------|-------|\n`;
                for (const execution of failedExecutions) {
                    const errorMessage = execution.error.substring(0, 50) + '...';
                    report += `| ${execution.provider} | ${execution.model} | ${errorMessage} |\n`;
                }
                report += '\n';
            }
            // Performance comparison section
            report += `## Performance Comparison\n\n`;
            if (successfulExecutions.length === 0) {
                // Nothing to rank or average; every section below would be empty
                // and the fastest/slowest lookup would fail on an empty list.
                report += `No successful executions to compare.\n`;
                return report;
            }
            // Strict comparisons keep the first entry on ties
            const fastest = successfulExecutions.reduce((best, current) => (current.time < best.time ? current : best));
            const slowest = successfulExecutions.reduce((worst, current) => (current.time > worst.time ? current : worst));
            // Average only over the executions being compared, not over every loaded file
            const successfulTime = successfulExecutions.reduce((sum, execution) => sum + execution.time, 0);
            const avgTime = successfulTime / successfulExecutions.length;
            report += `- **Fastest**: ${fastest.provider} (${fastest.model}) - ${fastest.time.toFixed(2)}s\n`;
            report += `- **Slowest**: ${slowest.provider} (${slowest.model}) - ${slowest.time.toFixed(2)}s\n`;
            report += `- **Average Time**: ${avgTime.toFixed(2)}s\n\n`;
            // Add accuracy comparison
            report += `### Accuracy Comparison\n\n`;
            report += `| Provider | Model | Accuracy | Token Usage | Cost |\n`;
            report += `|----------|-------|----------|-------------|------|\n`;
            for (const execution of successfulExecutions) {
                const accuracy = execution.cvData.metadata?.accuracy?.overall ?? 0;
                const tokenUsage = execution.cvData.metadata?.tokenUsage;
                const cost = formatEstimatedCost(tokenUsage?.estimatedCost);
                report += `| ${execution.provider} | ${execution.model} | ${accuracy}% | ${tokenUsage?.totalTokens ?? 'N/A'} | ${cost} |\n`;
            }
            // Add token usage comparison
            report += `\n### Token Usage Comparison\n\n`;
            report += `| Provider | Model | Input Tokens | Output Tokens | Total Tokens | Cost |\n`;
            report += `|----------|-------|--------------|---------------|--------------|------|\n`;
            for (const execution of successfulExecutions) {
                const tokenUsage = execution.cvData.metadata?.tokenUsage;
                const cost = formatEstimatedCost(tokenUsage?.estimatedCost);
                report += `| ${execution.provider} | ${execution.model} | ${tokenUsage?.inputTokens ?? 'N/A'} | ${tokenUsage?.outputTokens ?? 'N/A'} | ${tokenUsage?.totalTokens ?? 'N/A'} | ${cost} |\n`;
            }
            // Add emptiness percentage comparison
            report += `\n### Field Emptiness Comparison\n\n`;
            report += `| Provider | Model | Populated Fields | Total Fields | Emptiness % |\n`;
            report += `|----------|-------|-----------------|--------------|------------|\n`;
            for (const execution of successfulExecutions) {
                const emptinessPercentage = execution.cvData.metadata?.emptinessPercentage;
                const percentage = emptinessPercentage?.percentage ?? 'N/A';
                const nonEmptyFields = emptinessPercentage?.nonEmptyFields ?? 'N/A';
                const totalFields = emptinessPercentage?.totalFields ?? 'N/A';
                // NOTE(review): the table scales `percentage` by 100 while the
                // "Accuracy Details" section below prints it unscaled — confirm
                // whether the stored value is a 0-1 fraction or already 0-100.
                const percentageCell = typeof percentage === 'number'
                    ? `${(percentage * 100).toFixed(1)}%`
                    : percentage;
                report += `| ${execution.provider} | ${execution.model || 'default'} | ${nonEmptyFields} | ${totalFields} | ${percentageCell} |\n`;
            }
            // Sort by accuracy, best first
            const sortedByAccuracy = [...successfulExecutions].sort((a, b) => {
                const accuracyA = a.cvData.metadata?.accuracy?.overall ?? 0;
                const accuracyB = b.cvData.metadata?.accuracy?.overall ?? 0;
                return accuracyB - accuracyA;
            });
            // Add accuracy details
            report += `\n### Accuracy Details\n\n`;
            for (const execution of sortedByAccuracy) {
                const accuracy = execution.cvData.metadata?.accuracy;
                const emptinessPercentage = execution.cvData.metadata?.emptinessPercentage;
                if (!accuracy) {
                    continue;
                }
                report += `#### ${execution.provider} (${execution.model})\n`;
                report += `- Overall Accuracy: ${accuracy.overall}%\n`;
                // Null checks (not truthiness) so a real score of 0 is still reported
                if (accuracy.fieldAccuracy != null) {
                    report += `- Field Accuracy: ${accuracy.fieldAccuracy}%\n`;
                }
                report += `- Completeness: ${accuracy.completeness}%\n`;
                if (accuracy.structuralFidelity != null) {
                    report += `- Structural Fidelity: ${accuracy.structuralFidelity}%\n`;
                }
                // Add emptiness percentage information if available
                if (emptinessPercentage) {
                    report += `- Field Emptiness: ${emptinessPercentage.percentage}% (${emptinessPercentage.nonEmptyFields}/${emptinessPercentage.totalFields} fields populated)\n`;
                }
                if (accuracy.missingFields?.length) {
                    report += `- Missing Fields: ${accuracy.missingFields.join(', ')}\n`;
                }
                report += '\n';
            }
            // Sort by token usage, lowest total first; entries without a numeric
            // total sort last (and are skipped below when tokenUsage is absent).
            const sortedByTokens = [...successfulExecutions].sort((a, b) => {
                const tokensA = totalTokensOf(a);
                const tokensB = totalTokensOf(b);
                if (tokensA === tokensB) {
                    return 0;
                }
                return tokensA < tokensB ? -1 : 1;
            });
            // Add token usage details
            report += `\n### Token Usage Details\n\n`;
            for (const execution of sortedByTokens) {
                const tokenUsage = execution.cvData.metadata?.tokenUsage;
                if (!tokenUsage) {
                    continue;
                }
                report += `#### ${execution.provider} (${execution.model})\n`;
                report += `- Input Tokens: ${tokenUsage.inputTokens}\n`;
                report += `- Output Tokens: ${tokenUsage.outputTokens}\n`;
                report += `- Total Tokens: ${tokenUsage.totalTokens}\n`;
                const cost = formatEstimatedCost(tokenUsage.estimatedCost);
                if (cost !== 'N/A') {
                    report += `- Estimated Cost: ${cost}\n`;
                }
                report += '\n';
            }
            // Add best accuracy summary
            const bestAccuracy = sortedByAccuracy[0];
            if (bestAccuracy?.cvData.metadata?.accuracy) {
                report += `\n### Best Accuracy\n`;
                report += `Provider: ${bestAccuracy.provider}\n`;
                report += `Model: ${bestAccuracy.model}\n`;
                report += `Accuracy: ${bestAccuracy.cvData.metadata.accuracy.overall}%\n\n`;
            }
            return report;
        }
        catch (error) {
            console.error(`Error generating report: ${error}`);
            return `Error generating report: ${error}`;
        }
    }
    /**
     * Save the report to `report.md` inside `outputDir`.
     * Write failures are logged to the console and not rethrown.
     *
     * @param {string} report - Report text to write.
     * @param {string} outputDir - Directory that receives `report.md`.
     */
    static saveReport(report, outputDir) {
        try {
            const reportPath = path.join(outputDir, 'report.md');
            fs.writeFileSync(reportPath, report);
            console.log(`Report saved to ${reportPath}`);
        }
        catch (error) {
            console.error(`Error saving report: ${error}`);
        }
    }
    /**
     * Generate and save a report for a specific directory.
     * Whatever generateReport returns (including its plain-text status or
     * error messages) is what gets written.
     *
     * @param {string} outputDir - Directory to read results from and write `report.md` to.
     * @param {boolean} [verbose=false] - Forwarded to generateReport.
     * @returns {Promise<void>}
     */
    static async generateAndSaveReport(outputDir, verbose = false) {
        const report = await this.generateReport(outputDir, verbose);
        this.saveReport(report, outputDir);
    }
}
// Expose the class to CommonJS consumers of this module.
exports.ReportGenerator = ReportGenerator;