UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

270 lines (269 loc) 13.9 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.ReportGenerator = void 0; const fs = __importStar(require("fs")); const glob_1 = require("glob"); const path = __importStar(require("path")); /** * ReportGenerator class is responsible for generating markdown reports * from CV processing results, including token usage metrics. */ class ReportGenerator { /** * Generate a report for a specific output directory */ static async generateReport(outputDir, verbose = false) { if (verbose) { console.log(`Generating report for ${outputDir}`); } try { // Find all JSON files in the directory const jsonFiles = await (0, glob_1.glob)(`${outputDir}/*.json`); if (jsonFiles.length === 0) { return 'No JSON files found'; } // Load all CV data files const allData = []; for (const file of jsonFiles) { try { const data = JSON.parse(fs.readFileSync(file, 'utf8')); // Skip files without proper metadata if (!data.metadata) continue; allData.push({ cvData: data, file: path.basename(file), provider: data.metadata.provider || 'unknown', model: data.metadata.model || 'default', time: data.metadata.processingTime || 0, }); } catch (error) { console.error(`Error loading data file ${file}:`, error); } } if (allData.length === 0) { return 'No valid data files found'; } // Get the CV name from the directory const cvName = path.basename(outputDir).split('_')[0]; // Get the date const dateMatch = path.basename(outputDir).match(/(\d{4}-\d{2}-\d{2})/); const date = dateMatch ? dateMatch[1] : new Date().toISOString().split('T')[0]; // Calculate total execution time const totalTime = allData.reduce((sum, data) => sum + data.time, 0); // Separate successful and failed executions const successfulExecutions = allData.filter((data) => data.cvData.metadata && data.cvData.metadata.accuracy); // For now we don't have failed executions in our data structure // If needed, we can detect them based on missing accuracy or other criteria const failedExecutions = []; // Calculate success rate const totalProviders = successfulExecutions.length + failedExecutions.length; const successRate = totalProviders > 0 ? (successfulExecutions.length / totalProviders) * 100 : 0; // Start building the markdown report let report = `# CV Processing Report\n\n`; report += `**CV**: ${cvName}.pdf\n`; report += `**Date**: ${date}\n`; report += `**Total Execution Time**: ${totalTime.toFixed(2)} seconds\n\n`; // Summary section report += `## Summary\n\n`; report += `- **Total Providers**: ${totalProviders}\n`; report += `- **Successful**: ${successfulExecutions.length}\n`; report += `- **Failed**: ${failedExecutions.length}\n`; report += `- **Success Rate**: ${successRate.toFixed(1)}%\n`; // Successful executions section report += `## Successful Executions\n\n`; report += `| Provider | Model | Time (s) | Accuracy | Token Usage | Est. Cost | Output File |\n`; report += `|----------|-------|----------|----------|-------------|-----------|-------------|\n`; for (const execution of successfulExecutions) { const accuracy = execution.cvData.metadata?.accuracy?.overall ?? 0; // Token usage information const tokenUsage = execution.cvData.metadata?.tokenUsage; const tokenCount = tokenUsage?.totalTokens ?? 'N/A'; const estCost = tokenUsage?.estimatedCost ? `$${tokenUsage.estimatedCost.toFixed(4)}` : 'N/A'; report += `| ${execution.provider} | ${execution.model} | ${execution.time.toFixed(2)} | ${accuracy}% | ${tokenCount} | ${estCost} | [View](./${execution.file}) |\n`; } report += '\n'; // Failed executions section (if any) if (failedExecutions.length > 0) { report += `## Failed Executions\n\n`; report += `| Provider | Model | Error |\n`; report += `|----------|-------|-------|\n`; for (const execution of failedExecutions) { const errorMessage = execution.error.substring(0, 50) + '...'; report += `| ${execution.provider} | ${execution.model} | ${errorMessage} |\n`; } report += '\n'; } // Performance comparison section report += `## Performance Comparison\n\n`; const fastest = [...successfulExecutions].sort((a, b) => a.time - b.time)[0]; const slowest = [...successfulExecutions].sort((a, b) => b.time - a.time)[0]; const avgTime = totalTime / successfulExecutions.length; report += `- **Fastest**: ${fastest.provider} (${fastest.model}) - ${fastest.time.toFixed(2)}s\n`; report += `- **Slowest**: ${slowest.provider} (${slowest.model}) - ${slowest.time.toFixed(2)}s\n`; report += `- **Average Time**: ${avgTime.toFixed(2)}s\n\n`; // Add accuracy comparison report += `### Accuracy Comparison\n\n`; report += `| Provider | Model | Accuracy | Token Usage | Cost |\n`; report += `|----------|-------|----------|-------------|------|\n`; successfulExecutions.forEach((execution) => { const accuracy = execution.cvData.metadata?.accuracy?.overall ?? 0; const tokenUsage = execution.cvData.metadata?.tokenUsage; const cost = tokenUsage?.estimatedCost ? `$${tokenUsage.estimatedCost.toFixed(4)}` : 'N/A'; report += `| ${execution.provider} | ${execution.model} | ${accuracy}% | ${tokenUsage?.totalTokens ?? 'N/A'} | ${cost} |\n`; }); // Add token usage comparison report += `\n### Token Usage Comparison\n\n`; report += `| Provider | Model | Input Tokens | Output Tokens | Total Tokens | Cost |\n`; report += `|----------|-------|--------------|---------------|--------------|------|\n`; successfulExecutions.forEach((execution) => { const tokenUsage = execution.cvData.metadata?.tokenUsage; const cost = tokenUsage?.estimatedCost ? `$${tokenUsage.estimatedCost.toFixed(4)}` : 'N/A'; report += `| ${execution.provider} | ${execution.model} | ${tokenUsage?.inputTokens ?? 'N/A'} | ${tokenUsage?.outputTokens ?? 'N/A'} | ${tokenUsage?.totalTokens ?? 'N/A'} | ${cost} |\n`; }); // Add emptiness percentage comparison report += `\n### Field Emptiness Comparison\n\n`; report += `| Provider | Model | Populated Fields | Total Fields | Emptiness % |\n`; report += `|----------|-------|-----------------|--------------|------------|\n`; successfulExecutions.forEach((execution) => { const emptinessPercentage = execution.cvData.metadata?.emptinessPercentage; const percentage = emptinessPercentage?.percentage ?? 'N/A'; const nonEmptyFields = emptinessPercentage?.nonEmptyFields ?? 'N/A'; const totalFields = emptinessPercentage?.totalFields ?? 'N/A'; report += `| ${execution.provider} | ${execution.model || 'default'} | ${nonEmptyFields} | ${totalFields} | ${typeof percentage === 'number' ? `${(percentage * 100).toFixed(1)}%` : percentage} |\n`; }); // Sort by accuracy const sortedByAccuracy = [...successfulExecutions].sort((a, b) => { const accuracyA = a.cvData.metadata?.accuracy?.overall ?? 0; const accuracyB = b.cvData.metadata?.accuracy?.overall ?? 0; return accuracyB - accuracyA; }); // Add accuracy details report += `\n### Accuracy Details\n\n`; sortedByAccuracy.forEach((execution) => { const accuracy = execution.cvData.metadata?.accuracy; const emptinessPercentage = execution.cvData.metadata?.emptinessPercentage; if (!accuracy) return; report += `#### ${execution.provider} (${execution.model})\n`; report += `- Overall Accuracy: ${accuracy.overall}%\n`; if (accuracy.fieldAccuracy) { report += `- Field Accuracy: ${accuracy.fieldAccuracy}%\n`; } report += `- Completeness: ${accuracy.completeness}%\n`; if (accuracy.structuralFidelity) { report += `- Structural Fidelity: ${accuracy.structuralFidelity}%\n`; } // Add emptiness percentage information if available if (emptinessPercentage) { report += `- Field Emptiness: ${emptinessPercentage.percentage}% (${emptinessPercentage.nonEmptyFields}/${emptinessPercentage.totalFields} fields populated)\n`; } if (accuracy.missingFields?.length) { report += `- Missing Fields: ${accuracy.missingFields.join(', ')}\n`; } report += '\n'; }); // Sort by token usage const sortedByTokens = [...successfulExecutions].sort((a, b) => { const accuracyA = a.cvData.metadata?.accuracy?.overall ?? 0; const accuracyB = b.cvData.metadata?.accuracy?.overall ?? 0; return accuracyB - accuracyA; }); // Add token usage details report += `\n### Token Usage Details\n\n`; sortedByTokens.forEach((execution) => { const tokenUsage = execution.cvData.metadata?.tokenUsage; if (!tokenUsage) return; report += `#### ${execution.provider} (${execution.model})\n`; report += `- Input Tokens: ${tokenUsage.inputTokens}\n`; report += `- Output Tokens: ${tokenUsage.outputTokens}\n`; report += `- Total Tokens: ${tokenUsage.totalTokens}\n`; if (tokenUsage.estimatedCost) { report += `- Estimated Cost: $${tokenUsage.estimatedCost.toFixed(4)}\n`; } report += '\n'; }); // Add best accuracy summary const bestAccuracy = sortedByAccuracy[0]; if (bestAccuracy?.cvData.metadata?.accuracy) { report += `\n### Best Accuracy\n`; report += `Provider: ${bestAccuracy.provider}\n`; report += `Model: ${bestAccuracy.model}\n`; report += `Accuracy: ${bestAccuracy.cvData.metadata.accuracy.overall}%\n\n`; } return report; } catch (error) { console.error(`Error generating report: ${error}`); return `Error generating report: ${error}`; } } /** * Save the report to a file */ static saveReport(report, outputDir) { try { const reportPath = path.join(outputDir, 'report.md'); fs.writeFileSync(reportPath, report); console.log(`Report saved to ${reportPath}`); } catch (error) { console.error(`Error saving report: ${error}`); } } /** * Generate and save a report for a specific directory */ static async generateAndSaveReport(outputDir, verbose = false) { const report = await this.generateReport(outputDir, verbose); this.saveReport(report, outputDir); } } exports.ReportGenerator = ReportGenerator;