@thecodingwhale/cv-processor
Version:
CV Processor to extract structured data from PDF resumes using TypeScript
786 lines (785 loc) • 42.2 kB
JavaScript
;
// Interop helper for CommonJS `require` results: values already flagged with
// `__esModule` are returned untouched, anything else is wrapped so callers can
// always read the export through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.mergeReports = mergeReports;
const fs_1 = __importDefault(require("fs"));
const glob_1 = require("glob");
const path_1 = __importDefault(require("path"));
/**
 * Merge every `report.md` found beneath `outputDir` into one Markdown summary.
 *
 * A report contributes only when it contains both a "Successful Executions"
 * and an "Accuracy Comparison" table; the "Token Usage Comparison" and
 * "Field Emptiness Comparison" tables are optional. Rows of the different
 * tables are joined on the `provider_model` key.
 *
 * @param {string} outputDir - Directory searched recursively for `report.md` files.
 * @returns {Promise<string>} The merged Markdown report, or the literal
 *   'No report files found' when the search matches nothing.
 */
async function mergeReports(outputDir) {
    const reportFiles = await (0, glob_1.glob)(`${outputDir}/**/report.md`);
    if (reportFiles.length === 0) {
        return 'No report files found';
    }
    const report = {
        providers: {},
        models: {},
        allRuns: [],
    };
    for (const reportFile of reportFiles) {
        mergeReportFile(report, reportFile);
    }
    // Aggregates hold running sums up to this point; turn them into means.
    const aggregates = [
        ...Object.values(report.providers),
        ...Object.values(report.models),
    ];
    for (const aggregate of aggregates) {
        averageAggregate(aggregate);
    }
    return generateMarkdownReport(await loadTokenUsageData(report));
}
/** Fold the tables of a single report file into `report` (mutated in place). */
function mergeReportFile(report, reportFile) {
    const content = fs_1.default.readFileSync(reportFile, 'utf-8');
    const dirName = path_1.default.dirname(reportFile);
    // The CV name is the part of the report's directory name before the first '_'.
    const cvName = path_1.default.basename(dirName).split('_')[0];
    const executionRows = extractTableRows(content, 'Successful Executions');
    const accuracyRows = extractTableRows(content, 'Accuracy Comparison');
    if (!executionRows || !accuracyRows) {
        return;
    }
    const tokenUsageByModel = parseTokenUsageRows(extractTableRows(content, 'Token Usage Comparison'));
    const emptinessByModel = parseEmptinessRows(extractTableRows(content, 'Field Emptiness Comparison'));
    const executions = parseExecutionRows(executionRows);
    for (const row of accuracyRows) {
        const columns = splitTableRow(row);
        if (columns.length < 7) {
            continue;
        }
        const provider = columns[1];
        const model = columns[2];
        const accuracy = parsePercentCell(columns[3]);
        // A row without a parseable overall accuracy is not a usable run.
        if (Number.isNaN(accuracy)) {
            continue;
        }
        const fieldAccuracy = parseOptionalPercent(columns[4]);
        const completeness = parseOptionalPercent(columns[5]);
        const structure = parseOptionalPercent(columns[6]);
        const key = `${provider}_${model}`;
        const execution = executions[key] || { processingTime: 0, outputFile: '' };
        const tokenUsage = tokenUsageByModel[key];
        const emptiness = emptinessByModel[key];
        const outputFile = path_1.default.join(dirName, execution.outputFile);
        const sample = {
            processingTime: execution.processingTime,
            accuracy,
            fieldAccuracy,
            completeness,
            structure,
            emptiness,
            outputFile,
            conversionType: execution.conversionType,
            instructionPath: execution.instructionPath,
        };
        if (!report.providers[provider]) {
            report.providers[provider] = createAggregate(provider, 'Various');
        }
        if (!report.models[key]) {
            report.models[key] = createAggregate(provider, model);
        }
        accumulateRun(report.providers[provider], sample);
        accumulateRun(report.models[key], sample);
        // Each run keeps its own emptiness value; it is not replaced by a
        // cross-run average, so the per-run data stays distinguishable.
        report.allRuns.push({
            cvName,
            provider,
            model,
            processingTime: execution.processingTime,
            accuracy,
            fieldAccuracy,
            completeness,
            structure,
            emptinessPercentage: emptiness?.percentage,
            expectedPercentage: emptiness?.expectedPercentage,
            outputFile,
            conversionType: execution.conversionType,
            instructionPath: execution.instructionPath,
            tokenUsage: {
                totalTokens: tokenUsage?.totalTokens || 0,
                inputTokens: tokenUsage?.inputTokens || 0,
                outputTokens: tokenUsage?.outputTokens || 0,
                estimatedCost: tokenUsage?.estimatedCost || 0,
            },
        });
    }
    applySuccessRate(report, content, cvName);
}
/**
 * Return the body rows of the Markdown table that follows `## <heading>`, or
 * null when the section is absent. The table header and separator lines are
 * skipped. `heading` must be plain text (it is inserted into a RegExp as-is).
 */
function extractTableRows(content, heading) {
    const tablePattern = new RegExp(`## ${heading}\\n\\n\\|.*\\|.*\\|\\n\\|.*\\|.*\\|\\n((?:\\|.*\\|\\n)*)`);
    const match = content.match(tablePattern);
    return match ? match[1].trim().split('\n') : null;
}
/** Split one table row on '|' and trim every cell (index 0 is the text before the first pipe). */
function splitTableRow(row) {
    return row.split('|').map((cell) => cell.trim());
}
/** Parse an integer cell; 'N/A' and unparseable cells count as 0. */
function parseCount(cell) {
    if (cell === 'N/A') {
        return 0;
    }
    const value = Number.parseInt(cell, 10);
    return Number.isNaN(value) ? 0 : value;
}
/** Parse a cell such as '85.5%' into a 0-1 fraction (NaN when unparseable). */
function parsePercentCell(cell) {
    return Number.parseFloat(cell.replace('%', '')) / 100;
}
/** Like parsePercentCell, but '-' and unparseable cells yield undefined. */
function parseOptionalPercent(cell) {
    if (cell === '-') {
        return undefined;
    }
    const fraction = parsePercentCell(cell);
    return Number.isNaN(fraction) ? undefined : fraction;
}
/** Index the token usage table rows by `provider_model`. */
function parseTokenUsageRows(rows) {
    const usageByModel = {};
    for (const row of rows ?? []) {
        const columns = splitTableRow(row);
        if (columns.length < 7) {
            continue;
        }
        const [, provider, model, input, output, total, cost] = columns;
        let estimatedCost;
        if (cost !== 'N/A') {
            // Costs are written with a leading '$'.
            const parsedCost = Number.parseFloat(cost.replace('$', ''));
            estimatedCost = Number.isNaN(parsedCost) ? undefined : parsedCost;
        }
        usageByModel[`${provider}_${model}`] = {
            inputTokens: parseCount(input),
            outputTokens: parseCount(output),
            totalTokens: parseCount(total),
            estimatedCost,
        };
    }
    return usageByModel;
}
/** Index the field emptiness table rows by `provider_model`. */
function parseEmptinessRows(rows) {
    const emptinessByModel = {};
    for (const row of rows ?? []) {
        const columns = splitTableRow(row);
        if (columns.length < 6) {
            continue;
        }
        const [, provider, model, nonEmpty, total, percent] = columns;
        let percentage = 0;
        if (percent !== 'N/A') {
            const fraction = parsePercentCell(percent);
            percentage = Number.isNaN(fraction) ? 0 : fraction;
        }
        // The expected emptiness is read from cell index 7 when the row is wide enough.
        let expectedPercentage;
        if (columns.length >= 8 && columns[7] !== 'N/A' && columns[7] !== '-') {
            const fraction = parsePercentCell(columns[7]);
            expectedPercentage = Number.isNaN(fraction) ? undefined : fraction;
        }
        emptinessByModel[`${provider}_${model}`] = {
            percentage,
            nonEmptyFields: parseCount(nonEmpty),
            totalFields: parseCount(total),
            expectedPercentage,
        };
    }
    return emptinessByModel;
}
/**
 * Index the successful executions table rows by `provider_model`.
 *
 * Two layouts are accepted:
 *   | Provider | Model | Time (s) | Accuracy | Output File |
 *   | Provider | Model | Instructions Path | Conversion Type | Time (s) | Accuracy | Output File |
 */
function parseExecutionRows(rows) {
    const executions = {};
    for (const row of rows) {
        const columns = splitTableRow(row);
        if (columns.length < 7) {
            continue;
        }
        const provider = columns[1];
        const model = columns[2];
        // Nine or more cells (including the empty edge cells) means the wide layout.
        const isWideLayout = columns.length >= 9;
        const processingTime = Number.parseFloat(columns[isWideLayout ? 5 : 3]);
        if (Number.isNaN(processingTime)) {
            continue;
        }
        const outputFileLink = columns[isWideLayout ? 7 : 5];
        const linkMatch = outputFileLink.match(/\[View\]\(\.\/(.+)\)/);
        executions[`${provider}_${model}`] = {
            processingTime,
            outputFile: linkMatch ? linkMatch[1] : '',
            conversionType: isWideLayout ? columns[4] : 'unknown',
            instructionPath: isWideLayout ? columns[3] : 'default',
        };
    }
    return executions;
}
/** Create an empty running-sum record for a provider or a provider/model pair. */
function createAggregate(provider, model) {
    return {
        provider,
        model,
        processingTime: 0,
        accuracy: 0,
        fieldAccuracy: 0,
        completeness: 0,
        structure: 0,
        emptinessPercentage: 0,
        expectedPercentage: 0,
        count: 0,
        successRate: 0,
        files: [],
    };
}
/** Add one run's metrics to an aggregate's running sums. */
function accumulateRun(aggregate, sample) {
    aggregate.processingTime += sample.processingTime;
    aggregate.accuracy += sample.accuracy;
    // Missing optional metrics contribute nothing to the sum.
    aggregate.fieldAccuracy += sample.fieldAccuracy || 0;
    aggregate.completeness += sample.completeness || 0;
    aggregate.structure += sample.structure || 0;
    if (sample.emptiness) {
        aggregate.emptinessPercentage += sample.emptiness.percentage;
        if (sample.emptiness.expectedPercentage !== undefined) {
            aggregate.expectedPercentage += sample.emptiness.expectedPercentage;
        }
    }
    aggregate.count += 1;
    aggregate.files.push(sample.outputFile);
    rememberUnique(aggregate, 'conversionTypes', sample.conversionType);
    rememberUnique(aggregate, 'instructionPaths', sample.instructionPath);
}
/** Append `value` to the list `aggregate[listName]` unless it is empty or already present. */
function rememberUnique(aggregate, listName, value) {
    if (!value) {
        return;
    }
    if (!aggregate[listName]) {
        aggregate[listName] = [];
    }
    if (!aggregate[listName].includes(value)) {
        aggregate[listName].push(value);
    }
}
/**
 * Read the "Total Providers" / "Successful" summary bullets and store the
 * resulting success rate on every provider that has a run for `cvName`.
 */
function applySuccessRate(report, content, cvName) {
    const totalMatch = content.match(/- \*\*Total Providers\*\*: (\d+)/);
    const successfulMatch = content.match(/- \*\*Successful\*\*: (\d+)/);
    if (!totalMatch || !successfulMatch) {
        return;
    }
    const total = Number.parseInt(totalMatch[1], 10);
    const successful = Number.parseInt(successfulMatch[1], 10);
    // A summary reporting zero providers would produce NaN or Infinity.
    if (total <= 0) {
        return;
    }
    const successRate = successful / total;
    for (const run of report.allRuns) {
        if (run.cvName === cvName && report.providers[run.provider]) {
            report.providers[run.provider].successRate = successRate;
        }
    }
}
/** Convert an aggregate's running sums into per-run averages. */
function averageAggregate(aggregate) {
    if (!(aggregate.count > 0)) {
        return;
    }
    const averagedFields = [
        'processingTime',
        'accuracy',
        'fieldAccuracy',
        'completeness',
        'structure',
        'emptinessPercentage',
        'expectedPercentage',
    ];
    for (const field of averagedFields) {
        if (aggregate[field] !== undefined) {
            aggregate[field] = aggregate[field] / aggregate.count;
        }
    }
}
/**
 * Enrich each run with data stored in its JSON output file.
 *
 * When `run.outputFile` is a readable regular file whose JSON contains
 * `metadata.tokenUsage`, the run's `tokenUsage` is replaced by those values
 * (missing numbers become 0). When it contains a numeric
 * `metadata.emptinessPercentage.percentage`, that value divided by 100
 * replaces `run.emptinessPercentage`. Failures for one run are logged and do
 * not stop the remaining runs.
 *
 * @param {{ allRuns: Array<object> }} report - Report whose runs are mutated in place.
 * @returns {Promise<object>} The same `report` object.
 */
async function loadTokenUsageData(report) {
    for (const run of report.allRuns) {
        try {
            // outputFile may be missing or may point at a directory; only a
            // regular file can hold the JSON payload.
            const isReadableFile = Boolean(run.outputFile) &&
                fs_1.default.existsSync(run.outputFile) &&
                fs_1.default.statSync(run.outputFile).isFile();
            if (!isReadableFile) {
                continue;
            }
            const data = JSON.parse(fs_1.default.readFileSync(run.outputFile, 'utf8'));
            const metadata = data?.metadata;
            if (!metadata) {
                continue;
            }
            if (metadata.tokenUsage) {
                run.tokenUsage = {
                    totalTokens: metadata.tokenUsage.totalTokens || 0,
                    inputTokens: metadata.tokenUsage.inputTokens || 0,
                    outputTokens: metadata.tokenUsage.outputTokens || 0,
                    estimatedCost: metadata.tokenUsage.estimatedCost || 0,
                };
            }
            const rawPercentage = metadata.emptinessPercentage?.percentage;
            if (rawPercentage !== undefined && rawPercentage !== null) {
                // The JSON value is divided by 100 to get the 0-1 fraction kept on runs.
                const fraction = Number(rawPercentage) / 100;
                // Keep the previously parsed value rather than overwrite it with NaN.
                if (Number.isFinite(fraction)) {
                    run.emptinessPercentage = fraction;
                }
            }
        }
        catch (error) {
            console.error(`Error loading additional data from ${run.outputFile}:`, error);
        }
    }
    return report;
}
/**
 * Render the merged report as a Markdown document with summary tables,
 * Mermaid charts, recommendations and a per-run listing.
 *
 * Percent-like metrics on aggregates and runs (accuracy, field accuracy,
 * emptiness, expected emptiness) are treated as 0-1 fractions and printed as
 * percentages.
 *
 * @param {{ providers: object, models: object, allRuns: Array<object> }} report
 *   Aggregates keyed by provider and by `provider_model`, plus every run.
 * @returns {string} The Markdown text.
 */
function generateMarkdownReport(report) {
    const providers = Object.values(report.providers);
    const models = Object.values(report.models);
    const toPercent = (fraction) => `${(fraction * 100).toFixed(1)}%`;
    const percentOrDash = (fraction) => (fraction !== undefined ? toPercent(fraction) : '-');
    const byAccuracyDesc = (a, b) => b.accuracy - a.accuracy;
    const byProcessingTimeAsc = (a, b) => a.processingTime - b.processingTime;
    const sortedProviders = [...providers].sort(byAccuracyDesc);
    const sortedModels = [...models].sort(byAccuracyDesc);
    const fastestProviders = [...providers].sort(byProcessingTimeAsc);
    const fastestModels = [...models].sort(byProcessingTimeAsc);
    // Sort by emptiness percentage (higher is better)
    const bestEmptinessModels = models
        .filter((m) => m.emptinessPercentage !== undefined && m.emptinessPercentage > 0)
        .sort((a, b) => (b.emptinessPercentage || 0) - (a.emptinessPercentage || 0));
    // Slowest average model time, used to normalise speed; computed once
    // because combinedScore runs inside sort comparators.
    const maxTime = Math.max(...models.map((m) => m.processingTime));
    // Combined score (higher is better): 60% accuracy, 20% speed, 20% emptiness.
    const combinedScore = (metrics) => {
        // Normalize processing time to a 0-1 scale (reversed, so faster is better)
        const normalizedTime = maxTime > 0 ? 1 - metrics.processingTime / maxTime : 0;
        const emptinessScore = metrics.emptinessPercentage || 0;
        return metrics.accuracy * 0.6 + normalizedTime * 0.2 + emptinessScore * 0.2;
    };
    const bestOverallModels = [...models].sort((a, b) => combinedScore(b) - combinedScore(a));
    let markdown = `# Merged CV Processing Report\n\n`;
    markdown += `**Date**: ${new Date().toISOString().split('T')[0]}\n`;
    markdown += `**Total CV Samples**: ${new Set(report.allRuns.map((r) => r.cvName)).size}\n`;
    markdown += `**Total Runs Analyzed**: ${report.allRuns.length}\n\n`;
    // Provider accuracy pie chart (top 6 for readability)
    markdown += `## Provider Accuracy Visualization\n\n`;
    markdown += '```mermaid\n';
    markdown += 'pie title Provider Accuracy (%)\n';
    for (const provider of sortedProviders.slice(0, 6)) {
        markdown += ` "${provider.provider}" : ${Math.round(provider.accuracy * 100)}\n`;
    }
    markdown += '```\n\n';
    markdown += `## Best Providers by Accuracy\n\n`;
    markdown += `| Provider | Avg Accuracy | Avg Field Accuracy | Avg Emptiness | Conversion Types | Instructions | Runs |\n`;
    markdown += `|----------|-------------|-------------------|--------------|----------------|-------------|------|\n`;
    for (const provider of sortedProviders) {
        // A zero or missing average is shown as '-' in this table.
        const fieldAccuracyCell = provider.fieldAccuracy ? toPercent(provider.fieldAccuracy) : '-';
        const emptinessCell = provider.emptinessPercentage ? toPercent(provider.emptinessPercentage) : '-';
        markdown += `| ${provider.provider} | ${toPercent(provider.accuracy)} | ${fieldAccuracyCell} | ${emptinessCell} | ${formatConversionTypes(provider.conversionTypes)} | ${formatInstructionPaths(provider.instructionPaths)} | ${provider.count} |\n`;
    }
    // Top models pie chart (top 6)
    markdown += `\n## Top Models Accuracy Comparison\n\n`;
    markdown += '```mermaid\n';
    markdown += 'pie title Top 6 Models by Accuracy (%)\n';
    for (const model of sortedModels.slice(0, 6)) {
        markdown += ` "${model.provider} (${model.model})" : ${Math.round(model.accuracy * 100)}\n`;
    }
    markdown += '```\n\n';
    markdown += `\n## Best Models by Accuracy\n\n`;
    markdown += `| Provider | Model | Avg Accuracy | Avg Field Accuracy | Avg Emptiness | Avg Expected Emptiness | Conversion Types | Instructions | Runs |\n`;
    markdown += `|----------|-------|-------------|-------------------|--------------|-----------------------|----------------|-------------|------|\n`;
    for (const model of sortedModels) {
        markdown += `| ${model.provider} | ${model.model} | ${toPercent(model.accuracy)} | ${percentOrDash(model.fieldAccuracy)} | ${percentOrDash(model.emptinessPercentage)} | ${percentOrDash(model.expectedPercentage)} | ${formatConversionTypes(model.conversionTypes)} | ${formatInstructionPaths(model.instructionPaths)} | ${model.count} |\n`;
    }
    // Processing time chart for the 6 fastest models
    markdown += `\n## Processing Time Visualization\n\n`;
    markdown += '```mermaid\n';
    markdown += 'gantt\n';
    markdown += ' title Processing Time by Model (seconds)\n';
    markdown += ' dateFormat X\n';
    markdown += ' axisFormat %S s\n\n';
    for (const model of fastestModels.slice(0, 6)) {
        // Long model names are shortened, then every non-alphanumeric
        // character is replaced with '_' to form the task name.
        const shortModel = model.model.length > 10
            ? `${model.model.substring(0, 10)}...`
            : model.model;
        const label = `${model.provider} (${shortModel})`;
        const safeName = label.replace(/[^a-zA-Z0-9]/g, '_');
        markdown += ` ${safeName} :a, 0, ${model.processingTime.toFixed(2)}s\n`;
    }
    markdown += '```\n\n';
    markdown += `\n## Fastest Providers\n\n`;
    markdown += `| Provider | Avg Processing Time (s) | Conversion Types | Instructions | Runs |\n`;
    markdown += `|----------|--------------------------|----------------|-------------|------|\n`;
    for (const provider of fastestProviders) {
        markdown += `| ${provider.provider} | ${provider.processingTime.toFixed(2)} | ${formatConversionTypes(provider.conversionTypes)} | ${formatInstructionPaths(provider.instructionPaths)} | ${provider.count} |\n`;
    }
    markdown += `\n## Fastest Models\n\n`;
    markdown += `| Provider | Model | Avg Processing Time (s) | Conversion Types | Instructions | Runs |\n`;
    markdown += `|----------|-------|--------------------------|----------------|-------------|------|\n`;
    for (const model of fastestModels) {
        markdown += `| ${model.provider} | ${model.model} | ${model.processingTime.toFixed(2)} | ${formatConversionTypes(model.conversionTypes)} | ${formatInstructionPaths(model.instructionPaths)} | ${model.count} |\n`;
    }
    // Sum token usage per provider/model pair. Provider and model are stored
    // on the entry itself because either name may contain '_', so the key
    // cannot be split back apart reliably.
    const modelTokenUsage = new Map();
    for (const run of report.allRuns) {
        if (!run.tokenUsage) {
            continue;
        }
        const modelKey = `${run.provider}_${run.model}`;
        if (!modelTokenUsage.has(modelKey)) {
            modelTokenUsage.set(modelKey, {
                provider: run.provider,
                model: run.model,
                totalTokens: 0,
                inputTokens: 0,
                outputTokens: 0,
                estimatedCost: 0,
                count: 0,
            });
        }
        const usage = modelTokenUsage.get(modelKey);
        usage.totalTokens += run.tokenUsage.totalTokens || 0;
        usage.inputTokens += run.tokenUsage.inputTokens || 0;
        usage.outputTokens += run.tokenUsage.outputTokens || 0;
        usage.estimatedCost += run.tokenUsage.estimatedCost || 0;
        usage.count++;
    }
    // Sort models by average total tokens (descending)
    const sortedModelsByTokens = [...modelTokenUsage.values()]
        .map((usage) => ({
            provider: usage.provider,
            model: usage.model,
            avgTotalTokens: usage.totalTokens / usage.count,
            avgInputTokens: usage.inputTokens / usage.count,
            avgOutputTokens: usage.outputTokens / usage.count,
            avgEstimatedCost: usage.estimatedCost / usage.count,
            count: usage.count,
        }))
        .sort((a, b) => b.avgTotalTokens - a.avgTotalTokens);
    if (sortedModelsByTokens.length > 0) {
        markdown += `\n## Token Usage Visualization\n\n`;
        markdown += '```mermaid\n';
        markdown += 'pie title Average Token Usage by Provider\n';
        // Group token usage by provider and keep the 5 largest for readability
        const providerTokens = {};
        for (const usage of sortedModelsByTokens) {
            if (!providerTokens[usage.provider]) {
                providerTokens[usage.provider] = 0;
            }
            providerTokens[usage.provider] += usage.avgTotalTokens;
        }
        const topProviders = Object.entries(providerTokens)
            .sort((a, b) => b[1] - a[1])
            .slice(0, 5);
        for (const [provider, tokens] of topProviders) {
            markdown += ` "${provider}" : ${Math.round(tokens)}\n`;
        }
        markdown += '```\n\n';
    }
    markdown += `\n## Token Usage Comparison by Model\n\n`;
    markdown += `| Provider | Model | Avg Total Tokens | Avg Input Tokens | Avg Output Tokens | Avg Est. Cost | Conversion Types | Instructions |\n`;
    markdown += `|----------|-------|-----------------|-----------------|------------------|-------------|-----------------|--------------|\n`;
    for (const model of sortedModelsByTokens) {
        const modelObj = report.models[`${model.provider}_${model.model}`];
        const conversionCell = modelObj ? formatConversionTypes(modelObj.conversionTypes) : 'unknown';
        const instructionCell = modelObj
            ? formatInstructionPaths(modelObj.instructionPaths)
            : './instructions.txt';
        markdown += `| ${model.provider} | ${model.model} | ${model.avgTotalTokens.toFixed(0)} | ${model.avgInputTokens.toFixed(0)} | ${model.avgOutputTokens.toFixed(0)} | $${model.avgEstimatedCost.toFixed(4)} | ${conversionCell} | ${instructionCell} |\n`;
    }
    // Accuracy vs speed: one node per model for the 8 most accurate models
    markdown += `\n## Accuracy vs Speed Visualization\n\n`;
    markdown += '```mermaid\n';
    markdown += 'graph TD\n';
    markdown += ' title["Accuracy vs. Processing Time"];\n';
    markdown += ' style title fill:#fff,stroke:#fff,stroke-width:0px;\n\n';
    sortedModels.slice(0, 8).forEach((model, i) => {
        const accuracy = Math.round(model.accuracy * 100);
        const time = model.processingTime.toFixed(1);
        markdown += ` m${i}["${model.provider}<br/>${accuracy}% accuracy<br/>${time}s"];\n`;
        markdown += ` class m${i} model${i};\n`;
    });
    // Styling for nodes - each with a different color
    markdown += ' classDef model0 fill:#4CAF50,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model1 fill:#2196F3,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model2 fill:#FFC107,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model3 fill:#F44336,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model4 fill:#9C27B0,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model5 fill:#00BCD4,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model6 fill:#FF9800,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model7 fill:#607D8B,stroke:#333,stroke-width:1px;\n';
    markdown += '```\n\n';
    markdown += `\n## Best Overall (Combined Accuracy & Speed)\n\n`;
    markdown += `| Provider | Model | Accuracy | Processing Time (s) | Combined Score | Conversion Types | Instructions |\n`;
    markdown += `|----------|-------|----------|---------------------|---------------|----------------|-------------|\n`;
    for (const model of bestOverallModels.slice(0, 5)) {
        markdown += `| ${model.provider} | ${model.model} | ${toPercent(model.accuracy)} | ${model.processingTime.toFixed(2)} | ${combinedScore(model).toFixed(2)} | ${formatConversionTypes(model.conversionTypes)} | ${formatInstructionPaths(model.instructionPaths)} |\n`;
    }
    // Top 3 models by combined score
    markdown += `\n## Top Models Performance Comparison\n\n`;
    markdown += '```mermaid\n';
    markdown += 'graph TD\n';
    markdown += ' title["Top Model Performance"];\n';
    markdown += ' style title fill:#fff,stroke:#fff,stroke-width:0px;\n\n';
    bestOverallModels.slice(0, 3).forEach((model, i) => {
        const convType = model.conversionTypes && model.conversionTypes.length > 0
            ? formatConversionType(model.conversionTypes[0])
            : 'unknown';
        markdown += ` m${i}["#${i + 1}: ${model.provider} (${model.model})<br/>`;
        markdown += `🎯 Accuracy: ${toPercent(model.accuracy)}<br/>`;
        markdown += `⏱️ Speed: ${model.processingTime.toFixed(2)}s<br/>`;
        markdown += `🔄 Conversion: ${convType}<br/>`;
        markdown += `📊 Score: ${combinedScore(model).toFixed(2)}"];\n`;
        markdown += ` class m${i} model${i};\n`;
    });
    markdown += ' classDef model0 fill:#4CAF50,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model1 fill:#2196F3,stroke:#333,stroke-width:1px;\n';
    markdown += ' classDef model2 fill:#FFC107,stroke:#333,stroke-width:1px;\n';
    markdown += '```\n\n';
    markdown += `\n## Best Models by Field Emptiness\n\n`;
    markdown += `| Provider | Model | Avg Emptiness | Avg Expected Emptiness | Conversion Types | Instructions | Runs |\n`;
    markdown += `|----------|-------|--------------|-----------------------|----------------|-------------|------|\n`;
    for (const model of bestEmptinessModels) {
        markdown += `| ${model.provider} | ${model.model} | ${toPercent(model.emptinessPercentage)} | ${percentOrDash(model.expectedPercentage)} | ${formatConversionTypes(model.conversionTypes)} | ${formatInstructionPaths(model.instructionPaths)} | ${model.count} |\n`;
    }
    markdown += `\n## Recommendations\n\n`;
    if (models.length === 0) {
        // With no aggregated models there is nothing to rank.
        markdown += `No model results were available to compare.\n\n`;
    }
    else {
        const bestModel = bestOverallModels[0];
        markdown += `### Best Overall Model\n`;
        markdown += `**${bestModel.provider} (${bestModel.model})** with ${toPercent(bestModel.accuracy)} accuracy and ${bestModel.processingTime.toFixed(2)}s average processing time.\n\n`;
        const bestAccuracyModel = sortedModels[0];
        markdown += `### Best for Accuracy\n`;
        markdown += `**${bestAccuracyModel.provider} (${bestAccuracyModel.model})** with ${toPercent(bestAccuracyModel.accuracy)} accuracy.\n\n`;
        const bestSpeedModel = fastestModels[0];
        markdown += `### Best for Speed\n`;
        markdown += `**${bestSpeedModel.provider} (${bestSpeedModel.model})** with ${bestSpeedModel.processingTime.toFixed(2)}s average processing time.\n\n`;
    }
    if (bestEmptinessModels.length > 0) {
        const bestEmptinessModel = bestEmptinessModels[0];
        markdown += `### Best for Field Emptiness\n`;
        markdown += `**${bestEmptinessModel.provider} (${bestEmptinessModel.model})** with ${toPercent(bestEmptinessModel.emptinessPercentage)} field emptiness percentage`;
        if (bestEmptinessModel.expectedPercentage !== undefined) {
            markdown += ` (expected: ${toPercent(bestEmptinessModel.expectedPercentage)})`;
        }
        markdown += `.\n\n`;
    }
    // Most cost-effective model: the most tokens per dollar among models with a known cost
    const modelsByEfficiency = sortedModelsByTokens
        .filter((m) => m.avgEstimatedCost > 0 && m.avgTotalTokens > 0)
        .map((m) => ({
            ...m,
            tokenCostRatio: m.avgTotalTokens / m.avgEstimatedCost,
        }))
        .sort((a, b) => b.tokenCostRatio - a.tokenCostRatio);
    if (modelsByEfficiency.length > 0) {
        const bestCostModel = modelsByEfficiency[0];
        markdown += `### Most Cost-Effective Model\n`;
        markdown += `**${bestCostModel.provider} (${bestCostModel.model})** with ${bestCostModel.avgTotalTokens.toFixed(0)} tokens at $${bestCostModel.avgEstimatedCost.toFixed(4)} average cost.\n\n`;
    }
    markdown += `## All Runs\n\n`;
    markdown += `| CV | Provider | Model | Accuracy | Field Accuracy | Emptiness | Expected Emptiness | Processing Time (s) | Conversion Type | Instructions | Total Tokens | Est. Cost |\n`;
    markdown += `|----|----------|-------|----------|---------------|-----------|-------------------|---------------------|----------------|-------------|--------------|----------|\n`;
    for (const run of report.allRuns) {
        const totalTokens = run.tokenUsage ? run.tokenUsage.totalTokens : 'N/A';
        const estCost = run.tokenUsage && run.tokenUsage.estimatedCost
            ? `$${run.tokenUsage.estimatedCost.toFixed(4)}`
            : 'N/A';
        const emptinessValue = run.emptinessPercentage !== undefined
            ? toPercent(run.emptinessPercentage)
            : 'N/A';
        const fieldAccuracyValue = run.fieldAccuracy !== undefined
            ? toPercent(run.fieldAccuracy)
            : 'N/A';
        const expectedEmptinessValue = run.expectedPercentage !== undefined
            ? toPercent(run.expectedPercentage)
            : 'N/A';
        markdown += `| ${run.cvName} | ${run.provider} | ${run.model} | ${toPercent(run.accuracy)} | ${fieldAccuracyValue} | ${emptinessValue} | ${expectedEmptinessValue} | ${run.processingTime.toFixed(2)} | ${formatConversionType(run.conversionType)} | ${formatInstructionPath(run.instructionPath)} | ${totalTokens} | ${estCost} |\n`;
    }
    return markdown;
}
/**
 * Normalise a conversion type label for display.
 *
 * - Empty or missing values become 'unknown'.
 * - Values ending in '%' are mapped by their leading integer: 30 and 100 to
 *   'pdftotexts'; 24, 18 and 0 to 'pdftoimages'; any other percentage is
 *   returned unchanged.
 * - Otherwise labels containing 'text' or 'image' (case-insensitive) collapse
 *   to 'pdftotexts' / 'pdftoimages', and anything else is lower-cased.
 */
function formatConversionType(type) {
    if (!type) {
        return 'unknown';
    }
    if (type.endsWith('%')) {
        switch (parseInt(type)) {
            case 30:
            case 100:
                return 'pdftotexts';
            case 24:
            case 18:
            case 0:
                return 'pdftoimages';
            default:
                // No known mapping for this percentage
                return type;
        }
    }
    const lowered = type.toLowerCase();
    // 'text' is checked first, so a label mentioning both resolves to texts.
    if (lowered.includes('text')) {
        return 'pdftotexts';
    }
    if (lowered.includes('image')) {
        return 'pdftoimages';
    }
    return lowered;
}
/**
 * Normalise an instruction path for display.
 *
 * - Empty or missing values become './instructions.txt'.
 * - Values that start with a number map to './instructions_version_1.txt'
 *   when that number is greater than 10, otherwise to './instructions.txt'.
 * - Any other value is reduced to './' plus its last '/'-separated segment
 *   ('instructions.txt' when that segment is empty).
 */
function formatInstructionPath(path) {
    const defaultPath = './instructions.txt';
    if (!path) {
        return defaultPath;
    }
    const numericValue = parseFloat(path);
    if (!Number.isNaN(numericValue)) {
        return numericValue > 10 ? './instructions_version_1.txt' : defaultPath;
    }
    const segments = path.split('/');
    const lastSegment = segments[segments.length - 1];
    return `./${lastSegment || 'instructions.txt'}`;
}
/**
 * Format a list of conversion types as a comma-separated string, passing each
 * entry through formatConversionType. A missing or empty list yields 'unknown'.
 */
function formatConversionTypes(types) {
    const hasEntries = Boolean(types) && types.length !== 0;
    if (!hasEntries) {
        return 'unknown';
    }
    const labels = types.map((entry) => formatConversionType(entry));
    return labels.join(', ');
}
/**
 * Format a list of instruction paths as a comma-separated string, passing each
 * entry through formatInstructionPath. A missing or empty list yields
 * './instructions.txt'.
 */
function formatInstructionPaths(paths) {
    const hasEntries = Boolean(paths) && paths.length !== 0;
    if (!hasEntries) {
        return './instructions.txt';
    }
    const labels = paths.map((entry) => formatInstructionPath(entry));
    return labels.join(', ');
}