@thecodingwhale/cv-processor

import fs from 'fs'
import { glob } from 'glob'
import path from 'path'

/** Aggregated metrics for one provider, or for one provider/model pair. */
interface ProviderMetrics {
  provider: string
  model: string
  processingTime: number
  accuracy: number
  fieldAccuracy?: number
  completeness?: number
  structure?: number
  emptinessPercentage?: number
  expectedPercentage?: number
  count: number
  successRate: number
  files: string[]
  conversionTypes?: string[]
  instructionPaths?: string[]
}

/** Token counters attached to a single run. */
interface TokenUsage {
  totalTokens: number
  inputTokens: number
  outputTokens: number
  estimatedCost?: number
}

/** One row of the "Accuracy Comparison" table, enriched with execution data. */
interface RunRecord {
  cvName: string
  provider: string
  model: string
  processingTime: number
  accuracy: number
  fieldAccuracy?: number
  completeness?: number
  structure?: number
  emptinessPercentage?: number
  expectedPercentage?: number
  outputFile: string
  conversionType?: string
  instructionPath?: string
  tokenUsage?: TokenUsage
}

interface Report {
  providers: Record<string, ProviderMetrics>
  models: Record<string, ProviderMetrics>
  allRuns: RunRecord[]
}

/** Values parsed from one row of the "Field Emptiness Comparison" table. */
interface EmptinessStats {
  percentage: number
  nonEmptyFields: number
  totalFields: number
  expectedPercentage?: number
}

/** Values parsed from one row of the "Successful Executions" table. */
interface ExecutionInfo {
  processingTime: number
  outputFile: string
  conversionType?: string
  instructionPath?: string
}

/** Everything a single run contributes to an aggregated metrics record. */
interface RunSample {
  processingTime: number
  accuracy: number
  fieldAccuracy?: number
  completeness?: number
  structure?: number
  emptiness?: EmptinessStats
  outputPath: string
  conversionType?: string
  instructionPath?: string
}

/** Subset of an output JSON file that loadTokenUsageData reads. */
interface OutputFileData {
  metadata?: {
    tokenUsage?: Partial<TokenUsage>
    emptinessPercentage?: { percentage: number }
  }
}

/**
 * Return the body rows of the markdown table that follows `## <heading>`,
 * or undefined when the section is absent. The header row and the separator
 * row are skipped.
 */
function extractTableRows(
  content: string,
  heading: string
): string[] | undefined {
  const pattern = new RegExp(
    `## ${heading}\\n\\n\\|.*\\|.*\\|\\n\\|.*\\|.*\\|\\n((?:\\|.*\\|\\n)*)`
  )
  const match = content.match(pattern)
  return match ? match[1].trim().split('\n') : undefined
}

/** Split a markdown table row into trimmed cells (index 0 is the text before the first pipe). */
function splitRow(row: string): string[] {
  return row.split('|').map((cell) => cell.trim())
}

/** Parse an integer cell; the literal 'N/A' counts as 0. */
function parseCount(cell: string): number {
  return cell !== 'N/A' ? parseInt(cell, 10) : 0
}

/** Parse a percentage cell such as "85.0%" into a 0-1 fraction; '-' yields undefined. */
function parseOptionalPercent(cell: string): number | undefined {
  return cell !== '-' ? parseFloat(cell.replace('%', '')) / 100 : undefined
}

/** Build a `provider_model` keyed map from the "Token Usage Comparison" rows. */
function parseTokenUsageRows(rows?: string[]): Record<string, TokenUsage> {
  const usageByModel: Record<string, TokenUsage> = {}
  for (const row of rows ?? []) {
    const columns = splitRow(row)
    if (columns.length < 7) continue

    // The cost cell looks like "$0.0012"; anything unparsable becomes undefined.
    let estimatedCost: number | undefined
    if (columns[6] !== 'N/A') {
      const parsedCost = parseFloat(columns[6].replace('$', ''))
      estimatedCost = isNaN(parsedCost) ? undefined : parsedCost
    }

    usageByModel[`${columns[1]}_${columns[2]}`] = {
      inputTokens: parseCount(columns[3]),
      outputTokens: parseCount(columns[4]),
      totalTokens: parseCount(columns[5]),
      estimatedCost,
    }
  }
  return usageByModel
}

/** Build a `provider_model` keyed map from the "Field Emptiness Comparison" rows. */
function parseEmptinessRows(rows?: string[]): Record<string, EmptinessStats> {
  const emptinessByModel: Record<string, EmptinessStats> = {}
  for (const row of rows ?? []) {
    const columns = splitRow(row)
    if (columns.length < 6) continue

    let percentage = 0
    if (columns[5] !== 'N/A') {
      percentage = parseFloat(columns[5].replace('%', '')) / 100
      if (isNaN(percentage)) percentage = 0
    }

    // The expected value is only read when the row has at least 8 cells.
    let expectedPercentage: number | undefined
    if (columns.length >= 8 && columns[7] !== 'N/A' && columns[7] !== '-') {
      const parsedExpected = parseFloat(columns[7].replace('%', '')) / 100
      expectedPercentage = isNaN(parsedExpected) ? undefined : parsedExpected
    }

    emptinessByModel[`${columns[1]}_${columns[2]}`] = {
      percentage,
      nonEmptyFields: parseCount(columns[3]),
      totalFields: parseCount(columns[4]),
      expectedPercentage,
    }
  }
  return emptinessByModel
}

/**
 * Build a `provider_model` keyed map from the "Successful Executions" rows.
 *
 * Two layouts are handled:
 *   | Provider | Model | Time (s) | Accuracy | Output File |
 *   | Provider | Model | Instructions Path | Conversion Type | Time (s) | Accuracy | Output File |
 * Rows whose time cell is not numeric are skipped.
 */
function parseExecutionRows(rows: string[]): Record<string, ExecutionInfo> {
  const executions: Record<string, ExecutionInfo> = {}
  for (const row of rows) {
    const columns = splitRow(row)
    if (columns.length < 7) continue

    const hasExtendedColumns = columns.length >= 9
    const processingTime = parseFloat(columns[hasExtendedColumns ? 5 : 3])
    if (isNaN(processingTime)) continue

    // The cell is assumed to hold a markdown link such as "[View](./result.json)".
    // The previous pattern had a `$` anchor mid-regex and could never match,
    // which left every output file path empty.
    const linkCell = columns[hasExtendedColumns ? 7 : 5]
    const linkMatch = linkCell.match(/\[View\]\(\.\/(.+)\)/)

    executions[`${columns[1]}_${columns[2]}`] = {
      processingTime,
      outputFile: linkMatch ? linkMatch[1] : '',
      conversionType: hasExtendedColumns ? columns[4] : 'unknown',
      instructionPath: hasExtendedColumns ? columns[3] : 'default',
    }
  }
  return executions
}

/** Create a zeroed metrics record. */
function createMetrics(provider: string, model: string): ProviderMetrics {
  return {
    provider,
    model,
    processingTime: 0,
    accuracy: 0,
    fieldAccuracy: 0,
    completeness: 0,
    structure: 0,
    emptinessPercentage: 0,
    expectedPercentage: 0,
    count: 0,
    successRate: 0,
    files: [],
  }
}

/** Append `value` to `list` unless already present; creates the list when missing. */
function addUnique(list: string[] | undefined, value: string): string[] {
  const result = list ?? []
  if (!result.includes(value)) result.push(value)
  return result
}

/** Add one run to the running sums of a metrics record (averaged later). */
function accumulateRun(metrics: ProviderMetrics, sample: RunSample): void {
  metrics.processingTime += sample.processingTime
  metrics.accuracy += sample.accuracy
  if (sample.fieldAccuracy) {
    metrics.fieldAccuracy = (metrics.fieldAccuracy ?? 0) + sample.fieldAccuracy
  }
  if (sample.completeness) {
    metrics.completeness = (metrics.completeness ?? 0) + sample.completeness
  }
  if (sample.structure) {
    metrics.structure = (metrics.structure ?? 0) + sample.structure
  }
  if (sample.emptiness) {
    metrics.emptinessPercentage =
      (metrics.emptinessPercentage ?? 0) + sample.emptiness.percentage
    if (sample.emptiness.expectedPercentage !== undefined) {
      metrics.expectedPercentage =
        (metrics.expectedPercentage ?? 0) + sample.emptiness.expectedPercentage
    }
  }
  metrics.count += 1
  metrics.files.push(sample.outputPath)

  if (sample.conversionType) {
    metrics.conversionTypes = addUnique(
      metrics.conversionTypes,
      sample.conversionType
    )
  }
  if (sample.instructionPath) {
    metrics.instructionPaths = addUnique(
      metrics.instructionPaths,
      sample.instructionPath
    )
  }
}

/** Turn the running sums of a metrics record into per-run averages. */
function averageMetrics(metrics: ProviderMetrics): void {
  const runs = metrics.count
  if (runs <= 0) return

  metrics.processingTime /= runs
  metrics.accuracy /= runs
  if (metrics.fieldAccuracy !== undefined) metrics.fieldAccuracy /= runs
  if (metrics.completeness !== undefined) metrics.completeness /= runs
  if (metrics.structure !== undefined) metrics.structure /= runs
  if (metrics.emptinessPercentage !== undefined) {
    metrics.emptinessPercentage /= runs
  }
  if (metrics.expectedPercentage !== undefined) {
    metrics.expectedPercentage /= runs
  }
}

/**
 * Merge every `report.md` found below `outputDir` into one markdown summary.
 *
 * Each report must contain a "Successful Executions" and an "Accuracy
 * Comparison" table; reports missing either one are skipped entirely. The
 * "Token Usage Comparison" and "Field Emptiness Comparison" tables are
 * optional. The CV name is the part of the report's directory name before
 * the first underscore.
 *
 * @param outputDir Directory searched recursively for `report.md` files.
 * @returns The merged markdown report, or 'No report files found'.
 */
export async function mergeReports(outputDir: string): Promise<string> {
  const reportFiles = await glob(`${outputDir}/**/report.md`)
  if (reportFiles.length === 0) {
    return 'No report files found'
  }

  const report: Report = { providers: {}, models: {}, allRuns: [] }

  for (const reportFile of reportFiles) {
    const content = fs.readFileSync(reportFile, 'utf-8')
    const dirName = path.dirname(reportFile)
    const cvName = path.basename(dirName).split('_')[0]

    const executionRows = extractTableRows(content, 'Successful Executions')
    if (!executionRows) continue
    const accuracyRows = extractTableRows(content, 'Accuracy Comparison')
    if (!accuracyRows) continue

    const tokenUsageByModel = parseTokenUsageRows(
      extractTableRows(content, 'Token Usage Comparison')
    )
    const emptinessByModel = parseEmptinessRows(
      extractTableRows(content, 'Field Emptiness Comparison')
    )
    const executions = parseExecutionRows(executionRows)

    for (const row of accuracyRows) {
      const columns = splitRow(row)
      if (columns.length < 7) continue

      const provider = columns[1]
      const model = columns[2]
      const accuracy = parseFloat(columns[3].replace('%', '')) / 100
      if (isNaN(accuracy)) continue

      const fieldAccuracy = parseOptionalPercent(columns[4])
      const completeness = parseOptionalPercent(columns[5])
      const structure = parseOptionalPercent(columns[6])

      const modelKey = `${provider}_${model}`
      const execution: ExecutionInfo = executions[modelKey] || {
        processingTime: 0,
        outputFile: '',
      }
      const tokenUsage = tokenUsageByModel[modelKey]
      const emptiness = emptinessByModel[modelKey]
      const outputPath = path.join(dirName, execution.outputFile)

      const sample: RunSample = {
        processingTime: execution.processingTime,
        accuracy,
        fieldAccuracy,
        completeness,
        structure,
        emptiness,
        outputPath,
        conversionType: execution.conversionType,
        instructionPath: execution.instructionPath,
      }

      if (!report.providers[provider]) {
        report.providers[provider] = createMetrics(provider, 'Various')
      }
      accumulateRun(report.providers[provider], sample)

      if (!report.models[modelKey]) {
        report.models[modelKey] = createMetrics(provider, model)
      }
      accumulateRun(report.models[modelKey], sample)

      report.allRuns.push({
        cvName,
        provider,
        model,
        processingTime: execution.processingTime,
        accuracy,
        fieldAccuracy,
        completeness,
        structure,
        emptinessPercentage: emptiness ? emptiness.percentage : undefined,
        expectedPercentage: emptiness?.expectedPercentage,
        outputFile: outputPath,
        conversionType: execution.conversionType,
        instructionPath: execution.instructionPath,
        // Runs without a token usage row still get a zeroed entry.
        tokenUsage: {
          totalTokens: tokenUsage?.totalTokens || 0,
          inputTokens: tokenUsage?.inputTokens || 0,
          outputTokens: tokenUsage?.outputTokens || 0,
          estimatedCost: tokenUsage?.estimatedCost || 0,
        },
      })
    }

    // Success rate comes from the report's summary bullets and is applied to
    // every provider that has a run for this CV.
    const totalProvidersMatch = content.match(
      /- \*\*Total Providers\*\*: (\d+)/
    )
    const successfulMatch = content.match(/- \*\*Successful\*\*: (\d+)/)
    if (totalProvidersMatch && successfulMatch) {
      const total = parseInt(totalProvidersMatch[1], 10)
      const successful = parseInt(successfulMatch[1], 10)
      // Skip a zero total so the rate never becomes NaN or Infinity.
      if (total > 0) {
        const successRate = successful / total
        for (const run of report.allRuns) {
          if (run.cvName === cvName && report.providers[run.provider]) {
            report.providers[run.provider].successRate = successRate
          }
        }
      }
    }

    // NOTE(review): this replaces the emptiness of every run collected so far
    // with the mean over all of them, discarding the per-run values. Behaviour
    // is kept as-is; confirm whether the averaging is intended.
    if (report.allRuns.some((r) => r.emptinessPercentage)) {
      const runsWithEmptiness = report.allRuns.filter(
        (r) => r.emptinessPercentage !== undefined
      )
      const avgEmptiness =
        runsWithEmptiness.reduce(
          (sum, r) => sum + (r.emptinessPercentage || 0),
          0
        ) / runsWithEmptiness.length
      for (const run of runsWithEmptiness) {
        run.emptinessPercentage = avgEmptiness
      }
    }
  }

  Object.values(report.providers).forEach(averageMetrics)
  Object.values(report.models).forEach(averageMetrics)

  return generateMarkdownReport(await loadTokenUsageData(report))
}

/**
 * Load additional data from JSON files.
 *
 * For every run whose output file exists, token usage and emptiness found
 * under `metadata` in that file replace the values parsed from the report
 * tables. Read or parse failures are logged and the run is left unchanged.
 */
async function loadTokenUsageData(report: Report): Promise<Report> {
  for (const run of report.allRuns) {
    try {
      // A run without an output file resolves to its report directory, so
      // require a regular file before reading.
      if (
        !fs.existsSync(run.outputFile) ||
        !fs.statSync(run.outputFile).isFile()
      ) {
        continue
      }

      const data: OutputFileData | null = JSON.parse(
        fs.readFileSync(run.outputFile, 'utf8')
      )
      const metadata = data?.metadata
      if (!metadata) continue

      if (metadata.tokenUsage) {
        run.tokenUsage = {
          totalTokens: metadata.tokenUsage.totalTokens || 0,
          inputTokens: metadata.tokenUsage.inputTokens || 0,
          outputTokens: metadata.tokenUsage.outputTokens || 0,
          estimatedCost: metadata.tokenUsage.estimatedCost || 0,
        }
      }

      if (metadata.emptinessPercentage) {
        // The JSON value is divided by 100 to match the 0-1 fractions
        // parsed from the report tables.
        run.emptinessPercentage = metadata.emptinessPercentage.percentage / 100
      }
    } catch (error) {
      console.error(
        `Error loading additional data from ${run.outputFile}:`,
        error
      )
    }
  }

  return report
}

/** Render a 0-1 fraction as a percentage with one decimal, or '-' when missing. */
function formatPercent(fraction?: number): string {
  return fraction !== undefined ? `${(fraction * 100).toFixed(1)}%` : '-'
}

/**
 * Render the merged report as markdown: summary header, mermaid charts,
 * ranking tables, recommendations and the full list of runs.
 */
function generateMarkdownReport(report: Report): string {
  const providers = Object.values(report.providers)
  const models = Object.values(report.models)

  const sortedProviders = [...providers].sort(
    (a, b) => b.accuracy - a.accuracy
  )
  const sortedModels = [...models].sort((a, b) => b.accuracy - a.accuracy)
  const fastestProviders = [...providers].sort(
    (a, b) => a.processingTime - b.processingTime
  )
  const fastestModels = [...models].sort(
    (a, b) => a.processingTime - b.processingTime
  )

  // Sort by emptiness percentage (higher is better)
  const bestEmptinessModels = models
    .filter(
      (m) => m.emptinessPercentage !== undefined && m.emptinessPercentage > 0
    )
    .sort((a, b) => (b.emptinessPercentage || 0) - (a.emptinessPercentage || 0))

  // Combined score: 60% accuracy, 20% speed (normalised against the slowest
  // model, reversed so faster is better), 20% emptiness. Higher is better.
  const maxTime = Math.max(...models.map((m) => m.processingTime))
  const combinedScore = (metrics: ProviderMetrics): number => {
    const normalizedTime =
      maxTime > 0 ? 1 - metrics.processingTime / maxTime : 0
    const emptinessScore = metrics.emptinessPercentage || 0
    return metrics.accuracy * 0.6 + normalizedTime * 0.2 + emptinessScore * 0.2
  }

  const bestOverallModels = [...models].sort(
    (a, b) => combinedScore(b) - combinedScore(a)
  )

  let markdown = `# Merged CV Processing Report\n\n`
  markdown += `**Date**: ${new Date().toISOString().split('T')[0]}\n`
  markdown += `**Total CV Samples**: ${
    new Set(report.allRuns.map((r) => r.cvName)).size
  }\n`
  markdown += `**Total Runs Analyzed**: ${report.allRuns.length}\n\n`

  // Provider accuracy pie chart (top 6 for readability)
  markdown += `## Provider Accuracy Visualization\n\n`
  markdown += '```mermaid\n'
  markdown += 'pie title Provider Accuracy (%)\n'
  for (const provider of sortedProviders.slice(0, 6)) {
    markdown += ` "${provider.provider}" : ${Math.round(
      provider.accuracy * 100
    )}\n`
  }
  markdown += '```\n\n'

  markdown += `## Best Providers by Accuracy\n\n`
  markdown += `| Provider | Avg Accuracy | Avg Field Accuracy | Avg Emptiness | Conversion Types | Instructions | Runs |\n`
  markdown += `|----------|-------------|-------------------|--------------|----------------|-------------|------|\n`
  for (const provider of sortedProviders) {
    // A zero aggregate is shown as '-' in this table.
    markdown += `| ${provider.provider} | ${(provider.accuracy * 100).toFixed(
      1
    )}% | ${formatPercent(provider.fieldAccuracy || undefined)} | ${formatPercent(
      provider.emptinessPercentage || undefined
    )} | ${formatConversionTypes(
      provider.conversionTypes
    )} | ${formatInstructionPaths(provider.instructionPaths)} | ${
      provider.count
    } |\n`
  }

  // Model accuracy pie chart (top 6)
  markdown += `\n## Top Models Accuracy Comparison\n\n`
  markdown += '```mermaid\n'
  markdown += 'pie title Top 6 Models by Accuracy (%)\n'
  for (const model of sortedModels.slice(0, 6)) {
    markdown += ` "${model.provider} (${model.model})" : ${Math.round(
      model.accuracy * 100
    )}\n`
  }
  markdown += '```\n\n'

  markdown += `\n## Best Models by Accuracy\n\n`
  markdown += `| Provider | Model | Avg Accuracy | Avg Field Accuracy | Avg Emptiness | Avg Expected Emptiness | Conversion Types | Instructions | Runs |\n`
  markdown += `|----------|-------|-------------|-------------------|--------------|-----------------------|----------------|-------------|------|\n`
  for (const model of sortedModels) {
    markdown += `| ${model.provider} | ${model.model} | ${(
      model.accuracy * 100
    ).toFixed(1)}% | ${formatPercent(model.fieldAccuracy)} | ${formatPercent(
      model.emptinessPercentage
    )} | ${formatPercent(model.expectedPercentage)} | ${formatConversionTypes(
      model.conversionTypes
    )} | ${formatInstructionPaths(model.instructionPaths)} | ${model.count} |\n`
  }

  // Processing time gantt chart (6 fastest models)
  markdown += `\n## Processing Time Visualization\n\n`
  markdown += '```mermaid\n'
  markdown += 'gantt\n'
  markdown += ' title Processing Time by Model (seconds)\n'
  markdown += ' dateFormat X\n'
  markdown += ' axisFormat %S s\n\n'
  for (const model of fastestModels.slice(0, 6)) {
    const shortModel =
      model.model.length > 10
        ? model.model.substring(0, 10) + '...'
        : model.model
    const label = `${model.provider} (${shortModel})`
    const safeName = label.replace(/[^a-zA-Z0-9]/g, '_')
    markdown += ` ${safeName} :a, 0, ${model.processingTime.toFixed(2)}s\n`
  }
  markdown += '```\n\n'

  markdown += `\n## Fastest Providers\n\n`
  markdown += `| Provider | Avg Processing Time (s) | Conversion Types | Instructions | Runs |\n`
  markdown += `|----------|--------------------------|----------------|-------------|------|\n`
  for (const provider of fastestProviders) {
    markdown += `| ${provider.provider} | ${provider.processingTime.toFixed(
      2
    )} | ${formatConversionTypes(
      provider.conversionTypes
    )} | ${formatInstructionPaths(provider.instructionPaths)} | ${
      provider.count
    } |\n`
  }

  markdown += `\n## Fastest Models\n\n`
  markdown += `| Provider | Model | Avg Processing Time (s) | Conversion Types | Instructions | Runs |\n`
  markdown += `|----------|-------|--------------------------|----------------|-------------|------|\n`
  for (const model of fastestModels) {
    markdown += `| ${model.provider} | ${
      model.model
    } | ${model.processingTime.toFixed(2)} | ${formatConversionTypes(
      model.conversionTypes
    )} | ${formatInstructionPaths(model.instructionPaths)} | ${model.count} |\n`
  }

  // Sum token usage per provider/model. Provider and model are stored in the
  // entry itself: splitting the map key on '_' would truncate any model name
  // that contains an underscore.
  const modelTokenUsage = new Map<
    string,
    {
      provider: string
      model: string
      totalTokens: number
      inputTokens: number
      outputTokens: number
      estimatedCost: number
      count: number
    }
  >()
  for (const run of report.allRuns) {
    if (!run.tokenUsage) continue

    const modelKey = `${run.provider}_${run.model}`
    let usage = modelTokenUsage.get(modelKey)
    if (!usage) {
      usage = {
        provider: run.provider,
        model: run.model,
        totalTokens: 0,
        inputTokens: 0,
        outputTokens: 0,
        estimatedCost: 0,
        count: 0,
      }
      modelTokenUsage.set(modelKey, usage)
    }
    usage.totalTokens += run.tokenUsage.totalTokens || 0
    usage.inputTokens += run.tokenUsage.inputTokens || 0
    usage.outputTokens += run.tokenUsage.outputTokens || 0
    usage.estimatedCost += run.tokenUsage.estimatedCost || 0
    usage.count++
  }

  // Per-model averages, largest average total first
  const sortedModelsByTokens = [...modelTokenUsage.values()]
    .map((usage) => ({
      provider: usage.provider,
      model: usage.model,
      avgTotalTokens: usage.totalTokens / usage.count,
      avgInputTokens: usage.inputTokens / usage.count,
      avgOutputTokens: usage.outputTokens / usage.count,
      avgEstimatedCost: usage.estimatedCost / usage.count,
      count: usage.count,
    }))
    .sort((a, b) => b.avgTotalTokens - a.avgTotalTokens)

  // Token usage pie chart (top 5 providers by summed model averages)
  if (sortedModelsByTokens.length > 0) {
    markdown += `\n## Token Usage Visualization\n\n`
    markdown += '```mermaid\n'
    markdown += 'pie title Average Token Usage by Provider\n'

    const providerTokens: Record<string, number> = {}
    for (const usage of sortedModelsByTokens) {
      providerTokens[usage.provider] =
        (providerTokens[usage.provider] || 0) + usage.avgTotalTokens
    }

    const topProviders = Object.entries(providerTokens)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
    for (const [provider, tokens] of topProviders) {
      markdown += ` "${provider}" : ${Math.round(tokens)}\n`
    }
    markdown += '```\n\n'
  }

  markdown += `\n## Token Usage Comparison by Model\n\n`
  markdown += `| Provider | Model | Avg Total Tokens | Avg Input Tokens | Avg Output Tokens | Avg Est. Cost | Conversion Types | Instructions |\n`
  markdown += `|----------|-------|-----------------|-----------------|------------------|-------------|-----------------|--------------|\n`
  for (const model of sortedModelsByTokens) {
    const modelObj = report.models[`${model.provider}_${model.model}`]
    markdown += `| ${model.provider} | ${
      model.model
    } | ${model.avgTotalTokens.toFixed(0)} | ${model.avgInputTokens.toFixed(
      0
    )} | ${model.avgOutputTokens.toFixed(
      0
    )} | $${model.avgEstimatedCost.toFixed(4)} | ${
      modelObj ? formatConversionTypes(modelObj.conversionTypes) : 'unknown'
    } | ${
      modelObj
        ? formatInstructionPaths(modelObj.instructionPaths)
        : './instructions.txt'
    } |\n`
  }

  // Accuracy vs speed graph (top 8 models by accuracy)
  markdown += `\n## Accuracy vs Speed Visualization\n\n`
  markdown += '```mermaid\n'
  markdown += 'graph TD\n'
  markdown += ' title["Accuracy vs. Processing Time"];\n'
  markdown += ' style title fill:#fff,stroke:#fff,stroke-width:0px;\n\n'
  sortedModels.slice(0, 8).forEach((model, i) => {
    const accuracy = Math.round(model.accuracy * 100)
    const time = model.processingTime.toFixed(1)
    markdown += ` m${i}["${model.provider} ${accuracy}% accuracy ${time}s"];\n`
    markdown += ` class m${i} model${i};\n`
  })
  // One colour per node
  markdown += ' classDef model0 fill:#4CAF50,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model1 fill:#2196F3,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model2 fill:#FFC107,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model3 fill:#F44336,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model4 fill:#9C27B0,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model5 fill:#00BCD4,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model6 fill:#FF9800,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model7 fill:#607D8B,stroke:#333,stroke-width:1px;\n'
  markdown += '```\n\n'

  markdown += `\n## Best Overall (Combined Accuracy & Speed)\n\n`
  markdown += `| Provider | Model | Accuracy | Processing Time (s) | Combined Score | Conversion Types | Instructions |\n`
  markdown += `|----------|-------|----------|---------------------|---------------|----------------|-------------|\n`
  for (const model of bestOverallModels.slice(0, 5)) {
    markdown += `| ${model.provider} | ${model.model} | ${(
      model.accuracy * 100
    ).toFixed(1)}% | ${model.processingTime.toFixed(2)} | ${combinedScore(
      model
    ).toFixed(2)} | ${formatConversionTypes(
      model.conversionTypes
    )} | ${formatInstructionPaths(model.instructionPaths)} |\n`
  }

  // Performance graph for the top 3 models by combined score
  markdown += `\n## Top Models Performance Comparison\n\n`
  markdown += '```mermaid\n'
  markdown += 'graph TD\n'
  markdown += ' title["Top Model Performance"];\n'
  markdown += ' style title fill:#fff,stroke:#fff,stroke-width:0px;\n\n'
  bestOverallModels.slice(0, 3).forEach((model, i) => {
    const convType =
      model.conversionTypes && model.conversionTypes.length > 0
        ? formatConversionType(model.conversionTypes[0])
        : 'unknown'
    markdown += ` m${i}["#${i + 1}: ${model.provider} (${model.model}) `
    markdown += `🎯 Accuracy: ${(model.accuracy * 100).toFixed(1)}% `
    markdown += `⏱️ Speed: ${model.processingTime.toFixed(2)}s `
    markdown += `🔄 Conversion: ${convType} `
    markdown += `📊 Score: ${combinedScore(model).toFixed(2)}"];\n`
    markdown += ` class m${i} model${i};\n`
  })
  markdown += ' classDef model0 fill:#4CAF50,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model1 fill:#2196F3,stroke:#333,stroke-width:1px;\n'
  markdown += ' classDef model2 fill:#FFC107,stroke:#333,stroke-width:1px;\n'
  markdown += '```\n\n'

  markdown += `\n## Best Models by Field Emptiness\n\n`
  markdown += `| Provider | Model | Avg Emptiness | Avg Expected Emptiness | Conversion Types | Instructions | Runs |\n`
  markdown += `|----------|-------|--------------|-----------------------|----------------|-------------|------|\n`
  for (const model of bestEmptinessModels) {
    markdown += `| ${model.provider} | ${model.model} | ${formatPercent(
      model.emptinessPercentage || undefined
    )} | ${formatPercent(model.expectedPercentage)} | ${formatConversionTypes(
      model.conversionTypes
    )} | ${formatInstructionPaths(model.instructionPaths)} | ${model.count} |\n`
  }

  markdown += `\n## Recommendations\n\n`

  // Each recommendation is skipped when no model was parsed, instead of
  // dereferencing an undefined first element.
  const bestModel = bestOverallModels[0]
  if (bestModel) {
    markdown += `### Best Overall Model\n`
    markdown += `**${bestModel.provider} (${bestModel.model})** with ${(
      bestModel.accuracy * 100
    ).toFixed(1)}% accuracy and ${bestModel.processingTime.toFixed(
      2
    )}s average processing time.\n\n`
  }

  const bestAccuracyModel = sortedModels[0]
  if (bestAccuracyModel) {
    markdown += `### Best for Accuracy\n`
    markdown += `**${bestAccuracyModel.provider} (${
      bestAccuracyModel.model
    })** with ${(bestAccuracyModel.accuracy * 100).toFixed(1)}% accuracy.\n\n`
  }

  const bestSpeedModel = fastestModels[0]
  if (bestSpeedModel) {
    markdown += `### Best for Speed\n`
    markdown += `**${bestSpeedModel.provider} (${
      bestSpeedModel.model
    })** with ${bestSpeedModel.processingTime.toFixed(
      2
    )}s average processing time.\n\n`
  }

  if (bestEmptinessModels.length > 0) {
    const bestEmptinessModel = bestEmptinessModels[0]
    markdown += `### Best for Field Emptiness\n`
    markdown += `**${bestEmptinessModel.provider} (${
      bestEmptinessModel.model
    })** with ${formatPercent(
      bestEmptinessModel.emptinessPercentage || undefined
    )} field emptiness percentage`
    if (bestEmptinessModel.expectedPercentage !== undefined) {
      markdown += ` (expected: ${formatPercent(
        bestEmptinessModel.expectedPercentage
      )})`
    }
    markdown += `.\n\n`
  }

  // Most cost-effective model: highest tokens-per-dollar among models that
  // report both a cost and a token count
  const modelsByEfficiency = sortedModelsByTokens
    .filter((m) => m.avgEstimatedCost > 0 && m.avgTotalTokens > 0)
    .map((m) => ({
      ...m,
      tokenCostRatio: m.avgTotalTokens / m.avgEstimatedCost,
    }))
    .sort((a, b) => b.tokenCostRatio - a.tokenCostRatio)
  if (modelsByEfficiency.length > 0) {
    const bestCostModel = modelsByEfficiency[0]
    markdown += `### Most Cost-Effective Model\n`
    markdown += `**${bestCostModel.provider} (${
      bestCostModel.model
    })** with ${bestCostModel.avgTotalTokens.toFixed(
      0
    )} tokens at $${bestCostModel.avgEstimatedCost.toFixed(
      4
    )} average cost.\n\n`
  }

  markdown += `## All Runs\n\n`
  markdown += `| CV | Provider | Model | Accuracy | Field Accuracy | Emptiness | Expected Emptiness | Processing Time (s) | Conversion Type | Instructions | Total Tokens | Est. Cost |\n`
  markdown += `|----|----------|-------|----------|---------------|-----------|-------------------|---------------------|----------------|-------------|--------------|----------|\n`
  for (const run of report.allRuns) {
    const totalTokens = run.tokenUsage ? run.tokenUsage.totalTokens : 'N/A'
    const estCost =
      run.tokenUsage && run.tokenUsage.estimatedCost
        ? `$${run.tokenUsage.estimatedCost.toFixed(4)}`
        : 'N/A'
    const emptinessValue =
      run.emptinessPercentage !== undefined
        ? `${(run.emptinessPercentage * 100).toFixed(1)}%`
        : 'N/A'
    const fieldAccuracyValue =
      run.fieldAccuracy !== undefined
        ? `${(run.fieldAccuracy * 100).toFixed(1)}%`
        : 'N/A'
    const expectedEmptinessValue =
      run.expectedPercentage !== undefined
        ? `${(run.expectedPercentage * 100).toFixed(1)}%`
        : 'N/A'

    markdown += `| ${run.cvName} | ${run.provider} | ${run.model} | ${(
      run.accuracy * 100
    ).toFixed(
      1
    )}% | ${fieldAccuracyValue} | ${emptinessValue} | ${expectedEmptinessValue} | ${run.processingTime.toFixed(
      2
    )} | ${formatConversionType(run.conversionType)} | ${formatInstructionPath(
      run.instructionPath
    )} | ${totalTokens} | ${estCost} |\n`
  }

  return markdown
}

/**
 * Normalise a conversion type label to 'pdftotexts' / 'pdftoimages'.
 *
 * Values ending in '%' are mapped through a fixed table (30 and 100 ->
 * pdftotexts; 24, 18 and 0 -> pdftoimages) and returned unchanged when no
 * entry applies. Other values are matched case-insensitively on 'text' /
 * 'image' and otherwise lower-cased. A missing value yields 'unknown'.
 */
function formatConversionType(type?: string): string {
  if (!type) return 'unknown'

  if (type.endsWith('%')) {
    const percentValue = parseInt(type, 10)
    if (percentValue === 30 || percentValue === 100) return 'pdftotexts'
    if (percentValue === 24 || percentValue === 18 || percentValue === 0) {
      return 'pdftoimages'
    }
    return type // Return the original if no mapping
  }

  // Convert PdfToTexts -> pdftotexts and PdfToImages -> pdftoimages
  const lowered = type.toLowerCase()
  if (lowered.includes('text')) return 'pdftotexts'
  if (lowered.includes('image')) return 'pdftoimages'
  return lowered
}

/**
 * Reduce an instruction path to `./<filename>`.
 *
 * A missing value yields './instructions.txt'. Values that start with a
 * number map to './instructions_version_1.txt' when greater than 10 and to
 * './instructions.txt' otherwise.
 */
function formatInstructionPath(instructionPath?: string): string {
  if (!instructionPath) return './instructions.txt'

  const numericValue = parseFloat(instructionPath)
  if (!isNaN(numericValue)) {
    // (Add more mappings as needed)
    return numericValue > 10
      ? './instructions_version_1.txt'
      : './instructions.txt'
  }

  // Extract the filename only, keeping the ./ prefix
  const filename = instructionPath.split('/').pop() || 'instructions.txt'
  return `./${filename}`
}

/** Format a list of conversion types as a comma-separated string ('unknown' when empty). */
function formatConversionTypes(types?: string[]): string {
  if (!types || types.length === 0) return 'unknown'
  return types.map((t) => formatConversionType(t)).join(', ')
}

/** Format a list of instruction paths as a comma-separated string ('./instructions.txt' when empty). */
function formatInstructionPaths(paths?: string[]): string {
  if (!paths || paths.length === 0) return './instructions.txt'
  return paths.map((p) => formatInstructionPath(p)).join(', ')
}