pdf-parse-new
Version:
Pure JavaScript cross-platform module to extract text from PDFs with AI-powered optimization and multi-core processing.
124 lines (102 loc) • 3.9 kB
JavaScript
/**
* Example: Compare All Methods
* Benchmark all parsing methods on the same PDF
*/
const fs = require('fs');
const PDF = require('../../index');
const PDFStream = require('../../lib/pdf-parse-stream');
const PDFWorkers = require('../../lib/pdf-parse-workers');
const PDFProcesses = require('../../lib/pdf-parse-processes');
const SmartParser = require('../../lib/SmartPDFParser');
/**
 * Benchmarks the Workers, Processes and SmartParser strategies on the large
 * test PDF and prints them ranked by elapsed time.
 *
 * When the large PDF is missing, delegates to compareSmall() instead. A
 * strategy that throws is logged and left out of the ranking; the remaining
 * strategies still run.
 *
 * @returns {Promise<void>} Resolves once the report has been printed.
 */
async function compareAll() {
  console.log('=== Compare All Parsing Methods ===\n');

  // NOTE(review): this relative path is resolved against the process working
  // directory, while compareSmall() reads './test/data/01-valid.pdf'. The two
  // paths look like they assume different working directories — confirm.
  const testFile = '../data/test_9000.pdf';
  if (!fs.existsSync(testFile)) {
    console.log('⚠️ Large test file not found:', testFile);
    console.log(' Using smaller file for demonstration...\n');
    return compareSmall();
  }

  const dataBuffer = fs.readFileSync(testFile);
  console.log(`File: ${testFile}`);
  console.log(`Size: ${(dataBuffer.length / 1024 / 1024).toFixed(2)} MB\n`);

  const results = [];

  // Runs one strategy, timing only the `run` call. `setup` executes before the
  // clock starts; `label` derives the name shown in the ranking from the result.
  const benchmark = async ({ name, setup = () => undefined, run, label = () => name }) => {
    try {
      console.log(`Testing ${name}...`);
      const context = setup();
      const start = Date.now();
      const result = await run(context);
      const duration = Date.now() - start;
      results.push({ method: label(result), duration, pages: result.numpages });
      console.log(`✅ ${name}: ${duration}ms\n`);
    } catch (error) {
      console.log(`❌ ${name} failed: ${error.message}\n`);
    }
  };

  // Strategies run one after another so they never compete for CPU while timed.
  await benchmark({
    name: 'Workers',
    run: () => PDFWorkers(dataBuffer, { maxWorkers: 4 }),
  });
  await benchmark({
    name: 'Processes',
    run: () => PDFProcesses(dataBuffer, { maxProcesses: 4 }),
  });
  await benchmark({
    name: 'SmartParser',
    setup: () => new SmartParser(),
    run: (parser) => parser.parse(dataBuffer),
    label: (result) => {
      // A successful parse without `_meta` must still be ranked, not reported as failed.
      const chosen = result._meta && result._meta.method ? result._meta.method : 'unknown';
      return `SmartParser (${chosen})`;
    },
  });

  if (results.length === 0) {
    return;
  }

  console.log('📊 Comparison Results:');
  console.log('─'.repeat(60));
  results.sort((a, b) => a.duration - b.duration);
  const fastest = results[0].duration;
  results.forEach((r, i) => {
    let speedup = 'FASTEST';
    if (i > 0) {
      // The list is sorted, so a 0 ms entry implies the fastest was 0 ms too:
      // report a tie instead of dividing 0 by 0 (NaN).
      const percent = r.duration === 0 ? 100 : (fastest / r.duration) * 100;
      speedup = `${percent.toFixed(1)}% of fastest`;
    }
    console.log(`${(i + 1)}. ${r.method.padEnd(25)} ${r.duration.toString().padStart(6)}ms (${speedup})`);
  });
  console.log();
}
/**
 * Fallback comparison on the small sample PDF.
 *
 * Times three calls on the same buffer: PDF() with default options, PDF()
 * with `{ parallelizePages: true, batchSize: 5 }`, and SmartParser#parse.
 * Results are printed ranked by elapsed time. A method that throws is logged
 * and left out of the ranking; a missing sample file is reported, not thrown.
 *
 * @returns {Promise<void>} Resolves once the report (or the warning) is printed.
 */
async function compareSmall() {
  // NOTE(review): relative to the process working directory — confirm the
  // script is meant to be launched from the directory that contains `test/`.
  const testFile = './test/data/01-valid.pdf';
  if (!fs.existsSync(testFile)) {
    console.log('⚠️ Test file not found:', testFile);
    return;
  }

  const dataBuffer = fs.readFileSync(testFile);
  console.log(`File: ${testFile}`);
  console.log(`Size: ${(dataBuffer.length / 1024).toFixed(2)} KB\n`);

  const results = [];

  // Runs one method, timing only the `run` call. `setup` executes before the
  // clock starts; `label` derives the name shown in the ranking from the result.
  const benchmark = async ({ name, setup = () => undefined, run, label = () => name }) => {
    try {
      const context = setup();
      const start = Date.now();
      const result = await run(context);
      const duration = Date.now() - start;
      results.push({ method: label(result), duration });
    } catch (error) {
      console.log(`❌ ${name} failed: ${error.message}\n`);
    }
  };

  await benchmark({
    name: 'Basic',
    run: () => PDF(dataBuffer),
  });
  await benchmark({
    name: 'Batch-5',
    run: () => PDF(dataBuffer, { parallelizePages: true, batchSize: 5 }),
  });
  await benchmark({
    name: 'SmartParser',
    setup: () => new SmartParser(),
    run: (parser) => parser.parse(dataBuffer),
    label: (result) => {
      // A successful parse without `_meta` must still be ranked, not reported as failed.
      const chosen = result._meta && result._meta.method ? result._meta.method : 'unknown';
      return `SmartParser (${chosen})`;
    },
  });

  if (results.length === 0) {
    return;
  }

  console.log('📊 Comparison Results:');
  console.log('─'.repeat(60));
  results.sort((a, b) => a.duration - b.duration);
  results.forEach((r, i) => {
    console.log(`${(i + 1)}. ${r.method.padEnd(30)} ${r.duration.toString().padStart(5)}ms`);
  });
  console.log();
}
// Entry point. Report any failure and exit non-zero instead of leaving the
// returned promise unhandled.
compareAll().catch((error) => {
  console.error('❌ Comparison failed:', error);
  process.exitCode = 1;
});