pdf-parse-new
Version:
Pure javascript cross-platform module to extract text from PDFs with AI-powered optimization and multi-core processing.
59 lines (44 loc) • 1.53 kB
JavaScript
/**
* Example: Streaming Parse
* Memory-efficient parsing with progress tracking
*/
const fs = require('fs');
const path = require('path');
const PDFStream = require('../../lib/pdf-parse-stream');
/**
 * Parses the sample PDF through the streaming parser and reports progress.
 *
 * Reads the test file into memory, hands the buffer to `PDFStream`, redraws a
 * fixed-width progress bar on stdout each time the reported percentage grows,
 * and finally prints the elapsed time, `result.numpages`, and the length of
 * `result.text`.
 *
 * Exits the process with status 1 when the sample file is missing or when
 * reading/parsing fails.
 *
 * @returns {Promise<void>} Resolves once the summary has been printed.
 */
async function streamParse() {
  // Non-ASCII glyphs are written as escapes so the file survives re-encoding:
  // a mis-decoded copy collapsed both bar glyphs into one character and split
  // the success message across two lines (a syntax error).
  // NOTE(review): glyphs were reconstructed from garbled text — confirm
  // against the upstream example.
  const BAR_WIDTH = 50;
  const BAR_FILLED = '\u2588'; // full block
  const BAR_EMPTY = '\u2591'; // light shade
  const ICON_SUCCESS = '\u2705'; // check mark
  const ICON_ERROR = '\u274C'; // cross mark
  const ICON_TIP = '\u{1F4A1}'; // light bulb

  console.log('=== Streaming Parse Example ===\n');

  // Resolve against this script's directory (like the require() above does)
  // so the example works regardless of the directory it is launched from.
  const testFile = path.join(__dirname, '..', 'data', '01-valid.pdf');

  if (!fs.existsSync(testFile)) {
    console.error('Test file not found:', testFile);
    process.exit(1);
  }

  try {
    // Read inside the try so a read failure is reported like a parse failure.
    const dataBuffer = fs.readFileSync(testFile);

    console.log(`File: ${testFile}`);
    console.log(`Size: ${(dataBuffer.length / 1024).toFixed(2)} KB\n`);

    console.log('Starting streaming parse...');
    console.log('Progress:\n');

    let lastProgress = 0;
    const start = Date.now();

    const result = await PDFStream(dataBuffer, {
      // NOTE(review): presumably the number of pages per chunk — confirm
      // against pdf-parse-stream.
      chunkSize: 100,
      onChunkComplete: (progress) => {
        // Clamp to 0–100: a value above 100 would make the empty-segment
        // count negative, and String.prototype.repeat throws on that.
        const percent = Math.min(100, Math.max(0, Math.floor(progress.progress)));

        // Redraw only when the whole-number percentage advances.
        if (percent > lastProgress) {
          lastProgress = percent;
          const filled = Math.floor((percent * BAR_WIDTH) / 100);
          const bar = BAR_FILLED.repeat(filled) + BAR_EMPTY.repeat(BAR_WIDTH - filled);
          // '\r' returns to the start of the line so the bar overwrites itself.
          process.stdout.write(`\r${bar} ${percent}%`);
        }
      },
    });

    const duration = Date.now() - start;

    console.log(`\n\n${ICON_SUCCESS} Success!`);
    console.log(`Duration: ${duration}ms`);
    console.log(`Pages: ${result.numpages}`);
    console.log(`Characters: ${result.text.length}`);
    console.log(`\n${ICON_TIP} Streaming is memory-efficient for large PDFs!\n`);
  } catch (error) {
    console.error(`\n${ICON_ERROR} Error:`, error.message);
    process.exit(1);
  }
}
// Run the example. streamParse() reports parse failures itself; this handler
// covers anything thrown outside its try/catch so no rejection goes unhandled.
streamParse().catch((error) => {
  console.error('Unexpected failure:', error);
  process.exit(1);
});