UNPKG

pdf-parse-new

Version:

Pure JavaScript cross-platform module to extract text from PDFs with AI-powered optimization and multi-core processing.

github.com/simonegosetto/pdf-parse-new

simonegosetto/pdf-parse-new

45 lines (33 loc) • 1.12 kB

JavaScript

#!/usr/bin/env node

/**
 * Example: Basic PDF parsing
 * Uses the standard parse function with default options
 */

const fs = require('fs');
const path = require('path');
const PDF = require('../../index');

// Anchor the sample path on this script's directory (like the require above),
// so the example runs correctly no matter which directory it is launched from.
const TEST_FILE = path.join(__dirname, '../data/01-valid.pdf');

// Maximum number of characters of extracted text shown in the preview.
const PREVIEW_LENGTH = 200;

/**
 * Reads the sample PDF, parses it with the default options and prints a
 * short report (duration, page count, character count, PDF version and a
 * preview of the extracted text).
 *
 * Exits the process with code 1 when the sample file is missing or when
 * reading/parsing fails.
 *
 * @returns {Promise<void>} Resolves once the report has been printed.
 */
async function basicParse() {
  console.log('=== Basic PDF Parsing ===\n');

  if (!fs.existsSync(TEST_FILE)) {
    console.error('Test file not found:', TEST_FILE);
    process.exit(1);
  }

  try {
    // Read inside the try block so an I/O failure is reported through the
    // same error path as a parse failure instead of becoming an unhandled
    // promise rejection.
    const dataBuffer = fs.readFileSync(TEST_FILE);

    console.log(`File: ${TEST_FILE}`);
    console.log(`Size: ${(dataBuffer.length / 1024).toFixed(2)} KB\n`);

    const start = Date.now();
    const result = await PDF(dataBuffer);
    const duration = Date.now() - start;

    console.log('✅ Success!');
    console.log(`Duration: ${duration}ms`);
    console.log(`Pages: ${result.numpages}`);
    console.log(`Characters: ${result.text.length}`);
    console.log(`PDF Version: ${result.version}`);

    // Only show an ellipsis when the text was actually cut off.
    const preview = result.text.substring(0, PREVIEW_LENGTH);
    const ellipsis = result.text.length > PREVIEW_LENGTH ? '...' : '';
    console.log(`\nFirst ${PREVIEW_LENGTH} characters:\n${preview}${ellipsis}\n`);
  } catch (error) {
    console.error('❌ Error:', error.message);
    process.exit(1);
  }
}

basicParse();