UNPKG

llm-extract

Version:

Modular SDK for structured text extraction from documents using LLMs

35 lines 1.11 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.PDFProcessor = void 0; const pdf = require('pdf-parse'); class PDFProcessor { constructor() { this.supportedTypes = ['application/pdf']; } async parseDocument(buffer, options = {}) { const startTime = Date.now(); try { const data = await pdf(buffer, { max: options.maxPages || 0, }); const extractedText = data.text.trim(); const finalText = extractedText; return { extractedText: finalText, metadata: { pageCount: data.numpages, hasImages: false, processingInfo: { textLength: finalText.length, } }, processingTimeMs: Date.now() - startTime }; } catch (error) { throw new Error(`PDF parsing failed: ${error.message}`); } } } exports.PDFProcessor = PDFProcessor; //# sourceMappingURL=pdf.js.map