llm-extract
Version:
Modular SDK for structured text extraction from documents using LLMs
35 lines • 1.11 kB
JavaScript
// Flag the exports object with `__esModule`, the marker transpilers emit for
// modules that were authored with ES `export` syntax.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the named export; the real value is assigned after the
// class definition further down in this file.
exports.PDFProcessor = void 0;
// `pdf` is invoked in parseDocument as `pdf(buffer, { max })` and awaited.
const pdf = require('pdf-parse');
/**
 * Document processor that extracts plain text and basic metadata from a PDF
 * by delegating to the module-level `pdf` function (loaded from `pdf-parse`).
 */
class PDFProcessor {
    constructor() {
        /** MIME types this processor declares support for. */
        this.supportedTypes = ['application/pdf'];
    }
    /**
     * Parses a PDF and returns its trimmed text together with metadata.
     *
     * @param {Buffer} buffer - PDF contents, passed straight through to `pdf`.
     * @param {{ maxPages?: number }} [options] - Optional settings. A falsy
     *   `maxPages` is forwarded as `max: 0` (presumably "no page limit" in
     *   pdf-parse; confirm against its documentation). `null` is treated like
     *   an omitted options object.
     * @returns {Promise<{
     *   extractedText: string,
     *   metadata: {
     *     pageCount: number,
     *     hasImages: boolean,
     *     processingInfo: { textLength: number }
     *   },
     *   processingTimeMs: number
     * }>} Extraction result. `hasImages` is always `false`: this
     *   implementation performs no image detection.
     * @throws {Error} With message `PDF parsing failed: <reason>`; the
     *   underlying error is attached as `cause` on runtimes that support it.
     */
    async parseDocument(buffer, options = {}) {
        const startTime = Date.now();
        // The default parameter only covers `undefined`; guard against an
        // explicit `null` so it does not surface as a bogus parsing failure.
        const maxPages = (options && options.maxPages) || 0;
        try {
            const data = await pdf(buffer, { max: maxPages });
            const extractedText = data.text.trim();
            return {
                extractedText,
                metadata: {
                    pageCount: data.numpages,
                    // Hard-coded: images are not inspected here.
                    hasImages: false,
                    processingInfo: {
                        textLength: extractedText.length,
                    },
                },
                processingTimeMs: Date.now() - startTime,
            };
        }
        catch (error) {
            // Thrown values are not guaranteed to be Error instances; fall
            // back to their string form instead of printing "undefined".
            const reason = error != null && typeof error.message === 'string'
                ? error.message
                : String(error);
            // Keep the original error reachable for callers and debuggers.
            throw new Error(`PDF parsing failed: ${reason}`, { cause: error });
        }
    }
}
// Publish the class as the module's named `PDFProcessor` export.
exports.PDFProcessor = PDFProcessor;
//# sourceMappingURL=pdf.js.map
;