UNPKG

office-text-extractor

Version:

Yet another library to extract text from MS Office and PDF files

24 lines (23 loc) 748 B
// source/parsers/pdf.ts
// The text extractor for PDF files.
import { PDFParse } from 'pdf-parse';

export class PdfExtractor {
  /**
   * The type(s) of input acceptable to this method.
   */
  mimes = ['application/pdf'];

  /**
   * Extract text from a PDF file if possible.
   *
   * Any error raised while extracting text propagates to the caller; the
   * parser is destroyed in either case so a failed extraction does not leave
   * it undisposed.
   *
   * @param input The PDF content, passed to the parser as its `data` option.
   * @returns The text extracted from the input.
   */
  apply = async (input) => {
    // Create a new parser for the given input.
    const parser = new PDFParse({ data: input });
    try {
      // Run the parser, asking it to parse hyperlinks as well.
      const result = await parser.getText({ parseHyperlinks: true });
      return result.text;
    } finally {
      // Always clean up the parser, even when text extraction throws.
      await parser.destroy();
    }
  };
}