UNPKG

office-text-extractor-browser

Version:

Fork of office-text-extractor with unreleased changes that include browser support

26 lines (25 loc) 879 B
// source/parsers/docx.ts
// Text extractor for DOCX (Word) documents.

import { extractRawText as parseWordFile } from 'mammoth';

// MIME type that identifies a DOCX (WordprocessingML) document.
const DOCX_MIME_TYPE =
  'application/vnd.openxmlformats-officedocument.wordprocessingml.document';

/**
 * Extractor that converts a DOCX document to plain text by delegating to
 * mammoth's `extractRawText`.
 */
export class DocExtractor {
  constructor() {
    /**
     * The MIME type(s) of input this extractor accepts.
     */
    this.mimes = [DOCX_MIME_TYPE];

    /**
     * Extract the text of a DOCX document.
     *
     * Assigned as an arrow-function property in the constructor, so each
     * instance carries its own `apply`.
     *
     * @param input The DOCX contents; passed to mammoth as its `buffer` option.
     * @returns A promise for the text extracted from the input (the `value`
     *   field of mammoth's result).
     */
    this.apply = async (input) => {
      // Let mammoth do the conversion; only the resulting text is returned.
      // @ts-expect-error: see feross/buffer#353, the types are incomplete.
      const { value } = await parseWordFile({ buffer: input });
      return value;
    };
  }
}