office-text-extractor-browser
Version:
Fork of office-text-extractor with unreleased changes that include browser support
26 lines (25 loc) • 879 B
JavaScript
// source/parsers/docx.ts
// The text extractor for DOCX files.
import { extractRawText as parseWordFile } from 'mammoth';
export class DocExtractor {
  constructor() {
    /**
     * MIME types this extractor accepts. The list holds a single entry:
     * the DOCX (WordprocessingML document) type.
     */
    this.mimes = [
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    ];

    /**
     * Pull the raw text out of a DOCX document.
     *
     * @param input The DOCX contents, forwarded to mammoth as its `buffer` option.
     * @returns A promise resolving to the `value` field of mammoth's result.
     */
    this.apply = async (input) => {
      // Hand the document to mammoth's raw-text extraction and wait for it.
      const source = { buffer: input };
      // @ts-expect-error: see feross/buffer#353, the types are incomplete.
      const extraction = await parseWordFile(source);
      return extraction.value;
    };
  }
}