UNPKG

office-text-extractor-browser

Version:

Fork of office-text-extractor with unreleased changes that include browser support

github.com/philipkaufholz-8451/office-text-extractor

philipkaufholz-8451/office-text-extractor

57 lines (56 loc) • 2.21 kB

JavaScript

// source/lib.ts
// The source code for the library.

import { Buffer } from 'buffer/index.js';
import { fileTypeFromBuffer as getFileType } from 'file-type';
import { fetchUrl } from './util.js';

/**
 * Wraps binary input in a `Buffer` so that `toString()` decodes the bytes as
 * text. Without this, a plain `Uint8Array` stringifies to a comma-separated
 * list of numbers and an `ArrayBuffer` to "[object ArrayBuffer]".
 *
 * Typed arrays and `ArrayBuffer`s are wrapped over their existing memory
 * rather than copied. Anything else (including values that are already a
 * `Buffer`) is returned untouched.
 *
 * @param data The value to normalise.
 * @returns A `Buffer` over the same bytes, or `data` itself if it is not a
 * recognised binary container.
 */
const toBuffer = (data) => {
	if (Buffer.isBuffer(data)) return data;
	if (ArrayBuffer.isView(data)) {
		// Respect the view's offset/length: it may cover only part of its
		// underlying ArrayBuffer.
		return Buffer.from(data.buffer, data.byteOffset, data.byteLength);
	}
	if (data instanceof ArrayBuffer) return Buffer.from(data);
	return data;
};

/**
 * The text extractor class.
 *
 * Extraction methods are registered with `addMethod`; `extractText` then
 * picks the first registered method whose `mimes` list contains the detected
 * mime type of the input.
 */
export class TextExtractor {
	constructor() {
		// The list of methods supported by this instance of the extractor.
		this.methods = [];

		/**
		 * Registers a new method to this instance of the extractor.
		 *
		 * @param method The method of text extraction to add. `extractText`
		 * reads its `mimes` array and calls its `apply` function.
		 * @returns The current instance, for method chaining.
		 */
		this.addMethod = (method) => {
			this.methods.push(method);
			return this;
		};

		/**
		 * Extracts text from the given input.
		 *
		 * @param payload The input and type of input to extract text from.
		 * `input` is either a string or binary data (`Buffer`, typed array or
		 * `ArrayBuffer`). A string is fetched via `fetchUrl` when `type` is
		 * `'url'`; for every other `type` the string itself is used as the
		 * content. Note that this build does not read from the file system,
		 * so a string passed with `type: 'file'` is treated as content, not
		 * as a path.
		 * @returns The extracted text as a simple string.
		 * @throws {Error} If a mime type is detected but no registered method
		 * handles it.
		 */
		this.extractText = async ({ input, type }) => {
			// Turn the input into a buffer containing the file's contents.
			let rawInput;
			if (typeof input !== 'string') {
				rawInput = input;
			} else if (type === 'url') {
				rawInput = await fetchUrl(input);
			} else {
				rawInput = Buffer.from(input);
			}
			const preparedInput = toBuffer(rawInput);

			// Check the mime type of the file. If there is no mime type, it's
			// most likely a txt/csv file, so return the contents as text.
			const mimeDetails = await getFileType(preparedInput);
			if (!mimeDetails) return preparedInput.toString();

			// Find the extractor that can handle that mime type, and call it.
			const extractor = this.methods.find((method) =>
				method.mimes.includes(mimeDetails.mime),
			);
			if (!extractor?.apply) {
				const message = `text-extractor: could not find a method to handle ${mimeDetails.mime}`;
				throw new Error(message);
			}

			return extractor.apply(preparedInput);
		};
	}
}