office-text-extractor
Version:
Yet another library to extract text from MS Office and PDF files
38 lines (37 loc) • 1.04 kB
TypeScript
/**
* Tag that tells the extractor how the contents of the input are supplied.
*
* One of the string literals `'buffer'`, `'file'` or `'url'`.
* NOTE(review): presumably `'buffer'` means in-memory bytes, `'file'` a path
* on disk and `'url'` a remote location — the handling of each value is not
* visible in this declaration file; confirm against the implementation.
*/
export type InputType = 'buffer' | 'file' | 'url';
/**
* The argument object accepted by `TextExtractor.extractText`: the input
* itself plus a tag describing what kind of input it is.
*/
export type ExtractionPayload = {
// How `input` should be interpreted (see `InputType`).
type: InputType;
// The input value, either a string or raw bytes.
// NOTE(review): which `type` values pair with a string vs. a `Uint8Array`
// is not encoded in this type — verify against the implementation.
input: string | Uint8Array;
};
/**
* A method of text extraction that can be registered on a `TextExtractor`
* through `addMethod`.
*/
export type TextExtractionMethod = {
// List of strings associated with this method.
// NOTE(review): presumably the MIME types the method can handle — confirm.
mimes: string[];
// Asynchronous function that receives the input as raw bytes and resolves
// to a string (presumably the extracted text).
apply: (input: Uint8Array) => Promise<string>;
};
/**
* The text extractor class.
*
* Holds a list of extraction methods and exposes `addMethod` to register more
* and `extractText` to run an extraction on an `ExtractionPayload`.
*
* Note that `addMethod` and `extractText` are declared as function-typed
* instance properties rather than as method signatures.
*/
export declare class TextExtractor {
// The extraction methods held by this instance.
methods: TextExtractionMethod[];
// Text encoder instance held by the extractor; its use is not visible here.
encoder: TextEncoder;
// Text decoder instance held by the extractor; its use is not visible here.
decoder: TextDecoder;
/**
* Registers a new method to this instance of the extractor.
*
* @param method The method of text extraction to add.
* @returns The current instance, for method chaining.
*/
addMethod: (method: TextExtractionMethod) => this;
/**
* Extracts text from the given input.
*
* The payload is destructured into `input` (a string or `Uint8Array`) and
* `type` (an `InputType` tag describing the input).
*
* @param payload The input and type of input to extract text from.
* @returns A promise that resolves to the extracted text as a simple string.
*/
extractText: ({ input, type }: ExtractionPayload) => Promise<string>;
}