@flatfile/plugin-xlsx-extractor
Version:
A plugin for parsing xlsx files in Flatfile.
88 lines (83 loc) • 3.61 kB
TypeScript
import * as _flatfile_listener from '@flatfile/listener';
import { WorkbookCapture } from '@flatfile/util-extractor';
/** Header-detection settings selected by the `'default'` algorithm tag. */
interface DefaultOptions {
  /** Literal discriminant identifying this variant. */
  algorithm: 'default';
  /**
   * Optional row count.
   * NOTE(review): presumably how many leading rows are scanned for a header — confirm.
   */
  rowsToSearch?: number;
}
/** Header-detection settings selected by the `'explicitHeaders'` algorithm tag. */
interface ExplicitHeadersOptions {
  /** Literal discriminant identifying this variant. */
  algorithm: 'explicitHeaders';
  /** Required list of header names supplied by the caller. */
  headers: string[];
  /**
   * Optional numeric skip count.
   * NOTE(review): presumably the number of rows skipped before data begins — confirm.
   */
  skip?: number;
}
/** Header-detection settings selected by the `'specificRows'` algorithm tag. */
interface SpecificRowsOptions {
  /** Literal discriminant identifying this variant. */
  algorithm: 'specificRows';
  /**
   * Required list of row numbers.
   * NOTE(review): presumably the rows that make up the header; indexing base is not visible here — confirm.
   */
  rowNumbers: number[];
  /**
   * Optional numeric skip count.
   * NOTE(review): presumably the number of rows skipped before data begins — confirm.
   */
  skip?: number;
}
/** Header-detection settings selected by the `'dataRowAndSubHeaderDetection'` algorithm tag. */
interface DataRowAndSubHeaderDetectionOptions {
  /** Literal discriminant identifying this variant. */
  algorithm: 'dataRowAndSubHeaderDetection';
  /**
   * Optional row count.
   * NOTE(review): presumably how many leading rows are scanned — confirm.
   */
  rowsToSearch?: number;
}
/**
 * Header-detection settings selected by the `'newfangled'` algorithm tag.
 * This variant carries no settings beyond the tag itself.
 */
interface NewfangledOptions {
  /** Literal discriminant identifying this variant. */
  algorithm: 'newfangled';
}
/** Header-detection settings selected by the `'aiDetection'` algorithm tag. */
interface AIDetectionOptions {
  /** Literal discriminant identifying this variant. */
  algorithm: 'aiDetection';
  /**
   * Optional row count.
   * NOTE(review): presumably how many leading rows are scanned — confirm.
   */
  rowsToSearch?: number;
}
/**
 * Union of every header-detection configuration. Each member carries a
 * distinct `algorithm` string literal, so checking `algorithm` narrows
 * the union to a single variant.
 */
type GetHeadersOptions =
  | DefaultOptions
  | ExplicitHeadersOptions
  | SpecificRowsOptions
  | DataRowAndSubHeaderDetectionOptions
  | NewfangledOptions
  | AIDetectionOptions;
/** Value a `getHeaders` callback resolves with. */
interface GetHeadersResult {
  /** Header values, as strings. */
  header: string[];
  /**
   * Numeric reference to the header row.
   * NOTE(review): whether this is 0- or 1-based is not visible here — confirm.
   */
  headerRow: number;
  /**
   * List of strings.
   * NOTE(review): presumably spreadsheet column letters aligned with `header` — confirm.
   */
  letters: string[];
}
/**
 * Options accepted by `parseBuffer`: every `ExcelExtractorOptions` member
 * except `chunkSize` and `parallel`, intersected with the parser-specific
 * members declared below.
 */
type ParseBufferOptions = Omit<ExcelExtractorOptions, 'chunkSize' | 'parallel'> & {
  /**
   * Optional boolean flag.
   * NOTE(review): presumably toggles a header-selection step — confirm against the implementation.
   */
  readonly headerSelectionEnabled?: boolean;
  /**
   * Required async callback that receives an options value and a
   * two-dimensional array of strings, and resolves with a `GetHeadersResult`.
   *
   * NOTE(review): `options` is typed `any`; it presumably receives a
   * `GetHeadersOptions` value — confirm against callers before narrowing.
   */
  getHeaders: (options: any, data: string[][]) => Promise<GetHeadersResult>;
  /**
   * Optional row count.
   * NOTE(review): presumably how many leading rows are scanned for a header — confirm.
   */
  rowsToSearch?: number;
};
/**
 * Asynchronously turns a `Buffer` into a `WorkbookCapture`.
 * NOTE(review): presumably the buffer holds the raw bytes of an xlsx file — confirm.
 *
 * @param buffer - the bytes to parse.
 * @param options - optional settings; see `ParseBufferOptions`.
 * @returns a promise resolving to a `WorkbookCapture` (type imported from `@flatfile/util-extractor`).
 */
declare function parseBuffer(buffer: Buffer, options?: ParseBufferOptions): Promise<WorkbookCapture>;
/**
 * Plugin config options. Every member is optional and read-only.
 */
interface ExcelExtractorOptions {
  /** If true, return raw data; if false, return formatted text. */
  readonly raw?: boolean;
  /** If true, return raw numbers; if false, return formatted numbers. */
  readonly rawNumbers?: boolean;
  /** The date format. */
  readonly dateNF?: string;
  /** The options for header detection. */
  readonly headerDetectionOptions?: GetHeadersOptions;
  /** If true, skip empty lines; if false, include empty lines. */
  readonly skipEmptyLines?: boolean;
  /** The size of each chunk processed when inserting records. */
  readonly chunkSize?: number;
  /** The number of chunks processed in parallel when inserting records. */
  readonly parallel?: number;
  /** If true, display helpful console logs. */
  readonly debug?: boolean;
  /** The options for merged cell handling. */
  readonly mergedCellOptions?: {
    /** Handling for cells merged across columns. */
    acrossColumns?: {
      /** Which of the four supported treatments to apply. */
      treatment: 'applyToAll' | 'applyToTopLeft' | 'coalesce' | 'concatenate';
      /**
       * Optional separator string.
       * NOTE(review): presumably used when joining values (e.g. with `'concatenate'`) — confirm.
       */
      separator?: string;
    };
    /** Handling for cells merged across rows. */
    acrossRows?: {
      /** Which of the four supported treatments to apply. */
      treatment: 'applyToAll' | 'applyToTopLeft' | 'coalesce' | 'concatenate';
      /**
       * Optional separator string.
       * NOTE(review): presumably used when joining values (e.g. with `'concatenate'`) — confirm.
       */
      separator?: string;
    };
    /** Handling for cells merged across ranges; only two treatments are allowed here. */
    acrossRanges?: {
      /** Which of the two supported treatments to apply. */
      treatment: 'applyToAll' | 'applyToTopLeft';
    };
  };
  /** If true, cascade values down the dataset until a blank row, new value, or end of dataset. */
  readonly cascadeRowValues?: boolean;
  /** If true, cascade values across the header rows until a blank column, new value, or end of dataset. */
  readonly cascadeHeaderValues?: boolean;
}
/**
 * Plugin factory: accepts optional `ExcelExtractorOptions` and returns a
 * function that takes a `FlatfileListener` and returns nothing.
 * NOTE(review): presumably the returned function registers the extractor on
 * the listener — the implementation is not visible here.
 *
 * @param options - optional plugin configuration.
 * @returns a function to be invoked with a `FlatfileListener`.
 */
declare const ExcelExtractor: (options?: ExcelExtractorOptions) => (listener: _flatfile_listener.FlatfileListener) => void;
/**
 * Exported value declared with exactly the type of `parseBuffer`
 * (`parseBuffer` itself is not in the export list).
 * NOTE(review): presumably the public alias for `parseBuffer` — confirm in the implementation.
 */
declare const excelParser: typeof parseBuffer;
/**
 * Plugin factory with the same signature as `ExcelExtractor`: accepts
 * optional `ExcelExtractorOptions` and returns a function that takes a
 * `FlatfileListener` and returns nothing.
 * NOTE(review): presumably an alternate name for `ExcelExtractor` — confirm in the implementation.
 *
 * @param options - optional plugin configuration.
 * @returns a function to be invoked with a `FlatfileListener`.
 */
declare const xlsxExtractorPlugin: (options?: ExcelExtractorOptions) => (listener: _flatfile_listener.FlatfileListener) => void;
export { ExcelExtractor, type ExcelExtractorOptions, excelParser, xlsxExtractorPlugin };