// iterparse
// Version:
// Delightful data parsing
// 191 lines • 7.79 kB
// TypeScript
import * as Papa from 'papaparse';
import { ProgressReportOptions, WriteProgressReportOptions } from './helpers';
import { AsyncIterable } from 'ix';
import { GuessableDelimiters } from 'papaparse';
import { AnyIterable, FileReference, FileWriteMode, IX } from './types';
/**
* Item type yielded by `csvRead`: a parsed value bundled with the Papa Parse
* error list and metadata that accompany it.
*
* NOTE(review): presumably one result is emitted per parsed row — confirm against the implementation.
*/
export interface ParsingResult<T> {
/** Parsed payload. Its shape is whatever the caller supplies as `T`; this declaration does not validate it. */
data: T;
/** Parse errors, typed as `Papa.ParseError`. */
errors: Papa.ParseError[];
/** Parse metadata, typed as `Papa.ParseMeta`. */
meta: Papa.ParseMeta;
}
/**
* Options accepted by `csvRead`.
* Combines the CSV parsing settings declared below with the members inherited from
* `ProgressReportOptions` and `FileReference`.
*/
export interface CSVReadOptions extends ProgressReportOptions, FileReference {
/**
* The delimiting character. Leave blank to auto-detect from a list of the most common delimiters, or from any values passed in through `delimitersToGuess`.
* * If given, it must be a string of length 1.
* * It cannot be one of the values found in Papa.BAD_DELIMITERS.
*
* Note: the Papa Parse documentation also describes a function form (receiving the input and returning the delimiter),
* but this declaration types the option as `string`, so only the string form can be passed here.
*
* More information [papaparse.com](https://www.papaparse.com/docs#config)
*/
delimiter?: string;
/**
* The newline sequence. Leave blank to auto-detect. Must be one of \r, \n, or \r\n.
*
* NOTE(review): the description says a blank value triggers auto-detection while the default below is listed as `"\r\n"` — confirm which one actually applies.
*
* More information [papaparse.com](https://www.papaparse.com/docs#config)
* @defaultValue `"\r\n"`
*/
newline?: string;
/**
* The character used to quote fields. Quoting every field is not mandatory; any field that is not quoted will still be read correctly.
*
* More information [papaparse.com](https://www.papaparse.com/docs#config)
* @defaultValue `"`
*/
quoteChar?: string;
/**
* The character used to escape the quote character within a field. If not set, this option defaults to the value of `quoteChar`, meaning that the default way to escape a quote character inside a quoted field is to write it twice (e.g. "column with ""quotes"" in text").
*
* More information [papaparse.com](https://www.papaparse.com/docs#config)
*
* @defaultValue `"`
*/
escapeChar?: string;
/**
* If true, the first row of parsed data will be interpreted as field names.
* An array of field names will be returned in meta, and each row of data will be an object of values keyed by field name instead of a simple array.
* Rows with a different number of fields from the header row will produce an error.
*
* More information [papaparse.com](https://www.papaparse.com/docs#config)
*
* @warning
* Duplicate field names will overwrite values in previous fields having the same name.
* @defaultValue `true`
*/
header?: boolean;
/**
* Trims white space from header values.
* Requires that `options.header === true`
*
* More information [papaparse.com](https://www.papaparse.com/docs#config)
*
* @defaultValue `true`
*/
trimHeaders?: boolean;
/**
* If true, numeric and boolean data will be converted to their type instead of remaining strings.
* Numeric data must conform to the definition of a decimal literal.
* Numerical values greater than 2^53 or less than -2^53 will not be converted to numbers to preserve precision.
* European-formatted numbers must have commas and dots swapped.
* It also accepts an object or a function. If an object, its values should be booleans indicating whether dynamic typing should be applied for each column number (or header name if using headers).
* If it's a function, it should return a boolean value for each field number (or name if using headers), which will be passed as the first argument.
*
* More information [papaparse.com](https://www.papaparse.com/docs#config)
*
* @warning
* This option will reduce parsing performance
*
* @defaultValue `false`
*/
dynamicTyping?: boolean | {
[headerName: string]: boolean;
[columnNumber: number]: boolean;
} | ((field: string | number) => boolean);
/**
* The encoding to use when opening local files. If specified, it must be a value supported by the FileReader API.
* More information [papaparse.com](https://www.papaparse.com/docs#config)
* @defaultValue `utf8`
*/
encoding?: string;
/**
* A string that indicates a comment (for example, "#" or "//"). When Papa encounters a line starting with this string, it will skip the line.
* More information [papaparse.com](https://www.papaparse.com/docs#config)
* @defaultValue `false`
*/
comments?: boolean | string;
/**
* If true, lines that are completely empty (those which evaluate to an empty string) will be skipped. If set to 'greedy', lines that don't have any content (those which have only whitespace after parsing) will also be skipped.
* More information [papaparse.com](https://www.papaparse.com/docs#config)
* @defaultValue `true`
*/
skipEmptyLines?: boolean | 'greedy';
/**
* Fast mode speeds up parsing significantly for large inputs. However, it only works when the input has no quoted fields. Fast mode will automatically be enabled if no " characters appear in the input. You can force fast mode either way by setting it to true or false.
* More information [papaparse.com](https://www.papaparse.com/docs#config)
* @defaultValue `undefined`
*/
fastMode?: boolean;
/**
* An array of delimiters to guess from if the `delimiter` option is not set.
* More information [papaparse.com](https://www.papaparse.com/docs#config)
* @defaultValue `[',', '\t', '|', ';']`
*/
delimitersToGuess?: GuessableDelimiters[];
}
/**
* Reads a CSV file in a memory-efficient way.
* @include ./CSVReadOptions.md
* @typeParam T - Type of the `data` field of every yielded `ParsingResult`; chosen by the caller and not checked by this declaration.
* @param options - Read options, see `CSVReadOptions`.
* @returns An `AsyncIterable` (from `ix`) of `ParsingResult<T>` values.
* @example
* import { csvRead } from 'iterparse'
* csvRead({ filePath: 'path/to/file' })
* .map((q)=> console.log(q))
* .count()
*
* @example
* import { csvRead } from 'iterparse'
* for await (const item of csvRead({ filePath: 'path/to/file' })) {
* console.log(item)
* }
* @category CSV
*/
export declare function csvRead<T>(options: CSVReadOptions): AsyncIterable<ParsingResult<T>>;
/**
* Options accepted by `csvWrite`.
* Combines the CSV formatting settings declared below with the members inherited from
* `FileReference`, `FileWriteMode` and `WriteProgressReportOptions`.
*/
export interface CSVWriteOptions extends FileReference, FileWriteMode, WriteProgressReportOptions {
/**
* How many records to keep in memory before writing them to the file.
* A higher number means more memory usage but faster performance;
* a lower number means less memory usage but slower performance.
* The optimal buffer count is 1000.
* @defaultValue 1000
*/
writeBuffer?: number;
/**
* NOTE(review): presumably controls whether written fields are wrapped in quotes (a single flag, or one flag per column) — confirm against the implementation.
* @defaultValue `false`
*/
quotes?: boolean | boolean[];
/**
* NOTE(review): presumably the character used to quote fields — confirm against the implementation.
* @defaultValue `"`
*/
quoteChar?: string;
/**
* NOTE(review): presumably the character used to escape the quote character inside a field — confirm against the implementation.
* @defaultValue `"`
*/
escapeChar?: string;
/**
* NOTE(review): presumably the field separator written between values — confirm against the implementation.
* @defaultValue `,`
*/
delimiter?: string;
/**
* NOTE(review): presumably the line terminator written after each record — confirm against the implementation.
* @defaultValue `\r\n`
*/
newline?: string;
/**
* NOTE(review): presumably mirrors the read-side option of the same name (skip empty lines, or with 'greedy' also whitespace-only lines) — confirm against the implementation.
* @defaultValue `false`
*/
skipEmptyLines?: boolean | 'greedy';
/**
* NOTE(review): undocumented in the original; presumably the list of column (key) names to write — confirm against the implementation.
*/
columns?: string[];
}
/**
* Writes JSON objects to a file in ".csv" format.
*
* Two call forms are declared:
* - `csvWrite(options)` returns a function that takes the data and returns an `IX<T>`.
* - `csvWrite(data, options)` takes the data directly and returns an `IX<T>`
*   (in this overload the options parameter is named `out`).
*
* @param data - Any iterable.
* @param options - Write options
* @include ./CSVOptions.md
* @signature
* csvWrite(options)(iterable)
* @signature
* csvWrite(iterable, options)
* @example
* import { csvWrite } from 'iterparse'
* AsyncIterable.from([{...},{...},{...}])
* .pipe(csvWrite({ filePath: "path/to/file" }))
* .count()
* @example
* import { csvWrite } from 'iterparse'
* csvWrite([{ a: 1, b: 2 },{ a: 1, b: 2 }], { filePath: "/path/to/file" })
* .count()
* @category CSV
*/
export declare function csvWrite<T extends {
[k: string]: unknown;
}>(options: CSVWriteOptions): (data: AnyIterable<T>) => IX<T>;
export declare function csvWrite<T extends {
[k: string]: unknown;
}>(data: AnyIterable<T>, out: CSVWriteOptions): IX<T>;
//# sourceMappingURL=csv.d.ts.map