UNPKG

@kanaries/web-data-loader

Version:

data loader tools for common datasource types (in browser)

122 lines (121 loc) 3.66 kB
import * as Papa from 'papaparse'; const tickMode = 10000; const maxWaitValue = 0.9999; /** * csvReader. load, parse and sampling for csv file in stream. * @param file File Type * @param config * @param onLoading loading process callback */ export function csvReader(props) { const { file, config, onLoading, encoding = 'utf-8' } = props; return new Promise((resolve, reject) => { if (!config) { pureSteamReader(file, encoding, resolve, reject, onLoading); } else { reservoirSampling(file, encoding, config.size, resolve, reject, onLoading); } }); } /** * * @param file File Type * @param resolve * @param reject * @param onLoading loading process callback */ function pureSteamReader(file, encoding, resolve, reject, onLoading) { const rows = []; let fields = []; let index = -1; let loadedSize = 0; Papa.parse(file, { worker: true, encoding, step(results) { loadedSize += results.data.join(',').length; if (index === -1) { fields = results.data; } else { if (results.data && results.data.length && results.data.length === fields.length) { rows.push(results.data); } } onLoading && (index % tickMode === 0) && onLoading(loadedSize / file.size); index++; }, complete() { const dataSource = table2json(fields, rows); onLoading && onLoading(1); resolve(dataSource); }, error(err) { reject(err); } }); } /** * Reservoir Sampling * Algorithm R: * Vitter, Jeffrey S. (1 March 1985). "Random sampling with a reservoir" (PDF). ACM Transactions on Mathematical Software. 11 (1): 37–57. CiteSeerX 10.1.1.138.784. doi:10.1145/3147.3165. 
// (tail of the Algorithm R citation comment that opens just above) */
/**
 * Streams a CSV file through Papa.parse and keeps a uniform random sample of
 * at most `size` data rows using reservoir sampling (Algorithm R).
 *
 * The first parsed row is taken as the list of field names. A later row takes
 * part in the sampling only when it has exactly as many cells as the header;
 * other rows are ignored and do not count towards the sample.
 *
 * @param file source handed to Papa.parse; its `size` is used for progress
 * @param encoding encoding forwarded to Papa.parse
 * @param size sample size
 * @param resolve called with the sampled records once parsing completes
 * @param reject called with the error reported by Papa.parse
 * @param onLoading optional loading progress callback; receives
 *                  loadedSize / file.size on every `tickMode`-th row and 1 on
 *                  completion
 */
function reservoirSampling(file, encoding, size, resolve, reject, onLoading) {
    const rows = [];
    let fields = [];
    // Counts every parsed row (header = -1); drives header detection and ticks.
    let index = -1;
    // Counts only well-formed data rows; this is the "i" of Algorithm R.
    // It must not include rejected rows, otherwise the reservoir is left
    // under-filled (or with holes) and the draw range is wrong.
    let seen = 0;
    let loadedSize = 0;
    Papa.parse(file, {
        worker: true,
        encoding,
        step(results) {
            loadedSize += results.data.join(',').length;
            if (index === -1) {
                fields = results.data;
            } else if (results.data && results.data.length && results.data.length === fields.length) {
                if (seen < size) {
                    // Reservoir not full yet: keep the row unconditionally.
                    rows.push(results.data);
                } else {
                    // Uniform integer in [0, seen]. Math.round would give the
                    // two endpoints only half the weight of interior values.
                    const pos = Math.floor(Math.random() * (seen + 1));
                    if (pos < size) {
                        rows[pos] = results.data;
                    }
                }
                seen++;
            }
            if (onLoading && index % tickMode === 0) {
                onLoading(loadedSize / file.size);
            }
            index++;
        },
        complete() {
            const dataSource = table2json(fields, rows);
            if (onLoading) {
                onLoading(1);
            }
            resolve(dataSource);
        },
        error(err) {
            reject(err);
        }
    });
}

/**
 * table to json
 * Converts tabular rows into an array of objects, one per row, using the
 * field name at each column position as the property key.
 *
 * @param fieldNames list of field names, normally the first row of the csv.
 * @param rows data rows, i.e. the remaining rows of the csv.
 * @returns array with one record per row; a cell missing from a row is stored
 *          as `undefined` under its field name.
 */
export function table2json(fieldNames, rows) {
    const dataSource = [];
    for (let i = 0; i < rows.length; i++) {
        const row = rows[i];
        const record = {};
        for (let j = 0; j < fieldNames.length; j++) {
            record[fieldNames[j]] = row[j];
        }
        dataSource.push(record);
    }
    return dataSource;
}