@forzalabs/remora
Version:
A powerful CLI tool for seamless data translation.
72 lines (71 loc) • 3.5 kB
JavaScript
;
/**
 * CommonJS/ES-module interop helper.
 * Reuses an `__importDefault` already present on `this` when there is one; otherwise
 * returns the module unchanged if it is flagged `__esModule`, or wraps it so that the
 * value is reachable through `.default`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    const isEsModule = Boolean(mod && mod.__esModule);
    if (isEsModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const TypeCaster_1 = __importDefault(require("../transform/TypeCaster"));
const CSVParser_1 = __importDefault(require("./CSVParser"));
/**
 * Converts raw text lines into plain record objects.
 *
 * All methods are arrow functions assigned in the constructor, so they are own
 * properties of each instance and can be passed around without losing `this`.
 */
class LineParserClass {
    constructor() {
        /**
         * Parses one line of producer data into a record keyed by dimension name.
         *
         * @param {string} line - Raw text of one row (delimited) or one JSON document.
         * @param {object} producer - Only `settings.fileType` and `settings.delimiter` are read.
         * @param {Array<object>} dimensions - Each entry provides `name` (output key),
         *   `index` (column position, delimited branch only) and `prodDimension`
         *   (`type`, `format`, plus `alias` / `name` for the JSON branch).
         * @param {object} tracker - Receives `measure(label, elapsedMs)` after every step.
         * @returns {object} Record mapping each `dim.name` to the cast value.
         * @throws {Error} When `fileType` is not one of the handled types.
         */
        this.parse = (line, producer, dimensions, tracker) => {
            const { settings: { fileType, delimiter } } = producer;
            switch (fileType) {
                // Every non-JSON type is split with the CSV row parser.
                // NOTE(review): presumably PARQUET/XML/XLS/XLSX lines were already converted
                // to delimited text by the caller - confirm.
                case 'PARQUET':
                case 'TXT':
                case 'XML':
                case 'XLS':
                case 'XLSX':
                case 'CSV': {
                    let startedAt = performance.now();
                    const parts = CSVParser_1.default.parseRow(line, delimiter != null ? delimiter : ',');
                    tracker.measure('process-line:parse-csv-row', performance.now() - startedAt);
                    startedAt = performance.now();
                    const record = {};
                    for (const dim of dimensions) {
                        const { type, format } = dim.prodDimension;
                        record[dim.name] = TypeCaster_1.default.cast(parts[dim.index], type, format);
                    }
                    tracker.measure('process-line:cast&build-record', performance.now() - startedAt);
                    return record;
                }
                case 'JSON':
                case 'JSONL': {
                    let startedAt = performance.now();
                    const parsed = JSON.parse(line);
                    tracker.measure('process-line:parse-json', performance.now() - startedAt);
                    startedAt = performance.now();
                    const record = {};
                    for (const dim of dimensions) {
                        const { alias, name, type, format } = dim.prodDimension;
                        // The source property is looked up by alias when one is set, else by name.
                        const key = alias != null ? alias : name;
                        // Forward the declared format exactly like the delimited branch does,
                        // so a dimension's format is honoured regardless of the file type.
                        record[dim.name] = TypeCaster_1.default.cast(parsed[key], type, format);
                    }
                    tracker.measure('process-line:cast&build-record', performance.now() - startedAt);
                    return record;
                }
                default:
                    throw new Error(`File type ${fileType} not implemented yet.`);
            }
        };
        /**
         * Used ONLY to parse internal records (inside the .dataset), which are already
         * prepared in the expected layout: the i-th column belongs to the i-th field.
         *
         * @param {string} line - One delimited row.
         * @param {Array<object>} fields - Each entry provides `finalKey` (output key) and an
         *   optional `dimension.type`; the type falls back to 'string' when absent.
         * @param {string} [delimiter] - Column separator; defaults to ','.
         * @returns {object} Record mapping each `finalKey` to the cast column value.
         */
        this._internalParseCSV = (line, fields, delimiter) => {
            const parts = CSVParser_1.default.parseRow(line, delimiter != null ? delimiter : ',');
            const record = {};
            for (const [index, field] of fields.entries()) {
                const declaredType = field.dimension == null ? undefined : field.dimension.type;
                record[field.finalKey] = TypeCaster_1.default.cast(parts[index], declaredType != null ? declaredType : 'string');
            }
            return record;
        };
        /**
         * Used ONLY to parse internal records (inside the .dataset), which are already
         * prepared in the expected layout, so the parsed JSON is returned without casting.
         *
         * @param {string} line - One JSON document.
         * @returns {*} The value produced by `JSON.parse`.
         */
        this._internalParseJSON = (line) => {
            return JSON.parse(line);
        };
    }
}
// Single LineParserClass instance created at module load and exposed as the default export.
const LineParser = new LineParserClass();
exports.default = LineParser;