UNPKG

@forzalabs/remora

Version:

A powerful CLI tool for seamless data translation.

109 lines (108 loc) 5.83 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const Affirm_1 = __importDefault(require("../core/Affirm")); const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory")); const Environment_1 = __importDefault(require("../engines/Environment")); const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser")); const ProducerManager_1 = __importDefault(require("../engines/producer/ProducerManager")); const Algo_1 = __importDefault(require("../core/Algo")); const LineParser_1 = __importDefault(require("../engines/parsing/LineParser")); const CryptoEngine_1 = __importDefault(require("../engines/CryptoEngine")); class ProducerExecutorClass { constructor() { this.ready = (producer, scope) => __awaiter(this, void 0, void 0, function* () { (0, Affirm_1.default)(producer, 'Invalid producer'); const source = Environment_1.default.getSource(producer.source); (0, Affirm_1.default)(source, `Invalid source ${producer.source} on producer ${producer.name}`); const driver = yield DriverFactory_1.default.instantiateSource(source); return yield driver.ready({ producer, scope }); }); this.processHeader = (line, producer) => { const { settings: { fileType, hasHeaderRow, delimiter } } = producer; switch (fileType) { case 'PARQUET': case 'XML': case 'XLS': case 'XLSX': case 'CSV': { const parts = CSVParser_1.default.parseRow(line, delimiter); return parts; } case 'TXT': { if (hasHeaderRow) { const parts = CSVParser_1.default.parseRow(line, delimiter); return parts; } else { return producer.dimensions.map(x => { var _a; return (_a = x.alias) !== null && _a !== void 0 ? _a : x.name; }); } } case 'JSON': case 'JSONL': { const keys = Object.keys(JSON.parse(line)); return keys; } } }; this.reconcileHeader = (header, producer) => { var _a; const myHeader = [...header]; const producerDimensions = producer.dimensions; if (producerDimensions.some(x => x.sourceFilename === true)) myHeader.push(...producerDimensions.filter(x => x.sourceFilename === true).map(x => x.name)); const dimensions = []; for (const dimension of producerDimensions) { const key = (_a = dimension.alias) !== null && _a !== void 0 ? _a : dimension.name; const index = myHeader.findIndex(x => x === key); if (index < 0) throw new Error(`The dimension "${dimension.name}" (with key "${key}") of producer "${producer.name}" doesn't exist in the underlying dataset.`); dimensions.push({ index, name: dimension.name, prodDimension: dimension }); } return dimensions; }; this.processLine = (options) => { var _a; const { line, dimensions, index, producer, tracker } = options; Affirm_1.default.hasValue(line, 'Invalid line'); Affirm_1.default.hasValue(index, 'Invalid index'); (0, Affirm_1.default)(producer, 'Invalid producer'); if (!line) return null; // 1. map the underlying aliases to the new names AND cast to the correct type let counter = performance.now(); const record = LineParser_1.default.parse(line.trim(), producer, dimensions, tracker); tracker.measure('process-line:line-parse', performance.now() - counter); counter = performance.now(); for (const dimension of dimensions) { // 2. apply source file name // TODO: to replace with the actual full filename and not just the fileKey if (dimension.prodDimension.sourceFilename === true) record[dimension.name] = producer.settings.fileKey; // 3. mask const maskType = ProducerManager_1.default.getMask(dimension.prodDimension); if (Algo_1.default.hasVal(maskType)) record[dimension.name] = CryptoEngine_1.default.hashValue(maskType, (_a = record[dimension.name]) === null || _a === void 0 ? void 0 : _a.toString(), dimension.prodDimension.type); } tracker.measure('process-line:dimensions-filename-mask', performance.now() - counter); return record; }; } } const ProducerExecutor = new ProducerExecutorClass(); exports.default = ProducerExecutor;