UNPKG

@forzalabs/remora

Version:

A powerful CLI tool for seamless data translation.

281 lines (280 loc) 14.6 kB
"use strict";
// Compiler-emitted helper (down-level async): drives a generator to completion, resuming it with
// each yielded promise's settled value, and exposes the whole run as a single promise.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// Compiler-emitted helper (down-level `for await`): returns the object's async iterator, or wraps
// its sync iterator so that every step yields a promise.
var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
    var m = o[Symbol.asyncIterator], i;
    return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
    function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
    function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
};
// Compiler-emitted helper: wraps a non-ES module so that it can be read through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const path_1 = __importDefault(require("path"));
const fs_1 = __importDefault(require("fs"));
const readline_1 = __importDefault(require("readline"));
const promises_1 = __importDefault(require("fs/promises"));
const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
const Constants_1 = __importDefault(require("../Constants"));
const Algo_1 = __importDefault(require("../core/Algo"));
const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
/**
 * Returns the key under which a consumer field is stored in a record:
 * its alias when one is set (not null/undefined), otherwise its key.
 */
const outputKeyOf = (cField) => {
    const alias = cField.alias;
    return alias !== null && alias !== void 0 ? alias : cField.key;
};
/**
 * Opens a write stream on `filePath` and starts tracking its completion immediately.
 *
 * The 'error' listener is attached at creation time: a stream error emitted while no listener is
 * registered (e.g. the file cannot be opened) would otherwise be thrown as an uncaught exception.
 *
 * @returns {{ writer: fs.WriteStream, finished: Promise<void> }} the stream and a promise that
 *          resolves on 'close' and rejects on 'error'.
 */
const openTrackedWriter = (filePath) => {
    const writer = fs_1.default.createWriteStream(filePath);
    const finished = new Promise((resolve, reject) => {
        writer.on('close', resolve);
        writer.on('error', reject);
    });
    // Mark the rejection as handled so an early failure is not reported as an unhandled rejection
    // before the caller gets to await `finished`; awaiting `finished` still rejects.
    finished.catch(() => { });
    return { writer, finished };
};
/**
 * Calls `onLine(line)` for every line produced by a readline interface (down-levelled `for await`).
 * If `onLine` throws, the iterator is closed through its `return()` method and the error is rethrown.
 */
const forEachLine = (lineReader, onLine) => __awaiter(void 0, void 0, void 0, function* () {
    var _a, e_1, _b, _c;
    try {
        for (var _d = true, lineReader_1 = __asyncValues(lineReader), lineReader_1_1; lineReader_1_1 = yield lineReader_1.next(), _a = lineReader_1_1.done, !_a; _d = true) {
            _c = lineReader_1_1.value;
            _d = false;
            const line = _c;
            onLine(line);
        }
    }
    catch (e_1_1) { e_1 = { error: e_1_1 }; }
    finally {
        try {
            if (!_d && !_a && (_b = lineReader_1.return)) yield _b.call(lineReader_1);
        }
        finally { if (e_1) throw e_1.error; }
    }
});
/**
 * Replaces the dataset file with the fully written temp file.
 * The caller must have finished writing `tempWorkPath` before calling this.
 */
const swapInTempDataset = (reader, tempWorkPath, datasetPath) => __awaiter(void 0, void 0, void 0, function* () {
    // Ensure the reader is fully closed before renaming
    if (!reader.destroyed) {
        yield new Promise(resolve => {
            reader.once('close', resolve);
            reader.destroy();
        });
    }
    // Delete original file first to avoid EPERM on Windows
    yield promises_1.default.unlink(datasetPath);
    yield promises_1.default.rename(tempWorkPath, datasetPath);
});
/**
 * Executes the consumer-side processing of a dataset: mapping producer records onto consumer
 * fields and de-duplicating the line-based dataset file on disk.
 */
class ConsumerExecutorClass {
    constructor() {
        /**
         * Builds the path of the working dataset file for one execution of a consumer:
         * `remora/<PRODUCER_TEMP_FOLDER>/<consumer.name>/<executionId>/.dataset` (relative path).
         */
        this._getWorkPath = (consumer, executionId) => {
            const execFolder = path_1.default.join(consumer.name, executionId);
            const workPath = path_1.default.join('./remora', Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, execFolder, '.dataset');
            return workPath;
        };
        /**
         * Best-effort cleanup: removes the work file and then its parent directory.
         * Failures are deliberately ignored (for instance `rmdir` rejects on a non-empty directory).
         */
        this._clearWorkPath = (workPath) => __awaiter(this, void 0, void 0, function* () {
            try {
                if (fs_1.default.existsSync(workPath)) {
                    yield promises_1.default.unlink(workPath);
                }
                // eslint-disable-next-line @typescript-eslint/no-unused-vars
            }
            catch (error) {
                // Ignore file deletion errors
            }
            try {
                const dir = path_1.default.dirname(workPath);
                if (fs_1.default.existsSync(dir)) {
                    yield promises_1.default.rmdir(dir);
                }
                // eslint-disable-next-line @typescript-eslint/no-unused-vars
            }
            catch (error) {
                // Ignore directory deletion errors
            }
        });
        /**
         * Makes sure that the work path exists: creates the parent directory (recursively) and an
         * empty file at `pathUri` when they are missing.
         */
        this._ensurePath = (pathUri) => {
            const dir = path_1.default.dirname(pathUri);
            if (!fs_1.default.existsSync(dir))
                fs_1.default.mkdirSync(dir, { recursive: true });
            if (!fs_1.default.existsSync(pathUri))
                fs_1.default.writeFileSync(pathUri, '');
        };
        /**
         * Maps one producer record onto the consumer's fields, applies field transformations and
         * evaluates the consumer and request filters.
         *
         * `options.record` is mutated in place.
         *
         * @param options `{ consumer, fields, dimensions, producer, record, requestOptions }`
         * @returns the (mutated) record, or `null` when a filter rejects it
         * @throws {Error} when a consumer field is neither a producer dimension nor a fixed field
         *         with a default value
         */
        this.processRecord = (options) => {
            const { consumer, fields, dimensions, producer, record, requestOptions } = options;
            // Map to consumer fields and apply consumer field logic
            for (const field of fields) {
                const { cField } = field;
                const dimension = dimensions.find(x => x.name === cField.key);
                if (!dimension) {
                    // Set the fixed default value for the field, or throw error if not present in the producer
                    if (cField.fixed && Algo_1.default.hasVal(cField.default)) {
                        record[outputKeyOf(cField)] = cField.default;
                        // There is no producer dimension to rename for a fixed field, and reading
                        // `dimension.name` below would throw on the missing dimension.
                        continue;
                    }
                    throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map(x => x.name).join(', ')})`);
                }
                // Change the name of the dimension
                if (cField.alias && cField.alias !== dimension.name) {
                    record[cField.alias] = record[dimension.name];
                    delete record[dimension.name];
                }
            }
            // Transformations need to be applied after the mapping since they might refer to other fields with their new names
            for (const field of fields) {
                const { cField } = field;
                if (!cField.transform)
                    continue;
                const fieldKey = outputKeyOf(cField);
                record[fieldKey] = TransformationEngine_1.default.applyTransformations(record[fieldKey], cField.transform, cField, record);
            }
            // remove un-wanted producer dimensions
            for (const dimension of dimensions) {
                const field = fields.find(x => x.cField.key === dimension.name);
                if (!field)
                    delete record[dimension.name];
            }
            // apply consumer filters
            if (consumer.filters && consumer.filters.length > 0) {
                const isKept = consumer.filters.every(x => RequestExecutor_1.default.evaluateFilter(record, x.rule));
                if (!isKept)
                    return null;
            }
            // apply request custom filters
            if (requestOptions && requestOptions.filters) {
                const isKept = requestOptions.filters.every(x => RequestExecutor_1.default.evaluateFilter(record, x));
                if (!isKept)
                    return null;
            }
            return record;
        };
        /**
         * Rewrites the dataset file keeping only the first occurrence of each distinct line.
         * The result is written to `<datasetPath>_tmp`, which then replaces the original file.
         *
         * @param datasetPath path of the line-based dataset file
         * @returns the number of lines kept
         */
        this.processDistinct = (datasetPath) => __awaiter(this, void 0, void 0, function* () {
            const reader = fs_1.default.createReadStream(datasetPath);
            const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
            const tempWorkPath = datasetPath + '_tmp';
            const { writer, finished } = openTrackedWriter(tempWorkPath);
            let newLineCount = 0;
            const seen = new Set();
            try {
                yield forEachLine(lineReader, (line) => {
                    if (seen.has(line))
                        return;
                    seen.add(line);
                    writer.write(line + '\n');
                    newLineCount++;
                });
                lineReader.close();
                // Wait for the writer to finish before renaming
                writer.end();
                yield finished;
            }
            catch (error) {
                // Release both streams before propagating, so a failed run does not keep them open
                lineReader.close();
                reader.destroy();
                writer.destroy();
                throw error;
            }
            yield swapInTempDataset(reader, tempWorkPath, datasetPath);
            return newLineCount;
        });
        /**
         * Rewrites the dataset file keeping one record per composite key, as configured in
         * `consumer.options.distinctOn` (`keys` plus `resolution: { strategy, orderBy, direction }`,
         * with `direction` defaulting to 'asc').
         * The result is written to `<datasetPath>_tmp`, which then replaces the original file.
         *
         * @param consumer the consumer whose `options.distinctOn` drives the de-duplication
         * @param datasetPath path of the line-based dataset file
         * @returns the number of records kept
         */
        this.processDistinctOn = (consumer, datasetPath) => __awaiter(this, void 0, void 0, function* () {
            // Read the configuration before opening any stream, so a malformed configuration
            // fails without leaving an open file behind.
            const { distinctOn } = consumer.options;
            const { keys, resolution } = distinctOn;
            const { strategy, orderBy, direction = 'asc' } = resolution;
            const internalRecordFormat = OutputExecutor_1.default._getInternalRecordFormat(consumer);
            const internalFields = ConsumerManager_1.default.getExpandedFields(consumer);
            const isDelimited = internalRecordFormat === 'CSV' || internalRecordFormat === 'TXT';
            const reader = fs_1.default.createReadStream(datasetPath);
            const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
            const tempWorkPath = datasetPath + '_tmp';
            // Map to store the winning record for each composite key
            // Key: composite key string, Value: { record: parsed object, line: original line }
            const winners = new Map();
            let tracked;
            try {
                yield forEachLine(lineReader, (line) => {
                    const record = isDelimited
                        ? LineParser_1.default._internalParseCSV(line, internalFields)
                        : LineParser_1.default._internalParseJSON(line);
                    // JSON-encode the list of key parts instead of joining them with a separator:
                    // a value containing the separator could otherwise make two different key
                    // tuples collide (e.g. ["a|b", "c"] and ["a", "b|c"]).
                    const compositeKey = JSON.stringify(keys.map(k => {
                        const value = record[k];
                        return String(value !== null && value !== void 0 ? value : '');
                    }));
                    const existing = winners.get(compositeKey);
                    if (!existing || this._shouldReplaceRecord(existing.record, record, strategy, orderBy, direction)) {
                        winners.set(compositeKey, { record, line });
                    }
                });
                lineReader.close();
                // Write the winning records to the temp file
                tracked = openTrackedWriter(tempWorkPath);
                for (const { line } of winners.values()) {
                    tracked.writer.write(line + '\n');
                }
                // Wait for the writer to finish before renaming
                tracked.writer.end();
                yield tracked.finished;
            }
            catch (error) {
                // Release the streams before propagating, so a failed run does not keep them open
                lineReader.close();
                reader.destroy();
                if (tracked)
                    tracked.writer.destroy();
                throw error;
            }
            yield swapInTempDataset(reader, tempWorkPath, datasetPath);
            return winners.size;
        });
        /**
         * Determines if the new record should replace the existing record based on the resolution strategy.
         *
         * - 'first': never replace; 'last': always replace.
         * - 'min' / 'max': compare the `orderBy` property of both records; with direction 'desc'
         *   the comparison is inverted.
         * - any other strategy: never replace.
         */
        this._shouldReplaceRecord = (existing, newRecord, strategy, orderBy, direction) => {
            switch (strategy) {
                case 'first':
                    return false;
                case 'last':
                    return true;
                case 'min': {
                    const existingVal = existing[orderBy];
                    const newVal = newRecord[orderBy];
                    const comparison = this._compareValues(newVal, existingVal);
                    // For 'min', we want the smallest value
                    // If direction is 'desc', we invert the logic (smallest becomes largest)
                    return direction === 'asc' ? comparison < 0 : comparison > 0;
                }
                case 'max': {
                    const existingVal = existing[orderBy];
                    const newVal = newRecord[orderBy];
                    const comparison = this._compareValues(newVal, existingVal);
                    // For 'max', we want the largest value
                    // If direction is 'desc', we invert the logic (largest becomes smallest)
                    return direction === 'asc' ? comparison > 0 : comparison < 0;
                }
                default:
                    return false;
            }
        };
        /**
         * Compares two values.
         * null/undefined sorts before everything else; when both values convert to a number with
         * `Number()` they are compared numerically; otherwise their string forms are compared with
         * `localeCompare`.
         * Returns: negative if a < b, positive if a > b, 0 if equal
         */
        this._compareValues = (a, b) => {
            // Handle null/undefined
            if (a == null && b == null)
                return 0;
            if (a == null)
                return -1;
            if (b == null)
                return 1;
            // Try numeric comparison
            const numA = Number(a);
            const numB = Number(b);
            if (!isNaN(numA) && !isNaN(numB)) {
                return numA - numB;
            }
            // Fall back to string comparison
            return String(a).localeCompare(String(b));
        };
    }
}
const ConsumerExecutor = new ConsumerExecutorClass();
exports.default = ConsumerExecutor;