@forzalabs/remora
Version:
A powerful CLI tool for seamless data translation.
281 lines (280 loc) • 14.6 kB
JavaScript
;
/**
 * Compiler helper that drives a generator function as if it were an `async` function.
 *
 * @param {*} thisArg - `this` value the generator function is invoked with.
 * @param {Array|undefined} _arguments - Arguments for the generator function (defaults to none).
 * @param {Function|undefined} P - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function whose yielded values are awaited.
 * @returns {Promise} Settles with the generator's return value, or rejects with anything it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Yielded values that are not already instances of P get wrapped in one.
    const toPromise = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => { resolve(value); });
    };
    return new (P || (P = Promise))((resolve, reject) => {
        // Either finish with the generator's return value or wait on what it yielded.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        };
        // Feed the settled value back into the generator as the result of `yield`.
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Re-throw a rejection inside the generator so its own try/catch can see it.
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // The generator is instantiated inside the executor, so a synchronous throw rejects the promise.
        advance((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// Compiler helper behind down-levelled `for await` loops: returns an async iterator for `o`.
// If `o` exposes Symbol.asyncIterator that iterator is used directly; otherwise the
// synchronous iterator of `o` is wrapped so that next/throw/return each return a Promise.
var __asyncValues = (this && this.__asyncValues) || function (o) {
// Fail fast when the runtime has no Symbol.asyncIterator at all.
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
var m = o[Symbol.asyncIterator], i;
// Fallback path: obtain a sync iterator (via `__values` when that helper is in scope, else
// o[Symbol.iterator]()), then build the wrapper object `i` method by method.
return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
// Defines i[n] only when the underlying iterator implements method `n`.
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
// Resolves the (possibly promise) value first, then reports it as an iterator result.
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
};
/**
 * Compiler helper for default imports of CommonJS modules.
 *
 * @param {*} mod - The value returned by `require`.
 * @returns {*} `mod` itself when it is flagged `__esModule`, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const path_1 = __importDefault(require("path"));
const fs_1 = __importDefault(require("fs"));
const readline_1 = __importDefault(require("readline"));
const promises_1 = __importDefault(require("fs/promises"));
const TransformationEngine_1 = __importDefault(require("../engines/transform/TransformationEngine"));
const RequestExecutor_1 = __importDefault(require("../engines/execution/RequestExecutor"));
const Constants_1 = __importDefault(require("../Constants"));
const Algo_1 = __importDefault(require("../core/Algo"));
const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
const OutputExecutor_1 = __importDefault(require("./OutputExecutor"));
const ConsumerManager_1 = __importDefault(require("../engines/consumer/ConsumerManager"));
/**
 * Helpers that shape producer records into consumer records and post-process the
 * line-based dataset file of an execution (work-path handling, de-duplication).
 */
class ConsumerExecutorClass {
    constructor() {
        /**
         * Builds the dataset file path for one execution of a consumer.
         *
         * @param consumer - Object whose `name` is used as a folder name.
         * @param executionId - Identifier used as the sub-folder name.
         * @returns The path `./remora/<PRODUCER_TEMP_FOLDER>/<consumer.name>/<executionId>/.dataset`.
         */
        this._getWorkPath = (consumer, executionId) => {
            const execFolder = path_1.default.join(consumer.name, executionId);
            return path_1.default.join('./remora', Constants_1.default.defaults.PRODUCER_TEMP_FOLDER, execFolder, '.dataset');
        };
        /**
         * Best-effort removal of the dataset file and then of its parent directory.
         * Both steps deliberately ignore failures (`rmdir` only succeeds on an empty directory).
         *
         * @param workPath - Path of the dataset file to remove.
         */
        this._clearWorkPath = (workPath) => __awaiter(this, void 0, void 0, function* () {
            try {
                if (fs_1.default.existsSync(workPath)) {
                    yield promises_1.default.unlink(workPath);
                }
                // eslint-disable-next-line @typescript-eslint/no-unused-vars
            }
            catch (error) {
                // Ignore file deletion errors
            }
            try {
                const dir = path_1.default.dirname(workPath);
                if (fs_1.default.existsSync(dir)) {
                    yield promises_1.default.rmdir(dir);
                }
                // eslint-disable-next-line @typescript-eslint/no-unused-vars
            }
            catch (error) {
                // Ignore directory deletion errors
            }
        });
        /**
         * Makes sure the file at `pathUri` exists, creating its parent directories and an
         * empty file when they are missing.
         *
         * @param pathUri - Path of the file that must exist.
         */
        this._ensurePath = (pathUri) => {
            const dir = path_1.default.dirname(pathUri);
            if (!fs_1.default.existsSync(dir))
                fs_1.default.mkdirSync(dir, { recursive: true });
            if (!fs_1.default.existsSync(pathUri))
                fs_1.default.writeFileSync(pathUri, '');
        };
        /**
         * Maps one producer record onto the consumer's fields, in place.
         *
         * Steps, in order: fill fixed defaults / rename aliased fields, apply field
         * transformations, drop producer dimensions the consumer did not request, then
         * evaluate consumer filters and request filters.
         *
         * @param options - `{ consumer, fields, dimensions, producer, record, requestOptions }`.
         *   `fields` entries expose a `cField` (with `key` and optional `alias`, `fixed`,
         *   `default`, `transform`); `dimensions` entries expose a `name`.
         * @returns The same (mutated) `record`, or `null` when a filter rejects it.
         * @throws {Error} When a requested field has no matching dimension and no fixed default.
         */
        this.processRecord = (options) => {
            var _a, _b;
            const { consumer, fields, dimensions, producer, record, requestOptions } = options;
            // Map to consumer fields and apply consumer field logic
            for (const field of fields) {
                const { cField } = field;
                const fieldKey = (_a = cField.alias) !== null && _a !== void 0 ? _a : cField.key;
                const dimension = dimensions.find(x => x.name === cField.key);
                if (!dimension) {
                    if (cField.fixed && Algo_1.default.hasVal(cField.default)) {
                        // The value is already stored under its final name; there is no
                        // producer dimension to rename, so skip the alias handling below.
                        record[fieldKey] = cField.default;
                        continue;
                    }
                    throw new Error(`The requested field "${cField.key}" from the consumer is not present in the underlying producer "${producer.name}" (${dimensions.map(x => x.name).join(', ')})`);
                }
                // Change the name of the dimension
                if (cField.alias && cField.alias !== dimension.name) {
                    record[cField.alias] = record[dimension.name];
                    delete record[dimension.name];
                }
            }
            // Transformations need to be applied after the mapping since they might refer to other fields with their new names
            for (const field of fields) {
                const { cField } = field;
                const fieldKey = (_b = cField.alias) !== null && _b !== void 0 ? _b : cField.key;
                if (cField.transform)
                    record[fieldKey] = TransformationEngine_1.default.applyTransformations(record[fieldKey], cField.transform, cField, record);
            }
            // Remove un-wanted producer dimensions
            for (const dimension of dimensions) {
                const isRequested = fields.some(x => x.cField.key === dimension.name);
                if (!isRequested)
                    delete record[dimension.name];
            }
            // Apply consumer filters
            if (consumer.filters && consumer.filters.length > 0) {
                const isKept = consumer.filters.every(x => RequestExecutor_1.default.evaluateFilter(record, x.rule));
                if (!isKept)
                    return null;
            }
            // Apply request custom filters
            if (requestOptions && requestOptions.filters) {
                const isKept = requestOptions.filters.every(x => RequestExecutor_1.default.evaluateFilter(record, x));
                if (!isKept)
                    return null;
            }
            return record;
        };
        /**
         * Resolves once `writer` can accept more data, or rejects if it errors first.
         * Used to honour backpressure after `writer.write()` returned `false`.
         */
        this._waitForDrain = (writer) => new Promise((resolve, reject) => {
            const onDrain = () => {
                writer.removeListener('error', onError);
                resolve();
            };
            const onError = (error) => {
                writer.removeListener('drain', onDrain);
                reject(error);
            };
            writer.once('drain', onDrain);
            writer.once('error', onError);
        });
        /**
         * Destroys every given stream that exists and is not destroyed yet.
         */
        this._destroyStreams = (...streams) => {
            for (const stream of streams) {
                if (stream && !stream.destroyed)
                    stream.destroy();
            }
        };
        /**
         * Finishes `writer`, makes sure `reader` is closed, then replaces the dataset file
         * with the temp file.
         */
        this._replaceDataset = (reader, writer, tempWorkPath, datasetPath) => __awaiter(this, void 0, void 0, function* () {
            // Wait for the writer to finish before renaming
            yield new Promise((resolve, reject) => {
                writer.on('close', resolve);
                writer.on('error', reject);
                writer.end();
            });
            // Ensure the reader is fully closed before renaming
            if (!reader.destroyed) {
                yield new Promise(resolve => {
                    reader.once('close', resolve);
                    reader.destroy();
                });
            }
            // Delete original file first to avoid EPERM on Windows
            yield promises_1.default.unlink(datasetPath);
            yield promises_1.default.rename(tempWorkPath, datasetPath);
        });
        /**
         * Rewrites the dataset file keeping only the first occurrence of each distinct line.
         * The result is staged in `<datasetPath>_tmp` and then swapped in.
         *
         * @param datasetPath - Path of the line-based dataset file.
         * @returns The number of lines written (i.e. distinct lines).
         */
        this.processDistinct = (datasetPath) => __awaiter(this, void 0, void 0, function* () {
            var _a, e_1, _b, _c;
            const reader = fs_1.default.createReadStream(datasetPath);
            const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
            const tempWorkPath = datasetPath + '_tmp';
            const writer = fs_1.default.createWriteStream(tempWorkPath);
            let newLineCount = 0;
            const seen = new Set();
            try {
                // Down-levelled form of `for await (const line of lineReader)`.
                try {
                    for (var _d = true, lineReader_1 = __asyncValues(lineReader), lineReader_1_1; lineReader_1_1 = yield lineReader_1.next(), _a = lineReader_1_1.done, !_a; _d = true) {
                        _c = lineReader_1_1.value;
                        _d = false;
                        const line = _c;
                        if (!seen.has(line)) {
                            seen.add(line);
                            newLineCount++;
                            // write() returning false means the internal buffer is full:
                            // pause until it drains instead of buffering without bound.
                            if (!writer.write(line + '\n'))
                                yield this._waitForDrain(writer);
                        }
                    }
                }
                catch (e_1_1) { e_1 = { error: e_1_1 }; }
                finally {
                    try {
                        if (!_d && !_a && (_b = lineReader_1.return)) yield _b.call(lineReader_1);
                    }
                    finally { if (e_1) throw e_1.error; }
                }
                lineReader.close();
                yield this._replaceDataset(reader, writer, tempWorkPath, datasetPath);
            }
            catch (error) {
                // Do not leave file handles open when the rewrite fails part-way.
                lineReader.close();
                this._destroyStreams(reader, writer);
                throw error;
            }
            return newLineCount;
        });
        /**
         * Rewrites the dataset file keeping a single record per composite key, as configured
         * by `consumer.options.distinctOn` (`keys` plus `resolution: { strategy, orderBy, direction }`).
         * Each line is parsed as CSV when the internal record format is 'CSV' or 'TXT', otherwise as JSON.
         * A kept record stays at the output position of the first record seen for its key.
         *
         * @param consumer - Consumer definition providing `options.distinctOn`.
         * @param datasetPath - Path of the line-based dataset file.
         * @returns The number of records kept.
         */
        this.processDistinctOn = (consumer, datasetPath) => __awaiter(this, void 0, void 0, function* () {
            var _a, e_2, _b, _c;
            // Read the configuration before opening the file so a bad config cannot leak a handle.
            const { distinctOn } = consumer.options;
            const { keys, resolution } = distinctOn;
            const { strategy, orderBy, direction = 'asc' } = resolution;
            const internalRecordFormat = OutputExecutor_1.default._getInternalRecordFormat(consumer);
            const internalFields = ConsumerManager_1.default.getExpandedFields(consumer);
            const isDelimited = internalRecordFormat === 'CSV' || internalRecordFormat === 'TXT';
            const reader = fs_1.default.createReadStream(datasetPath);
            const lineReader = readline_1.default.createInterface({ input: reader, crlfDelay: Infinity });
            // Map to store the winning record for each composite key
            // Key: composite key string, Value: { record: parsed object, line: original line }
            const winners = new Map();
            let writer;
            try {
                // Down-levelled form of `for await (const line of lineReader)`.
                try {
                    for (var _d = true, lineReader_2 = __asyncValues(lineReader), lineReader_2_1; lineReader_2_1 = yield lineReader_2.next(), _a = lineReader_2_1.done, !_a; _d = true) {
                        _c = lineReader_2_1.value;
                        _d = false;
                        const line = _c;
                        const record = isDelimited
                            ? LineParser_1.default._internalParseCSV(line, internalFields)
                            : LineParser_1.default._internalParseJSON(line);
                        // Missing key values contribute an empty string to the composite key.
                        const compositeKey = keys.map(k => { var _a; return String((_a = record[k]) !== null && _a !== void 0 ? _a : ''); }).join('|');
                        const existing = winners.get(compositeKey);
                        if (!existing || this._shouldReplaceRecord(existing.record, record, strategy, orderBy, direction)) {
                            winners.set(compositeKey, { record, line });
                        }
                    }
                }
                catch (e_2_1) { e_2 = { error: e_2_1 }; }
                finally {
                    try {
                        if (!_d && !_a && (_b = lineReader_2.return)) yield _b.call(lineReader_2);
                    }
                    finally { if (e_2) throw e_2.error; }
                }
                lineReader.close();
                // Write the winning records to the temp file
                const tempWorkPath = datasetPath + '_tmp';
                writer = fs_1.default.createWriteStream(tempWorkPath);
                for (const { line } of winners.values()) {
                    // Honour backpressure rather than queueing every line in memory.
                    if (!writer.write(line + '\n'))
                        yield this._waitForDrain(writer);
                }
                yield this._replaceDataset(reader, writer, tempWorkPath, datasetPath);
            }
            catch (error) {
                // Do not leave file handles open when parsing or rewriting fails.
                lineReader.close();
                this._destroyStreams(reader, writer);
                throw error;
            }
            return winners.size;
        });
        /**
         * Determines if the new record should replace the existing record based on the resolution strategy.
         *
         * @param existing - Record currently kept for the key.
         * @param newRecord - Candidate record with the same key.
         * @param strategy - 'first' | 'last' | 'min' | 'max'; anything else never replaces.
         * @param orderBy - Field compared by the 'min' / 'max' strategies.
         * @param direction - 'asc' keeps the strategy as named; any other value inverts the comparison.
         * @returns `true` when `newRecord` should take the place of `existing`.
         */
        this._shouldReplaceRecord = (existing, newRecord, strategy, orderBy, direction) => {
            switch (strategy) {
                case 'first':
                    return false;
                case 'last':
                    return true;
                case 'min': {
                    const comparison = this._compareValues(newRecord[orderBy], existing[orderBy]);
                    // For 'min', we want the smallest value; a non-'asc' direction inverts the logic
                    return direction === 'asc' ? comparison < 0 : comparison > 0;
                }
                case 'max': {
                    const comparison = this._compareValues(newRecord[orderBy], existing[orderBy]);
                    // For 'max', we want the largest value; a non-'asc' direction inverts the logic
                    return direction === 'asc' ? comparison > 0 : comparison < 0;
                }
                default:
                    return false;
            }
        };
        /**
         * Compares two values: null/undefined sort first, values that both convert to a
         * number are compared numerically, everything else is compared as strings with
         * `localeCompare`.
         *
         * @returns negative if a < b, positive if a > b, 0 if equal
         */
        this._compareValues = (a, b) => {
            // Handle null/undefined
            if (a == null && b == null)
                return 0;
            if (a == null)
                return -1;
            if (b == null)
                return 1;
            // Try numeric comparison
            const numA = Number(a);
            const numB = Number(b);
            if (!Number.isNaN(numA) && !Number.isNaN(numB)) {
                return numA - numB;
            }
            // Fall back to string comparison
            return String(a).localeCompare(String(b));
        };
    }
}
// Single shared ConsumerExecutorClass instance, exposed as this module's default export.
const ConsumerExecutor = new ConsumerExecutorClass();
exports.default = ConsumerExecutor;