@forzalabs/remora
Version:
A powerful CLI tool for seamless data translation.
109 lines (108 loc) • 5.83 kB
JavaScript
;
/**
 * Compiler-emitted helper that drives a generator function as if it were an
 * async function: every yielded value is awaited and the settled result is
 * fed back into the generator.
 *
 * @param {*} thisArg - `this` binding for the generator function.
 * @param {ArrayLike<*>} [_arguments] - Arguments forwarded to the generator function.
 * @param {PromiseConstructor} [P] - Promise implementation; falls back to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator function containing the async body.
 * @returns {Promise<*>} Settles with the generator's return value, or rejects with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Values that are not already instances of P are wrapped so they can be chained.
    const asPromise = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => { resolve(value); });
    };
    return new (P || (P = Promise))((resolve, reject) => {
        // Either finish with the generator's return value or wait on the yielded one.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            asPromise(result.value).then(onFulfilled, onRejected);
        };
        // Resume the generator with the awaited value.
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Re-throw the rejection inside the generator so its own try/catch can see it.
        const onRejected = (value) => {
            try {
                advance(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        };
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
/**
 * Compiler-emitted interop helper: gives every required module a `default`
 * property. Modules flagged with `__esModule` are returned untouched; anything
 * else is wrapped as `{ default: mod }`.
 *
 * @param {*} mod - The value returned by `require`.
 * @returns {*} `mod` itself, or a wrapper object exposing it as `default`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../core/Affirm"));
const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
const Environment_1 = __importDefault(require("../engines/Environment"));
const CSVParser_1 = __importDefault(require("../engines/parsing/CSVParser"));
const ProducerManager_1 = __importDefault(require("../engines/producer/ProducerManager"));
const Algo_1 = __importDefault(require("../core/Algo"));
const LineParser_1 = __importDefault(require("../engines/parsing/LineParser"));
const CryptoEngine_1 = __importDefault(require("../engines/CryptoEngine"));
/**
 * Producer-side execution helpers: readying a producer's source driver,
 * deriving and reconciling dataset headers, and turning raw lines into records.
 *
 * All members are arrow functions assigned in the constructor, so they remain
 * bound to the instance even when detached from it.
 */
class ProducerExecutorClass {
    constructor() {
        // Key under which a dimension is looked up in the dataset: its alias
        // when one is set (neither null nor undefined), otherwise its name.
        const keyOf = (dimension) => (dimension.alias != null ? dimension.alias : dimension.name);
        /**
         * Resolves the producer's source, instantiates the matching driver and
         * delegates to that driver's `ready`.
         *
         * @param {object} producer - Producer definition; `source` and `name` are read here.
         * @param {*} scope - Passed through to the driver unchanged.
         * @returns {Promise<*>} Whatever the driver's `ready({ producer, scope })` resolves to.
         */
        this.ready = (producer, scope) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const source = Environment_1.default.getSource(producer.source);
            (0, Affirm_1.default)(source, `Invalid source ${producer.source} on producer ${producer.name}`);
            const driver = yield DriverFactory_1.default.instantiateSource(source);
            return yield driver.ready({ producer, scope });
        });
        /**
         * Derives the list of column keys from the first line of a dataset.
         *
         * - PARQUET / XML / XLS / XLSX / CSV: the line is split with the CSV row parser
         *   (presumably these formats reach this point already flattened to delimited rows - confirm upstream).
         * - TXT: split like CSV when `hasHeaderRow` is set, otherwise the producer's
         *   dimension keys (alias, falling back to name) are used as the header.
         * - JSON / JSONL: the top-level keys of the parsed line.
         *
         * @param {string} line - First line of the dataset.
         * @param {object} producer - Producer definition; `settings` and `dimensions` are read.
         * @returns {string[]|undefined} The header keys, or `undefined` for an unrecognised `fileType`.
         */
        this.processHeader = (line, producer) => {
            const { settings: { fileType, hasHeaderRow, delimiter } } = producer;
            switch (fileType) {
                case 'PARQUET':
                case 'XML':
                case 'XLS':
                case 'XLSX':
                case 'CSV':
                    return CSVParser_1.default.parseRow(line, delimiter);
                case 'TXT':
                    if (hasHeaderRow) {
                        return CSVParser_1.default.parseRow(line, delimiter);
                    }
                    return producer.dimensions.map((dimension) => keyOf(dimension));
                case 'JSON':
                case 'JSONL':
                    return Object.keys(JSON.parse(line));
                default:
                    // Unrecognised file types have always produced no header; kept explicit.
                    return undefined;
            }
        };
        /**
         * Maps every producer dimension to the position of its column in the header.
         *
         * Dimensions flagged `sourceFilename === true` have no column in the dataset,
         * so a virtual column carrying their name is appended to a copy of the header
         * before the lookup. The caller's `header` array is never modified.
         *
         * @param {string[]} header - Column keys of the underlying dataset.
         * @param {object} producer - Producer definition; `dimensions` and `name` are read.
         * @returns {{index: number, name: string, prodDimension: object}[]} One entry per dimension, in dimension order.
         * @throws {Error} When a dimension's key is not present in the (extended) header.
         */
        this.reconcileHeader = (header, producer) => {
            const myHeader = [...header];
            const producerDimensions = producer.dimensions;
            // NOTE(review): virtual columns are appended under `name` but resolved under
            // `alias ?? name` below, so a sourceFilename dimension whose alias differs from
            // its name (and is absent from the header) raises - confirm this is intended.
            for (const dimension of producerDimensions) {
                if (dimension.sourceFilename === true) {
                    myHeader.push(dimension.name);
                }
            }
            // Index the header once, keeping the FIRST position of any duplicated column,
            // so each dimension resolves in O(1) instead of rescanning the header.
            const firstIndexByKey = new Map();
            myHeader.forEach((column, position) => {
                if (!firstIndexByKey.has(column)) {
                    firstIndexByKey.set(column, position);
                }
            });
            const dimensions = [];
            for (const dimension of producerDimensions) {
                const key = keyOf(dimension);
                if (!firstIndexByKey.has(key)) {
                    throw new Error(`The dimension "${dimension.name}" (with key "${key}") of producer "${producer.name}" doesn't exist in the underlying dataset.`);
                }
                dimensions.push({
                    index: firstIndexByKey.get(key),
                    name: dimension.name,
                    prodDimension: dimension
                });
            }
            return dimensions;
        };
        /**
         * Converts one raw line into a record keyed by dimension name.
         *
         * @param {object} options
         * @param {string} options.line - Raw line; a falsy line yields `null`.
         * @param {object[]} options.dimensions - Reconciled dimensions (as returned by `reconcileHeader`).
         * @param {number} options.index - Line index; only validated here.
         * @param {object} options.producer - Producer definition.
         * @param {object} options.tracker - Receives timing samples through `measure(label, ms)`.
         * @returns {object|null} The parsed record, or `null` when `line` is falsy.
         */
        this.processLine = (options) => {
            const { line, dimensions, index, producer, tracker } = options;
            Affirm_1.default.hasValue(line, 'Invalid line');
            Affirm_1.default.hasValue(index, 'Invalid index');
            (0, Affirm_1.default)(producer, 'Invalid producer');
            if (!line) {
                return null;
            }
            // 1. map the underlying aliases to the new names AND cast to the correct type
            let startedAt = performance.now();
            const record = LineParser_1.default.parse(line.trim(), producer, dimensions, tracker);
            tracker.measure('process-line:line-parse', performance.now() - startedAt);
            startedAt = performance.now();
            for (const dimension of dimensions) {
                const { name, prodDimension } = dimension;
                // 2. apply source file name
                // TODO: to replace with the actual full filename and not just the fileKey
                if (prodDimension.sourceFilename === true) {
                    record[name] = producer.settings.fileKey;
                }
                // 3. mask (runs after step 2, so a filename value is masked as well)
                const maskType = ProducerManager_1.default.getMask(prodDimension);
                if (Algo_1.default.hasVal(maskType)) {
                    const current = record[name];
                    // Null and undefined values are handed to the hasher as undefined.
                    const text = current == null ? undefined : current.toString();
                    record[name] = CryptoEngine_1.default.hashValue(maskType, text, prodDimension.type);
                }
            }
            tracker.measure('process-line:dimensions-filename-mask', performance.now() - startedAt);
            return record;
        };
    }
}
// A single shared instance is created at module load and exposed as the
// module's default export.
const ProducerExecutor = new ProducerExecutorClass();
exports.default = ProducerExecutor;