@forzalabs/remora
Version:
A powerful CLI tool for seamless data translation.
149 lines (148 loc) • 9.11 kB
JavaScript
"use strict";
/**
 * Compiler-emitted helper that drives a generator function to completion and
 * exposes the outcome as a promise (the down-levelled form of an async function).
 * An `__awaiter` already present on `this` is reused instead of redefining it.
 *
 * @param thisArg `this` value the generator function is invoked with.
 * @param _arguments Arguments forwarded to the generator function (defaults to `[]`).
 * @param P Promise constructor to use; falls back to the global `Promise` when falsy.
 * @param generator Generator function whose yielded values are awaited.
 * @returns A promise resolved with the generator's return value, or rejected with
 *          any error thrown while advancing the generator.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wraps a yielded value in a `P` promise unless it already is an instance of `P`.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resumes the generator with the resolved value; a throw while advancing rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throws the rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Settles the outer promise when the generator is done, otherwise waits on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (replacing the `generator` binding with the iterator) and run its first step.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
/**
 * Compiler-emitted interop helper for default imports.
 * A value flagged with `__esModule` is returned untouched; anything else is
 * wrapped so that it becomes reachable through a `default` property.
 * An `__importDefault` already present on `this` is reused.
 */
var __importDefault = (this && this.__importDefault) || function (moduleExports) {
    if (moduleExports && moduleExports.__esModule) {
        return moduleExports;
    }
    return { "default": moduleExports };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../../core/Affirm"));
const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
const PostProcessor_1 = __importDefault(require("../consumer/PostProcessor"));
const FileExporter_1 = __importDefault(require("../file/FileExporter"));
const ParseManager_1 = __importDefault(require("../parsing/ParseManager"));
const ProducerEngine_1 = __importDefault(require("../ProducerEngine"));
const SQLBuilder_1 = __importDefault(require("../sql/SQLBuilder"));
const SQLCompiler_1 = __importDefault(require("../sql/SQLCompiler"));
const ExecutionPlanner_1 = __importDefault(require("./ExecutionPlanner"));
const RequestExecutor_1 = __importDefault(require("./RequestExecutor"));
const TransformationEngine_1 = __importDefault(require("../transform/TransformationEngine"));
const JoinEngine_1 = __importDefault(require("../transform/JoinEngine"));
/**
 * Runs the execution plan of a single consumer and collects its output.
 *
 * Steps that work on one producer exchange their data through a per-producer
 * intermediate store (`_producedData`, keyed by producer name). The join step
 * merges that store into `_resultingData`, which the remaining steps transform
 * and which is finally returned as `result.data`.
 */
class ExecutionEnvironment {
    /**
     * @param consumer Consumer definition the plan is built and executed for.
     */
    constructor(consumer) {
        // Key under which a step's data is stored: the producer name, or
        // '_default_' when the step has no producer (or the producer has no name).
        const intermediateKeyOf = (step) => {
            const producer = step.producer;
            const name = producer === null || producer === void 0 ? void 0 : producer.name;
            return name !== null && name !== void 0 ? name : '_default_';
        };
        /**
         * Plans and executes every step needed to produce the consumer's output.
         *
         * @param options Execution request; forwarded to the planner, the SQL
         *                builder and the request executor.
         * @returns Promise of `{ shape, data, _elapsedMS }`, plus `fileUri` when
         *          an 'export-file' step ran.
         * @throws When the consumer or plan is invalid, a step is missing a
         *         required field, or the plan contains an unknown step type.
         */
        this.run = (options) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(this._consumer, 'Invalid consumer');
            const plan = ExecutionPlanner_1.default.plan(this._consumer, options);
            (0, Affirm_1.default)(plan, `Invalid execution plan`);
            (0, Affirm_1.default)(plan.length > 0, `Empty execution plan`);
            const start = performance.now();
            const result = { shape: ConsumerEngine_1.default.getOutputShape(this._consumer), _elapsedMS: -1 };
            for (const planStep of plan) {
                switch (planStep.type) {
                    case 'compile-consumer-to-SQL': {
                        const sql = SQLCompiler_1.default.getConsumerReference(this._consumer);
                        this._envData.consumerSQL = sql;
                        // Until an execution request is compiled, the consumer SQL is what gets executed.
                        this._envData.finalSQL = sql;
                        break;
                    }
                    case 'compile-execution-request-to-SQL': {
                        const sql = SQLBuilder_1.default.buildConsumerQuery(options);
                        this._envData.executionRequestSQL = sql;
                        // Wrap the consumer query in a CTE and append the request clauses to a select over it.
                        this._envData.finalSQL = `WITH consumer AS (${this._envData.consumerSQL})\nSELECT * FROM consumer${this._envData.executionRequestSQL}`;
                        break;
                    }
                    case 'execute-SQL': {
                        (0, Affirm_1.default)(planStep.source, `Invalid source in execute-SQL step`);
                        // Fail early with a clear message instead of handing a missing query to the driver.
                        (0, Affirm_1.default)(this._envData.finalSQL, `Invalid SQL in execute-SQL step`);
                        const driver = yield DriverFactory_1.default.instantiateSource(planStep.source);
                        const queryData = (yield driver.query(this._envData.finalSQL)).rows;
                        this._storeIntermidiate(planStep, queryData);
                        break;
                    }
                    case 'read-file-whole': {
                        (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-whole step`);
                        const fileData = yield ProducerEngine_1.default.readFile(planStep.producer, { readmode: 'all' });
                        this._storeIntermidiate(planStep, fileData.data);
                        break;
                    }
                    case 'read-file-lines': {
                        (0, Affirm_1.default)(planStep.lines, `Invalid lines in read-file-lines step`);
                        (0, Affirm_1.default)(planStep.producer, `Invalid producer in read-file-lines step`);
                        const { producer, lines: { from, to } } = planStep;
                        const fileData = yield ProducerEngine_1.default.readFile(producer, { readmode: 'lines', lines: { from, to } });
                        this._storeIntermidiate(planStep, fileData.data);
                        break;
                    }
                    case 'nested-field-unpacking': {
                        (0, Affirm_1.default)(planStep.producer, `Invalid producer in nested-field-unpacking step`);
                        // Unpack the data produced for this producer. `_resultingData` is only
                        // populated by the join and later steps, so reading it here would unpack
                        // an empty array and overwrite the producer's dataset with the result.
                        const producedData = this._getIntermidiate(planStep);
                        const unpackedData = PostProcessor_1.default.unpack(producedData, planStep.producer);
                        this._storeIntermidiate(planStep, unpackedData);
                        break;
                    }
                    case 'post-process-json': {
                        const myProdData = this._getIntermidiate(planStep);
                        const processedData = PostProcessor_1.default.doProjection(this._consumer, myProdData);
                        this._storeIntermidiate(planStep, processedData);
                        break;
                    }
                    case 'csv-to-json': {
                        // NOTE(review): these two checks inspect `_resultingData`, not the CSV
                        // fetched below; presumably they were meant for `csv` - confirm before changing.
                        (0, Affirm_1.default)(this._resultingData, 'Invalid data');
                        (0, Affirm_1.default)(Array.isArray(this._resultingData), 'Invalid data type, must be an array');
                        (0, Affirm_1.default)(planStep.producer, `Invalid producer in csv-to-json step`);
                        const csv = this._getIntermidiate(planStep);
                        const jsonData = ParseManager_1.default.csvToJson(csv, planStep.producer);
                        this._storeIntermidiate(planStep, jsonData);
                        break;
                    }
                    case 'export-file': {
                        (0, Affirm_1.default)(planStep.output, `Invalid output in export-file step`);
                        const res = yield FileExporter_1.default.export(this._consumer, planStep.output, this._resultingData);
                        result.fileUri = res;
                        break;
                    }
                    case 'apply-execution-request-to-result': {
                        this._resultingData = RequestExecutor_1.default.execute(this._resultingData, options);
                        break;
                    }
                    case 'apply-consumer-filters-on-JSON': {
                        this._resultingData = RequestExecutor_1.default._applyFilters(this._resultingData, this._consumer.filters.map(x => x.rule));
                        break;
                    }
                    case 'apply-transformations': {
                        this._resultingData = TransformationEngine_1.default.apply(this._consumer, this._resultingData);
                        break;
                    }
                    case 'join-producers-data': {
                        // From here on the plan works on the joined dataset instead of per-producer data.
                        const joinedData = JoinEngine_1.default.join(this._consumer, this._producedData);
                        this._resultingData = joinedData;
                        break;
                    }
                    default: throw new Error(`Invalid execution plan step type "${planStep.type}"`);
                }
            }
            result.data = this._resultingData;
            result._elapsedMS = performance.now() - start;
            return result;
        });
        /**
         * Stores (or replaces) the dataset produced for the step's producer.
         *
         * @param step Plan step whose producer identifies the dataset.
         * @param data Dataset to keep for later steps.
         */
        this._storeIntermidiate = (step, data) => {
            (0, Affirm_1.default)(step, 'Invalid step');
            const key = intermediateKeyOf(step);
            const existing = this._producedData.find(x => x.producerKey === key);
            if (existing) {
                existing.data = data;
                return;
            }
            this._producedData.push({ producerKey: key, data });
        };
        /**
         * Returns the dataset previously stored for the step's producer.
         *
         * @param step Plan step whose producer identifies the dataset.
         * @throws When nothing has been stored for that producer yet.
         */
        this._getIntermidiate = (step) => {
            (0, Affirm_1.default)(step, 'Invalid step');
            const key = intermediateKeyOf(step);
            const produced = this._producedData.find(x => x.producerKey === key);
            (0, Affirm_1.default)(produced, `No produced dataset found for step "${step.type}" of producer "${key}".`);
            return produced.data;
        };
        this._consumer = consumer;
        // SQL fragments accumulated by the compile steps; `finalSQL` is what 'execute-SQL' runs.
        this._envData = { consumerSQL: null, executionRequestSQL: null, finalSQL: null };
        // Per-producer intermediate datasets: [{ producerKey, data }].
        this._producedData = [];
        // Dataset returned as `result.data`; assigned by the join and apply-* steps.
        this._resultingData = [];
    }
}
exports.default = ExecutionEnvironment;