@forzalabs/remora

A powerful CLI tool for seamless data translation.

"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const Affirm_1 = __importDefault(require("../../core/Affirm")); const Algo_1 = __importDefault(require("../../core/Algo")); const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager")); const Environment_1 = __importDefault(require("../Environment")); class ExecutionPlannerClas { constructor() { this.getEngineClass = (engine) => { switch (engine) { case 'aws-dynamodb': return 'no-sql'; case 'aws-redshift': case 'postgres': return 'sql'; case 'aws-s3': return 'file'; case 'local': return 'local'; } }; this.plan = (consumer, options) => { (0, Affirm_1.default)(consumer, 'Invalid consumer'); const producersPlan = this._planProducers(consumer, options); const plan = [...producersPlan]; // At this point I have the data loaded in memory // TODO: can I handle streaming data? (e.g. a file that is too big to fit in memory) // TODO: how to handle pagination of SQL results? // Apply the transormations to the fields of the consumer // TODO: transformations can also be applied directly to the producer... how??? if (consumer.fields.some(x => Algo_1.default.hasVal(x.transform))) plan.push({ type: 'apply-transformations' }); const [source] = ConsumerManager_1.default.getSource(consumer); const engineClass = this.getEngineClass(source.engine); for (const output of consumer.outputs) { switch (output.format.toUpperCase()) { case 'JSON': { if (engineClass === 'file' && Algo_1.default.hasVal(options)) plan.push({ type: 'apply-execution-request-to-result' }); // TODO: test if it is needed and if it doesn't break soething else if (engineClass === 'sql') plan.push({ type: 'post-process-json' }); plan.push({ type: 'export-file', output }); break; } case 'CSV': case 'PARQUET': { plan.push({ type: 'export-file', output }); break; } case 'API': { if (engineClass === 'file' && Algo_1.default.hasVal(options)) plan.push({ type: 'apply-execution-request-to-result' }); break; } case 'SQL': { // TODO: what should I do here?? do I need to do anything? 
break; } default: throw new Error(`Output format "${output.format}" not supported`); } } return plan; }; this._planProducers = (consumer, options) => { (0, Affirm_1.default)(consumer, 'Invalid consumer'); const producers = consumer.producers.map(x => Environment_1.default.getProducer(x.name)); (0, Affirm_1.default)(producers, `Invalid producers on consumer "${consumer.name}"`); (0, Affirm_1.default)(producers.every(x => Algo_1.default.hasVal(x)), `One or more producers of consumer "${consumer.name}" not found.`); const sources = producers.map(x => Environment_1.default.getSource(x.source)); (0, Affirm_1.default)(sources, `Invalid sources on consumer "${consumer.name}"`); (0, Affirm_1.default)(sources.every(x => Algo_1.default.hasVal(x)), `One or more sources of consumer "${consumer.name}" not found.`); const engineClasses = sources.map(x => this.getEngineClass(x.engine)); const uniqEngineClasses = Algo_1.default.uniq(engineClasses); const plan = []; if (uniqEngineClasses.length === 1 && uniqEngineClasses[0] === 'sql') plan.push(...this._planProducer(producers[0], options)); else plan.push(...(producers.flatMap(x => this._planProducer(x, options)))); plan.push({ type: 'join-producers-data' }); if (consumer.filters && consumer.filters.length > 0) plan.push({ type: 'apply-consumer-filters-on-JSON' }); return plan; }; this._planProducer = (producer, options) => { var _a, _b; (0, Affirm_1.default)(producer, 'Invalid producer'); const source = Environment_1.default.getSource(producer.source); (0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`); const plan = []; const producerEngine = source.engine; switch (producerEngine) { case 'postgres': case 'aws-redshift': { plan.push({ type: 'compile-consumer-to-SQL', producer }); if (Algo_1.default.hasVal(options)) plan.push({ type: 'compile-execution-request-to-SQL', producer }); plan.push({ type: 'execute-SQL', source: source, producer }); break; } case 'local': case 'aws-s3': { if (Algo_1.default.hasVal(options) && (options.limit || options.offset)) plan.push({ type: 'read-file-lines', producer, lines: { from: (_a = options.offset) !== null && _a !== void 0 ? _a : 0, to: options.limit ? (options.offset + options.limit) : undefined } }); else plan.push({ type: 'read-file-whole', producer }); const fileType = (_b = producer.settings.fileType) === null || _b === void 0 ? void 0 : _b.toUpperCase(); if (fileType === 'CSV' || fileType === 'TXT' || fileType === 'XLS' || fileType === 'XLSX') plan.push({ type: 'csv-to-json', producer }); if (producer.dimensions.some(x => { var _a, _b; return ((_a = x.alias) === null || _a === void 0 ? void 0 : _a.includes('{')) || ((_b = x.alias) === null || _b === void 0 ? void 0 : _b.includes('[')); })) plan.push({ type: 'nested-field-unpacking', producer }); plan.push({ type: 'post-process-json', producer }); break; } default: throw new Error(`Engine "${producerEngine}" not supported`); } return plan; }; } } const ExecutionPlanner = new ExecutionPlannerClas(); exports.default = ExecutionPlanner;
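For context, a minimal usage sketch follows. The consumer and options shapes are inferred from the property accesses in the planner above (fields, producers, outputs, filters, limit, offset) and are assumptions, not the package's documented schema; the require path is also assumed, and the producer/source lookups only succeed once the surrounding Environment has been configured.

// Minimal sketch, assuming a local require path and a configured Environment
// that can resolve the referenced producer and its source.
const ExecutionPlanner = require('./ExecutionPlanner').default;

// Hypothetical consumer definition; names and values are illustrative only.
const consumer = {
    name: 'orders-report',
    producers: [{ name: 'orders' }],
    fields: [{ name: 'total', transform: 'round' }],
    filters: [],
    outputs: [{ format: 'JSON' }],
};

// Optional execution request; limit and offset are the options the planner inspects.
const plan = ExecutionPlanner.plan(consumer, { limit: 100, offset: 0 });

// `plan` is an ordered array of step descriptors, e.g. (for a file-backed source)
// [{ type: 'read-file-lines', ... }, { type: 'csv-to-json', ... },
//  { type: 'post-process-json', ... }, { type: 'join-producers-data' },
//  { type: 'apply-transformations' }, { type: 'export-file', output: {...} }]
console.log(plan.map(step => step.type));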