@forzalabs/remora
Version:
A powerful CLI tool for seamless data translation.
123 lines (122 loc) • 7.01 kB
JavaScript
;
// CommonJS/ES-module interop helper: reuses an existing `this.__importDefault` when
// one is present, otherwise falls back to the local implementation below.
// The fallback returns `mod` untouched when it is flagged `__esModule`, and wraps
// anything else as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    const isEsModule = Boolean(mod && mod.__esModule);
    if (isEsModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../../core/Affirm"));
const Algo_1 = __importDefault(require("../../core/Algo"));
const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
const Environment_1 = __importDefault(require("../Environment"));
/**
 * Builds the ordered list of steps (the "execution plan") for a consumer.
 *
 * Each step is a plain object with a `type` string plus the data that step
 * carries (`producer`, `source`, `output`, `lines`). This class only assembles
 * the list; it does not run any of the steps.
 */
class ExecutionPlannerClas {
    constructor() {
        /**
         * Maps a source engine name to its broad engine class.
         *
         * @param {string} engine - Engine name as stored on a source.
         * @returns {('no-sql'|'sql'|'file'|'local'|undefined)} The engine class,
         *   or `undefined` when the engine is not one of the known names.
         */
        this.getEngineClass = (engine) => {
            switch (engine) {
                case 'aws-dynamodb': return 'no-sql';
                case 'aws-redshift':
                case 'postgres': return 'sql';
                case 'aws-s3': return 'file';
                case 'local': return 'local';
                default: return undefined;
            }
        };
        /**
         * Builds the full plan for a consumer: the producer steps first, then an
         * optional transformation step, then the steps required by each output.
         *
         * @param {object} consumer - Consumer definition; `fields` and `outputs`
         *   are read here, `producers`/`filters`/`name` by `_planProducers`.
         * @param {object} [options] - Execution request (e.g. `limit`, `offset`).
         * @returns {object[]} The ordered plan steps.
         * @throws {Error} When the consumer is missing (via Affirm) or an output
         *   format is not supported.
         */
        this.plan = (consumer, options) => {
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            const producersPlan = this._planProducers(consumer, options);
            const plan = [...producersPlan];
            // At this point I have the data loaded in memory
            // TODO: can I handle streaming data? (e.g. a file that is too big to fit in memory)
            // TODO: how to handle pagination of SQL results?
            // Apply the transformations to the fields of the consumer
            // TODO: transformations can also be applied directly to the producer... how???
            if (consumer.fields.some(x => Algo_1.default.hasVal(x.transform))) {
                plan.push({ type: 'apply-transformations' });
            }
            const [source] = ConsumerManager_1.default.getSource(consumer);
            const engineClass = this.getEngineClass(source.engine);
            // NOTE(review): the 'local' engine maps to class 'local', not 'file', so the
            // `engineClass === 'file'` checks below only match 'aws-s3' - confirm this is intended.
            for (const output of consumer.outputs) {
                switch (output.format.toUpperCase()) {
                    case 'JSON': {
                        if (engineClass === 'file' && Algo_1.default.hasVal(options)) {
                            plan.push({ type: 'apply-execution-request-to-result' });
                        }
                        // TODO: test if it is needed and if it doesn't break something else
                        if (engineClass === 'sql') {
                            plan.push({ type: 'post-process-json' });
                        }
                        plan.push({ type: 'export-file', output });
                        break;
                    }
                    case 'CSV':
                    case 'PARQUET': {
                        plan.push({ type: 'export-file', output });
                        break;
                    }
                    case 'API': {
                        if (engineClass === 'file' && Algo_1.default.hasVal(options)) {
                            plan.push({ type: 'apply-execution-request-to-result' });
                        }
                        break;
                    }
                    case 'SQL': {
                        // TODO: what should I do here?? do I need to do anything?
                        break;
                    }
                    default:
                        throw new Error(`Output format "${output.format}" not supported`);
                }
            }
            return plan;
        };
        /**
         * Plans the steps that load the data of every producer of a consumer,
         * followed by the join step and, when the consumer has filters, the
         * filter step.
         *
         * @param {object} consumer - Consumer whose `producers` are resolved by name.
         * @param {object} [options] - Execution request forwarded to `_planProducer`.
         * @returns {object[]} The ordered plan steps.
         * @throws {Error} When a producer or one of its sources cannot be resolved
         *   (via Affirm), or when a producer uses an unsupported engine.
         */
        this._planProducers = (consumer, options) => {
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            const producers = consumer.producers.map(x => Environment_1.default.getProducer(x.name));
            (0, Affirm_1.default)(producers, `Invalid producers on consumer "${consumer.name}"`);
            (0, Affirm_1.default)(producers.every(x => Algo_1.default.hasVal(x)), `One or more producers of consumer "${consumer.name}" not found.`);
            const sources = producers.map(x => Environment_1.default.getSource(x.source));
            (0, Affirm_1.default)(sources, `Invalid sources on consumer "${consumer.name}"`);
            (0, Affirm_1.default)(sources.every(x => Algo_1.default.hasVal(x)), `One or more sources of consumer "${consumer.name}" not found.`);
            const engineClasses = sources.map(x => this.getEngineClass(x.engine));
            const uniqEngineClasses = Algo_1.default.uniq(engineClasses);
            const plan = [];
            const isAllSql = uniqEngineClasses.length === 1 && uniqEngineClasses[0] === 'sql';
            if (isAllSql) {
                // Only the first producer is planned when every source is SQL-class
                // (presumably the whole consumer compiles to one statement - TODO confirm).
                plan.push(...this._planProducer(producers[0], options));
            }
            else {
                plan.push(...producers.flatMap(x => this._planProducer(x, options)));
            }
            plan.push({ type: 'join-producers-data' });
            if (consumer.filters && consumer.filters.length > 0) {
                plan.push({ type: 'apply-consumer-filters-on-JSON' });
            }
            return plan;
        };
        /**
         * Plans the steps that load the data of a single producer, based on the
         * engine of its source.
         *
         * @param {object} producer - Producer definition; `source`, `name`,
         *   `settings.fileType` and `dimensions` are read here.
         * @param {object} [options] - Execution request; `limit` and `offset`
         *   select a line range for file-based engines.
         * @returns {object[]} The ordered plan steps for this producer.
         * @throws {Error} When the producer or its source is missing (via Affirm)
         *   or the engine is not supported.
         */
        this._planProducer = (producer, options) => {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const source = Environment_1.default.getSource(producer.source);
            (0, Affirm_1.default)(source, `Source "${producer.source}" of producer "${producer.name}" not found.`);
            const plan = [];
            const producerEngine = source.engine;
            switch (producerEngine) {
                case 'postgres':
                case 'aws-redshift': {
                    plan.push({ type: 'compile-consumer-to-SQL', producer });
                    if (Algo_1.default.hasVal(options)) {
                        plan.push({ type: 'compile-execution-request-to-SQL', producer });
                    }
                    plan.push({ type: 'execute-SQL', source: source, producer });
                    break;
                }
                case 'local':
                case 'aws-s3': {
                    if (Algo_1.default.hasVal(options) && (options.limit || options.offset)) {
                        // A missing offset means "start at line 0". The upper bound is derived
                        // from the defaulted `from`, so `{ limit }` alone does not yield NaN.
                        const from = (options.offset !== null && options.offset !== undefined) ? options.offset : 0;
                        const to = options.limit ? (from + options.limit) : undefined;
                        plan.push({ type: 'read-file-lines', producer, lines: { from, to } });
                    }
                    else {
                        plan.push({ type: 'read-file-whole', producer });
                    }
                    const rawFileType = producer.settings.fileType;
                    const fileType = (rawFileType === null || rawFileType === undefined) ? undefined : rawFileType.toUpperCase();
                    if (fileType === 'CSV' || fileType === 'TXT' || fileType === 'XLS' || fileType === 'XLSX') {
                        plan.push({ type: 'csv-to-json', producer });
                    }
                    // An alias containing '{' or '[' triggers the nested-field unpacking step.
                    const hasNestedAlias = producer.dimensions.some(x => {
                        const alias = x.alias;
                        if (alias === null || alias === undefined) {
                            return false;
                        }
                        return alias.includes('{') || alias.includes('[');
                    });
                    if (hasNestedAlias) {
                        plan.push({ type: 'nested-field-unpacking', producer });
                    }
                    plan.push({ type: 'post-process-json', producer });
                    break;
                }
                default: throw new Error(`Engine "${producerEngine}" not supported`);
            }
            return plan;
        };
    }
}
// Single planner instance created at module load and exposed as the module's default export.
const ExecutionPlanner = new ExecutionPlannerClas();
exports.default = ExecutionPlanner;