UNPKG

@forzalabs/remora

Version:

A powerful CLI tool for seamless data translation.

205 lines (204 loc) 12.6 kB
"use strict";
// TypeScript-emitted helper: drives a generator function as a Promise chain
// (the down-levelled form of async/await). Reuses an existing `this.__awaiter` when present.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// TypeScript-emitted helper: wraps a non-ES module so that `.default` always resolves.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../core/Affirm"));
const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
const DeploymentPlanner_1 = __importDefault(require("./deployment/DeploymentPlanner"));
const Environment_1 = __importDefault(require("./Environment"));
const FileCompiler_1 = __importDefault(require("./file/FileCompiler"));
const SQLCompiler_1 = __importDefault(require("./sql/SQLCompiler"));
const SQLUtils_1 = __importDefault(require("./sql/SQLUtils"));
const ParseManager_1 = __importDefault(require("./parsing/ParseManager"));
/**
 * Compiles, deploys, executes and samples "producers".
 *
 * Every method looks up the producer's source through `Environment.getSource(producer.source)`
 * and dispatches on `source.engine`. Methods are assigned as arrow-function instance
 * properties, so they keep their `this` binding when detached from the instance.
 */
class ProducerEngineClass {
    constructor() {
        /**
         * Compiles a producer against its source.
         *
         * @param producer Producer definition; `producer.source` names the source to resolve.
         * @returns The compiled SQL for 'aws-redshift' / 'postgres' sources. For 'aws-s3' the
         *          columns are compiled and validated but nothing is returned (undefined).
         * @throws {Error} For any other engine type (affirmations are expected to fail on
         *          falsy inputs — behaviour of `Affirm` is defined outside this file).
         */
        this.compile = (producer) => {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const source = Environment_1.default.getSource(producer.source);
            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
            switch (source.engine) {
                case 'aws-redshift':
                case 'postgres': {
                    const sql = SQLCompiler_1.default.compileProducer(producer, source);
                    (0, Affirm_1.default)(sql, `Invalid SQL from compilation for producer "${producer.name}"`);
                    return sql;
                }
                case 'aws-s3': {
                    // Only validated here; the compiled columns are not handed back to the caller.
                    const columns = FileCompiler_1.default.compileProducer(producer, source);
                    (0, Affirm_1.default)(columns, `Invalid columns from compilation for producer "${producer.name}"`);
                    break;
                }
                default:
                    throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not implemented yet`);
            }
        };
        /**
         * Runs every step of the producer's deployment plan in order.
         *
         * The only step type handled is 'create-view', which issues a
         * `CREATE OR REPLACE VIEW` in the schema named by the `REMORA_SCHEMA` setting.
         *
         * @param producer Producer definition to deploy.
         * @returns Promise resolving to undefined once all steps have been executed.
         * @throws {Error} On an unknown plan step type.
         */
        this.deploy = (producer) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const source = Environment_1.default.getSource(producer.source);
            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
            const driver = yield DriverFactory_1.default.instantiateSource(source);
            (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
            const plan = DeploymentPlanner_1.default.planProducer(producer);
            for (const planStep of plan) {
                switch (planStep.type) {
                    case 'create-view': {
                        const internalSchema = Environment_1.default.get('REMORA_SCHEMA');
                        (0, Affirm_1.default)(internalSchema, `Missing "REMORA_SCHEMA" on project settings (needed due to "${producer.name}" wanting to create a view)`);
                        const sql = SQLCompiler_1.default.compileProducer(producer, source);
                        const vSQL = `CREATE OR REPLACE VIEW "${internalSchema}"."${SQLUtils_1.default.viewName(producer.name)}" AS ${sql}`;
                        yield driver.execute(vSQL);
                        break;
                    }
                    default:
                        throw new Error(`Invalid execution consumer plan step type "${planStep.type}"`);
                }
            }
        });
        /**
         * Executes a producer and returns its full data set.
         *
         * @param producer Producer definition to execute.
         * @returns Promise of `{ data, dataType }`: query rows as 'array-of-json' for
         *          'aws-redshift', or the result of `readFile` (read mode 'all') for 'aws-s3'.
         * @throws {Error} For any other engine type.
         */
        this.execute = (producer) => __awaiter(this, void 0, void 0, function* () {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const source = Environment_1.default.getSource(producer.source);
            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
            const driver = yield DriverFactory_1.default.instantiateSource(source);
            (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
            switch (source.engine) {
                case 'aws-redshift': {
                    const sql = SQLCompiler_1.default.compileProducer(producer, source);
                    (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for producer "${producer.name}"`);
                    const res = yield driver.query(sql);
                    return { data: res.rows, dataType: 'array-of-json' };
                }
                case 'aws-s3': {
                    return this.readFile(producer, { readmode: 'all' });
                }
                default:
                    throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not supported`);
            }
        });
        /**
         * Reads the producer's file through the source driver and shapes the result by file type.
         *
         * @param producer Producer definition; `settings.fileKey`, `settings.fileType` and
         *                 `settings.sheetName` are forwarded to the driver.
         * @param options  `{ readmode, lines? }`. With `readmode === 'lines'`, `lines.from` /
         *                 `lines.to` select the range read via `driver.readLinesInRange`;
         *                 any other read mode downloads the whole file via `driver.download`.
         * @returns Promise of `{ data, dataType }`:
         *          - CSV / TXT / XLS / XLSX: the raw lines, 'lines-of-text';
         *          - JSON / JSONL / XML: parsed values, 'array-of-json'.
         * @throws {Error} On an unsupported file type; JSON syntax errors from `JSON.parse`
         *          propagate for malformed JSON / JSONL / XML lines.
         */
        this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
            var _a;
            (0, Affirm_1.default)(producer, 'Invalid producer');
            (0, Affirm_1.default)(options, 'Invalid options');
            if (options.readmode === 'lines')
                (0, Affirm_1.default)(options.lines, 'Invalid lines');
            const source = Environment_1.default.getSource(producer.source);
            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
            const driver = yield DriverFactory_1.default.instantiateSource(source);
            (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
            const { settings: { fileKey, fileType, sheetName } } = producer;
            let lines = [];
            if (options.readmode === 'lines')
                lines = yield driver.readLinesInRange({ fileKey, fileType, options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName } });
            else
                lines = yield driver.download({ fileKey, fileType, options: { sheetName } });
            switch ((_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase()) {
                case 'CSV':
                case 'TXT':
                case 'XLS':
                case 'XLSX':
                    return { data: lines, dataType: 'lines-of-text' };
                case 'JSONL':
                case 'JSON': {
                    if (lines.length === 1) {
                        // A single entry may hold the whole document: a JSON array, a single JSON
                        // value, or several JSONL records that were not split into lines.
                        try {
                            const parsed = JSON.parse(lines[0]);
                            // Valid JSON as a whole: return it directly. Re-splitting it here would
                            // corrupt pretty-printed documents and strings containing escaped "\n".
                            return { data: Array.isArray(parsed) ? parsed : [parsed], dataType: 'array-of-json' };
                        }
                        catch (error) {
                            // Not a single JSON document: treat the content as newline-delimited records.
                            console.warn('Failed to parse single line as JSON array, splitting by newline:', error);
                        }
                        // Split on real line breaks (LF or CRLF), not on the two-character
                        // sequence backslash + "n", which only occurs inside JSON string escapes.
                        lines = lines[0].split(/\r?\n/);
                    }
                    const json = lines.filter(line => line.trim() !== '').map(x => JSON.parse(x));
                    return { data: json, dataType: 'array-of-json' };
                }
                case 'XML': {
                    // Per the original note, the driver yields one JSON string per XML record
                    // (driver behaviour is not visible in this file); each is parsed into an object.
                    const json = lines.filter(line => line.trim() !== '').map(x => JSON.parse(x));
                    return { data: json, dataType: 'array-of-json' };
                }
                default:
                    throw new Error(`Invalid file type "${producer.settings.fileType}" for engine type "${source.engine}" for producer "${producer.name}": not supported`);
            }
        });
        /**
         * Reads a small sample of the producer's data.
         *
         * @param producer   Producer definition to sample.
         * @param sampleSize Maximum number of records requested (default 10, must be > 0).
         * @param discover   Forwarded to `ParseManager.csvLinesToJson` for delimited and
         *                   spreadsheet files (default false).
         * @returns Promise of an array of sample records: query rows for 'aws-redshift',
         *          parsed file content for 'local' / 'aws-s3'.
         * @throws {Error} For any other engine type.
         */
        this.readSampleData = (producer_1, ...args_1) => __awaiter(this, [producer_1, ...args_1], void 0, function* (producer, sampleSize = 10, discover = false) {
            var _a;
            (0, Affirm_1.default)(producer, 'Invalid producer');
            (0, Affirm_1.default)(sampleSize > 0, 'Sample size must be greater than 0');
            const source = Environment_1.default.getSource(producer.source);
            (0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
            let sampleData = [];
            switch (source.engine) {
                case 'aws-redshift': {
                    const sql = `SELECT * FROM "${source.authentication['schema']}"."${producer.settings.sqlTable}" LIMIT ${sampleSize}`;
                    (0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for producer "${producer.name}"`);
                    const driver = yield DriverFactory_1.default.instantiateSource(source);
                    (0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
                    const res = yield driver.query(sql);
                    sampleData = res.rows;
                    break;
                }
                case 'local':
                case 'aws-s3': {
                    const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
                    const fileType = (_a = producer.settings.fileType) === null || _a === void 0 ? void 0 : _a.toUpperCase();
                    switch (fileType) {
                        case 'CSV':
                        case 'TXT':
                        case 'XLS':
                        case 'XLSX': {
                            sampleData = ParseManager_1.default.csvLinesToJson(fileData.data, producer, discover);
                            break;
                        }
                        case 'JSON':
                        case 'JSONL': {
                            // readFile already parses JSON / JSONL; only parse here if raw strings came back.
                            if (typeof fileData.data[0] === 'object')
                                sampleData = fileData.data;
                            else
                                sampleData = fileData.data.map(line => JSON.parse(line));
                            // A whole-document JSON array can exceed the requested range, so trim it.
                            sampleData = sampleData.slice(0, sampleSize);
                            break;
                        }
                        case 'XML': {
                            // readFile returns XML content as an array of parsed objects.
                            if (fileData.dataType === 'array-of-json' && Array.isArray(fileData.data)) {
                                sampleData = fileData.data;
                            }
                            else {
                                // Unexpected shape: report it and fall back to an empty sample.
                                console.warn('Unexpected data format for XML in readSampleData');
                                sampleData = [];
                            }
                            sampleData = sampleData.slice(0, sampleSize);
                            break;
                        }
                        default: {
                            sampleData = fileData.data;
                            break;
                        }
                    }
                    break;
                }
                default:
                    throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not supported`);
            }
            return sampleData;
        });
    }
}
const ProducerEngine = new ProducerEngineClass();
exports.default = ProducerEngine;