// @forzalabs/remora
// Version:
// A powerful CLI tool for seamless data translation.
// 205 lines (204 loc) • 12.6 kB
// JavaScript
;
/**
 * Runs a generator function as an asynchronous routine and returns a promise for its result.
 * Reuses `this.__awaiter` when one is already present on `this`; otherwise defines the helper below.
 * NOTE(review): this has the shape of the helper TypeScript emits for down-levelled `async` functions — confirm against the build config.
 *
 * @param thisArg    `this` value the generator function is applied with.
 * @param _arguments Arguments the generator function is applied with (an empty list when falsy).
 * @param P          Promise constructor to use; falls back to the global `Promise` when falsy.
 * @param generator  Generator function whose yielded values are awaited one by one.
 * @returns A `P` instance that resolves with the generator's final value, or rejects with any
 *          error thrown while advancing the generator.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wraps a yielded value in a `P` unless it already is an instance of `P`.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resumes the generator with the fulfilled value of the awaited promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throws the rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Settles the outer promise when the generator is done; otherwise waits on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiates the generator (replacing the `generator` binding with the iterator) and starts it.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
/**
 * Normalises a required module so that its default export is always reachable as `.default`.
 * Reuses `this.__importDefault` when one is already present on `this`.
 *
 * @param mod The value returned by `require(...)`.
 * @returns `mod` itself when it is truthy and flagged with `__esModule`; otherwise a new
 *          object of the form `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../core/Affirm"));
const DriverFactory_1 = __importDefault(require("../drivers/DriverFactory"));
const DeploymentPlanner_1 = __importDefault(require("./deployment/DeploymentPlanner"));
const Environment_1 = __importDefault(require("./Environment"));
const FileCompiler_1 = __importDefault(require("./file/FileCompiler"));
const SQLCompiler_1 = __importDefault(require("./sql/SQLCompiler"));
const SQLUtils_1 = __importDefault(require("./sql/SQLUtils"));
const ParseManager_1 = __importDefault(require("./parsing/ParseManager"));
/**
 * Engine that compiles, deploys, executes and samples producers.
 *
 * A producer references a source by name (`producer.source`); the source is looked up through
 * `Environment.getSource` and its `engine` field selects the code path. All methods are
 * instance-bound arrow functions created in the constructor, so they can be passed around
 * without losing `this`.
 */
class ProducerEngineClass {
    constructor() {
        // Thin wrapper so the assertion helper is resolved at call time and invoked unbound.
        const affirm = (condition, message) => (0, Affirm_1.default)(condition, message);

        // Looks up the producer's source and asserts that it exists.
        const requireSource = (producer) => {
            const source = Environment_1.default.getSource(producer.source);
            affirm(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
            return source;
        };

        // Instantiates the driver for a source and asserts that one was returned.
        const requireDriver = (producer, source) => __awaiter(this, void 0, void 0, function* () {
            const driver = yield DriverFactory_1.default.instantiateSource(source);
            affirm(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
            return driver;
        });

        // Parses every non-blank line as JSON (arrow wrapper keeps map's index out of JSON.parse).
        const parseJsonLines = (lines) => lines
            .filter((line) => line.trim() !== '')
            .map((line) => JSON.parse(line));

        // Upper-cases the configured file type, tolerating a missing value.
        const normalizedFileType = (producer) => {
            const { fileType } = producer.settings;
            return fileType === null || fileType === undefined ? undefined : fileType.toUpperCase();
        };

        /**
         * Compiles a producer for its source engine.
         *
         * @param producer Producer definition (`name`, `source`, ...).
         * @returns The compiled SQL for `aws-redshift` / `postgres` sources. For `aws-s3` the
         *          file columns are compiled and validated but nothing is returned.
         * @throws When the producer or its source is missing, the compilation result is falsy
         *         (via Affirm), or the engine is not handled.
         */
        this.compile = (producer) => {
            affirm(producer, 'Invalid producer');
            const source = requireSource(producer);
            switch (source.engine) {
                case 'aws-redshift':
                case 'postgres': {
                    const sql = SQLCompiler_1.default.compileProducer(producer, source);
                    affirm(sql, `Invalid SQL from compilation for producer "${producer.name}"`);
                    return sql;
                }
                case 'aws-s3': {
                    const columns = FileCompiler_1.default.compileProducer(producer, source);
                    affirm(columns, `Invalid columns from compilation for producer "${producer.name}"`);
                    // NOTE(review): the columns are only validated, never returned — confirm
                    // callers do not expect them before changing this.
                    return undefined;
                }
                default:
                    throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not implemented yet`);
            }
        };

        /**
         * Executes every step of the deployment plan computed for the producer.
         * The only step type handled is `create-view`, which (re)creates a view inside the
         * schema configured under `REMORA_SCHEMA`.
         *
         * @param producer Producer definition.
         * @returns Promise resolved once all plan steps have been executed.
         * @throws When the producer, source, driver, `REMORA_SCHEMA` setting or compiled SQL is
         *         missing, or when a plan step has an unknown type.
         */
        this.deploy = (producer) => __awaiter(this, void 0, void 0, function* () {
            affirm(producer, 'Invalid producer');
            const source = requireSource(producer);
            const driver = yield requireDriver(producer, source);
            const plan = DeploymentPlanner_1.default.planProducer(producer);
            for (const planStep of plan) {
                switch (planStep.type) {
                    case 'create-view': {
                        const internalSchema = Environment_1.default.get('REMORA_SCHEMA');
                        affirm(internalSchema, `Missing "REMORA_SCHEMA" on project settings (needed due to "${producer.name}" wanting to create a view)`);
                        const sql = SQLCompiler_1.default.compileProducer(producer, source);
                        // Fail early instead of sending "... AS undefined" to the database.
                        affirm(sql, `Invalid SQL from deployment compilation for producer "${producer.name}"`);
                        const vSQL = `CREATE OR REPLACE VIEW "${internalSchema}"."${SQLUtils_1.default.viewName(producer.name)}" AS ${sql}`;
                        yield driver.execute(vSQL);
                        break;
                    }
                    default:
                        throw new Error(`Invalid execution producer plan step type "${planStep.type}"`);
                }
            }
        });

        /**
         * Runs the producer against its source and returns all of its data.
         *
         * @param producer Producer definition.
         * @returns `{ data, dataType }`: the query rows as `'array-of-json'` for `aws-redshift`,
         *          or whatever `readFile` returns (read mode `'all'`) for `aws-s3`.
         * @throws When the producer, source, driver or compiled SQL is missing, or the engine is
         *         not handled.
         */
        this.execute = (producer) => __awaiter(this, void 0, void 0, function* () {
            affirm(producer, 'Invalid producer');
            const source = requireSource(producer);
            const driver = yield requireDriver(producer, source);
            switch (source.engine) {
                case 'aws-redshift': {
                    const sql = SQLCompiler_1.default.compileProducer(producer, source);
                    affirm(sql, `Invalid SQL from deployment compilation for producer "${producer.name}"`);
                    const res = yield driver.query(sql);
                    return { data: res.rows, dataType: 'array-of-json' };
                }
                case 'aws-s3':
                    return this.readFile(producer, { readmode: 'all' });
                default:
                    throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not supported`);
            }
        });

        /**
         * Reads the file behind a producer, either entirely or a range of lines.
         *
         * @param producer Producer definition; `settings.fileKey`, `settings.fileType` and
         *                 `settings.sheetName` are forwarded to the driver.
         * @param options  `{ readmode, lines? }`. With `readmode === 'lines'`, `lines.from` and
         *                 `lines.to` are required and passed to `driver.readLinesInRange`; any
         *                 other read mode downloads the whole file with `driver.download`.
         * @returns `{ data, dataType }`: raw lines as `'lines-of-text'` for CSV/TXT/XLS/XLSX,
         *          parsed objects as `'array-of-json'` for JSON/JSONL/XML.
         * @throws When inputs, source or driver are missing, the file type is not handled, or a
         *         line is not valid JSON (SyntaxError from JSON.parse).
         */
        this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
            affirm(producer, 'Invalid producer');
            affirm(options, 'Invalid options');
            const readsRange = options.readmode === 'lines';
            if (readsRange)
                affirm(options.lines, 'Invalid lines');
            const source = requireSource(producer);
            const driver = yield requireDriver(producer, source);
            const { settings: { fileKey, fileType, sheetName } } = producer;
            let lines = [];
            if (readsRange) {
                lines = yield driver.readLinesInRange({
                    fileKey,
                    fileType,
                    options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName }
                });
            }
            else {
                lines = yield driver.download({ fileKey, fileType, options: { sheetName } });
            }
            switch (normalizedFileType(producer)) {
                case 'CSV':
                case 'TXT':
                case 'XLS':
                case 'XLSX':
                    return { data: lines, dataType: 'lines-of-text' };
                case 'JSONL':
                case 'JSON': {
                    if (lines.length === 1) {
                        // A single chunk can be a whole JSON document (array or single value)
                        // or several JSON objects separated by line breaks.
                        let document;
                        let isWholeDocument = false;
                        try {
                            document = JSON.parse(lines[0]);
                            isWholeDocument = true;
                        }
                        catch (error) {
                            // Not one JSON document: fall back to one JSON value per line.
                            console.warn('Failed to parse single line as JSON array, splitting by newline:', error);
                        }
                        if (isWholeDocument) {
                            const rows = Array.isArray(document) ? document : [document];
                            return { data: rows, dataType: 'array-of-json' };
                        }
                        // Split on real line breaks (LF or CRLF). Splitting on the two-character
                        // sequence backslash + "n" would cut through escaped newlines inside
                        // JSON strings and never separate actual lines.
                        lines = lines[0].split(/\r?\n/);
                    }
                    return { data: parseJsonLines(lines), dataType: 'array-of-json' };
                }
                case 'XML':
                    // Each non-blank line is parsed as a JSON string.
                    // NOTE(review): relies on the driver emitting one JSON string per XML
                    // record — confirm in the driver.
                    return { data: parseJsonLines(lines), dataType: 'array-of-json' };
                default:
                    throw new Error(`Invalid file type "${producer.settings.fileType}" for engine type "${source.engine}" for producer "${producer.name}": not supported`);
            }
        });

        /**
         * Reads a small sample of the producer's data.
         *
         * @param producer   Producer definition.
         * @param sampleSize Requested number of rows (default 10); must be greater than 0.
         * @param discover   Forwarded to `ParseManager.csvLinesToJson` for delimited/sheet files
         *                   (default false).
         * @returns The sampled rows: query rows for `aws-redshift`; for `local` / `aws-s3`,
         *          parsed rows according to the producer's file type.
         * @throws When the producer or source is missing, the sample size is not positive, or
         *         the engine is not handled.
         */
        this.readSampleData = (producer, sampleSize = 10, discover = false) => __awaiter(this, void 0, void 0, function* () {
            affirm(producer, 'Invalid producer');
            affirm(sampleSize > 0, 'Sample size must be greater than 0');
            const source = requireSource(producer);
            switch (source.engine) {
                case 'aws-redshift': {
                    const sql = `SELECT * FROM "${source.authentication['schema']}"."${producer.settings.sqlTable}" LIMIT ${sampleSize}`;
                    const driver = yield requireDriver(producer, source);
                    const res = yield driver.query(sql);
                    return res.rows;
                }
                case 'local':
                case 'aws-s3': {
                    const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
                    switch (normalizedFileType(producer)) {
                        case 'CSV':
                        case 'TXT':
                        case 'XLS':
                        case 'XLSX':
                            return ParseManager_1.default.csvLinesToJson(fileData.data, producer, discover);
                        case 'JSON':
                        case 'JSONL': {
                            // readFile normally returns parsed objects already; raw strings
                            // are parsed here as a fallback.
                            const rows = typeof fileData.data[0] === 'object'
                                ? fileData.data
                                : fileData.data.map((line) => JSON.parse(line));
                            return rows.slice(0, sampleSize);
                        }
                        case 'XML': {
                            if (fileData.dataType === 'array-of-json' && Array.isArray(fileData.data))
                                return fileData.data.slice(0, sampleSize);
                            // Unexpected shape: report it and return an empty sample.
                            console.warn('Unexpected data format for XML in readSampleData');
                            return [];
                        }
                        default:
                            return fileData.data;
                    }
                }
                default:
                    throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not supported`);
            }
        });
    }
}
// Single ProducerEngineClass instance created at module load and exposed as the module's default export.
const ProducerEngine = new ProducerEngineClass();
exports.default = ProducerEngine;