@forzalabs/remora
A powerful CLI tool for seamless data translation.
"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../../core/Affirm"));
const DriverFactory_1 = __importDefault(require("../../drivers/DriverFactory"));
const Environment_1 = __importDefault(require("../Environment"));
const DatasetManager_1 = __importDefault(require("../dataset/DatasetManager"));
const Logger_1 = __importDefault(require("../../helper/Logger"));
class ProducerEngineClass {
constructor() {
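// readFile: resolves the producer's source and driver, reads the underlying file
// (a line range, the whole file, or a raw download), then normalizes the result by
// file type into lines of text (CSV/TXT/XLS/XLSX) or an array of parsed JSON objects
// (JSON/JSONL/PARQUET/XML).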
this.readFile = (producer, options) => __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c;
(0, Affirm_1.default)(producer, 'Invalid producer');
(0, Affirm_1.default)(options, 'Invalid options');
if (options.readmode === 'lines')
(0, Affirm_1.default)(options.lines, 'Invalid lines');
const source = Environment_1.default.getSource(producer.source);
(0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
const driver = yield DriverFactory_1.default.instantiateSource(source);
(0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
const { settings: { fileKey, fileType, sheetName, hasHeaderRow } } = producer;
let dataset = DatasetManager_1.default.create(producer);
let lines = [];
switch (options.readmode) {
case 'lines':
lines = yield driver.readLinesInRange({
fileKey,
fileType,
options: { lineFrom: options.lines.from, lineTo: options.lines.to, sheetName, hasHeaderRow },
httpApi: (_a = producer.settings) === null || _a === void 0 ? void 0 : _a.httpApi
});
break;
case 'all':
lines = yield driver.readAll({
fileKey, fileType,
options: { sheetName, hasHeaderRow },
httpApi: (_b = producer.settings) === null || _b === void 0 ? void 0 : _b.httpApi
});
break;
case 'download':
dataset = yield driver.download(dataset);
break;
}
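// Interpret the raw lines according to the producer's declared file type.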
switch ((_c = producer.settings.fileType) === null || _c === void 0 ? void 0 : _c.toUpperCase()) {
case 'CSV':
case 'TXT':
return { data: lines, dataset, dataType: 'lines-of-text' };
case 'XLS':
case 'XLSX':
return { data: lines, dataset, dataType: 'lines-of-text' };
case 'PARQUET':
case 'JSONL':
case 'JSON': {
if (lines.length === 1) {
// Handle the case where a single line contains multiple JSON objects separated by newlines,
// or where the entire file content is a single stringified JSON array.
try {
const parsedAsArray = JSON.parse(lines[0]);
if (Array.isArray(parsedAsArray)) {
return { data: parsedAsArray, dataset, dataType: 'array-of-json' };
}
}
catch (error) {
// If parsing as array fails, proceed to split by newline
console.warn('Failed to parse single line as JSON array, splitting by newline:', error);
}
lines = lines[0].split('\n');
}
const json = lines.filter(line => line.trim() !== '').map(x => JSON.parse(x));
return { data: json, dataset, dataType: 'array-of-json' };
}
case 'XML': {
// The driver's _readXmlLines method now returns an array of JSON strings.
// Each string needs to be parsed into a JSON object.
const json = lines.filter(line => line.trim() !== '').map(x => JSON.parse(x));
return { data: json, dataset, dataType: 'array-of-json' };
}
default:
throw new Error(`Invalid file type "${producer.settings.fileType}" for engine type "${source.engine}" for producer "${producer.name}": not supported`);
}
});
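// readSampleData: loads up to `sampleSize` rows from the producer's source into a
// temporary dataset (passing the `discover` flag through to the dataset loader) and
// returns the sampled rows.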
this.readSampleData = (producer_1, ...args_1) => __awaiter(this, [producer_1, ...args_1], void 0, function* (producer, sampleSize = 10, discover = false) {
(0, Affirm_1.default)(producer, 'Invalid producer');
(0, Affirm_1.default)(sampleSize > 0, 'Sample size must be greater than 0');
const source = Environment_1.default.getSource(producer.source);
(0, Affirm_1.default)(source, `No source found for producer "${producer.name}" with name "${producer.source}"`);
let dataset = DatasetManager_1.default.create(producer);
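// How the sample is fetched depends on the source engine: a SQL LIMIT query for
// aws-redshift, or a line-range read via readFile for file-backed sources.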
switch (source.engine) {
case 'aws-redshift': {
const sql = `SELECT * FROM "${source.authentication['schema']}"."${producer.settings.sqlTable}" LIMIT ${sampleSize}`;
(0, Affirm_1.default)(sql, `Invalid SQL from deployment compilation for producer "${producer.name}"`);
const driver = yield DriverFactory_1.default.instantiateSource(source);
(0, Affirm_1.default)(driver, `No driver found for producer "${producer.name}" with driver type "${source.engine}"`);
const res = yield driver.query(sql);
dataset = yield dataset.loadFromMemory(res.rows, producer, discover);
break;
}
case 'local':
case 'aws-s3':
case 'delta-share': {
const fileData = yield this.readFile(producer, { readmode: 'lines', lines: { from: 0, to: sampleSize } });
dataset = yield dataset.loadFromMemory(fileData.data, producer, discover);
break;
}
default:
throw new Error(`Invalid engine type "${source.engine}" for producer "${producer.name}": not supported`);
}
const sampleData = [...yield dataset.readLines(sampleSize)];
Logger_1.default.log(`Finished reading sample dataset:\n${dataset.printStats()}`);
dataset.destroy();
return sampleData;
});
}
}
const ProducerEngine = new ProducerEngineClass();
exports.default = ProducerEngine;
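// Example usage (sketch only; the require path and the "ordersProducer" object are
// illustrative assumptions, not defined in this file):
//   const ProducerEngine = require('./ProducerEngine').default;
//   // inside an async function:
//   const sample = await ProducerEngine.readSampleData(ordersProducer, 10);
//   const file = await ProducerEngine.readFile(ordersProducer, { readmode: 'all' });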