@forzalabs/remora
Version:
A powerful CLI tool for seamless data translation.
177 lines (176 loc) • 8.33 kB
JavaScript
;
/**
 * Compiler-emitted helper that drives a generator function as an async function.
 *
 * Each value the generator yields is adopted into a promise of type `P`
 * (falling back to the global `Promise`); the settled value is fed back into
 * the generator via `next`, or a rejection via `throw`. The returned promise
 * resolves with the generator's return value and rejects with anything the
 * generator throws.
 *
 * @param {*} thisArg - `this` binding used when invoking the generator function.
 * @param {Array} [_arguments] - Arguments forwarded to the generator function.
 * @param {Function} [P] - Promise constructor to use; defaults to `Promise`.
 * @param {Function} generator - Generator function to run.
 * @returns {Promise} Promise for the generator's final return value.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pass instances of P through untouched; wrap anything else in a resolved P.
    function toPromise(candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (settle) { settle(candidate); });
    }
    if (!P) {
        P = Promise;
    }
    return new P(function (resolve, reject) {
        // Either finish with the generator's return value or wait on the yielded one.
        function advance(iteration) {
            if (iteration.done) {
                resolve(iteration.value);
                return;
            }
            toPromise(iteration.value).then(onFulfilled, onRejected);
        }
        function onFulfilled(settled) {
            try {
                advance(generator.next(settled));
            }
            catch (failure) {
                reject(failure);
            }
        }
        function onRejected(reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (failure) {
                reject(failure);
            }
        }
        // From here on `generator` refers to the iterator, not the generator function.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
/**
 * Compiler-emitted interop helper for default imports.
 *
 * A module object flagged with `__esModule` is returned unchanged; any other
 * value is wrapped so that it is reachable through a `default` property.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself, or `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module as a transpiled ES module so that interop helpers
// such as __importDefault return it unchanged.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the export; the real binding is assigned after `sample` is defined.
exports.sample = void 0;
// Third-party terminal helpers: chalk (coloured text) and ora (spinner).
const chalk_1 = __importDefault(require("chalk"));
const ora_1 = __importDefault(require("ora"));
// Project modules used to look up resources and read sample rows.
const Environment_1 = __importDefault(require("../engines/Environment"));
const ProducerEngine_1 = __importDefault(require("../engines/producer/ProducerEngine"));
const DatasetRecord_1 = __importDefault(require("../engines/dataset/DatasetRecord"));
const compile_1 = require("./compile");
const Helper_1 = __importDefault(require("../helper/Helper"));
/**
 * CLI command: print a sample of rows from a named producer or consumer.
 *
 * Runs `compile()` first, then looks the resource up as a producer and as a
 * consumer (a producer wins when both exist), reads up to `sampleSize` rows and
 * renders them as a table on stdout.
 *
 * The process exits with code 1 when the resource is unknown or when any step
 * throws; the error message (and the stack, when `Helper.isDev()` is true) is
 * printed first.
 *
 * @param {string} resourceName - Name of the producer or consumer to sample.
 * @param {number} [sampleSize=10] - Maximum number of rows to request.
 * @returns {Promise<void>}
 */
const sample = (resourceName_1, ...args_1) => __awaiter(void 0, [resourceName_1, ...args_1], void 0, function* (resourceName, sampleSize = 10) {
    // Declared outside the try block so the catch block can stop a spinner that
    // is still animating; otherwise it keeps redrawing over the error output.
    let spinner;
    try {
        (0, compile_1.compile)();
        console.log(); // needed for newline
        spinner = (0, ora_1.default)(chalk_1.default.blue('Sampling dataset...')).start();
        // Try to find the resource as a producer first, then as a consumer
        const producer = Environment_1.default.getProducer(resourceName);
        const consumer = Environment_1.default.getConsumer(resourceName);
        if (!producer && !consumer) {
            spinner.fail(chalk_1.default.red(`Resource "${resourceName}" not found. Please check if it exists as a producer or consumer.`));
            process.exit(1);
            // Only reached if process.exit has been stubbed (e.g. in tests);
            // never fall through with neither resource defined.
            return;
        }
        let sampleData;
        let resourceType;
        if (producer) {
            resourceType = 'Producer';
            spinner.text = chalk_1.default.blue(`Sampling from producer "${resourceName}"...`);
            sampleData = yield ProducerEngine_1.default.readSampleData(producer, sampleSize, false);
        }
        else {
            resourceType = 'Consumer';
            spinner.text = chalk_1.default.blue(`Sampling from consumer "${resourceName}"...`);
            sampleData = yield sampleFromConsumer(consumer, sampleSize);
        }
        const resourceLabel = `${resourceType.toLowerCase()} "${resourceName}"`;
        spinner.succeed(chalk_1.default.green(`Sample data retrieved from ${resourceLabel}`));
        if (sampleData.length === 0) {
            console.log(chalk_1.default.yellow('No data found in the dataset.'));
            return;
        }
        // Display the sample data between two horizontal rules
        const rule = chalk_1.default.gray('─'.repeat(80));
        console.log(chalk_1.default.cyan(`\n📊 Sample Data (showing ${sampleData.length} rows):`));
        console.log(rule);
        displayDataAsTable(sampleData);
        console.log(rule);
        console.log(chalk_1.default.green(`✅ Successfully sampled ${sampleData.length} rows from ${resourceLabel}`));
    }
    catch (err) {
        // Stop the animation before writing the error so the two do not interleave.
        if (spinner && spinner.isSpinning) {
            spinner.stop();
        }
        const myErr = Helper_1.default.asError(err);
        console.error(chalk_1.default.red.bold('\n❌ Error during sampling:'), myErr.message);
        if (Helper_1.default.isDev())
            console.log(myErr.stack);
        process.exit(1);
    }
});
exports.sample = sample;
/**
 * Builds a preview of a consumer's output from sample rows of its first source.
 *
 * The first entry in `consumer.producers` is resolved as a producer and, if no
 * producer has that name, as another consumer (sampled recursively). The
 * consumer's own field list is then applied to the upstream rows in both cases:
 * a wildcard field (`"*"`) returns the rows untouched, otherwise each row is
 * reduced to the listed fields, keyed by `alias` when one is set.
 *
 * @param {object} consumer - Consumer definition (`name`, `producers`, `fields`).
 * @param {number} sampleSize - Maximum number of rows to request upstream.
 * @param {Set<string>} [visited] - Consumer names already on the resolution
 *   path; used internally to detect circular consumer references.
 * @returns {Promise<Array>} Sample records shaped like the consumer's output.
 * @throws {Error} When the consumer has no producers, its first source cannot
 *   be resolved, or consumers reference each other in a cycle.
 */
const sampleFromConsumer = (consumer, sampleSize, visited = new Set()) => __awaiter(void 0, void 0, void 0, function* () {
    // A consumer chain that loops back on itself would otherwise recurse forever.
    if (visited.has(consumer.name)) {
        throw new Error(`Circular consumer reference detected at "${consumer.name}"`);
    }
    visited.add(consumer.name);
    // For consumers, we need to get sample data from the first producer
    // and then apply the consumer's field mappings to show what the output would look like
    const firstProducerRef = (consumer.producers || [])[0];
    if (!firstProducerRef) {
        throw new Error(`Consumer "${consumer.name}" has no producers configured`);
    }
    const producer = Environment_1.default.getProducer(firstProducerRef.name);
    let upstreamData;
    if (producer) {
        // Get raw sample data from the producer
        upstreamData = yield ProducerEngine_1.default.readSampleData(producer, sampleSize, false);
    }
    else {
        const subConsumer = Environment_1.default.getConsumer(firstProducerRef.name);
        if (!subConsumer) {
            throw new Error(`Producer or consumer "${firstProducerRef.name}" not found for consumer "${consumer.name}"`);
        }
        // The source is itself a consumer: sample its output, then map it below
        // exactly like producer rows so this consumer's own fields are honoured.
        upstreamData = yield sampleFromConsumer(subConsumer, sampleSize, visited);
    }
    const fields = consumer.fields || [];
    // For consumers with wildcard fields ("*"), return all data as-is
    if (fields.some(field => field.key === '*')) {
        return upstreamData;
    }
    // For consumers with specific field mappings, show only the mapped fields
    // This gives users a preview of what the consumer output would look like
    return upstreamData.map(record => {
        const mappedRecord = new DatasetRecord_1.default('', [], record._delimiter);
        fields.forEach(field => {
            const outputKey = field.alias || field.key;
            mappedRecord.setValue(outputKey, record.getValue(field.key));
        });
        return mappedRecord;
    });
});
/**
 * Prints sample records to stdout as a bordered text table.
 *
 * Columns are the union of keys found in each record's `_value`, in first-seen
 * order. Every column is as wide as its widest visible cell or header, capped
 * at 30 characters; longer cells and headers are cut and end in "...".
 * Rows alternate between white and gray for readability.
 *
 * Cell text comes from `formatValue`, which may wrap it in ANSI colour codes,
 * so widths are measured and text is truncated on the visible characters only.
 *
 * @param {Array<{_value: Object}>} data - Records to display; nothing is
 *   printed for an empty array.
 * @returns {void}
 */
const displayDataAsTable = (data) => {
    if (data.length === 0)
        return;
    // Limit column width to prevent overly wide tables
    const maxColumnWidth = 30;
    const ellipsis = '...';
    // SGR colour/style escape sequences; they occupy no columns on screen.
    const ansiSequence = /\u001B\[[0-9;]*m/g;
    const ansiSplitter = /(\u001B\[[0-9;]*m)/;
    // Number of characters the terminal will actually draw for `text`.
    const visibleLength = (text) => text.replace(ansiSequence, '').length;
    // Pads or truncates `text` to exactly `width` visible characters while
    // keeping every escape sequence intact so colours are always reset.
    const fitToWidth = (text, width) => {
        const shown = visibleLength(text);
        if (shown <= width) {
            return text + ' '.repeat(width - shown);
        }
        let budget = Math.max(width - ellipsis.length, 0);
        let truncated = false;
        let result = '';
        // split() with a capture group alternates plain text (even positions)
        // and escape sequences (odd positions).
        text.split(ansiSplitter).forEach((part, position) => {
            if (position % 2 === 1) {
                result += part;
                return;
            }
            if (truncated) {
                return;
            }
            if (part.length <= budget) {
                result += part;
                budget -= part.length;
                return;
            }
            result += part.slice(0, budget) + ellipsis;
            truncated = true;
        });
        return result;
    };
    // Get all unique field names from the sample data
    const fieldNames = new Set();
    for (const record of data) {
        for (const key of Object.keys(record._value)) {
            fieldNames.add(key);
        }
    }
    const fields = Array.from(fieldNames);
    // Format every cell once; reused for both measuring and printing.
    const rows = data.map((record) => fields.map((field) => formatValue(record._value[field])));
    // Calculate column widths: start from the header, widen to the data, then cap.
    const widths = fields.map((field, column) => {
        const widest = rows.reduce((max, cells) => Math.max(max, visibleLength(cells[column])), field.length);
        return Math.min(widest, maxColumnWidth);
    });
    // Print header
    const headerRow = fields
        .map((field, column) => chalk_1.default.bold(fitToWidth(field, widths[column])))
        .join(' │ ');
    console.log('│ ' + headerRow + ' │');
    // Print separator
    const separator = widths.map((width) => '─'.repeat(width)).join('─┼─');
    console.log('├─' + separator + '─┤');
    // Print data rows
    rows.forEach((cells, index) => {
        const dataRow = cells
            .map((cell, column) => fitToWidth(cell, widths[column]))
            .join(' │ ');
        // Alternate row colors for better readability
        const painted = index % 2 === 0
            ? chalk_1.default.white(dataRow)
            : chalk_1.default.gray(dataRow);
        console.log('│ ' + painted + ' │');
    });
};
/**
 * Converts a cell value to the (possibly coloured) text shown in the table.
 *
 * - `null` / `undefined`  -> dim "null"
 * - string                -> returned unchanged
 * - number / bigint       -> cyan decimal text
 * - boolean               -> yellow "true" / "false"
 * - Date                  -> magenta ISO-8601 text, or "Invalid Date"
 * - anything else         -> dim JSON, falling back to the value's type tag
 *                            (e.g. "[object Object]") when it cannot be
 *                            serialized
 *
 * This function never throws, so one odd value cannot abort the whole display.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} Display text.
 */
const formatValue = (value) => {
    if (value === null || value === undefined) {
        return chalk_1.default.dim('null');
    }
    switch (typeof value) {
        case 'string':
            return value;
        case 'number':
        case 'bigint':
            return chalk_1.default.cyan(value.toString());
        case 'boolean':
            return chalk_1.default.yellow(value.toString());
        default:
            break;
    }
    if (value instanceof Date) {
        // toISOString() throws a RangeError on an invalid date.
        const text = Number.isNaN(value.getTime()) ? 'Invalid Date' : value.toISOString();
        return chalk_1.default.magenta(text);
    }
    let serialized;
    try {
        serialized = JSON.stringify(value);
    }
    catch (err) {
        // Circular structures and nested BigInt values cannot be serialized;
        // fall through to the type-tag fallback instead of failing the display.
        serialized = undefined;
    }
    // JSON.stringify also yields undefined for functions and symbols.
    if (serialized === undefined) {
        serialized = Object.prototype.toString.call(value);
    }
    return chalk_1.default.dim(serialized);
};