/*
 * @forzalabs/remora
 * Version:
 * A powerful CLI tool for seamless data translation.
 * 217 lines (216 loc) • 13.7 kB
 * JavaScript
 */
;
// Module interop helper: reuses an already-installed `this.__importDefault` when
// one exists; otherwise returns modules flagged with `__esModule` unchanged and
// wraps anything else as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (loaded) {
    if (loaded && loaded.__esModule) {
        return loaded;
    }
    return { "default": loaded };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../../core/Affirm"));
const Algo_1 = __importDefault(require("../../core/Algo"));
const ConsumerManager_1 = __importDefault(require("../consumer/ConsumerManager"));
const Environment_1 = __importDefault(require("../Environment"));
/**
 * Validates the configuration objects of a project (sources, producers, consumers).
 *
 * Every `validate*` method first passes its argument to `Affirm` and then returns an
 * array of human-readable error messages; an empty array means no problem was found.
 * Unexpected exceptions raised while validating are caught and reported as a single
 * generic message, but only when no other error has been collected yet.
 */
class ValidatorClass {
    constructor() {
        /**
         * Checks a list of sources for duplicate names and for local sources with no path.
         * @param {Array<object>} sources - source definitions (`name`, `engine`, `authentication`).
         * @returns {string[]} the validation errors found.
         */
        this.validateSources = (sources) => {
            (0, Affirm_1.default)(sources, 'Invalid sources');
            const errors = [];
            try {
                const dupes = Algo_1.default.duplicatesObject(sources, 'name');
                if (dupes.length > 0)
                    errors.push(`Duplicate name(s) found in sources: "${dupes.map(x => x.name).join(', ')}"`);
                for (const source of sources) {
                    // A missing "authentication" object is reported like a missing path
                    // instead of surfacing as a TypeError through the generic catch below.
                    const hasPath = Boolean(source.authentication && source.authentication.path);
                    if (source.engine === 'local' && !hasPath)
                        errors.push(`For source ${source.name}, the path has not been configured`);
                }
            }
            catch (e) {
                if (errors.length === 0)
                    errors.push(`There was an error in the validation Sources. (error: ${e})`);
            }
            return errors;
        };
        /**
         * Checks a list of producers for duplicate names.
         * @param {Array<object>} producers - producer definitions.
         * @returns {string[]} the validation errors found.
         */
        this.validateProducers = (producers) => {
            (0, Affirm_1.default)(producers, 'Invalid producers');
            const errors = [];
            try {
                const dupes = Algo_1.default.duplicatesObject(producers, 'name');
                if (dupes.length > 0)
                    errors.push(`Duplicate name(s) found in producers: "${dupes.map(x => x.name).join(', ')}"`);
            }
            catch (e) {
                if (errors.length === 0)
                    errors.push(`There was an error in the validation Producers. (error: ${e})`);
            }
            return errors;
        };
        /**
         * Validates a single producer: presence of `source`, dimension naming and the
         * constraints attached to dimensions flagged with `sourceFilename: true`.
         * @param {object} producer - producer definition (`name`, `source`, `dimensions`, `settings`).
         * @returns {string[]} the validation errors found.
         */
        this.validateProducer = (producer) => {
            (0, Affirm_1.default)(producer, 'Invalid producer');
            const errors = [];
            try {
                if (!producer.source || producer.source.length === 0)
                    errors.push(`Missing parameter "source" in producer`);
                if (producer.dimensions.some(x => x.name.includes('{') || x.name.includes('[')))
                    errors.push(`Invalid dimension name found in producer "${producer.name}": can't use characters "{" or "[" in dimension names`);
                // Validate sourceFilename dimension usage
                const sourceFilenameDimensions = producer.dimensions.filter(x => x.sourceFilename === true);
                if (sourceFilenameDimensions.length > 1) {
                    errors.push(`Producer "${producer.name}" has multiple dimensions with sourceFilename=true. Only one dimension can have this flag.`);
                }
                if (sourceFilenameDimensions.length > 0) {
                    const source = Environment_1.default.getSource(producer.source);
                    // When the source cannot be resolved these checks are skipped.
                    if (source) {
                        const validEngines = ['local', 'aws-s3'];
                        if (!validEngines.includes(source.engine)) {
                            errors.push(`Producer "${producer.name}" has a dimension with sourceFilename=true but the source engine "${source.engine}" doesn't support this feature. Only "local" and "aws-s3" sources support sourceFilename.`);
                        }
                        // Both settings are required (as the message states), so the error is
                        // raised as soon as either one is missing; an absent "settings" object
                        // counts as both missing.
                        const settings = producer.settings || {};
                        if (!settings.fileKey || !settings.fileType) {
                            errors.push(`Producer "${producer.name}" has a dimension with sourceFilename=true but is not a file-based producer. sourceFilename requires fileKey and fileType to be set.`);
                        }
                    }
                }
            }
            catch (e) {
                if (errors.length === 0)
                    errors.push(`There was an error in the validation Producer. (error: ${e})`);
            }
            return errors;
        };
        /**
         * Checks a list of consumers for duplicate names.
         * @param {Array<object>} consumers - consumer definitions.
         * @returns {string[]} the validation errors found.
         */
        this.validateConsumers = (consumers) => {
            (0, Affirm_1.default)(consumers, 'Invalid consumers');
            const errors = [];
            try {
                const dupes = Algo_1.default.duplicatesObject(consumers, 'name');
                if (dupes.length > 0)
                    errors.push(`Duplicate name(s) found in consumers: "${dupes.map(x => x.name).join(', ')}"`);
            }
            catch (e) {
                if (errors.length === 0)
                    errors.push(`There was an error in the validation Consumers. (error: ${e})`);
            }
            return errors;
        };
        /**
         * Validates a single consumer: its producers and their sources, filters,
         * field transformations, outputs and distinct options.
         * @param {object} consumer - consumer definition (`name`, `fields`, `producers`,
         *   `filters`, `outputs`, `options`).
         * @returns {string[]} the validation errors found.
         */
        this.validateConsumer = (consumer) => {
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            const errors = [];
            // Name a field is referenced by: its alias when set, its key otherwise.
            const fieldName = (field) => (field.alias !== null && field.alias !== undefined ? field.alias : field.key);
            // Collects the errors of every "combine_fields" transformation found in the fields.
            const validateTransformations = (fields) => {
                const trxErrors = [];
                for (const field of fields.filter(x => x.transform)) {
                    // A field carries either one transformation or a list of them.
                    const trxToValidate = Array.isArray(field.transform) ? [...field.transform] : [field.transform];
                    for (const trans of trxToValidate) {
                        if (!('combine_fields' in trans))
                            continue;
                        const { combine_fields } = trans;
                        if (!combine_fields.fields || combine_fields.fields.length === 0) {
                            trxErrors.push(`The "combine_field" transformation is missing the "fields" property ("${field.key}").`);
                            // Nothing to look up; going on would dereference a missing list
                            // and lose the error recorded above.
                            continue;
                        }
                        const missingFieldsInConsumer = combine_fields.fields
                            .map(x => ({ field: x, found: fields.find(k => fieldName(k) === x) }))
                            .filter(x => !x.found);
                        if (missingFieldsInConsumer.length > 0)
                            trxErrors.push(`The requested field(s) for a transformation is missing in the consumer -> missing field(s): "${missingFieldsInConsumer.map(x => x.field).join(', ')}"; field transformation: "${fieldName(field)}";`);
                    }
                }
                return trxErrors;
            };
            try {
                // TODO: check that a consumer doesn't consume itself
                const allFieldsWithNoFrom = consumer.fields.filter(x => x.key === '*' && !x.from);
                if (allFieldsWithNoFrom.length > 0 && consumer.producers.length > 1)
                    errors.push(`Field with key "*" was used without specifying the "from" producer and multiple producers were found.`);
                // Validation on producers
                if (consumer.producers.length === 0)
                    errors.push(`Consumer must have at least 1 producer.`);
                const producers = consumer.producers.map(x => Environment_1.default.getProducer(x.name));
                if (producers.length === 0)
                    errors.push('No producers found');
                if (producers.some(x => !x))
                    errors.push(`Invalid producer found in consumer "${consumer.name}"`);
                if (consumer.producers.length > 0) {
                    // A producer that is not a union is counted on the "joins" side even
                    // when it declares no joins.
                    const withJoins = consumer.producers.filter(x => (Algo_1.default.hasVal(x.joins) && x.joins.length > 0) || !x.union);
                    const withUnions = consumer.producers.filter(x => x.union === true);
                    if (withJoins.length > 0 && withUnions.length > 0)
                        errors.push(`Multiple producers in consumer have mixed "joins" and "union": you can either have multiple producers with "joins" or multiple producers with "union", but not both (joins: ${withJoins.map(x => x.name).join(', ')}; unions: ${withUnions.map(x => x.name).join(', ')})`);
                }
                // Validation on sources
                // An unresolved producer makes this lookup throw; the catch below then adds
                // nothing because "Invalid producer found" is already recorded, and the
                // remaining checks are skipped.
                const sources = producers.map(x => Environment_1.default.getSource(x.source));
                if (sources.length === 0)
                    errors.push('No sources found');
                if (sources.some(x => !x))
                    errors.push(`Invalid source found in consumer "${consumer.name}"`);
                // For now we only support connecting producers of the same engine type to a consumer, so we give an error if we detect different ones
                // NOTE(review): if uniqBy returns the source objects rather than the engine
                // values, the joined list in these two messages will not be readable - confirm.
                const uniqEngines = Algo_1.default.uniqBy(sources, 'engine');
                if (uniqEngines.length !== 1)
                    errors.push(`Sources with different engines are used in the consumer "${consumer.name}" (${uniqEngines.join(', ')})`);
                // For now we also only support consumers that have producers ALL having the same exact source
                const uniqNames = Algo_1.default.uniqBy(sources, 'name');
                if (uniqNames.length !== 1)
                    errors.push(`Producers with different sources are used in the consumer "${consumer.name}" (${uniqNames.join(', ')})`);
                if (consumer.filters && consumer.filters.length > 0) {
                    if (consumer.filters.some(x => x.sql && x.rule))
                        errors.push(`A single consumer can't have both filters based on SQL and filters based on rules.`);
                    const [source] = ConsumerManager_1.default.getSource(consumer);
                    const engineClass = this.getEngineClass(source.engine);
                    // NOTE(review): the "local" engine maps to class "local", not "file", so
                    // SQL filters on local sources are not rejected here - confirm intended.
                    if (engineClass === 'file' && consumer.filters.some(x => x.sql))
                        errors.push(`Filters based on SQL are only valid for SQL based sources. (source: ${source.name})`);
                    if (engineClass === 'sql' && consumer.filters.some(x => x.rule))
                        errors.push(`Filters based on rules are only valid for non-SQL based sources. (source: ${source.name})`);
                    // Check that the members of the rules are present in the consumer
                    const allFields = consumer.fields;
                    const ruleBasedFilters = consumer.filters.filter(x => x.rule);
                    const missingRules = ruleBasedFilters
                        .map(x => ({ match: allFields.find(k => fieldName(k) === x.rule.member), rule: x }))
                        .filter(x => !x.match);
                    if (missingRules.length > 0) {
                        errors.push(`Filter(s) on member(s) "${missingRules.map(x => x.rule.rule.member).join(', ')}" is invalid since the member specified is not present in the consumer. Check the member value or add the missing field to the consumer.`);
                    }
                }
                errors.push(...validateTransformations(consumer.fields));
                // Validation outputs
                const duplicatesOutputs = Algo_1.default.duplicatesObject(consumer.outputs, 'format');
                if (duplicatesOutputs.length > 0) {
                    const duplicatesTypes = Algo_1.default.uniq(duplicatesOutputs.map(x => x.format));
                    errors.push(`There are outputs with the same type. (duplicates type: ${duplicatesTypes.join(' and ')})`);
                }
                for (const output of consumer.outputs) {
                    const format = output.format.toUpperCase();
                    if (format === 'SQL' && output.accelerated && output.direct)
                        errors.push(`An output SQL cannot be both direct and accelerated (output: ${format})`);
                    if (format === 'CSV' || format === 'JSON' || format === 'PARQUET') {
                        if (!output.exportDestination)
                            errors.push(`A static file output must have an export destination set (${format})`);
                        else if (!Environment_1.default.getSource(output.exportDestination))
                            errors.push(`The export destination "${output.exportDestination}" was not found in the sources.`);
                    }
                }
                // Validate distinct
                if (consumer.options) {
                    if (Algo_1.default.hasVal(consumer.options.distinct) && Algo_1.default.hasVal(consumer.options.distinctOn))
                        errors.push(`Can't specify a "distinct" and a "distinctOn" clause on the same consumer (${consumer.name}); use one or the other.`);
                }
            }
            catch (e) {
                if (errors.length === 0)
                    errors.push(`There was an error in the validation Consumer. (error: ${e})`);
            }
            return errors;
        };
        /**
         * Maps a source engine identifier to its engine class.
         * @param {string} engine - engine identifier (e.g. 'postgres', 'aws-s3', 'local').
         * @returns {('no-sql'|'sql'|'file'|'local'|undefined)} the engine class, or
         *   `undefined` for an engine that is not listed.
         */
        this.getEngineClass = (engine) => {
            switch (engine) {
                case 'aws-dynamodb': return 'no-sql';
                case 'aws-redshift':
                case 'postgres': return 'sql';
                case 'delta-share':
                case 'aws-s3': return 'file';
                case 'local': return 'local';
                default: return undefined;
            }
        };
    }
}
// A single ValidatorClass instance is created when the module loads and is
// exposed as the module's default export.
const Validator = new ValidatorClass();
exports.default = Validator;