@forzalabs/remora
Version:
A powerful CLI tool for seamless data translation.
145 lines (144 loc) • 8.08 kB
JavaScript
;
// CommonJS/ES-module interop helper: reuse an already-installed helper when
// `this` carries one, otherwise fall back to a local implementation.
var __importDefault = (this && this.__importDefault) || function (loadedModule) {
    // Modules flagged as ES modules already expose their own `default` binding.
    if (loadedModule && loadedModule.__esModule) {
        return loadedModule;
    }
    // Anything else is wrapped so callers can uniformly read `.default`.
    return { "default": loadedModule };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Affirm_1 = __importDefault(require("../../core/Affirm"));
const Environment_1 = __importDefault(require("../Environment"));
const ConsumerEngine_1 = __importDefault(require("../consumer/ConsumerEngine"));
/**
 * In-memory join of the datasets produced by a consumer's producers.
 *
 * Every operation is an arrow function assigned in the constructor, so each
 * one stays bound to the instance even when passed around as a bare callback.
 */
class JoinEngineClass {
    constructor() {
        /**
         * Verifies that `fieldName` is a dimension or a measure of the producer
         * registered under `producerName` in the Environment.
         *
         * @param {string} fieldName - Field referenced by a join condition.
         * @param {string} producerName - Name used to look the producer up.
         * @throws {Error} If the producer is unknown, or the field is neither
         *   one of its dimensions nor one of its measures.
         */
        this.validateFieldInProducer = (fieldName, producerName) => {
            const producer = Environment_1.default.getProducer(producerName);
            if (!producer) {
                throw new Error(`Producer ${producerName} not found`);
            }
            const hasDimension = producer.dimensions.some(d => d.name === fieldName);
            // `measures` may be absent on a producer; treat that as "no measures".
            const hasMeasure = producer.measures?.some(m => m.name === fieldName) ?? false;
            if (hasDimension || hasMeasure)
                return;
            const availableFields = producer.dimensions
                .map(d => d.name)
                .concat(producer.measures?.map(m => m.name) ?? []);
            throw new Error(`Field '${fieldName}' not found in producer '${producerName}'. Available fields: ${availableFields.join(', ')}`);
        };
        /**
         * Verifies that `fieldName` is one of the dimensions of the consumer's
         * output shape.
         *
         * @param {string} fieldName - Field referenced by a join condition.
         * @param {{name: string, dimensions: Array<{name: string}>}} consumerShape
         * @throws {Error} If no dimension of the shape carries that name.
         */
        this.validateFieldInConsumer = (fieldName, consumerShape) => {
            const hasField = consumerShape.dimensions.some(x => x.name === fieldName);
            if (!hasField)
                throw new Error(`Field '${fieldName}' not found in consumer '${consumerShape.name}'. Your join condition must be on fields that are present in the consumer.`);
        };
        /**
         * Parses a join condition such as `${P.id} = ${orders.user_id}` into its
         * two producer/field references. The alias `P` stands for `producer`.
         * Whitespace around the producer and field tokens is ignored, so
         * `${ P.id }` is accepted as well.
         *
         * @param {string} sql - Join condition containing exactly two `${...}` placeholders.
         * @param {{name: string}} producer - Producer that declares the join.
         * @returns {{leftProducer: string, leftField: string, rightProducer: string, rightField: string}}
         * @throws {Error} If the condition does not hold exactly two placeholders,
         *   a placeholder lacks a field after the dot, or a referenced field
         *   does not exist in its producer.
         */
        this.parseJoinCondition = (sql, producer) => {
            // Extract field names from SQL condition like ${P.id} = ${orders.user_id}
            const regex = /\${([^}]+)}/g;
            const matches = Array.from(sql.matchAll(regex));
            if (matches.length !== 2)
                throw new Error(`Invalid join condition: ${sql}. Expected format: \${P.field} = \${producer.field}`);
            // Trim every token so spacing inside the braces does not leak into
            // the producer or field names.
            const [left, right] = matches.map(m => m[1].split('.').map(token => token.trim()));
            const [leftProducer, leftField] = left;
            const [rightProducer, rightField] = right;
            if (!leftField || !rightField)
                throw new Error(`Invalid join condition: ${sql}. Both sides must specify a field name after the dot.`);
            // Replace P with actual producer name
            const actualLeftProducer = leftProducer === 'P' ? producer.name : leftProducer;
            const actualRightProducer = rightProducer === 'P' ? producer.name : rightProducer;
            // Validate both fields exist in their respective producers
            this.validateFieldInProducer(leftField, actualLeftProducer);
            this.validateFieldInProducer(rightField, actualRightProducer);
            return {
                leftProducer: actualLeftProducer,
                leftField: leftField,
                rightProducer: actualRightProducer,
                rightField: rightField
            };
        };
        /**
         * Returns the `data` of the produced entry whose `producerKey` equals
         * `producerName`.
         *
         * @param {string} producerName
         * @param {Array<{producerKey: string, data: *}>} producedData
         * @returns {*} The `data` property of the matching entry.
         * @throws {Error} If no entry matches.
         */
        this.findProducerData = (producerName, producedData) => {
            const entry = producedData.find(pd => pd.producerKey === producerName);
            if (!entry)
                throw new Error(`No data found for producer: ${producerName}`);
            return entry.data;
        };
        /**
         * Groups rows by the string form of `row[key]`.
         *
         * @param {Iterable<Object>} data - Rows to index.
         * @param {string} key - Property whose value becomes the lookup key.
         * @returns {Map<string, Object[]>} Rows per key; rows whose key value is
         *   null or undefined are left out.
         */
        this.createLookupMap = (data, key) => {
            const map = new Map();
            for (const row of data) {
                const keyValue = row[key]?.toString();
                if (keyValue === undefined)
                    continue;
                const bucket = map.get(keyValue);
                if (bucket)
                    bucket.push(row);
                else
                    map.set(keyValue, [row]);
            }
            return map;
        };
        /**
         * Joins the produced datasets of `consumer` according to the joins
         * declared on its producers.
         *
         * With a single producer its data is returned untouched. Otherwise each
         * declared join is evaluated by comparing the string form of the join
         * keys, and matched pairs are projected onto the compiled consumer
         * columns, each value taken from the producer that owns the column.
         *
         * NOTE(review): every join assigns its own output to the result, so
         * with several joins only the last one evaluated is returned, and with
         * several producers but no joins the result is empty. Confirm this is
         * intended.
         *
         * @param {Object} consumer - Consumer definition exposing `producers`.
         * @param {Array<{producerKey: string, data: *}>} producedData
         * @returns {*} The single producer's data, or the array of joined rows.
         * @throws {Error} If the consumer has no producers, a join references an
         *   unknown producer, the join condition is invalid, a join field is
         *   missing from the consumer, or produced data is missing.
         */
        this.join = (consumer, producedData) => {
            (0, Affirm_1.default)(consumer, 'Invalid consumer');
            (0, Affirm_1.default)(producedData, 'Invalid produced data');
            const producers = consumer.producers;
            // Fail with a clear message instead of a TypeError on `producers[0]`.
            if (!producers || producers.length === 0)
                throw new Error('Invalid consumer: at least one producer is required');
            if (producers.length === 1)
                return this.findProducerData(producers[0].name, producedData);
            let result = [];
            const consumerShape = ConsumerEngine_1.default.getOutputShape(consumer);
            const consumerColumns = ConsumerEngine_1.default.compile(consumer);
            // Iterate through each producer and its joins
            for (const producer of producers) {
                if (!producer.joins)
                    continue;
                for (const join of producer.joins) {
                    if (!producers.some(p => p.name === join.otherName)) {
                        throw new Error(`Producer ${join.otherName} not found`);
                    }
                    const condition = this.parseJoinCondition(join.sql, producer);
                    this.validateFieldInConsumer(condition.leftField, consumerShape);
                    this.validateFieldInConsumer(condition.rightField, consumerShape);
                    const leftData = this.findProducerData(condition.leftProducer, producedData);
                    const rightData = this.findProducerData(condition.rightProducer, producedData);
                    // Index the right dataset once so each left row is a map lookup.
                    const rightLookup = this.createLookupMap(rightData, condition.rightField);
                    const joinedResult = [];
                    for (const leftRow of leftData) {
                        const leftValue = leftRow[condition.leftField]?.toString();
                        // Left rows without a join key value are dropped.
                        if (leftValue === undefined)
                            continue;
                        const rightRows = rightLookup.get(leftValue) || [];
                        if (rightRows.length === 0) {
                            // Any relationship other than 'one-to-many' keeps the
                            // unmatched left row. The row is pushed as-is, i.e.
                            // not projected onto the consumer columns.
                            if (join.relationship !== 'one-to-many') {
                                joinedResult.push(leftRow);
                            }
                            continue;
                        }
                        for (const rightRow of rightRows) {
                            const mergedRow = {};
                            // Each compiled column is read from the producer that owns it.
                            for (const column of consumerColumns) {
                                const fieldName = column.consumerAlias;
                                if (column.owner === condition.leftProducer) {
                                    mergedRow[fieldName] = leftRow[fieldName];
                                }
                                else if (column.owner === condition.rightProducer) {
                                    mergedRow[fieldName] = rightRow[fieldName];
                                }
                                // Columns owned by any other producer are not set.
                            }
                            joinedResult.push(mergedRow);
                        }
                    }
                    result = joinedResult;
                }
            }
            return result;
        };
    }
}
// The module's default export is one shared JoinEngineClass instance,
// created when the module is first loaded.
exports.default = new JoinEngineClass();