arrow-table-joins
Version:
For now only the full outer join over fixed-width columns is implemented.
37 lines • 7.57 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const fields_1 = require("./fields");
const apache_arrow_1 = require("apache-arrow");
const util_1 = require("../util");
// todo: extract variable-width column merge from graphistry arrow-util project
/**
 * Merges the rows of `innerRecordBatch` into `outerRecordBatch`, matching rows
 * by the value of the `mergeOn` column.
 *
 * For every inner row whose key is present in the key index, the columns the
 * two batches have in common (as reported by `findCommonFields`) are written
 * into the matching outer row. Inner rows whose key is not in the index are
 * reported back to the caller as "new" rows; they are NOT appended here.
 *
 * @param {string} mergeOn - Name of the join-key column; looked up in both fields maps.
 * @param {Object|null|undefined} recordBatchIndex - Optional lookup from key value to
 *   outer row number. When falsy, a fresh prototype-less object is built from
 *   the outer key column; when provided, it is used as-is and never rebuilt.
 * @param {RecordBatch} outerRecordBatch - Batch that receives the merged values.
 * @param {Map} outerFieldsMap - Map from field name to an entry whose element `[1]`
 *   is the child index of that column in `outerRecordBatch`.
 * @param {RecordBatch} innerRecordBatch - Batch whose rows are merged in.
 * @param {Map} innerFieldsMap - Same as `outerFieldsMap`, for `innerRecordBatch`.
 * @returns {[RecordBatch, Int32Array, Object]} A triple of:
 *   a RecordBatch built from the merged schema and `outerRecordBatch.data`;
 *   the indices of inner rows that had no match in the key index;
 *   the key index that was used (the caller's object when one was supplied).
 * @throws {TypeError} If `mergeOn` is missing from either fields map.
 */
function mergeRecordBatches(mergeOn, recordBatchIndex, outerRecordBatch, outerFieldsMap, innerRecordBatch, innerFieldsMap) {
    const outerKeys = outerRecordBatch.getChildAt(outerFieldsMap.get(mergeOn)[1]);
    const innerKeys = innerRecordBatch.getChildAt(innerFieldsMap.get(mergeOn)[1]);

    // Build the key -> outer row lookup only when the caller did not pass one.
    // Object.create(null) keeps key values such as "constructor" from colliding
    // with inherited properties.
    let keyIndex = recordBatchIndex;
    if (!keyIndex) {
        keyIndex = Object.create(null);
        const outerLength = outerRecordBatch.length;
        for (let row = 0; row < outerLength; ++row) {
            keyIndex[outerKeys.get(row)] = row;
        }
    }

    // Only columns present in both batches can be overwritten in the outer rows.
    const commonFields = util_1.findCommonFields(outerFieldsMap, innerRecordBatch.schema.fields, mergeOn);
    let left = null;
    let right = null;
    if (commonFields.length > 0) {
        const names = commonFields.map((f) => f.name);
        // NOTE(review): writes through `left` are expected to land in
        // outerRecordBatch.data (the result below is built from it), i.e.
        // select() presumably shares the underlying buffers - confirm.
        left = outerRecordBatch.select(...names);
        right = innerRecordBatch.select(...names);
    }

    // Classify every inner row as matched or new. This scan runs even when
    // there are no common columns, so that unmatched rows are still reported;
    // previously they were silently dropped from the result in that case.
    const innerLength = innerRecordBatch.length;
    const newRowIndices = new Int32Array(innerLength);
    let numNewRows = 0;
    for (let row = 0; row < innerLength; ++row) {
        const outerRow = keyIndex[innerKeys.get(row)];
        // A missing key yields undefined, and `undefined > -1` is false.
        if (outerRow > -1) {
            if (left !== null) {
                left.set(outerRow, right.get(row));
            }
        } else {
            newRowIndices[numNewRows++] = row;
        }
    }

    const schema = new apache_arrow_1.Schema(
        fields_1.mergeFields(outerRecordBatch.schema.fields, innerFieldsMap),
        util_1.mergeMaps(new Map(), outerRecordBatch.schema.metadata, innerRecordBatch.schema.metadata)
    );

    return [
        new apache_arrow_1.RecordBatch(schema, outerRecordBatch.data),
        // Trim the scratch array down to the rows actually recorded.
        newRowIndices.slice(0, numNewRows),
        keyIndex,
    ];
}
// Expose mergeRecordBatches on this module's CommonJS exports object.
exports.mergeRecordBatches = mergeRecordBatches;
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoicmVjb3JkYmF0Y2hlcy5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIi4uLy4uL3NyYy9tZXJnZS9yZWNvcmRiYXRjaGVzLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiI7O0FBQUEscUNBQXVDO0FBQ3ZDLCtDQUFtRDtBQUNuRCxrQ0FBc0Q7QUFHdEQsK0VBQStFO0FBRS9FLFNBQWdCLGtCQUFrQixDQUM5QixPQUFvQixFQUFFLGdCQUF5QixFQUMvQyxnQkFBZ0MsRUFBRSxjQUE2QixFQUMvRCxnQkFBZ0MsRUFBRSxjQUE2QjtJQUcvRCxJQUFJLFVBQVUsR0FBRyxDQUFDLENBQUM7SUFDbkIsTUFBTSxNQUFNLEdBQUcsZ0JBQWdCLENBQUMsTUFBTSxDQUFDO0lBQ3ZDLE1BQU0sYUFBYSxHQUFHLElBQUksVUFBVSxDQUFDLGdCQUFnQixDQUFDLE1BQU0sQ0FBQyxDQUFDO0lBRTlELE1BQU0sZ0JBQWdCLEdBQUcsZ0JBQWdCLENBQUMsVUFBVSxDQUFDLGNBQWMsQ0FBQyxHQUFHLENBQUMsT0FBa0IsQ0FBRSxDQUFDLENBQUMsQ0FBQyxDQUFFLENBQUM7SUFDbEcsTUFBTSxnQkFBZ0IsR0FBRyxnQkFBZ0IsQ0FBQyxVQUFVLENBQUMsY0FBYyxDQUFDLEdBQUcsQ0FBQyxPQUFrQixDQUFFLENBQUMsQ0FBQyxDQUFDLENBQUUsQ0FBQztJQUVsRyxJQUFJLENBQUMsZ0JBQWdCLElBQUksQ0FBQyxnQkFBZ0IsR0FBRyxNQUFNLENBQUMsTUFBTSxDQUFDLElBQUksQ0FBQyxDQUFDLEVBQUU7UUFDL0QsS0FBSyxJQUFJLENBQUMsR0FBRyxDQUFDLENBQUMsRUFBRSxFQUFFLENBQUMsR0FBRyxNQUFNLEVBQUUsZ0JBQWdCLENBQUMsZ0JBQWdCLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQyxDQUFDLEdBQUcsQ0FBQztZQUFDLENBQUM7S0FDakY7SUFFRCxNQUFNLFlBQVksR0FBRyx1QkFBZ0IsQ0FBQyxjQUFjLEVBQUUsZ0JBQWdCLENBQUMsTUFBTSxDQUFDLE1BQXlCLEVBQUUsT0FBTyxDQUFDLENBQUM7SUFFbEgsSUFBSSxZQUFZLENBQUMsTUFBTSxHQUFHLENBQUMsRUFBRTtRQUN6QixNQUFNLEtBQUssR0FBRyxZQUFZLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQyxFQUFFLEVBQUUsQ0FBQyxDQUFDLENBQUMsSUFBSSxDQUFDLENBQUM7UUFDOUMsTUFBTSxJQUFJLEdBQUcsZ0JBQWdCLENBQUMsTUFBTSxDQUFDLEdBQUcsS0FBSyxDQUF1QixDQUFDO1FBQ3JFLE1BQU0sS0FBSyxHQUFHLGdCQUFnQixDQUFDLE1BQU0sQ0FBQyxHQUFHLEtBQUssQ0FBdUIsQ0FBQztRQUN0RSxLQUFLLElBQUksQ0FBQyxHQUFHLENBQUMsQ0FBQyxFQUFFLENBQUMsR0FBRyxnQkFBZ0IsQ0FBQyxNQUFNLEVBQUUsRUFBRSxDQUFDLEdBQUcsQ0FBQyxHQUFHO1lBQ3BELE1BQU0sUUFBUSxHQUFHLGdCQUFnQixDQUFDLEdBQUcsQ0FBQyxDQUFDLENBQUMsQ0FBQztZQUN6QyxNQUFNLFFBQVEsR0FBRyxnQkFBZ0IsQ0FBQyxRQUFRLENBQUUsQ0FBQztZQUM3QyxDQUFDLFFBQVEsR0FBRyxDQUFDLENBQUMsQ0FBQztnQkFDWCxDQUFDLENBQUMsSUFBSSxDQUFDLEdBQUcsQ0FBQyxRQU
FRLEVBQUUsS0FBSyxDQUFDLEdBQUcsQ0FBQyxDQUFDLENBQUMsQ0FBQztnQkFDbEMsQ0FBQyxDQUFDLENBQUMsYUFBYSxDQUFDLFVBQVUsRUFBRSxDQUFDLEdBQUcsQ0FBQyxDQUFDLENBQUM7U0FDM0M7S0FDSjtJQUVELE1BQU0sTUFBTSxHQUFHLElBQUkscUJBQU0sQ0FDckIsb0JBQVcsQ0FBQyxnQkFBZ0IsQ0FBQyxNQUFNLENBQUMsTUFBTSxFQUFFLGNBQWMsQ0FBQyxFQUMzRCxnQkFBUyxDQUFDLElBQUksR0FBRyxFQUFFLEVBQUUsZ0JBQWdCLENBQUMsTUFBTSxDQUFDLFFBQVEsRUFBRSxnQkFBZ0IsQ0FBQyxNQUFNLENBQUMsUUFBUSxDQUFDLENBQzNGLENBQUM7SUFFRixPQUFPO1FBQ0gsSUFBSSwwQkFBVyxDQUFDLE1BQU0sRUFBRSxnQkFBZ0IsQ0FBQyxJQUFJLENBQUM7UUFDOUMsYUFBYSxDQUFDLEtBQUssQ0FBQyxDQUFDLEVBQUUsVUFBVSxDQUFDLEVBQUUsZ0JBQWdCO0tBQ3ZELENBQUM7QUFDTixDQUFDO0FBekNELGdEQXlDQyIsInNvdXJjZXNDb250ZW50IjpbImltcG9ydCB7IG1lcmdlRmllbGRzIH0gZnJvbSAnLi9maWVsZHMnO1xuaW1wb3J0IHsgU2NoZW1hLCBSZWNvcmRCYXRjaCB9IGZyb20gJ2FwYWNoZS1hcnJvdyc7XG5pbXBvcnQgeyBtZXJnZU1hcHMsIGZpbmRDb21tb25GaWVsZHMgfSBmcm9tICcuLi91dGlsJztcbmltcG9ydCB7IFRLZXksIFRGaWVsZCwgVFNjaGVtYSwgVEZpZWxkc01hcCwgS2V5c01hcCB9IGZyb20gJy4uL2ludGVyZmFjZXMnO1xuXG4vLyB0b2RvOiBleHRyYWN0IHZhcmlhYmxlLXdpZHRoIGNvbHVtbiBtZXJnZSBmcm9tIGdyYXBoaXN0cnkgYXJyb3ctdXRpbCBwcm9qZWN0XG5cbmV4cG9ydCBmdW5jdGlvbiBtZXJnZVJlY29yZEJhdGNoZXM8VCBleHRlbmRzIFRTY2hlbWEsIFIgZXh0ZW5kcyBUU2NoZW1hPihcbiAgICBtZXJnZU9uOiBUS2V5PFQgJiBSPiwgcmVjb3JkQmF0Y2hJbmRleDogS2V5c01hcCxcbiAgICBvdXRlclJlY29yZEJhdGNoOiBSZWNvcmRCYXRjaDxUPiwgb3V0ZXJGaWVsZHNNYXA6IFRGaWVsZHNNYXA8VD4sXG4gICAgaW5uZXJSZWNvcmRCYXRjaDogUmVjb3JkQmF0Y2g8Uj4sIGlubmVyRmllbGRzTWFwOiBURmllbGRzTWFwPFI+XG4pIDogW1JlY29yZEJhdGNoPFQgJiBSPiwgSW50MzJBcnJheSwgS2V5c01hcF0ge1xuXG4gICAgbGV0IG51bU5ld1Jvd3MgPSAwO1xuICAgIGNvbnN0IGxlbmd0aCA9IG91dGVyUmVjb3JkQmF0Y2gubGVuZ3RoO1xuICAgIGNvbnN0IG5ld1Jvd0luZGljZXMgPSBuZXcgSW50MzJBcnJheShpbm5lclJlY29yZEJhdGNoLmxlbmd0aCk7XG5cbiAgICBjb25zdCBvdXRlckluZGV4VmVjdG9yID0gb3V0ZXJSZWNvcmRCYXRjaC5nZXRDaGlsZEF0KG91dGVyRmllbGRzTWFwLmdldChtZXJnZU9uIGFzIFRLZXk8VD4pIVsxXSkhO1xuICAgIGNvbnN0IGlubmVySW5kZXhWZWN0b3IgPSBpbm5lclJlY29yZEJhdGNoLmdldENoaWxkQXQoaW5uZXJGaWVsZHNNYXAuZ2V0KG1lcmdlT24gYXMgVEtleTxSPikhWzFdKSE7XG5cbiAgICBpZiAoIXJlY29yZEJhdGNoSW
5kZXggJiYgKHJlY29yZEJhdGNoSW5kZXggPSBPYmplY3QuY3JlYXRlKG51bGwpKSkge1xuICAgICAgICBmb3IgKGxldCBpID0gLTE7ICsraSA8IGxlbmd0aDsgcmVjb3JkQmF0Y2hJbmRleFtvdXRlckluZGV4VmVjdG9yLmdldChpKV0gPSBpKTtcbiAgICB9XG5cbiAgICBjb25zdCBjb21tb25GaWVsZHMgPSBmaW5kQ29tbW9uRmllbGRzKG91dGVyRmllbGRzTWFwLCBpbm5lclJlY29yZEJhdGNoLnNjaGVtYS5maWVsZHMgYXMgVEZpZWxkPFQgJiBSPltdLCBtZXJnZU9uKTtcblxuICAgIGlmIChjb21tb25GaWVsZHMubGVuZ3RoID4gMCkge1xuICAgICAgICBjb25zdCBuYW1lcyA9IGNvbW1vbkZpZWxkcy5tYXAoKGYpID0+IGYubmFtZSk7XG4gICAgICAgIGNvbnN0IGxlZnQgPSBvdXRlclJlY29yZEJhdGNoLnNlbGVjdCguLi5uYW1lcykgYXMgUmVjb3JkQmF0Y2g8VCB8IFI+O1xuICAgICAgICBjb25zdCByaWdodCA9IGlubmVyUmVjb3JkQmF0Y2guc2VsZWN0KC4uLm5hbWVzKSBhcyBSZWNvcmRCYXRjaDxUIHwgUj47XG4gICAgICAgIGZvciAobGV0IGkgPSAtMSwgbiA9IGlubmVyUmVjb3JkQmF0Y2gubGVuZ3RoOyArK2kgPCBuOykge1xuICAgICAgICAgICAgY29uc3QgaW5uZXJWYWwgPSBpbm5lckluZGV4VmVjdG9yLmdldChpKTtcbiAgICAgICAgICAgIGNvbnN0IG91dGVySWR4ID0gcmVjb3JkQmF0Y2hJbmRleFtpbm5lclZhbF0hO1xuICAgICAgICAgICAgKG91dGVySWR4ID4gLTEpXG4gICAgICAgICAgICAgICAgPyBsZWZ0LnNldChvdXRlcklkeCwgcmlnaHQuZ2V0KGkpKVxuICAgICAgICAgICAgICAgIDogKG5ld1Jvd0luZGljZXNbbnVtTmV3Um93cysrXSA9IGkpO1xuICAgICAgICB9XG4gICAgfVxuXG4gICAgY29uc3Qgc2NoZW1hID0gbmV3IFNjaGVtYShcbiAgICAgICAgbWVyZ2VGaWVsZHMob3V0ZXJSZWNvcmRCYXRjaC5zY2hlbWEuZmllbGRzLCBpbm5lckZpZWxkc01hcCksXG4gICAgICAgIG1lcmdlTWFwcyhuZXcgTWFwKCksIG91dGVyUmVjb3JkQmF0Y2guc2NoZW1hLm1ldGFkYXRhLCBpbm5lclJlY29yZEJhdGNoLnNjaGVtYS5tZXRhZGF0YSlcbiAgICApO1xuXG4gICAgcmV0dXJuIFtcbiAgICAgICAgbmV3IFJlY29yZEJhdGNoKHNjaGVtYSwgb3V0ZXJSZWNvcmRCYXRjaC5kYXRhKSxcbiAgICAgICAgbmV3Um93SW5kaWNlcy5zbGljZSgwLCBudW1OZXdSb3dzKSwgcmVjb3JkQmF0Y2hJbmRleFxuICAgIF07XG59XG4iXX0=