UNPKG

danfojs

Version:

JavaScript library providing high performance, intuitive, and easy to use data structures for manipulating and processing structured data.

362 lines (360 loc) 13.7 kB
"use strict";
/**
 * @license
 * Copyright 2022 JsData. All rights reserved.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==========================================================================
 */
// Compiler-emitted helper: copies `from` onto the end of `to`, turning holes
// in a sparse `from` into explicit `undefined` entries when `pack` is set.
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
    if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
        if (ar || !(i in from)) {
            if (!ar) ar = Array.prototype.slice.call(from, 0, i);
            ar[i] = from[i];
        }
    }
    return to.concat(ar || Array.prototype.slice.call(from));
};
// Compiler-emitted helper: wraps a non-ES module so `.default` is always usable.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
var frame_1 = __importDefault(require("../core/frame"));
var utils_1 = __importDefault(require("../shared/utils"));
var utils = new utils_1.default();

/** The join strategies understood by `Merge.prototype.operation`. */
var SUPPORTED_HOW = ["outer", "inner", "left", "right"];

/**
 * Turn one key cell into a string fragment that can be joined with "_"
 * without ambiguity.
 *
 * `null`/`undefined` become the empty string (the same text
 * `Array.prototype.join` produces for them). Every "_" and "\" inside the
 * text is prefixed with a backslash, so a bare "_" in the final key is always
 * a separator. Text without those characters is returned unchanged.
 * @param value a single key cell
 * @returns the escaped text
 */
function escapeKeyPart(value) {
    var text = (value === null || value === undefined) ? "" : String(value);
    return text.replace(/[\\_]/g, "\\$&");
}

/**
 * Build the NaN padding used for the side of a row that has no match.
 * @param cols the non-key column names of the missing side
 * @returns an array of NaN, one per column
 */
function nanPadding(cols) {
    return new Array(cols ? cols.length : 0).fill(NaN);
}

var Merge = /** @class */ (function () {
    /**
     * @param _a { left, right, on, how } - the two frames, the key column
     *   names and the join strategy. `how` defaults to "inner" when omitted
     *   (the same default pandas uses for `merge`).
     * @throws Error when a name in `on` is missing from either frame, or when
     *   `how` is not one of "outer" | "inner" | "left" | "right".
     */
    function Merge(_a) {
        var left = _a.left, right = _a.right, on = _a.on, how = _a.how;
        this.leftColIndex = [];
        this.rightColIndex = [];
        this.left = left;
        this.right = right;
        this.on = on;
        this.how = how == null ? "inner" : how;
        if (SUPPORTED_HOW.indexOf(this.how) === -1) {
            throw new Error("merge: unsupported `how` value '" + this.how +
                "'; expected one of " + SUPPORTED_HOW.join(", "));
        }
        // Record, for every key column, its position in each frame.
        for (var i = 0; i < this.on.length; i++) {
            var key = this.on[i];
            var inLeft = this.left.columns.includes(key);
            var inRight = this.right.columns.includes(key);
            if (!inLeft || !inRight) {
                // Skipping the key silently would leave it in the output
                // column list while the rows carry no value for it.
                throw new Error("merge: key column '" + key + "' must exist in both frames (missing from " +
                    (inLeft ? "right" : (inRight ? "left" : "left and right")) + ")");
            }
            this.leftColIndex.push(this.left.columns.indexOf(key));
            this.rightColIndex.push(this.right.columns.indexOf(key));
        }
    }
    /**
     * Group the rows of one frame by the combination of their key cells.
     * e.g df = {
     *   key1: ["K0", "K0", "K3", "K4"],
     *   Key2: ["K1", "K1", "K3", "K5"],
     *   A: [1,2,3,4]
     *   B: [3,4,5,6]
     * }
     * generateKeyCombination(df.values, [0,1]) gives
     * {
     *   'K0_K1': {
     *     filters: [[1,3], [2,4]],   // non-key cells of every row with this key
     *     combValues: ["K0", "K1"]   // the key cells themselves
     *   },
     *   'K3_K3': { filters: [[3,5]], combValues: ['K3', 'K3'] },
     *   'K4_K5': { filters: [[4,6]], combValues: ['K4', 'K5'] }
     * }
     * Key cells that themselves contain "_" or "\" are escaped before joining,
     * so ["a_b", "c"] and ["a", "b_c"] produce different dictionary keys.
     * @param values row-major cell values of the frame
     * @param colIndex positions of the key columns inside each row
     * @returns dictionary keyed by the joined key text
     */
    Merge.prototype.generateKeyCombination = function (values, colIndex) {
        var colKeyComb = {};
        var isNonKeyCell = function (_, index) { return !colIndex.includes(index); };
        for (var i = 0; i < values.length; i++) {
            var rowValues = values[i];
            var rowKeyCombValues = [];
            var keyParts = [];
            for (var j = 0; j < colIndex.length; j++) {
                var cell = rowValues[colIndex[j]];
                rowKeyCombValues.push(cell);
                keyParts.push(escapeKeyPart(cell));
            }
            var rowKeyComb = keyParts.join("_");
            var otherValues = rowValues.filter(isNonKeyCell);
            if (utils.keyInObject(colKeyComb, rowKeyComb)) {
                colKeyComb[rowKeyComb].filters.push(otherValues);
            }
            else {
                colKeyComb[rowKeyComb] = {
                    filters: [otherValues],
                    combValues: rowKeyCombValues
                };
            }
        }
        return colKeyComb;
    };
    /**
     * Compute the column names of the merged frame and store them on
     * `this.columns` (also fills `this.leftCol` / `this.rightCol` with the
     * non-key columns of each side).
     * e.g. left has columns key1, Key2, A, B and right has key1, Key2, A, c.
     * Merging on `key1` and `Key2` yields
     *   columns = ['key1', 'Key2', 'A', 'B', 'A_1', 'c']
     * The key columns come first, then the left non-key columns, then the
     * right ones. A name that was already emitted gets the suffix "_<n>",
     * where n counts how many times that name has been seen before.
     */
    Merge.prototype.createColumns = function () {
        var self = this;
        this.leftCol = this.left.columns.filter(function (_, index) {
            return !self.leftColIndex.includes(index);
        });
        this.rightCol = this.right.columns.filter(function (_, index) {
            return !self.rightColIndex.includes(index);
        });
        this.columns = __spreadArray([], this.on, true);
        var seenCount = {};
        var nonKeyColumns = this.leftCol.concat(this.rightCol);
        for (var i = 0; i < nonKeyColumns.length; i++) {
            var col = nonKeyColumns[i];
            if (utils.keyInObject(seenCount, col)) {
                this.columns.push(col + "_" + seenCount[col]);
                seenCount[col] += 1;
            }
            else {
                this.columns.push(col);
                seenCount[col] = 1;
            }
        }
    };
    /**
     * Build the merged rows for the given keys.
     *
     * For each key:
     *  - present on both sides: one row per (left row, right row) pair;
     *  - present on the left only: left rows padded with NaN on the right;
     *  - absent on the left: right rows padded with NaN on the left.
     *
     * e.g. with
     *   leftKeyDict = {
     *     'K0_K1': { filters: [[1,3], [2,4]], combValues: ['K0', 'K1'] },
     *     'K3_K3': { filters: [[3,5]],        combValues: ['K3', 'K3'] },
     *     'K4_K5': { filters: [[4,6]],        combValues: ['K4', 'K5'] }
     *   }
     *   rightKeyDict = {
     *     'K0_K1': { filters: [[3,2]], combValues: ['K0', 'K1'] },
     *     'K0_K2': { filters: [[6,4]], combValues: ['K0', 'K2'] },
     *     'K3_K4': { filters: [[8,6]], combValues: ['K3', 'K4'] },
     *     'K4_K5': { filters: [[9,8]], combValues: ['K4', 'K5'] }
     *   }
     *   keys = ['K0_K1', 'K3_K3', 'K4_K5', 'K0_K2', 'K3_K4']   // outer merge
     * the rows, for COLUMNS = ['key1', 'Key2', 'A', 'B', 'A_1', 'c'], are
     *   ['K0', 'K1', 1,   3,   3,   2  ]
     *   ['K0', 'K1', 2,   4,   3,   2  ]
     *   ['K3', 'K3', 3,   5,   NaN, NaN]
     *   ['K4', 'K5', 4,   6,   9,   8  ]
     *   ['K0', 'K2', NaN, NaN, 6,   4  ]
     *   ['K3', 'K4', NaN, NaN, 8,   6  ]
     *
     * @param keys key combinations to emit, in output order
     * @param leftKeyDict result of generateKeyCombination for the left frame
     * @param rightKeyDict result of generateKeyCombination for the right frame
     * @returns the merged rows
     */
    Merge.prototype.basic = function (keys, leftKeyDict, rightKeyDict) {
        var data = [];
        // The padding only depends on the column counts, so build it once;
        // `concat` copies its elements into every row.
        var leftPadding = nanPadding(this.leftCol);
        var rightPadding = nanPadding(this.rightCol);
        for (var i = 0; i < keys.length; i++) {
            var key = keys[i];
            var hasRight = utils.keyInObject(rightKeyDict, key);
            if (utils.keyInObject(leftKeyDict, key)) {
                var leftRows = leftKeyDict[key].filters;
                var leftCombValues = leftKeyDict[key].combValues;
                for (var lIndex = 0; lIndex < leftRows.length; lIndex++) {
                    var leftRow = leftRows[lIndex];
                    if (hasRight) {
                        var matches = rightKeyDict[key].filters;
                        for (var rIndex = 0; rIndex < matches.length; rIndex++) {
                            data.push(leftCombValues.concat(leftRow, matches[rIndex]));
                        }
                    }
                    else {
                        data.push(leftCombValues.concat(leftRow, rightPadding));
                    }
                }
            }
            else {
                var rightRows = rightKeyDict[key].filters;
                var rightCombValues = rightKeyDict[key].combValues;
                for (var k = 0; k < rightRows.length; k++) {
                    data.push(rightCombValues.concat(leftPadding, rightRows[k]));
                }
            }
        }
        return data;
    };
    /**
     * Outer merge: uses the union of the left and right keys (left keys
     * first, then the keys that only exist on the right).
     * @param leftKeyDict
     * @param rightKeyDict
     */
    Merge.prototype.outer = function (leftKeyDict, rightKeyDict) {
        var allKeys = Object.keys(leftKeyDict).concat(Object.keys(rightKeyDict));
        var uniqueKeys = Array.from(new Set(allKeys));
        return this.basic(uniqueKeys, leftKeyDict, rightKeyDict);
    };
    /**
     * Inner merge: uses the keys present in both dictionaries, in left order.
     * @param leftKeyDict
     * @param rightKeyDict
     */
    Merge.prototype.inner = function (leftKeyDict, rightKeyDict) {
        // Own-property lookup instead of scanning the right key list per key.
        var sharedKeys = Object.keys(leftKeyDict).filter(function (key) {
            return Object.prototype.hasOwnProperty.call(rightKeyDict, key);
        });
        return this.basic(sharedKeys, leftKeyDict, rightKeyDict);
    };
    /**
     * Left merge: uses every key of the left dictionary.
     * @param leftKeyDict
     * @param rightKeyDict
     */
    Merge.prototype.leftMerge = function (leftKeyDict, rightKeyDict) {
        return this.basic(Object.keys(leftKeyDict), leftKeyDict, rightKeyDict);
    };
    /**
     * Right merge: uses every key of the right dictionary.
     * @param leftKeyDict
     * @param rightKeyDict
     */
    Merge.prototype.rightMerge = function (leftKeyDict, rightKeyDict) {
        return this.basic(Object.keys(rightKeyDict), leftKeyDict, rightKeyDict);
    };
    /**
     * Perform the merge operation
     * 1) Read the values of both frames
     * 2) Build leftKeyDict and rightKeyDict
     * 3) Build the merged column names
     * 4) Dispatch on `how` to produce the rows
     * @returns a new frame built from the merged rows and column names
     * @throws Error when `how` is not a supported strategy
     */
    Merge.prototype.operation = function () {
        var leftKeyDict = this.generateKeyCombination(this.left.values, this.leftColIndex);
        var rightKeyDict = this.generateKeyCombination(this.right.values, this.rightColIndex);
        this.createColumns();
        var data = [];
        switch (this.how) {
            case "outer":
                data = this.outer(leftKeyDict, rightKeyDict);
                break;
            case "inner":
                data = this.inner(leftKeyDict, rightKeyDict);
                break;
            case "left":
                data = this.leftMerge(leftKeyDict, rightKeyDict);
                break;
            case "right":
                data = this.rightMerge(leftKeyDict, rightKeyDict);
                break;
            default:
                // Guards against `how` being reassigned after construction.
                throw new Error("merge: unsupported `how` value '" + this.how +
                    "'; expected one of " + SUPPORTED_HOW.join(", "));
        }
        return new frame_1.default(data, { columns: __spreadArray([], this.columns, true) });
    };
    return Merge;
}());
/**
 * Perform merge operation between two DataFrame
 * @param params : {
 *   left: DataFrame
 *   right: DataFrame
 *   on: Array<string>
 *   how: "outer" | "inner" | "left" | "right" (defaults to "inner")
 * }
 * @returns the merged DataFrame
 */
function merge(params) {
    var mergeClass = new Merge(params);
    return mergeClass.operation();
}
exports.default = merge;