UNPKG

datamodel

Version:

Relational algebra compliant in-memory tabular data store

579 lines (546 loc) 21 kB
import { FilteringMode } from './enums';
import { getUniqueId } from './utils';
import { updateFields, cloneWithSelect, cloneWithProject, updateData, getNormalizedProFields } from './helper';
import { crossProduct, difference, naturalJoinFilter, union } from './operator';

/**
 * Relation provides the definitions of basic operators of relational algebra like *selection*, *projection*, *union*,
 * *difference* etc.
 *
 * It is extended by {@link DataModel} to inherit the functionalities of relational algebra concept.
 *
 * @class
 * @public
 * @module Relation
 * @namespace DataModel
 */
class Relation {

    /**
     * Creates a new Relation instance by providing underlying data and schema.
     *
     * When the only argument is an existing Relation, the new instance shares the source's row diffset, column
     * identifier, data format and partial fieldspace, records the source as its parent and gets a freshly generated
     * field store name. Otherwise all arguments are handed to `updateData` to populate the instance.
     *
     * @private
     *
     * @param {Object | string | Relation} data - The input tabular data in dsv or json format or
     *      an existing Relation instance object.
     * @param {Array} schema - An array of data schema.
     * @param {Object} [options] - The optional options.
     */
    constructor (...params) {
        let source;

        this._parent = null;
        this._derivation = [];
        this._ancestorDerivation = [];
        this._children = [];

        if (params.length === 1 && ((source = params[0]) instanceof Relation)) {
            // parent datamodel was passed as part of source
            this._colIdentifier = source._colIdentifier;
            this._rowDiffset = source._rowDiffset;
            this._dataFormat = source._dataFormat;
            // Only the back-reference is stored here; the source's child list is not touched by the constructor.
            this._parent = source;
            this._partialFieldspace = this._parent._partialFieldspace;
            this._fieldStoreName = getUniqueId();
            this.__calculateFieldspace().calculateFieldsConfig();
        } else {
            updateData(this, ...params);
            this._fieldStoreName = this._partialFieldspace.name;
            this.__calculateFieldspace().calculateFieldsConfig();
            // Only instances built from raw data get a propagation namespace.
            this._propagationNameSpace = {
                mutableActions: {},
                immutableActions: {}
            };
        }
    }

    /**
     * Retrieves the {@link Schema | schema} details for every {@link Field | field} as an array.
     *
     * @public
     *
     * @return {Array.<Schema>} Array of fields schema.
     *      ```
     *      [
     *          { name: 'Name', type: 'dimension' },
     *          { name: 'Miles_per_Gallon', type: 'measure', numberFormat: (val) => `${val} miles / gallon` },
     *          { name: 'Cylinder', type: 'dimension' },
     *          { name: 'Displacement', type: 'measure', defAggFn: 'max' },
     *          { name: 'HorsePower', type: 'measure', defAggFn: 'max' },
     *          { name: 'Weight_in_lbs', type: 'measure', defAggFn: 'avg', },
     *          { name: 'Acceleration', type: 'measure', defAggFn: 'avg' },
     *          { name: 'Year', type: 'dimension', subtype: 'datetime', format: '%Y' },
     *          { name: 'Origin' }
     *      ]
     *      ```
     */
    getSchema () {
        return this.getFieldspace().fields.map(d => d.schema());
    }

    /**
     * Returns the name of the {@link DataModel} instance. If no name was specified during {@link DataModel}
     * initialization, then it returns an auto-generated name.
     *
     * @public
     *
     * @return {string} Name of the DataModel instance.
     */
    getName () {
        return this._fieldStoreName;
    }

    /**
     * Returns the fieldspace last computed by `__calculateFieldspace`.
     *
     * @return {Object} The fieldspace of the current instance.
     */
    getFieldspace () {
        return this._fieldspace;
    }

    /**
     * Recomputes the fieldspace of this instance from its row diffset, column identifier, partial fieldspace and
     * field store name, and stores the result on the instance.
     *
     * @private
     *
     * @return {Relation} The current instance, to allow chaining.
     */
    __calculateFieldspace () {
        this._fieldspace = updateFields(
            [this._rowDiffset, this._colIdentifier],
            this.getPartialFieldspace(),
            this._fieldStoreName
        );
        return this;
    }

    /**
     * Returns the partial fieldspace this instance was built on. Instances created from another Relation share the
     * partial fieldspace object of their source.
     *
     * @return {Object} The partial fieldspace of the current instance.
     */
    getPartialFieldspace () {
        return this._partialFieldspace;
    }

    /**
     * Performs {@link link_of_cross_product | cross-product} between two {@link DataModel} instances and returns a
     * new {@link DataModel} instance containing the results. This operation is also called theta join.
     *
     * Cross product takes two sets and creates one set where each value of one set is paired with each value of
     * the other set.
     *
     * This method takes an optional predicate which filters the generated result rows. If the predicate returns true
     * the combined row is included in the resultant table.
     *
     * @example
     *  let originDM = dm.project(['Origin','Origin_Formal_Name']);
     *  let carsDM = dm.project(['Name','Miles_per_Gallon','Origin'])
     *
     *  console.log(carsDM.join(originDM));
     *
     *  console.log(carsDM.join(originDM,
     *      obj => obj[originDM.getName()].Origin === obj[carsDM.getName()].Origin));
     *
     * @text
     * This is the chained version of the `join` operator. `join` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {DataModel} joinWith - The DataModel to be joined with the current instance DataModel.
     * @param {SelectionPredicate} filterFn - The predicate function that will filter the result of the crossProduct.
     *
     * @return {DataModel} New DataModel instance created after joining.
     */
    join (joinWith, filterFn) {
        return crossProduct(this, joinWith, filterFn);
    }

    /**
     * {@link natural_join | Natural join} is a special kind of cross-product join where filtering of rows is
     * performed internally by resolving the fields common to both tables; rows with equal values on those common
     * fields are included.
     *
     * @example
     *  let originDM = dm.project(['Origin','Origin_Formal_Name']);
     *  let carsDM = dm.project(['Name','Miles_per_Gallon','Origin'])
     *
     *  console.log(carsDM.naturalJoin(originDM));
     *
     * @text
     * This is the chained version of the `naturalJoin` operator. `naturalJoin` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {DataModel} joinWith - The DataModel with which the current instance of DataModel on which the method is
     *      called will be joined.
     * @return {DataModel} New DataModel instance created after joining.
     */
    naturalJoin (joinWith) {
        return crossProduct(this, joinWith, naturalJoinFilter(this, joinWith), true);
    }

    /**
     * {@link link_to_union | Union} operation can be termed as vertical stacking of all rows from both the DataModel
     * instances, provided that both of the {@link DataModel} instances should have same column names.
     *
     * @example
     * console.log(EuropeanMakerDM.union(USAMakerDM));
     *
     * @text
     * This is the chained version of the `union` operator. `union` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {DataModel} unionWith - DataModel instance for which union has to be applied with the instance on which
     *      the method is called
     *
     * @return {DataModel} New DataModel instance with the result of the operation
     */
    union (unionWith) {
        return union(this, unionWith);
    }

    /**
     * {@link link_to_difference | Difference } operation only includes rows which are present in the datamodel on
     * which it was called but not in the one passed as argument.
     *
     * @example
     * console.log(highPowerDM.difference(highExpensiveDM));
     *
     * @text
     * This is the chained version of the `difference` operator. `difference` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {DataModel} differenceWith - DataModel instance for which difference has to be applied with the instance
     *      on which the method is called
     * @return {DataModel} New DataModel instance with the result of the operation
     */
    difference (differenceWith) {
        return difference(this, differenceWith);
    }

    /**
     * {@link link_to_selection | Selection} is a row filtering operation. It expects a predicate and an optional mode
     * which control which all rows should be included in the resultant DataModel instance.
     *
     * {@link SelectionPredicate} is a function which returns a boolean value. For selection operation the selection
     * function is called for each row of DataModel instance with the current row passed as argument.
     *
     * After executing {@link SelectionPredicate} the rows are labeled as either an entry of selection set or an entry
     * of rejection set.
     *
     * {@link FilteringMode} operates on the selection and rejection set to determine which one would reflect in the
     * resultant datamodel.
     *
     * @warning
     * Selection and rejection set is only a logical idea for concept explanation purpose.
     *
     * @example
     *  // with selection mode NORMAL:
     *  const normDt = dt.select(fields => fields.Origin.value === "USA")
     *  console.log(normDt);
     *
     * // with selection mode INVERSE:
     * const inverDt = dt.select(fields => fields.Origin.value === "USA", { mode: DataModel.FilteringMode.INVERSE })
     * console.log(inverDt);
     *
     * // with selection mode ALL:
     * const dtArr = dt.select(fields => fields.Origin.value === "USA", { mode: DataModel.FilteringMode.ALL })
     * // print the selected parts
     * console.log(dtArr[0]);
     * // print the inverted parts
     * console.log(dtArr[1]);
     *
     * @text
     * This is the chained version of the `select` operator. `select` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {Function} selectFn - The predicate function which is called for each row with the current row.
     *      ```
     *          function (row, i, cloneProvider, store)  { ... }
     *      ```
     * @param {Object} config - The configuration object to control the inclusion exclusion of a row in resultant
     *      DataModel instance.
     * @param {FilteringMode} [config.mode=FilteringMode.NORMAL] - The mode of the selection. A falsy mode falls back
     *      to the default.
     * @param {boolean} [config.saveChild=true] - Forwarded to the cloning step; presumably controls whether the
     *      result is recorded as a child of this instance (confirm against `cloneWithSelect`).
     * @return {DataModel} Returns the new DataModel instance(s) after operation.
     */
    select (selectFn, config) {
        const defConfig = {
            mode: FilteringMode.NORMAL,
            saveChild: true
        };
        config = Object.assign({}, defConfig, config);
        // Object.assign lets an explicit `mode: undefined` override the default, so restore it here.
        config.mode = config.mode || defConfig.mode;

        const cloneConfig = { saveChild: config.saveChild };
        return cloneWithSelect(
            this,
            selectFn,
            config,
            cloneConfig
        );
    }

    /**
     * Retrieves a boolean value if the current {@link DataModel} instance has data.
     *
     * The instance is considered empty when either its row diffset or its column identifier has zero length.
     *
     * @example
     * const schema = [
     *    { name: 'CarName', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     * const data = [];
     *
     * const dt = new DataModel(data, schema);
     * console.log(dt.isEmpty());
     *
     * @public
     *
     * @return {Boolean} True if the datamodel has no data, otherwise false.
     */
    isEmpty () {
        return !this._rowDiffset.length || !this._colIdentifier.length;
    }

    /**
     * Creates a clone from the current DataModel instance with child parent relationship.
     *
     * @private
     * @param {boolean} [saveChild=true] - Whether the cloned instance would be recorded in the parent instance.
     * @return {DataModel} - Returns the newly cloned DataModel instance.
     */
    clone (saveChild = true) {
        // `this.constructor` keeps the clone the same (sub)class as the instance being cloned.
        const clonedDm = new this.constructor(this);
        if (saveChild) {
            // Registers the clone in this instance's child list.
            clonedDm.setParent(this);
        } else {
            // Drops the parent reference the constructor set, leaving the clone detached.
            clonedDm.setParent(null);
        }
        return clonedDm;
    }

    /**
     * {@link Projection} is filter column (field) operation. It expects list of fields' name and either include those
     * or exclude those based on {@link FilteringMode} on the resultant variable.
     *
     * Projection expects array of fields name based on which it creates the selection and rejection set. All the field
     * whose name is present in array goes in selection set and rest of the fields goes in rejection set.
     *
     * {@link FilteringMode} operates on the selection and rejection set to determine which one would reflect in the
     * resultant datamodel.
     *
     * @warning
     * Selection and rejection set is only a logical idea for concept explanation purpose.
     *
     * @example
     *  const dt = new DataModel(data, schema);
     *
     *  // with projection mode NORMAL:
     *  const normDt = dt.project(["Name", "HorsePower"]);
     *  console.log(normDt.getData());
     *
     *  // with projection mode INVERSE:
     *  const inverDt = dt.project(["Name", "HorsePower"], { mode: DataModel.FilteringMode.INVERSE })
     *  console.log(inverDt.getData());
     *
     *  // with projection mode ALL:
     *  const dtArr = dt.project(["Name", "HorsePower"], { mode: DataModel.FilteringMode.ALL })
     *  // print the normal parts
     *  console.log(dtArr[0].getData());
     *  // print the inverted parts
     *  console.log(dtArr[1].getData());
     *
     * @text
     * This is the chained version of the `project` operator. `project` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {Array.<string | Regexp>} projField - An array of column names in string or regular expression.
     * @param {Object} [config] - An optional config to control the creation of new DataModel
     * @param {FilteringMode} [config.mode=FilteringMode.NORMAL] - Mode of the projection. A falsy mode falls back to
     *      the default.
     * @param {boolean} [config.saveChild=true] - Forwarded to the cloning step; presumably controls whether the
     *      result is recorded as a child of this instance (confirm against `cloneWithProject`).
     *
     * @return {DataModel | DataModel[]} Returns the new DataModel instance after operation. In
     *      `FilteringMode.ALL` an array `[projection, rejection]` is returned.
     */
    project (projField, config) {
        const defConfig = {
            mode: FilteringMode.NORMAL,
            saveChild: true
        };
        config = Object.assign({}, defConfig, config);
        // Same fallback as `select`: an explicit falsy mode must not override the default.
        config.mode = config.mode || defConfig.mode;

        const fieldConfig = this.getFieldsConfig();
        const allFields = Object.keys(fieldConfig);
        const { mode } = config;
        const normalizedProjField = getNormalizedProFields(projField, allFields, fieldConfig);

        let dataModel;

        if (mode === FilteringMode.ALL) {
            // ALL is expressed as two independent projections: the selected fields and their complement.
            const projectionClone = cloneWithProject(this, normalizedProjField, {
                mode: FilteringMode.NORMAL,
                saveChild: config.saveChild
            }, allFields);
            const rejectionClone = cloneWithProject(this, normalizedProjField, {
                mode: FilteringMode.INVERSE,
                saveChild: config.saveChild
            }, allFields);
            dataModel = [projectionClone, rejectionClone];
        } else {
            dataModel = cloneWithProject(this, normalizedProjField, config, allFields);
        }

        return dataModel;
    }

    /**
     * Returns the field configuration map last built by `calculateFieldsConfig`.
     *
     * @return {Object} Map of field name to `{ index, def }`.
     */
    getFieldsConfig () {
        return this._fieldConfig;
    }

    /**
     * Rebuilds the field configuration map from the current fieldspace. Each field name maps to its position in the
     * fieldspace (`index`) and its schema (`def`).
     *
     * @return {Relation} The current instance, to allow chaining.
     */
    calculateFieldsConfig () {
        this._fieldConfig = this._fieldspace.fields.reduce((acc, fieldObj, i) => {
            acc[fieldObj.name()] = {
                index: i,
                def: fieldObj.schema(),
            };
            return acc;
        }, {});
        return this;
    }

    /**
     * Frees up the resources associated with the current DataModel instance and breaks all the links instance has in
     * the DAG.
     *
     * The instance is removed from its parent's child list and every direct child has its parent reference cleared.
     *
     * @public
     */
    dispose () {
        this._parent && this._parent.removeChild(this);
        this._parent = null;
        this._children.forEach((child) => {
            child._parent = null;
        });
        this._children = [];
    }

    /**
     * Removes the specified child {@link DataModel} from the child list of the current {@link DataModel} instance.
     * Does nothing when the given instance is not a child of the current instance.
     *
     * @example
     * const schema = [
     *    { name: 'Name', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *    { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *    { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *    { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *    { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     *
     * const dt2 = dt.select(fields => fields.Origin.value === "USA")
     * dt.removeChild(dt2);
     *
     * @private
     *
     * @param {DataModel} child - Delegates the parent to remove this child.
     */
    removeChild (child) {
        const idx = this._children.findIndex(sibling => sibling === child);
        if (idx !== -1) {
            this._children.splice(idx, 1);
        }
    }

    /**
     * Sets the specified {@link DataModel} as a parent for the current {@link DataModel} instance.
     *
     * The instance is first removed from the child list of its previous parent, if any. Passing a falsy value
     * detaches the instance without attaching it to a new parent.
     *
     * @param {DataModel} parent - The datamodel instance which will act as parent.
     */
    setParent (parent) {
        this._parent && this._parent.removeChild(this);
        this._parent = parent;
        parent && parent._children.push(this);
    }

    /**
     * Returns the parent {@link DataModel} instance.
     *
     * @example
     * const schema = [
     *    { name: 'Name', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *    { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *    { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *    { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *    { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     *
     * const dt2 = dt.select(fields => fields.Origin.value === "USA");
     * const parentDm = dt2.getParent();
     *
     * @return {DataModel} Returns the parent DataModel instance.
     */
    getParent () {
        return this._parent;
    }

    /**
     * Returns the immediate child {@link DataModel} instances.
     *
     * @example
     * const schema = [
     *    { name: 'Name', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *    { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *    { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *    { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *    { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     *
     * const childDm1 = dt.select(fields => fields.Origin.value === "USA");
     * const childDm2 = dt.select(fields => fields.Origin.value === "Japan");
     * const childDm3 = dt.groupBy(["Origin"]);
     *
     * @return {DataModel[]} Returns the immediate child DataModel instances. This is the internal child list
     *      itself, not a copy.
     */
    getChildren () {
        return this._children;
    }

    /**
     * Returns the in-between operation meta data while creating the current {@link DataModel} instance.
     *
     * @example
     * const schema = [
     *   { name: 'Name', type: 'dimension' },
     *   { name: 'HorsePower', type: 'measure' },
     *   { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *   { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *   { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *   { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *   { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     * const dt2 = dt.select(fields => fields.Origin.value === "USA");
     * const dt3 = dt2.groupBy(["Origin"]);
     * const derivations = dt3.getDerivations();
     *
     * @return {Any[]} Returns the derivation meta data.
     */
    getDerivations () {
        return this._derivation;
    }

    /**
     * Returns the in-between operation meta data happened from root {@link DataModel} to current instance.
     *
     * @example
     * const schema = [
     *   { name: 'Name', type: 'dimension' },
     *   { name: 'HorsePower', type: 'measure' },
     *   { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *   { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *   { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *   { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *   { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     * const dt2 = dt.select(fields => fields.Origin.value === "USA");
     * const dt3 = dt2.groupBy(["Origin"]);
     * const ancDerivations = dt3.getAncestorDerivations();
     *
     * @return {Any[]} Returns the previous derivation meta data.
     */
    getAncestorDerivations () {
        return this._ancestorDerivation;
    }
}

export default Relation;