danfojs
Version:
JavaScript library providing high performance, intuitive, and easy to use data structures for manipulating and processing structured data.
491 lines (489 loc) • 18.3 kB
JavaScript
"use strict";
/**
* @license
* Copyright 2022 JsData. All rights reserved.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ==========================================================================
*/
var __assign = (this && this.__assign) || function () {
__assign = Object.assign || function(t) {
for (var s, i = 1, n = arguments.length; i < n; i++) {
s = arguments[i];
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
t[p] = s[p];
}
return t;
};
return __assign.apply(this, arguments);
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
var utils_1 = __importDefault(require("../shared/utils"));
var config_1 = __importDefault(require("../shared/config"));
var errors_1 = __importDefault(require("../shared/errors"));
var defaults_1 = require("../shared/defaults");
var tensorflowlib_1 = __importDefault(require("../shared/tensorflowlib"));
var utils = new utils_1.default();
/**
* N-Dimension data structure. Stores multi-dimensional
* data in a size-mutable, labeled data structure. Analogous to the Python Pandas DataFrame.
*
* @param Object
*
* data: 1D or 2D Array, JSON, Tensor, Block of data.
*
* index: Array of numeric or string names for subseting array. If not specified, indexes are auto generated.
*
* columns: Array of column names. If not specified, column names are auto generated.
*
* dtypes: Array of data types for each the column. If not specified, dtypes inferred.
*
* config: General configuration object for NDframe
*
* @returns NDframe
*/
var NDframe = /** @class */ (function () {
function NDframe(_a) {
var data = _a.data, index = _a.index, columns = _a.columns, dtypes = _a.dtypes, config = _a.config, isSeries = _a.isSeries;
this.$dataIncolumnFormat = [];
this.$index = [];
this.$columns = [];
this.$dtypes = [];
this.$isSeries = isSeries;
if (config) {
this.$config = new config_1.default(__assign(__assign({}, defaults_1.BASE_CONFIG), config));
}
else {
this.$config = new config_1.default(defaults_1.BASE_CONFIG);
}
if (data instanceof tensorflowlib_1.default.Tensor) {
data = data.arraySync();
}
if (data === undefined || (Array.isArray(data) && data.length === 0)) {
if (columns === undefined)
columns = [];
if (dtypes === undefined)
dtypes = [];
if (columns.length === 0 && dtypes.length !== 0)
errors_1.default.throwDtypeWithoutColumnError();
this.loadArrayIntoNdframe({ data: [], index: [], columns: columns, dtypes: dtypes });
}
else if (utils.is1DArray(data)) {
this.loadArrayIntoNdframe({ data: data, index: index, columns: columns, dtypes: dtypes });
}
else {
if (Array.isArray(data) && utils.isObject(data[0])) {
this.loadObjectIntoNdframe({ data: data, type: 1, index: index, columns: columns, dtypes: dtypes });
}
else if (utils.isObject(data)) {
this.loadObjectIntoNdframe({ data: data, type: 2, index: index, columns: columns, dtypes: dtypes });
}
else if (Array.isArray((data)[0]) ||
utils.isNumber((data)[0]) ||
utils.isString((data)[0])) {
this.loadArrayIntoNdframe({ data: data, index: index, columns: columns, dtypes: dtypes });
}
else if (Array.isArray(data) && data.length > 0 && utils.isDate(data[0])) {
this.loadArrayIntoNdframe({ data: data, index: index, columns: columns, dtypes: dtypes });
}
else {
throw new Error("File format not supported!");
}
}
}
/**
* Internal function to load array of data into NDFrame
* @param data The array of data to load into NDFrame
* @param index Array of numeric or string names for subsetting array.
* @param columns Array of column names.
* @param dtypes Array of data types for each the column.
*/
NDframe.prototype.loadArrayIntoNdframe = function (_a) {
var data = _a.data, index = _a.index, columns = _a.columns, dtypes = _a.dtypes;
// this.$data = utils.replaceUndefinedWithNaN(data, this.$isSeries);
this.$data = data;
if (!this.$config.isLowMemoryMode) {
//In NOT low memory mode, we transpose the array and save in column format.
//This makes column data retrieval run in constant time
this.$dataIncolumnFormat = utils.transposeArray(data);
}
this.$setIndex(index);
this.$setDtypes(dtypes);
this.$setColumnNames(columns);
};
/**
* Internal function to format and load a Javascript object or object of arrays into NDFrame.
* @param data Object or object of arrays.
* @param type The type of the object. There are two recognized types:
*
* - type 1 object are in JSON format `[{a: 1, b: 2}, {a: 30, b: 20}]`.
*
* - type 2 object are of the form `{a: [1,2,3,4], b: [30,20, 30, 20}]}`
* @param index Array of numeric or string names for subsetting array.
* @param columns Array of column names.
* @param dtypes Array of data types for each the column.
*/
NDframe.prototype.loadObjectIntoNdframe = function (_a) {
var data = _a.data, type = _a.type, index = _a.index, columns = _a.columns, dtypes = _a.dtypes;
if (type === 1 && Array.isArray(data)) {
var _data = (data).map(function (item) {
return Object.values(item);
});
var _columnNames = void 0;
if (columns) {
_columnNames = columns;
}
else {
_columnNames = Object.keys((data)[0]);
}
this.loadArrayIntoNdframe({ data: _data, index: index, columns: _columnNames, dtypes: dtypes });
}
else {
var _b = utils.getRowAndColValues(data), _data = _b[0], _colNames = _b[1];
var _columnNames = void 0;
if (columns) {
_columnNames = columns;
}
else {
_columnNames = _colNames;
}
this.loadArrayIntoNdframe({ data: _data, index: index, columns: _columnNames, dtypes: dtypes });
}
};
Object.defineProperty(NDframe.prototype, "tensor", {
/**
* Converts and returns the data in the NDframe as a Tensorflow.js Tensor.
*/
get: function () {
if (this.$isSeries) {
return tensorflowlib_1.default.tensor1d(this.$data, this.$dtypes[0]);
}
else {
return tensorflowlib_1.default.tensor2d(this.$data, this.shape, "float32");
}
},
enumerable: false,
configurable: true
});
Object.defineProperty(NDframe.prototype, "dtypes", {
/**
* Returns the dtypes of the columns
*/
get: function () {
return this.$dtypes;
},
enumerable: false,
configurable: true
});
/**
* Internal function to set the Dtypes of the NDFrame from an array. This function
* performs the necessary checks.
*/
NDframe.prototype.$setDtypes = function (dtypes) {
if (this.$isSeries) {
if (dtypes) {
if (this.$data.length != 0 && dtypes.length != 1) {
errors_1.default.throwDtypesLengthError(this, dtypes);
}
if (!(defaults_1.DATA_TYPES.includes("" + dtypes[0]))) {
errors_1.default.throwDtypeNotSupportedError(dtypes[0]);
}
this.$dtypes = dtypes;
}
else {
this.$dtypes = utils.inferDtype(this.$data);
}
}
else {
if (dtypes) {
if (this.$data.length != 0 && dtypes.length != this.shape[1]) {
errors_1.default.throwDtypesLengthError(this, dtypes);
}
if (this.$data.length == 0 && dtypes.length == 0) {
this.$dtypes = dtypes;
}
else {
dtypes.forEach(function (dtype) {
if (!(defaults_1.DATA_TYPES.includes(dtype))) {
errors_1.default.throwDtypeNotSupportedError(dtype);
}
});
this.$dtypes = dtypes;
}
}
else {
this.$dtypes = utils.inferDtype(this.$data);
}
}
};
Object.defineProperty(NDframe.prototype, "ndim", {
/**
* Returns the dimension of the data. Series have a dimension of 1,
* while DataFrames have a dimension of 2.
*/
get: function () {
if (this.$isSeries) {
return 1;
}
else {
return 2;
}
},
enumerable: false,
configurable: true
});
Object.defineProperty(NDframe.prototype, "axis", {
/**
* Returns the axis labels of the NDFrame.
*/
get: function () {
return {
index: this.$index,
columns: this.$columns
};
},
enumerable: false,
configurable: true
});
Object.defineProperty(NDframe.prototype, "config", {
/**
* Returns the configuration object of the NDFrame.
*/
get: function () {
return this.$config;
},
enumerable: false,
configurable: true
});
/**
* Internal function to set the configuration of the ndframe
*/
NDframe.prototype.$setConfig = function (config) {
this.$config = config;
};
Object.defineProperty(NDframe.prototype, "index", {
/**
* Returns the indices of the NDFrame
*/
get: function () {
return this.$index;
},
enumerable: false,
configurable: true
});
/**
* Internal function to set the index of the NDFrame with the specified
* array of indices. Performs all necessary checks to ensure that the
* index is valid.
*/
NDframe.prototype.$setIndex = function (index) {
if (index) {
if (this.$data.length != 0 && index.length != this.shape[0]) {
errors_1.default.throwIndexLengthError(this, index);
}
if (Array.from(new Set(index)).length !== this.shape[0]) {
errors_1.default.throwIndexDuplicateError();
}
this.$index = index;
}
else {
this.$index = utils.range(0, this.shape[0] - 1); //generate index
}
};
/**
* Internal function to reset the index of the NDFrame using a range of indices.
*/
NDframe.prototype.$resetIndex = function () {
this.$index = utils.range(0, this.shape[0] - 1);
};
Object.defineProperty(NDframe.prototype, "columns", {
/**
* Returns the column names of the NDFrame
*/
get: function () {
return this.$columns;
},
enumerable: false,
configurable: true
});
/**
* Internal function to set the column names for the NDFrame. This function
* performs a check to ensure that the column names are unique, and same length as the
* number of columns in the data.
*/
NDframe.prototype.$setColumnNames = function (columns) {
// console.log(columns);
if (this.$isSeries) {
if (columns) {
if (this.$data.length != 0 && columns.length != 1 && typeof columns != 'string') {
errors_1.default.throwColumnNamesLengthError(this, columns);
}
this.$columns = columns;
}
else {
this.$columns = ["0"];
}
}
else {
if (columns) {
if (this.$data.length != 0 && columns.length != this.shape[1]) {
errors_1.default.throwColumnNamesLengthError(this, columns);
}
if (Array.from(new Set(columns)).length !== columns.length) {
errors_1.default.throwColumnDuplicateError();
}
this.$columns = columns;
}
else {
this.$columns = (utils.range(0, this.shape[1] - 1)).map(function (val) { return "" + val; }); //generate columns
}
}
};
Object.defineProperty(NDframe.prototype, "shape", {
/**
* Returns the shape of the NDFrame. Shape is determined by [row length, column length]
*/
get: function () {
if (this.$data.length === 0) {
if (this.$columns.length === 0)
return [0, 0];
else
return [0, this.$columns.length];
}
if (this.$isSeries) {
return [this.$data.length, 1];
}
else {
var rowLen = (this.$data).length;
var colLen = this.$data[0].length;
return [rowLen, colLen];
}
},
enumerable: false,
configurable: true
});
Object.defineProperty(NDframe.prototype, "values", {
/**
* Returns the underlying data in Array format.
*/
get: function () {
return this.$data;
},
enumerable: false,
configurable: true
});
/**
* Updates the internal $data property to the specified value
* @param values An array of values to set
* @param checkLength Whether to check the length of the new values and the existing row length
* @param checkColumnLength Whether to check the length of the new values and the existing column length
* */
NDframe.prototype.$setValues = function (values, checkLength, checkColumnLength) {
var _this = this;
if (checkLength === void 0) { checkLength = true; }
if (checkColumnLength === void 0) { checkColumnLength = true; }
if (this.$isSeries) {
if (checkLength && values.length != this.shape[0]) {
errors_1.default.throwRowLengthError(this, values.length);
}
this.$data = values;
this.$dtypes = utils.inferDtype(values); //Dtype may change depeneding on the value set
if (!this.$config.isLowMemoryMode) {
this.$dataIncolumnFormat = values;
}
}
else {
if (checkLength && values.length != this.shape[0]) {
errors_1.default.throwRowLengthError(this, values.length);
}
if (checkColumnLength) {
values.forEach(function (value) {
if (value.length != _this.shape[1]) {
errors_1.default.throwColumnLengthError(_this, values.length);
}
});
}
this.$data = values;
this.$dtypes = utils.inferDtype(values);
if (!this.$config.isLowMemoryMode) {
this.$dataIncolumnFormat = utils.transposeArray(values);
}
}
};
Object.defineProperty(NDframe.prototype, "getColumnData", {
/**
* Returns the underlying data in Array column format.
* Similar to this.values, but in column format.
*/
get: function () {
if (this.config.isLowMemoryMode) {
return utils.transposeArray(this.values);
}
else {
return this.$dataIncolumnFormat;
}
},
enumerable: false,
configurable: true
});
Object.defineProperty(NDframe.prototype, "size", {
/**
* Returns the size of the NDFrame object
*
*/
get: function () {
return this.shape[0] * this.shape[1];
},
enumerable: false,
configurable: true
});
NDframe.prototype.toCSV = function (options) {
throw new Error("`toCSV` function is deprecated. Use `toCSV` function directly instead. e.g. `dfd.toCSV(df)`");
};
NDframe.prototype.toJSON = function (options) {
throw new Error("`toJSON` function is deprecated. Use `toJSON` function directly instead. e.g. `dfd.toJSON(df, { format: 'row' })`");
};
/**
* Converts a DataFrame or Series to Excel.
* @deprecated Use `toExcel` function directly instead.
* @example
* ```
* import * as dfd from "danfojs"
* const df = new dfd.DataFrame([[1, 2, 3], [4, 5, 6]])
* dfd.toExcel(df, {
* filePath: "./data/sample.xlsx",
* sheetName: "MySheet",
* })
* ```
*
* @example
* ```
* import { toExcel } from "danfojs-node"
* const df = new DataFrame([[1, 2, 3], [4, 5, 6]])
* toExcel(df, {
* filePath: "./data/sample.xlsx",
* sheetName: "MySheet",
* })
* ```
*/
NDframe.prototype.toExcel = function (options) {
throw new Error("Deprecated. Use `toExcel` function directly instead. e.g. `dfd.toExcel(df, {filePath: 'path/to/file.xlsx'})`");
};
/**
* Pretty prints a DataFrame or Series to the console
*/
NDframe.prototype.print = function () {
console.log(this + "");
};
return NDframe;
}());
exports.default = NDframe;