UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

236 lines (235 loc) 8.65 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.jsTypeToPolarsType = void 0;
exports.arrayToJsSeries = arrayToJsSeries;
exports.arrayToJsDataFrame = arrayToJsDataFrame;
const types_1 = require("node:util/types");
const datatypes_1 = require("../datatypes");
const datatype_1 = require("../datatypes/datatype");
const field_1 = require("../datatypes/field");
const series_1 = require("../series");
const polars_internal_1 = __importDefault(require("./polars_internal"));

/**
 * Typed-array constructor name -> name of the matching `DataType` member.
 *
 * Only the member *name* is stored; `DataType` itself is dereferenced at call
 * time, exactly as the original per-case `return DataType.X` statements did.
 *
 * NOTE(review): `Uint8ClampedArray` is accepted by `fromTypedArray` below but
 * is intentionally absent here, matching the original behaviour (inference
 * throws for it). Confirm whether it should map to `UInt8`.
 */
const TYPED_ARRAY_DTYPE_KEYS = new Map([
    [Int8Array.name, "Int8"],
    [Int16Array.name, "Int16"],
    [Int32Array.name, "Int32"],
    [BigInt64Array.name, "Int64"],
    [Uint8Array.name, "UInt8"],
    [Uint16Array.name, "UInt16"],
    [Uint32Array.name, "UInt32"],
    [BigUint64Array.name, "UInt64"],
    [Float32Array.name, "Float32"],
    [Float64Array.name, "Float64"],
]);

/** Typed-array constructor name -> name of the `JsSeries` factory that ingests it. */
const TYPED_ARRAY_SERIES_FACTORIES = new Map([
    [Int8Array.name, "newInt8Array"],
    [Int16Array.name, "newInt16Array"],
    [Int32Array.name, "newInt32Array"],
    [BigInt64Array.name, "newBigint64Array"],
    [Uint8Array.name, "newUint8Array"],
    [Uint8ClampedArray.name, "newUint8ClampedArray"],
    [Uint16Array.name, "newUint16Array"],
    [Uint32Array.name, "newUint32Array"],
    [BigUint64Array.name, "newBiguint64Array"],
    [Float32Array.name, "newFloat32Array"],
    [Float64Array.name, "newFloat64Array"],
]);

/**
 * Dtype variants for which `arrayToJsSeries` casts the freshly built series
 * to the requested dtype after construction.
 */
const CAST_AFTER_BUILD_VARIANTS = new Set([
    "Datetime",
    "Date",
    "Categorical",
    "Int8",
    "Int16",
    "UInt8",
    "UInt16",
    "Float32",
]);

/**
 * Infer a polars `DataType` from a JavaScript value.
 *
 * - `null` and any otherwise unrecognised value resolve to `Float64`.
 * - An array resolves to the type inferred from its first non-null element
 *   (no `List` wrapper is added here).
 * - A typed array resolves to the matching numeric type.
 * - A `Date` resolves to `Datetime` with millisecond precision.
 * - A plain object (`constructor === Object`) resolves to a `Struct` whose
 *   fields are inferred recursively from the object's own entries.
 * - `bigint` -> `UInt64`, `number` -> `Float64`, `string` -> `Utf8`,
 *   `boolean` -> `Bool`.
 *
 * @param value - any JavaScript value
 * @returns the inferred `DataType`
 * @throws {Error} when `value` is a typed array of an unmapped kind
 */
const jsTypeToPolarsType = (value) => {
    if (value === null) {
        return datatypes_1.DataType.Float64;
    }
    if (Array.isArray(value)) {
        return (0, exports.jsTypeToPolarsType)(firstNonNull(value));
    }
    if ((0, types_1.isTypedArray)(value)) {
        const typedArrayName = value.constructor.name;
        const dtypeKey = TYPED_ARRAY_DTYPE_KEYS.get(typedArrayName);
        if (dtypeKey === undefined) {
            throw new Error(`unknown typed array type: ${typedArrayName}`);
        }
        return datatypes_1.DataType[dtypeKey];
    }
    if (value instanceof Date) {
        return datatypes_1.DataType.Datetime(datatype_1.TimeUnit.Milliseconds);
    }
    if (typeof value === "object" && value.constructor === Object) {
        const fields = Object.entries(value).map(([fieldName, fieldValue]) => {
            const fieldDtype = (0, exports.jsTypeToPolarsType)(fieldValue);
            return field_1.Field.from(fieldName, fieldDtype);
        });
        return datatypes_1.DataType.Struct(fields);
    }
    switch (typeof value) {
        case "bigint":
            // The value must stay on the same line as `return`: a line break
            // here triggers automatic semicolon insertion and yields undefined.
            return datatypes_1.DataType.UInt64;
        case "number":
            return datatypes_1.DataType.Float64;
        case "string":
            return datatypes_1.DataType.Utf8;
        case "boolean":
            return datatypes_1.DataType.Bool;
        default:
            return datatypes_1.DataType.Float64;
    }
};
exports.jsTypeToPolarsType = jsTypeToPolarsType;

/**
 * __finds the first non null value in the inputs__
 * ___
 * If the first non-null value is an array, the input is flattened one level
 * and searched again; the result is returned wrapped in an array, so each
 * level of nesting found this way adds one wrapper.
 *
 * @param arr - array to search
 * @returns the first value that is neither `null` nor `undefined`
 *   (`undefined` when there is none), wrapped as described above
 *
 * @example
 * ```
 * > const input = [null, [], [null, "a", "b"]]
 * > firstNonNull(input)
 * ["a"]
 * > const ints = [null, 1]
 * > firstNonNull(ints)
 * 1
 * ```
 */
const firstNonNull = (arr) => {
    const first = arr.find((item) => item !== null && item !== undefined);
    if (Array.isArray(first)) {
        return [firstNonNull(arr.flat())];
    }
    return first;
};

/**
 * Build an internal `JsSeries` directly from a typed array.
 *
 * @param name - series name
 * @param value - typed array holding the values
 * @returns the value produced by the matching `JsSeries.new*Array` factory
 * @throws {Error} when the typed array kind has no registered factory
 */
const fromTypedArray = (name, value) => {
    const typedArrayName = value.constructor.name;
    const factoryName = TYPED_ARRAY_SERIES_FACTORIES.get(typedArrayName);
    if (factoryName === undefined) {
        throw new Error(`unknown typed array type: ${typedArrayName}`);
    }
    return polars_internal_1.default.JsSeries[factoryName](name, value);
};

/**
 * Construct an internal `JsSeries` from an array
 *
 * @param name - series name (defaults to `""`)
 * @param values - array or typed array of values (defaults to `[]`)
 * @param dtype - optional target dtype; inferred from the first non-null
 *   value when omitted
 * @param strict - forwarded unchanged to the native constructors and casts
 * @returns the constructed internal series
 * @throws {Error} when `dtype` is `Decimal` and the first non-null value is
 *   not a `bigint`
 */
function arrayToJsSeries(name = "", values = [], dtype, strict = false) {
    if ((0, types_1.isTypedArray)(values)) {
        return fromTypedArray(name, values);
    }
    //Empty sequence defaults to Float64 type
    if (!values?.length && !dtype) {
        dtype = datatypes_1.DataType.Float64;
    }
    const firstValue = firstNonNull(values);

    // Nested input (array of arrays / typed arrays) becomes a List series.
    if (Array.isArray(firstValue) || (0, types_1.isTypedArray)(firstValue)) {
        const listDtype = (0, exports.jsTypeToPolarsType)(firstValue);
        const buildList = (0, datatypes_1.polarsTypeToConstructor)(datatypes_1.DataType.List(listDtype));
        const listSeries = buildList(name, values, strict, listDtype);
        if (dtype instanceof datatype_1.FixedSizeList) {
            // TODO: build a FixedSizeList natively in Rust
            return listSeries.cast(dtype, strict);
        }
        return listSeries;
    }

    dtype = dtype ?? (0, exports.jsTypeToPolarsType)(firstValue);

    if (dtype?.variant === "Struct") {
        // Rows are loaded into a frame, which is then collapsed to a struct.
        // NOTE(review): the trailing `1` is presumably the schema-inference
        // length — confirm against the native `fromRows` signature.
        const df = polars_internal_1.default.fromRows(values, null, 1);
        return df.toStruct(name);
    }
    if (dtype?.variant === "Decimal") {
        if (typeof firstValue !== "bigint") {
            throw new Error("Decimal type can only be constructed from BigInt");
        }
        return polars_internal_1.default.JsSeries.newAnyvalue(name, values, dtype, strict);
    }

    let series;
    if (firstValue instanceof Date) {
        series = polars_internal_1.default.JsSeries.newOptDate(name, values, strict);
    }
    else {
        const build = (0, datatypes_1.polarsTypeToConstructor)(dtype);
        series = build(name, values, strict);
    }
    // These variants are cast to the requested dtype after construction.
    if (CAST_AFTER_BUILD_VARIANTS.has(dtype.variant)) {
        series = series.cast(dtype, strict);
    }
    return series;
}

/**
 * Construct an internal `JsDataFrame` from an array.
 *
 * The first element of `data` decides how the input is interpreted:
 * - empty `data`: a frame with no data; `options.columns`, when given,
 *   yields one empty column per name;
 * - elements exposing `_s` (series wrappers): one column per series, unnamed
 *   ones being renamed `column_<idx>`;
 * - plain objects: loaded as rows via `fromRows`;
 * - arrays: rows or columns depending on `options.orient` (when `orient` is
 *   unset and `columns` is given, "col" is chosen if `columns.length`
 *   equals `data.length`, otherwise "row");
 * - anything else (including a `null`/`undefined` first element): the whole
 *   input becomes a single column named `column_0`.
 *
 * @param data - array of series, row objects, arrays or scalars
 * @param options - optional `columns`, `orient`, `schema`, `inferSchemaLength`
 * @returns the constructed internal data frame
 * @throws {TypeError} when `columns` does not match the number of data columns
 */
function arrayToJsDataFrame(data, options) {
    const columns = options?.columns;
    let orient = options?.orient;
    const schema = options?.schema;
    const inferSchemaLength = options?.inferSchemaLength;
    const first = data[0];
    let dataSeries;
    if (!data.length) {
        dataSeries = [];
    }
    else if (first?._s) {
        dataSeries = [];
        for (const [idx, series] of data.entries()) {
            if (!series.name) {
                series.rename(`column_${idx}`, true);
            }
            dataSeries.push(series._s);
        }
    }
    else if (first?.constructor?.name === "Object") {
        // Null-safe lookup: a null/undefined or prototype-less first element
        // falls through to the generic branches instead of throwing here.
        const df = polars_internal_1.default.fromRows(data, schema, inferSchemaLength);
        if (columns) {
            df.columns = columns;
        }
        return df;
    }
    else if (Array.isArray(first)) {
        if (!orient && columns) {
            orient = columns.length === data.length ? "col" : "row";
        }
        if (orient === "row") {
            const df = polars_internal_1.default.fromRows(data);
            if (columns) {
                df.columns = columns;
            }
            return df;
        }
        dataSeries = data.map((col, idx) => (0, series_1.Series)(`column_${idx}`, col)._s);
    }
    else {
        dataSeries = [(0, series_1.Series)("column_0", data)._s];
    }
    dataSeries = handleColumnsArg(dataSeries, columns);
    return new polars_internal_1.default.JsDataFrame(dataSeries);
}

/**
 * Apply the user supplied column names to the internal series.
 *
 * @param data - internal series, one per column (may be empty or missing)
 * @param columns - optional column names
 * @returns `data` untouched when `columns` is not given; one empty series
 *   per name when there is no data; otherwise `data` after each series has
 *   been renamed in order
 * @throws {TypeError} when the number of names differs from the number of
 *   non-empty data columns
 */
function handleColumnsArg(data, columns) {
    if (!columns) {
        return data;
    }
    // The caller passes `[]` (truthy) for empty input, so emptiness has to be
    // checked explicitly; otherwise this branch can never run and named
    // columns on empty data end in the dimension error below.
    if (!data || data.length === 0) {
        return columns.map((columnName) => series_1.Series.from(columnName, [])._s);
    }
    if (data.length === columns.length) {
        for (const [idx, columnName] of columns.entries()) {
            data[idx].rename(columnName);
        }
        return data;
    }
    throw new TypeError("Dimensions of columns arg must match data dimensions.");
}