UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

729 lines (728 loc) 24 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.Series = void 0; exports._Series = _Series; const dataframe_1 = require("../dataframe"); const datatypes_1 = require("../datatypes"); const error_1 = require("../error"); const construction_1 = require("../internals/construction"); const polars_internal_1 = __importDefault(require("../internals/polars_internal")); const functions_1 = require("../lazy/functions"); const datetime_1 = require("./datetime"); const list_1 = require("./list"); const string_1 = require("./string"); const struct_1 = require("./struct"); const inspect = Symbol.for("nodejs.util.inspect.custom"); function _Series(_s) { const unwrap = (method, ...args) => { return _s[method](...args); }; const wrap = (method, ...args) => { return _Series(unwrap(method, ...args)); }; const dtypeWrap = (method, ...args) => { const dtype = _s.dtype; const dt = datatypes_1.DTYPE_TO_FFINAME[dtype]; const internalMethod = `series${method}${dt}`; return _Series(polars_internal_1.default[internalMethod](_s, ...args)); }; const dtypeUnwrap = (method, ...args) => { const dtype = _s.dtype; const dt = datatypes_1.DTYPE_TO_FFINAME[dtype]; const internalMethod = `series${method}${dt}`; return polars_internal_1.default[internalMethod](_s, ...args); }; const expr_op = (method, ...args) => { return _Series(_s) .toFrame() .select((0, functions_1.col)(_s.name)[method](...args)) .getColumn(_s.name); }; const series = { _s, [inspect]() { return _s.toString(); }, *[Symbol.iterator]() { let start = 0; const len = _s.len(); while (start < len) { const v = _s.getIdx(start); start++; yield v; } }, toString() { return _s.toString(); }, serialize(format) { return _s.serialize(format); }, [Symbol.toStringTag]() { return "Series"; }, get dtype() { return datatypes_1.DataType.deserialize(_s.dtype); }, get name() { return _s.name; }, get length() { return _s.len(); }, get str() { return (0, string_1.SeriesStringFunctions)(_s); }, get lst() { return (0, list_1.SeriesListFunctions)(_s); }, get date() { return (0, datetime_1.SeriesDateFunctions)(_s); }, get struct() { return (0, struct_1.SeriesStructFunctions)(_s); }, abs() { return wrap("abs"); }, add(field) { return dtypeWrap("Add", field); }, alias(name) { const s = _s.clone(); s.rename(name); return _Series(s); }, append(other) { _s.append(other.inner()); }, argMax() { return _s.argMax(); }, argMin() { return _s.argMin(); }, argSort(descending = false, nullsLast = true, multithreaded = true, maintainOrder = false) { if (typeof descending === "boolean") { return _Series(_s.argsort(descending, nullsLast, multithreaded, maintainOrder)); } return _Series(_s.argsort(descending.descending ?? descending.reverse ?? false, descending.nullsLast ?? nullsLast, descending.multithreaded ?? multithreaded, descending.maintainOrder ?? maintainOrder)); }, argTrue() { return _Series(this.toFrame() ._df.lazy() .select([polars_internal_1.default.argWhere(polars_internal_1.default.col(this.name))]) .collectSync() .column(this.name)); }, argUnique() { return _Series(_s.argUnique()); }, as(name) { return this.alias(name); }, bitand(other) { return _Series(_s.bitand(other._s)); }, bitor(other) { return _Series(_s.bitor(other._s)); }, bitxor(other) { return _Series(_s.bitxor(other._s)); }, cast(dtype, strict = false) { return _Series(_s.cast(dtype, strict)); }, chunkLengths() { return _s.chunkLengths(); }, clone() { return _Series(_s.clone()); }, concat(other) { const s = _s.clone(); s.append(other.inner()); return _Series(s); }, cumCount(reverse) { return expr_op("cumCount", reverse); }, cumSum(reverse) { return _Series(_s.cumSum(reverse)); }, cumMax(reverse) { return _Series(_s.cumMax(reverse)); }, cumMin(reverse) { return _Series(_s.cumMin(reverse)); }, cumProd(reverse) { return _Series(_s.cumProd(reverse)); }, describe() { let s = this.clone(); let stats = {}; if (!this.length) { throw new RangeError("Series must contain at least one value"); } if (this.isNumeric()) { s = s.cast(datatypes_1.DataType.Float64); stats = { min: s.min(), max: s.max(), null_count: s.nullCount(), mean: s.mean(), count: s.len(), }; } else if (s.isBoolean()) { stats = { sum: s.sum(), null_count: s.nullCount(), count: s.len(), }; } else if (s.isString()) { stats = { unique: s.nUnique(), null_count: s.nullCount(), count: s.len(), }; } else { throw new error_1.InvalidOperationError("describe", s.dtype); } return (0, dataframe_1.DataFrame)({ statistic: Object.keys(stats), value: Object.values(stats), }); }, diff(n = 1, nullBehavior = "ignore") { return typeof n === "number" ? _Series(_s.diff(n, nullBehavior)) : _Series(_s.diff(n?.n ?? 1, n.nullBehavior ?? nullBehavior)); }, div(field) { return dtypeWrap("Div", field); }, divideBy(field) { return this.div(field); }, dot(other) { return wrap("dot", other._s); }, dropNulls() { return wrap("dropNulls"); }, eq(field) { return dtypeWrap("Eq", field); }, equals(field) { return this.eq(field); }, ewmMean(...args) { return expr_op("ewmMean", ...args); }, ewmStd(...args) { return expr_op("ewmStd", ...args); }, ewmVar(...args) { return expr_op("ewmVar", ...args); }, explode() { return wrap("explode"); }, extend(value, n) { return wrap("extendConstant", value, n); }, extendConstant(value, n) { return wrap("extendConstant", value, n); }, fillNull(strategy) { return typeof strategy === "string" ? wrap("fillNull", strategy) : wrap("fillNull", strategy.strategy); }, filter(predicate) { return exports.Series.isSeries(predicate) ? wrap("filter", predicate._s) : wrap("filter", SeriesConstructor("", predicate)._s); }, get(field) { return dtypeUnwrap("Get", field); }, getIndex(idx) { return _s.getIdx(idx); }, gt(field) { return dtypeWrap("Gt", field); }, greaterThan(field) { return this.gt(field); }, gtEq(field) { return dtypeWrap("GtEq", field); }, greaterThanEquals(field) { return this.gtEq(field); }, hash(obj = 0n, k1 = 1n, k2 = 2n, k3 = 3n) { if (typeof obj === "number" || typeof obj === "bigint") { return wrap("hash", BigInt(obj), BigInt(k1), BigInt(k2), BigInt(k3)); } const o = { k0: obj, k1: k1, k2: k2, k3: k3, ...obj }; return wrap("hash", BigInt(o.k0), BigInt(o.k1), BigInt(o.k2), BigInt(o.k3)); }, hasValidity() { return _s.hasValidity(); }, head(length = 5) { return wrap("head", length); }, inner() { return _s; }, interpolate() { return expr_op("interpolate"); }, isBoolean() { const dtype = this.dtype; return dtype.equals(datatypes_1.DataType.Bool); }, isDateTime() { const dtype = this.dtype; return [datatypes_1.DataType.Date.variant, "Datetime"].includes(dtype.variant); }, isDuplicated() { return wrap("isDuplicated"); }, isFinite() { const dtype = this.dtype; if (![datatypes_1.DataType.Float32.variant, datatypes_1.DataType.Float64.variant].includes(dtype.variant)) { throw new error_1.InvalidOperationError("isFinite", dtype); } return wrap("isFinite"); }, isFirstDistinct() { return wrap("isFirstDistinct"); }, isFloat() { const dtype = this.dtype; return [datatypes_1.DataType.Float32.variant, datatypes_1.DataType.Float64.variant].includes(dtype.variant); }, isIn(other) { return exports.Series.isSeries(other) ? wrap("isIn", other._s) : wrap("isIn", (0, exports.Series)("", other)._s); }, isInfinite() { const dtype = this.dtype; if (![datatypes_1.DataType.Float32.variant, datatypes_1.DataType.Float64.variant].includes(dtype.variant)) { throw new error_1.InvalidOperationError("isFinite", dtype); } return wrap("isInfinite"); }, isNotNull() { return wrap("isNotNull"); }, isNull() { return wrap("isNull"); }, isNaN() { return wrap("isNan"); }, isNotNaN() { return wrap("isNotNan"); }, isNumeric() { const dtype = this.dtype; const numericTypes = [ datatypes_1.DataType.Int8.variant, datatypes_1.DataType.Int16.variant, datatypes_1.DataType.Int32.variant, datatypes_1.DataType.Int64.variant, datatypes_1.DataType.UInt8.variant, datatypes_1.DataType.UInt16.variant, datatypes_1.DataType.UInt32.variant, datatypes_1.DataType.UInt64.variant, datatypes_1.DataType.Float32.variant, datatypes_1.DataType.Float64.variant, ]; return numericTypes.includes(dtype.variant); }, isUnique() { return wrap("isUnique"); }, isString() { return this.dtype.equals(datatypes_1.DataType.String); }, kurtosis(fisher = true, bias = true) { if (typeof fisher === "boolean") { return _s.kurtosis(fisher, bias); } const d = { fisher: true, bias, ...fisher, }; return _s.kurtosis(d.fisher, d.bias); }, len() { return this.length; }, lt(field) { if (typeof field === "number") return dtypeWrap("Lt", field); if (exports.Series.isSeries(field)) { return wrap("lt", field._s); } throw new Error("Not a number nor a series"); }, lessThan(field) { if (typeof field === "number") return dtypeWrap("Lt", field); if (exports.Series.isSeries(field)) { return wrap("lt", field._s); } throw new Error("Not a number nor a series"); }, ltEq(field) { if (typeof field === "number") return dtypeWrap("LtEq", field); if (exports.Series.isSeries(field)) { return wrap("ltEq", field._s); } throw new Error("Not a number nor a series"); }, lessThanEquals(field) { if (typeof field === "number") return dtypeWrap("LtEq", field); if (exports.Series.isSeries(field)) { return wrap("ltEq", field._s); } throw new Error("Not a number nor a series"); }, limit(n = 10) { return wrap("limit", n); }, max() { return _s.max(); }, mean() { return _s.mean(); }, median() { return _s.median(); }, min() { return _s.min(); }, mode() { return wrap("mode"); }, minus(other) { if (typeof other === "number") return dtypeWrap("Sub", other); if (exports.Series.isSeries(other)) { return wrap("sub", other._s); } throw new Error("Not a number nor a series"); }, mul(other) { if (typeof other === "number") return dtypeWrap("Mul", other); if (exports.Series.isSeries(other)) { return wrap("mul", other._s); } throw new Error("Not a number nor a series"); }, nChunks() { return _s.nChunks(); }, neq(other) { if (typeof other === "number") return dtypeWrap("Neq", other); if (exports.Series.isSeries(other)) { return wrap("neq", other._s); } throw new Error("Not a number nor a series"); }, notEquals(other) { return this.neq(other); }, nullCount() { return _s.nullCount(); }, nUnique() { return _s.nUnique(); }, peakMax() { return expr_op("peakMax"); }, peakMin() { return expr_op("peakMin"); }, plus(other) { if (typeof other === "number") return dtypeWrap("Add", other); if (exports.Series.isSeries(other)) { return wrap("add", other._s); } throw new Error("Not a number nor a series"); }, quantile(quantile, interpolation = "nearest") { return _s.quantile(quantile, interpolation); }, rank(method = "average", descending = false) { return wrap("rank", method, descending); }, rechunk(inPlace = false) { return wrap("rechunk", inPlace); }, reinterpret(signed = true) { const dtype = this.dtype; if ([datatypes_1.DataType.UInt64.variant, datatypes_1.DataType.Int64.variant].includes(dtype.variant)) { return wrap("reinterpret", signed); } throw new error_1.InvalidOperationError("reinterpret", dtype); }, rem(field) { return dtypeWrap("Rem", field); }, modulo(field) { return this.rem(field); }, rename(obj, inPlace = false) { if (obj?.inPlace ?? inPlace) { _s.rename(obj?.name ?? obj); } else { return this.alias(obj?.name ?? obj); } }, rollingMax(...args) { return expr_op("rollingMax", ...args); }, rollingMean(...args) { return expr_op("rollingMean", ...args); }, rollingMin(...args) { return expr_op("rollingMin", ...args); }, rollingSum(...args) { return expr_op("rollingSum", ...args); }, rollingStd(...args) { return expr_op("rollingStd", ...args); }, rollingVar(...args) { return expr_op("rollingVar", ...args); }, rollingMedian(...args) { return expr_op("rollingMedian", ...args); }, rollingQuantile(...args) { return expr_op("rollingQuantile", ...args); }, rollingSkew(...args) { return expr_op("rollingSkew", ...args); }, floor() { return wrap("floor"); }, ceil() { return wrap("ceil"); }, round(opt) { if (this.isNumeric()) { if (typeof opt === "number") { return wrap("round", opt); } return wrap("round", opt.decimals); } throw new error_1.InvalidOperationError("round", this.dtype); }, clip(...args) { return expr_op("clip", ...args); }, scatter(indices, value) { indices = exports.Series.isSeries(indices) ? indices.cast(datatypes_1.DataType.UInt32) : (0, exports.Series)(indices); if (!exports.Series.isSeries(value)) { if (!Array.isArray(value)) { value = [value]; } value = (0, exports.Series)(value); } if (indices.length > 0) { value = value.extendConstant(value[0], indices.length - 1); } _s.scatter(indices._s, value._s); }, set(mask, value) { mask = exports.Series.isSeries(mask) ? mask : exports.Series.from(mask); return dtypeWrap("SetWithMask", mask.inner(), value); }, sample(opts, frac, withReplacement = false, seed) { // biome-ignore lint/style/noArguments: <explanation> if (arguments.length === 0) { return wrap("sampleN", 1, withReplacement, false, seed); } if (opts?.n !== undefined || opts?.frac !== undefined) { return this.sample(opts.n, opts.frac, opts.withReplacement, seed); } if (typeof opts === "number") { return wrap("sampleN", opts, withReplacement, false, seed); } if (typeof frac === "number") { return wrap("sampleFrac", frac, withReplacement, false, seed); } throw new TypeError("must specify either 'frac' or 'n'"); }, seriesEqual(other, nullEqual = true, strict = false) { return _s.seriesEqual(other._s, nullEqual, strict); }, shift(periods = 1) { return wrap("shift", periods); }, shiftAndFill(...args) { return expr_op("shiftAndFill", ...args); }, shrinkToFit(inPlace) { if (inPlace) { _s.shrinkToFit(); } else { const s = this.clone(); s.shrinkToFit(); return s; } }, skew(bias = true) { if (typeof bias === "boolean") { return _s.skew(bias); } return _s.skew(bias?.bias ?? true); }, slice(offset, length) { if (typeof offset === "number") { return wrap("slice", offset, length); } return wrap("slice", offset.offset, offset.length); }, sort(options) { options = { descending: false, nullsLast: false, ...(options ?? {}) }; return wrap("sort", options.descending ?? options.reverse ?? false, options.nullsLast); }, sub(field) { return dtypeWrap("Sub", field); }, sum() { return _s.sum(); }, tail(length = 5) { return wrap("tail", length); }, gather(indices) { return wrap("take", indices); }, gatherEvery(n, offset) { return wrap("gatherEvery", n, offset ?? 0); }, multiplyBy(field) { return this.mul(field); }, toArray() { return _s.toArray(); }, toTypedArray() { if (!this.hasValidity() || this.nullCount() === 0) { return _s.toTypedArray(); } throw new Error("data contains nulls, unable to convert to TypedArray"); }, toDummies(separator = "_", dropFirst = false) { return (0, dataframe_1._DataFrame)(_s.toDummies(separator, dropFirst)); }, toFrame() { return (0, dataframe_1._DataFrame)(new polars_internal_1.default.JsDataFrame([_s])); }, toBinary() { return _s.toBinary(); }, toJSON(...args) { // this is passed by `JSON.stringify` when calling `toJSON()` if (args[0] === "") { return _s.toJs(); } return _s.serialize("json").toString(); }, toObject() { return _s.toJs(); }, unique(maintainOrder) { if (maintainOrder) { return wrap("uniqueStable"); } return wrap("unique"); }, valueCounts(sort, parallel, name, normalize) { name = name ?? (normalize ? "proportion" : "count"); return (0, dataframe_1._DataFrame)(unwrap("valueCounts", sort ?? false, parallel ?? false, name, normalize ?? false)); }, values() { return this[Symbol.iterator](); }, zipWith(mask, other) { return wrap("zipWith", mask._s, other._s); }, }; return new Proxy(series, { get: (target, prop, receiver) => { if (typeof prop !== "symbol" && !Number.isNaN(Number(prop))) { return target.get(Number(prop)); } return Reflect.get(target, prop, receiver); }, set: (series, prop, input) => { if (typeof prop !== "symbol" && !Number.isNaN(Number(prop))) { series.scatter([Number(prop)], input); return true; } }, }); } const SeriesConstructor = (arg0, arg1, dtype, strict) => { if (typeof arg0 === "string") { const _s = (0, construction_1.arrayToJsSeries)(arg0, arg1, dtype, strict); return _Series(_s); } return SeriesConstructor("", arg0); }; const isSeries = (anyVal) => { try { return anyVal?.[Symbol.toStringTag]?.() === "Series"; } catch (err) { return false; } }; const from = (name, values) => { if (Array.isArray(name)) { return SeriesConstructor("", values); } return SeriesConstructor(name, values); }; const of = (...values) => { return exports.Series.from(values); }; exports.Series = Object.assign(SeriesConstructor, { isSeries, from, of, deserialize: (buf, fmt) => _Series(polars_internal_1.default.JsSeries.deserialize(buf, fmt)), });