UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

390 lines (389 loc) 16 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.LazyDataFrame = exports._LazyDataFrame = void 0; const dataframe_1 = require("../dataframe"); const polars_internal_1 = __importDefault(require("../internals/polars_internal")); const utils_1 = require("../utils"); const expr_1 = require("./expr"); const groupby_1 = require("./groupby"); const inspect = Symbol.for("nodejs.util.inspect.custom"); const prepareGroupbyInputs = (by) => { if (Array.isArray(by)) { const newBy = []; for (let e of by) { if (typeof e === "string") { e = polars_internal_1.default.col(e); } newBy.push(e); } return newBy; } if (typeof by === "string") { return [polars_internal_1.default.col(by)]; } if (expr_1.Expr.isExpr(by)) { return [by._expr]; } return []; }; /** @ignore */ const _LazyDataFrame = (_ldf) => { const unwrap = (method, ...args) => { return _ldf[method](...args); }; const wrap = (method, ...args) => { return (0, exports._LazyDataFrame)(unwrap(method, ...args)); }; return { _ldf, [inspect]() { return _ldf.describeOptimizedPlan(); }, get [Symbol.toStringTag]() { return "LazyDataFrame"; }, get columns() { return _ldf.columns; }, describePlan() { return _ldf.describePlan(); }, describeOptimizedPlan() { return _ldf.describeOptimizedPlan(); }, cache() { return (0, exports._LazyDataFrame)(_ldf.cache()); }, clone() { return (0, exports._LazyDataFrame)(_ldf.clone()); }, collectSync() { return (0, dataframe_1._DataFrame)(_ldf.collectSync()); }, collect(opts) { if (opts?.noOptimization) { opts.predicatePushdown = false; opts.projectionPushdown = false; opts.slicePushdown = false; opts.commSubplanElim = false; opts.commSubexprElim = false; } if (opts?.streaming) opts.commSubplanElim = false; if (opts) { _ldf = _ldf.optimizationToggle(opts.typeCoercion, opts.predicatePushdown, opts.projectionPushdown, opts.simplifyExpression, opts.slicePushdown, opts.commSubplanElim, opts.commSubexprElim, opts.streaming); } return _ldf.collect().then(dataframe_1._DataFrame); }, drop(...cols) { return (0, exports._LazyDataFrame)(_ldf.dropColumns(cols.flat(2))); }, unique(opts = false, subset, keep = "first") { const defaultOptions = { maintainOrder: false, keep: "first", }; if (typeof opts === "boolean") { const o = { ...defaultOptions, maintainOrder: opts, subset, keep }; return (0, exports._LazyDataFrame)(_ldf.unique(o.maintainOrder, o?.subset?.flat(2), o.keep)); } if (opts.subset) { opts.subset = [opts.subset].flat(3); } const o = { ...defaultOptions, ...opts }; return (0, exports._LazyDataFrame)(_ldf.unique(o.maintainOrder, o.subset, o.keep)); }, dropNulls(...subset) { if (subset.length) { return wrap("dropNulls", subset.flat(2)); } return wrap("dropNulls"); }, explode(...columns) { if (!columns.length) { const cols = (0, utils_1.selectionToExprList)(_ldf.columns, false); return wrap("explode", cols); } const column = (0, utils_1.selectionToExprList)(columns, false); return wrap("explode", column); }, fetchSync(numRows, opts) { if (opts?.noOptimization) { opts.predicatePushdown = false; opts.projectionPushdown = false; opts.slicePushdown = false; opts.commSubplanElim = false; opts.commSubexprElim = false; } if (opts?.streaming) opts.commSubplanElim = false; if (opts) { _ldf = _ldf.optimizationToggle(opts.typeCoercion, opts.predicatePushdown, opts.projectionPushdown, opts.simplifyExpr, opts.slicePushdown, opts.commSubplanElim, opts.commSubexprElim, opts.streaming); } return (0, dataframe_1._DataFrame)(_ldf.fetchSync(numRows)); }, fetch(numRows, opts) { if (opts?.noOptimization) { opts.predicatePushdown = false; opts.projectionPushdown = false; opts.slicePushdown = false; opts.commSubplanElim = false; opts.commSubexprElim = false; } if (opts?.streaming) opts.commSubplanElim = false; if (opts) { _ldf = _ldf.optimizationToggle(opts.typeCoercion, opts.predicatePushdown, opts.projectionPushdown, opts.simplifyExpr, opts.slicePushdown, opts.commSubplanElim, opts.commSubexprElim, opts.streaming); } return _ldf.fetch(numRows).then(dataframe_1._DataFrame); }, first() { return this.fetchSync(1); }, fillNull(exprOrValue) { const fillValue = (0, expr_1.exprToLitOrExpr)(exprOrValue)._expr; return (0, exports._LazyDataFrame)(_ldf.fillNull(fillValue)); }, filter(exprOrValue) { const predicate = (0, expr_1.exprToLitOrExpr)(exprOrValue, false)._expr; return (0, exports._LazyDataFrame)(_ldf.filter(predicate)); }, groupBy(opt, maintainOrder = true) { if (opt?.by !== undefined) { const by = (0, utils_1.selectionToExprList)([opt.by], false); return (0, groupby_1._LazyGroupBy)(_ldf.groupby(by, opt.maintainOrder)); } const by = (0, utils_1.selectionToExprList)([opt], false); return (0, groupby_1._LazyGroupBy)(_ldf.groupby(by, maintainOrder)); }, groupByRolling({ indexColumn, by, period, offset, closed, check_sorted }) { offset = offset ?? `-${period}`; closed = closed ?? "right"; by = prepareGroupbyInputs(by); check_sorted = check_sorted ?? false; const lgb = _ldf.groupbyRolling(polars_internal_1.default.col(indexColumn), period, offset, closed, by, check_sorted); return (0, groupby_1._LazyGroupBy)(lgb); }, groupByDynamic({ indexColumn, every, period, offset, includeBoundaries, closed, by, start_by, check_sorted, }) { period = period ?? every; offset = offset ?? `-${period}`; closed = closed ?? "right"; by = prepareGroupbyInputs(by); includeBoundaries = includeBoundaries ?? false; start_by = start_by ?? "monday"; check_sorted = check_sorted ?? false; const lgb = _ldf.groupbyDynamic(polars_internal_1.default.col(indexColumn), every, period, offset, includeBoundaries, closed, by, start_by, check_sorted); return (0, groupby_1._LazyGroupBy)(lgb); }, head(len = 5) { return (0, exports._LazyDataFrame)(_ldf.slice(0, len)); }, inner() { return _ldf; }, join(df, options) { options = { how: "inner", suffix: "right", allowParallel: true, forceParallel: false, ...options, }; const { how, suffix, allowParallel, forceParallel } = options; if (how === "cross") { return (0, exports._LazyDataFrame)(_ldf.join(df._ldf, [], [], allowParallel, forceParallel, how, suffix, [], [])); } let leftOn; let rightOn; if (options.on) { const on = (0, utils_1.selectionToExprList)(options.on, false); leftOn = on; rightOn = on; } else if ((options.leftOn && !options.rightOn) || (options.rightOn && !options.leftOn)) { throw new TypeError("You should pass the column to join on as an argument."); } else { leftOn = (0, utils_1.selectionToExprList)(options.leftOn, false); rightOn = (0, utils_1.selectionToExprList)(options.rightOn, false); } const ldf = _ldf.join(df._ldf, leftOn, rightOn, allowParallel, forceParallel, how, suffix, [], []); return (0, exports._LazyDataFrame)(ldf); }, joinAsof(other, options) { options = { suffix: "_right", allowParallel: true, forceParallel: false, strategy: "backward", ...options, }; const { suffix, strategy, allowParallel, forceParallel } = options; let leftOn; let rightOn; if (!other?._ldf) { throw new TypeError("Expected a 'lazyFrame' as join table"); } if (options.on) { leftOn = rightOn = options.on; } else if ((options.leftOn && !options.rightOn) || (options.rightOn && !options.leftOn)) { throw new TypeError("You should pass the column to join on as an argument."); } else { leftOn = options.leftOn; rightOn = options.rightOn; } let byLeft; if (typeof options.byLeft === "string") { byLeft = [options.byLeft]; } else if (Array.isArray(options.byLeft)) { byLeft = options.byLeft; } let byRight; if (typeof options.byRight === "string") { byRight = [options.byRight]; } else if (Array.isArray(options.byRight)) { byRight = options.byRight; } if (typeof options.by === "string") { byLeft = byRight = [options.by]; } else if (Array.isArray(options.by)) { byLeft = byRight = options.by; } let toleranceStr; let toleranceNum; if (typeof options.tolerance === "string") { toleranceStr = options.tolerance; } else { toleranceNum = options.tolerance; } const ldf = _ldf.joinAsof(other._ldf, polars_internal_1.default.col(leftOn), polars_internal_1.default.col(rightOn), byLeft, byRight, allowParallel, forceParallel, suffix, strategy, toleranceNum, toleranceStr); return (0, exports._LazyDataFrame)(ldf); }, last() { return (0, exports._LazyDataFrame)(_ldf.tail(1)); }, limit(len = 5) { return (0, exports._LazyDataFrame)(_ldf.slice(0, len)); }, max() { return (0, exports._LazyDataFrame)(_ldf.max()); }, mean() { return (0, exports._LazyDataFrame)(_ldf.mean()); }, median() { return (0, exports._LazyDataFrame)(_ldf.median()); }, unpivot(ids, values) { return (0, exports._LazyDataFrame)(_ldf.unpivot((0, utils_1.columnOrColumnsStrict)(ids), (0, utils_1.columnOrColumnsStrict)(values))); }, min() { return (0, exports._LazyDataFrame)(_ldf.min()); }, quantile(quantile, interpolation = "nearest") { return (0, exports._LazyDataFrame)(_ldf.quantile(quantile, interpolation)); }, rename(mapping) { const existing = Object.keys(mapping); const replacements = Object.values(mapping); return (0, exports._LazyDataFrame)(_ldf.rename(existing, replacements)); }, reverse() { return (0, exports._LazyDataFrame)(_ldf.reverse()); }, select(...exprs) { const selections = (0, utils_1.selectionToExprList)(exprs, false); return (0, exports._LazyDataFrame)(_ldf.select(selections)); }, shift(periods) { return (0, exports._LazyDataFrame)(_ldf.shift(periods)); }, shiftAndFill(opts, fillValue) { if (typeof opts === "number") { return (0, exports._LazyDataFrame)(_ldf.shiftAndFill(opts, fillValue)); } return (0, exports._LazyDataFrame)(_ldf.shiftAndFill(opts?.n, opts?.fillValue)); }, slice(opt, len) { if (opt?.offset !== undefined) { return (0, exports._LazyDataFrame)(_ldf.slice(opt.offset, opt.length)); } return (0, exports._LazyDataFrame)(_ldf.slice(opt, len)); }, sort(arg, descending = false, nullsLast = false, maintainOrder = false) { if (arg?.by !== undefined) { return this.sort(arg.by, arg.descending, arg.nullsLast, arg.maintainOrder); } if (typeof arg === "string") { return wrap("sort", arg, descending, nullsLast, maintainOrder); } const by = (0, utils_1.selectionToExprList)(arg, false); return wrap("sortByExprs", by, descending, nullsLast, maintainOrder); }, std() { return (0, exports._LazyDataFrame)(_ldf.std()); }, sum() { return (0, exports._LazyDataFrame)(_ldf.sum()); }, var() { return (0, exports._LazyDataFrame)(_ldf.var()); }, tail(length = 5) { return (0, exports._LazyDataFrame)(_ldf.tail(length)); }, toJSON(...args) { // this is passed by `JSON.stringify` when calling `toJSON()` if (args[0] === "") { return JSON.parse(_ldf.serialize("json").toString()); } return _ldf.serialize("json").toString(); }, serialize(format) { return _ldf.serialize(format); }, withColumn(expr) { return (0, exports._LazyDataFrame)(_ldf.withColumn(expr._expr)); }, withColumns(...columns) { const exprs = (0, utils_1.selectionToExprList)(columns, false); return (0, exports._LazyDataFrame)(_ldf.withColumns(exprs)); }, withColumnRenamed(existing, replacement) { return (0, exports._LazyDataFrame)(_ldf.rename([existing], [replacement])); }, withRowCount(name = "row_nr") { return (0, exports._LazyDataFrame)(_ldf.withRowCount(name)); }, sinkCSV(path, options = {}) { options.maintainOrder = options.maintainOrder ?? false; _ldf.sinkCsv(path, options); }, sinkParquet(path, options = {}) { options.compression = options.compression ?? "zstd"; options.statistics = options.statistics ?? false; _ldf.sinkParquet(path, options); }, }; }; exports._LazyDataFrame = _LazyDataFrame; const isLazyDataFrame = (anyVal) => anyVal?.[Symbol.toStringTag] === "LazyDataFrame"; /** @ignore */ exports.LazyDataFrame = Object.assign(exports._LazyDataFrame, { deserialize: (buf, fmt) => (0, exports._LazyDataFrame)(polars_internal_1.default.JsLazyFrame.deserialize(buf, fmt)), fromExternal(external) { return (0, exports._LazyDataFrame)(polars_internal_1.default.JsLazyFrame.cloneExternal(external)); }, isLazyDataFrame, });