UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

428 lines (427 loc) 17.9 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.LazyDataFrame = exports._LazyDataFrame = void 0; const dataframe_1 = require("../dataframe"); const polars_internal_1 = __importDefault(require("../internals/polars_internal")); const utils_1 = require("../utils"); const expr_1 = require("./expr"); const groupby_1 = require("./groupby"); const inspect = Symbol.for("nodejs.util.inspect.custom"); const prepareGroupbyInputs = (by) => { if (Array.isArray(by)) { const newBy = []; for (let e of by) { if (typeof e === "string") { e = polars_internal_1.default.col(e); } newBy.push(e); } return newBy; } if (typeof by === "string") { return [polars_internal_1.default.col(by)]; } if (expr_1.Expr.isExpr(by)) { return [by._expr]; } return []; }; /** @ignore */ const _LazyDataFrame = (_ldf) => { const unwrap = (method, ...args) => { return _ldf[method](...args); }; const wrap = (method, ...args) => { return (0, exports._LazyDataFrame)(unwrap(method, ...args)); }; return { _ldf, [inspect]() { return _ldf.describeOptimizedPlan(); }, get [Symbol.toStringTag]() { return "LazyDataFrame"; }, get columns() { return _ldf.columns; }, describePlan() { return _ldf.describePlan(); }, describeOptimizedPlan() { return _ldf.describeOptimizedPlan(); }, cache() { return (0, exports._LazyDataFrame)(_ldf.cache()); }, clone() { return (0, exports._LazyDataFrame)(_ldf.clone()); }, collectSync() { return (0, dataframe_1._DataFrame)(_ldf.collectSync()); }, collect(opts) { if (opts?.noOptimization) { opts.predicatePushdown = false; opts.projectionPushdown = false; opts.slicePushdown = false; opts.commSubplanElim = false; opts.commSubexprElim = false; } if (opts?.streaming) opts.commSubplanElim = false; if (opts) { _ldf = _ldf.optimizationToggle(opts.typeCoercion, opts.predicatePushdown, opts.projectionPushdown, opts.simplifyExpression, opts.slicePushdown, opts.commSubplanElim, opts.commSubexprElim, opts.streaming); } return _ldf.collect().then(dataframe_1._DataFrame); }, drop(...cols) { return (0, exports._LazyDataFrame)(_ldf.dropColumns(cols.flat(2))); }, unique(opts, keep = "any", maintainOrder = false) { // no arguments -> signal call with defaults if (arguments.length === 0) return (0, exports._LazyDataFrame)(_ldf.unique(null, keep, maintainOrder)); // string -> single-element array if (typeof opts === "string") return (0, exports._LazyDataFrame)(_ldf.unique([opts], keep, maintainOrder)); // array -> use as-is if (Array.isArray(opts)) return (0, exports._LazyDataFrame)(_ldf.unique(opts, keep, maintainOrder)); // object -> merge defaults, normalize subset to array (if present) if (opts && typeof opts === "object") { const o = { keep, maintainOrder, ...opts }; const subset = o.subset ? [o.subset].flat(3) : undefined; return (0, exports._LazyDataFrame)(_ldf.unique(subset, o.keep, o.maintainOrder)); } throw new TypeError("You should pass valid unique argument."); }, dropNulls(...subset) { if (subset.length) { return wrap("dropNulls", subset.flat(2)); } return wrap("dropNulls"); }, explode(...columns) { if (!columns.length) { const cols = (0, utils_1.selectionToExprList)(_ldf.columns, false); return wrap("explode", cols); } const column = (0, utils_1.selectionToExprList)(columns, false); return wrap("explode", column); }, fetchSync(numRows, opts) { if (opts?.noOptimization) { opts.predicatePushdown = false; opts.projectionPushdown = false; opts.slicePushdown = false; opts.commSubplanElim = false; opts.commSubexprElim = false; } if (opts?.streaming) opts.commSubplanElim = false; if (opts) { _ldf = _ldf.optimizationToggle(opts.typeCoercion, opts.predicatePushdown, opts.projectionPushdown, opts.simplifyExpr, opts.slicePushdown, opts.commSubplanElim, opts.commSubexprElim, opts.streaming); } return (0, dataframe_1._DataFrame)(_ldf.fetchSync(numRows)); }, fetch(numRows, opts) { if (opts?.noOptimization) { opts.predicatePushdown = false; opts.projectionPushdown = false; opts.slicePushdown = false; opts.commSubplanElim = false; opts.commSubexprElim = false; } if (opts?.streaming) opts.commSubplanElim = false; if (opts) { _ldf = _ldf.optimizationToggle(opts.typeCoercion, opts.predicatePushdown, opts.projectionPushdown, opts.simplifyExpr, opts.slicePushdown, opts.commSubplanElim, opts.commSubexprElim, opts.streaming); } return _ldf.fetch(numRows).then(dataframe_1._DataFrame); }, first() { return this.fetchSync(1); }, fillNull(exprOrValue) { const fillValue = (0, expr_1.exprToLitOrExpr)(exprOrValue)._expr; return (0, exports._LazyDataFrame)(_ldf.fillNull(fillValue)); }, filter(exprOrValue) { const predicate = (0, expr_1.exprToLitOrExpr)(exprOrValue, false)._expr; return (0, exports._LazyDataFrame)(_ldf.filter(predicate)); }, groupBy(by, maintainOrder = true) { if (maintainOrder?.maintainOrder !== undefined) { maintainOrder = maintainOrder.maintainOrder; } const expr = (0, utils_1.selectionToExprList)([by], false); return (0, groupby_1._LazyGroupBy)(_ldf.groupby(expr, maintainOrder)); }, groupByRolling({ indexColumn, by, period, offset, closed }) { offset = offset ?? `-${period}`; closed = closed ?? "right"; by = prepareGroupbyInputs(by); const lgb = _ldf.groupbyRolling(polars_internal_1.default.col(indexColumn), period, offset, closed, by); return (0, groupby_1._LazyGroupBy)(lgb); }, groupByDynamic({ indexColumn, every, period, offset, includeBoundaries, closed, label, by, startBy, }) { period = period ?? every; offset = offset ?? "0ns"; closed = closed ?? "left"; label = label ?? "left"; by = prepareGroupbyInputs(by); includeBoundaries = includeBoundaries ?? false; startBy = startBy ?? "monday"; const lgb = _ldf.groupbyDynamic(polars_internal_1.default.col(indexColumn), every, period, offset, label, includeBoundaries, closed, by, startBy); return (0, groupby_1._LazyGroupBy)(lgb); }, head(len = 5) { return (0, exports._LazyDataFrame)(_ldf.slice(0, len)); }, inner() { return _ldf; }, join(df, options) { options = { how: "inner", suffix: "right", allowParallel: true, forceParallel: false, ...options, }; const { how, suffix, allowParallel, forceParallel, coalesce, validate } = options; if (how === "cross") { return (0, exports._LazyDataFrame)(_ldf.join(df._ldf, [], [], allowParallel, forceParallel, how, suffix, coalesce, validate, [], [])); } let leftOn; let rightOn; if (options.on) { const on = (0, utils_1.selectionToExprList)(options.on, false); leftOn = on; rightOn = on; } else if ((options.leftOn && !options.rightOn) || (options.rightOn && !options.leftOn)) { throw new TypeError("You should pass the column to join on as an argument."); } else { leftOn = (0, utils_1.selectionToExprList)(options.leftOn, false); rightOn = (0, utils_1.selectionToExprList)(options.rightOn, false); } const ldf = _ldf.join(df._ldf, leftOn, rightOn, allowParallel, forceParallel, how, suffix, coalesce, validate, [], []); return (0, exports._LazyDataFrame)(ldf); }, joinAsof(other, options) { options = { suffix: "_right", allowParallel: true, forceParallel: false, strategy: "backward", checkSortedness: true, ...options, }; const { suffix, strategy, allowParallel, forceParallel, checkSortedness, } = options; let leftOn; let rightOn; if (!other?._ldf) { throw new TypeError("Expected a 'lazyFrame' as join table"); } if (options.on) { leftOn = rightOn = options.on; } else if ((options.leftOn && !options.rightOn) || (options.rightOn && !options.leftOn)) { throw new TypeError("You should pass the column to join on as an argument."); } else { leftOn = options.leftOn; rightOn = options.rightOn; } let byLeft; if (typeof options.byLeft === "string") { byLeft = [options.byLeft]; } else if (Array.isArray(options.byLeft)) { byLeft = options.byLeft; } let byRight; if (typeof options.byRight === "string") { byRight = [options.byRight]; } else if (Array.isArray(options.byRight)) { byRight = options.byRight; } if (typeof options.by === "string") { byLeft = byRight = [options.by]; } else if (Array.isArray(options.by)) { byLeft = byRight = options.by; } let toleranceStr; let toleranceNum; if (typeof options.tolerance === "string") { toleranceStr = options.tolerance; } else { toleranceNum = options.tolerance; } const ldf = _ldf.joinAsof(other._ldf, polars_internal_1.default.col(leftOn), polars_internal_1.default.col(rightOn), byLeft, byRight, allowParallel, forceParallel, suffix, strategy, toleranceNum, toleranceStr, checkSortedness ?? true); return (0, exports._LazyDataFrame)(ldf); }, last() { return (0, exports._LazyDataFrame)(_ldf.tail(1)); }, limit(len = 5) { return (0, exports._LazyDataFrame)(_ldf.slice(0, len)); }, max() { return (0, exports._LazyDataFrame)(_ldf.max()); }, mean() { return (0, exports._LazyDataFrame)(_ldf.mean()); }, median() { return (0, exports._LazyDataFrame)(_ldf.median()); }, melt(ids, values) { return (0, exports._LazyDataFrame)(_ldf.unpivot((0, utils_1.columnOrColumnsStrict)(ids), (0, utils_1.columnOrColumnsStrict)(values))); }, unpivot(ids, values, options) { options = { variableName: null, valueName: null, ...options, }; return (0, exports._LazyDataFrame)(_ldf.unpivot((0, utils_1.columnOrColumnsStrict)(ids), (0, utils_1.columnOrColumnsStrict)(values), options.variableName, options.valueName)); }, min() { return (0, exports._LazyDataFrame)(_ldf.min()); }, quantile(quantile, interpolation = "nearest") { return (0, exports._LazyDataFrame)(_ldf.quantile(quantile, interpolation)); }, rename(mapping) { const existing = Object.keys(mapping); const replacements = Object.values(mapping); return (0, exports._LazyDataFrame)(_ldf.rename(existing, replacements)); }, reverse() { return (0, exports._LazyDataFrame)(_ldf.reverse()); }, select(...exprs) { const selections = (0, utils_1.selectionToExprList)(exprs, false); return (0, exports._LazyDataFrame)(_ldf.select(selections)); }, shift(periods) { return (0, exports._LazyDataFrame)(_ldf.shift(periods)); }, shiftAndFill(opts, fillValue) { if (typeof opts === "number") { return (0, exports._LazyDataFrame)(_ldf.shiftAndFill(opts, fillValue)); } return (0, exports._LazyDataFrame)(_ldf.shiftAndFill(opts?.n, opts?.fillValue)); }, slice(opt, len) { if (opt?.offset !== undefined) { return (0, exports._LazyDataFrame)(_ldf.slice(opt.offset, opt.length)); } return (0, exports._LazyDataFrame)(_ldf.slice(opt, len)); }, sort(arg, descending = false, nullsLast = false, maintainOrder = false) { if (arg?.by !== undefined) { return this.sort(arg.by, arg.descending, arg.nullsLast, arg.maintainOrder); } if (typeof arg === "string") { return wrap("sort", arg, descending, nullsLast, maintainOrder); } const by = (0, utils_1.selectionToExprList)(arg, false); return wrap("sortByExprs", by, descending, nullsLast, maintainOrder); }, std() { return (0, exports._LazyDataFrame)(_ldf.std()); }, sum() { return (0, exports._LazyDataFrame)(_ldf.sum()); }, var() { return (0, exports._LazyDataFrame)(_ldf.var()); }, tail(length = 5) { return (0, exports._LazyDataFrame)(_ldf.tail(length)); }, toJSON(...args) { // this is passed by `JSON.stringify` when calling `toJSON()` if (args[0] === "") { return JSON.parse(_ldf.serialize("json").toString()); } return _ldf.serialize("json").toString(); }, serialize(format) { return _ldf.serialize(format); }, withColumn(column) { return this.withColumns(column); }, withColumns(...columns) { const exprs = (0, utils_1.selectionToExprList)(columns, false); return (0, exports._LazyDataFrame)(_ldf.withColumns(exprs)); }, withColumnRenamed(existing, replacement) { return (0, exports._LazyDataFrame)(_ldf.rename([existing], [replacement])); }, withRowCount(name = "row_nr") { return (0, exports._LazyDataFrame)(_ldf.withRowCount(name)); }, withRowIndex(name = "index", offset = 0) { return (0, exports._LazyDataFrame)(_ldf.withRowIndex(name, offset)); }, sinkCSV(path, options) { options = { ...dataframe_1.writeCsvDefaultOptions, ...options }; return _ldf.sinkCsv(path, options, { syncOnClose: "all", maintainOrder: false, mkdir: true, }); }, sinkParquet(path, options = {}) { options.compression = options.compression ?? "zstd"; options.statistics = options.statistics ?? false; options.sinkOptions = options.sinkOptions ?? { syncOnClose: "all", maintainOrder: false, mkdir: true, }; return _ldf.sinkParquet(path, options); }, sinkNdJson(path, options = {}) { options.retries = options.retries ?? 2; options.syncOnClose = options.syncOnClose ?? "all"; options.maintainOrder = options.maintainOrder ?? true; options.mkdir = options.mkdir ?? true; return _ldf.sinkJson(path, options); }, sinkIpc(path, options = {}) { options.compatLevel = options.compatLevel ?? "newest"; options.compression = options.compression ?? "uncompressed"; options.retries = options.retries ?? 2; options.syncOnClose = options.syncOnClose ?? "all"; options.maintainOrder = options.maintainOrder ?? true; options.mkdir = options.mkdir ?? true; return _ldf.sinkIpc(path, options); }, }; }; exports._LazyDataFrame = _LazyDataFrame; const isLazyDataFrame = (anyVal) => anyVal?.[Symbol.toStringTag] === "LazyDataFrame"; /** @ignore */ exports.LazyDataFrame = Object.assign(exports._LazyDataFrame, { deserialize: (buf, fmt) => (0, exports._LazyDataFrame)(polars_internal_1.default.JsLazyFrame.deserialize(buf, fmt)), fromExternal(external) { return (0, exports._LazyDataFrame)(polars_internal_1.default.JsLazyFrame.cloneExternal(external)); }, isLazyDataFrame, });