UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

158 lines (157 loc) 5.96 kB
"use strict";
// ---------------------------------------------------------------------------
// TypeScript-emitted CommonJS interop helpers. These are compiler output and
// are kept logically untouched; only the layout has been expanded.
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports._GroupBy = _GroupBy;
exports.RollingGroupBy = RollingGroupBy;
exports.DynamicGroupBy = DynamicGroupBy;
const node_util_1 = __importDefault(require("node:util"));
const dataframe_1 = require("./dataframe");
const functions_1 = require("./lazy/functions");
const utils = __importStar(require("./utils"));

// Well-known symbol Node's `util.inspect` looks up for custom formatting.
const inspect = Symbol.for("nodejs.util.inspect.custom");
const inspectOpts = { colors: true, depth: null };

/**
 * Builds the group-by handle for a frame.
 *
 * @ignore
 * @param df - Underlying frame handle; it is wrapped with `_DataFrame` and its
 *   `groupby` / `pivot` methods are called directly.
 * @param by - Grouping key(s). A single value or an array is accepted, since
 *   it is normalised with `[by].flat()` before reaching `df.groupby`.
 * @param maintainOrder - Forwarded as the second argument of the lazy
 *   `groupBy` call. Defaults to `false`.
 * @returns A sealed object exposing `agg`, `pivot` and the shortcut
 *   aggregations (`first`, `last`, `sum`, ...).
 */
function _GroupBy(df, by, maintainOrder = false) {
    const customInspect = () => node_util_1.default.formatWithOptions(inspectOpts, "GroupBy {by: %O}", by);

    /**
     * Starts a pivot. Accepts either `(pivotCol, valuesCol)` as two strings or
     * a single `{ pivotCol, valuesCol }` options object.
     *
     * @throws {Error} When the string form is used without `valuesCol`.
     */
    const pivot = (opts, valuesCol) => {
        if (typeof opts === "string") {
            if (valuesCol) {
                return pivot({ pivotCol: opts, valuesCol });
            }
            throw new Error("must specify both pivotCol and valuesCol");
        }
        return PivotOps(df, by, opts.pivotCol, opts.valuesCol);
    };

    /**
     * Runs an aggregation through the lazy engine and collects the result.
     *
     * Two call shapes are supported:
     *  - expressions (as detected by `utils.isExprArray`), flattened two
     *    levels deep;
     *  - a single mapping object `{ column: name | name[] }`, where each name
     *    is invoked as a zero-argument method on `col(column)`.
     *
     * @throws {TypeError} When no aggregation is supplied, or when a mapping
     *   names an aggregation that is not a method of the column expression.
     */
    const agg = (...aggs) => {
        let exprs;
        if (utils.isExprArray(aggs)) {
            exprs = [aggs].flat(2);
        }
        else {
            const mapping = aggs[0];
            if (mapping == null) {
                // Previously surfaced as an opaque "Cannot convert undefined or
                // null to object" from Object.entries.
                throw new TypeError("agg() requires at least one expression or a { column: aggregation } mapping");
            }
            exprs = Object.entries(mapping).flatMap(([key, values]) => [values].flat(2).map((v) => {
                const columnExpr = (0, functions_1.col)(key);
                const method = columnExpr[v];
                if (typeof method !== "function") {
                    throw new TypeError(`unknown aggregation "${String(v)}" for column "${key}"`);
                }
                return method.call(columnExpr);
            }));
        }
        return (0, dataframe_1._DataFrame)(df)
            .lazy()
            .groupBy(by, maintainOrder)
            .agg(...exprs)
            .collectSync({ noOptimization: true });
    };

    // Shortcut aggregations all apply to every column except the group keys.
    return Object.seal({
        [inspect]: customInspect,
        agg,
        pivot,
        aggList: () => agg((0, functions_1.exclude)(by)),
        len() {
            return (0, dataframe_1._DataFrame)(df.groupby([by].flat(), by, "count"));
        },
        first: () => agg((0, functions_1.exclude)(by).first()),
        groups() {
            return (0, dataframe_1._DataFrame)(df.groupby([by].flat(), null, "groups"));
        },
        head: (n = 5) => agg((0, functions_1.exclude)(by).head(n)),
        last: () => agg((0, functions_1.exclude)(by).last()),
        max: () => agg((0, functions_1.exclude)(by).max()),
        mean: () => agg((0, functions_1.exclude)(by).mean()),
        median: () => agg((0, functions_1.exclude)(by).median()),
        min: () => agg((0, functions_1.exclude)(by).min()),
        nUnique: () => agg((0, functions_1.exclude)(by).nUnique()),
        quantile: (q) => agg((0, functions_1.exclude)(by).quantile(q)),
        sum: () => agg((0, functions_1.exclude)(by).sum()),
        tail: (n = 5) => agg((0, functions_1.exclude)(by).tail(n)),
        toString: () => "GroupBy",
    });
}

/**
 * Builds the object returned by `_GroupBy(...).pivot(...)`. Each property is a
 * zero-argument function that calls `df.pivot` with the matching aggregation
 * name and wraps the result with `_DataFrame`.
 *
 * @param df - Underlying frame handle exposing `pivot`.
 * @param by - Grouping key(s); normalised with `[by].flat()`.
 * @param pivotCol - Column passed (as a one-element array) as the pivot column.
 * @param valueCol - Column passed (as a one-element array) as the values column.
 */
function PivotOps(df, by, pivotCol, valueCol) {
    const pivot = (agg) => () => (0, dataframe_1._DataFrame)(df.pivot([by].flat(), [pivotCol], [valueCol], agg));
    const customInspect = () => node_util_1.default.formatWithOptions(inspectOpts, "PivotOps {by: %O}", by);
    return {
        [inspect]: customInspect,
        first: pivot("first"),
        sum: pivot("sum"),
        min: pivot("min"),
        max: pivot("max"),
        mean: pivot("mean"),
        len: pivot("len"),
        median: pivot("median"),
    };
}

/**
 * Returns an object whose `agg` runs a rolling group-by: the arguments are
 * forwarded unchanged to `df.lazy().groupByRolling({...})`, the aggregation is
 * applied, and the result is collected synchronously.
 *
 * @ignore
 */
function RollingGroupBy(df, indexColumn, period, offset, closed, by, check_sorted) {
    return {
        agg(column, ...columns) {
            return df
                .lazy()
                .groupByRolling({
                indexColumn,
                period,
                offset,
                closed,
                by,
                check_sorted,
            })
                .agg(column, ...columns)
                .collectSync();
        },
    };
}

/**
 * Returns an object whose `agg` runs a dynamic group-by: the arguments are
 * forwarded unchanged to `df.lazy().groupByDynamic({...})`, the aggregation is
 * applied, and the result is collected synchronously.
 *
 * @ignore
 */
function DynamicGroupBy(df, indexColumn, every, period, offset, includeBoundaries, closed, by, start_by, check_sorted) {
    return {
        agg(column, ...columns) {
            return df
                .lazy()
                .groupByDynamic({
                indexColumn,
                every,
                period,
                offset,
                includeBoundaries,
                closed,
                by,
                start_by,
                check_sorted,
            })
                .agg(column, ...columns)
                // The option key is `noOptimization` (singular), matching
                // `_GroupBy.agg` above; the previous plural spelling differed
                // from every other call in this module.
                // NOTE(review): confirm against the LazyOptions definition.
                .collectSync({ noOptimization: true });
        },
    };
}