nodejs-polars
Version:
Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL
158 lines (157 loc) • 5.96 kB
JavaScript
/**
 * Module-interop helper: exposes `m[k]` on `o` under the name `k2`
 * (`k2` defaults to `k`). An already-installed `this.__createBinding`
 * takes precedence over the implementation below.
 *
 * When `Object.create` exists the property is defined via a descriptor:
 *  - if `m` has no own descriptor for `k`, or the descriptor is an accessor
 *    on an object that is not flagged `__esModule`, or it is a data
 *    descriptor that is writable or configurable, a fresh enumerable getter
 *    reading `m[k]` on every access is installed;
 *  - otherwise the descriptor found on `m` is reused unchanged.
 * Without `Object.create` the current value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy fallback: plain value copy, no live binding.
        return function (o, m, k, k2) {
            if (k2 === undefined) k2 = k;
            o[k2] = m[k];
        };
    }
    return function (o, m, k, k2) {
        if (k2 === undefined) k2 = k;
        var desc = Object.getOwnPropertyDescriptor(m, k);
        var useLiveGetter;
        if (!desc) {
            useLiveGetter = true;
        }
        else if ("get" in desc) {
            useLiveGetter = !m.__esModule;
        }
        else {
            useLiveGetter = desc.writable || desc.configurable;
        }
        if (useLiveGetter) {
            desc = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, k2, desc);
    };
})();
/**
 * Module-interop helper: attaches `v` to `o` as its `"default"` member.
 * An already-installed `this.__setModuleDefault` takes precedence.
 *
 * With `Object.create` available the member is defined through
 * `Object.defineProperty` as an enumerable value (no `writable` or
 * `configurable` flag is passed); otherwise it is a plain assignment.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * Module-interop helper producing a namespace-style object for `mod`.
 * An already-installed `this.__importStar` takes precedence.
 *
 *  - A truthy `mod` flagged `__esModule` is returned untouched.
 *  - Otherwise a new object is built: every own key of `mod` except
 *    `"default"` is bound through `__createBinding`, and `mod` itself is
 *    attached as the default through `__setModuleDefault`.
 *  - `null` / `undefined` yields an object carrying only the default.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Key-listing strategy, resolved on first use and then cached.
    var listKeys = null;
    function ownKeys(o) {
        if (listKeys === null) {
            listKeys = Object.getOwnPropertyNames || function (obj) {
                // Fallback: collect own enumerable keys by hand.
                var names = [];
                for (var key in obj) {
                    if (Object.prototype.hasOwnProperty.call(obj, key)) {
                        names[names.length] = key;
                    }
                }
                return names;
            };
        }
        return listKeys(o);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var result = {};
        if (mod != null) {
            ownKeys(mod).forEach(function (key) {
                if (key !== "default") {
                    __createBinding(result, mod, key);
                }
            });
        }
        __setModuleDefault(result, mod);
        return result;
    };
})();
/**
 * Module-interop helper for default imports. An already-installed
 * `this.__importDefault` takes precedence.
 *
 * A truthy module flagged `__esModule` is returned as-is; anything else
 * (including `null` / `undefined`) is wrapped as `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module so the interop helpers above treat it as an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Public factories. They are function declarations further down the file,
// so hoisting makes them available for export here.
exports._GroupBy = _GroupBy;
exports.RollingGroupBy = RollingGroupBy;
exports.DynamicGroupBy = DynamicGroupBy;
// node:util is loaded through __importDefault, so it is accessed as `node_util_1.default`.
const node_util_1 = __importDefault(require("node:util"));
const dataframe_1 = require("./dataframe");
const functions_1 = require("./lazy/functions");
const utils = __importStar(require("./utils"));
// Symbol used as a computed property key by _GroupBy and PivotOps to hook
// their custom inspect functions.
const inspect = Symbol.for("nodejs.util.inspect.custom");
// Options handed to util.formatWithOptions by those custom inspect functions.
const inspectOpts = { colors: true, depth: null };
/**
 * Builds the sealed group-by handle for a frame.
 *
 * Every aggregation is funnelled through `agg`, which wraps `df` with
 * `_DataFrame`, switches to the lazy API, groups by `by` and collects
 * synchronously with `{ noOptimization: true }`. `len` and `groups` instead
 * call `df.groupby(...)` directly and wrap the result with `_DataFrame`.
 *
 * @ignore
 * @param df - frame handle; it is both passed to `_DataFrame(...)` and has
 *   `.groupby(...)` called on it directly (presumably the internal/native
 *   frame — confirm against the caller).
 * @param by - grouping column(s); forwarded as-is to the lazy `groupBy`
 *   and flattened with `[by].flat()` for the direct `df.groupby` calls.
 * @param maintainOrder - forwarded to the lazy `groupBy`; defaults to `false`.
 * @returns a sealed object exposing `agg`, `pivot` and the shortcut
 *   aggregations, plus a custom inspect hook and `toString`.
 */
function _GroupBy(df, by, maintainOrder = false) {
    const customInspect = () => node_util_1.default.formatWithOptions(inspectOpts, "GroupBy {by: %O}", by);
    // Expression selecting every column except the grouping keys.
    const others = () => (0, functions_1.exclude)(by);
    // Shared tail of both `agg` call styles.
    const runAgg = (exprs) => {
        const grouped = (0, dataframe_1._DataFrame)(df).lazy().groupBy(by, maintainOrder);
        return grouped.agg(...exprs).collectSync({ noOptimization: true });
    };
    /**
     * Accepts either expressions (possibly nested in arrays) or, as the
     * first argument, a mapping of column name to one or more method names
     * that are invoked on `col(name)`.
     */
    const agg = (...aggs) => {
        if (utils.isExprArray(aggs)) {
            return runAgg([aggs].flat(2));
        }
        const exprs = [];
        for (const [key, values] of Object.entries(aggs[0])) {
            for (const v of [values].flat(2)) {
                exprs.push((0, functions_1.col)(key)[v]());
            }
        }
        return runAgg(exprs);
    };
    /**
     * Accepts `{ pivotCol, valuesCol }` or the positional pair
     * `(pivotCol, valuesCol)`; the positional form requires a truthy
     * `valuesCol`.
     */
    const pivot = (opts, valuesCol) => {
        if (typeof opts !== "string") {
            return PivotOps(df, by, opts.pivotCol, opts.valuesCol);
        }
        if (!valuesCol) {
            throw new Error("must specify both pivotCol and valuesCol");
        }
        return PivotOps(df, by, opts, valuesCol);
    };
    return Object.seal({
        [inspect]: customInspect,
        agg,
        pivot,
        aggList: () => agg(others()),
        len() {
            const counted = df.groupby([by].flat(), by, "count");
            return (0, dataframe_1._DataFrame)(counted);
        },
        first: () => agg(others().first()),
        groups() {
            const grouped = df.groupby([by].flat(), null, "groups");
            return (0, dataframe_1._DataFrame)(grouped);
        },
        head: (n = 5) => agg(others().head(n)),
        last: () => agg(others().last()),
        max: () => agg(others().max()),
        mean: () => agg(others().mean()),
        median: () => agg(others().median()),
        min: () => agg(others().min()),
        nUnique: () => agg(others().nUnique()),
        quantile: (q) => agg(others().quantile(q)),
        sum: () => agg(others().sum()),
        tail: (n = 5) => agg(others().tail(n)),
        toString: () => "GroupBy",
    });
}
/**
 * Builds the object returned by `GroupBy.pivot`: one zero-argument method
 * per supported aggregation name. Each method calls
 * `df.pivot([by].flat(), [pivotCol], [valueCol], <name>)` and wraps the
 * result with `_DataFrame`.
 *
 * @param df - frame handle exposing `.pivot(...)`.
 * @param by - grouping column(s); flattened one level on each call.
 * @param pivotCol - pivot column, passed as a single-element array.
 * @param valueCol - values column, passed as a single-element array.
 */
function PivotOps(df, by, pivotCol, valueCol) {
    const ops = {
        [inspect]: () => node_util_1.default.formatWithOptions(inspectOpts, "PivotOps {by: %O}", by),
    };
    // Listed in the order the members are exposed on the returned object.
    const aggregations = ["first", "sum", "min", "max", "mean", "len", "median"];
    for (const aggName of aggregations) {
        ops[aggName] = () => {
            const pivoted = df.pivot([by].flat(), [pivotCol], [valueCol], aggName);
            return (0, dataframe_1._DataFrame)(pivoted);
        };
    }
    return ops;
}
/**
 * Wraps a rolling group-by configuration and exposes a single `agg`
 * method. Nothing is evaluated until `agg` is called; it then runs
 * `df.lazy().groupByRolling(options).agg(...).collectSync()` with no
 * collect options.
 *
 * @ignore
 * @param df - frame exposing `.lazy()`.
 * @param indexColumn - forwarded to `groupByRolling` under the same name.
 * @param period - forwarded under the same name.
 * @param offset - forwarded under the same name.
 * @param closed - forwarded under the same name.
 * @param by - forwarded under the same name.
 * @param check_sorted - forwarded under the same name.
 * @returns an object whose `agg(column, ...columns)` returns whatever
 *   `collectSync()` yields.
 */
function RollingGroupBy(df, indexColumn, period, offset, closed, by, check_sorted) {
    // A fresh options object is built for every agg() call.
    const rollingOptions = () => ({ indexColumn, period, offset, closed, by, check_sorted });
    return {
        agg(column, ...columns) {
            const grouped = df.lazy().groupByRolling(rollingOptions());
            const aggregated = grouped.agg(column, ...columns);
            return aggregated.collectSync();
        },
    };
}
/**
 * Wraps a dynamic group-by configuration and exposes a single `agg`
 * method. Nothing is evaluated until `agg` is called; it then runs
 * `df.lazy().groupByDynamic(options).agg(...)` and collects synchronously.
 *
 * @ignore
 * @param df - frame exposing `.lazy()`.
 * @param indexColumn - forwarded to `groupByDynamic` under the same name.
 * @param every - forwarded under the same name.
 * @param period - forwarded under the same name.
 * @param offset - forwarded under the same name.
 * @param includeBoundaries - forwarded under the same name.
 * @param closed - forwarded under the same name.
 * @param by - forwarded under the same name.
 * @param start_by - forwarded under the same name.
 * @param check_sorted - forwarded under the same name.
 * @returns an object whose `agg(column, ...columns)` returns whatever
 *   `collectSync(...)` yields.
 */
function DynamicGroupBy(df, indexColumn, every, period, offset, includeBoundaries, closed, by, start_by, check_sorted) {
    // A fresh options object is built for every agg() call.
    const dynamicOptions = () => ({
        indexColumn,
        every,
        period,
        offset,
        includeBoundaries,
        closed,
        by,
        start_by,
        check_sorted,
    });
    return {
        agg(column, ...columns) {
            const grouped = df.lazy().groupByDynamic(dynamicOptions());
            const aggregated = grouped.agg(column, ...columns);
            // NOTE(review): the key here is `noOptimizations` (plural) while
            // _GroupBy in this file passes `noOptimization`; confirm which
            // spelling collectSync actually reads.
            return aggregated.collectSync({ noOptimizations: true });
        },
    };
}
;