nodejs-polars
Version:
Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL
390 lines (389 loc) • 16 kB
JavaScript
// Interop helper: returns `mod` unchanged when it is flagged as an ES module
// (`__esModule` truthy); otherwise wraps it as `{ default: mod }` so callers
// can always read `.default`. Reuses an existing `this.__importDefault` if one
// is present.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.LazyDataFrame = exports._LazyDataFrame = void 0;
const dataframe_1 = require("../dataframe");
const polars_internal_1 = __importDefault(require("../internals/polars_internal"));
const utils_1 = require("../utils");
const expr_1 = require("./expr");
const groupby_1 = require("./groupby");
// Registry symbol that Node's `util.inspect` looks up for a custom formatter;
// it is used in this file as the computed key of the `[inspect]()` method.
const inspect = Symbol.for("nodejs.util.inspect.custom");
/**
 * Normalizes the `by` option of the rolling/dynamic group-by calls into an
 * array of expressions.
 *
 * - a string becomes `col(name)`
 * - a wrapped `Expr` (per `Expr.isExpr`) is unwrapped to its `_expr`
 * - any other array element is passed through untouched
 * - an array is converted element by element
 * - anything else (including `undefined`) yields `[]`
 *
 * Array elements that are wrapped `Expr` objects are unwrapped too, so the
 * array form produces the same kind of values as the single-value form.
 *
 * @param by column name, Expr, array of those, or nothing
 * @returns array of expressions
 */
const prepareGroupbyInputs = (by) => {
    const toNative = (item) => {
        if (typeof item === "string") {
            return polars_internal_1.default.col(item);
        }
        if (expr_1.Expr.isExpr(item)) {
            return item._expr;
        }
        return item;
    };
    if (Array.isArray(by)) {
        return by.map((item) => toNative(item));
    }
    if (typeof by === "string" || expr_1.Expr.isExpr(by)) {
        return [toNative(by)];
    }
    return [];
};
/**
 * Builds the public LazyDataFrame object around a native lazy-frame handle.
 *
 * Most methods forward to a method on `_ldf` and re-wrap the returned handle
 * through `exports._LazyDataFrame`. `collect*` / `fetch*` / `first` wrap their
 * result with `_DataFrame`, and the `groupBy*` methods with `_LazyGroupBy`.
 *
 * No method reassigns `_ldf` or mutates an options object passed by the caller.
 *
 * @param _ldf native lazy-frame handle
 * @ignore
 */
const _LazyDataFrame = (_ldf) => {
    // Re-wraps a native handle; the factory is looked up on `exports` at call time.
    const lazy = (native) => (0, exports._LazyDataFrame)(native);
    // Calls `method` on the native handle and returns the raw result.
    const unwrap = (method, ...args) => {
        return _ldf[method](...args);
    };
    // Calls `method` on the native handle and wraps the returned handle.
    const wrap = (method, ...args) => {
        return lazy(unwrap(method, ...args));
    };
    // Flattens a column selection (single value or nested arrays) into one
    // array; a nullish selection stays `undefined`.
    const toColumnList = (selection) => selection == null ? undefined : [selection].flat(3);
    // Turns a string into a one-element array, keeps arrays, drops anything else.
    const toNameList = (value) => {
        if (typeof value === "string") {
            return [value];
        }
        return Array.isArray(value) ? value : undefined;
    };
    /**
     * Returns the handle to execute for a collect/fetch call.
     *
     * Without `opts` this is `_ldf` itself. With `opts`, the flags are applied
     * through `optimizationToggle` on a copy of the options, so neither the
     * caller's object nor this frame's own handle is modified.
     * `noOptimization` switches off the pushdown and common-sub* flags, and
     * `streaming` switches off `commSubplanElim`. The simplify flag is read
     * from `simplifyExpression`, falling back to `simplifyExpr`.
     */
    const withOptimizations = (opts) => {
        if (!opts) {
            return _ldf;
        }
        const flags = { ...opts };
        if (flags.noOptimization) {
            flags.predicatePushdown = false;
            flags.projectionPushdown = false;
            flags.slicePushdown = false;
            flags.commSubplanElim = false;
            flags.commSubexprElim = false;
        }
        if (flags.streaming) {
            flags.commSubplanElim = false;
        }
        return _ldf.optimizationToggle(flags.typeCoercion, flags.predicatePushdown, flags.projectionPushdown, flags.simplifyExpression ?? flags.simplifyExpr, flags.slicePushdown, flags.commSubplanElim, flags.commSubexprElim, flags.streaming);
    };
    return {
        /** The native handle this object was created with. */
        _ldf,
        /** Custom `util.inspect` output: the optimized plan description. */
        [inspect]() {
            return _ldf.describeOptimizedPlan();
        },
        /** Tag checked by `isLazyDataFrame`. */
        get [Symbol.toStringTag]() {
            return "LazyDataFrame";
        },
        get columns() {
            return _ldf.columns;
        },
        describePlan() {
            return _ldf.describePlan();
        },
        describeOptimizedPlan() {
            return _ldf.describeOptimizedPlan();
        },
        cache() {
            return lazy(_ldf.cache());
        },
        clone() {
            return lazy(_ldf.clone());
        },
        /** Runs the plan synchronously and wraps the result with `_DataFrame`. */
        collectSync() {
            return (0, dataframe_1._DataFrame)(_ldf.collectSync());
        },
        /**
         * Runs the plan asynchronously.
         * @param opts optional optimization flags (see `withOptimizations`)
         * @returns promise resolving to the `_DataFrame`-wrapped result
         */
        collect(opts) {
            return withOptimizations(opts).collect().then(dataframe_1._DataFrame);
        },
        /** Drops the given columns; arguments may be names or (nested) arrays of names. */
        drop(...cols) {
            return lazy(_ldf.dropColumns(cols.flat(2)));
        },
        /**
         * Removes duplicate rows.
         *
         * Two call shapes are supported: `unique(maintainOrder, subset, keep)`
         * and `unique({ maintainOrder, subset, keep })`. `subset` may be a
         * single column or a (nested) array of columns in either shape.
         */
        unique(opts = false, subset, keep = "first") {
            if (typeof opts === "boolean") {
                return lazy(_ldf.unique(opts, toColumnList(subset), keep));
            }
            const settings = {
                maintainOrder: false,
                keep: "first",
                ...opts,
                subset: toColumnList(opts.subset),
            };
            return lazy(_ldf.unique(settings.maintainOrder, settings.subset, settings.keep));
        },
        /** Drops rows with nulls, optionally restricted to the given columns. */
        dropNulls(...subset) {
            if (subset.length) {
                return wrap("dropNulls", subset.flat(2));
            }
            return wrap("dropNulls");
        },
        /** Explodes the given columns, or every column when none are given. */
        explode(...columns) {
            const targets = columns.length ? columns : _ldf.columns;
            return wrap("explode", (0, utils_1.selectionToExprList)(targets, false));
        },
        /**
         * Synchronous fetch of `numRows`, wrapped with `_DataFrame`.
         * @param opts optional optimization flags (see `withOptimizations`)
         */
        fetchSync(numRows, opts) {
            return (0, dataframe_1._DataFrame)(withOptimizations(opts).fetchSync(numRows));
        },
        /**
         * Asynchronous fetch of `numRows`.
         * @param opts optional optimization flags (see `withOptimizations`)
         * @returns promise resolving to the `_DataFrame`-wrapped result
         */
        fetch(numRows, opts) {
            return withOptimizations(opts).fetch(numRows).then(dataframe_1._DataFrame);
        },
        /** Shorthand for `fetchSync(1)`. */
        first() {
            return this.fetchSync(1);
        },
        fillNull(exprOrValue) {
            const fillValue = (0, expr_1.exprToLitOrExpr)(exprOrValue)._expr;
            return lazy(_ldf.fillNull(fillValue));
        },
        filter(exprOrValue) {
            const predicate = (0, expr_1.exprToLitOrExpr)(exprOrValue, false)._expr;
            return lazy(_ldf.filter(predicate));
        },
        /**
         * Starts a group-by. Accepts either a selection plus `maintainOrder`,
         * or an options object `{ by, maintainOrder }`. In the options form a
         * missing `maintainOrder` falls back to the positional value
         * (default `true`).
         */
        groupBy(opt, maintainOrder = true) {
            if (opt?.by !== undefined) {
                const keys = (0, utils_1.selectionToExprList)([opt.by], false);
                return (0, groupby_1._LazyGroupBy)(_ldf.groupby(keys, opt.maintainOrder ?? maintainOrder));
            }
            const keys = (0, utils_1.selectionToExprList)([opt], false);
            return (0, groupby_1._LazyGroupBy)(_ldf.groupby(keys, maintainOrder));
        },
        /**
         * Rolling group-by over `indexColumn`. Defaults: `offset` = "-" +
         * `period`, `closed` = "right", `check_sorted` = false.
         */
        groupByRolling({ indexColumn, by, period, offset, closed, check_sorted }) {
            const resolvedOffset = offset ?? `-${period}`;
            const resolvedClosed = closed ?? "right";
            const keys = prepareGroupbyInputs(by);
            const checkSorted = check_sorted ?? false;
            const lgb = _ldf.groupbyRolling(polars_internal_1.default.col(indexColumn), period, resolvedOffset, resolvedClosed, keys, checkSorted);
            return (0, groupby_1._LazyGroupBy)(lgb);
        },
        /**
         * Dynamic group-by over `indexColumn`. Defaults: `period` = `every`,
         * `offset` = "-" + resolved period, `closed` = "right",
         * `includeBoundaries` = false, `start_by` = "monday",
         * `check_sorted` = false.
         */
        groupByDynamic({ indexColumn, every, period, offset, includeBoundaries, closed, by, start_by, check_sorted, }) {
            const resolvedPeriod = period ?? every;
            const resolvedOffset = offset ?? `-${resolvedPeriod}`;
            const resolvedClosed = closed ?? "right";
            const keys = prepareGroupbyInputs(by);
            const withBoundaries = includeBoundaries ?? false;
            const startBy = start_by ?? "monday";
            const checkSorted = check_sorted ?? false;
            const lgb = _ldf.groupbyDynamic(polars_internal_1.default.col(indexColumn), every, resolvedPeriod, resolvedOffset, withBoundaries, resolvedClosed, keys, startBy, checkSorted);
            return (0, groupby_1._LazyGroupBy)(lgb);
        },
        /** First `len` rows (default 5), implemented as `slice(0, len)`. */
        head(len = 5) {
            return lazy(_ldf.slice(0, len));
        },
        /** Returns the native handle. */
        inner() {
            return _ldf;
        },
        /**
         * Joins with another lazy frame.
         *
         * Defaults: `how` = "inner", `suffix` = "right", `allowParallel` = true,
         * `forceParallel` = false. A cross join takes no keys; any other join
         * needs either `on` or both `leftOn` and `rightOn`.
         *
         * @throws TypeError when `df` has no `_ldf`, or when the join keys are
         *         missing or only one of `leftOn` / `rightOn` is given
         */
        join(df, options) {
            // NOTE(review): the default suffix here is "right" while `joinAsof`
            // defaults to "_right" — confirm which is intended before changing.
            const settings = {
                how: "inner",
                suffix: "right",
                allowParallel: true,
                forceParallel: false,
                ...options,
            };
            const { how, suffix, allowParallel, forceParallel } = settings;
            if (!df?._ldf) {
                throw new TypeError("Expected a 'lazyFrame' as join table");
            }
            if (how === "cross") {
                return lazy(_ldf.join(df._ldf, [], [], allowParallel, forceParallel, how, suffix, [], []));
            }
            let leftKeys;
            let rightKeys;
            if (settings.on) {
                const shared = (0, utils_1.selectionToExprList)(settings.on, false);
                leftKeys = shared;
                rightKeys = shared;
            }
            else if (!settings.leftOn || !settings.rightOn) {
                // Covers "only one side given" as well as "no keys at all".
                throw new TypeError("You should pass the column to join on as an argument.");
            }
            else {
                leftKeys = (0, utils_1.selectionToExprList)(settings.leftOn, false);
                rightKeys = (0, utils_1.selectionToExprList)(settings.rightOn, false);
            }
            const joined = _ldf.join(df._ldf, leftKeys, rightKeys, allowParallel, forceParallel, how, suffix, [], []);
            return lazy(joined);
        },
        /**
         * As-of join with another lazy frame.
         *
         * Defaults: `suffix` = "_right", `allowParallel` = true,
         * `forceParallel` = false, `strategy` = "backward". `by` (string or
         * array) overrides both `byLeft` and `byRight`. A string `tolerance`
         * and a non-string `tolerance` are passed in separate native arguments.
         *
         * @throws TypeError when `other` has no `_ldf`, or when the join keys
         *         are missing or only one of `leftOn` / `rightOn` is given
         */
        joinAsof(other, options) {
            const settings = {
                suffix: "_right",
                allowParallel: true,
                forceParallel: false,
                strategy: "backward",
                ...options,
            };
            const { suffix, strategy, allowParallel, forceParallel } = settings;
            if (!other?._ldf) {
                throw new TypeError("Expected a 'lazyFrame' as join table");
            }
            let leftOn;
            let rightOn;
            if (settings.on) {
                leftOn = rightOn = settings.on;
            }
            else if (!settings.leftOn || !settings.rightOn) {
                // Covers "only one side given" as well as "no keys at all".
                throw new TypeError("You should pass the column to join on as an argument.");
            }
            else {
                leftOn = settings.leftOn;
                rightOn = settings.rightOn;
            }
            const by = toNameList(settings.by);
            const byLeft = by ?? toNameList(settings.byLeft);
            const byRight = by ?? toNameList(settings.byRight);
            const hasStringTolerance = typeof settings.tolerance === "string";
            const toleranceStr = hasStringTolerance ? settings.tolerance : undefined;
            const toleranceNum = hasStringTolerance ? undefined : settings.tolerance;
            const joined = _ldf.joinAsof(other._ldf, polars_internal_1.default.col(leftOn), polars_internal_1.default.col(rightOn), byLeft, byRight, allowParallel, forceParallel, suffix, strategy, toleranceNum, toleranceStr);
            return lazy(joined);
        },
        /** Last row, implemented as `tail(1)`; stays lazy. */
        last() {
            return lazy(_ldf.tail(1));
        },
        /** First `len` rows (default 5), implemented as `slice(0, len)`. */
        limit(len = 5) {
            return lazy(_ldf.slice(0, len));
        },
        max() {
            return lazy(_ldf.max());
        },
        mean() {
            return lazy(_ldf.mean());
        },
        median() {
            return lazy(_ldf.median());
        },
        unpivot(ids, values) {
            return lazy(_ldf.unpivot((0, utils_1.columnOrColumnsStrict)(ids), (0, utils_1.columnOrColumnsStrict)(values)));
        },
        min() {
            return lazy(_ldf.min());
        },
        quantile(quantile, interpolation = "nearest") {
            return lazy(_ldf.quantile(quantile, interpolation));
        },
        /** Renames columns; `mapping` keys are existing names, values the replacements. */
        rename(mapping) {
            const existing = Object.keys(mapping);
            const replacements = Object.values(mapping);
            return lazy(_ldf.rename(existing, replacements));
        },
        reverse() {
            return lazy(_ldf.reverse());
        },
        select(...exprs) {
            const selections = (0, utils_1.selectionToExprList)(exprs, false);
            return lazy(_ldf.select(selections));
        },
        shift(periods) {
            return lazy(_ldf.shift(periods));
        },
        /** Accepts `(n, fillValue)` or a single `{ n, fillValue }` object. */
        shiftAndFill(opts, fillValue) {
            if (typeof opts === "number") {
                return lazy(_ldf.shiftAndFill(opts, fillValue));
            }
            return lazy(_ldf.shiftAndFill(opts?.n, opts?.fillValue));
        },
        /** Accepts `(offset, length)` or a single `{ offset, length }` object. */
        slice(opt, len) {
            if (opt?.offset !== undefined) {
                return lazy(_ldf.slice(opt.offset, opt.length));
            }
            return lazy(_ldf.slice(opt, len));
        },
        /**
         * Sorts by a column name or a selection of expressions. Also accepts a
         * single `{ by, descending, nullsLast, maintainOrder }` object, which is
         * unpacked and re-dispatched through this same method.
         */
        sort(arg, descending = false, nullsLast = false, maintainOrder = false) {
            if (arg?.by !== undefined) {
                return this.sort(arg.by, arg.descending, arg.nullsLast, arg.maintainOrder);
            }
            if (typeof arg === "string") {
                return wrap("sort", arg, descending, nullsLast, maintainOrder);
            }
            const keys = (0, utils_1.selectionToExprList)(arg, false);
            return wrap("sortByExprs", keys, descending, nullsLast, maintainOrder);
        },
        std() {
            return lazy(_ldf.std());
        },
        sum() {
            return lazy(_ldf.sum());
        },
        var() {
            return lazy(_ldf.var());
        },
        tail(length = 5) {
            return lazy(_ldf.tail(length));
        },
        /**
         * Returns the JSON serialization as a string, or as a parsed value when
         * called with "" as the first argument.
         */
        toJSON(...args) {
            const json = _ldf.serialize("json").toString();
            // this is passed by `JSON.stringify` when calling `toJSON()`
            if (args[0] === "") {
                return JSON.parse(json);
            }
            return json;
        },
        serialize(format) {
            return _ldf.serialize(format);
        },
        withColumn(expr) {
            return lazy(_ldf.withColumn(expr._expr));
        },
        withColumns(...columns) {
            const exprs = (0, utils_1.selectionToExprList)(columns, false);
            return lazy(_ldf.withColumns(exprs));
        },
        withColumnRenamed(existing, replacement) {
            return lazy(_ldf.rename([existing], [replacement]));
        },
        withRowCount(name = "row_nr") {
            return lazy(_ldf.withRowCount(name));
        },
        /** Forwards to the native `sinkCsv`; `maintainOrder` defaults to false. */
        sinkCSV(path, options = {}) {
            _ldf.sinkCsv(path, {
                ...options,
                maintainOrder: options.maintainOrder ?? false,
            });
        },
        /**
         * Forwards to the native `sinkParquet`; `compression` defaults to "zstd"
         * and `statistics` to false.
         */
        sinkParquet(path, options = {}) {
            _ldf.sinkParquet(path, {
                ...options,
                compression: options.compression ?? "zstd",
                statistics: options.statistics ?? false,
            });
        },
    };
};
exports._LazyDataFrame = _LazyDataFrame;
/**
 * Reports whether `anyVal` carries the "LazyDataFrame" `Symbol.toStringTag`.
 * Nullish input yields `false`.
 */
const isLazyDataFrame = (anyVal) => {
    const tag = anyVal?.[Symbol.toStringTag];
    return tag === "LazyDataFrame";
};
/** @ignore */
exports.LazyDataFrame = Object.assign(exports._LazyDataFrame, {
deserialize: (buf, fmt) => (0, exports._LazyDataFrame)(polars_internal_1.default.JsLazyFrame.deserialize(buf, fmt)),
fromExternal(external) {
return (0, exports._LazyDataFrame)(polars_internal_1.default.JsLazyFrame.cloneExternal(external));
},
isLazyDataFrame,
});
;