// nodejs-polars
// Version:
// Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL
// 209 lines (207 loc) • 10 kB
// JavaScript
;
// CommonJS/ES-module interop helper: reuse an already-installed helper found on
// `this`, otherwise define one. A module object flagged with `__esModule` is
// returned untouched; anything else is wrapped as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.repeat = repeat;
exports.concat = concat;
const dataframe_1 = require("./dataframe");
const construction_1 = require("./internals/construction");
const polars_internal_1 = __importDefault(require("./internals/polars_internal"));
const dataframe_2 = require("./lazy/dataframe");
const series_1 = require("./series");
const utils_1 = require("./utils");
/**
 * _Build a Series that holds the same value `n` times._
 * @param value - Value to repeat; its Polars dtype is derived via `jsTypeToPolarsType`.
 * @param n - Number of repeats
 * @param name - Optional name of the Series (defaults to the empty string)
 * @returns The native repeated series wrapped by `_Series`.
 * @example
 *
 * ```
 *
 * > const s = pl.repeat("a", 5)
 * > s.toArray()
 * ["a", "a", "a", "a", "a"]
 *
 * ```
 */
function repeat(value, n, name = "") {
    const { jsTypeToPolarsType } = construction_1;
    const inferredType = jsTypeToPolarsType(value);
    const { JsSeries } = polars_internal_1.default;
    const nativeSeries = JsSeries.repeat(name, value, n, inferredType);
    const { _Series: wrapSeries } = series_1;
    return wrapSeries(nativeSeries);
}
/**
* Aggregate all the Dataframes/Series in a List of DataFrames/Series to a single DataFrame/Series.
* @param items DataFrames/Series/LazyFrames to concatenate.
* @param options.rechunk Make sure that the result data is in contiguous memory.
* @param options.how Only used if the items are DataFrames. *Defaults to 'vertical'*
* - vertical: Applies multiple `vstack` operations.
* - verticalRelaxed: Same as `vertical`, but additionally coerces columns to their common supertype *if* they are mismatched (eg: Int32 → Int64).
* - horizontal: Stacks Series horizontally and fills with nulls if the lengths don't match.
* - diagonal: Finds a union between the column schemas and fills missing column values with ``null``.
* - diagonalRelaxed: Same as `diagonal`, but additionally coerces columns to their common supertype *if* they are mismatched (eg: Int32 → Int64).
* - align, alignFull, alignInner, alignLeft, alignRight: Combines frames horizontally,
* auto-determining the common key columns and aligning rows using the same
* logic as `alignFrames` (note that "align" is an alias for "alignFull").
* The "align" strategy determines the type of join used to align the frames,
* equivalent to the "how" parameter on `alignFrames`. Note that the common join columns are automatically coalesced, but other column collisions
* will raise an error (if you need more control over this you should use a suitable `join` method directly).
* @param options.parallel - Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.
* @example
* > const df1 = pl.DataFrame({"a": [1], "b": [3]});
* > const df2 = pl.DataFrame({"a": [2], "b": [4]});
* > pl.concat([df1, df2]);
* shape: (2, 2)
* ┌─────┬─────┐
* │ a ┆ b │
* │ --- ┆ --- │
* │ i64 ┆ i64 │
* ╞═════╪═════╡
* │ 1 ┆ 3 │
* ├╌╌╌╌╌┼╌╌╌╌╌┤
* │ 2 ┆ 4 │
* └─────┴─────┘
*
* > const a = pl.DataFrame({ a: ["a", "b"], b: [1, 2] });
* > const b = pl.DataFrame({ c: [5, 6], d: [7, 8], e: [9, 10]});
* > pl.concat([a, b], { how: "horizontal" });
*
* shape: (2, 5)
* ┌─────┬─────┬─────┬─────┬──────┐
* │ a ┆ b ┆ c ┆ d ┆ e │
* │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
* │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │
* ╞═════╪═════╪═════╪═════╪══════╡
* │ a ┆ 1.0 ┆ 5.0 ┆ 7.0 ┆ 9.0 │
* │ b ┆ 2.0 ┆ 6.0 ┆ 8.0 ┆ 10.0 │
* └─────┴─────┴─────┴─────┴──────┘
*
* > const df_d1 = pl.DataFrame({"a": [1], "b": [3]});
* > const df_d2 = pl.DataFrame({"a": [2], "c": [4]});
* > pl.concat([df_d1, df_d2], { how: "diagonal" });
*
* shape: (2, 3)
* ┌─────┬──────┬──────┐
* │ a ┆ b ┆ c │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ i64 │
* ╞═════╪══════╪══════╡
* │ 1 ┆ 3 ┆ null │
* │ 2 ┆ null ┆ 4 │
* └─────┴──────┴──────┘
*
* The "align" strategies require at least one common column to align on:
*
* > const df1 = pl.DataFrame({"id": [1, 2], "x": [3, 4]});
* > const df2 = pl.DataFrame({"id": [2, 3], "y": [5, 6]});
* > const df3 = pl.DataFrame({"id": [1, 3], "z": [7, 8]});
* > pl.concat([df1, df2, df3], { how: "align" }); // equivalent to "alignFull"
*
* shape: (3, 4)
* ┌─────┬──────┬──────┬──────┐
* │ id  ┆ x    ┆ y    ┆ z    │
* │ --- ┆ ---  ┆ ---  ┆ ---  │
* │ i64 ┆ i64  ┆ i64  ┆ i64  │
* ╞═════╪══════╪══════╪══════╡
* │ 1   ┆ 3    ┆ null ┆ 7    │
* │ 2   ┆ 4    ┆ 5    ┆ null │
* │ 3   ┆ null ┆ 6    ┆ 8    │
* └─────┴──────┴──────┴──────┘
*
* > pl.concat([df1, df2, df3], { how: "alignLeft" });
*
* shape: (2, 4)
* ┌─────┬─────┬──────┬──────┐
* │ id  ┆ x   ┆ y    ┆ z    │
* │ --- ┆ --- ┆ ---  ┆ ---  │
* │ i64 ┆ i64 ┆ i64  ┆ i64  │
* ╞═════╪═════╪══════╪══════╡
* │ 1   ┆ 3   ┆ null ┆ 7    │
* │ 2   ┆ 4   ┆ 5    ┆ null │
* └─────┴─────┴──────┴──────┘
*
* > pl.concat([df1, df2, df3], { how: "alignRight" });
*
* shape: (2, 4)
* ┌─────┬──────┬──────┬─────┐
* │ id  ┆ x    ┆ y    ┆ z   │
* │ --- ┆ ---  ┆ ---  ┆ --- │
* │ i64 ┆ i64  ┆ i64  ┆ i64 │
* ╞═════╪══════╪══════╪═════╡
* │ 1   ┆ null ┆ null ┆ 7   │
* │ 3   ┆ null ┆ 6    ┆ 8   │
* └─────┴──────┴──────┴─────┘
*
* > pl.concat([df1, df2, df3], { how: "alignInner" });
*
* shape: (0, 4)
* ┌─────┬─────┬─────┬─────┐
* │ id  ┆ x   ┆ y   ┆ z   │
* │ --- ┆ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ i64 ┆ i64 │
* ╞═════╪═════╪═════╪═════╡
* └─────┴─────┴─────┴─────┘
*/
/**
 * Derive the join parameters used by the "align*" strategies of `concat`.
 *
 * @param items - Frames to align; each must expose a `columns` array of column names.
 * @param how - One of "align", "alignFull", "alignInner", "alignLeft", "alignRight".
 * @returns An object with:
 *   - `commonCols`: whatever `commonValue` returns for the frames' column lists
 *     (presumably the columns shared by every frame — confirm against `utils`);
 *     callers use it as the join key and sort key.
 *   - `uniqueCols`: every distinct column name across all frames, sorted.
 *   - `joinMethod`: `how` with the "align" prefix removed and lower-cased
 *     ("align" itself maps to "full").
 */
const computeAlignParams = (items, how) => {
    const columnLists = items.map((frame) => frame.columns);
    const commonCols = (0, utils_1.commonValue)(...columnLists);
    const uniqueCols = [...new Set(columnLists.flat())].sort();
    // "alignLeft" -> "left", "alignInner" -> "inner", ...
    // Use the locale-independent toLowerCase(): toLocaleLowerCase() follows the
    // host locale and maps "I" to a dotless "ı" under Turkish/Azeri locales,
    // which would turn "alignInner" into the invalid join method "ınner".
    const joinMethod = how === "align"
        ? "full"
        : how.replace("align", "").toLowerCase();
    return { commonCols, uniqueCols, joinMethod };
};
/**
 * Concatenate DataFrames, LazyDataFrames or Series into one object of the same kind.
 *
 * @param items - Non-empty array whose elements are all DataFrames, all LazyDataFrames or all Series.
 * @param options.rechunk - When truthy, the eager (DataFrame/Series) result is rechunked before
 *   being returned; for the `concatLf` paths it is forwarded, falling back to `false` when absent.
 * @param options.how - Concatenation strategy for frames. Falls back to "vertical" when absent,
 *   including when a partial options object such as `{ rechunk: false }` is supplied.
 *   Not consulted for Series.
 * @param options.parallel - Forwarded to `concatLf`, falling back to `true` when absent.
 * @returns `items[0]` itself when there is exactly one item, otherwise the concatenated result.
 * @throws {RangeError} When `items` is empty.
 * @throws {TypeError} When `how` is not a recognised strategy for DataFrames, or when `items`
 *   is not an array of DataFrames, LazyDataFrames or Series.
 */
function concat(items, options = { rechunk: true, parallel: true, how: "vertical" }) {
    const { rechunk, parallel } = options;
    // The parameter default above only applies when `options` is omitted entirely;
    // a partial options object would otherwise leave `how` undefined and make the
    // DataFrame branch throw instead of using the documented "vertical" default.
    const how = options.how ?? "vertical";
    if (!items.length) {
        throw new RangeError("cannot concat empty list");
    }
    // short-circuit common single-item cases early for all types
    if (items.length === 1) {
        return items[0];
    }
    if ((0, utils_1.isDataFrameArray)(items)) {
        let df;
        switch (how) {
            case "align":
            case "alignFull":
            case "alignInner":
            case "alignLeft":
            case "alignRight": {
                const { commonCols, uniqueCols, joinMethod } = computeAlignParams(items, how);
                // Join the frames pairwise on the shared columns, then sort by those
                // keys and select the columns in `uniqueCols` order.
                const joined = items.reduce((acc, curr) => acc.join(curr, { on: commonCols, how: joinMethod, coalesce: true }));
                df = (0, dataframe_1._DataFrame)(joined)
                    ._df.sort(commonCols)
                    .select(uniqueCols);
                break;
            }
            case "vertical":
                df = items.reduce((acc, curr) => acc.vstack(curr));
                break;
            case "verticalRelaxed":
            case "diagonalRelaxed":
                // The relaxed strategies go through the lazy concat and are collected immediately.
                df = (0, dataframe_2._LazyDataFrame)(polars_internal_1.default.concatLf(items.map((i) => i.inner().lazy()), how, rechunk ?? false, parallel ?? true, true, // to_supertypes
                true)).collectSync();
                break;
            case "horizontal":
                df = (0, dataframe_1._DataFrame)(polars_internal_1.default.horizontalConcat(items.map((i) => i.inner())));
                break;
            case "diagonal":
                df = (0, dataframe_1._DataFrame)(polars_internal_1.default.diagonalConcat(items.map((i) => i.inner())));
                break;
            default:
                throw new TypeError("unknown concat how option");
        }
        return rechunk ? df.rechunk() : df;
    }
    if ((0, utils_1.isLazyDataFrameArray)(items)) {
        if (how.startsWith("align")) {
            const { commonCols, uniqueCols, joinMethod } = computeAlignParams(items, how);
            const joined = items.reduce((acc, curr) => acc.join(curr, { on: commonCols, how: joinMethod, coalesce: true }));
            return (0, dataframe_2._LazyDataFrame)(joined)
                ._ldf.sort(commonCols)
                .select(uniqueCols);
        }
        // Every non-align strategy is delegated to the native lazy concat.
        return (0, dataframe_2._LazyDataFrame)(polars_internal_1.default.concatLf(items.map((i) => i.inner()), how, rechunk ?? false, parallel ?? true, true, // to_supertypes
        true));
    }
    if ((0, utils_1.isSeriesArray)(items)) {
        const s = items.reduce((acc, curr) => acc.concat(curr));
        return rechunk ? s.rechunk() : s;
    }
    throw new TypeError("can only concat series and dataframes");
}