UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

209 lines (207 loc) 10 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.repeat = repeat;
exports.concat = concat;
const dataframe_1 = require("./dataframe");
const construction_1 = require("./internals/construction");
const polars_internal_1 = __importDefault(require("./internals/polars_internal"));
const dataframe_2 = require("./lazy/dataframe");
const series_1 = require("./series");
const utils_1 = require("./utils");
/**
 * _Repeat a single value n times and collect into a Series._
 * @param value - Value to repeat. Its polars dtype is derived via `jsTypeToPolarsType`.
 * @param n - Number of repeats
 * @param name - Optional name of the Series (defaults to the empty string)
 * @returns A Series wrapping the native repeated series.
 * @example
 *
 * ```
 *
 * > const s = pl.repeat("a", 5)
 * > s.toArray()
 * ["a", "a", "a", "a", "a"]
 *
 * ```
 */
function repeat(value, n, name = "") {
    const dtype = (0, construction_1.jsTypeToPolarsType)(value);
    const s = polars_internal_1.default.JsSeries.repeat(name, value, n, dtype);
    return (0, series_1._Series)(s);
}
/**
 * Derive the join parameters used by the "align*" concat strategies.
 *
 * @param items - Frames to align; each must expose a `columns` array.
 * @param how - One of the "align*" strategy names.
 * @returns An object with:
 *  - `commonCols`: result of `commonValue` over every frame's column list
 *    (presumably the columns shared by all frames - confirm against `utils`),
 *  - `uniqueCols`: the sorted union of all column names,
 *  - `joinMethod`: "full" for "align", otherwise the strategy name with the
 *    "align" prefix removed and lower-cased (e.g. "alignLeft" -> "left").
 */
const computeAlignParams = (items, how) => {
    const commonCols = (0, utils_1.commonValue)(...items.map((c) => c.columns));
    const uniqueCols = [...new Set(items.flatMap((c) => c.columns))].sort();
    // Join methods allowed: "full" | "left" | "inner" | "semi" | "anti" | undefined
    // Use the locale-independent toLowerCase(): toLocaleLowerCase() would turn
    // "Inner" into "ınner" (dotless i) under a Turkish/Azeri default locale.
    const joinMethod = how === "align" ? "full" : how.replace("align", "").toLowerCase();
    return { commonCols, uniqueCols, joinMethod };
};
/**
 * Aggregate all the Dataframes/Series in a List of DataFrames/Series to a single DataFrame/Series.
 *
 * A single-item list is returned as-is (the item itself, without rechunking).
 *
 * @param items DataFrames/Series/LazyFrames to concatenate.
 * @param options.rechunk Make sure that the result data is in contiguous memory.
 * @param options.how Only used if the items are DataFrames/LazyFrames. *Defaults to 'vertical'*
 *     - vertical: Applies multiple `vstack` operations.
 *     - verticalRelaxed: Same as `vertical`, but additionally coerces columns to their common supertype *if* they are mismatched (eg: Int32 → Int64).
 *     - horizontal: Stacks Series horizontally and fills with nulls if the lengths don't match.
 *     - diagonal: Finds a union between the column schemas and fills missing column values with ``null``.
 *     - diagonalRelaxed: Same as `diagonal`, but additionally coerces columns to their common supertype *if* they are mismatched (eg: Int32 → Int64).
 *     - align, alignFull, alignInner, alignLeft, alignRight: Combines frames horizontally,
 *       auto-determining the common key columns and aligning rows using the same
 *       logic as `alignFrames` (note that "align" is an alias for "alignFull").
 *       The "align" strategy determines the type of join used to align the frames,
 *       equivalent to the "how" parameter on `alignFrames`.
 *       Note that the common join columns are automatically coalesced, but other column collisions
 *       will raise an error (if you need more control over this you should use a suitable `join` method directly).
 * @param options.parallel - Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.
 * @throws {RangeError} If `items` is empty.
 * @throws {TypeError} If `how` is not a known strategy for DataFrames, or if `items`
 *     is not an array of DataFrames, LazyFrames or Series.
 * @example
 * > const df1 = pl.DataFrame({"a": [1], "b": [3]});
 * > const df2 = pl.DataFrame({"a": [2], "b": [4]});
 * > pl.concat([df1, df2]);
 * shape: (2, 2)
 * ┌─────┬─────┐
 * │ a   ┆ b   │
 * │ --- ┆ --- │
 * │ i64 ┆ i64 │
 * ╞═════╪═════╡
 * │ 1   ┆ 3   │
 * ├╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 2   ┆ 4   │
 * └─────┴─────┘
 *
 * > const a = pl.DataFrame({ a: ["a", "b"], b: [1, 2] });
 * > const b = pl.DataFrame({ c: [5, 6], d: [7, 8], e: [9, 10]});
 * > pl.concat([a, b], { how: "horizontal" });
 *
 * shape: (2, 5)
 * ┌─────┬─────┬─────┬─────┬──────┐
 * │ a   ┆ b   ┆ c   ┆ d   ┆ e    │
 * │ --- ┆ --- ┆ --- ┆ --- ┆ ---  │
 * │ str ┆ f64 ┆ f64 ┆ f64 ┆ f64  │
 * ╞═════╪═════╪═════╪═════╪══════╡
 * │ a   ┆ 1.0 ┆ 5.0 ┆ 7.0 ┆ 9.0  │
 * │ b   ┆ 2.0 ┆ 6.0 ┆ 8.0 ┆ 10.0 │
 * └─────┴─────┴─────┴─────┴──────┘
 *
 * > const df_d1 = pl.DataFrame({"a": [1], "b": [3]});
 * > const df_d2 = pl.DataFrame({"a": [2], "c": [4]});
 * > pl.concat([df_d1, df_d2], { how: "diagonal" });
 *
 * shape: (2, 3)
 * ┌─────┬──────┬──────┐
 * │ a   ┆ b    ┆ c    │
 * │ --- ┆ ---  ┆ ---  │
 * │ i64 ┆ i64  ┆ i64  │
 * ╞═════╪══════╪══════╡
 * │ 1   ┆ 3    ┆ null │
 * │ 2   ┆ null ┆ 4    │
 * └─────┴──────┴──────┘
 *
 * The "align" strategies require at least one common column to align on:
 *
 * > const df1 = pl.DataFrame({"id": [1, 2], "x": [3, 4]});
 * > const df2 = pl.DataFrame({"id": [2, 3], "y": [5, 6]});
 * > const df3 = pl.DataFrame({"id": [1, 3], "z": [7, 8]});
 * > pl.concat([df1, df2, df3], { how: "align" });  // equivalent to "alignFull"
 * shape: (3, 4)
 * ┌─────┬──────┬──────┬──────┐
 * │ id  ┆ x    ┆ y    ┆ z    │
 * │ --- ┆ ---  ┆ ---  ┆ ---  │
 * │ i64 ┆ i64  ┆ i64  ┆ i64  │
 * ╞═════╪══════╪══════╪══════╡
 * │ 1   ┆ 3    ┆ null ┆ 7    │
 * │ 2   ┆ 4    ┆ 5    ┆ null │
 * │ 3   ┆ null ┆ 6    ┆ 8    │
 * └─────┴──────┴──────┴──────┘
 *
 * > pl.concat([df1, df2, df3], { how: "alignLeft" });
 * shape: (2, 4)
 * ┌─────┬─────┬──────┬──────┐
 * │ id  ┆ x   ┆ y    ┆ z    │
 * │ --- ┆ --- ┆ ---  ┆ ---  │
 * │ i64 ┆ i64 ┆ i64  ┆ i64  │
 * ╞═════╪═════╪══════╪══════╡
 * │ 1   ┆ 3   ┆ null ┆ 7    │
 * │ 2   ┆ 4   ┆ 5    ┆ null │
 * └─────┴─────┴──────┴──────┘
 *
 * > pl.concat([df1, df2, df3], { how: "alignRight" });
 * shape: (2, 4)
 * ┌─────┬──────┬──────┬─────┐
 * │ id  ┆ x    ┆ y    ┆ z   │
 * │ --- ┆ ---  ┆ ---  ┆ --- │
 * │ i64 ┆ i64  ┆ i64  ┆ i64 │
 * ╞═════╪══════╪══════╪═════╡
 * │ 1   ┆ null ┆ null ┆ 7   │
 * │ 3   ┆ null ┆ 6    ┆ 8   │
 * └─────┴──────┴──────┴─────┘
 *
 * > pl.concat([df1, df2, df3], { how: "alignInner" });
 * shape: (0, 4)
 * ┌─────┬─────┬─────┬─────┐
 * │ id  ┆ x   ┆ y   ┆ z   │
 * │ --- ┆ --- ┆ --- ┆ --- │
 * │ i64 ┆ i64 ┆ i64 ┆ i64 │
 * ╞═════╪═════╪═════╪═════╡
 * └─────┴─────┴─────┴─────┘
 */
function concat(items, options = { rechunk: true, parallel: true, how: "vertical" }) {
    // `how` falls back to "vertical" even when a partial options object is
    // supplied (e.g. `{ rechunk: false }`); previously that case left `how`
    // undefined and DataFrame input failed with "unknown concat how option".
    // `rechunk` / `parallel` keep their existing per-call-site fallbacks.
    const { rechunk, parallel, how = "vertical" } = options;
    if (!items.length) {
        throw new RangeError("cannot concat empty list");
    }
    // short-circuit common single-item cases early for all types
    if (items.length === 1) {
        return items[0];
    }
    if ((0, utils_1.isDataFrameArray)(items)) {
        let df;
        switch (how) {
            case "align":
            case "alignFull":
            case "alignInner":
            case "alignLeft":
            case "alignRight": {
                const { commonCols, uniqueCols, joinMethod } = computeAlignParams(items, how);
                // Fold the frames together with successive joins on the shared
                // columns, then order rows by the keys and columns by name.
                df = (0, dataframe_1._DataFrame)(items.reduce((acc, curr) => acc.join(curr, {
                    on: commonCols,
                    how: joinMethod,
                    coalesce: true,
                })))
                    ._df.sort(commonCols)
                    .select(uniqueCols);
                break;
            }
            case "vertical":
                df = items.reduce((acc, curr) => acc.vstack(curr));
                break;
            case "verticalRelaxed":
            case "diagonalRelaxed":
                // The relaxed strategies go through the lazy engine and are
                // collected synchronously back into an eager DataFrame.
                df = (0, dataframe_2._LazyDataFrame)(polars_internal_1.default.concatLf(items.map((i) => i.inner().lazy()), how, rechunk ?? false, parallel ?? true, true, // to_supertypes
                true)).collectSync();
                break;
            case "horizontal":
                df = (0, dataframe_1._DataFrame)(polars_internal_1.default.horizontalConcat(items.map((i) => i.inner())));
                break;
            case "diagonal":
                df = (0, dataframe_1._DataFrame)(polars_internal_1.default.diagonalConcat(items.map((i) => i.inner())));
                break;
            default:
                throw new TypeError("unknown concat how option");
        }
        return rechunk ? df.rechunk() : df;
    }
    if ((0, utils_1.isLazyDataFrameArray)(items)) {
        let df;
        if (how?.startsWith("align")) {
            const { commonCols, uniqueCols, joinMethod } = computeAlignParams(items, how);
            df = (0, dataframe_2._LazyDataFrame)(items.reduce((acc, curr) => acc.join(curr, {
                on: commonCols,
                how: joinMethod,
                coalesce: true,
            })))
                ._ldf.sort(commonCols)
                .select(uniqueCols);
        }
        else {
            df = (0, dataframe_2._LazyDataFrame)(polars_internal_1.default.concatLf(items.map((i) => i.inner()), how, rechunk ?? false, parallel ?? true, true, // to_supertypes
            true));
        }
        return df;
    }
    if ((0, utils_1.isSeriesArray)(items)) {
        const s = items.reduce((acc, curr) => acc.concat(curr));
        return rechunk ? s.rechunk() : s;
    }
    throw new TypeError("can only concat series and dataframes");
}