UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

213 lines (212 loc) 8.53 kB
import { type DataFrame } from "./dataframe";
import type { Expr } from "./lazy/expr";
import type { ColumnsOrExpr, StartBy } from "./utils";
/**
 * Unique symbol used as the key of the `[inspect]()` members declared below.
 * Only its type is declared in this file.
 * NOTE(review): presumably bound to Node's custom-inspect symbol at runtime — confirm in the implementation.
 */
declare const inspect: unique symbol;
/**
 * Starts a new GroupBy operation.
 *
 * Every aggregation method on this interface returns a `DataFrame`;
 * `pivot` returns a narrower `PivotOps` handle instead.
 */
export interface GroupBy {
  /**
   * String rendering of this object, keyed by the `inspect` symbol.
   * @returns a string representation
   */
  [inspect](): string;
  /**
   * Aggregate the groups into Series.
   * @deprecated
   */
  aggList(): DataFrame;
  /**
   * Compute aggregations for each group of a group by operation.
   * ___
   * @param columns - Aggregations to compute for each group of the group by operation, specified as positional arguments.
   * @example
   * ```
   * const df = pl.DataFrame({
   *   foo: [1, 2, 2, 3, 3],
   *   ham: [6.0, 6, 7, 8.0, 8.0],
   *   bar: ["a", "b", "c", "c", "c"],
   *   spam: ["a", "b", "c", "c", "c"],
   * });
   * // use lazy api rest parameter style
   * > df.groupBy('foo', 'bar').agg(pl.count('ham'), pl.col('spam')).sort(["foo", "bar"]);
   * shape: (4, 4)
   * ┌─────┬─────┬─────┬────────────┐
   * │ foo ┆ bar ┆ ham ┆ spam       │
   * │ --- ┆ --- ┆ --- ┆ ---        │
   * │ f64 ┆ str ┆ u32 ┆ list[str]  │
   * ╞═════╪═════╪═════╪════════════╡
   * │ 1.0 ┆ a   ┆ 1   ┆ ["a"]      │
   * │ 2.0 ┆ b   ┆ 1   ┆ ["b"]      │
   * │ 2.0 ┆ c   ┆ 1   ┆ ["c"]      │
   * │ 3.0 ┆ c   ┆ 2   ┆ ["c", "c"] │
   * └─────┴─────┴─────┴────────────┘
   *
   * > df.groupBy("bar").agg(pl.col("foo"), pl.col("ham"), pl.col("spam") ).sort("bar");
   * shape: (3, 4)
   * ┌─────┬─────────────────┬─────────────────┬─────────────────┐
   * │ bar ┆ foo             ┆ ham             ┆ spam            │
   * │ --- ┆ ---             ┆ ---             ┆ ---             │
   * │ str ┆ list[f64]       ┆ list[f64]       ┆ list[str]       │
   * ╞═════╪═════════════════╪═════════════════╪═════════════════╡
   * │ a   ┆ [1.0]           ┆ [6.0]           ┆ ["a"]           │
   * │ b   ┆ [2.0]           ┆ [6.0]           ┆ ["b"]           │
   * │ c   ┆ [2.0, 3.0, 3.0] ┆ [7.0, 8.0, 8.0] ┆ ["c", "c", "c"] │
   * └─────┴─────────────────┴─────────────────┴─────────────────┘
   *
   * > const h = pl.col("ham");
   * > df.groupBy("bar").agg(h.sum().as("sum_ham"), h.min().as("min_ham"), h.max().as("max_ham"));
   * shape: (3, 4)
   * ┌─────┬─────────┬─────────┬─────────┐
   * │ bar ┆ sum_ham ┆ min_ham ┆ max_ham │
   * │ --- ┆ ---     ┆ ---     ┆ ---     │
   * │ str ┆ f64     ┆ f64     ┆ f64     │
   * ╞═════╪═════════╪═════════╪═════════╡
   * │ a   ┆ 6.0     ┆ 6.0     ┆ 6.0     │
   * │ b   ┆ 6.0     ┆ 6.0     ┆ 6.0     │
   * │ c   ┆ 23.0    ┆ 7.0     ┆ 8.0     │
   * └─────┴─────────┴─────────┴─────────┘
   * ```
   */
  agg(...columns: Expr[]): DataFrame;
  /**
   * Object form of `agg`: each string key maps to the name of one `Expr`
   * member, or to an array of such names.
   * NOTE(review): keys are presumably column names and values the aggregation
   * methods to apply to them — confirm against the implementation.
   * @param columns - Mapping from string keys to `Expr` member name(s).
   */
  agg(columns: Record<string, keyof Expr | (keyof Expr)[]>): DataFrame;
  /**
   * Return the number of rows in each group.
   * @example
   * ```
   * > df = pl.DataFrame({a: ["Apple", "Apple", "Orange"], b: [1, null, 2]});
   * > df.groupBy("a").len()
   * shape: (2, 2)
   * ┌────────┬─────┐
   * │ a      ┆ len │
   * │ ---    ┆ --- │
   * │ str    ┆ u32 │
   * ╞════════╪═════╡
   * │ Apple  ┆ 2   │
   * │ Orange ┆ 1   │
   * └────────┴─────┘
   * ```
   */
  len(): DataFrame;
  /**
   * Aggregate the first values in the group.
   */
  first(): DataFrame;
  /**
   * Return a `DataFrame` with:
   * - the groupby keys
   * - the group indexes aggregated as lists
   */
  groups(): DataFrame;
  /**
   * Return first n rows of each group.
   * @param n - Number of values of the group to select
   * @example
   * ```
   * > df = pl.DataFrame({
   * >   "letters": ["c", "c", "a", "c", "a", "b"],
   * >   "nrs": [1, 2, 3, 4, 5, 6]
   * > })
   * > df
   * shape: (6, 2)
   * ╭─────────┬─────╮
   * │ letters ┆ nrs │
   * │ ---     ┆ --- │
   * │ str     ┆ i64 │
   * ╞═════════╪═════╡
   * │ "c"     ┆ 1   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "c"     ┆ 2   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "a"     ┆ 3   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "c"     ┆ 4   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "a"     ┆ 5   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "b"     ┆ 6   │
   * ╰─────────┴─────╯
   * > df.groupBy("letters")
   * >   .head(2)
   * >   .sort("letters");
   * shape: (5, 2)
   * ╭─────────┬─────╮
   * │ letters ┆ nrs │
   * │ ---     ┆ --- │
   * │ str     ┆ i64 │
   * ╞═════════╪═════╡
   * │ "a"     ┆ 3   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "a"     ┆ 5   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "b"     ┆ 6   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "c"     ┆ 1   │
   * ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
   * │ "c"     ┆ 2   │
   * ╰─────────┴─────╯
   * ```
   */
  head(n?: number): DataFrame;
  /**
   * Aggregate the last values in the group.
   */
  last(): DataFrame;
  /**
   * Reduce the groups to the maximal value.
   */
  max(): DataFrame;
  /**
   * Reduce the groups to the mean values.
   */
  mean(): DataFrame;
  /**
   * Return the median per group.
   */
  median(): DataFrame;
  /**
   * Reduce the groups to the minimal value.
   */
  min(): DataFrame;
  /**
   * Count the unique values per group.
   */
  nUnique(): DataFrame;
  /**
   * Do a pivot operation based on the group key, a pivot column and an aggregation function on the values column.
   * The aggregation itself is chosen by calling one of the `PivotOps` methods on the returned object.
   * @param pivotCol - Column to pivot.
   * @param valuesCol - Column that will be aggregated.
   *
   */
  pivot(pivotCol: string, valuesCol: string): PivotOps;
  /**
   * Options-object form of `pivot`, taking the same `pivotCol` and
   * `valuesCol` fields as a single argument.
   */
  pivot({
    pivotCol,
    valuesCol,
  }: {
    pivotCol: string;
    valuesCol: string;
  }): PivotOps;
  /**
   * Compute the quantile per group.
   * @param quantile - The quantile to compute.
   * NOTE(review): presumably a fraction between 0 and 1 — the declaration only constrains it to `number`; confirm.
   */
  quantile(quantile: number): DataFrame;
  /**
   * Reduce the groups to the sum.
   */
  sum(): DataFrame;
  /**
   * NOTE(review): undocumented upstream. By symmetry with `head`, this
   * presumably returns the last `n` rows of each group — confirm.
   * @param n - Optional row count; the default is not visible in this declaration.
   */
  tail(n?: number): DataFrame;
  /**
   * String representation of this object.
   */
  toString(): string;
}
/**
 * Handle returned by `GroupBy.pivot`: the `len`, `first`, `max`, `mean`,
 * `median`, `min` and `sum` members picked from `GroupBy`, plus the
 * `[inspect]` string hook.
 */
export type PivotOps = Pick<
  GroupBy,
  "len" | "first" | "max" | "mean" | "median" | "min" | "sum"
> & {
  [inspect](): string;
};
/**
 * Declared factory returning a `GroupBy` for `df`, the `by` column names and
 * an optional `maintainOrder` flag.
 * NOTE(review): `df` is typed `any`; presumably the internal dataframe handle — confirm.
 * @ignore
 */
export declare function _GroupBy(df: any, by: string[], maintainOrder?: boolean): GroupBy;
/**
 * intermediate state of a rolling groupby
 *
 * Exposes only `agg`, which takes at least one column or expression and
 * returns a `DataFrame`.
 */
export interface RollingGroupBy {
  agg(column: ColumnsOrExpr, ...columns: ColumnsOrExpr[]): DataFrame;
}
/**
 * Declared factory returning a `RollingGroupBy`.
 * NOTE(review): `period` and `offset` are plain strings and `closed` is `any`;
 * their accepted formats/values are not visible here — see the implementation.
 * @ignore
 */
export declare function RollingGroupBy(
  df: any,
  indexColumn: ColumnsOrExpr,
  period: string,
  offset?: string,
  closed?: any,
  by?: ColumnsOrExpr
): RollingGroupBy;
/**
 * intermediate state of a dynamic groupby
 *
 * Exposes only `agg`, which takes at least one column or expression and
 * returns a `DataFrame`.
 */
export interface DynamicGroupBy {
  agg(column: ColumnsOrExpr, ...columns: ColumnsOrExpr[]): DataFrame;
}
/**
 * Declared factory returning a `DynamicGroupBy`.
 * NOTE(review): `every`, `period`, `offset`, `label` and `closed` are plain
 * strings; their accepted formats/values are not visible here — see the implementation.
 * @ignore
 */
export declare function DynamicGroupBy(
  df: any,
  indexColumn: string,
  every: string,
  period?: string,
  offset?: string,
  label?: string,
  includeBoundaries?: boolean,
  closed?: string,
  by?: ColumnsOrExpr,
  startBy?: StartBy
): DynamicGroupBy;
export {};