UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

627 lines (621 loc) 25.3 kB
import { DataFrame } from "../dataframe";
import { DataType } from "../datatypes";
import { Series } from "../series";
import { type ExprOrString } from "../utils";
import { Expr } from "./expr";

/**
 * __A column in a DataFrame.__
 * Can be used to select:
 *   * a single column by name
 *   * all columns by using a wildcard `"*"`
 *   * column by regular expression if the regex starts with `^` and ends with `$`
 *
 * @param col - What to select. The signature accepts a column name, an array of
 *   column names, a `Series` (of names, as in the last example), or a `DataType`.
 *   NOTE(review): the `DataType` form presumably selects every column of that
 *   dtype; none of the examples below demonstrate it — confirm.
 * @returns An `Expr` referring to the selected column(s).
 * @example
 * ```
 * > df = pl.DataFrame({
 * >   "ham": [1, 2, 3],
 * >   "hamburger": [11, 22, 33],
 * >   "foo": [3, 2, 1]})
 * > df.select(col("foo"))
 * shape: (3, 1)
 * ╭─────╮
 * │ foo │
 * │ --- │
 * │ i64 │
 * ╞═════╡
 * │ 3   │
 * ├╌╌╌╌╌┤
 * │ 2   │
 * ├╌╌╌╌╌┤
 * │ 1   │
 * ╰─────╯
 * > df.select(col("*"))
 * shape: (3, 3)
 * ╭─────┬───────────┬─────╮
 * │ ham ┆ hamburger ┆ foo │
 * │ --- ┆ ---       ┆ --- │
 * │ i64 ┆ i64       ┆ i64 │
 * ╞═════╪═══════════╪═════╡
 * │ 1   ┆ 11        ┆ 3   │
 * ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 2   ┆ 22        ┆ 2   │
 * ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 3   ┆ 33        ┆ 1   │
 * ╰─────┴───────────┴─────╯
 * > df.select(col("^ham.*$"))
 * shape: (3, 2)
 * ╭─────┬───────────╮
 * │ ham ┆ hamburger │
 * │ --- ┆ ---       │
 * │ i64 ┆ i64       │
 * ╞═════╪═══════════╡
 * │ 1   ┆ 11        │
 * ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 2   ┆ 22        │
 * ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 3   ┆ 33        │
 * ╰─────┴───────────╯
 * > df.select(col("*").exclude("ham"))
 * shape: (3, 2)
 * ╭───────────┬─────╮
 * │ hamburger ┆ foo │
 * │ ---       ┆ --- │
 * │ i64       ┆ i64 │
 * ╞═══════════╪═════╡
 * │ 11        ┆ 3   │
 * ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 22        ┆ 2   │
 * ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 33        ┆ 1   │
 * ╰───────────┴─────╯
 * > df.select(col(["hamburger", "foo"]))
 * shape: (3, 2)
 * ╭───────────┬─────╮
 * │ hamburger ┆ foo │
 * │ ---       ┆ --- │
 * │ i64       ┆ i64 │
 * ╞═══════════╪═════╡
 * │ 11        ┆ 3   │
 * ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 22        ┆ 2   │
 * ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 33        ┆ 1   │
 * ╰───────────┴─────╯
 * > df.select(col(pl.Series(["hamburger", "foo"])))
 * shape: (3, 2)
 * ╭───────────┬─────╮
 * │ hamburger ┆ foo │
 * │ ---       ┆ --- │
 * │ i64       ┆ i64 │
 * ╞═══════════╪═════╡
 * │ 11        ┆ 3   │
 * ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 22        ┆ 2   │
 * ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
 * │ 33        ┆ 1   │
 * ╰───────────┴─────╯
 * ```
 */
export declare function col(col: string | string[] | Series | DataType): Expr;

/**
 * Overload taking a single column name or an array of column names.
 * NOTE(review): presumably the multi-column counterpart of {@link col} — confirm
 * against the implementation, which is not visible in this declaration file.
 */
export declare function cols(col: string | string[]): Expr;
/** Overload taking column names as separate positional arguments. */
export declare function cols(col: string, ...cols2: string[]): Expr;

/**
 * Select nth column index in a DataFrame.
 * @param n - Column index to select, starting at 0.
 * @returns An `Expr` referring to the column at index `n`.
 * @example
 * ```
 * > df = pl.DataFrame({
 * >   "ham": [1, 2, 3],
 * >   "hamburger": [11, 22, 33],
 * >   "foo": [3, 2, 1]})
 * > df.select(nth(2))
 * shape: (3, 1)
 * ╭─────╮
 * │ foo │
 * │ --- │
 * │ i64 │
 * ╞═════╡
 * │ 3   │
 * ├╌╌╌╌╌┤
 * │ 2   │
 * ├╌╌╌╌╌┤
 * │ 1   │
 * ╰─────╯
 * ```
 */
export declare function nth(n: number): Expr;

/**
 * Builds an `Expr` from an arbitrary value.
 * NOTE(review): presumably a literal-value expression (the {@link format} docs
 * point to `lit(<value>)` for literal strings) — confirm against the implementation.
 */
export declare function lit(value: any): Expr;

/**
 * Generate a range of integers.
 *
 * This can be used in a `select`, `withColumn` etc.
 * Be sure that the range size is equal to the DataFrame you are collecting.
 *
 * The overloads accept either an options object or positional arguments; the
 * declared return type is `Expr` when `eager` is `false`/omitted and
 * `Series<DT>` when `eager` is `true`.
 *
 * @param start - Start of the range (inclusive). Defaults to 0.
 * @param end - End of the range (exclusive). If omitted (default), the value of `start` is used and `start` is set to `0`.
 * @param step - Step size of the range.
 * @param dtype - Data type of the range.
 * @param eager - Evaluate immediately and return a `Series`. If set to `false` (default), return an expression instead.
 * @returns Expr or Series Column of integer data type `dtype`.
 * @see {@link intRanges}
 * @example
 * ```
 * > df.lazy()
 * >   .filter(pl.col("foo").lt(pl.intRange(0, 100)))
 * >   .collect()
 * ```
 *
 * Generate an index column by using `intRange` in conjunction with {@link len}.
 * ```
 * > df = pl.DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]})
 * > df.select(
 * ...   pl.intRange(pl.len()).alias("index"),
 * ...   pl.all(),
 * ... )
 * shape: (3, 3)
 * ┌───────┬─────┬─────┐
 * │ index ┆ a   ┆ b   │
 * │ ---   ┆ --- ┆ --- │
 * │ u32   ┆ i64 ┆ i64 │
 * ╞═══════╪═════╪═════╡
 * │ 0     ┆ 1   ┆ 2   │
 * │ 1     ┆ 3   ┆ 4   │
 * │ 2     ┆ 5   ┆ 6   │
 * └───────┴─────┴─────┘
 * ```
 */
export declare function intRange(opts: { start: number | Expr; end: number | Expr; step?: number | Expr; dtype?: DataType; eager?: false; }): Expr;
export declare function intRange<DT extends DataType = DataType.Int64>(opts: { start: number | Expr; end: number | Expr; step?: number | Expr; dtype?: DT; eager: true; }): Series<DT>;
export declare function intRange<DT extends DataType = DataType.Int64>(opts: { start: number | Expr; end: number | Expr; step?: number | Expr; dtype?: DT; eager?: boolean; }): Expr | Series<DT>;
/** @deprecated *since 0.15.0* use `start` and `end` instead */
export declare function intRange(opts: { low: number | Expr; high: number | Expr; step?: number | Expr; dtype?: DataType; eager?: boolean; }): Expr | Series;
export declare function intRange(start: number | Expr, end?: number | Expr, step?: number | Expr, dtype?: DataType, eager?: false): Expr;
export declare function intRange<DT extends DataType = DataType.Int64>(start: number | Expr, end?: number | Expr, step?: number | Expr, dtype?: DT, eager?: true): Series<DT>;

/**
 * Generate a range of integers for each row of the input columns.
 * @param start - Start of the range (inclusive). Defaults to 0.
 * @param end - End of the range (exclusive). If omitted (default), the value of `start` is used and `start` is set to `0`.
 *   NOTE(review): the declared signatures make `end` a required parameter, so the
 *   "omitted" behaviour described here may not be reachable from TypeScript — confirm.
 * @param step - Step size of the range.
 * @param dtype - Integer data type of the ranges. Defaults to `Int64`.
 * @param eager - Evaluate immediately and return a `Series`. If set to `false` (default), return an expression instead.
 * @returns Expr or Series Column of data type `List(dtype)`.
 * @see {@link intRange}
 * @example
 * ```
 * const df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
 * const result = df.select(pl.intRanges("a", "b"));
 * ```
 */
export declare function intRanges(start: any, end: any, step?: number, dtype?: DataType, eager?: false): Expr;
export declare function intRanges(start: any, end: any, step?: number, dtype?: DataType, eager?: true): Series;

/** Alias for `pl.col("*")` */
export declare function all(): Expr;

/**
 * Return the row indices that would sort the columns.
 * @param exprs Column(s) to arg sort by. Accepts expression input (an array of
 *   `Expr` or an array of column names, per the signature).
 * @param descending Sort in descending order. When sorting by multiple columns, can be specified per column by passing a sequence of booleans.
 * @example
 * ```
 * const df = pl.DataFrame({"a": [0, 1, 1, 0], "b": [3, 2, 3, 2],});
 * df.select(pl.argSortBy(pl.col("a")));
 * shape: (4, 1)
 * ┌─────┐
 * │ a   │
 * │ --- │
 * │ u32 │
 * ╞═════╡
 * │ 0   │
 * │ 3   │
 * │ 1   │
 * │ 2   │
 * └─────┘
 * ```
 * NOTE(review): the example passes a bare `Expr`, while the declared parameter
 * type is `Expr[] | string[]` — confirm which form is intended.
 */
export declare function argSortBy(exprs: Expr[] | string[], descending?: boolean | boolean[]): Expr;

/**
 * Alias for mean.
 * Declared to return an `Expr` for a column name and a `number` for a `Series`.
 * @see {@link mean}
 */
export declare function avg(column: string): Expr;
export declare function avg(column: Series): number;

/**
 * Concat the arrays in a Series dtype List in linear time.
 * Accepts either one array of columns or the columns as separate arguments.
 * @param exprs Columns to concat into a List Series
 */
export declare function concatList(exprs: ExprOrString[]): Expr;
export declare function concatList(expr: ExprOrString, ...exprs: ExprOrString[]): Expr;
export declare function concatList(expr: ExprOrString, expr2: ExprOrString, ...exprs: ExprOrString[]): Expr;

/**
 * Concat Utf8 Series in linear time. Non utf8 columns are cast to utf8.
 *
 * Accepts either an options object (`exprs`, `sep`, `ignoreNulls`) or the same
 * values as positional arguments. The declared return type is `any`.
 * NOTE(review): `sep` is presumably the separator placed between values and
 * `ignoreNulls` presumably controls null handling — confirm against the implementation.
 */
export declare function concatString(opts: { exprs: ExprOrString[]; sep: string; ignoreNulls?: boolean; }): any;
export declare function concatString(exprs: ExprOrString[], sep?: string, ignoreNulls?: boolean): any;

/**
 * Count the number of values in this column.
 * Declared to return an `Expr` for a column name and a `number` for a `Series`.
 */
export declare function count(column: string): Expr;
export declare function count(column: Series): number;

/**
 * Compute the covariance between two columns/ expressions.
 * NOTE(review): `ddof` is presumably the "delta degrees of freedom" correction — confirm.
 */
export declare function cov(a: ExprOrString, b: ExprOrString, ddof?: number): Expr;

/**
 * Exclude certain columns from a wildcard expression.
 *
 * Syntactic sugar for:
 * ```
 * > pl.col("*").exclude(columns)
 * ```
 *
 * Accepts either a name / array of names, or the names as separate arguments.
 */
export declare function exclude(columns: string[] | string): Expr;
export declare function exclude(col: string, ...cols: string[]): Expr;

/**
 * Get the first value.
 * Declared to return an `Expr` when called with no argument or a column name,
 * and a value of type `T` when called with a `Series`.
 */
export declare function first(): Expr;
export declare function first(column: string): Expr;
export declare function first<T>(column: Series): T;

/**
 * String format utility for expressions
 * Note: strings will be interpolated as `col(<value>)`. if you want a literal string, use `lit(<value>)`
 * @example
 * ```
 * > df = pl.DataFrame({
 * ...   "a": ["a", "b", "c"],
 * ...   "b": [1, 2, 3],
 * ... })
 * > df.select(
 * ...   pl.format("foo_{}_bar_{}", pl.col("a"), "b").alias("fmt"),
 * ... )
 * shape: (3, 1)
 * ┌─────────────┐
 * │ fmt         │
 * │ ---         │
 * │ str         │
 * ╞═════════════╡
 * │ foo_a_bar_1 │
 * ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ foo_b_bar_2 │
 * ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ foo_c_bar_3 │
 * └─────────────┘
 *
 * // You can use format as tag function as well
 * > pl.format("foo_{}_bar_{}", pl.col("a"), "b") // is the same as
 * > pl.format`foo_${pl.col("a")}_bar_${"b"}`
 * ```
 */
export declare function format(strings: string | TemplateStringsArray, ...expr: ExprOrString[]): Expr;

/** Syntactic sugar for `pl.col(column).aggGroups()` */
export declare function groups(column: string): Expr;

/**
 * Get the first n rows of an Expression.
 * Declared to return an `Expr` for a column/expression and a `Series` for a `Series`.
 */
export declare function head(column: ExprOrString, n?: number): Expr;
export declare function head(column: Series, n?: number): Series;

/**
 * Return the number of elements in the column.
 * This is similar to `COUNT(*)` in SQL.
 *
 * @returns Expr - Expression of data type `UInt32`.
 * @example
 * ```
 * >>> const df = pl.DataFrame(
 * ...   {
 * ...     "a": [1, 2, null],
 * ...     "b": [3, null, null],
 * ...     "c": ["foo", "bar", "foo"],
 * ...   }
 * ... )
 * >>> df.select(pl.len())
 * shape: (1, 1)
 * ┌─────┐
 * │ len │
 * │ --- │
 * │ u32 │
 * ╞═════╡
 * │ 3   │
 * └─────┘
 * ```
 *
 * Generate an index column by using `len` in conjunction with {@link intRange}.
 * ```
 * >>> df.select(
 * ...   pl.intRange({ start: 0, end: pl.len(), dtype: pl.UInt32 }).alias("index"),
 * ...   pl.all(),
 * ... )
 * shape: (3, 4)
 * ┌───────┬──────┬──────┬─────┐
 * │ index ┆ a    ┆ b    ┆ c   │
 * │ ---   ┆ ---  ┆ ---  ┆ --- │
 * │ u32   ┆ i64  ┆ i64  ┆ str │
 * ╞═══════╪══════╪══════╪═════╡
 * │ 0     ┆ 1    ┆ 3    ┆ foo │
 * │ 1     ┆ 2    ┆ null ┆ bar │
 * │ 2     ┆ null ┆ null ┆ foo │
 * └───────┴──────┴──────┴─────┘
 * ```
 */
export declare function len(): Expr;

/**
 * Get the last value.
 * Accepts a column/expression or a `Series`; the declared return type is `any`.
 */
export declare function last(column: ExprOrString | Series): any;

/**
 * Get the mean value.
 * Declared to return an `Expr` for a column/expression and a `number` for a `Series`.
 */
export declare function mean(column: ExprOrString): Expr;
export declare function mean(column: Series): number;

/**
 * Get the median value.
 * Declared to return an `Expr` for a column/expression and a `number` for a `Series`.
 */
export declare function median(column: ExprOrString): Expr;
export declare function median(column: Series): number;

/**
 * Count unique values.
 * Declared to return an `Expr` for a column/expression and a `number` for a `Series`.
 */
export declare function nUnique(column: ExprOrString): Expr;
export declare function nUnique(column: Series): number;

/** Compute the pearson's correlation between two columns. */
export declare function pearsonCorr(a: ExprOrString, b: ExprOrString): Expr;

/**
 * Get the quantile
 * Declared to return an `Expr` for a column/expression and a `number` for a `Series`.
 * NOTE(review): `q` is presumably the quantile as a fraction between 0 and 1 — confirm.
 */
export declare function quantile(column: ExprOrString, q: number): Expr;
export declare function quantile(column: Series, q: number): number;

/**
 * __Run polars expressions without a context.__
 *
 * This is syntactic sugar for running `df.select` on an empty DataFrame.
 */
export declare function select(expr: ExprOrString, ...exprs: ExprOrString[]): DataFrame<{}>;

/** Compute the spearman rank correlation between two columns. */
export declare function spearmanRankCorr(a: ExprOrString, b: ExprOrString): Expr;

/**
 * Get the last n rows of an Expression.
 * Declared to return an `Expr` for a column/expression and a `Series` for a `Series`.
 */
export declare function tail(column: ExprOrString, n?: number): Expr;
export declare function tail(column: Series, n?: number): Series;

/** Syntactic sugar for `pl.col(column).list()` */
export declare function list(column: ExprOrString): Expr;

/**
 * Collect several columns into a Series of dtype Struct
 *
 * Passing an array of `Series` is declared to return a `Series`; passing
 * column names / expressions is declared to return an `Expr`.
 *
 * @param exprs Columns/Expressions to collect into a Struct
 * @example
 * ```
 * >pl.DataFrame(
 * ...   {
 * ...     "int": [1, 2],
 * ...     "str": ["a", "b"],
 * ...     "bool": [true, null],
 * ...     "list": [[1, 2], [3]],
 * ...   }
 * ... ).select([pl.struct(pl.all()).alias("my_struct")])
 * shape: (2, 1)
 * ┌───────────────────────┐
 * │ my_struct             │
 * │ ---                   │
 * │ struct{int, ... list} │
 * ╞═══════════════════════╡
 * │ {1,"a",true,[1, 2]}   │
 * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ {2,"b",null,[3]}      │
 * └───────────────────────┘
 *
 * // Only collect specific columns as a struct:
 * >df = pl.DataFrame({
 * ...   "a": [1, 2, 3, 4],
 * ...   "b": ["one", "two", "three", "four"],
 * ...   "c": [9, 8, 7, 6]
 * ... })
 * >df.withColumn(pl.struct(pl.col(["a", "b"])).alias("a_and_b"))
 * shape: (4, 4)
 * ┌─────┬───────┬─────┬───────────────────────────────┐
 * │ a   ┆ b     ┆ c   ┆ a_and_b                       │
 * │ --- ┆ ---   ┆ --- ┆ ---                           │
 * │ i64 ┆ str   ┆ i64 ┆ struct[2]{'a': i64, 'b': str} │
 * ╞═════╪═══════╪═════╪═══════════════════════════════╡
 * │ 1   ┆ one   ┆ 9   ┆ {1,"one"}                     │
 * ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 2   ┆ two   ┆ 8   ┆ {2,"two"}                     │
 * ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 3   ┆ three ┆ 7   ┆ {3,"three"}                   │
 * ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 4   ┆ four  ┆ 6   ┆ {4,"four"}                    │
 * └─────┴───────┴─────┴───────────────────────────────┘
 * ```
 */
export declare function struct(exprs: Series[]): Series;
export declare function struct(exprs: ExprOrString | ExprOrString[]): Expr;

/**
 * Alias for an element being evaluated in an `eval` expression.
 * @example
 *
 * A horizontal rank computation by taking the elements of a list
 * ```
 * >df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
 * >df.withColumn(
 * ...   pl.concatList(["a", "b"]).arr.eval(pl.element().rank()).alias("rank")
 * ... )
 * shape: (3, 3)
 * ┌─────┬─────┬────────────┐
 * │ a   ┆ b   ┆ rank       │
 * │ --- ┆ --- ┆ ---        │
 * │ i64 ┆ i64 ┆ list[f32]  │
 * ╞═════╪═════╪════════════╡
 * │ 1   ┆ 4   ┆ [1.0, 2.0] │
 * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 8   ┆ 5   ┆ [2.0, 1.0] │
 * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 3   ┆ 2   ┆ [2.0, 1.0] │
 * └─────┴─────┴────────────┘
 * ```
 *
 * A mathematical operation on array elements
 * ```
 * >df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
 * >df.withColumn(
 * ...   pl.concatList(["a", "b"]).arr.eval(pl.element().multiplyBy(2)).alias("a_b_doubled")
 * ... )
 * shape: (3, 3)
 * ┌─────┬─────┬─────────────┐
 * │ a   ┆ b   ┆ a_b_doubled │
 * │ --- ┆ --- ┆ ---         │
 * │ i64 ┆ i64 ┆ list[i64]   │
 * ╞═════╪═════╪═════════════╡
 * │ 1   ┆ 4   ┆ [2, 8]      │
 * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 8   ┆ 5   ┆ [16, 10]    │
 * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 * │ 3   ┆ 2   ┆ [6, 4]      │
 * └─────┴─────┴─────────────┘
 * ```
 */
export declare function element(): Expr;

/**
 * Compute the bitwise AND horizontally across columns.
 * @param exprs
 *   Column(s) to use in the aggregation. Accepts expression input. Strings are
 *   parsed as column names, other non-expression inputs are parsed as literals.
 *
 * @example
 * ```
 * >>> const df = pl.DataFrame(
 * ...   {
 * ...     "a": [false, false, true, true],
 * ...     "b": [false, true, null, true],
 * ...     "c": ["w", "x", "y", "z"],
 * ...   }
 * ... )
 * >>> df.withColumns(pl.allHorizontal([pl.col("a"), pl.col("b")]))
 * shape: (4, 4)
 * ┌───────┬───────┬─────┬───────┐
 * │ a     ┆ b     ┆ c   ┆ all   │
 * │ ---   ┆ ---   ┆ --- ┆ ---   │
 * │ bool  ┆ bool  ┆ str ┆ bool  │
 * ╞═══════╪═══════╪═════╪═══════╡
 * │ false ┆ false ┆ w   ┆ false │
 * │ false ┆ true  ┆ x   ┆ false │
 * │ true  ┆ null  ┆ y   ┆ null  │
 * │ true  ┆ true  ┆ z   ┆ true  │
 * └───────┴───────┴─────┴───────┘
 * ```
 */
export declare function allHorizontal(exprs: ExprOrString | ExprOrString[]): Expr;

/**
 * Compute the bitwise OR horizontally across columns.
 * @param exprs
 *   Column(s) to use in the aggregation. Accepts expression input. Strings are
 *   parsed as column names, other non-expression inputs are parsed as literals.
 * @example
 * ```
 * >>> const df = pl.DataFrame(
 * ...   {
 * ...     "a": [false, false, true, null],
 * ...     "b": [false, true, null, null],
 * ...     "c": ["w", "x", "y", "z"],
 * ...   }
 * ... )
 * >>> df.withColumns(pl.anyHorizontal([pl.col("a"), pl.col("b")]))
 * shape: (4, 4)
 * ┌───────┬───────┬─────┬───────┐
 * │ a     ┆ b     ┆ c   ┆ any   │
 * │ ---   ┆ ---   ┆ --- ┆ ---   │
 * │ bool  ┆ bool  ┆ str ┆ bool  │
 * ╞═══════╪═══════╪═════╪═══════╡
 * │ false ┆ false ┆ w   ┆ false │
 * │ false ┆ true  ┆ x   ┆ true  │
 * │ true  ┆ null  ┆ y   ┆ true  │
 * │ null  ┆ null  ┆ z   ┆ null  │
 * └───────┴───────┴─────┴───────┘
 * ```
 */
export declare function anyHorizontal(exprs: ExprOrString | ExprOrString[]): Expr;

/**
 * Get the maximum value horizontally across columns.
 * @param exprs
 *   Column(s) to use in the aggregation. Accepts expression input. Strings are
 *   parsed as column names, other non-expression inputs are parsed as literals.
 * @example
 * ```
 * >>> const df = pl.DataFrame(
 * ...   {
 * ...     "a": [1, 8, 3],
 * ...     "b": [4, 5, null],
 * ...     "c": ["x", "y", "z"],
 * ...   }
 * ... )
 * >>> df.withColumns(pl.maxHorizontal([pl.col("a"), pl.col("b")]))
 * shape: (3, 4)
 * ┌─────┬──────┬─────┬─────┐
 * │ a   ┆ b    ┆ c   ┆ max │
 * │ --- ┆ ---  ┆ --- ┆ --- │
 * │ i64 ┆ i64  ┆ str ┆ i64 │
 * ╞═════╪══════╪═════╪═════╡
 * │ 1   ┆ 4    ┆ x   ┆ 4   │
 * │ 8   ┆ 5    ┆ y   ┆ 8   │
 * │ 3   ┆ null ┆ z   ┆ 3   │
 * └─────┴──────┴─────┴─────┘
 * ```
 */
export declare function maxHorizontal(exprs: ExprOrString | ExprOrString[]): Expr;

/**
 * Get the minimum value horizontally across columns.
 * @param exprs
 *   Column(s) to use in the aggregation. Accepts expression input. Strings are
 *   parsed as column names, other non-expression inputs are parsed as literals.
 * @example
 * ```
 * >>> const df = pl.DataFrame(
 * ...   {
 * ...     "a": [1, 8, 3],
 * ...     "b": [4, 5, null],
 * ...     "c": ["x", "y", "z"],
 * ...   }
 * ... )
 * >>> df.withColumns(pl.minHorizontal([pl.col("a"), pl.col("b")]))
 * shape: (3, 4)
 * ┌─────┬──────┬─────┬─────┐
 * │ a   ┆ b    ┆ c   ┆ min │
 * │ --- ┆ ---  ┆ --- ┆ --- │
 * │ i64 ┆ i64  ┆ str ┆ i64 │
 * ╞═════╪══════╪═════╪═════╡
 * │ 1   ┆ 4    ┆ x   ┆ 1   │
 * │ 8   ┆ 5    ┆ y   ┆ 5   │
 * │ 3   ┆ null ┆ z   ┆ 3   │
 * └─────┴──────┴─────┴─────┘
 * ```
 */
export declare function minHorizontal(exprs: ExprOrString | ExprOrString[]): Expr;

/**
 * Sum all values horizontally across columns.
 * @param exprs
 *   Column(s) to use in the aggregation. Accepts expression input. Strings are
 *   parsed as column names, other non-expression inputs are parsed as literals.
 * @example
 * ```
 * >>> const df = pl.DataFrame(
 * ...   {
 * ...     "a": [1, 8, 3],
 * ...     "b": [4, 5, null],
 * ...     "c": ["x", "y", "z"],
 * ...   }
 * ... )
 * >>> df.withColumns(pl.sumHorizontal([pl.col("a"), pl.col("b")]))
 * shape: (3, 4)
 * ┌─────┬──────┬─────┬──────┐
 * │ a   ┆ b    ┆ c   ┆ sum  │
 * │ --- ┆ ---  ┆ --- ┆ ---  │
 * │ i64 ┆ i64  ┆ str ┆ i64  │
 * ╞═════╪══════╪═════╪══════╡
 * │ 1   ┆ 4    ┆ x   ┆ 5    │
 * │ 8   ┆ 5    ┆ y   ┆ 13   │
 * │ 3   ┆ null ┆ z   ┆ null │
 * └─────┴──────┴─────┴──────┘
 * ```
 */
export declare function sumHorizontal(exprs: ExprOrString | ExprOrString[]): Expr;