nodejs-polars
Version:
Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL
1,280 lines (1,259 loc) • 46.6 kB
TypeScript
import type { DataType } from "./datatypes";
import type { Expr } from "./lazy/expr";
import type { ClosedWindow, InterpolationMethod, RollingOptions, RollingQuantileOptions, RollingSkewOptions } from "./types";
import type { ColumnsOrExpr, StartBy } from "./utils";
/**
 * Arithmetic operations.
 *
 * Each operation is declared twice: once under a short name and once under a
 * longer, spelled-out name carrying the same documentation.
 *
 * @typeParam T - type returned by every operation
 */
export interface Arithmetic<T> {
  /**
   * Addition: `self + other`.
   * @param other - right-hand operand
   * @category Arithmetic
   */
  add(other: any): T;
  /**
   * Subtraction: `self - other`.
   * @param other - right-hand operand
   * @category Arithmetic
   */
  sub(other: any): T;
  /**
   * Division: `self / other`.
   * @param other - divisor
   * @category Arithmetic
   */
  div(other: any): T;
  /**
   * Multiplication: `self * other`.
   * @param other - right-hand operand
   * @category Arithmetic
   */
  mul(other: any): T;
  /**
   * Remainder of `self` divided by `other`.
   * @param other - divisor
   * @category Arithmetic
   */
  rem(other: any): T;
  /**
   * Addition: `self + other` (spelled-out counterpart of `add`).
   * @param other - right-hand operand
   * @category Arithmetic
   */
  plus(other: any): T;
  /**
   * Subtraction: `self - other` (spelled-out counterpart of `sub`).
   * @param other - right-hand operand
   * @category Arithmetic
   */
  minus(other: any): T;
  /**
   * Division: `self / other` (spelled-out counterpart of `div`).
   * @param other - divisor
   * @category Arithmetic
   */
  divideBy(other: any): T;
  /**
   * Multiplication: `self * other` (spelled-out counterpart of `mul`).
   * @param other - right-hand operand
   * @category Arithmetic
   */
  multiplyBy(other: any): T;
  /**
   * Remainder of `self` divided by `other` (spelled-out counterpart of `rem`).
   * @param other - divisor
   * @category Arithmetic
   */
  modulo(other: any): T;
}
/**
 * Comparison operations.
 *
 * Each comparison is declared under a short name and a spelled-out name
 * carrying the same documentation.
 *
 * @typeParam T - type returned by every comparison
 */
export interface Comparison<T> {
  /**
   * Equality check: `self == other`
   * @param other - value to compare against
   * @category Comparison
   */
  eq(other: any): T;
  /**
   * Equality check: `self == other`
   * @param other - value to compare against
   * @category Comparison
   */
  equals(other: any): T;
  /**
   * Greater-than-or-equal check: `self >= other`
   * @param other - value to compare against
   * @category Comparison
   */
  gtEq(other: any): T;
  /**
   * Greater-than-or-equal check: `self >= other`
   * @param other - value to compare against
   * @category Comparison
   */
  greaterThanEquals(other: any): T;
  /**
   * Greater-than check: `self > other`
   * @param other - value to compare against
   * @category Comparison
   */
  gt(other: any): T;
  /**
   * Greater-than check: `self > other`
   * @param other - value to compare against
   * @category Comparison
   */
  greaterThan(other: any): T;
  /**
   * Less-than-or-equal check: `self <= other`
   * @param other - value to compare against
   * @category Comparison
   */
  ltEq(other: any): T;
  /**
   * Less-than-or-equal check: `self <= other`
   * @param other - value to compare against
   * @category Comparison
   */
  lessThanEquals(other: any): T;
  /**
   * Less-than check: `self < other`
   * @param other - value to compare against
   * @category Comparison
   */
  lt(other: any): T;
  /**
   * Less-than check: `self < other`
   * @param other - value to compare against
   * @category Comparison
   */
  lessThan(other: any): T;
  /**
   * Inequality check: `self != other`
   * @param other - value to compare against
   * @category Comparison
   */
  neq(other: any): T;
  /**
   * Inequality check: `self != other`
   * @param other - value to compare against
   * @category Comparison
   */
  notEquals(other: any): T;
}
/**
 * A trait for cumulative operations.
 *
 * Every operation can be called either with a positional `reverse` flag or
 * with an options object carrying the same flag.
 *
 * @typeParam T - type returned by every cumulative operation
 */
export interface Cumulative<T> {
  /**
   * Get an array with the cumulative count computed at every element.
   * @param reverse - reverse the operation
   * @category Cumulative
   */
  cumCount(reverse?: boolean): T;
  cumCount({ reverse }: {
    reverse: boolean;
  }): T;
  /**
   * __Get an array with the cumulative max computed at every element.__
   * ___
   * @param reverse - reverse the operation
   * @example
   * ```
   * > const s = pl.Series("a", [1, 2, 3])
   * > s.cumMax()
   * shape: (3,)
   * Series: 'a' [i64]
   * [
   *         1
   *         2
   *         3
   * ]
   * ```
   * @category Cumulative
   */
  cumMax(reverse?: boolean): T;
  cumMax({ reverse }: {
    reverse: boolean;
  }): T;
  /**
   * __Get an array with the cumulative min computed at every element.__
   * ___
   * @param reverse - reverse the operation
   * @example
   * ```
   * > const s = pl.Series("a", [1, 2, 3])
   * > s.cumMin()
   * shape: (3,)
   * Series: 'a' [i64]
   * [
   *         1
   *         1
   *         1
   * ]
   * ```
   * @category Cumulative
   */
  cumMin(reverse?: boolean): T;
  cumMin({ reverse }: {
    reverse: boolean;
  }): T;
  /**
   * __Get an array with the cumulative product computed at every element.__
   * ___
   * @param reverse - reverse the operation
   * @example
   * ```
   * > const s = pl.Series("a", [1, 2, 3])
   * > s.cumProd()
   * shape: (3,)
   * Series: 'a' [i64]
   * [
   *         1
   *         2
   *         6
   * ]
   * ```
   * @category Cumulative
   */
  cumProd(reverse?: boolean): T;
  cumProd({ reverse }: {
    reverse: boolean;
  }): T;
  /**
   * __Get an array with the cumulative sum computed at every element.__
   * ___
   * @param reverse - reverse the operation
   * @example
   * ```
   * > const s = pl.Series("a", [1, 2, 3])
   * > s.cumSum()
   * shape: (3,)
   * Series: 'a' [i64]
   * [
   *         1
   *         3
   *         6
   * ]
   * ```
   * @category Cumulative
   */
  cumSum(reverse?: boolean): T;
  cumSum({ reverse }: {
    reverse: boolean;
  }): T;
}
/**
 * __A trait for DataFrame and Series that allows for the application of a rolling window.__
 *
 * Every operation accepts either positional arguments or a single options object.
 *
 * In the positional overloads `minPeriods` is a count (a plain `number`). The
 * `Array<number>` form is still accepted by the typings purely for backward
 * compatibility with the previous declaration.
 *
 * @typeParam T - type returned by every rolling operation
 */
export interface Rolling<T> {
  /**
   * __Apply a rolling max (moving max) over the values in this Series.__
   *
   * A window of length `windowSize` will traverse the series. The values that fill this window
   * will (optionally) be multiplied with the weights given by the `weights` vector.
   *
   * The resulting values will be aggregated to their max.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   * elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   * If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingMax(windowSize: number, weights?: Array<number>, minPeriods?: number | Array<number>, center?: boolean): T;
  rollingMax(options: RollingOptions): T;
  /**
   * __Apply a rolling mean (moving mean) over the values in this Series.__
   *
   * A window of length `windowSize` will traverse the series. The values that fill this window
   * will (optionally) be multiplied with the weights given by the `weights` vector.
   *
   * The resulting values will be aggregated to their mean.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   * elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   * If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingMean(windowSize: number, weights?: Array<number>, minPeriods?: number | Array<number>, center?: boolean): T;
  rollingMean(options: RollingOptions): T;
  /**
   * __Apply a rolling min (moving min) over the values in this Series.__
   *
   * A window of length `windowSize` will traverse the series. The values that fill this window
   * will (optionally) be multiplied with the weights given by the `weights` vector.
   *
   * The resulting values will be aggregated to their min.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   * elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   * If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingMin(windowSize: number, weights?: Array<number>, minPeriods?: number | Array<number>, center?: boolean): T;
  rollingMin(options: RollingOptions): T;
  /**
   * __Compute a rolling standard deviation.__
   *
   * A window of length `windowSize` will traverse the array. The values that fill this window
   * will (optionally) be multiplied with the weights given by the `weights` vector.
   *
   * The resulting values will be aggregated to their standard deviation.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   * elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   * If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @param ddof - "Delta Degrees of Freedom": the divisor used in the calculation is `N - ddof`,
   * where `N` represents the number of elements. By default ddof is 1.
   * @category Rolling
   */
  rollingStd(windowSize: number, weights?: Array<number>, minPeriods?: number | Array<number>, center?: boolean, ddof?: number): T;
  rollingStd(options: RollingOptions): T;
  /**
   * __Apply a rolling sum (moving sum) over the values in this Series.__
   *
   * A window of length `windowSize` will traverse the series. The values that fill this window
   * will (optionally) be multiplied with the weights given by the `weights` vector.
   *
   * The resulting values will be aggregated to their sum.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   * elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   * If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingSum(windowSize: number, weights?: Array<number>, minPeriods?: number | Array<number>, center?: boolean): T;
  rollingSum(options: RollingOptions): T;
  /**
   * __Compute a rolling variance.__
   *
   * A window of length `windowSize` will traverse the series. The values that fill this window
   * will (optionally) be multiplied with the weights given by the `weights` vector.
   *
   * The resulting values will be aggregated to their variance.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   * elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   * If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @param ddof - "Delta Degrees of Freedom": the divisor used in the calculation is `N - ddof`,
   * where `N` represents the number of elements. By default ddof is 1.
   * @category Rolling
   */
  rollingVar(windowSize: number, weights?: Array<number>, minPeriods?: number | Array<number>, center?: boolean, ddof?: number): T;
  rollingVar(options: RollingOptions): T;
  /**
   * Compute a rolling median
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   * elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   * If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingMedian(windowSize: number, weights?: Array<number>, minPeriods?: number | Array<number>, center?: boolean): T;
  rollingMedian(options: RollingOptions): T;
  /**
   * Compute a rolling quantile
   * @param quantile - quantile to compute
   * @param interpolation - interpolation type
   * @param windowSize - Size of the rolling window
   * @param weights - An optional slice with the same length as the window that will be multiplied
   * elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   * If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @param by - NOTE(review): not documented in this file; presumably the column the window is computed over — confirm.
   * @param closed - NOTE(review): not documented in this file; presumably which window bounds are inclusive — confirm.
   * @category Rolling
   */
  rollingQuantile(quantile: number, interpolation?: InterpolationMethod, windowSize?: number, weights?: Array<number>, minPeriods?: number | Array<number>, center?: boolean, by?: string, closed?: ClosedWindow): T;
  rollingQuantile(options: RollingQuantileOptions): T;
  /**
   * Compute a rolling skew
   * @param windowSize - Size of the rolling window
   * @param bias - If false, then the calculations are corrected for statistical bias.
   * @category Rolling
   */
  rollingSkew(windowSize: number, bias?: boolean): T;
  /**
   * Compute a rolling skew
   * @param options
   * @param options.windowSize - Size of the rolling window
   * @param options.bias - If false, then the calculations are corrected for statistical bias.
   * @category Rolling
   */
  rollingSkew(options: RollingSkewOptions): T;
}
/**
 * Rounding and clipping operations.
 *
 * @typeParam T - type returned by every operation
 */
export interface Round<T> {
  /**
   * Round underlying floating point data by `decimals` digits.
   *
   * Similar functionality to javascript `toFixed`
   * @param decimals - number of decimals to round by.
   * @category Math
   */
  round(decimals: number): T;
  round(options: {
    decimals: number;
  }): T;
  /**
   * Floor underlying floating point array to the highest integers smaller or equal to the float value.
   * Only works on floating point Series
   * @category Math
   */
  floor(): T;
  /**
   * Ceil underlying floating point array to the lowest integers greater or equal to the float value.
   * Only works on floating point Series
   * @category Math
   */
  ceil(): T;
  /**
   * Clip (limit) the values in an array to any value that fits in 64-bit floating point range.
   * Only works for the following dtypes: {Int32, Int64, Float32, Float64, UInt32}.
   * If you want to clip other dtypes, consider writing a when -> then -> otherwise expression
   * @param min - Minimum value
   * @param max - Maximum value
   * @category Math
   */
  clip(min: number, max: number): T;
  // Returns `T` like the positional overload (previously typed as `any`).
  clip(options: {
    min: number;
    max: number;
  }): T;
}
/**
 * Random sampling.
 *
 * @typeParam T - type returned by `sample`
 */
export interface Sample<T> {
  /**
   * Sample from this DataFrame by setting either `n` or `frac`.
   *
   * Can be called with positional arguments, or with an options object that
   * carries either `n` or `frac`.
   * @param n - Number of samples; must be smaller than `self.len()`.
   * @param frac - Fraction between 0.0 and 1.0.
   * @param withReplacement - Sample with replacement.
   * @param seed - Seed initialization. If not provided, a random seed will be used
   * @example
   * ```
   * > df = pl.DataFrame({
   * >   "foo": [1, 2, 3],
   * >   "bar": [6, 7, 8],
   * >   "ham": ['a', 'b', 'c']
   * > })
   * > df.sample({n: 2})
   * shape: (2, 3)
   * ╭─────┬─────┬─────╮
   * │ foo ┆ bar ┆ ham │
   * │ --- ┆ --- ┆ --- │
   * │ i64 ┆ i64 ┆ str │
   * ╞═════╪═════╪═════╡
   * │ 1   ┆ 6   ┆ "a" │
   * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
   * │ 3   ┆ 8   ┆ "c" │
   * ╰─────┴─────┴─────╯
   * ```
   * @category Math
   */
  sample(n?: number, frac?: number, withReplacement?: boolean, seed?: number | bigint): T;
  sample(opts?: {
    n: number;
    withReplacement?: boolean;
    seed?: number | bigint;
  }): T;
  sample(opts?: {
    frac: number;
    withReplacement?: boolean;
    seed?: number | bigint;
  }): T;
}
/**
 * Callable that turns a byte array into a `T`, paired with a `getState`
 * method that produces a byte array.
 *
 * NOTE(review): presumably the two directions of a bincode round-trip
 * (call = decode, `getState` = encode) — confirm against the implementation.
 *
 * @typeParam T - type produced when the interface is called
 */
export interface Bincode<T> {
  /**
   * @param bincode - raw bytes
   * @returns the value built from `bincode`
   */
  (bincode: Uint8Array): T;
  /**
   * @param T2 - input value (untyped here)
   * @returns raw bytes
   */
  getState(T2: any): Uint8Array;
}
/**
 * Functions that can be applied to dtype List
 *
 * @typeParam T - type returned by every list function
 */
export interface ListFunctions<T> {
  /**
   * NOTE(review): undocumented in this file; presumably the index of the
   * minimum value of every sublist — confirm.
   */
  argMin(): T;
  /**
   * NOTE(review): undocumented in this file; presumably the index of the
   * maximum value of every sublist — confirm.
   */
  argMax(): T;
  /**
   * Concat the arrays in a Series dtype List in linear time.
   * @param other - Column(s) to concat into a List Series
   * @example
   * ```
   * df = pl.DataFrame({
   *   "a": [["a"], ["x"]],
   *   "b": [["b", "c"], ["y", "z"]],
   * })
   * df.select(pl.col("a").lst.concat("b"))
   * shape: (2, 1)
   * ┌─────────────────┐
   * │ a               │
   * │ ---             │
   * │ list[str]       │
   * ╞═════════════════╡
   * │ ["a", "b", "c"] │
   * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
   * │ ["x", "y", "z"] │
   * └─────────────────┘
   * ```
   * @category List
   */
  concat(other: (string | T)[] | string | T): T;
  /**
   * Check if sublists contain the given item.
   * @param item - Item that will be checked for membership
   * @example
   * ```
   * df = pl.DataFrame({"foo": [[3, 2, 1], [], [1, 2]]})
   * df.select(pl.col("foo").lst.contains(1))
   * shape: (3, 1)
   * ┌───────┐
   * │ foo   │
   * │ ---   │
   * │ bool  │
   * ╞═══════╡
   * │ true  │
   * ├╌╌╌╌╌╌╌┤
   * │ false │
   * ├╌╌╌╌╌╌╌┤
   * │ true  │
   * └───────┘
   * ```
   * @category List
   */
  contains(item: any): T;
  /**
   * Calculate the n-th discrete difference of every sublist.
   * @param n - number of slots to shift
   * @param nullBehavior - 'ignore' | 'drop'
   * @example
   * ```
   * s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
   * s.lst.diff()
   *
   * shape: (2,)
   * Series: 'a' [list]
   * [
   *     [null, 1, ... 1]
   *     [null, -8, -1]
   * ]
   * ```
   * @category List
   */
  diff(n?: number, nullBehavior?: "ignore" | "drop"): T;
  /**
   * Get the value by index in the sublists.
   * Index `0` returns the first item of every sublist and index `-1` returns
   * the last item of every sublist. If an index is out of bounds, `null` is returned.
   * @param index - position inside every sublist
   * @category List
   */
  get(index: number | Expr): T;
  /**
   * Run any polars expression against the lists' elements
   * @param expr - Expression to run. Note that you can select an element with `pl.first()`, or `pl.col()`
   * @param parallel - Run all expressions in parallel. Don't activate this blindly.
   * Parallelism is worth it if there is enough work to do per thread.
   * This likely should not be used in the groupby context, because execution is already parallel per group.
   * @example
   * ```
   * >df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
   * >df.withColumn(
   * ...   pl.concatList(["a", "b"]).lst.eval(pl.first().rank()).alias("rank")
   * ... )
   * shape: (3, 3)
   * ┌─────┬─────┬────────────┐
   * │ a   ┆ b   ┆ rank       │
   * │ --- ┆ --- ┆ ---        │
   * │ i64 ┆ i64 ┆ list [f32] │
   * ╞═════╪═════╪════════════╡
   * │ 1   ┆ 4   ┆ [1.0, 2.0] │
   * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
   * │ 8   ┆ 5   ┆ [2.0, 1.0] │
   * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
   * │ 3   ┆ 2   ┆ [2.0, 1.0] │
   * └─────┴─────┴────────────┘
   * ```
   * @category List
   */
  eval(expr: Expr, parallel?: boolean): T;
  /**
   * Get the first value of the sublists.
   * @category List
   */
  first(): T;
  /**
   * Slice the head of every sublist
   * @param n - How many values to take in the slice.
   * @example
   * ```
   * s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
   * s.lst.head(2)
   * shape: (2,)
   * Series: 'a' [list]
   * [
   *     [1, 2]
   *     [10, 2]
   * ]
   * ```
   * @category List
   */
  head(n?: number): T;
  /**
   * Slice the tail of every sublist
   * @param n - How many values to take in the slice.
   * @example
   * ```
   * s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
   * s.lst.tail(2)
   * shape: (2,)
   * Series: 'a' [list]
   * [
   *     [3, 4]
   *     [2, 1]
   * ]
   * ```
   * @category List
   */
  tail(n?: number): T;
  /**
   * Join all string items in a sublist and place a separator between them.
   * This errors if inner type of list `!= Utf8`.
   * @param options.separator - A string used to separate one element of the list from the next in the resulting string.
   * If omitted, the list elements are separated with a comma.
   * @param options.ignoreNulls - If true, null values will be ignored.
   * @category List
   */
  join(options: {
    separator?: string | Expr;
    ignoreNulls?: boolean;
  }): T;
  join(): T;
  join(separator: string | Expr): T;
  /**
   * Get the last value of the sublists.
   * @category List
   */
  last(): T;
  /**
   * Get the length of the sublists.
   * @category List
   */
  lengths(): T;
  /**
   * Get the maximum value of the sublists.
   * @category List
   */
  max(): T;
  /**
   * Get the mean value of the sublists.
   * @category List
   */
  mean(): T;
  /**
   * Get the minimum value of the sublists.
   * @category List
   */
  min(): T;
  /**
   * Reverse the sublists.
   * @category List
   */
  reverse(): T;
  /**
   * Shift the sublists.
   * @param periods - Number of periods to shift. Can be positive or negative.
   * @category List
   */
  shift(periods: number): T;
  /**
   * Slice the sublists.
   * @param offset - The offset of the slice.
   * @param length - The length of the slice.
   * @category List
   */
  slice(offset: number, length: number): T;
  /**
   * Sort the sublists.
   *
   * The flag may be passed positionally, or in an options object under the
   * key `descending` or `reverse`.
   * @param descending - Sort in reverse order.
   * @category List
   */
  sort(descending?: boolean): T;
  sort(opt: {
    descending: boolean;
  }): T;
  sort(opt: {
    reverse: boolean;
  }): T;
  /**
   * Sum all elements of the sublists.
   * @category List
   */
  sum(): T;
  /**
   * Get the unique values of the sublists.
   * @category List
   */
  unique(): T;
}
/**
 * Functions that can be applied to a Date or Datetime column.
 *
 * @typeParam T - type returned by every date function
 */
export interface DateFunctions<T> {
  /**
   * Extract day from underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the day of month starting from 1.
   * The return value ranges from 1 to 31. (The last day of month differs by months.)
   * @returns day as pl.UInt32
   */
  day(): T;
  /**
   * Extract hour from underlying DateTime representation.
   * Can be performed on Datetime.
   *
   * Returns the hour number from 0 to 23.
   * @returns Hour as UInt32
   */
  hour(): T;
  /**
   * Extract minutes from underlying DateTime representation.
   * Can be performed on Datetime.
   *
   * Returns the minute number from 0 to 59.
   * @returns minute as UInt32
   */
  minute(): T;
  /**
   * Extract month from underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the month number starting from 1.
   * The return value ranges from 1 to 12.
   * @returns Month as UInt32
   */
  month(): T;
  /**
   * Extract nanoseconds from underlying DateTime representation.
   * Can be performed on Datetime.
   *
   * Returns the number of nanoseconds since the whole non-leap second.
   * The range from 1,000,000,000 to 1,999,999,999 represents the leap second.
   * @returns Nanosecond as UInt32
   */
  nanosecond(): T;
  /**
   * Extract ordinal day from underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the day of year starting from 1.
   * The return value ranges from 1 to 366. (The last day of year differs by years.)
   * @returns Day as UInt32
   */
  ordinalDay(): T;
  /**
   * Extract seconds from underlying DateTime representation.
   * Can be performed on Datetime.
   *
   * Returns the second number from 0 to 59.
   * @returns Second as UInt32
   */
  second(): T;
  /**
   * Format Date/datetime with a formatting rule: See [chrono strftime/strptime](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html).
   * @param fmt - formatting rule
   */
  strftime(fmt: string): T;
  /** Return timestamp in ms as Int64 type. */
  timestamp(): T;
  /**
   * Extract the week from the underlying Date representation.
   * Can be performed on Date and Datetime
   *
   * Returns the ISO week number starting from 1.
   * The return value ranges from 1 to 53. (The last week of year differs by years.)
   * @returns Week number as UInt32
   */
  week(): T;
  /**
   * Extract the week day from the underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the weekday number where monday = 0 and sunday = 6
   * @returns Week day as UInt32
   */
  weekday(): T;
  /**
   * Extract year from underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the year number in the calendar date.
   * @returns Year as Int32
   */
  year(): T;
}
/**
 * Functions that can be applied to string (Utf8) data.
 *
 * @typeParam T - type returned by every string function
 */
export interface StringFunctions<T> {
  /**
   * Vertically concat the values in the Series to a single string value.
   * @param delimiter - string placed between consecutive values
   * @param ignoreNulls - NOTE(review): undocumented in this file; presumably skips null values when true — confirm.
   * @example
   * ```
   * > df = pl.DataFrame({"foo": [1, null, 2]})
   * > df = df.select(pl.col("foo").str.concat("-"))
   * > df
   * shape: (1, 1)
   * ┌──────────┐
   * │ foo      │
   * │ ---      │
   * │ str      │
   * ╞══════════╡
   * │ 1-null-2 │
   * └──────────┘
   * ```
   */
  concat(delimiter: string, ignoreNulls?: boolean): T;
  /**
   * Check if strings in Series contain a substring that matches a pattern.
   *
   * `literal` and `strict` may be omitted, as the example below does.
   * @param pat - A valid regular expression pattern, compatible with the `regex` crate
   * @param literal - Treat `pattern` as a literal string, not as a regular expression.
   * @param strict - Raise an error if the underlying pattern is not a valid regex, otherwise mask out with a null value.
   * @returns Boolean mask
   * @example
   * ```
   * const df = pl.DataFrame({"txt": ["Crab", "cat and dog", "rab$bit", null]})
   * df.select(
   * ...     pl.col("txt"),
   * ...     pl.col("txt").str.contains("cat|bit").alias("regex"),
   * ...     pl.col("txt").str.contains("rab$", true).alias("literal"),
   * ... )
   * shape: (4, 3)
   * ┌─────────────┬───────┬─────────┐
   * │ txt         ┆ regex ┆ literal │
   * │ ---         ┆ ---   ┆ ---     │
   * │ str         ┆ bool  ┆ bool    │
   * ╞═════════════╪═══════╪═════════╡
   * │ Crab        ┆ false ┆ false   │
   * │ cat and dog ┆ true  ┆ false   │
   * │ rab$bit     ┆ true  ┆ true    │
   * │ null        ┆ null  ┆ null    │
   * └─────────────┴───────┴─────────┘
   * ```
   */
  contains(pat: string | RegExp | Expr, literal?: boolean, strict?: boolean): T;
  /**
   * Decodes a value using the provided encoding
   * @param encoding - hex | base64
   * @param strict - how to handle invalid inputs
   *
   *     - true: method will throw error if unable to decode a value
   *     - false: unhandled values will be replaced with `null`
   * @example
   * ```
   * > df = pl.DataFrame({"strings": ["666f6f", "626172", null]})
   * > df.select(col("strings").str.decode("hex"))
   * shape: (3, 1)
   * ┌─────────┐
   * │ strings │
   * │ ---     │
   * │ str     │
   * ╞═════════╡
   * │ foo     │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ bar     │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ null    │
   * └─────────┘
   * ```
   */
  decode(encoding: "hex" | "base64", strict?: boolean): T;
  decode(options: {
    encoding: "hex" | "base64";
    strict?: boolean;
  }): T;
  /**
   * Encodes a value using the provided encoding
   * @param encoding - hex | base64
   * @example
   * ```
   * > df = pl.DataFrame({"strings": ["foo", "bar", null]})
   * > df.select(col("strings").str.encode("hex"))
   * shape: (3, 1)
   * ┌─────────┐
   * │ strings │
   * │ ---     │
   * │ str     │
   * ╞═════════╡
   * │ 666f6f  │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ 626172  │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ null    │
   * └─────────┘
   * ```
   */
  encode(encoding: "hex" | "base64"): T;
  /**
   * Extract the target capture group from provided patterns.
   * @param pat - A valid regex pattern
   * @param groupIndex - Index of the targeted capture group.
   * Group 0 means the whole pattern; the first capture group is at index 1.
   * @returns Utf8 array. Contains null if the original value is null or the regex captures nothing.
   * @example
   * ```
   * > df = pl.DataFrame({
   * ...   'a': [
   * ...       'http://vote.com/ballon_dor?candidate=messi&ref=polars',
   * ...       'http://vote.com/ballon_dor?candidat=jorginho&ref=polars',
   * ...       'http://vote.com/ballon_dor?candidate=ronaldo&ref=polars'
   * ...   ]})
   * > df.select(pl.col('a').str.extract(/candidate=(\w+)/, 1))
   * shape: (3, 1)
   * ┌─────────┐
   * │ a       │
   * │ ---     │
   * │ str     │
   * ╞═════════╡
   * │ messi   │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ null    │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ ronaldo │
   * └─────────┘
   * ```
   */
  extract(pat: string | RegExp, groupIndex: number): T;
  /**
   * Extract the first match of json string with provided JSONPath expression.
   * Throws errors if it encounters invalid json strings.
   * All return values will be cast to Utf8 regardless of the original value.
   * @see https://goessner.net/articles/JsonPath/
   * @param pat - A valid JSON path query string
   * @returns Utf8 array. Contains null if the original value is null or the `jsonPath` returns nothing.
   * @example
   * ```
   * > df = pl.DataFrame({
   * ...   'json_val': [
   * ...     '{"a":"1"}',
   * ...     null,
   * ...     '{"a":2}',
   * ...     '{"a":2.1}',
   * ...     '{"a":true}'
   * ...   ]
   * ... })
   * > df.select(pl.col('json_val').str.jsonPathMatch('$.a'))
   * shape: (5,)
   * Series: 'json_val' [str]
   * [
   *     "1"
   *     null
   *     "2"
   *     "2.1"
   *     "true"
   * ]
   * ```
   */
  jsonPathMatch(pat: string): T;
  /** Get length of the string values in the Series. */
  lengths(): T;
  /** Remove leading whitespace. */
  lstrip(): T;
  /**
   * Replace first regex match with a string value.
   * @param pat - pattern to search for
   * @param val - replacement string
   */
  replace(pat: string | RegExp, val: string): T;
  /**
   * Replace all regex matches with a string value.
   * @param pat - pattern to search for
   * @param val - replacement string
   */
  replaceAll(pat: string | RegExp, val: string): T;
  /** Modify the strings to their lowercase equivalent. */
  toLowerCase(): T;
  /** Modify the strings to their uppercase equivalent. */
  toUpperCase(): T;
  /** Remove trailing whitespace. */
  rstrip(): T;
  /**
   * Create subslices of the string values of a Utf8 Series.
   * @param start - Start of the slice (negative indexing may be used).
   * @param length - Optional length of the slice.
   */
  slice(start: number, length?: number): T;
  /**
   * Split a string into substrings using the specified separator and return them as a Series.
   * @param by - A string that identifies character or characters to use in separating the string.
   * @param options.inclusive - Include the split character/string in the results
   */
  split(by: string, options?: {
    inclusive?: boolean;
  } | boolean): T;
  /** Remove leading and trailing whitespace. */
  strip(): T;
  /**
   * Parse a Series of dtype Utf8 to a Date/Datetime Series.
   * @param datatype - Date or Datetime.
   * @param fmt - formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html)
   */
  strptime(datatype: DataType.Date | DataType.Datetime | typeof DataType.Datetime, fmt?: string): T;
}
/** Objects that can be written out to a binary buffer. */
export interface Serialize {
  /**
   * Serializes object to desired format via [serde](https://serde.rs/)
   *
   * @param format - output format: [json](https://github.com/serde-rs/json) | [bincode](https://github.com/bincode-org/bincode)
   * @returns buffer holding the serialized object
   */
  serialize(format: "json" | "bincode"): Buffer;
}
/**
 * Counterpart of `serialize`: rebuilds a value from a buffer.
 *
 * @typeParam T - type produced by `deserialize`
 */
export interface Deserialize<T> {
  /**
   * De-serializes buffer via [serde](https://serde.rs/)
   * @param buf - buffer to deserialize
   * @param format - format of `buf`: [json](https://github.com/serde-rs/json) | [bincode](https://github.com/bincode-org/bincode)
   * @returns the deserialized value
   */
  deserialize(buf: Buffer, format: "json" | "bincode"): T;
}
/**
 * GroupBy operations that can be applied to a DataFrame or LazyFrame.
 *
 * @typeParam T - type returned by every group-by operation
 */
export interface GroupByOps<T> {
  /**
   * Create rolling groups based on a time column (or index value of type Int32, Int64).
   *
   * Different from a rolling groupby the windows are now determined by the individual values and are not of constant
   * intervals. For constant intervals use {@link groupByDynamic}
   *
   * The `period` and `offset` arguments are created with
   * the following string language:
   *
   * - 1ns   (1 nanosecond)
   * - 1us   (1 microsecond)
   * - 1ms   (1 millisecond)
   * - 1s    (1 second)
   * - 1m    (1 minute)
   * - 1h    (1 hour)
   * - 1d    (1 day)
   * - 1w    (1 week)
   * - 1mo   (1 calendar month)
   * - 1y    (1 calendar year)
   * - 1i    (1 index count)
   *
   * Or combine them:
   * "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
   *
   * In case of a groupByRolling on an integer column, the windows are defined by:
   *
   * - "1i"      # length 1
   * - "10i"     # length 10
   *
   * @param opts.indexColumn - Column used to group based on the time window.
   * Often of type Date/Datetime.
   * This column must be sorted in ascending order. If not the output will not make sense.
   * In case of a rolling groupby on indices, dtype needs to be one of {Int32, Int64}. Note that
   * Int32 gets temporarily cast to Int64, so if performance matters use an Int64 column.
   * @param opts.period - length of the window
   * @param opts.offset - offset of the window. Default is `-period`
   * @param opts.closed - Defines if the window interval is closed or not. Any of `{"left", "right", "both", "none"}`
   * @param opts.by - Also group by this column/these columns
   * @example
   * ```
   * >dates = [
   * ...     "2020-01-01 13:45:48",
   * ...     "2020-01-01 16:42:13",
   * ...     "2020-01-01 16:45:09",
   * ...     "2020-01-02 18:12:48",
   * ...     "2020-01-03 19:45:32",
   * ...     "2020-01-08 23:16:43",
   * ... ]
   * >df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).withColumn(
   * ...     pl.col("dt").str.strptime(pl.Datetime)
   * ... )
   * >out = df.groupByRolling({indexColumn:"dt", period:"2d"}).agg(
   * ...     [
   * ...         pl.sum("a").alias("sum_a"),
   * ...         pl.max("a").alias("max_a"),
   * ...         pl.min("a").alias("min_a"),
   * ...     ]
   * ... )
   * >assert.deepStrictEqual(out["sum_a"].toArray(), [3, 10, 15, 24, 11, 1])
   * >assert.deepStrictEqual(out["max_a"].toArray(), [3, 7, 7, 9, 9, 1])
   * >assert.deepStrictEqual(out["min_a"].toArray(), [3, 3, 3, 3, 2, 1])
   * >out
   * shape: (6, 4)
   * ┌─────────────────────┬───────┬───────┬───────┐
   * │ dt                  ┆ sum_a ┆ max_a ┆ min_a │
   * │ ---                 ┆ ---   ┆ ---   ┆ ---   │
   * │ datetime[ms]        ┆ i64   ┆ i64   ┆ i64   │
   * ╞═════════════════════╪═══════╪═══════╪═══════╡
   * │ 2020-01-01 13:45:48 ┆ 3     ┆ 3     ┆ 3     │
   * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
   * │ 2020-01-01 16:42:13 ┆ 10    ┆ 7     ┆ 3     │
   * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
   * │ 2020-01-01 16:45:09 ┆ 15    ┆ 7     ┆ 3     │
   * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
   * │ 2020-01-02 18:12:48 ┆ 24    ┆ 9     ┆ 3     │
   * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
   * │ 2020-01-03 19:45:32 ┆ 11    ┆ 9     ┆ 2     │
   * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
   * │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
   * └─────────────────────┴───────┴───────┴───────┘
   * ```
   */
  groupByRolling(opts: {
    indexColumn: ColumnsOrExpr;
    by?: ColumnsOrExpr;
    period: string;
    offset?: string;
    closed?: "left" | "right" | "both" | "none";
  }): T;
  /**
   * Groups based on a time value (or index value of type Int32, Int64). Time windows are calculated and rows are assigned to windows.
   *
   * Different from a normal groupby is that a row can be member of multiple groups. The time/index window could
   * be seen as a rolling window, with a window size determined by dates/times/values instead of slots in the DataFrame.
   *
   * A window is defined by:
   *
   * - every: interval of the window
   * - period: length of the window
   * - offset: offset of the window
   *
   * The `every`, `period` and `offset` arguments are created with
   * the following string language:
   *
   * - 1ns   (1 nanosecond)
   * - 1us   (1 microsecond)
   * - 1ms   (1 millisecond)
   * - 1s    (1 second)
   * - 1m    (1 minute)
   * - 1h    (1 hour)
   * - 1d    (1 day)
   * - 1w    (1 week)
   * - 1mo   (1 calendar month)
   * - 1y    (1 calendar year)
   * - 1i    (1 index count)
   *
   * Or combine them:
   * "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
   *
   * In case of a groupByDynamic on an integer column, the windows are defined by:
   *
   * - "1i"      # length 1
   * - "10i"     # length 10
   *
   * @param options.indexColumn - Column used to group based on the time window.
   * Often of type Date/Datetime.
   * This column must be sorted in ascending order. If not the output will not make sense.
   * In case of a dynamic groupby on indices, dtype needs to be one of {Int32, Int64}. Note that
   * Int32 gets temporarily cast to Int64, so if performance matters use an Int64 column.
   * @param options.every - interval of the window
   * @param options.period - length of the window, if None it is equal to 'every'
   * @param options.offset - offset of the window; if None and period is None it will be equal to negative `every`
   * @param options.label - Define which label to use for the window: Any of {'left', 'right', 'datapoint'}
   * @param options.includeBoundaries - add the lower and upper bound of the window to the "_lower_bound" and "_upper_bound" columns. This will impact performance because it's harder to parallelize
   * @param options.closed - Defines if the window interval is closed or not. Any of {"left", "right", "both", "none"}
   * @param options.by - Also group by this column/these columns
   * @param options.startBy - The strategy to determine the start of the first window by. Any of {'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}
   */
  groupByDynamic(options: {
    indexColumn: string;
    every: string;
    period?: string;
    offset?: string;
    label?: string;
    includeBoundaries?: boolean;
    closed?: "left" | "right" | "both" | "none";
    by?: ColumnsOrExpr;
    startBy?: StartBy;
  }): T;
}
/**
* Exponentially-weighted operations that can be applied to a Series and Expr
*/
export interface EwmOps<T> {
/**
* Exponentially-weighted moving average.
*
* @param alpha Specify smoothing factor alpha directly, `0 < alpha <= 1`.
* @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
* - When ``adjust: true`` the EW function is calculated using weights `w_i = (1 - alpha)^i`
* - When ``adjust: false`` the EW function is calculated recursively
* @param minPeriods Minimum number of observations in window required to have a value (otherwise result is null).
* @param bias When ``bias: false``, apply a correction to make the estimate statistically unbiased.
* @param ignoreNulls Ignore missing values when calculating weights.
* - When ``ignoreNulls: false`` (default), weights are based on absolute positions.
* - When ``ignoreNulls: true``, weights are based on relative positions.
* @returns Expr that evaluates to a float 64 Series.
* @example
* ```
* > const df = pl.DataFrame({a: [1, 2, 3]});
* > df.select(pl.col("a").ewmMean())
* shape: (3, 1)
* ┌──────────┐
* │ a        │
* │ ---      │
* │ f64      │
* ╞══════════╡
* │ 1.0      │
* │ 1.666667 │
* │ 2.428571 │
* └──────────┘
* ```
*/
ewmMean(alpha?: number, adjust?: boolean, minPeriods?: number, bias?: boolean, ignoreNulls?: boolean): T;
/**
* Exponentially-weighted moving average, options-object form.
* The fields of `opts` carry the same names as the positional parameters of `ewmMean`.
*/
ewmMean(opts: {
alpha?: number;
adjust?: boolean;
minPeriods?: number;
bias?: boolean;
ignoreNulls?: boolean;
}): T;
/**
* Exponentially-weighted moving average, called without arguments.
*/
ewmMean(): T;
/**
* Exponentially-weighted standard deviation.
*
* @param alpha Specify smoothing factor alpha directly, `0 < alpha <= 1`.
* @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
* - When ``adjust: true`` the EW function is calculated using weights `w_i = (1 - alpha)^i`
* - When ``adjust: false`` the EW function is calculated recursively
* @param minPeriods Minimum number of observations in window required to have a value (otherwise result is null).
* @param bias When ``bias: false``, apply a correction to make the estimate statistically unbiased.
* @param ignoreNulls Ignore missing values when calculating weights.
* - When ``ignoreNulls: false`` (default), weights are based on absolute positions.
* - When ``ignoreNulls: true``, weights are based on relative positions.
* @returns Expr that evaluates to a float 64 Series.
* @example
* ```
* > const df = pl.DataFrame({a: [1, 2, 3]});
* > df.select(pl.col("a").ewmStd())
* shape: (3, 1)
* ┌──────────┐
* │ a        │
* │ ---      │
* │ f64      │
* ╞══════════╡
* │ 0.0      │
* │ 0.707107 │
* │ 0.963624 │
* └──────────┘
* ```
*/
ewmStd(alpha?: number, adjust?: boolean, minPeriods?: number, bias?: boolean, ignoreNulls?: boolean): T;
/**
* Exponentially-weighted standard deviation, options-object form.
* The fields of `opts` carry the same names as the positional parameters of `ewmStd`.
*/
ewmStd(opts: {
alpha?: number;
adjust?: boolean;
minPeriods?: number;
bias?: boolean;
ignoreNulls?: boolean;
}): T;
/**
* Exponentially-weighted standard deviation, called without arguments.
*/
ewmStd(): T;
/**
* Exponentially-weighted variance.
*
* @param alpha Specify smoothing factor alpha directly, `0 < alpha <= 1`.
* @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
* - When ``adjust: true`` the EW function is calculated using weights `w_i = (1 - alpha)^i`
* - When ``adjust: false`` the EW function is calculated recursively
* @param minPeriods Minimum number of observations in window required to have a value (otherwise result is null).
* @param bias When ``bias: false``, apply a correction to make the estimate statistically unbiased.
* @param ignoreNulls Ignore missing values when calculating weights.
* - When ``ignoreNulls: false`` (default), weights are based on absolute positions.
* - When ``ignoreNulls: true``, weights are based on relative positions.
* @returns Expr that evaluates to a float 64 Series.
* @example
* ```
* > const df = pl.DataFrame({a: [1, 2, 3]});
* > df.select(pl.col("a").ewmVar())
* shape: (3, 1)
* ┌──────────┐
* │ a        │
* │ ---      │
* │ f64      │
* ╞══════════╡
* │ 0.0      │
* │ 0.5      │
* │ 0.928571 │
* └──────────┘
* ```
*/
ewmVar(alpha?: number, adjust?: boolean, minPeriods?: number, bias?: boolean, ignoreNulls?: boolean): T;
/**
* Exponentially-weighted variance, options-object form.
* The fields of `opts` carry the same names as the positional parameters of `ewmVar`.
*/
ewmVar(opts: {
alpha?: number;
adjust?: boolean;
minPeriods?: number;
bias?: boolean;
ignoreNulls?: boolean;
}): T;
/**
* Exponentially-weighted variance, called without arguments.
*/
ewmVar(): T;
}