/*
 * nodejs-polars
 * Version: (not specified)
 * Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL
 * 1,427 lines (1,394 loc) • 55 kB
 * TypeScript
 */
import type { DataType } from "./datatypes";
import type { Expr } from "./lazy/expr";
import type { ClosedWindow, InterpolationMethod, RollingOptions, RollingQuantileOptions, RollingSkewOptions, RoundMode } from "./types";
import type { ColumnsOrExpr, StartBy } from "./utils";
/**
 * Arithmetic operations between `self` and another operand.
 *
 * Every operation is declared under a short name (`add`, `sub`, `div`, `mul`, `rem`)
 * and again under a long name (`plus`, `minus`, `divideBy`, `multiplyBy`, `modulo`).
 *
 * @typeParam T - type returned by each operation
 */
export interface Arithmetic<T> {
  /**
   * Addition: `self + other`.
   * @param other - right-hand operand
   * @category Arithmetic
   */
  add(other: any): T;
  /**
   * Subtraction: `self - other`.
   * @param other - right-hand operand
   * @category Arithmetic
   */
  sub(other: any): T;
  /**
   * Division: `self / other`.
   * @param other - divisor
   * @category Arithmetic
   */
  div(other: any): T;
  /**
   * Multiplication: `self * other`.
   * @param other - right-hand operand
   * @category Arithmetic
   */
  mul(other: any): T;
  /**
   * Remainder of `self` divided by `other`.
   * @param other - divisor
   * @category Arithmetic
   */
  rem(other: any): T;
  /**
   * Addition: `self + other` (long-form name).
   * @param other - right-hand operand
   * @category Arithmetic
   */
  plus(other: any): T;
  /**
   * Subtraction: `self - other` (long-form name).
   * @param other - right-hand operand
   * @category Arithmetic
   */
  minus(other: any): T;
  /**
   * Division: `self / other` (long-form name).
   * @param other - divisor
   * @category Arithmetic
   */
  divideBy(other: any): T;
  /**
   * Multiplication: `self * other` (long-form name).
   * @param other - right-hand operand
   * @category Arithmetic
   */
  multiplyBy(other: any): T;
  /**
   * Remainder of `self` divided by `other` (long-form name).
   * @param other - divisor
   * @category Arithmetic
   */
  modulo(other: any): T;
}
/**
 * Comparison operations between `self` and another operand.
 *
 * Every comparison is declared under a short name (`eq`, `gtEq`, `gt`, `ltEq`, `lt`, `neq`)
 * and again under a long name (`equals`, `greaterThanEquals`, `greaterThan`,
 * `lessThanEquals`, `lessThan`, `notEquals`).
 *
 * @typeParam T - type returned by each comparison
 */
export interface Comparison<T> {
  /**
   * Equality: `self == other`.
   * @param other - value to compare against
   * @category Comparison
   */
  eq(other: any): T;
  /**
   * Equality: `self == other` (long-form name).
   * @param other - value to compare against
   * @category Comparison
   */
  equals(other: any): T;
  /**
   * Greater than or equal: `self >= other`.
   * @param other - value to compare against
   * @category Comparison
   */
  gtEq(other: any): T;
  /**
   * Greater than or equal: `self >= other` (long-form name).
   * @param other - value to compare against
   * @category Comparison
   */
  greaterThanEquals(other: any): T;
  /**
   * Greater than: `self > other`.
   * @param other - value to compare against
   * @category Comparison
   */
  gt(other: any): T;
  /**
   * Greater than: `self > other` (long-form name).
   * @param other - value to compare against
   * @category Comparison
   */
  greaterThan(other: any): T;
  /**
   * Less than or equal: `self <= other`.
   * @param other - value to compare against
   * @category Comparison
   */
  ltEq(other: any): T;
  /**
   * Less than or equal: `self <= other` (long-form name).
   * @param other - value to compare against
   * @category Comparison
   */
  lessThanEquals(other: any): T;
  /**
   * Less than: `self < other`.
   * @param other - value to compare against
   * @category Comparison
   */
  lt(other: any): T;
  /**
   * Less than: `self < other` (long-form name).
   * @param other - value to compare against
   * @category Comparison
   */
  lessThan(other: any): T;
  /**
   * Inequality: `self != other`.
   * @param other - value to compare against
   * @category Comparison
   */
  neq(other: any): T;
  /**
   * Inequality: `self != other` (long-form name).
   * @param other - value to compare against
   * @category Comparison
   */
  notEquals(other: any): T;
}
/**
 * A trait for cumulative operations.
 *
 * Each operation accepts the `reverse` flag either positionally or wrapped in an
 * options object (`{ reverse }`).
 *
 * @typeParam T - type returned by each cumulative operation
 */
export interface Cumulative<T> {
  /**
   * Get an array with the cumulative count computed at every element.
   * @param reverse - reverse the operation
   * @category Cumulative
   */
  cumCount(reverse?: boolean): T;
  cumCount(options: { reverse: boolean }): T;
  /**
   * __Get an array with the cumulative max computed at every element.__
   * ___
   * @param reverse - reverse the operation
   * @example
   * ```
   * > const s = pl.Series("a", [1, 2, 3])
   * > s.cumMax()
   * shape: (3,)
   * Series: 'a' [i64]
   * [
   *         1
   *         2
   *         3
   * ]
   * ```
   * @category Cumulative
   */
  cumMax(reverse?: boolean): T;
  cumMax(options: { reverse: boolean }): T;
  /**
   * __Get an array with the cumulative min computed at every element.__
   * ___
   * @param reverse - reverse the operation
   * @example
   * ```
   * > const s = pl.Series("a", [1, 2, 3])
   * > s.cumMin()
   * shape: (3,)
   * Series: 'a' [i64]
   * [
   *         1
   *         1
   *         1
   * ]
   * ```
   * @category Cumulative
   */
  cumMin(reverse?: boolean): T;
  cumMin(options: { reverse: boolean }): T;
  /**
   * __Get an array with the cumulative product computed at every element.__
   * ___
   * @param reverse - reverse the operation
   * @example
   * ```
   * > const s = pl.Series("a", [1, 2, 3])
   * > s.cumProd()
   * shape: (3,)
   * Series: 'a' [i64]
   * [
   *         1
   *         2
   *         6
   * ]
   * ```
   * @category Cumulative
   */
  cumProd(reverse?: boolean): T;
  cumProd(options: { reverse: boolean }): T;
  /**
   * __Get an array with the cumulative sum computed at every element.__
   * ___
   * @param reverse - reverse the operation
   * @example
   * ```
   * > const s = pl.Series("a", [1, 2, 3])
   * > s.cumSum()
   * shape: (3,)
   * Series: 'a' [i64]
   * [
   *         1
   *         3
   *         6
   * ]
   * ```
   * @category Cumulative
   */
  cumSum(reverse?: boolean): T;
  cumSum(options: { reverse: boolean }): T;
}
// NOTE(review): the positional overloads type `minPeriods` as an array of numbers, while the
// parameter documentation describes a single count — confirm the intended type against the
// implementation before relying on either form.
/**
 * __A trait for DataFrame and Series that allows for the application of a rolling window.__
 *
 * Most operations are declared twice: once with positional arguments and once with a
 * single options object.
 *
 * @typeParam T - type returned by each rolling operation
 */
export interface Rolling<T> {
  /**
   * __Apply a rolling max (moving max) over the values in this Series.__
   *
   * A window of length `windowSize` traverses the series. The values that fill the window
   * are (optionally) multiplied by the weights given in `weights`, and the resulting
   * values are aggregated to their maximum.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   *   elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   *   If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingMax(windowSize: number, weights?: number[], minPeriods?: number[], center?: boolean): T;
  rollingMax(options: RollingOptions): T;
  /**
   * __Apply a rolling mean (moving mean) over the values in this Series.__
   *
   * A window of length `windowSize` traverses the series. The values that fill the window
   * are (optionally) multiplied by the weights given in `weights`, and the resulting
   * values are aggregated to their mean.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   *   elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   *   If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingMean(windowSize: number, weights?: number[], minPeriods?: number[], center?: boolean): T;
  rollingMean(options: RollingOptions): T;
  /**
   * __Apply a rolling min (moving min) over the values in this Series.__
   *
   * A window of length `windowSize` traverses the series. The values that fill the window
   * are (optionally) multiplied by the weights given in `weights`, and the resulting
   * values are aggregated to their minimum.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   *   elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   *   If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingMin(windowSize: number, weights?: number[], minPeriods?: number[], center?: boolean): T;
  rollingMin(options: RollingOptions): T;
  /**
   * __Compute a rolling standard deviation.__
   *
   * A window of length `windowSize` traverses the array. The values that fill the window
   * are (optionally) multiplied by the weights given in `weights`, and the resulting
   * values are aggregated to their standard deviation.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   *   elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   *   If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @param ddof - "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
   *   where N represents the number of elements. By default ddof is 1.
   * @category Rolling
   */
  rollingStd(windowSize: number, weights?: number[], minPeriods?: number[], center?: boolean, ddof?: number): T;
  rollingStd(options: RollingOptions): T;
  /**
   * __Apply a rolling sum (moving sum) over the values in this Series.__
   *
   * A window of length `windowSize` traverses the series. The values that fill the window
   * are (optionally) multiplied by the weights given in `weights`, and the resulting
   * values are aggregated to their sum.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   *   elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   *   If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingSum(windowSize: number, weights?: number[], minPeriods?: number[], center?: boolean): T;
  rollingSum(options: RollingOptions): T;
  /**
   * __Compute a rolling variance.__
   *
   * A window of length `windowSize` traverses the series. The values that fill the window
   * are (optionally) multiplied by the weights given in `weights`, and the resulting
   * values are aggregated to their variance.
   * ___
   * @param windowSize - The length of the window.
   * @param weights - An optional slice with the same length as the window that will be multiplied
   *   elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   *   If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @param ddof - "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof,
   *   where N represents the number of elements. By default ddof is 1.
   * @category Rolling
   */
  rollingVar(windowSize: number, weights?: number[], minPeriods?: number[], center?: boolean, ddof?: number): T;
  rollingVar(options: RollingOptions): T;
  /**
   * Compute a rolling median.
   *
   * Takes the same positional arguments as the other rolling aggregations
   * (`windowSize`, `weights`, `minPeriods`, `center`).
   * @category Rolling
   */
  rollingMedian(windowSize: number, weights?: number[], minPeriods?: number[], center?: boolean): T;
  rollingMedian(options: RollingOptions): T;
  // NOTE(review): the trailing `by` and `closed` parameters of the positional overload below
  // are not described by the original documentation — TODO document once confirmed.
  /**
   * Compute a rolling quantile.
   * @param quantile - quantile to compute
   * @param interpolation - interpolation type
   * @param windowSize - Size of the rolling window
   * @param weights - An optional slice with the same length as the window that will be multiplied
   *   elementwise with the values in the window.
   * @param minPeriods - The number of values in the window that should be non-null before computing a result.
   *   If undefined, it will be set equal to window size.
   * @param center - Set the labels at the center of the window
   * @category Rolling
   */
  rollingQuantile(quantile: number, interpolation?: InterpolationMethod, windowSize?: number, weights?: number[], minPeriods?: number[], center?: boolean, by?: string, closed?: ClosedWindow): T;
  rollingQuantile(options: RollingQuantileOptions): T;
  /**
   * Compute a rolling skew.
   * @param windowSize - Size of the rolling window
   * @param bias - If false, then the calculations are corrected for statistical bias.
   * @category Rolling
   */
  rollingSkew(windowSize: number, bias?: boolean): T;
  /**
   * Compute a rolling skew.
   * @param options - settings object
   * @param options.windowSize - Size of the rolling window
   * @param options.bias - If false, then the calculations are corrected for statistical bias.
   * @category Rolling
   */
  rollingSkew(options: RollingSkewOptions): T;
}
/**
 * Rounding and clipping operations.
 *
 * @typeParam T - type returned by each operation
 */
export interface Round<T> {
  /**
   * Round underlying floating point data by `decimals` digits.
   *
   * Similar functionality to javascript `toFixed`
   * @param decimals - number of decimals to round by.
   * @param mode - Rounding mode (`RoundMode`); the default is "half to even"
   *   (also known as "bankers' rounding").
   *   - *halftoeven*: round to the nearest even number
   *   - *halfawayfromzero*: round to the nearest number away from zero
   * @category Math
   */
  round(decimals: number, mode?: RoundMode): T;
  round(options: { decimals: number; mode?: RoundMode }): T;
  /**
   * Floor underlying floating point array to the highest integers smaller than or equal to the float value.
   * Only works on floating point Series
   * @category Math
   */
  floor(): T;
  /**
   * Ceil underlying floating point array to the lowest integers greater than or equal to the float value.
   * Only works on floating point Series
   * @category Math
   */
  ceil(): T;
  /**
   * Clip (limit) the values in an array to any value that fits in 64-bit floating point range.
   * Only works for the following dtypes: {Int32, Int64, Float32, Float64, UInt32}.
   * If you want to clip other dtypes, consider writing a when -> then -> otherwise expression
   * @param min - Minimum value
   * @param max - Maximum value
   * @category Math
   */
  clip(min: number, max: number): T;
  // The options overload returns `T`, matching the positional overload and every other member.
  clip(options: { min: number; max: number }): T;
}
/**
 * Random sampling.
 *
 * @typeParam T - type returned by `sample`
 */
export interface Sample<T> {
  /**
   * Sample from this DataFrame by setting either `n` or `frac`.
   *
   * The arguments can be given positionally or as an options object carrying
   * either `n` or `frac`.
   * @param n - Number of samples < self.len() .
   * @param frac - Fraction between 0.0 and 1.0 .
   * @param withReplacement - Sample with replacement.
   * @param seed - Seed initialization. If not provided, a random seed will be used
   * @example
   * ```
   * > df = pl.DataFrame({
   * >   "foo": [1, 2, 3],
   * >   "bar": [6, 7, 8],
   * >   "ham": ['a', 'b', 'c']
   * > })
   * > df.sample({n: 2})
   * shape: (2, 3)
   * ╭─────┬─────┬─────╮
   * │ foo ┆ bar ┆ ham │
   * │ --- ┆ --- ┆ --- │
   * │ i64 ┆ i64 ┆ str │
   * ╞═════╪═════╪═════╡
   * │ 1   ┆ 6   ┆ "a" │
   * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
   * │ 3   ┆ 8   ┆ "c" │
   * ╰─────┴─────┴─────╯
   * ```
   * @category Math
   */
  sample(n?: number, frac?: number, withReplacement?: boolean, seed?: number | bigint): T;
  sample(opts?: {
    n: number;
    withReplacement?: boolean;
    seed?: number | bigint;
  }): T;
  sample(opts?: {
    frac: number;
    withReplacement?: boolean;
    seed?: number | bigint;
  }): T;
}
/**
 * Callable shape that turns a `Uint8Array` into a `T` and also exposes `getState`,
 * which returns a `Uint8Array`.
 *
 * NOTE(review): presumably the decode / encode halves of a bincode round-trip — confirm
 * against the implementation.
 *
 * @typeParam T - type produced by the call signature
 */
export interface Bincode<T> {
  /**
   * @param bincode - bytes to build a `T` from
   */
  (bincode: Uint8Array): T;
  /**
   * @param T2 - input value, typed `any`; despite the capitalised name this is an
   *   ordinary parameter, not a type parameter
   * @returns the state as bytes
   */
  getState(T2: any): Uint8Array;
}
/**
 * Functions that can be applied to dtype List
 *
 * @typeParam T - type returned by each list function
 */
export interface ListFunctions<T> {
  /**
   * Retrieve the index of the minimal value in every sublist.
   * @returns Expression of data type `UInt32` or `UInt64`
   * @example
   * ```
   * const s0 = pl.Series("a", [[1, 2], [2, 1]]);
   * s0.list.argMin();
   * Series: 'a' [u32]
   * [
   *     0
   *     1
   * ]
   * ```
   */
  argMin(): T;
  /**
   * Retrieve the index of the maximum value in every sublist.
   * @returns Expression of data type `UInt32` or `UInt64`
   * @example
   * ```
   * const s0 = pl.Series("a", [[1, 2], [2, 1]]);
   * s0.list.argMax();
   * Series: 'a' [u32]
   * [
   *     1
   *     0
   * ]
   * ```
   */
  argMax(): T;
  /**
   * Concat the arrays in a Series dtype List in linear time.
   * @param other - Column(s) to concat into a List Series
   * @example
   * ```
   * df = pl.DataFrame({
   *   "a": [["a"], ["x"]],
   *   "b": [["b", "c"], ["y", "z"]],
   * })
   * df.select(pl.col("a").list.concat("b"))
   * shape: (2, 1)
   * ┌─────────────────┐
   * │ a               │
   * │ ---             │
   * │ list[str]       │
   * ╞═════════════════╡
   * │ ["a", "b", "c"] │
   * ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
   * │ ["x", "y", "z"] │
   * └─────────────────┘
   * ```
   * @category List
   */
  concat(other: Array<string | T> | string | T): T;
  /**
   * Check if sublists contain the given item.
   * @param item - Item that will be checked for membership
   * @param nullBehavior - boolean, default true. If true, treat null as a distinct value.
   *   Null values will not propagate.
   * @example
   * ```
   * df = pl.DataFrame({"foo": [[3, 2, 1], [], [1, 2]]})
   * df.select(pl.col("foo").list.contains(1))
   * shape: (3, 1)
   * ┌───────┐
   * │ foo   │
   * │ ---   │
   * │ bool  │
   * ╞═══════╡
   * │ true  │
   * ├╌╌╌╌╌╌╌┤
   * │ false │
   * ├╌╌╌╌╌╌╌┤
   * │ true  │
   * └───────┘
   * ```
   * @category List
   */
  contains(item: any, nullBehavior?: boolean): T;
  /**
   * Calculate the n-th discrete difference of every sublist.
   * @param n - number of slots to shift
   * @param nullBehavior - 'ignore' | 'drop'
   * @example
   * ```
   * s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
   * s.list.diff()
   *
   * shape: (2,)
   * Series: 'a' [list]
   * [
   *     [null, 1, ... 1]
   *     [null, -8, -1]
   * ]
   * ```
   * @category List
   */
  diff(n?: number, nullBehavior?: "ignore" | "drop"): T;
  /**
   * Get the value by index in the sublists.
   * @param index - Index to return per sublist
   * @param nullOnOob - Behavior if an index is out of bounds:
   *   - true: set as null
   *   - false: raise an error
   * @example
   * ```
   * const s0 = pl.Series("a", [[1, 2], [2, 1]]);
   * s0.list.get(0);
   * Series: 'a' [f64]
   * [
   *     1.0
   *     2.0
   * ]
   * ```
   * @category List
   */
  get(index: number | Expr, nullOnOob?: boolean): T;
  /**
   * Run any polars expression against the lists' elements
   * @param expr - Expression to run. Note that you can select an element with
   *   `pl.first()`, or `pl.col()`
   * @example
   * ```
   * > df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
   * > df.withColumn(
   * ...   pl.concatList(["a", "b"]).list.eval(pl.first().rank()).alias("rank")
   * ... )
   * shape: (3, 3)
   * ┌─────┬─────┬────────────┐
   * │ a   ┆ b   ┆ rank       │
   * │ --- ┆ --- ┆ ---        │
   * │ i64 ┆ i64 ┆ list [f32] │
   * ╞═════╪═════╪════════════╡
   * │ 1   ┆ 4   ┆ [1.0, 2.0] │
   * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
   * │ 8   ┆ 5   ┆ [2.0, 1.0] │
   * ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
   * │ 3   ┆ 2   ┆ [2.0, 1.0] │
   * └─────┴─────┴────────────┘
   * ```
   * @category List
   */
  eval(expr: Expr): T;
  /**
   * Get the first value of the sublists.
   * @category List
   */
  first(): T;
  /**
   * Slice the head of every sublist
   * @param n - How many values to take in the slice.
   * @example
   * ```
   * s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
   * s.list.head(2)
   * shape: (2,)
   * Series: 'a' [list]
   * [
   *     [1, 2]
   *     [10, 2]
   * ]
   * ```
   * @category List
   */
  head(n?: number): T;
  /**
   * Slice the tail of every sublist
   * @param n - How many values to take in the slice.
   * @example
   * ```
   * s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
   * s.list.tail(2)
   * shape: (2,)
   * Series: 'a' [list]
   * [
   *     [3, 4]
   *     [2, 1]
   * ]
   * ```
   * @category List
   */
  tail(n?: number): T;
  /**
   * Join all string items in a sublist and place a separator between them.
   * This errors if inner type of list `!= Utf8`.
   *
   * May be called with an options object, with no arguments, or with the separator alone.
   * @param options.separator - A string used to separate one element of the list from the next
   *   in the resulting string. If omitted, the list elements are separated with a comma.
   * @param options.ignoreNulls - If true, null values will be ignored.
   * @category List
   */
  join(options: {
    separator?: string | Expr;
    ignoreNulls?: boolean;
  }): T;
  join(): T;
  join(separator: string | Expr): T;
  /**
   * Get the last value of the sublists.
   * @category List
   */
  last(): T;
  /**
   * Get the length of the sublists.
   * @category List
   */
  lengths(): T;
  /**
   * Get the maximum value of the sublists.
   * @category List
   */
  max(): T;
  /**
   * Get the mean value of the sublists.
   * @category List
   */
  mean(): T;
  /**
   * Get the minimum value of the sublists.
   * @category List
   */
  min(): T;
  /**
   * Reverse the sublists.
   * @category List
   */
  reverse(): T;
  /**
   * Shift the sublists.
   * @param periods - Number of periods to shift. Can be positive or negative.
   * @category List
   */
  shift(periods: number): T;
  /**
   * Slice the sublists.
   * @param offset - The offset of the slice.
   * @param length - The length of the slice.
   * @category List
   */
  slice(offset: number, length: number): T;
  /**
   * Sort the sublists.
   *
   * The flag may be passed positionally or as `{ descending }`.
   * @param descending - Sort in reverse order.
   * @category List
   */
  sort(descending?: boolean): T;
  sort(opt: { descending: boolean }): T;
  /**
   * Sum all elements of the sublists.
   * @category List
   */
  sum(): T;
  /**
   * Get the unique values of the sublists.
   * @category List
   */
  unique(): T;
}
/**
 * Functions that can be applied to a Date or Datetime column.
 *
 * @typeParam T - type returned by each date function
 */
export interface DateFunctions<T> {
  /**
   * Extract day from underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the day of month starting from 1.
   * The return value ranges from 1 to 31. (The last day of month differs by months.)
   * @returns day as pl.UInt32
   */
  day(): T;
  /**
   * Extract hour from underlying DateTime representation.
   * Can be performed on Datetime.
   *
   * Returns the hour number from 0 to 23.
   * @returns Hour as UInt32
   */
  hour(): T;
  /**
   * Extract minutes from underlying DateTime representation.
   * Can be performed on Datetime.
   *
   * Returns the minute number from 0 to 59.
   * @returns minute as UInt32
   */
  minute(): T;
  /**
   * Extract month from underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the month number starting from 1.
   * The return value ranges from 1 to 12.
   * @returns Month as UInt32
   */
  month(): T;
  /**
   * Extract nanoseconds from underlying DateTime representation.
   * Can be performed on Datetime.
   *
   * Returns the number of nanoseconds since the whole non-leap second.
   * The range from 1,000,000,000 to 1,999,999,999 represents the leap second.
   * @returns Nanosecond as UInt32
   */
  nanosecond(): T;
  /**
   * Extract ordinal day from underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the day of year starting from 1.
   * The return value ranges from 1 to 366. (The last day of year differs by years.)
   * @returns Day as UInt32
   */
  ordinalDay(): T;
  /**
   * Extract seconds from underlying DateTime representation.
   * Can be performed on Datetime.
   *
   * Returns the second number from 0 to 59.
   * @returns Second as UInt32
   */
  second(): T;
  /**
   * Format Date/datetime with a formatting rule: See [chrono strftime/strptime](https://docs.rs/chrono/0.4.41/chrono/format/strftime/index.html).
   * @param fmt - formatting rule
   */
  strftime(fmt: string): T;
  /** Return timestamp in ms as Int64 type. */
  timestamp(): T;
  /**
   * Extract the week from the underlying Date representation.
   * Can be performed on Date and Datetime
   *
   * Returns the ISO week number starting from 1.
   * The return value ranges from 1 to 53. (The last week of year differs by years.)
   * @returns Week number as UInt32
   */
  week(): T;
  /**
   * Extract the week day from the underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the weekday number where monday = 0 and sunday = 6
   * @returns Week day as UInt32
   */
  weekday(): T;
  /**
   * Extract year from underlying Date representation.
   * Can be performed on Date and Datetime.
   *
   * Returns the year number in the calendar date.
   * @returns Year as Int32
   */
  year(): T;
  /**
   * Divide the date/datetime range into buckets.
   *
   * Each date/datetime is mapped to the start of its bucket using the corresponding local datetime. Note that:
   * - Weekly buckets start on Monday.
   * - All other buckets start on the Unix epoch (1970-01-01).
   * - Ambiguous results are localised using the DST offset of the original
   *   timestamp - for example, truncating `'2022-11-06 01:30:00 CST'` by
   *   `'1h'` results in `'2022-11-06 01:00:00 CST'`, whereas truncating
   *   `'2022-11-06 01:30:00 CDT'` by `'1h'` results in
   *   `'2022-11-06 01:00:00 CDT'`.
   *
   * The `every` argument is created with the following string language:
   * - 1ns   (1 nanosecond)
   * - 1us   (1 microsecond)
   * - 1ms   (1 millisecond)
   * - 1s    (1 second)
   * - 1m    (1 minute)
   * - 1h    (1 hour)
   * - 1d    (1 calendar day)
   * - 1w    (1 calendar week)
   * - 1mo   (1 calendar month)
   * - 1q    (1 calendar quarter)
   * - 1y    (1 calendar year)
   *
   * By "calendar day", we mean the corresponding time on the next day (which may
   * not be 24 hours, due to daylight savings). Similarly for "calendar week",
   * "calendar month", "calendar quarter", and "calendar year".
   *
   * @param every - The size of each bucket.
   * @returns Expression of data type `Date` or `Datetime`.
   * @example
   * ```
   * const df = pl.DataFrame([
   *   pl.Series("datetime", [
   *     new Date(Date.parse("2020-01-01T01:32:00.002+00:00")),
   *     new Date(Date.parse("2020-01-01T02:02:01.030+00:00")),
   *     new Date(Date.parse("2020-01-01T04:42:20.001+00:00")),
   *   ])]);
   * >>> df.select("datetime", pl.col("datetime").dt.truncate("1h").alias("hr0"));
   * shape: (3, 2)
   * ┌─────────────────────────┬─────────────────────┐
   * │ datetime                ┆ hr0                 │
   * │ ---                     ┆ ---                 │
   * │ datetime[ms]            ┆ datetime[ms]        │
   * ╞═════════════════════════╪═════════════════════╡
   * │ 2020-01-01 01:32:00.002 ┆ 2020-01-01 01:00:00 │
   * │ 2020-01-01 02:02:01.030 ┆ 2020-01-01 02:00:00 │
   * │ 2020-01-01 04:42:20.001 ┆ 2020-01-01 04:00:00 │
   * └─────────────────────────┴─────────────────────┘
   * >>> df.select("datetime", pl.col("datetime").dt.truncate("30m").alias("hr30m"));
   * shape: (3, 2)
   * ┌─────────────────────────┬─────────────────────┐
   * │ datetime                ┆ hr30m               │
   * │ ---                     ┆ ---                 │
   * │ datetime[ms]            ┆ datetime[ms]        │
   * ╞═════════════════════════╪═════════════════════╡
   * │ 2020-01-01 01:32:00.002 ┆ 2020-01-01 01:30:00 │
   * │ 2020-01-01 02:02:01.030 ┆ 2020-01-01 02:00:00 │
   * │ 2020-01-01 04:42:20.001 ┆ 2020-01-01 04:30:00 │
   * └─────────────────────────┴─────────────────────┘
   * ```
   */
  truncate(every: string | Expr): T;
  /**
   * Divide the date/datetime range into buckets.
   *
   * - Each date/datetime in the first half of the interval is mapped to the start of its bucket.
   * - Each date/datetime in the second half of the interval is mapped to the end of its bucket.
   * - Half-way points are mapped to the start of their bucket.
   *
   * Ambiguous results are localised using the DST offset of the original timestamp -
   * for example, rounding `'2022-11-06 01:20:00 CST'` by `'1h'` results in
   * `'2022-11-06 01:00:00 CST'`, whereas rounding `'2022-11-06 01:20:00 CDT'` by
   * `'1h'` results in `'2022-11-06 01:00:00 CDT'`.
   *
   * The `every` argument is created with the following small string formatting language:
   * - 1ns   (1 nanosecond)
   * - 1us   (1 microsecond)
   * - 1ms   (1 millisecond)
   * - 1s    (1 second)
   * - 1m    (1 minute)
   * - 1h    (1 hour)
   * - 1d    (1 calendar day)
   * - 1w    (1 calendar week)
   * - 1mo   (1 calendar month)
   * - 1q    (1 calendar quarter)
   * - 1y    (1 calendar year)
   *
   * By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings).
   * Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".
   *
   * @param every - Every interval start and period length
   * @returns Expression of data type `Date` or `Datetime`.
   * @example
   * ```
   * const df = pl.DataFrame([
   *   pl.Series("datetime", [
   *     new Date(Date.parse("2020-01-01T01:30:00.002+00:00")),
   *     new Date(Date.parse("2020-01-01T02:02:01.030+00:00")),
   *     new Date(Date.parse("2020-01-01T04:42:20.001+00:00")),
   *   ])]);
   * >>> df.select("datetime", pl.col("datetime").dt.round("1h").alias("hr0"));
   * shape: (3, 2)
   * ┌─────────────────────────┬─────────────────────┐
   * │ datetime                ┆ hr0                 │
   * │ ---                     ┆ ---                 │
   * │ datetime[ms]            ┆ datetime[ms]        │
   * ╞═════════════════════════╪═════════════════════╡
   * │ 2020-01-01 01:30:00.002 ┆ 2020-01-01 02:00:00 │
   * │ 2020-01-01 02:02:01.030 ┆ 2020-01-01 02:00:00 │
   * │ 2020-01-01 04:42:20.001 ┆ 2020-01-01 05:00:00 │
   * └─────────────────────────┴─────────────────────┘
   * >>> df.select("datetime", pl.col("datetime").dt.round("30m").alias("hr30m"));
   * shape: (3, 2)
   * ┌─────────────────────────┬─────────────────────┐
   * │ datetime                ┆ hr30m               │
   * │ ---                     ┆ ---                 │
   * │ datetime[ms]            ┆ datetime[ms]        │
   * ╞═════════════════════════╪═════════════════════╡
   * │ 2020-01-01 01:30:00.002 ┆ 2020-01-01 01:30:00 │
   * │ 2020-01-01 02:02:01.030 ┆ 2020-01-01 02:00:00 │
   * │ 2020-01-01 04:42:20.001 ┆ 2020-01-01 05:00:00 │
   * └─────────────────────────┴─────────────────────┘
   * ```
   */
  round(every: string | Expr): T;
}
/**
 * Functions that can be applied to string (Utf8) data.
 *
 * @typeParam T - type returned by each string function
 */
export interface StringFunctions<T> {
  /**
   * Vertically concat the values in the Series to a single string value.
   * @param delimiter - string placed between the concatenated values
   * @param ignoreNulls - optional flag controlling null handling
   *   (NOTE(review): exact semantics are not documented here — confirm against the implementation)
   * @example
   * ```
   * > df = pl.DataFrame({"foo": [1, null, 2]})
   * > df = df.select(pl.col("foo").str.concat("-"))
   * > df
   * shape: (1, 1)
   * ┌──────────┐
   * │ foo      │
   * │ ---      │
   * │ str      │
   * ╞══════════╡
   * │ 1-null-2 │
   * └──────────┘
   * ```
   */
  concat(delimiter: string, ignoreNulls?: boolean): T;
  /**
   * Check if strings in Series contain a substring that matches a pattern.
   * @param pat - A valid regular expression pattern, compatible with the `regex` crate
   * @param literal - Treat `pattern` as a literal string, not as a regular expression.
   *   Optional: when omitted the pattern is matched as a regular expression, as in the
   *   first call of the example below.
   * @param strict - Raise an error if the underlying pattern is not a valid regex,
   *   otherwise mask out with a null value. Optional: when omitted the implementation's
   *   default applies.
   * @returns Boolean mask
   * @example
   * ```
   * const df = pl.DataFrame({"txt": ["Crab", "cat and dog", "rab$bit", null]})
   * df.select(
   * ...     pl.col("txt"),
   * ...     pl.col("txt").str.contains("cat|bit").alias("regex"),
   * ...     pl.col("txt").str.contains("rab$", true).alias("literal"),
   * ... )
   * shape: (4, 3)
   * ┌─────────────┬───────┬─────────┐
   * │ txt         ┆ regex ┆ literal │
   * │ ---         ┆ ---   ┆ ---     │
   * │ str         ┆ bool  ┆ bool    │
   * ╞═════════════╪═══════╪═════════╡
   * │ Crab        ┆ false ┆ false   │
   * │ cat and dog ┆ true  ┆ false   │
   * │ rab$bit     ┆ true  ┆ true    │
   * │ null        ┆ null  ┆ null    │
   * └─────────────┴───────┴─────────┘
   * ```
   */
  // `literal` and `strict` are optional so the one- and two-argument calls shown in the
  // example above type-check; three-argument callers are unaffected.
  contains(pat: string | RegExp | Expr, literal?: boolean, strict?: boolean): T;
  /**
   * Decodes a value using the provided encoding
   *
   * The arguments can be given positionally or as an options object.
   * @param encoding - hex | base64
   * @param strict - how to handle invalid inputs
   *
   *     - true: method will throw error if unable to decode a value
   *     - false: unhandled values will be replaced with `null`
   * @example
   * ```
   * > df = pl.DataFrame({"strings": ["666f6f", "626172", null]})
   * > df.select(col("strings").str.decode("hex"))
   * shape: (3, 1)
   * ┌─────────┐
   * │ strings │
   * │ ---     │
   * │ str     │
   * ╞═════════╡
   * │ foo     │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ bar     │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ null    │
   * └─────────┘
   * ```
   */
  decode(encoding: "hex" | "base64", strict?: boolean): T;
  decode(options: { encoding: "hex" | "base64"; strict?: boolean }): T;
  /**
   * Encodes a value using the provided encoding
   * @param encoding - hex | base64
   * @example
   * ```
   * > df = pl.DataFrame({"strings": ["foo", "bar", null]})
   * > df.select(col("strings").str.encode("hex"))
   * shape: (3, 1)
   * ┌─────────┐
   * │ strings │
   * │ ---     │
   * │ str     │
   * ╞═════════╡
   * │ 666f6f  │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ 626172  │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ null    │
   * └─────────┘
   * ```
   */
  encode(encoding: "hex" | "base64"): T;
  /**
   * Extract the target capture group from provided patterns.
   * @param pat - A valid regex pattern
   * @param groupIndex - Index of the targeted capture group.
   *   Group 0 means the whole pattern; the first capture group is at index 1.
   * @returns Utf8 array. Contains null if the original value is null or the regex captures nothing.
   * @example
   * ```
   * > df = pl.DataFrame({
   * ...   'a': [
   * ...       'http://vote.com/ballon_dor?candidate=messi&ref=polars',
   * ...       'http://vote.com/ballon_dor?candidat=jorginho&ref=polars',
   * ...       'http://vote.com/ballon_dor?candidate=ronaldo&ref=polars'
   * ...   ]})
   * > df.select(pl.col('a').str.extract(/candidate=(\w+)/, 1))
   * shape: (3, 1)
   * ┌─────────┐
   * │ a       │
   * │ ---     │
   * │ str     │
   * ╞═════════╡
   * │ messi   │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ null    │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ ronaldo │
   * └─────────┘
   * ```
   */
  extract(pat: string | RegExp, groupIndex: number): T;
  /**
   * Extract the first match of json string with provided JSONPath expression.
   * Throws errors if it encounters invalid json strings.
   * All return values will be cast to Utf8 regardless of the original value.
   * @see https://goessner.net/articles/JsonPath/
   * @param pat - A valid JSON path query string
   * @returns Utf8 array. Contains null if the original value is null or the `jsonPath` returns nothing.
   * @example
   * ```
   * > df = pl.DataFrame({
   * ...   'json_val': [
   * ...     '{"a":"1"}',
   * ...     null,
   * ...     '{"a":2}',
   * ...     '{"a":2.1}',
   * ...     '{"a":true}'
   * ...   ]
   * ... })
   * > df.select(pl.col('json_val').str.jsonPathMatch('$.a'))
   * shape: (5,)
   * Series: 'json_val' [str]
   * [
   *     "1"
   *     null
   *     "2"
   *     "2.1"
   *     "true"
   * ]
   * ```
   */
  jsonPathMatch(pat: string): T;
  /** Get length of the string values in the Series. */
  lengths(): T;
  /** Remove leading whitespace. */
  lstrip(): T;
  /**
   * Replace first regex match with a string value.
   * @param pat - pattern to match
   * @param val - replacement string
   */
  replace(pat: string | RegExp, val: string): T;
  /**
   * Replace all regex matches with a string value.
   * @param pat - pattern to match
   * @param val - replacement string
   */
  replaceAll(pat: string | RegExp, val: string): T;
  /** Modify the strings to their lowercase equivalent. */
  toLowerCase(): T;
  /** Modify the strings to their uppercase equivalent. */
  toUpperCase(): T;
  /** Remove trailing whitespace. */
  rstrip(): T;
  /**
   * Create subslices of the string values of a Utf8 Series.
   * @param start - Start of the slice (negative indexing may be used).
   * @param length - Optional length of the slice.
   */
  slice(start: number, length?: number): T;
  /**
   * Split a string into substrings using the specified separator and return them as a Series.
   * @param by - A string that identifies character or characters to use in separating the string.
   * @param options - either an options object or a bare boolean
   * @param options.inclusive - Include the split character/string in the results
   */
  split(by: string, options?: { inclusive?: boolean } | boolean): T;
  /** Remove leading and trailing whitespace. */
  strip(): T;
  /**
   * Parse a Series of dtype Utf8 to a Date/Datetime Series.
   * @param datatype - Date or Datetime.
   * @param fmt - formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html)
   */
  strptime(datatype: DataType.Date | DataType.Datetime | typeof DataType.Datetime, fmt?: string): T;
}
/**
* Contract for objects that can be serialized into a `Buffer`.
*/
export interface Serialize {
/**
* Serializes object to desired format via [serde](https://serde.rs/)
*
* @param format - Output encoding: [json](https://github.com/serde-rs/json) | [bincode](https://github.com/bincode-org/bincode)
* @returns A `Buffer` holding the object serialized in the requested format.
*/
serialize(format: "json" | "bincode"): Buffer;
}
/**
* Contract for reconstructing a value of type `T` from a `Buffer`.
* The accepted formats mirror those of `Serialize.serialize`.
*/
export interface Deserialize<T> {
/**
* De-serializes buffer via [serde](https://serde.rs/)
* @param buf - Buffer to deserialize.
* @param format - Encoding of `buf`: [json](https://github.com/serde-rs/json) | [bincode](https://github.com/bincode-org/bincode)
* @returns The deserialized value of type `T`.
*/
deserialize(buf: Buffer, format: "json" | "bincode"): T;
}
/**
* GroupBy operations that can be applied to a DataFrame or LazyFrame.
*
* `T` is the type returned by each grouping method.
*/
export interface GroupByOps<T> {
/**
* Create rolling groups based on a time column (or index value of type Int32, Int64).
*
* Different from a dynamic groupby, the windows are determined by the individual values and are not of constant
* intervals. For constant intervals use {@link groupByDynamic}.
*
* The `period` and `offset` arguments are created with
* the following string language:
* - 1ns (1 nanosecond)
* - 1us (1 microsecond)
* - 1ms (1 millisecond)
* - 1s (1 second)
* - 1m (1 minute)
* - 1h (1 hour)
* - 1d (1 day)
* - 1w (1 week)
* - 1mo (1 calendar month)
* - 1y (1 calendar year)
* - 1i (1 index count)
*
* Or combine them:
* "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
*
* In case of a groupByRolling on an integer column, the windows are defined by:
* - "1i" # length 1
* - "10i" # length 10
*
* @param opts.indexColumn Column used to group based on the time window.
* Often of type Date/Datetime.
* This column must be sorted in ascending order. If not the output will not make sense.
* In case of a rolling groupby on indices, dtype needs to be one of {Int32, Int64}. Note that
* Int32 gets temporarily cast to Int64, so if performance matters use an Int64 column.
* @param opts.period length of the window
* @param opts.offset offset of the window. Default is `-period`
* @param opts.closed Defines if the window interval is closed or not. Any of `{"left", "right", "both", "none"}`
* @param opts.by Also group by this column/these columns
* @example
* ```
* >dates = [
* ... "2020-01-01 13:45:48",
* ... "2020-01-01 16:42:13",
* ... "2020-01-01 16:45:09",
* ... "2020-01-02 18:12:48",
* ... "2020-01-03 19:45:32",
* ... "2020-01-08 23:16:43",
* ... ]
* >df = pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]}).withColumn(
* ... pl.col("dt").str.strptime(pl.Datetime)
* ... )
* >out = df.groupByRolling({indexColumn:"dt", period:"2d"}).agg(
* ... [
* ... pl.sum("a").alias("sum_a"),
* ... pl.max("a").alias("max_a"),
* ... pl.min("a").alias("min_a"),
* ... ]
* ... )
* >assert.deepStrictEqual(out["sum_a"].toArray(), [3, 10, 15, 24, 11, 1])
* >assert.deepStrictEqual(out["max_a"].toArray(), [3, 7, 7, 9, 9, 1])
* >assert.deepStrictEqual(out["min_a"].toArray(), [3, 3, 3, 3, 2, 1])
* >out
* shape: (6, 4)
* ┌─────────────────────┬───────┬───────┬───────┐
* │ dt                  ┆ sum_a ┆ max_a ┆ min_a │
* │ ---                 ┆ ---   ┆ ---   ┆ ---   │
* │ datetime[ms]        ┆ i64   ┆ i64   ┆ i64   │
* ╞═════════════════════╪═══════╪═══════╪═══════╡
* │ 2020-01-01 13:45:48 ┆ 3     ┆ 3     ┆ 3     │
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 2020-01-01 16:42:13 ┆ 10    ┆ 7     ┆ 3     │
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 2020-01-01 16:45:09 ┆ 15    ┆ 7     ┆ 3     │
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 2020-01-02 18:12:48 ┆ 24    ┆ 9     ┆ 3     │
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 2020-01-03 19:45:32 ┆ 11    ┆ 9     ┆ 2     │
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
* └─────────────────────┴───────┴───────┴───────┘
* ```
*/
groupByRolling(opts: {
indexColumn: ColumnsOrExpr;
by?: ColumnsOrExpr;
period: string;
offset?: string;
closed?: "left" | "right" | "both" | "none";
}): T;
/**
* Groups based on a time value (or index value of type Int32, Int64). Time windows are calculated and rows are assigned to windows.
* Different from a normal groupby is that a row can be member of multiple groups. The time/index window could
* be seen as a rolling window, with a window size determined by dates/times/values instead of slots in the DataFrame.
*
* A window is defined by:
* - every: interval of the window
* - period: length of the window
* - offset: offset of the window
*
* The `every`, `period` and `offset` arguments are created with
* the following string language:
* - 1ns (1 nanosecond)
* - 1us (1 microsecond)
* - 1ms (1 millisecond)
* - 1s (1 second)
* - 1m (1 minute)
* - 1h (1 hour)
* - 1d (1 day)
* - 1w (1 week)
* - 1mo (1 calendar month)
* - 1y (1 calendar year)
* - 1i (1 index count)
*
* Or combine them:
* "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
*
* In case of a groupByDynamic on an integer column, the windows are defined by:
* - "1i" # length 1
* - "10i" # length 10
*
* @param options.indexColumn Column used to group based on the time window.
* Often of type Date/Datetime.
* This column must be sorted in ascending order. If not the output will not make sense.
* In case of a dynamic groupby on indices, dtype needs to be one of {Int32, Int64}. Note that
* Int32 gets temporarily cast to Int64, so if performance matters use an Int64 column.
* NOTE(review): declared as `string` here, whereas `groupByRolling` accepts `ColumnsOrExpr` for the same option — confirm this difference is intended.
* @param options.every interval of the window
* @param options.period length of the window; if omitted it is equal to `every`
* @param options.offset offset of the window; if both `offset` and `period` are omitted it is equal to negative `every`
* @param options.label Define which label to use for the window: Any of {'left', 'right', 'datapoint'}
* NOTE(review): the declared type is plain `string`, so other values are not rejected at compile time.
* @param options.includeBoundaries add the lower and upper bound of the window to the "_lower_bound" and "_upper_bound" columns. This will impact performance because it's harder to parallelize
* @param options.closed Defines if the window interval is closed or not. Any of {"left", "right", "both", "none"}
* @param options.by Also group by this column/these columns
* @param options.startBy The strategy to determine the start of the first window by. Any of {'window', 'datapoint', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'}
*/
groupByDynamic(options: {
indexColumn: string;
every: string;
period?: string;
offset?: string;
label?: string;
includeBoundaries?: boolean;
closed?: "left" | "right" | "both" | "none";
by?: ColumnsOrExpr;
startBy?: StartBy;
}): T;
}
/**
* Exponentially-weighted operations that can be applied to a Series and Expr
*/
export interface EwmOps<T> {
/**
* Exponentially-weighted moving average.
*
* Three call forms are declared: positional arguments, a single options object with the same
* fields, or no arguments at all.
* NOTE(review): the default values applied when arguments are omitted are not visible in this declaration — confirm against the implementation.
*
* @param alpha Specify smoothing factor alpha directly, `0 < alpha <= 1`.
* @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
* - When `adjust: true` the EW function is calculated using weights `w_i = (1 - alpha)^i`
* - When `adjust: false` the EW function is calculated recursively
* @param minPeriods Minimum number of observations in window required to have a value (otherwise result is null).
* @param bias When `bias: false`, apply a correction to make the estimate statistically unbiased.
* @param ignoreNulls Ignore missing values when calculating weights.
* - When `ignoreNulls: false` (default), weights are based on absolute positions.
* - When `ignoreNulls: true`, weights are based on relative positions.
* @returns Expr that evaluates to a float 64 Series.
* @example
* ```
* > const df = pl.DataFrame({a: [1, 2, 3]});
* > df.select(pl.col("a").ewmMean())
* shape: (3, 1)
* ┌──────────┐
* │ a        │
* │ ---      │
* │ f64      │
* ╞══════════╡
* │ 1.0      │
* │ 1.666667 │
* │ 2.428571 │
* └──────────┘
* ```
*/
ewmMean(alpha?: number, adjust?: boolean, minPeriods?: number, bias?: boolean, ignoreNulls?: boolean): T;
ewmMean(opts: {
alpha?: number;
adjust?: boolean;
minPeriods?: number;
bias?: boolean;
ignoreNulls?: boolean;
}): T;
ewmMean(): T;
/**
* Exponentially-weighted standard deviation.
*
* Three call forms are declared: positional arguments, a single options object with the same
* fields, or no arguments at all.
* NOTE(review): the default values applied when arguments are omitted are not visible in this declaration — confirm against the implementation.
*
* @param alpha Specify smoothing factor alpha directly, `0 < alpha <= 1`.
* @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
* - When `adjust: true` the EW function is calculated using weights `w_i = (1 - alpha)^i`
* - When `adjust: false` the EW function is calculated recursively
* @param minPeriods Minimum number of observations in window required to have a value (otherwise result is null).
* @param bias When `bias: false`, apply a correction to make the estimate statistically unbiased.
* @param ignoreNulls Ignore missing values when calculating weights.
* - When `ignoreNulls: false` (default), weights are based on absolute positions.
* NOTE(review): the example that followed here was truncated in the original comment. Per the upstream
* Polars/pandas documentation, for `[x_0, null, x_2]` the weights of `x_0` and `x_2` are `(1 - alpha)^2` and `1`
* when `adjust: true`, and `(1 - alpha)^2` and `alpha` when `adjust: false` — confirm.
* - When `ignoreNulls: true`, weights are based on relative positions.
* @returns Expr that evaluates to a float 64 Series.
* @example
* ```
* > const df = pl.DataFrame({a: [1, 2, 3]});
* > df.select(pl.col("a").ewmStd())
* shape: (3, 1)
* ┌──────────┐
* │ a        │
* │ ---      │
* │ f64      │
* ╞══════════╡
* │ 0.0      │
* │ 0.707107 │
* │ 0.963624 │
* └──────────┘
* ```
*/
ewmStd(alpha?: number, adjust?: boolean, minPeriods?: number, bias?: boolean, ignoreNulls?: boolean): T;
ewmStd(opts: {
alpha?: number;
adjust?: boolean;
minPeriods?: number;
bias?: boolean;
ignoreNulls?: boolean;
}): T;
ewmStd(): T;
/**
* Exponentially-weighted variance.
*
* @param alpha Specify smoothing factor alpha directly, :math:`0 < \alpha \leq 1`.
* @param adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
* - When ``adjust: true`` the EW function is calculated using weights :math:`w_i = (1 - \alpha)^i`
* - When ``adjust: false`` the EW function is calculated recursively
* @param minPeriods Minimum number of observations in window required to have a value (otherw