nodejs-polars
Version:
Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL
1,511 lines (1,506 loc) • 75.8 kB
TypeScript
import { Writable } from "node:stream";
import { type GroupBy, RollingGroupBy } from "./groupby";
import { type LazyDataFrame } from "./lazy/dataframe";
import { Expr } from "./lazy/expr";
import { Series } from "./series";
import type { CsvWriterOptions, FillNullStrategy, JoinOptions, WriteAvroOptions, WriteIPCOptions, WriteParquetOptions } from "./types";
import { type DTypeToJs, DataType, type JsToDtype } from "./datatypes";
import { type ColumnSelection, type ColumnsOrExpr, type ExprOrString, type Simplify, type ValueOrArray } from "./utils";
import type { Arithmetic, Deserialize, GroupByOps, Sample, Serialize } from "./shared_traits";
declare const inspect: unique symbol;
/**
* Write methods for DataFrame
*/
interface WriteMethods {
/**
* __Write DataFrame to comma-separated values file (csv).__
*
* If no options are specified, it will return a new string containing the contents
* ___
* @param dest file or stream to write to
* @param options.includeBom - Whether to include UTF-8 BOM in the CSV output.
* @param options.lineTerminator - String used to end each row.
* @param options.includeHeader - Whether or not to include header in the CSV output.
* @param options.separator - Separate CSV fields with this symbol. _defaults to `,`
* @param options.quoteChar - Character to use for quoting. Default: \" Note: it will not be used when sep is used
* @param options.batchSize - Number of rows that will be processed per thread.
* @param options.datetimeFormat - A format string, with the specifiers defined by the
* `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
* Rust crate. If no format specified, the default fractional-second
* precision is inferred from the maximum timeunit found in the frame's
* Datetime cols (if any).
* @param options.dateFormat - A format string, with the specifiers defined by the
* `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
* Rust crate.
* @param options.timeFormat - A format string, with the specifiers defined by the
* `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
* Rust crate.
* @param options.floatPrecision - Number of decimal places to write, applied to both `Float32` and `Float64` datatypes.
* @param options.nullValue - A string representing null values (defaulting to the empty string).
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.writeCSV();
* foo,bar,ham
* 1,6,a
* 2,7,b
* 3,8,c
*
* // using a file path
* > df.head(1).writeCSV("./foo.csv")
* // foo.csv
* foo,bar,ham
* 1,6,a
*
* // using a write stream
* > const writeStream = new Stream.Writable({
* ... write(chunk, encoding, callback) {
* ... console.log("writeStream: %O", chunk.toString());
* ... callback(null);
* ... }
* ... });
* > df.head(1).writeCSV(writeStream, {includeHeader: false});
* writeStream: '1,6,a'
* ```
* @category IO
*/
writeCSV(): Buffer;
writeCSV(options: CsvWriterOptions): Buffer;
writeCSV(dest: string | Writable, options?: CsvWriterOptions): void;
/**
* Write Dataframe to JSON string, file, or write stream
* @param destination file or write stream
* @param options
* @param options.format - json | lines
* @example
* ```
* > const df = pl.DataFrame({
* ... foo: [1,2,3],
* ... bar: ['a','b','c']
* ... })
*
* > df.writeJSON({format:"json"})
* `[ {"foo":1.0,"bar":"a"}, {"foo":2.0,"bar":"b"}, {"foo":3.0,"bar":"c"}]`
*
* > df.writeJSON({format:"lines"})
* `{"foo":1.0,"bar":"a"}
* {"foo":2.0,"bar":"b"}
* {"foo":3.0,"bar":"c"}`
*
* // writing to a file
* > df.writeJSON("/path/to/file.json", {format:'lines'})
* ```
* @category IO
*/
writeJSON(options?: {
format: "lines" | "json";
}): Buffer;
writeJSON(destination: string | Writable, options?: {
format: "lines" | "json";
}): void;
/**
* Write to Arrow IPC feather file, either to a file path or to a write stream.
* @param destination File path to which the file should be written, or writable.
* @param options.compression Compression method *defaults to "uncompressed"*
* @category IO
*/
writeIPC(options?: WriteIPCOptions): Buffer;
writeIPC(destination: string | Writable, options?: WriteIPCOptions): void;
/**
* Write to Arrow IPC stream file, either to a file path or to a write stream.
* @param destination File path to which the file should be written, or writable.
* @param options.compression Compression method *defaults to "uncompressed"*
* @category IO
*/
writeIPCStream(options?: WriteIPCOptions): Buffer;
writeIPCStream(destination: string | Writable, options?: WriteIPCOptions): void;
/**
* Write the DataFrame to disk in parquet format.
* @param destination File path to which the file should be written, or writable.
* @param options.compression Compression method *defaults to "uncompressed"*
* @category IO
*/
writeParquet(options?: WriteParquetOptions): Buffer;
writeParquet(destination: string | Writable, options?: WriteParquetOptions): void;
/**
* Write the DataFrame to disk in avro format.
* @param destination File path to which the file should be written, or writable.
* @param options.compression Compression method *defaults to "uncompressed"*
* @category IO
*/
writeAvro(options?: WriteAvroOptions): Buffer;
writeAvro(destination: string | Writable, options?: WriteAvroOptions): void;
}
/**
* A DataFrame is a two-dimensional data structure that represents data as a table
* with rows and columns.
*
* @param data - Object, Array, or Series
* Two-dimensional data in various forms. object must contain Arrays.
* Array may contain Series or other Arrays.
* @param columns - Array of str, default undefined
* Column labels to use for resulting DataFrame. If specified, overrides any
* labels already present in the data. Must match data dimensions.
* @param orient - 'col' | 'row' default undefined
* Whether to interpret two-dimensional data as columns or as rows. If None,
* the orientation is inferred by matching the columns and data dimensions. If
* this does not yield conclusive results, column orientation is used.
* @example
* Constructing a DataFrame from an object :
* ```
* > const data = {'a': [1n, 2n], 'b': [3, 4]};
* > const df = pl.DataFrame(data);
* > console.log(df.toString());
* shape: (2, 2)
* ╭─────┬─────╮
* │ a ┆ b │
* │ --- ┆ --- │
* │ u64 ┆ i64 │
* ╞═════╪═════╡
* │ 1 ┆ 3 │
* ├╌╌╌╌╌┼╌╌╌╌╌┤
* │ 2 ┆ 4 │
* ╰─────┴─────╯
* ```
* Notice that the dtypes are automatically inferred as polars UInt64 and Int64:
* ```
* > df.dtypes
* ['UInt64', 'Int64']
* ```
* In order to specify dtypes for your columns, initialize the DataFrame with a list
* of Series instead:
* ```
* > const data = [pl.Series('col1', [1, 2], pl.Float32), pl.Series('col2', [3, 4], pl.Int64)];
* > const df2 = pl.DataFrame(data);
* > console.log(df2.toString());
* shape: (2, 2)
* ╭──────┬──────╮
* │ col1 ┆ col2 │
* │ --- ┆ --- │
* │ f32 ┆ i64 │
* ╞══════╪══════╡
* │ 1 ┆ 3 │
* ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ 2 ┆ 4 │
* ╰──────┴──────╯
* ```
*
* Constructing a DataFrame from a list of lists, row orientation inferred:
* ```
* > const data = [[1, 2, 3], [4, 5, 6]];
* > const df4 = pl.DataFrame(data, ['a', 'b', 'c']);
* > console.log(df4.toString());
* shape: (2, 3)
* ╭─────┬─────┬─────╮
* │ a ┆ b ┆ c │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ i64 │
* ╞═════╪═════╪═════╡
* │ 1 ┆ 2 ┆ 3 │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 4 ┆ 5 ┆ 6 │
* ╰─────┴─────┴─────╯
* ```
*/
export interface DataFrame<T extends Record<string, Series> = any> extends Arithmetic<DataFrame<T>>, Sample<DataFrame<T>>, WriteMethods, Serialize, GroupByOps<RollingGroupBy> {
/** @ignore */
_df: any;
dtypes: DataType[];
height: number;
shape: {
height: number;
width: number;
};
width: number;
get columns(): string[];
set columns(cols: string[]);
[inspect](): string;
[Symbol.iterator](): Generator<any, void, any>;
/**
* Very cheap deep clone.
*/
clone(): DataFrame<T>;
/**
* __Summary statistics for a DataFrame.__
*
* Only summarizes numeric datatypes at the moment and returns nulls for non numeric datatypes.
* ___
* Example
* ```
* > const df = pl.DataFrame({
* ... 'a': [1.0, 2.8, 3.0],
* ... 'b': [4, 5, 6],
* ... "c": [true, false, true]
* ... });
* > df.describe()
* shape: (5, 4)
* ╭──────────┬───────┬─────┬──────╮
* │ describe ┆ a ┆ b ┆ c │
* │ --- ┆ --- ┆ --- ┆ --- │
* │ str ┆ f64 ┆ f64 ┆ f64 │
* ╞══════════╪═══════╪═════╪══════╡
* │ "mean" ┆ 2.267 ┆ 5 ┆ null │
* ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ "std" ┆ 1.102 ┆ 1 ┆ null │
* ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ "min" ┆ 1 ┆ 4 ┆ 0.0 │
* ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ "max" ┆ 3 ┆ 6 ┆ 1 │
* ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ "median" ┆ 2.8 ┆ 5 ┆ null │
* ╰──────────┴───────┴─────┴──────╯
* ```
*/
describe(): DataFrame;
/**
* __Remove column from DataFrame and return as new.__
* ___
* @param name
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6.0, 7.0, 8.0],
* ... "ham": ['a', 'b', 'c'],
* ... "apple": ['a', 'b', 'c']
* ... });
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // apple: pl.Series<Utf8, "apple">;
* // }>
* > const df2 = df.drop(['ham', 'apple']);
* // df2: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // }>
* > console.log(df2.toString());
* shape: (3, 2)
* ╭─────┬─────╮
* │ foo ┆ bar │
* │ --- ┆ --- │
* │ i64 ┆ f64 │
* ╞═════╪═════╡
* │ 1 ┆ 6 │
* ├╌╌╌╌╌┼╌╌╌╌╌┤
* │ 2 ┆ 7 │
* ├╌╌╌╌╌┼╌╌╌╌╌┤
* │ 3 ┆ 8 │
* ╰─────┴─────╯
* ```
*/
drop<U extends string>(name: U): DataFrame<Simplify<Omit<T, U>>>;
drop<const U extends string[]>(names: U): DataFrame<Simplify<Omit<T, U[number]>>>;
drop<U extends string, const V extends string[]>(name: U, ...names: V): DataFrame<Simplify<Omit<T, U | V[number]>>>;
/**
* __Return a new DataFrame where the null values are dropped.__
*
* This method only drops nulls row-wise if any single value of the row is null.
* ___
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, null, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > console.log(df.dropNulls().toString());
* shape: (2, 3)
* ┌─────┬─────┬─────┐
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞═════╪═════╪═════╡
* │ 1 ┆ 6 ┆ "a" │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 3 ┆ 8 ┆ "c" │
* └─────┴─────┴─────┘
* ```
*/
dropNulls(column: keyof T): DataFrame<T>;
dropNulls(columns: (keyof T)[]): DataFrame<T>;
dropNulls(...columns: (keyof T)[]): DataFrame<T>;
/**
* __Explode `DataFrame` to long format by exploding a column with Lists.__
* ___
* @param columns - column or columns to explode
* @example
* ```
* > const df = pl.DataFrame({
* ... "letters": ["c", "c", "a", "c", "a", "b"],
* ... "nrs": [[1, 2], [1, 3], [4, 3], [5, 5, 5], [6], [2, 1, 2]]
* ... });
* > console.log(df.toString());
* shape: (6, 2)
* ╭─────────┬────────────╮
* │ letters ┆ nrs │
* │ --- ┆ --- │
* │ str ┆ list [i64] │
* ╞═════════╪════════════╡
* │ "c" ┆ [1, 2] │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
* │ "c" ┆ [1, 3] │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
* │ "a" ┆ [4, 3] │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
* │ "c" ┆ [5, 5, 5] │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
* │ "a" ┆ [6] │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
* │ "b" ┆ [2, 1, 2] │
* ╰─────────┴────────────╯
* > df.explode("nrs")
* shape: (13, 2)
* ╭─────────┬─────╮
* │ letters ┆ nrs │
* │ --- ┆ --- │
* │ str ┆ i64 │
* ╞═════════╪═════╡
* │ "c" ┆ 1 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "c" ┆ 2 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "c" ┆ 1 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "c" ┆ 3 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ ... ┆ ... │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "c" ┆ 5 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "a" ┆ 6 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "b" ┆ 2 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "b" ┆ 1 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "b" ┆ 2 │
* ╰─────────┴─────╯
* ```
*/
explode(column: ExprOrString): DataFrame;
explode(columns: ExprOrString[]): DataFrame;
explode(column: ExprOrString, ...columns: ExprOrString[]): DataFrame;
/**
* __Extend the memory backed by this `DataFrame` with the values from `other`.__
* ___
* Different from `vstack` which adds the chunks from `other` to the chunks of this `DataFrame`,
* `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation.
* If this does not cause a reallocation, the resulting data structure will not have any extra chunks
* and thus will yield faster queries.
* Prefer `extend` over `vstack` when you want to do a query after a single append. For instance during
* online operations where you add `n` rows and rerun a query.
* Prefer `vstack` over `extend` when you want to append many times before doing a query. For instance
* when you read in multiple files and want to store them in a single `DataFrame`.
* In the latter case, finish the sequence of `vstack` operations with a `rechunk`.
* @param other DataFrame to vertically add.
*/
extend(other: DataFrame<T>): DataFrame<T>;
/**
* Fill null/missing values by a filling strategy
*
* @param strategy - One of:
* - "backward"
* - "forward"
* - "mean"
* - "min"
* - "max"
* - "zero"
* - "one"
* @returns DataFrame with None replaced with the filling strategy.
*/
fillNull(strategy: FillNullStrategy): DataFrame<T>;
/**
* Filter the rows in the DataFrame based on a predicate expression.
* ___
* @param predicate - Expression that evaluates to a boolean Series.
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* // Filter on one condition
* > df.filter(pl.col("foo").lt(3))
* shape: (2, 3)
* ┌─────┬─────┬─────┐
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞═════╪═════╪═════╡
* │ 1 ┆ 6 ┆ a │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 2 ┆ 7 ┆ b │
* └─────┴─────┴─────┘
* // Filter on multiple conditions
* > df.filter(
* ... pl.col("foo").lt(3)
* ... .and(pl.col("ham").eq(pl.lit("a")))
* ... )
* shape: (1, 3)
* ┌─────┬─────┬─────┐
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞═════╪═════╪═════╡
* │ 1 ┆ 6 ┆ a │
* └─────┴─────┴─────┘
* ```
*/
filter(predicate: any): DataFrame<T>;
/**
* Find the index of a column by name.
* ___
* @param name - Name of the column to find.
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.findIdxByName("ham")
* 2
* ```
*/
findIdxByName(name: keyof T): number;
/**
* __Apply a horizontal reduction on a DataFrame.__
*
* This can be used to effectively determine aggregations on a row level,
* and can be applied to any DataType that can be supercasted (casted to a similar parent type).
*
* An example of the supercast rules when applying an arithmetic operation on two DataTypes are for instance:
* - Int8 + Utf8 = Utf8
* - Float32 + Int64 = Float32
* - Float32 + Float64 = Float64
* ___
* @param operation - function that takes two `Series` and returns a `Series`.
* @returns Series
* @example
* ```
* > // A horizontal sum operation
* > let df = pl.DataFrame({
* ... "a": [2, 1, 3],
* ... "b": [1, 2, 3],
* ... "c": [1.0, 2.0, 3.0]
* ... });
* > df.fold((s1, s2) => s1.plus(s2))
* Series: 'a' [f64]
* [
* 4
* 5
* 9
* ]
* > // A horizontal minimum operation
* > df = pl.DataFrame({
* ... "a": [2, 1, 3],
* ... "b": [1, 2, 3],
* ... "c": [1.0, 2.0, 3.0]
* ... });
* > df.fold((s1, s2) => s1.zipWith(s1.lt(s2), s2))
* Series: 'a' [f64]
* [
* 1
* 1
* 3
* ]
* > // A horizontal string concatenation
* > df = pl.DataFrame({
* ... "a": ["foo", "bar", 2],
* ... "b": [1, 2, 3],
* ... "c": [1.0, 2.0, 3.0]
* ... })
* > df.fold((s1, s2) => s1.plus(s2))
* Series: '' [str]
* [
* "foo11"
* "bar22"
* "233"
* ]
* ```
*/
fold(operation: (s1: Series, s2: Series) => Series): Series;
/**
* Check if DataFrame is equal to other.
* ___
* @param other - DataFrame to compare with.
* @param nullEqual - Consider null values as equal.
* @example
* ```
* > const df1 = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6.0, 7.0, 8.0],
* ... "ham": ['a', 'b', 'c']
* ... })
* > const df2 = pl.DataFrame({
* ... "foo": [3, 2, 1],
* ... "bar": [8.0, 7.0, 6.0],
* ... "ham": ['c', 'b', 'a']
* ... })
* > df1.frameEqual(df1)
* true
* > df1.frameEqual(df2)
* false
* ```
*/
frameEqual(other: DataFrame): boolean;
frameEqual(other: DataFrame, nullEqual: boolean): boolean;
/**
* Get a single column as Series by name.
*
* ---
* @example
* ```
* > const df = pl.DataFrame({
* ... foo: [1, 2, 3],
* ... bar: [6, null, 8],
* ... ham: ["a", "b", "c"],
* ... });
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // }>
* > const column = df.getColumn("foo");
* // column: pl.Series<Float64, "foo">
* ```
*/
getColumn<U extends keyof T>(name: U): T[U];
getColumn(name: string): Series;
/**
* Get the DataFrame as an Array of Series.
* ---
* @example
* ```
* > const df = pl.DataFrame({
* ... foo: [1, 2, 3],
* ... bar: [6, null, 8],
* ... ham: ["a", "b", "c"],
* ... });
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // }>
* > const columns = df.getColumns();
* // columns: (pl.Series<Float64, "foo"> | pl.Series<Float64, "bar"> | pl.Series<Utf8, "ham">)[]
* ```
*/
getColumns(): T[keyof T][];
/**
* Start a groupby operation.
* ___
* @param by - Column(s) to group by.
*/
groupBy(...by: ColumnSelection[]): GroupBy;
/**
* Hash and combine the rows in this DataFrame. _(Hash value is UInt64)_
* @param k0 - seed parameter
* @param k1 - seed parameter
* @param k2 - seed parameter
* @param k3 - seed parameter
*/
hashRows(k0?: number, k1?: number, k2?: number, k3?: number): Series;
hashRows(options: {
k0?: number;
k1?: number;
k2?: number;
k3?: number;
}): Series;
/**
* Get first N rows as DataFrame.
* ___
* @param length - Length of the head.
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3, 4, 5],
* ... "bar": [6, 7, 8, 9, 10],
* ... "ham": ['a', 'b', 'c', 'd','e']
* ... });
* > df.head(3)
* shape: (3, 3)
* ╭─────┬─────┬─────╮
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞═════╪═════╪═════╡
* │ 1 ┆ 6 ┆ "a" │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 2 ┆ 7 ┆ "b" │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 3 ┆ 8 ┆ "c" │
* ╰─────┴─────┴─────╯
* ```
*/
head(length?: number): DataFrame<T>;
/**
* Return a new DataFrame grown horizontally by stacking multiple Series to it.
* @param columns - array of Series or DataFrame to stack
* @param inPlace - Modify in place
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // }>
* > const x = pl.Series("apple", [10, 20, 30])
* // x: pl.Series<Float64, "apple">
* > df.hstack([x])
* // pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // apple: pl.Series<Float64, "apple">;
* // }>
* shape: (3, 4)
* ╭─────┬─────┬─────┬───────╮
* │ foo ┆ bar ┆ ham ┆ apple │
* │ --- ┆ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str ┆ i64 │
* ╞═════╪═════╪═════╪═══════╡
* │ 1 ┆ 6 ┆ "a" ┆ 10 │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 2 ┆ 7 ┆ "b" ┆ 20 │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 3 ┆ 8 ┆ "c" ┆ 30 │
* ╰─────┴─────┴─────┴───────╯
* ```
*/
hstack<U extends Record<string, Series> = any>(columns: DataFrame<U>): DataFrame<Simplify<T & U>>;
hstack<U extends Series[]>(columns: U): DataFrame<Simplify<T & {
[K in U[number] as K["name"]]: K;
}>>;
hstack(columns: Array<Series> | DataFrame): DataFrame;
hstack(columns: Array<Series> | DataFrame, inPlace?: boolean): void;
/**
* Insert a Series at a certain column index. This operation is in place.
* @param index - Column position to insert the new `Series` column.
* @param series - `Series` to insert
*/
insertAtIdx(index: number, series: Series): void;
/**
* Interpolate intermediate values. The interpolation method is linear.
*/
interpolate(): DataFrame<T>;
/**
* Get a mask of all duplicated rows in this DataFrame.
*/
isDuplicated(): Series;
/**
* Check if the dataframe is empty
*/
isEmpty(): boolean;
/**
* Get a mask of all unique rows in this DataFrame.
*/
isUnique(): Series;
/**
* __SQL like joins.__
* @param df - DataFrame to join with.
* @param options
* @param options.leftOn - Name(s) of the left join column(s).
* @param options.rightOn - Name(s) of the right join column(s).
* @param options.on - Name(s) of the join columns in both DataFrames.
* @param options.how - Join strategy
* @param options.suffix - Suffix to append to columns with a duplicate name.
* @see {@link JoinOptions}
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6.0, 7.0, 8.0],
* ... "ham": ['a', 'b', 'c']
* ... });
* > const otherDF = pl.DataFrame({
* ... "apple": ['x', 'y', 'z'],
* ... "ham": ['a', 'b', 'd']
* ... });
* > df.join(otherDF, {on: 'ham'})
* shape: (2, 4)
* ╭─────┬─────┬─────┬───────╮
* │ foo ┆ bar ┆ ham ┆ apple │
* │ --- ┆ --- ┆ --- ┆ --- │
* │ i64 ┆ f64 ┆ str ┆ str │
* ╞═════╪═════╪═════╪═══════╡
* │ 1 ┆ 6 ┆ "a" ┆ "x" │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 2 ┆ 7 ┆ "b" ┆ "y" │
* ╰─────┴─────┴─────┴───────╯
* ```
*/
join(other: DataFrame, options: {
on: ValueOrArray<string>;
} & Omit<JoinOptions, "leftOn" | "rightOn">): DataFrame;
join(other: DataFrame, options: {
leftOn: ValueOrArray<string>;
rightOn: ValueOrArray<string>;
} & Omit<JoinOptions, "on">): DataFrame;
join(other: DataFrame, options: {
how: "cross";
suffix?: string;
}): DataFrame;
/**
* Perform an asof join. This is similar to a left-join except that we
* match on nearest key rather than equal keys.
*
* Both DataFrames must be sorted by the asofJoin key.
*
* For each row in the left DataFrame:
* - A "backward" search selects the last row in the right DataFrame whose
* 'on' key is less than or equal to the left's key.
*
* - A "forward" search selects the first row in the right DataFrame whose
* 'on' key is greater than or equal to the left's key.
*
* - A "nearest" search selects the last row in the right DataFrame whose value
* is nearest to the left's key. String keys are not currently supported for a
* nearest search.
*
* The default is "backward".
*
* @param other DataFrame to join with.
* @param options.leftOn Join column of the left DataFrame.
* @param options.rightOn Join column of the right DataFrame.
* @param options.on Join column of both DataFrames. If set, `leftOn` and `rightOn` should be undefined.
* @param options.byLeft join on these columns before doing asof join
* @param options.byRight join on these columns before doing asof join
* @param options.strategy One of 'forward', 'backward', 'nearest'
* @param options.suffix Suffix to append to columns with a duplicate name.
* @param options.tolerance
* Numeric tolerance. By setting this the join will only be done if the near keys are within this distance.
* If an asof join is done on columns of dtype "Date", "Datetime" you
* use the following string language:
*
* - 1ns *(1 nanosecond)*
* - 1us *(1 microsecond)*
* - 1ms *(1 millisecond)*
* - 1s *(1 second)*
* - 1m *(1 minute)*
* - 1h *(1 hour)*
* - 1d *(1 day)*
* - 1w *(1 week)*
* - 1mo *(1 calendar month)*
* - 1y *(1 calendar year)*
* - 1i *(1 index count)*
*
* Or combine them:
* - "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
* @param options.allowParallel Allow the physical plan to optionally evaluate the computation of both DataFrames up to the join in parallel.
* @param options.forceParallel Force the physical plan to evaluate the computation of both DataFrames up to the join in parallel.
*
* @example
* ```
* > const gdp = pl.DataFrame({
* ... date: [
* ... new Date('2016-01-01'),
* ... new Date('2017-01-01'),
* ... new Date('2018-01-01'),
* ... new Date('2019-01-01'),
* ... ], // note record date: Jan 1st (sorted!)
* ... gdp: [4164, 4411, 4566, 4696],
* ... })
* > const population = pl.DataFrame({
* ... date: [
* ... new Date('2016-05-12'),
* ... new Date('2017-05-12'),
* ... new Date('2018-05-12'),
* ... new Date('2019-05-12'),
* ... ], // note record date: May 12th (sorted!)
* ... "population": [82.19, 82.66, 83.12, 83.52],
* ... })
* > population.joinAsof(
* ... gdp,
* ... {leftOn:"date", rightOn:"date", strategy:"backward"}
* ... )
* shape: (4, 3)
* ┌─────────────────────┬────────────┬──────┐
* │ date ┆ population ┆ gdp │
* │ --- ┆ --- ┆ --- │
* │ datetime[μs] ┆ f64 ┆ i64 │
* ╞═════════════════════╪════════════╪══════╡
* │ 2016-05-12 00:00:00 ┆ 82.19 ┆ 4164 │
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ 2017-05-12 00:00:00 ┆ 82.66 ┆ 4411 │
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ 2018-05-12 00:00:00 ┆ 83.12 ┆ 4566 │
* ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ 2019-05-12 00:00:00 ┆ 83.52 ┆ 4696 │
* └─────────────────────┴────────────┴──────┘
* ```
*/
joinAsof(other: DataFrame, options: {
leftOn?: string;
rightOn?: string;
on?: string;
byLeft?: string | string[];
byRight?: string | string[];
by?: string | string[];
strategy?: "backward" | "forward" | "nearest";
suffix?: string;
tolerance?: number | string;
allowParallel?: boolean;
forceParallel?: boolean;
}): DataFrame;
lazy(): LazyDataFrame;
/**
* Get first N rows as DataFrame.
* @see {@link head}
*/
limit(length?: number): DataFrame<T>;
map<ReturnT>(func: (row: any[], i: number, arr: any[][]) => ReturnT): ReturnT[];
/**
* Aggregate the columns of this DataFrame to their maximum value.
* ___
* @param axis - either 0 or 1
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.max()
* shape: (1, 3)
* ╭─────┬─────┬──────╮
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞═════╪═════╪══════╡
* │ 3 ┆ 8 ┆ null │
* ╰─────┴─────┴──────╯
* ```
*/
max(): DataFrame<T>;
max(axis: 0): DataFrame<T>;
max(axis: 1): Series;
/**
* Aggregate the columns of this DataFrame to their mean value.
* ___
*
* @param axis - either 0 or 1
* @param nullStrategy - this argument is only used if axis == 1
*/
mean(): DataFrame<T>;
mean(axis: 0): DataFrame<T>;
mean(axis: 1): Series;
mean(axis: 1, nullStrategy?: "ignore" | "propagate"): Series;
/**
* Aggregate the columns of this DataFrame to their median value.
* ___
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.median();
* shape: (1, 3)
* ╭─────┬─────┬──────╮
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ f64 ┆ f64 ┆ str │
* ╞═════╪═════╪══════╡
* │ 2 ┆ 7 ┆ null │
* ╰─────┴─────┴──────╯
* ```
*/
median(): DataFrame<T>;
/**
* Unpivot a DataFrame from wide to long format.
* ___
*
* @param idVars - Columns to use as identifier variables.
* @param valueVars - Values to use as value variables.
* @param variableName - Name to give to the `variable` column. Defaults to "variable"
* @param valueName - Name to give to the `value` column. Defaults to "value"
* @param streamable - Allow this node to run in the streaming engine.
If this runs in streaming, the output of the unpivot operation will not have a stable ordering.
* @example
* ```
* > const df1 = pl.DataFrame({
* ... 'id': [1],
* ... 'asset_key_1': ['123'],
* ... 'asset_key_2': ['456'],
* ... 'asset_key_3': ['abc'],
* ... });
* > df1.unpivot('id', ['asset_key_1', 'asset_key_2', 'asset_key_3']);
* shape: (3, 3)
* ┌─────┬─────────────┬───────┐
* │ id ┆ variable ┆ value │
* │ --- ┆ --- ┆ --- │
* │ f64 ┆ str ┆ str │
* ╞═════╪═════════════╪═══════╡
* │ 1 ┆ asset_key_1 ┆ 123 │
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 1 ┆ asset_key_2 ┆ 456 │
* ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
* │ 1 ┆ asset_key_3 ┆ abc │
* └─────┴─────────────┴───────┘
* ```
*/
unpivot(idVars: ColumnSelection, valueVars: ColumnSelection): DataFrame;
/**
* Aggregate the columns of this DataFrame to their minimum value.
* ___
* @param axis - either 0 or 1
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.min();
* shape: (1, 3)
* ╭─────┬─────┬──────╮
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞═════╪═════╪══════╡
* │ 1 ┆ 6 ┆ null │
* ╰─────┴─────┴──────╯
* ```
*/
min(): DataFrame<T>;
min(axis: 0): DataFrame<T>;
min(axis: 1): Series;
/**
* Get number of chunks used by the ChunkedArrays of this DataFrame.
*/
nChunks(): number;
/**
* Create a new DataFrame that shows the null counts per column.
* ___
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, null, 3],
* ... "bar": [6, 7, null],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.nullCount();
* shape: (1, 3)
* ┌─────┬─────┬─────┐
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ u32 ┆ u32 ┆ u32 │
* ╞═════╪═════╪═════╡
* │ 1 ┆ 1 ┆ 0 │
* └─────┴─────┴─────┘
* ```
*/
nullCount(): DataFrame<{
[K in keyof T]: Series<JsToDtype<number>, K & string>;
}>;
partitionBy(cols: string | string[], stable?: boolean, includeKey?: boolean): DataFrame<T>[];
partitionBy<T>(cols: string | string[], stable: boolean, includeKey: boolean, mapFn: (df: DataFrame) => T): T[];
/**
* Create a spreadsheet-style pivot table as a DataFrame.
*
* @param values The existing column(s) of values which will be moved under the new columns from index. If an
* aggregation is specified, these are the values on which the aggregation will be computed.
* If None, all remaining columns not specified on `on` and `index` will be used.
* At least one of `index` and `values` must be specified.
* @param options.index The column(s) that remain from the input to the output. The output DataFrame will have one row
* for each unique combination of the `index`'s values.
* If None, all remaining columns not specified on `on` and `values` will be used. At least one
* of `index` and `values` must be specified.
* @param options.on The column(s) whose values will be used as the new columns of the output DataFrame.
* @param options.aggregateFunc
* Any of:
* - "sum"
* - "max"
* - "min"
* - "mean"
* - "median"
* - "first"
* - "last"
* - "count"
* Defaults to "first"
* @param options.maintainOrder Sort the grouped keys so that the output order is predictable.
* @param options.sortColumns Sort the transposed columns by name. Default is by order of discovery.
* @param options.separator Used as separator/delimiter in generated column names.
* @example
* ```
* > const df = pl.DataFrame(
* ... {
* ... "foo": ["one", "one", "one", "two", "two", "two"],
* ... "bar": ["A", "B", "C", "A", "B", "C"],
* ... "baz": [1, 2, 3, 4, 5, 6],
* ... }
* ... );
* > df.pivot("baz", {index:"foo", on:"bar"});
* shape: (2, 4)
* ┌─────┬─────┬─────┬─────┐
* │ foo ┆ A ┆ B ┆ C │
* │ --- ┆ --- ┆ --- ┆ --- │
* │ str ┆ f64 ┆ f64 ┆ f64 │
* ╞═════╪═════╪═════╪═════╡
* │ one ┆ 1 ┆ 2 ┆ 3 │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ two ┆ 4 ┆ 5 ┆ 6 │
* └─────┴─────┴─────┴─────┘
* ```
*/
pivot(values: string | string[], options: {
index: string | string[];
on: string | string[];
aggregateFunc?: "sum" | "max" | "min" | "mean" | "median" | "first" | "last" | "count" | Expr;
maintainOrder?: boolean;
sortColumns?: boolean;
separator?: string;
}): DataFrame;
pivot(options: {
values: string | string[];
index: string | string[];
on: string | string[];
aggregateFunc?: "sum" | "max" | "min" | "mean" | "median" | "first" | "last" | "count" | Expr;
maintainOrder?: boolean;
sortColumns?: boolean;
separator?: string;
}): DataFrame;
/**
* Aggregate the columns of this DataFrame to their quantile value.
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.quantile(0.5);
* shape: (1, 3)
* ╭─────┬─────┬──────╮
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞═════╪═════╪══════╡
* │ 2 ┆ 7 ┆ null │
* ╰─────┴─────┴──────╯
* ```
*/
quantile(quantile: number): DataFrame<T>;
/**
* __Rechunk the data in this DataFrame to a contiguous allocation.__
*
* This will make sure all subsequent operations have optimal and predictable performance.
*/
rechunk(): DataFrame<T>;
/**
* __Rename column names.__
* ___
*
* @param mapping - Key value pairs that map from old name to new name.
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // }>
* > df.rename({"foo": "apple"});
* ╭───────┬─────┬─────╮
* │ apple ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ f64 ┆ f64 ┆ str │
* ╞═══════╪═════╪═════╡
* │ 1 ┆ 6 ┆ "a" │
* ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 2 ┆ 7 ┆ "b" │
* ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 3 ┆ 8 ┆ "c" │
* ╰───────┴─────┴─────╯
* ```
*/
rename<const U extends Partial<Record<keyof T, string>>>(mapping: U): DataFrame<{
[K in keyof T as U[K] extends string ? U[K] : K]: T[K];
}>;
/**
* Overload: untyped mapping — renamed columns are not tracked in the result's type parameter.
*/
rename(mapping: Record<string, string>): DataFrame;
/**
* Replace a column at an index location.
*
* @warning typescript cannot encode type mutation,
* so the type of the DataFrame will be incorrect. cast the type of dataframe manually.
* ___
* @param index - Column index
* @param newColumn - New column to insert
* @example
* ```
* > const df: pl.DataFrame = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // }>
* > const x = pl.Series("apple", [10, 20, 30]);
* // x: pl.Series<Float64, "apple">
* > df.replaceAtIdx(0, x);
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">; <- notice how the type is still the same!
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // }>
* shape: (3, 3)
* ╭───────┬─────┬─────╮
* │ apple ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ f64 ┆ f64 ┆ str │
* ╞═══════╪═════╪═════╡
* │ 10 ┆ 6 ┆ "a" │
* ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 20 ┆ 7 ┆ "b" │
* ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 30 ┆ 8 ┆ "c" │
* ╰───────┴─────┴─────╯
* ```
*/
replaceAtIdx(index: number, newColumn: Series): void;
/**
* Get a row as Array
* @param index - row index
* @returns the row's values in column order.
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.row(2)
* [3, 8, 'c']
* ```
*/
row(index: number): Array<any>;
/**
* Convert columnar data to rows as arrays
* @returns an array of rows, each row an array of values in column order.
*/
rows(): Array<Array<any>>;
/**
* The schema of the DataFrame: a mapping from column name to DataType.
* @example
* ```
* > const df: pl.DataFrame = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // }>
* > df.schema
* // {
* // foo: Float64;
* // bar: Float64;
* // ham: Utf8;
* // }
* ```
*/
get schema(): {
[K in keyof T]: T[K]["dtype"];
};
/**
* Select columns from this DataFrame.
* ___
* @param columns - Column or columns to select.
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* // df: pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // bar: pl.Series<Float64, "bar">;
* // ham: pl.Series<Utf8, "ham">;
* // }>
* > df.select('foo');
* // pl.DataFrame<{
* // foo: pl.Series<Float64, "foo">;
* // }>
* shape: (3, 1)
* ┌─────┐
* │ foo │
* │ --- │
* │ i64 │
* ╞═════╡
* │ 1 │
* ├╌╌╌╌╌┤
* │ 2 │
* ├╌╌╌╌╌┤
* │ 3 │
* └─────┘
* ```
*/
select<U extends keyof T>(...columns: U[]): DataFrame<{
[P in U]: T[P];
}>;
/**
* Overload: select via expressions or arbitrary column names — the result's type parameter is not narrowed.
*/
select(...columns: ExprOrString[]): DataFrame<T>;
/**
* Shift the values by a given period and fill the parts that will be empty due to this operation
* with `null` values.
* ___
* @param periods - Number of places to shift (may be negative).
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.shift(1);
* shape: (3, 3)
* ┌──────┬──────┬──────┐
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞══════╪══════╪══════╡
* │ null ┆ null ┆ null │
* ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ 1 ┆ 6 ┆ "a" │
* ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ 2 ┆ 7 ┆ "b" │
* └──────┴──────┴──────┘
* > df.shift(-1)
* shape: (3, 3)
* ┌──────┬──────┬──────┐
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞══════╪══════╪══════╡
* │ 2 ┆ 7 ┆ "b" │
* ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ 3 ┆ 8 ┆ "c" │
* ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
* │ null ┆ null ┆ null │
* └──────┴──────┴──────┘
* ```
*/
shift(periods: number): DataFrame<T>;
shift({ periods }: {
periods: number;
}): DataFrame<T>;
/**
* Shift the values by a given period and fill the parts that will be empty due to this operation
* with the result of the `fillValue` expression.
* ___
* @param opts
* @param opts.n - Number of places to shift (may be negative).
* @param opts.fillValue - fill null values with this value.
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.shiftAndFill({n:1, fillValue:0});
* shape: (3, 3)
* ┌─────┬─────┬─────┐
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ i64 ┆ str │
* ╞═════╪═════╪═════╡
* │ 0 ┆ 0 ┆ "0" │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 1 ┆ 6 ┆ "a" │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 2 ┆ 7 ┆ "b" │
* └─────┴─────┴─────┘
* ```
*/
shiftAndFill(n: number, fillValue: number): DataFrame<T>;
shiftAndFill({ n, fillValue, }: {
n: number;
fillValue: number;
}): DataFrame<T>;
/**
* Shrink memory usage of this DataFrame to fit the exact capacity needed to hold the data.
*
* Without arguments a new, shrunk DataFrame is returned; with `inPlace: true`
* (positional or in an options object) this DataFrame is modified and nothing is returned.
*/
shrinkToFit(): DataFrame<T>;
shrinkToFit(inPlace: true): void;
shrinkToFit({ inPlace }: {
inPlace: true;
}): void;
/**
* Slice this DataFrame over the rows direction.
* ___
* @param opts
* @param opts.offset - Offset index.
* @param opts.length - Length of the slice
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6.0, 7.0, 8.0],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.slice(1, 2); // Alternatively `df.slice({offset:1, length:2})`
* shape: (2, 3)
* ┌─────┬─────┬─────┐
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ i64 ┆ f64 ┆ str │
* ╞═════╪═════╪═════╡
* │ 2 ┆ 7 ┆ "b" │
* ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
* │ 3 ┆ 8 ┆ "c" │
* └─────┴─────┴─────┘
* ```
*/
slice({ offset, length }: {
offset: number;
length: number;
}): DataFrame<T>;
slice(offset: number, length: number): DataFrame<T>;
/**
* Sort the DataFrame by column.
* ___
* @param by - Column(s) to sort by. Accepts expression input, including selectors. Strings are parsed as column names.
* @param reverse - Reverse/descending sort. @deprecated use `descending` instead.
* @param descending - Sort in descending order. When sorting by multiple columns, can be specified per column by passing a sequence of booleans.
* @param nullsLast - Place null values last; can specify a single boolean applying to all columns or a sequence of booleans for per-column control.
* @param maintainOrder - Whether the order should be maintained if elements are equal.
*/
sort(by: ColumnsOrExpr, descending?: boolean, nullsLast?: boolean, maintainOrder?: boolean): DataFrame<T>;
sort({ by, reverse, // deprecated
maintainOrder, }: {
by: ColumnsOrExpr;
/** @deprecated *since 0.16.0* @use descending */
reverse?: boolean;
nullsLast?: boolean;
maintainOrder?: boolean;
}): DataFrame<T>;
sort({ by, descending, maintainOrder, }: {
by: ColumnsOrExpr;
descending?: boolean;
nullsLast?: boolean;
maintainOrder?: boolean;
}): DataFrame<T>;
/**
* Aggregate the columns of this DataFrame to their standard deviation value.
* ___
* @returns DataFrame with a single row holding each column's standard deviation (non-numeric columns yield null).
* @example
* ```
* > const df = pl.DataFrame({
* ... "foo": [1, 2, 3],
* ... "bar": [6, 7, 8],
* ... "ham": ['a', 'b', 'c']
* ... });
* > df.std();
* shape: (1, 3)
* ╭─────┬─────┬──────╮
* │ foo ┆ bar ┆ ham │
* │ --- ┆ --- ┆ --- │
* │ f64 ┆ f64 ┆ str │
* ╞═════╪═════╪══════╡
* │ 1 ┆ 1 ┆ null │
* ╰─────┴─────┴──────╯
* ```
*/
std(): DataFrame<T>;
/**
* Aggregate the columns of this DataFrame to their sum value.
* ___
*
* @param axis - 0 (default) to sum each column, returning a single-row DataFrame;
* 1 to sum across each row, returning a Series.
* @param nullStrategy - this argument is only used if axis == 1
*/
sum(): DataFrame<T>;
sum(axis: 0): DataFrame<T>;
sum(axis: 1): Series;
sum(axis: 1, nullStrategy?: "ignore" | "propagate"): Series;
/**
* Get the last `length` rows of the DataFrame.
* @param length - Number of rows to return.
* @example
* ```
* > const df = pl.DataFrame({
* ... "letters": ["c", "c", "a", "c", "a", "b"],
* ... "nrs": [1, 2, 3, 4, 5, 6]
* ... });
* > console.log(df.toString());
* shape: (6, 2)
* ╭─────────┬─────╮
* │ letters ┆ nrs │
* │ --- ┆ --- │
* │ str ┆ i64 │
* ╞═════════╪═════╡
* │ "c" ┆ 1 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "c" ┆ 2 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "a" ┆ 3 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "c" ┆ 4 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "a" ┆ 5 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "b" ┆ 6 │
* ╰─────────┴─────╯
* > df.groupby("letters")
* ... .tail(2)
* ... .sort("letters")
* shape: (5, 2)
* ╭─────────┬─────╮
* │ letters ┆ nrs │
* │ --- ┆ --- │
* │ str ┆ i64 │
* ╞═════════╪═════╡
* │ "a" ┆ 3 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "a" ┆ 5 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "b" ┆ 6 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "c" ┆ 2 │
* ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
* │ "c" ┆ 4 │
* ╰─────────┴─────╯
* ```
*/
tail(length?: number): DataFrame<T>;
/**
* Converts dataframe object into row oriented javascript objects
* @returns an array of objects, one per row, keyed by column name.
* @example
* ```
* > df.toRecords()
* [
* {"foo":1.0,"bar":"a"},
* {"foo":2.0,"bar":"b"},
* {"foo":3.0,"bar":"c"}
* ]
* ```
* @category IO
*/
toRecords(): {
[K in keyof T]: DTypeToJs<T[K]["dtype"]> | null;
}[];
/**
* compat with `JSON.stringify`
* @ca