// nodejs-polars
// Version:
// Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL
// 539 lines (538 loc) • 21.6 kB
// TypeScript
import { DataType } from "../../datatypes";
import type { StringFunctions } from "../../shared_traits";
import { Expr } from "../expr";
/**
* String functions for lazy expressions.
*
* Every method declared here returns a new `Expr`; the interface also inherits
* the shared `StringFunctions<Expr>` contract.
*/
export interface ExprString extends StringFunctions<Expr> {
/**
* Vertically concat the values in the Expression to a single string value.
* @param delimiter String inserted between consecutive values.
* @param ignoreNulls Presumably controls whether null values are skipped instead of
* being rendered (as in the example below) — TODO confirm semantics and default.
* @example
* ```
* >>> df = pl.DataFrame({"foo": [1, null, 2]})
* >>> df = df.select(pl.col("foo").str.concat("-"))
* >>> df
* shape: (1, 1)
* ┌──────────┐
* │ foo      │
* │ ---      │
* │ str      │
* ╞══════════╡
* │ 1-null-2 │
* └──────────┘
* ```
*/
concat(delimiter: string, ignoreNulls?: boolean): Expr;
/**
* Check if strings in Expression contain a substring that matches a pattern.
* @param pat A valid regular expression pattern, compatible with the `regex` crate
* @param literal Treat `pat` as a literal string, not as a regular expression.
* @param strict Raise an error if the underlying pattern is not a valid regex, otherwise mask out with a null value.
* @returns Boolean mask
* @example
* ```
* const df = pl.DataFrame({"txt": ["Crab", "cat and dog", "rab$bit", null]})
* df.select(
* ... pl.col("txt"),
* ... pl.col("txt").str.contains("cat|bit").alias("regex"),
* ... pl.col("txt").str.contains("rab$", true).alias("literal"),
* ... )
* shape: (4, 3)
* ┌─────────────┬───────┬─────────┐
* │ txt         ┆ regex ┆ literal │
* │ ---         ┆ ---   ┆ ---     │
* │ str         ┆ bool  ┆ bool    │
* ╞═════════════╪═══════╪═════════╡
* │ Crab        ┆ false ┆ false   │
* │ cat and dog ┆ true  ┆ false   │
* │ rab$bit     ┆ true  ┆ true    │
* │ null        ┆ null  ┆ null    │
* └─────────────┴───────┴─────────┘
* ```
*/
contains(pat: string | RegExp | Expr, literal?: boolean, strict?: boolean): Expr;
/**
* Decodes a value in Expression using the provided encoding.
*
* Two call shapes are declared: positional `(encoding, strict?)` and a single
* options object `{ encoding, strict? }`.
* @param encoding - hex | base64
* @param strict - how to handle invalid inputs
*
* - true: method will throw error if unable to decode a value
* - false: unhandled values will be replaced with `null`
* @example
* ```
* >>> df = pl.DataFrame({"strings": ["666f6f", "626172", null]})
* >>> df.select(col("strings").str.decode("hex"))
* shape: (3, 1)
* ┌─────────┐
* │ strings │
* │ ---     │
* │ str     │
* ╞═════════╡
* │ foo     │
* ├╌╌╌╌╌╌╌╌╌┤
* │ bar     │
* ├╌╌╌╌╌╌╌╌╌┤
* │ null    │
* └─────────┘
* ```
*/
decode(encoding: "hex" | "base64", strict?: boolean): Expr;
decode(options: {
encoding: "hex" | "base64";
strict?: boolean;
}): Expr;
/**
* Encodes a value in Expression using the provided encoding
* @param encoding - hex | base64
* @example
* ```
* >>> df = pl.DataFrame({"strings": ["foo", "bar", null]})
* >>> df.select(col("strings").str.encode("hex"))
* shape: (3, 1)
* ┌─────────┐
* │ strings │
* │ ---     │
* │ str     │
* ╞═════════╡
* │ 666f6f  │
* ├╌╌╌╌╌╌╌╌╌┤
* │ 626172  │
* ├╌╌╌╌╌╌╌╌╌┤
* │ null    │
* └─────────┘
* ```
*/
encode(encoding: "hex" | "base64"): Expr;
/** Check if string values in Expression end with a substring.
* @param suffix - Suffix substring or expression
* @example
* ```
* >>> df = pl.DataFrame({"fruits": ["apple", "mango", null]})
* >>> df.withColumns(
* ... pl.col("fruits").str.endsWith("go").alias("has_suffix"),
* ... )
* shape: (3, 2)
* ┌────────┬────────────┐
* │ fruits ┆ has_suffix │
* │ ---    ┆ ---        │
* │ str    ┆ bool       │
* ╞════════╪════════════╡
* │ apple  ┆ false      │
* │ mango  ┆ true       │
* │ null   ┆ null       │
* └────────┴────────────┘
*
* >>> df = pl.DataFrame(
* ... {"fruits": ["apple", "mango", "banana"], "suffix": ["le", "go", "nu"]}
* ... )
* >>> df.withColumns(
* ... pl.col("fruits").str.endsWith(pl.col("suffix")).alias("has_suffix"),
* ... )
* shape: (3, 3)
* ┌────────┬────────┬────────────┐
* │ fruits ┆ suffix ┆ has_suffix │
* │ ---    ┆ ---    ┆ ---        │
* │ str    ┆ str    ┆ bool       │
* ╞════════╪════════╪════════════╡
* │ apple  ┆ le     ┆ true       │
* │ mango  ┆ go     ┆ true       │
* │ banana ┆ nu     ┆ false      │
* └────────┴────────┴────────────┘
*
* Using `endsWith` as a filter condition:
*
* >>> df.filter(pl.col("fruits").str.endsWith("go"))
* shape: (1, 2)
* ┌────────┬────────┐
* │ fruits ┆ suffix │
* │ ---    ┆ ---    │
* │ str    ┆ str    │
* ╞════════╪════════╡
* │ mango  ┆ go     │
* └────────┴────────┘
* ```
*/
endsWith(suffix: string | Expr): Expr;
/**
* Extract the target capture group from provided patterns.
* @param pattern A valid regex pattern
* @param groupIndex Index of the targeted capture group.
* Group 0 means the whole pattern; the first capture group is at index 1.
* NOTE(review): this parameter is required in the signature below, although
* earlier documentation described a default of the first capture group — confirm.
* @returns Utf8 array. Contains null if the original value is null or the regex captures nothing.
* @example
* ```
* > df = pl.DataFrame({
* ... 'a': [
* ... 'http://vote.com/ballon_dor?candidate=messi&ref=polars',
* ... 'http://vote.com/ballon_dor?candidat=jorginho&ref=polars',
* ... 'http://vote.com/ballon_dor?candidate=ronaldo&ref=polars'
* ... ]})
* > df.select(pl.col('a').str.extract(/candidate=(\w+)/, 1))
* shape: (3, 1)
* ┌─────────┐
* │ a       │
* │ ---     │
* │ str     │
* ╞═════════╡
* │ messi   │
* ├╌╌╌╌╌╌╌╌╌┤
* │ null    │
* ├╌╌╌╌╌╌╌╌╌┤
* │ ronaldo │
* └─────────┘
* ```
*/
extract(pattern: string | RegExp | Expr, groupIndex: number): Expr;
/**
* Parse string values in Expression as JSON.
* Throws if an invalid JSON string is encountered.
* @param dtype The dtype to cast the extracted value to. If omitted, the dtype will be inferred from the JSON value.
* @param inferSchemaLength The maximum number of rows to scan for schema inference.
* @returns Expression yielding struct values (see the example output below)
* @example
* ```
* >>> df = pl.DataFrame( {json: ['{"a":1, "b": true}', null, '{"a":2, "b": false}']} )
* >>> df.select(pl.col("json").str.jsonDecode())
* shape: (3, 1)
* ┌─────────────┐
* │ json        │
* │ ---         │
* │ struct[2]   │
* ╞═════════════╡
* │ {1,true}    │
* │ {null,null} │
* │ {2,false}   │
* └─────────────┘
* ```
* @see {@link jsonPathMatch} Extract the first match of json string with provided JSONPath expression.
*/
jsonDecode(dtype?: DataType, inferSchemaLength?: number): Expr;
/**
* Extract the first match of json string in Expression with provided JSONPath expression.
* Throws if an invalid json string is encountered.
* All return values will be cast to Utf8 regardless of the original value.
* @see https://goessner.net/articles/JsonPath/
* @param pat - A valid JSON path query string
* @returns Utf8 array. Contains null if the original value is null or the `jsonPath` returns nothing.
* @example
* ```
* >>> df = pl.DataFrame({
* ... 'json_val': [
* ... '{"a":"1"}',
* ... null,
* ... '{"a":2}',
* ... '{"a":2.1}',
* ... '{"a":true}'
* ... ]
* ... })
* >>> df.select(pl.col('json_val').str.jsonPathMatch('$.a'))
* shape: (5, 1)
* ┌──────────┐
* │ json_val │
* │ ---      │
* │ str      │
* ╞══════════╡
* │ 1        │
* │ null     │
* │ 2        │
* │ 2.1      │
* │ true     │
* └──────────┘
* ```
*/
jsonPathMatch(pat: string): Expr;
/** Get number of chars of the string values in Expression.
* @example
* ```
* df = pl.DataFrame({"a": ["Café", "345", "東京", null]})
* df.withColumns(
* pl.col("a").str.lengths().alias("n_chars"),
* )
* shape: (4, 2)
* ┌──────┬─────────┐
* │ a    ┆ n_chars │
* │ ---  ┆ ---     │
* │ str  ┆ u32     │
* ╞══════╪═════════╡
* │ Café ┆ 4       │
* │ 345  ┆ 3       │
* │ 東京 ┆ 2       │
* │ null ┆ null    │
* └──────┴─────────┘
* ```
*/
lengths(): Expr;
/** Remove leading whitespace of the string values in Expression. */
lstrip(): Expr;
/** Replace first match with a string value in Expression.
* @param pattern - A valid regex pattern, string or expression
* @param value Substring or expression to replace.
* @param literal Treat pattern as a literal string.
* @param n Presumably the number of matches to replace — TODO confirm semantics and default.
*
* Note: pattern as expression is not yet supported by polars
* @example
* ```
* df = pl.DataFrame({"cost": ["#12.34", "#56.78"], "text": ["123abc", "abc456"]})
* df = df.withColumns(
* pl.col("cost").str.replace(/#(\d+)/, "$$$1"),
* pl.col("text").str.replace("ab", "-"),
* pl.col("text").str.replace("abc", pl.col("cost")).alias("expr")
* );
* shape: (2, 3)
* ┌────────┬───────┬───────────┐
* │ cost   ┆ text  ┆ expr      │
* │ ---    ┆ ---   ┆ ---       │
* │ str    ┆ str   ┆ str       │
* ╞════════╪═══════╪═══════════╡
* │ $12.34 ┆ 123-c ┆ 123#12.34 │
* │ $56.78 ┆ -c456 ┆ #56.78456 │
* └────────┴───────┴───────────┘
* ```
*/
replace(pattern: string | RegExp | Expr, value: string | Expr, literal?: boolean, n?: number): Expr;
/** Replace all regex matches with a string value in Expression.
* @param pattern - A valid regex pattern, string or expression
* @param value Substring or expression to replace.
* @param literal Treat pattern as a literal string.
*
* Note: pattern as expression is not yet supported by polars
* @example
* ```
* df = pl.DataFrame({"weather": ["Rainy", "Sunny", "Cloudy", "Snowy"], "text": ["abcabc", "123a123", null, null]})
* df = df.withColumns(
* pl.col("weather").str.replaceAll(/foggy|rainy/i, "Sunny"),
* pl.col("text").str.replaceAll("a", "-")
* )
* shape: (4, 2)
* ┌─────────┬─────────┐
* │ weather ┆ text    │
* │ ---     ┆ ---     │
* │ str     ┆ str     │
* ╞═════════╪═════════╡
* │ Sunny   ┆ -bc-bc  │
* │ Sunny   ┆ 123-123 │
* │ Cloudy  ┆ null    │
* │ Snowy   ┆ null    │
* └─────────┴─────────┘
* ```
*/
replaceAll(pattern: string | RegExp | Expr, value: string | Expr, literal?: boolean): Expr;
/** Modify the strings in Expression to their lowercase equivalent. */
toLowerCase(): Expr;
/** Modify the strings in Expression to their uppercase equivalent. */
toUpperCase(): Expr;
/** Remove trailing whitespace. */
rstrip(): Expr;
/**
* Add a leading fillChar to a string in Expression until string length is reached.
* If the string is longer than or equal to the given length, no modifications will be done.
* @param length - length of the final string
* @param fillChar - character that will fill the string.
* If a string longer than 1 character is provided only the first character will be used
* @example
* ```
* > df = pl.DataFrame({
* ... 'foo': [
* ... "a",
* ... "b",
* ... "LONG_WORD",
* ... "cow"
* ... ]})
* > df.select(pl.col('foo').str.padStart(3, "_"))
* shape: (4, 1)
* ┌──────────┐
* │ foo      │
* │ ---      │
* │ str      │
* ╞══════════╡
* │ __a      │
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ __b      │
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ LONG_WORD│
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ cow      │
* └──────────┘
* ```
*/
padStart(length: number, fillChar: string): Expr;
/**
* Add leading "0" to a string until string length is reached.
* If the string is longer than or equal to the given length, no modifications will be done.
* @param length - length of the final string, given as a number or an expression
* @see {@link padStart}
* @example
* ```
* > df = pl.DataFrame({
* ... 'foo': [
* ... "a",
* ... "b",
* ... "LONG_WORD",
* ... "cow"
* ... ]})
* > df.select(pl.col('foo').str.zFill(3))
* shape: (4, 1)
* ┌──────────┐
* │ foo      │
* │ ---      │
* │ str      │
* ╞══════════╡
* │ 00a      │
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ 00b      │
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ LONG_WORD│
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ cow      │
* └──────────┘
* ```
*/
zFill(length: number | Expr): Expr;
/**
* Add a trailing fillChar to a string until string length is reached.
* If the string is longer than or equal to the given length, no modifications will be done.
* @param length - length of the final string
* @param fillChar - character that will fill the string.
* Note: If a string longer than 1 character is provided only the first character will be used
* @example
* ```
* > df = pl.DataFrame({
* ... 'foo': [
* ... "a",
* ... "b",
* ... "LONG_WORD",
* ... "cow"
* ... ]})
* > df.select(pl.col('foo').str.padEnd(3, "_"))
* shape: (4, 1)
* ┌──────────┐
* │ foo      │
* │ ---      │
* │ str      │
* ╞══════════╡
* │ a__      │
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ b__      │
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ LONG_WORD│
* ├╌╌╌╌╌╌╌╌╌╌┤
* │ cow      │
* └──────────┘
* ```
*/
padEnd(length: number, fillChar: string): Expr;
/**
* Create subslices of the string values in Expression.
* @param start - Start of the slice (negative indexing may be used).
* @param length - Optional length of the slice.
*/
slice(start: number | Expr, length?: number | Expr): Expr;
/**
* Split a string into substrings using the specified separator.
* @param by - A string that identifies character or characters to use in separating the string.
* @param options.inclusive Include the split character/string in the results
*
* NOTE(review): `options` may also be passed as a bare boolean; presumably that is
* shorthand for `inclusive` — confirm against the implementation.
*/
split(by: string, options?: {
inclusive?: boolean;
} | boolean): Expr;
/** Check if string values start with a substring.
* @param prefix - Prefix substring or expression
* @example
* ```
* >>> df = pl.DataFrame({"fruits": ["apple", "mango", null]})
* >>> df.withColumns(
* ... pl.col("fruits").str.startsWith("app").alias("has_prefix"),
* ... )
* shape: (3, 2)
* ┌────────┬────────────┐
* │ fruits ┆ has_prefix │
* │ ---    ┆ ---        │
* │ str    ┆ bool       │
* ╞════════╪════════════╡
* │ apple  ┆ true       │
* │ mango  ┆ false      │
* │ null   ┆ null       │
* └────────┴────────────┘
*
* >>> df = pl.DataFrame(
* ... {"fruits": ["apple", "mango", "banana"], "prefix": ["app", "na", "ba"]}
* ... )
* >>> df.withColumns(
* ... pl.col("fruits").str.startsWith(pl.col("prefix")).alias("has_prefix"),
* ... )
* shape: (3, 3)
* ┌────────┬────────┬────────────┐
* │ fruits ┆ prefix ┆ has_prefix │
* │ ---    ┆ ---    ┆ ---        │
* │ str    ┆ str    ┆ bool       │
* ╞════════╪════════╪════════════╡
* │ apple  ┆ app    ┆ true       │
* │ mango  ┆ na     ┆ false      │
* │ banana ┆ ba     ┆ true       │
* └────────┴────────┴────────────┘
*
* Using `startsWith` as a filter condition:
*
* >>> df.filter(pl.col("fruits").str.startsWith("app"))
* shape: (1, 2)
* ┌────────┬────────┐
* │ fruits ┆ prefix │
* │ ---    ┆ ---    │
* │ str    ┆ str    │
* ╞════════╪════════╡
* │ apple  ┆ app    │
* └────────┴────────┘
* ```
*/
startsWith(prefix: string | Expr): Expr;
/** Remove leading and trailing whitespace. */
strip(): Expr;
/**
* Parse string values in Expression to Date/Datetime values.
*
* Overloads accept `DataType.Date`, `DataType.Datetime`, or `typeof DataType.Datetime`
* as the target type.
* @param datatype Date or Datetime.
* @param fmt formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html)
*/
strptime(datatype: DataType.Date, fmt?: string): Expr;
strptime(datatype: DataType.Datetime, fmt?: string): Expr;
strptime(datatype: typeof DataType.Datetime, fmt?: string): Expr;
/** Remove leading and trailing characters.
* @param prefix - Characters to strip, as a string or expression (null means whitespace).
* Despite the parameter name, the example below shows characters being removed from both ends.
* @example
* ```
* >>> df = pl.DataFrame({
* os: [
* "#Kali-Linux###",
* "$$$Debian-Linux$",
* null,
* "Ubuntu-Linux ",
* " Mac-Sierra",
* ],
* chars: ["#", "$", " ", " ", null],
* })
* >>> df.select(col("os").str.stripChars(col("chars")).as("os"))
* shape: (5, 1)
* ┌──────────────┐
* │ os           │
* │ ---          │
* │ str          │
* ╞══════════════╡
* │ Kali-Linux   │
* │ Debian-Linux │
* │ null         │
* │ Ubuntu-Linux │
* │ Mac-Sierra   │
* └──────────────┘
* ```
*/
stripChars(prefix: string | Expr): Expr;
/** Remove trailing characters.
* @param prefix - Characters to strip, as a string or expression (null means whitespace)
* @see {@link stripChars}
*/
stripCharsEnd(prefix: string | Expr): Expr;
/** Remove leading characters.
* @param prefix - Characters to strip, as a string or expression (null means whitespace)
* @see {@link stripChars}
*/
stripCharsStart(prefix: string | Expr): Expr;
}
/**
* Factory for the string namespace: takes an expression handle and returns an
* object implementing `ExprString`.
*
* NOTE(review): `_expr` is typed `any`; presumably it is the underlying
* expression object that the returned methods operate on — confirm against the
* implementation before narrowing the type.
*/
export declare const ExprStringFunctions: (_expr: any) => ExprString;