UNPKG

nodejs-polars

Version:

Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL

258 lines (257 loc) 8.98 kB
import { type Series } from ".";
import type { DataType } from "../datatypes";
import type { StringFunctions } from "../shared_traits";
import type { Expr } from "./../lazy/expr/index";
/**
 * Namespace containing the string functions available on a Series
 * (reached through `series.str` in the examples below).
 */
export interface StringNamespace extends StringFunctions<Series> {
  /**
   * Vertically concat the values in the Series to a single string value.
   * @param delimiter - String inserted between consecutive values.
   * @param ignoreNulls - NOTE(review): presumably controls whether null values are skipped;
   *   the example below shows nulls rendered as "null" when it is omitted — confirm the default.
   * @example
   * ```
   * > pl.Series([1, null, 2]).str.concat("-")[0]
   * '1-null-2'
   * ```
   */
  concat(delimiter: string, ignoreNulls?: boolean): Series;
  /**
   * Check if strings in Series contain regex pattern.
   * @param pattern - A valid regex pattern
   * @returns Boolean mask
   */
  contains(pattern: string | RegExp): Series;
  /**
   * Decodes a value using the provided encoding
   * @param encoding - hex | base64
   * @param strict - how to handle invalid inputs
   *
   *     - true: method will throw error if unable to decode a value
   *     - false: unhandled values will be replaced with `null`
   * @example
   * ```
   * s = pl.Series("strings", ["666f6f", "626172", null])
   * s.str.decode("hex")
   * shape: (3,)
   * Series: 'strings' [str]
   * [
   *     "foo",
   *     "bar",
   *     null
   * ]
   * ```
   */
  decode(encoding: "hex" | "base64", strict?: boolean): Series;
  /**
   * Decodes a value using the provided encoding; same fields as the positional
   * overload, passed as a single options object.
   * @param options - `encoding` (hex | base64) and optional `strict` flag
   */
  decode(options: {
    encoding: "hex" | "base64";
    strict?: boolean;
  }): Series;
  /**
   * Encodes a value using the provided encoding
   * @param encoding - hex | base64
   * @example
   * ```
   * s = pl.Series("strings", ["foo", "bar", null])
   * s.str.encode("hex")
   * shape: (3,)
   * Series: 'strings' [str]
   * [
   *     "666f6f",
   *     "626172",
   *     null
   * ]
   * ```
   */
  encode(encoding: "hex" | "base64"): Series;
  /**
   * Extract the target capture group from provided patterns.
   * @param pattern - A valid regex pattern (typed `any` here; the example passes a RegExp)
   * @param groupIndex - Index of the targeted capture group.
   *   Group 0 means the whole pattern; the first group begins at index 1.
   *   NOTE(review): earlier docs said this defaults to the first capture group, but the
   *   signature marks the argument as required — confirm against the implementation.
   * @returns Utf8 array. Contains null if the original value is null or the regex captures nothing.
   * @example
   * ```
   * > df = pl.DataFrame({
   * ...   'a': [
   * ...       'http://vote.com/ballon_dor?candidate=messi&ref=polars',
   * ...       'http://vote.com/ballon_dor?candidat=jorginho&ref=polars',
   * ...       'http://vote.com/ballon_dor?candidate=ronaldo&ref=polars'
   * ...   ]})
   * > df.getColumn("a").str.extract(/candidate=(\w+)/, 1)
   * shape: (3, 1)
   * ┌─────────┐
   * │ a       │
   * │ ---     │
   * │ str     │
   * ╞═════════╡
   * │ messi   │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ null    │
   * ├╌╌╌╌╌╌╌╌╌┤
   * │ ronaldo │
   * └─────────┘
   * ```
   */
  extract(pattern: any, groupIndex: number): Series;
  /**
   * Parse string values as JSON.
   * @param dtype - The dtype to cast the parsed value to. If omitted, the dtype will be inferred from the JSON value.
   * @param inferSchemaLength - How many rows to parse to determine the schema. If `null` all rows are used.
   * @returns Series of the parsed values (a struct Series in the example below, where a
   *   null input yields a struct of nulls).
   * @example
   * ```
   * s = pl.Series("json", ['{"a":1, "b": true}', null, '{"a":2, "b": false}']);
   * s.str.jsonDecode().as("json");
   * shape: (3,)
   * Series: 'json' [struct[2]]
   * [
   *     {1,true}
   *     {null,null}
   *     {2,false}
   * ]
   * ```
   */
  jsonDecode(dtype?: DataType, inferSchemaLength?: number): Series;
  /**
   * Extract the first match of a JSON string with the provided JSONPath expression.
   * Throws if it encounters an invalid JSON string.
   * All returned values are cast to Utf8 regardless of the original value.
   * @see https://goessner.net/articles/JsonPath/
   * @param jsonPath - A valid JSON path query string
   * @returns Utf8 array. Contains null if the original value is null or the `jsonPath` returns nothing.
   * @example
   * ```
   * > s = pl.Series('json_val', [
   * ...   '{"a":"1"}',
   * ...   null,
   * ...   '{"a":2}',
   * ...   '{"a":2.1}',
   * ...   '{"a":true}'
   * ... ])
   * > s.str.jsonPathMatch('$.a')
   * shape: (5,)
   * Series: 'json_val' [str]
   * [
   *     "1"
   *     null
   *     "2"
   *     "2.1"
   *     "true"
   * ]
   * ```
   */
  jsonPathMatch(jsonPath: string): Series;
  /** Get length of the string values in the Series. */
  lengths(): Series;
  /** Remove leading whitespace. */
  lstrip(): Series;
  /**
   * Add a leading fillChar to a string until string length is reached.
   * If string is longer or equal to given length no modifications will be done
   * @param length - of the final string
   * @param fillChar - that will fill the string.
   * @note If a string longer than 1 character is provided only the first character will be used
   * @example
   * ```
   * > df = pl.DataFrame({
   * ...   'foo': [
   * ...       "a",
   * ...       "b",
   * ...       "LONG_WORD",
   * ...       "cow"
   * ...   ]})
   * > df.select(pl.col('foo').str.padStart(3, "_"))
   * shape: (4, 1)
   * ┌──────────┐
   * │ a        │
   * │ -------- │
   * │ str      │
   * ╞══════════╡
   * │ __a      │
   * ├╌╌╌╌╌╌╌╌╌╌┤
   * │ __b      │
   * ├╌╌╌╌╌╌╌╌╌╌┤
   * │ LONG_WORD│
   * ├╌╌╌╌╌╌╌╌╌╌┤
   * │ cow      │
   * └──────────┘
   * ```
   */
  padStart(length: number, fillChar: string): Series;
  /**
   * Add a leading '0' to a string until string length is reached.
   * If string is longer or equal to given length no modifications will be done
   * @param length - of the final string (a number or an expression)
   * @example
   * ```
   * > df = pl.DataFrame({
   * ...   'foo': [
   * ...       "a",
   * ...       "b",
   * ...       "LONG_WORD",
   * ...       "cow"
   * ...   ]})
   * > df.select(pl.col('foo').str.zFill(3))
   * shape: (4, 1)
   * ┌──────────┐
   * │ a        │
   * │ -------- │
   * │ str      │
   * ╞══════════╡
   * │ 00a      │
   * ├╌╌╌╌╌╌╌╌╌╌┤
   * │ 00b      │
   * ├╌╌╌╌╌╌╌╌╌╌┤
   * │ LONG_WORD│
   * ├╌╌╌╌╌╌╌╌╌╌┤
   * │ cow      │
   * └──────────┘
   * ```
   */
  zFill(length: number | Expr): Series;
  /**
   * Add trailing `fillChar` characters to a string until the given length is reached
   * (the trailing counterpart of `padStart`).
   * NOTE(review): presumably follows the same rules as `padStart` for strings that are
   * already long enough and for multi-character `fillChar` — confirm.
   * @param length - of the final string
   * @param fillChar - that will fill the string.
   */
  padEnd(length: number, fillChar: string): Series;
  /**
   * Replace first regex match with a string value.
   * @param pattern - A valid regex pattern
   * @param value - Replacement string substituted for the match.
   */
  replace(pattern: string | RegExp, value: string): Series;
  /**
   * Replace all regex matches with a string value.
   * @param pattern - A valid regex pattern
   * @param value - Replacement string substituted for each match.
   */
  replaceAll(pattern: string | RegExp, value: string): Series;
  /** Modify the strings to their lowercase equivalent. */
  toLowerCase(): Series;
  /** Modify the strings to their uppercase equivalent. */
  toUpperCase(): Series;
  /** Remove trailing whitespace. */
  rstrip(): Series;
  /** Remove leading and trailing whitespace. */
  strip(): Series;
  /**
   * Create subslices of the string values of a Utf8 Series.
   * @param start - Start of the slice (negative indexing may be used).
   * @param length - Optional length of the slice.
   */
  slice(start: number | Expr, length?: number | Expr): Series;
  /**
   * Split a string into substrings using the specified separator.
   * The return type will be of type List<Utf8>
   * @param separator - A string that identifies character or characters to use in separating the string.
   * @param options - Either an object with an `inclusive` flag or a bare boolean
   *   (presumably shorthand for `inclusive` — confirm). When inclusive, the split
   *   character/string is included in the results.
   */
  split(separator: string, options?: {
    inclusive?: boolean;
  } | boolean): Series;
  /**
   * Parse a Series of dtype Utf8 to a Date/Datetime Series.
   * The overloads below differ only in how the target type is spelled; the last one
   * accepts the `DataType.Datetime` value itself (`typeof DataType.Datetime`).
   * @param datatype - Date or Datetime.
   * @param fmt - formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strptime/index.html)
   */
  strptime(datatype: DataType.Date, fmt?: string): Series;
  strptime(datatype: DataType.Datetime, fmt?: string): Series;
  strptime(datatype: typeof DataType.Datetime, fmt?: string): Series;
}
/**
 * Factory that builds the `StringNamespace` for a series.
 * @param _s - Typed `any` in this declaration; presumably the underlying series
 *   (or its native handle) — confirm against the implementation.
 * @returns The string namespace bound to `_s`.
 */
export declare const SeriesStringFunctions: (_s: any) => StringNamespace;