@berkelium/berkelium

/**
 * Ambient declaration of the `DataFrame` class.
 *
 * The declared surface covers: structural accessors (`columns`, `index`, `shape`, `dTypes`),
 * data-type checks, descriptive statistics, cleaning helpers (`dedup`, `dropna`, `fillna`),
 * and row/column operations (`filter`, `groupBy`, `select`, `insert`, `update`, `delete`,
 * `transform`). Only signatures are visible here; behavioral notes below restate the
 * documentation shipped with the declarations.
 */
export declare class DataFrame {
  /** Internal storage for the data passed to the constructor (type elided in this declaration). */
  private data;

  /**
   * Initializes a new instance of the DataFrame class with the provided data.
   *
   * NOTE(review): the declared element type is `Record<string, any[]>` (every field value is an
   * array), while row-oriented members such as `filter`, `print` and `getWrongTypeRows` use
   * `Record<string, any>`. Confirm against the implementation which shape is actually expected.
   *
   * @param {Record<string, any[]>[]} data - An array of records where each record
   * contains a string key and an array of any type values, representing the data
   * to be stored in the DataFrame.
   */
  constructor(data: Record<string, any[]>[]);

  /**
   * Gets the column names of the DataFrame.
   *
   * @returns {string[]} - An array of strings containing the column names of the DataFrame.
   */
  get columns(): string[];

  /**
   * Gets the index labels of the DataFrame.
   *
   * @returns {number[]} - An array of numbers representing the index labels of the DataFrame.
   * Each index represents the position of the row in the data.
   */
  get index(): number[];

  /**
   * Gets the shape of the DataFrame as a tuple of two numbers.
   * The first number is the number of rows and the second number is the number of columns.
   *
   * @returns {[number, number]} - A `[rows, columns]` tuple describing the shape of the DataFrame.
   */
  get shape(): [number, number];

  /**
   * Gets the data type of each column in the DataFrame.
   *
   * @returns {Record<string, DataType>} - An object mapping each column name to its data type.
   * If the DataFrame is empty, returns an empty object.
   */
  get dTypes(): Record<string, DataType>;

  /**
   * Checks if all values in the specified column have the same data type.
   *
   * @param {string} column - The name of the column to check.
   * @returns {boolean} - `true` if all values in the column have the same data type as the
   * column's data type; `false` otherwise.
   */
  isSameType(column: string): boolean;

  /**
   * Returns the rows whose value in the specified column has a different data type than the
   * data type of that column.
   * Each returned object contains the original row data with an additional "index" property
   * set to the index of the row in the original DataFrame.
   * If no rows have a different data type, an empty array is returned.
   *
   * @param {string} column - The name of the column to check.
   * @returns {Record<string, any>[]} - An array of objects representing the rows with a
   * different data type.
   */
  getWrongTypeRows(column: string): Record<string, any>[];

  /**
   * Updates the value of a specific element in the DataFrame at the specified index and column.
   *
   * @param {number} index - The index of the row to update.
   * @param {string} column - The name of the column to update.
   * @param {any} value - The new value to assign to the element.
   */
  updateElement(index: number, column: string, value: any): void;

  /**
   * Deletes the specified rows from the DataFrame.
   *
   * @param {number[]} indices - An array of indices of the rows to delete.
   */
  deleteObservations(indices: number[]): void;

  /**
   * Gets the first n rows of the DataFrame.
   *
   * @param {number} [n=5] - The number of rows to return.
   * @returns {DataFrame} - A new DataFrame containing the first n rows of the original DataFrame.
   */
  head(n?: number): DataFrame;

  /**
   * Gets the last n rows of the DataFrame.
   *
   * @param {number} [n=5] - The number of rows to return.
   * @returns {DataFrame} - A new DataFrame containing the last n rows of the original DataFrame.
   */
  tail(n?: number): DataFrame;

  /**
   * Returns a `DataFrame`; judging by the name, presumably a copy of this one.
   *
   * NOTE(review): this member shipped without documentation. Whether the copy is deep or
   * shallow cannot be determined from this declaration; confirm against the implementation.
   *
   * @returns {DataFrame} - The resulting DataFrame.
   */
  copy(): DataFrame;

  /**
   * Returns an object containing information about the DataFrame.
   *
   * @returns { { shape: [number, number], columns: string[], dTypes: Record<string, DataType> } }
   * An object with the following properties:
   * - `shape`: A tuple of two numbers representing the number of rows and columns in the DataFrame.
   * - `columns`: An array of strings representing the column labels of the DataFrame.
   * - `dTypes`: An object mapping each column name to its data type.
   */
  info(): {
    shape: [number, number];
    columns: string[];
    dTypes: Record<string, DataType>;
  };

  /**
   * Returns the minimum value in the specified column.
   *
   * @param {string} column - The name of the column to find the minimum value in.
   * @returns {number} - The minimum value in the specified column.
   */
  min(column: string): number;

  /**
   * Returns the maximum value in the specified column.
   *
   * @param {string} column - The name of the column to find the maximum value in.
   * @returns {number} - The maximum value in the specified column.
   */
  max(column: string): number;

  /**
   * Calculates the quartiles of the given column.
   *
   * @param {string} column - The name of the column to calculate the quartiles for.
   * @returns {{ '25%': number, '50%': number, '75%': number }} An object containing the 25th,
   * 50th, and 75th percentiles of the column.
   */
  quartiles(column: string): {
    '25%': number;
    '50%': number;
    '75%': number;
  };

  /**
   * Returns the median value of the specified column.
   *
   * @param {string} column - The name of the column to find the median value in.
   * @returns {number} - The median value in the specified column.
   */
  median(column: string): number;

  /**
   * Calculates the mean value of the specified column.
   *
   * @param {string} column - The name of the column to calculate the mean value for.
   * @returns {number} - The mean value in the specified column.
   */
  mean(column: string): number;

  /**
   * Returns the mode of the specified column.
   *
   * - If the column has no modes, returns undefined.
   * - If the column has one mode, returns that mode.
   * - If the column has multiple modes, returns the maximum of the modes.
   *
   * @param {string} column - The name of the column to find the mode of.
   * @returns {number | undefined} - The mode of the column, or undefined if no mode exists.
   * @throws If the column is non-numeric.
   */
  mode(column: string): number | undefined;

  /**
   * Calculates the standard deviation of the specified column.
   *
   * @param {string} column - The name of the column to calculate the standard deviation for.
   * @returns {number} - The standard deviation of the column.
   */
  std(column: string): number;

  /**
   * Counts the number of rows in the DataFrame that have a value in the given column.
   *
   * @param {string} column - The name of the column to count.
   * @returns {number} - The number of rows in the DataFrame that have a value in the given column.
   */
  count(column: string): number;

  /**
   * Returns a summary of the DataFrame's columns.
   *
   * If `categorical` is true, returns a dictionary where the keys are the column names and the
   * values are an object with the following properties:
   * - `count`: The number of rows in the DataFrame that have a value in the given column.
   * - `unique`: The number of unique values in the given column.
   * - `top`: The most frequent value in the given column.
   * - `freq`: The frequency of the most frequent value in the given column.
   *
   * If `categorical` is false, returns a dictionary where the keys are the column names and the
   * values are an object with the following properties:
   * - `count`: The number of rows in the DataFrame that have a value in the given column.
   * - `mean`: The mean of the given column.
   * - `std`: The standard deviation of the given column.
   * - `min`: The minimum value of the given column.
   * - `25%`: The 25th percentile of the given column.
   * - `50%`: The 50th percentile of the given column.
   * - `75%`: The 75th percentile of the given column.
   * - `max`: The maximum value of the given column.
   *
   * @param {boolean} [categorical=false] - Whether to calculate summary statistics for
   * categorical or numerical columns.
   * @returns {Record<string, any>} - A dictionary with the summary statistics for each column
   * in the DataFrame.
   */
  describe(categorical?: boolean): Record<string, any>;

  /**
   * Returns true if the specified column contains a null or undefined value in any row of the
   * DataFrame.
   *
   * @param {string} column - The name of the column to check.
   * @returns {boolean} - True if the column contains at least one null or undefined value,
   * false otherwise.
   */
  isNull(column: string): boolean;

  /**
   * Renames a column in the DataFrame.
   *
   * @param {string} oldName - The current name of the column to rename.
   * @param {string} newName - The new name for the column.
   * @returns {void}
   */
  renameColumn(oldName: string, newName: string): void;

  /**
   * Checks if the DataFrame contains any undefined values in any column.
   *
   * @returns {boolean} - True if the DataFrame contains at least one undefined value,
   * false otherwise.
   */
  hasUndefined(): boolean;

  /**
   * Checks if the DataFrame contains any rows where the type of a value does not match the
   * type of its column.
   *
   * @returns {boolean} - True if the DataFrame contains at least one row with a value of the
   * wrong type, false otherwise.
   */
  hasWrongDataTypes(): boolean;

  /**
   * Checks if the DataFrame contains any duplicate rows.
   *
   * @returns {boolean} - True if the DataFrame contains duplicate rows, false otherwise.
   */
  hasDuplicates(): boolean;

  /**
   * Returns a new DataFrame containing only the unique rows from the original DataFrame.
   * The original DataFrame is not modified.
   *
   * @returns {DataFrame} - A new DataFrame with unique rows.
   */
  dedup(): DataFrame;

  /**
   * Removes rows from the DataFrame that contain undefined values in any column.
   *
   * @returns {DataFrame} - A new DataFrame with rows containing undefined values removed.
   */
  dropna(): DataFrame;

  /**
   * Fills null or undefined values in the DataFrame with the specified value.
   *
   * If a column name is specified, only that column will be filled. Otherwise,
   * all columns will be filled.
   *
   * @param {any} value - The value to fill null or undefined values with.
   * @param {string} [column] - The column to fill, if only one column should be filled.
   * @returns {DataFrame} - A new DataFrame with null or undefined values filled.
   */
  fillna(value: any, column?: string): DataFrame;

  /**
   * Counts the occurrences of each unique value in the specified column.
   *
   * @param {string} column - The name of the column to count unique values for.
   * @returns {Record<any, number>} - An object mapping each unique value in the column
   * to the number of times it appears in the DataFrame.
   */
  valueCounts(column: string): Record<any, number>;

  /**
   * Selects columns from the DataFrame that match the given data types.
   *
   * @param {DataType[]} types - The data types to select columns for.
   * @returns {DataFrame} - A new DataFrame with columns filtered by the given data types.
   */
  selectDtypes(types: DataType[]): DataFrame;

  /**
   * Filters the DataFrame to only include rows that satisfy the given predicate.
   *
   * @param { (row: Record<string, any>) => boolean } predicate - A function that takes
   * a row as an argument and returns a boolean indicating whether the row should be
   * included in the filtered DataFrame.
   * @returns {DataFrame} - A new DataFrame containing only the filtered rows.
   */
  filter(predicate: (row: Record<string, any>) => boolean): DataFrame;

  /**
   * Groups the DataFrame by the given column and returns a new object with the unique values
   * of the column as keys and the corresponding DataFrames as values.
   *
   * @param {string} col - The column to group by.
   * @returns {Record<string, DataFrame>} - A new object with the grouped DataFrames.
   */
  groupBy(col: string): Record<string, DataFrame>;

  /**
   * Selects columns from the DataFrame and returns a new DataFrame with the selected columns.
   *
   * @param {Array<string>} columnNames - An array of column names to select from the DataFrame.
   * @returns {DataFrame} - A new DataFrame with the selected columns.
   */
  select(columnNames: Array<string>): DataFrame;

  /**
   * Inserts a new column into the DataFrame with the given data array.
   *
   * @param {string} column - The name of the column to be inserted.
   * @param {any[]} dataArray - An array of data to populate the new column. The length of this
   * array should match the number of rows in the DataFrame.
   * @returns {DataFrame} - A new DataFrame with the inserted column.
   */
  insert(column: string, dataArray: any[]): DataFrame;

  /**
   * Updates a column in the DataFrame with the given data array.
   *
   * @param {string} column - The name of the column to be updated.
   * @param {any[]} dataArray - An array of data to populate the column. The length of this
   * array should match the number of rows in the DataFrame.
   * @returns {DataFrame} - A new DataFrame with the updated column.
   */
  update(column: string, dataArray: any[]): DataFrame;

  /**
   * Deletes a column from the DataFrame.
   *
   * @param {string} column - The name of the column to be deleted.
   * @returns {DataFrame} - A new DataFrame with the deleted column.
   */
  delete(column: string): DataFrame;

  /**
   * Calculates the variance of each numerical column in the DataFrame.
   *
   * @returns {Record<string, any>[]} - An array of objects, each containing the name of a
   * numerical column and its variance.
   */
  var(): Record<string, any>[];

  /**
   * Calculates the variance of the specified column.
   *
   * @param {string} column - The name of the column to calculate the variance for.
   * @returns {number} - The variance of the column.
   * @private
   */
  private calculateVariance;

  /**
   * Extracts the values of the specified column from each row in the DataFrame.
   *
   * @param {string} col - The name of the column to extract values from.
   * @returns {any[]} - An array containing the values of the specified column from each row.
   */
  array(col: string): any[];

  /**
   * Applies a transformation to the specified column in the DataFrame.
   *
   * @param {string} column - The name of the column to transform.
   * @param {(value: any) => any} fn - The transformation function to apply to each value in the
   * specified column. The function should take a single argument, the value of the column in the
   * current row, and return the transformed value.
   * @returns {DataFrame} - A new DataFrame with the transformed column.
   */
  transform(column: string, fn: (value: any) => any): DataFrame;

  /**
   * Returns an array of unique values in the specified column of the DataFrame.
   *
   * @param {string} column - The name of the column to extract unique values from.
   * @returns {any[]} - An array of unique values in the specified column.
   */
  unique(column: string): any[];

  /**
   * Returns the DataFrame data as an array of objects, which can be logged to the console.
   *
   * NOTE(review): the original summary also said this "prints the DataFrame to the console";
   * whether the method itself writes to the console is not visible from this declaration.
   *
   * @returns {Record<string, any>[]} - The DataFrame data as an array of objects.
   */
  print(): Record<string, any>[];

  /**
   * Determines the most frequent data type in the given array.
   *
   * @param {any[]} arr - An array of values to analyze.
   * @returns {DataType} - The data type that appears most frequently in the array.
   * If multiple data types have the same frequency, one of them is returned.
   * Excludes 'undefined' types from consideration.
   */
  private mostFrequentType;

  /**
   * Calculates the mode(s) of a given array of numbers.
   *
   * @param {number[]} values - An array of numbers for which to calculate the mode(s).
   * @returns {number[]} - An array containing the mode(s) of the input array.
   * If all numbers appear with the same frequency, returns an empty array.
   */
  private calculateMode;

  /**
   * Calculates the frequency of each item in a given array.
   *
   * @param {Array<any>} arr - An array of values for which to calculate the frequency.
   * @returns {Map<any, number>} - A Map where the keys are the items in the array and the values
   * are the number of times each item appears in the array. If an item is undefined, it is
   * ignored. If the input array is empty, an empty Map is returned.
   */
  private calculateFrequency;

  /**
   * Calculates the percentile value from a sorted array of numbers.
   *
   * @param {number} p - The percentile to calculate (between 0 and 1).
   * @param {number[]} values - A sorted array of numbers from which to calculate the percentile.
   * @returns {number} - The calculated percentile value.
   */
  private getPercentile;

  /**
   * Checks if a value is not empty.
   *
   * A value is considered empty if it is one of the following:
   * - null
   * - undefined
   * - false
   * - an empty string
   * - NaN
   * - an object with no keys
   * - an array with no elements
   *
   * @param {any} value - The value to check.
   * @returns {boolean} - `true` if the value is not empty, `false` otherwise.
   */
  private isNotEmpty;

  /**
   * Finds the key-value pair with the maximum value in a given Map.
   *
   * @param {Map<any, number>} map - The Map to search.
   * @returns {([any, number] | undefined)} - The key-value pair with the maximum value,
   * or `undefined` if the Map is empty.
   */
  private getKeyWithMaxValue;
}

/**
 * Names of the data types a column (or value) can be classified as; used by `dTypes`,
 * `info` and `selectDtypes`.
 *
 * NOTE(review): the members look like a subset of JavaScript `typeof` results — confirm
 * against the implementation how values are classified.
 */
export type DataType = 'number' | 'string' | 'boolean' | 'object' | 'undefined';
//# sourceMappingURL=dataframe.d.ts.map