/*
 * @berkelium/berkelium
 * Version:
 * A lightweight JavaScript library for data analysis and manipulation.
 * 412 lines • 18.2 kB
 * TypeScript
 */
/**
* Ambient declaration of the DataFrame class: a tabular data container whose members
* cover inspection (columns, index, shape, dTypes), cleaning (type checks, null/undefined
* handling, de-duplication), descriptive statistics, and column/row transformations.
*
* Private members are listed without signatures in this declaration; their comments
* carry the parameters and results documented by the author.
*/
export declare class DataFrame {
/** Internal storage for the DataFrame's contents; its type is not exposed by this declaration. */
private data;
/**
* Initializes a new instance of the DataFrame class with the provided data.
*
* NOTE(review): the declared parameter type maps every key to an array (`any[]`), while
* row-oriented members such as `filter` and `print` describe a row as `Record<string, any>`.
* Confirm against the implementation which shape is actually expected.
*
* @param {Record<string, any[]>[]} data - An array of records where each record
* contains a string key and an array of any type values, representing the data
* to be stored in the DataFrame.
*/
constructor(data: Record<string, any[]>[]);
/**
* Gets the column names of the DataFrame.
*
* @returns {string[]} - An array of strings containing the column names of the DataFrame.
*/
get columns(): string[];
/**
* Gets the index labels of the DataFrame.
*
* @returns {number[]} - An array of numbers representing the index labels of the DataFrame.
* Each index represents the position of the row in the data.
*/
get index(): number[];
/**
* Gets the shape of the DataFrame as a tuple of two numbers.
* The first number represents the number of rows and the second number
* represents the number of columns.
*
* @returns {[number, number]} - A tuple `[rows, columns]` representing the shape of the DataFrame.
*/
get shape(): [number, number];
/**
* Gets the data types of each column in the DataFrame.
*
* @returns {Record<string, DataType>} - An object mapping each column name to its data type.
* If the DataFrame is empty, returns an empty object.
*/
get dTypes(): Record<string, DataType>;
/**
* Checks if all values in the specified column have the same data type.
*
* @param {string} column - The name of the column to check.
* @returns {boolean} - `true` if all values in the column have the same data type as the column's data type; `false` otherwise.
*/
isSameType(column: string): boolean;
/**
* Returns an array of objects representing the rows whose value in the specified column
* has a different data type than the column's data type.
* Each object contains the original row data with an additional "index" property set to the index of the row in the original DataFrame.
* If no rows have a different data type, an empty array is returned.
*
* @param {string} column - The name of the column to check.
* @returns {Record<string, any>[]} - An array of objects representing the rows with a different data type.
*/
getWrongTypeRows(column: string): Record<string, any>[];
/**
* Updates the value of a specific element in the DataFrame at the specified index and column.
*
* Nothing is returned; presumably the DataFrame is modified in place — TODO confirm.
*
* @param {number} index - The index of the row to update.
* @param {string} column - The name of the column to update.
* @param {any} value - The new value to assign to the element.
* @returns {void}
*/
updateElement(index: number, column: string, value: any): void;
/**
* Deletes the specified rows from the DataFrame.
*
* Nothing is returned; presumably the DataFrame is modified in place — TODO confirm.
*
* @param {number[]} indices - An array of indices of the rows to delete.
* @returns {void}
*/
deleteObservations(indices: number[]): void;
/**
* Gets the first n rows of the DataFrame.
*
* @param {number} [n=5] - The number of rows to return.
* @returns {DataFrame} - A new DataFrame containing the first n rows of the original DataFrame.
*/
head(n?: number): DataFrame;
/**
* Gets the last n rows of the DataFrame.
*
* @param {number} [n=5] - The number of rows to return.
* @returns {DataFrame} - A new DataFrame containing the last n rows of the original DataFrame.
*/
tail(n?: number): DataFrame;
/**
* Creates a copy of the DataFrame.
*
* NOTE(review): whether the copy is deep or shallow cannot be determined from this
* declaration — confirm against the implementation before relying on either.
*
* @returns {DataFrame} - A DataFrame holding a copy of this DataFrame's data.
*/
copy(): DataFrame;
/**
* Returns an object containing information about the DataFrame.
*
* @returns { { shape: [number, number], columns: string[], dTypes: Record<string, DataType> } }
* An object with the following properties:
* - `shape`: A tuple of two numbers representing the number of rows and columns in the DataFrame.
* - `columns`: An array of strings representing the column labels of the DataFrame.
* - `dTypes`: An object mapping each column name to its data type.
*/
info(): {
shape: [number, number];
columns: string[];
dTypes: Record<string, DataType>;
};
/**
* Returns the minimum value in the specified column.
*
* @param {string} column - The name of the column to find the minimum value in.
* @returns {number} - The minimum value in the specified column.
*/
min(column: string): number;
/**
* Returns the maximum value in the specified column.
*
* @param {string} column - The name of the column to find the maximum value in.
* @returns {number} - The maximum value in the specified column.
*/
max(column: string): number;
/**
* Calculates the quartiles of the given column.
*
* @param {string} column - The name of the column to calculate the quartiles for.
* @returns {{ '25%': number, '50%': number, '75%': number }} An object containing the 25th, 50th, and 75th percentiles of the column.
*/
quartiles(column: string): {
'25%': number;
'50%': number;
'75%': number;
};
/**
* Returns the median value of the specified column.
*
* @param {string} column - The name of the column to find the median value in.
* @returns {number} - The median value in the specified column.
*/
median(column: string): number;
/**
* Calculates the mean value of the specified column.
*
* @param {string} column - The name of the column to calculate the mean value for.
* @returns {number} - The mean value in the specified column.
*/
mean(column: string): number;
/**
* Returns the mode of the specified column.
*
* If the column is non-numeric, throws an error.
* If the column has no modes, returns undefined.
* If the column has one mode, returns that mode.
* If the column has multiple modes, returns the maximum of the modes.
*
* @param {string} column - The name of the column to find the mode of.
* @returns {number | undefined} - The mode of the column, or undefined if no mode exists.
*/
mode(column: string): number | undefined;
/**
* Calculates the standard deviation of the specified column.
*
* @param {string} column - The name of the column to calculate the standard deviation for.
* @returns {number} - The standard deviation of the column.
*/
std(column: string): number;
/**
* Counts the number of rows in the DataFrame that have a value in the given column.
*
* @param {string} column - The name of the column to count.
* @returns {number} - The number of rows in the DataFrame that have a value in the given column.
*/
count(column: string): number;
/**
* Returns a summary of the DataFrame's columns.
*
* If `categorical` is true, returns a dictionary where the keys are the column names and the values are an object with the following properties:
* - `count`: The number of rows in the DataFrame that have a value in the given column.
* - `unique`: The number of unique values in the given column.
* - `top`: The most frequent value in the given column.
* - `freq`: The frequency of the most frequent value in the given column.
*
* If `categorical` is false, returns a dictionary where the keys are the column names and the values are an object with the following properties:
* - `count`: The number of rows in the DataFrame that have a value in the given column.
* - `mean`: The mean of the given column.
* - `std`: The standard deviation of the given column.
* - `min`: The minimum value of the given column.
* - `25%`: The 25th percentile of the given column.
* - `50%`: The 50th percentile of the given column.
* - `75%`: The 75th percentile of the given column.
* - `max`: The maximum value of the given column.
*
* @param {boolean} [categorical=false] - Whether to calculate summary statistics for categorical or numerical columns.
* @returns {Record<string, any>} - A dictionary with the summary statistics for each column in the DataFrame.
*/
describe(categorical?: boolean): Record<string, any>;
/**
* Returns true if the specified column contains a null or undefined value in any row of the DataFrame.
*
* @param {string} column - The name of the column to check.
* @returns {boolean} - True if the column contains at least one null or undefined value, false otherwise.
*/
isNull(column: string): boolean;
/**
* Renames a column in the DataFrame.
*
* @param {string} oldName - The current name of the column to rename.
* @param {string} newName - The new name for the column.
* @returns {void}
*/
renameColumn(oldName: string, newName: string): void;
/**
* Checks if the DataFrame contains any undefined values in any column.
*
* @returns {boolean} - True if the DataFrame contains at least one undefined value, false otherwise.
*/
hasUndefined(): boolean;
/**
* Checks if the DataFrame contains any rows where the type of a value does not match the type of its column.
*
* @returns {boolean} - True if the DataFrame contains at least one row with a value of the wrong type, false otherwise.
*/
hasWrongDataTypes(): boolean;
/**
* Checks if the DataFrame contains any duplicate rows.
*
* @returns {boolean} - True if the DataFrame contains duplicate rows, false otherwise.
*/
hasDuplicates(): boolean;
/**
* Returns a new DataFrame containing only the unique rows from the original DataFrame.
* The original DataFrame is not modified.
*
* @returns {DataFrame} - A new DataFrame with unique rows.
*/
dedup(): DataFrame;
/**
* Returns a new DataFrame without the rows that contain undefined values in any column.
*
* @returns {DataFrame} - A new DataFrame with rows containing undefined values removed.
*/
dropna(): DataFrame;
/**
* Fills null or undefined values in the DataFrame with the specified value.
*
* If a column name is specified, only that column will be filled. Otherwise,
* all columns will be filled.
*
* @param {any} value - The value to fill null or undefined values with.
* @param {string} [column] - The column to fill, if only one column should be filled.
* @returns {DataFrame} - A new DataFrame with null or undefined values filled.
*/
fillna(value: any, column?: string): DataFrame;
/**
* Counts the occurrences of each unique value in the specified column.
*
* @param {string} column - The name of the column to count unique values for.
* @returns {Record<any, number>} - An object mapping each unique value in the column
* to the number of times it appears in the DataFrame.
*/
valueCounts(column: string): Record<any, number>;
/**
* Selects columns from the DataFrame that match the given data types.
*
* @param {DataType[]} types - The data types to select columns for.
* @returns {DataFrame} - A new DataFrame with columns filtered by the given data types.
*/
selectDtypes(types: DataType[]): DataFrame;
/**
* Filters the DataFrame to only include rows that satisfy the given predicate.
*
* @param { (row: Record<string, any>) => boolean } predicate - A function that takes
* a row as an argument and returns a boolean indicating whether the row should be
* included in the filtered DataFrame.
* @returns {DataFrame} - A new DataFrame containing only the filtered rows.
*/
filter(predicate: (row: Record<string, any>) => boolean): DataFrame;
/**
* Groups the DataFrame by the given column and returns a new object with the unique values
* of the column as keys and the corresponding DataFrames as values.
*
* @param {string} col - The column to group by.
* @returns {Record<string, DataFrame>} - A new object with the grouped DataFrames.
*/
groupBy(col: string): Record<string, DataFrame>;
/**
* Selects columns from the DataFrame and returns a new DataFrame with the selected columns.
*
* @param {Array<string>} columnNames - An array of column names to select from the DataFrame.
* @returns {DataFrame} - A new DataFrame with the selected columns.
*/
select(columnNames: Array<string>): DataFrame;
/**
* Inserts a new column into the DataFrame with the given data array.
*
* @param {string} column - The name of the column to be inserted.
* @param {any[]} dataArray - An array of data to populate the new column. The length of this array
* should match the number of rows in the DataFrame.
* @returns {DataFrame} - A new DataFrame with the inserted column.
*/
insert(column: string, dataArray: any[]): DataFrame;
/**
* Updates a column in the DataFrame with the given data array.
*
* @param {string} column - The name of the column to be updated.
* @param {any[]} dataArray - An array of data to populate the column. The length of this array
* should match the number of rows in the DataFrame.
* @returns {DataFrame} - A new DataFrame with the updated column.
*/
update(column: string, dataArray: any[]): DataFrame;
/**
* Deletes a column from the DataFrame.
*
* @param {string} column - The name of the column to be deleted.
* @returns {DataFrame} - A new DataFrame without the specified column.
*/
delete(column: string): DataFrame;
/**
* Calculates the variance of each numerical column in the DataFrame.
*
* @returns {Record<string, any>[]} - An array of objects, each containing the name of a
* numerical column and its variance.
*/
var(): Record<string, any>[];
/**
* Calculates the variance of the specified column.
*
* @param {string} column - The name of the column to calculate the variance for.
* @returns {number} - The variance of the column.
* @private
*/
private calculateVariance;
/**
* Extracts the values of the specified column from each row in the DataFrame.
*
* @param {string} col - The name of the column to extract values from.
* @returns {any[]} - An array containing the values of the specified column from each row.
*/
array(col: string): any[];
/**
* Applies a transformation to the specified column in the DataFrame.
*
* @param {string} column - The name of the column to transform.
* @param {(value: any) => any} fn - The transformation function to apply to each value in the
* specified column. The function should take a single argument, the value of the column in the
* current row, and return the transformed value.
* @returns {DataFrame} - A new DataFrame with the transformed column.
*/
transform(column: string, fn: (value: any) => any): DataFrame;
/**
* Returns an array of unique values in the specified column of the DataFrame.
*
* @param {string} column - The name of the column to extract unique values from.
* @returns {any[]} - An array of unique values in the specified column.
*/
unique(column: string): any[];
/**
* Returns the DataFrame data as an array of row objects, which can be logged to the console.
*
* NOTE(review): the method name suggests it may also write to the console itself; this
* declaration only shows the returned value — confirm against the implementation.
*
* @returns {Record<string, any>[]} - The DataFrame data as an array of objects.
*/
print(): Record<string, any>[];
/**
* Determines the most frequent data type in the given array.
*
* @param {any[]} arr - An array of values to analyze.
* @returns {DataType} - The data type that appears most frequently in the array.
* If multiple data types have the same frequency, one of them is returned.
* Excludes 'undefined' types from consideration.
* @private
*/
private mostFrequentType;
/**
* Calculates the mode(s) of a given array of numbers.
*
* @param {number[]} values - An array of numbers for which to calculate the mode(s).
* @returns {number[]} - An array containing the mode(s) of the input array.
* If all numbers appear with the same frequency, returns an empty array.
* @private
*/
private calculateMode;
/**
* Calculates the frequency of each item in a given array.
*
* @param {Array<any>} arr - An array of values for which to calculate the frequency.
* @returns {Map<any, number>} - A Map where the keys are the items in the array and the values
* are the number of times each item appears in the array. If an item is undefined, it is
* ignored. If the input array is empty, an empty Map is returned.
* @private
*/
private calculateFrequency;
/**
* Calculates the percentile value from a sorted array of numbers.
*
* @param {number} p - The percentile to calculate (between 0 and 1).
* @param {number[]} values - A sorted array of numbers from which to calculate the percentile.
* @returns {number} - The calculated percentile value.
* @private
*/
private getPercentile;
/**
* Checks if a value is not empty.
*
* A value is considered empty if it is one of the following:
* - null
* - undefined
* - false
* - an empty string
* - NaN
* - an object with no keys
* - an array with no elements
*
* @param {any} value - The value to check.
* @returns {boolean} - `true` if the value is not empty, `false` otherwise.
* @private
*/
private isNotEmpty;
/**
* Finds the key-value pair with the maximum value in a given Map.
*
* @param {Map<any, number>} map - The Map to search.
* @returns {([any, number] | undefined)} - The key-value pair with the maximum value,
* or `undefined` if the Map is empty.
* @private
*/
private getKeyWithMaxValue;
}
/**
 * Label for the kind of value held in a DataFrame column.
 *
 * Used as the value type of `DataFrame.dTypes` and as the element type of the
 * `types` argument to `DataFrame.selectDtypes`.
 */
export type DataType =
  | 'number'
  | 'string'
  | 'boolean'
  | 'object'
  | 'undefined';
//# sourceMappingURL=dataframe.d.ts.map