@doeixd/csv-utils
Version:
Utilities for csv files / arrays of objects
1,430 lines (1,345 loc) • 44.7 kB
text/typescript
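/**
* @fileoverview Standalone utility functions for CSV data manipulation.
* This module provides access to the CSV utility functions as standalone functions
* that take an array of objects as the first argument. Most of them return their result
* as an array of objects; lookup and summary helpers (for example `findRow`, `aggregate`,
* `groupBy`) return a single row, a scalar, or a keyed object instead.
*/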
import CSV, {
CSVError,
ComparisonCallback,
ModificationCallback,
TransformCallback,
EqualityCallback,
MergeCallback,
SortDirection,
AggregateOperation,
SimilarityMatch,
CSVArrayUtils,
CSVSchemaConfig
} from './index';
import { stringify as stringifyCSV } from 'csv/sync';
/**
 * Attaches a column to every row of `data`.
 *
 * The cell content is either the constant given in `valueOrFn` or, when
 * `valueOrFn` is a function, the value that function returns for the row.
 * A column that already carries this name has its values overwritten.
 *
 * @template T - Row type of the input array.
 * @template NewKey - String literal type of the added column's name.
 * @template NewValue - Type of the values stored in the added column.
 * @param data - Rows to extend.
 * @param columnName - Name of the column to add (or overwrite).
 * @param valueOrFn - Fixed value for every row, or a function receiving the
 * current row and returning that row's value.
 * @returns A new array of rows carrying the added/updated column.
 * @example
 * ```typescript
 * // interface User { id: number; name: string; }
 * // const users: User[] = [{ id: 1, name: 'Alice' }];
 *
 * // Constant value
 * const usersWithRole = addColumn(users, 'role', 'user');
 * // usersWithRole is [{ id: 1, name: 'Alice', role: 'user' }]
 *
 * // Value derived from the row
 * const usersWithLen = addColumn(users, 'nameLength', row => row.name.length);
 * // usersWithLen is [{ id: 1, name: 'Alice', nameLength: 5 }]
 * ```
 */
export function addColumn<
  T extends Record<string, any>,
  NewKey extends string,
  NewValue
>(
  data: T[],
  columnName: NewKey,
  valueOrFn: NewValue | ((row: T) => NewValue)
): Array<T & Record<NewKey, NewValue>> {
  const table = CSV.fromData(data);
  const widened = table.addColumn(columnName, valueOrFn);
  return widened.toArray();
}
/**
 * Drops one or several columns from every row of `data`.
 * Names that do not correspond to an existing column are silently ignored.
 *
 * @template T - Row type of the input array.
 * @template K - Union of the column names being removed.
 * @param data - Rows to strip columns from.
 * @param columnNames - One column name, or an array of names, to remove.
 * Each may be a `keyof T` or an arbitrary string.
 * @returns A new array of rows without the listed columns.
 * @example
 * ```typescript
 * // interface User { id: number; name: string; email: string; }
 * // const users: User[] = [{ id: 1, name: 'Alice', email: 'a@ex.com' }];
 *
 * // One column
 * const usersWithoutEmail = removeColumn(users, 'email');
 * // usersWithoutEmail is [{ id: 1, name: 'Alice' }]
 *
 * // Several columns
 * const usersOnlyId = removeColumn(users, ['name', 'email']);
 * // usersOnlyId is [{ id: 1 }]
 * ```
 */
export function removeColumn<
  T extends Record<string, any>,
  K extends keyof T | string
>(
  data: T[],
  columnNames: K | K[]
): Array<Omit<T, Extract<K, keyof T>>> {
  const table = CSV.fromData(data);
  const narrowed = table.removeColumn(columnNames);
  return narrowed.toArray();
}
/**
 * Gives a column a different name in every row of `data`.
 *
 * Rows that lack `oldName` are left as they are (only the static type adapts).
 * When `newName` already exists and differs from `oldName`, the existing
 * column is overwritten.
 *
 * @template T - Row type of the input array.
 * @template OldK - Type of the current column name.
 * @template NewK - String literal type of the replacement column name.
 * @param data - Rows to modify.
 * @param oldName - Current column name; a `keyof T` or an arbitrary string.
 * @param newName - Name the column should carry afterwards.
 * @returns A new array of rows with the column renamed.
 * @example
 * ```typescript
 * // interface User { userId: number; userName: string; }
 * // const users: User[] = [{ userId: 1, userName: 'Alice' }];
 *
 * const usersRenamedId = renameColumn(users, 'userId', 'id');
 * // usersRenamedId is [{ id: 1, userName: 'Alice' }]
 * ```
 */
export function renameColumn<
  T extends Record<string, any>,
  OldK extends keyof T | string,
  NewK extends string
>(
  data: T[],
  oldName: OldK,
  newName: NewK
): Array<Omit<T, Extract<OldK, keyof T>> & Record<NewK, OldK extends keyof T ? T[OldK] : any>> {
  const table = CSV.fromData(data);
  const renamed = table.renameColumn(oldName, newName);
  return renamed.toArray();
}
/**
 * Rearranges the columns of every row so they follow `orderedColumnNames`.
 *
 * Columns missing from `orderedColumnNames` come after the listed ones and
 * keep their original relative order. Listed names that do not occur in the
 * data are ignored.
 *
 * @template T - Row type of the input array.
 * @param data - Rows to modify.
 * @param orderedColumnNames - Column names (`keyof T` or strings) in the desired order.
 * @returns A new array of rows with the columns reordered.
 * @example
 * ```typescript
 * // interface User { id: number; name: string; email: string; age: number }
 * // const users: User[] = [{ id: 1, name: 'Alice', email: 'a@ex.com', age: 30 }];
 *
 * const reorderedUsers = reorderColumns(users, ['name', 'id']);
 * // Iterating the keys of reorderedUsers[0] yields 'name', 'id', 'email', 'age'.
 * ```
 */
export function reorderColumns<T extends Record<string, any>>(
  data: T[],
  orderedColumnNames: (keyof T | string)[]
): T[] {
  const table = CSV.fromData(data);
  const reordered = table.reorderColumns(orderedColumnNames);
  return reordered.toArray();
}
/**
 * Tries to convert every value of one column to the requested data type.
 * A value that cannot be converted (e.g. 'abc' to number) becomes `null`.
 *
 * The static row type `T` is returned unchanged because the cast happens at
 * runtime; the actual value types inside the returned objects can therefore
 * differ from what `T` declares.
 *
 * @template T - Row type of the input array.
 * @param data - Rows to modify.
 * @param columnName - Column to cast; a `keyof T` or an arbitrary string.
 * @param targetType - Desired type: 'string', 'number', 'boolean', or 'date'.
 * @returns A new array of rows with the column values cast.
 * @example
 * ```typescript
 * // interface Product { id: string; price: string; available: string; }
 * // const products: Product[] = [
 * //   { id: '1', price: '19.99', available: 'true' },
 * //   { id: '2', price: ' N/A ', available: '0' }
 * // ];
 *
 * let castedProducts = castColumnType(products, 'price', 'number');
 * castedProducts = castColumnType(castedProducts, 'available', 'boolean');
 * // castedProducts might be:
 * // [
 * //   { id: '1', price: 19.99, available: true },
 * //   { id: '2', price: null, available: false }
 * // ]
 * // The runtime value types no longer match the Product interface.
 * ```
 */
export function castColumnType<T extends Record<string, any>>(
  data: T[],
  columnName: keyof T | string,
  targetType: 'string' | 'number' | 'boolean' | 'date'
): T[] {
  // The result is still typed as T[] here: structurally accurate, even
  // though the values held in the cast column have changed type at runtime.
  const table = CSV.fromData(data);
  const casted = table.castColumnType(columnName, targetType);
  return casted.toArray();
}
/**
 * Filters out duplicate rows, comparing either whole rows or only a chosen
 * subset of columns. The first row of each group of duplicates is retained.
 *
 * @template T - Row type of the input array.
 * @param data - Rows to deduplicate.
 * @param columnsToCheck - Optional column names (`keyof T`) that define
 * uniqueness. When omitted or empty, every column of a row takes part.
 * @returns A new array of rows without duplicates.
 * @example
 * ```typescript
 * // interface Item { id: number; category: string; value: number }
 * // const items: Item[] = [
 * //   { id: 1, category: 'A', value: 10 }, { id: 2, category: 'B', value: 20 },
 * //   { id: 1, category: 'A', value: 10 }, { id: 3, category: 'A', value: 30 }
 * // ];
 *
 * const dedupedAll = deduplicate(items);
 * // dedupedAll: [{ id: 1, category: 'A', value: 10 }, { id: 2, category: 'B', value: 20 }, { id: 3, category: 'A', value: 30 }]
 *
 * const dedupedByCat = deduplicate(items, ['category']);
 * // dedupedByCat: [{ id: 1, category: 'A', value: 10 }, { id: 2, category: 'B', value: 20 }]
 * ```
 */
export function deduplicate<T extends Record<string, any>>(
  data: T[],
  columnsToCheck?: (keyof T)[]
): T[] {
  const table = CSV.fromData(data);
  const unique = table.deduplicate(columnsToCheck);
  return unique.toArray();
}
/**
 * Partitions `data` into two arrays according to a predicate.
 * Rows for which `condition` returns `true` end up in `pass`; all remaining
 * rows end up in `fail`.
 *
 * @template T - Row type of the input array.
 * @param data - Rows to partition.
 * @param condition - Predicate returning `true` for rows that belong in `pass`.
 * @returns An object holding the two new arrays, `pass` and `fail`.
 * @example
 * ```typescript
 * // interface User { id: number; name: string; age: number }
 * // const users: User[] = [
 * //   { id: 1, name: 'Alice', age: 30 }, { id: 2, name: 'Bob', age: 22 },
 * //   { id: 3, name: 'Carol', age: 35 }
 * // ];
 *
 * const { pass: adults, fail: minors } = split(users, row => row.age >= 30);
 * // adults is [{ id: 1, name: 'Alice', age: 30 }, { id: 3, name: 'Carol', age: 35 }]
 * // minors is [{ id: 2, name: 'Bob', age: 22 }]
 * ```
 */
export function split<T extends Record<string, any>>(
  data: T[],
  condition: (row: T) => boolean
): { pass: T[]; fail: T[] } {
  const partitions = CSV.fromData(data).split(condition);
  // Unwrap both CSV instances into plain arrays, `pass` first.
  const pass = partitions.pass.toArray();
  const fail = partitions.fail.toArray();
  return { pass, fail };
}
/**
 * Joins `dataLeft` (the left table) with `dataRight` (the right table).
 *
 * @template T - Row type of the left array.
 * @template OtherRowType - Row type of the right array.
 * @template JoinedRowType - Row type of the joined result; defaults to
 * `T & Partial<OtherRowType>`.
 * @param dataLeft - Rows of the left table.
 * @param dataRight - Rows of the right table.
 * @param onConfig - Join configuration:
 * `left`: key (column name) taken from `dataLeft`.
 * `right`: key (column name) taken from `dataRight`.
 * `type`: optional join type: 'inner' (default), 'left', 'right', 'outer'.
 * @param select - Optional function that builds the combined row. It is
 * handed `leftRow` (or `null`) and `rightRow` (or `null`).
 * The default merge is `{ ...leftRow, ...rightRow }`.
 * @returns A new array holding the joined rows.
 * @example
 * ```typescript
 * // interface User { id: number; name: string; cityId: number; }
 * // interface City { cityId: number; cityName: string; }
 * // const users: User[] = [{ id: 1, name: 'Alice', cityId: 101 }];
 * // const cities: City[] = [{ cityId: 101, cityName: 'New York' }];
 *
 * const innerJoined = join(
 *   users,
 *   cities,
 *   { left: 'cityId', right: 'cityId', type: 'inner' }
 * );
 * // innerJoined: [{ id: 1, name: 'Alice', cityId: 101, cityName: 'New York' }]
 * ```
 */
export function join<
  T extends Record<string, any>,
  OtherRowType extends Record<string, any>,
  JoinedRowType extends Record<string, any> = T & Partial<OtherRowType>
>(
  dataLeft: T[],
  dataRight: OtherRowType[],
  onConfig: {
    left: keyof T;
    right: keyof OtherRowType;
    type?: 'inner' | 'left' | 'right' | 'outer';
  },
  select?: (leftRow: T | null, rightRow: OtherRowType | null) => JoinedRowType
): JoinedRowType[] {
  const leftTable = CSV.fromData(dataLeft);
  const rightTable = CSV.fromData(dataRight);
  const joined = leftTable.join<OtherRowType, JoinedRowType>(rightTable, onConfig, select);
  return joined.toArray();
}
/**
 * Reshapes data from wide to long format (also known as unpivot or melt).
 *
 * Every column listed in `valueCols` is turned into a pair of new columns:
 * one holding the original column name (the variable) and one holding its
 * value. The `idCols` are repeated on each produced row.
 *
 * @template T - Row type of the input data.
 * @template IdKeys - Keys of the identifier columns.
 * @template ValueKeys - Keys of the columns being unpivoted.
 * @template VarNameCol - Name type of the new variable column.
 * @template ValueNameCol - Name type of the new value column.
 * @param data - Rows to unpivot.
 * @param idCols - Column names (`keyof T`) identifying each observation.
 * @param valueCols - Column names (`keyof T`) whose values get unpivoted.
 * @param varName - Name of the new column storing the original column names. Defaults to 'variable'.
 * @param valueName - Name of the new column storing the values. Defaults to 'value'.
 * @returns A new array holding the unpivoted rows.
 * @example
 * ```typescript
 * // interface Sales { product: string; q1_sales: number; q2_sales: number; }
 * // const salesData: Sales[] = [{ product: 'A', q1_sales: 100, q2_sales: 150 }];
 *
 * const unpivoted = unpivot(
 *   salesData,
 *   ['product'],
 *   ['q1_sales', 'q2_sales'],
 *   'quarter',
 *   'amount'
 * );
 * // unpivoted:
 * // [
 * //   { product: 'A', quarter: 'q1_sales', amount: 100 },
 * //   { product: 'A', quarter: 'q2_sales', amount: 150 }
 * // ]
 * ```
 */
export function unpivot<
  T extends Record<string, any>,
  IdKeys extends keyof T,
  ValueKeys extends keyof T,
  VarNameCol extends string = 'variable',
  ValueNameCol extends string = 'value'
>(
  data: T[],
  idCols: IdKeys[],
  valueCols: ValueKeys[],
  varName: VarNameCol = 'variable' as VarNameCol,
  valueName: ValueNameCol = 'value' as ValueNameCol
): Array<
  Pick<T, IdKeys> &
  Record<VarNameCol, ValueKeys extends string ? ValueKeys : string> &
  Record<ValueNameCol, T[ValueKeys]>
> {
  const wide = CSV.fromData(data);
  const long = wide.unpivot<IdKeys, ValueKeys, VarNameCol, ValueNameCol>(
    idCols,
    valueCols,
    varName,
    valueName
  );
  return long.toArray();
}
/**
 * Replaces missing values (`null` or `undefined`) in one column of `data`.
 * The static row type `T` stays the same, although the underlying value
 * types may change.
 *
 * @template T - Row type of the input array.
 * @template K - Key of the column being filled.
 * @param data - Rows to modify.
 * @param columnName - Column whose missing values should be filled.
 * @param valueOrFn - Replacement value, or a function receiving the current
 * row and returning the replacement. Typed loosely (`any`) for flexibility.
 * @returns A new array of rows with the missing values filled.
 * @example
 * ```typescript
 * // interface Product { name: string; price?: number | null; }
 * // const products: Product[] = [ { name: 'Apple', price: 1.0 }, { name: 'Banana', price: null }];
 *
 * const filledProducts = fillMissingValues(products, 'price', 0);
 * // filledProducts: [{ name: 'Apple', price: 1.0 }, { name: 'Banana', price: 0 }]
 * ```
 */
export function fillMissingValues<T extends Record<string, any>, K extends keyof T>(
  data: T[],
  columnName: K,
  valueOrFn: T[K] | any | ((row: T) => T[K] | any)
): T[] {
  const table = CSV.fromData(data);
  const filled = table.fillMissingValues(columnName, valueOrFn);
  return filled.toArray();
}
/**
 * Normalizes the letter case of the string values in one column of `data`.
 * Values that are not strings, and rows missing the column, are left alone.
 *
 * @template T - Row type of the input array.
 * @template K - Key of the column being normalized.
 * @param data - Rows to modify.
 * @param columnName - Column whose text should be normalized.
 * @param normalizationType - Normalization to apply: 'lowercase', 'uppercase', or 'capitalize'.
 * @returns A new array of rows with the text normalized.
 * @example
 * ```typescript
 * // interface City { name: string; countryCode: string; }
 * // const cities: City[] = [{ name: 'new york city', countryCode: 'us' }];
 *
 * const capNames = normalizeText(cities, 'name', 'capitalize');
 * // capNames[0].name is 'New York City'
 * const upperCodes = normalizeText(cities, 'countryCode', 'uppercase');
 * // upperCodes[0].countryCode is 'US'
 * ```
 */
export function normalizeText<T extends Record<string, any>, K extends keyof T>(
  data: T[],
  columnName: K,
  normalizationType: 'lowercase' | 'uppercase' | 'capitalize'
): T[] {
  const table = CSV.fromData(data);
  const normalized = table.normalizeText(columnName, normalizationType);
  return normalized.toArray();
}
/**
 * Strips leading and trailing whitespace from string values in the given
 * columns. Without a column list, every string value in every column is
 * trimmed. Non-string values are left alone.
 *
 * @template T - Row type of the input array.
 * @param data - Rows to modify.
 * @param columns - Optional column names (`keyof T` or strings) to trim.
 * When omitted, all columns holding string values are processed.
 * @returns A new array of rows with whitespace trimmed.
 * @example
 * ```typescript
 * // interface Contact { name: string; city: string; }
 * // const contacts: Contact[] = [{ name: ' Alice ', city: ' New York ' }];
 *
 * const trimmedContacts = trimWhitespace(contacts, ['name', 'city']);
 * // trimmedContacts[0] is { name: 'Alice', city: 'New York' }
 *
 * const trimmedAll = trimWhitespace(contacts); // Also trims 'name' and 'city'
 * ```
 */
export function trimWhitespace<T extends Record<string, any>>(
  data: T[],
  columns?: (keyof T | string)[]
): T[] {
  const table = CSV.fromData(data);
  const trimmed = table.trimWhitespace(columns);
  return trimmed.toArray();
}
/**
 * Looks up the first row whose `column` equals `value` exactly.
 *
 * @param data - Rows to search.
 * @param value - Value to match.
 * @param column - Column to inspect (default: 'id').
 * @returns The matching row, or `undefined` when none matches.
 * @example
 * ```typescript
 * const product = findRow(products, 'P123', 'productId');
 * ```
 */
export function findRow<T extends Record<string, any>>(
  data: T[],
  value: any,
  column: keyof T = 'id' as keyof T
): T | undefined {
  const table = CSV.fromData(data);
  return table.findRow(value, column);
}
/**
 * Looks up a single row whose `column` value matches a regular expression.
 *
 * @param data - Rows to search.
 * @param regex - Pattern to match.
 * @param column - Column to inspect (default: 'id').
 * @returns The matching row, or `undefined` when none matches.
 * @example
 * ```typescript
 * const product = findRowByRegex(products, /^P\d{3}$/, 'productId');
 * ```
 */
export function findRowByRegex<T extends Record<string, any>>(
  data: T[],
  regex: RegExp,
  column: keyof T = 'id' as keyof T
): T | undefined {
  const table = CSV.fromData(data);
  return table.findRowByRegex(regex, column);
}
/**
 * Collects every row whose `column` contains `value`.
 *
 * @param data - Rows to search.
 * @param value - Value to search for.
 * @param column - Column to inspect (default: 'id').
 * @returns Array of the matching rows.
 * @example
 * ```typescript
 * const electronicsProducts = findRows(products, 'Electronics', 'category');
 * ```
 */
export function findRows<T extends Record<string, any>>(
  data: T[],
  value: any,
  column: keyof T = 'id' as keyof T
): T[] {
  const table = CSV.fromData(data);
  return table.findRows(value, column);
}
/**
 * Looks up the first row that satisfies a predicate.
 *
 * @param data - Rows to search.
 * @param predicate - Test applied to each row.
 * @returns The first row passing the test, or `undefined` when none does.
 * @example
 * ```typescript
 * const expensiveProduct = findRowWhere(products, p => p.price > 100);
 * ```
 */
export function findRowWhere<T extends Record<string, any>>(
  data: T[],
  predicate: ComparisonCallback<T>
): T | undefined {
  const table = CSV.fromData(data);
  return table.findRowWhere(predicate);
}
/**
 * Collects every row that satisfies a predicate.
 *
 * @param data - Rows to search.
 * @param predicate - Test applied to each row.
 * @returns Array of the rows passing the test.
 * @example
 * ```typescript
 * const inStockProducts = findRowsWhere(products, p => p.inStock === true);
 * ```
 */
export function findRowsWhere<T extends Record<string, any>>(
  data: T[],
  predicate: ComparisonCallback<T>
): T[] {
  const table = CSV.fromData(data);
  return table.findRowsWhere(predicate);
}
/**
 * Finds rows whose `column` value is similar to a given string.
 *
 * @param data - Rows to search.
 * @param str - String to compare against.
 * @param column - Column to inspect.
 * @returns Array of matches, each carrying a similarity score.
 * @example
 * ```typescript
 * const similarProducts = findSimilarRows(products, 'Labtop', 'name');
 * ```
 */
export function findSimilarRows<T extends Record<string, any>>(
  data: T[],
  str: string,
  column: keyof T
): SimilarityMatch<T>[] {
  const table = CSV.fromData(data);
  return table.findSimilarRows(str, column);
}
/**
 * Finds the row whose `column` value is most similar to a given string.
 *
 * @param data - Rows to search.
 * @param str - String to compare against.
 * @param column - Column to inspect.
 * @returns The best match, or `undefined`.
 * @example
 * ```typescript
 * const closestMatch = findMostSimilarRow(products, 'Labtop', 'name');
 * ```
 */
export function findMostSimilarRow<T extends Record<string, any>>(
  data: T[],
  str: string,
  column: keyof T
): SimilarityMatch<T> | undefined {
  const table = CSV.fromData(data);
  return table.findMostSimilarRow(str, column);
}
/**
 * Buckets rows by the value they hold in one column.
 *
 * @param data - Rows to group.
 * @param column - Column whose values define the groups.
 * @returns Object mapping each group key to the rows in that group.
 * @example
 * ```typescript
 * const productsByCategory = groupBy(products, 'category');
 * ```
 */
export function groupBy<T extends Record<string, any>>(
  data: T[],
  column: keyof T
): Record<string, T[]> {
  const table = CSV.fromData(data);
  return table.groupBy(column);
}
/**
 * Applies the same modifications to every row.
 *
 * @param data - Rows to update.
 * @param modifications - Object of new values, or a function returning them.
 * @returns The updated array of rows.
 * @example
 * ```typescript
 * const updatedProducts = update(products, { currency: 'USD' });
 * ```
 */
export function update<T extends Record<string, any>, E extends Partial<T> = T>(
  data: T[],
  modifications: (Partial<T> | ModificationCallback<T>) & E
): T[] {
  const table = CSV.fromData(data);
  const updated = table.update(modifications);
  return updated.toArray();
}
/**
 * Applies modifications only to the rows that satisfy a condition.
 *
 * @param data - Rows to update.
 * @param condition - Predicate selecting the rows to modify.
 * @param modifications - Object of new values, or a function returning them.
 * @returns The updated array of rows.
 * @example
 * ```typescript
 * const discounted = updateWhere(
 *   products,
 *   p => p.price > 100,
 *   p => ({ price: p.price * 0.9, discounted: true })
 * );
 * ```
 */
export function updateWhere<T extends Record<string, any>>(
  data: T[],
  condition: ComparisonCallback<T>,
  modifications: Partial<T> | ModificationCallback<T>
): T[] {
  const table = CSV.fromData(data);
  const updated = table.updateWhere(condition, modifications);
  return updated.toArray();
}
/**
 * Rewrites a single column across all rows.
 *
 * @param data - Rows to update.
 * @param column - Column to rewrite.
 * @param value - Replacement value, or a function computing it from the
 * current cell value and the row.
 * @returns The updated array of rows.
 * @example
 * ```typescript
 * const withTax = updateColumn(products, 'price', p => p * 1.2);
 * ```
 */
export function updateColumn<T extends Record<string, any>, K extends keyof T>(
  data: T[],
  column: K,
  value: T[K] | ((current: T[K], row: T) => T[K])
): T[] {
  const table = CSV.fromData(data);
  const updated = table.updateColumn(column, value);
  return updated.toArray();
}
/**
 * Converts every row into a differently shaped object.
 *
 * @param data - Rows to transform.
 * @param transformer - Function producing the new shape for each row.
 * @returns Array of the transformed objects.
 * @example
 * ```typescript
 * interface ProductSummary { id: string; display: string; value: number }
 *
 * const summaries = transform<Product, ProductSummary>(products,
 *   p => ({
 *     id: p.id,
 *     display: `${p.name} (${p.category})`,
 *     value: p.price * p.stock
 *   })
 * );
 * ```
 */
export function transform<T extends Record<string, any>, R extends Record<string, any>>(
  data: T[],
  transformer: TransformCallback<T, R>
): R[] {
  const table = CSV.fromData(data);
  const reshaped = table.transform(transformer);
  return reshaped.toArray();
}
/**
 * Discards the rows that satisfy a condition.
 *
 * @param data - Rows to filter.
 * @param condition - Predicate selecting the rows to remove.
 * @returns The filtered array of rows.
 * @example
 * ```typescript
 * const inStockOnly = removeWhere(products, p => !p.inStock);
 * ```
 */
export function removeWhere<T extends Record<string, any>>(
  data: T[],
  condition: ComparisonCallback<T>
): T[] {
  const table = CSV.fromData(data);
  const remaining = table.removeWhere(condition);
  return remaining.toArray();
}
/**
 * Adds further rows to the data.
 *
 * @param data - Original rows.
 * @param rows - Rows to add, passed as individual arguments.
 * @returns The combined array of rows.
 * @example
 * ```typescript
 * const expanded = append(products,
 *   { id: 'P004', name: 'Keyboard', price: 49.99, inStock: true },
 *   { id: 'P005', name: 'Mouse', price: 29.99, inStock: true }
 * );
 * ```
 */
export function append<T extends Record<string, any>>(
  data: T[],
  ...rows: T[]
): T[] {
  const table = CSV.fromData(data);
  const extended = table.append(...rows);
  return extended.toArray();
}
/**
 * Orders rows by the values of one column.
 *
 * @param data - Rows to sort.
 * @param column - Column providing the sort key.
 * @param direction - Sort direction (default: 'asc').
 * @returns The sorted array of rows.
 * @example
 * ```typescript
 * const byPriceDesc = sortBy(products, 'price', 'desc');
 * ```
 */
export function sortBy<T extends Record<string, any>, K extends keyof T>(
  data: T[],
  column: K,
  direction: SortDirection = 'asc'
): T[] {
  const table = CSV.fromData(data);
  const sorted = table.sortBy(column, direction);
  return sorted.toArray();
}
/**
 * Computes an aggregate value over one column.
 *
 * @param data - Rows to aggregate.
 * @param column - Column to aggregate.
 * @param operation - Aggregation to perform (default: 'sum').
 * @returns The computed value.
 * @example
 * ```typescript
 * const totalRevenue = aggregate(sales, 'amount', 'sum');
 * const averagePrice = aggregate(products, 'price', 'avg');
 * ```
 */
export function aggregate<T extends Record<string, any>, K extends keyof T>(
  data: T[],
  column: K,
  operation: AggregateOperation = 'sum'
): number {
  const table = CSV.fromData(data);
  return table.aggregate(column, operation);
}
/**
 * Lists the unique values found in one column.
 *
 * @param data - Rows to inspect.
 * @param column - Column to read the values from.
 * @returns Array of the unique values.
 * @example
 * ```typescript
 * const categories = distinct(products, 'category');
 * ```
 */
export function distinct<T extends Record<string, any>, K extends keyof T>(
  data: T[],
  column: K
): Array<T[K]> {
  const table = CSV.fromData(data);
  return table.distinct(column);
}
/**
 * Builds a pivot table from the data.
 *
 * @param data - Rows to pivot.
 * @param rowColumn - Column supplying the row labels.
 * @param colColumn - Column supplying the column labels.
 * @param valueColumn - Column supplying the cell values.
 * @returns The pivot table as a nested object.
 * @example
 * ```typescript
 * const salesByProductAndMonth = pivot(sales, 'product', 'month', 'amount');
 * ```
 */
export function pivot<T extends Record<string, any>>(
  data: T[],
  rowColumn: keyof T,
  colColumn: keyof T,
  valueColumn: keyof T
): Record<string, Record<string, unknown>> {
  const table = CSV.fromData(data);
  return table.pivot(rowColumn, colColumn, valueColumn);
}
/**
 * Merges two datasets into one.
 *
 * @param dataA - First array of rows.
 * @param dataB - Second array of rows.
 * @param equalityFn - Decides whether two rows count as equal.
 * @param mergeFn - Combines a pair of equal rows into one.
 * @returns The merged array of rows.
 * @example
 * ```typescript
 * const merged = merge(
 *   localInventory,
 *   warehouseInventory,
 *   (a, b) => a.id === b.id,
 *   (a, b) => ({ ...a, stock: a.stock + b.stock })
 * );
 * ```
 */
export function merge<T extends Record<string, any>, E extends Record<string, any>>(
  dataA: T[],
  dataB: E[],
  equalityFn: EqualityCallback<T | E>,
  mergeFn: MergeCallback<T, E>
): T[] {
  const base = CSV.fromData(dataA);
  const combined = base.mergeWith(dataB, equalityFn, mergeFn);
  return combined.toArray();
}
/**
 * Draws a sample of rows from the data.
 *
 * @param data - Rows to sample from.
 * @param count - How many rows to sample (default: 1).
 * @returns The sampled array of rows.
 * @example
 * ```typescript
 * const randomSample = sample(products, 3);
 * ```
 */
export function sample<T extends Record<string, any>>(
  data: T[],
  count: number = 1
): T[] {
  const table = CSV.fromData(data);
  const picked = table.sample(count);
  return picked.toArray();
}
/**
 * Returns the first n rows.
 *
 * @param data - Rows to take from.
 * @param count - How many rows to return (default: 10).
 * @returns The first n rows.
 * @example
 * ```typescript
 * const topProducts = head(products, 5);
 * ```
 */
export function head<T extends Record<string, any>>(
  data: T[],
  count: number = 10
): T[] {
  const table = CSV.fromData(data);
  const leading = table.head(count);
  return leading.toArray();
}
/**
 * Returns the last n rows.
 *
 * @param data - Rows to take from.
 * @param count - How many rows to return (default: 10).
 * @returns The last n rows.
 * @example
 * ```typescript
 * const lastOrders = tail(orders, 5);
 * ```
 */
export function tail<T extends Record<string, any>>(
  data: T[],
  count: number = 10
): T[] {
  const table = CSV.fromData(data);
  const trailing = table.tail(count);
  return trailing.toArray();
}
/**
 * Builds a base row that mirrors the structure of the data.
 *
 * @param data - Rows whose structure serves as the template.
 * @param defaults - Optional default values.
 * @returns A new object with the data's structure.
 * @throws {CSVError} When `data` is empty.
 * @example
 * ```typescript
 * const template = getBaseRow(products);
 * const templateWithDefaults = getBaseRow(products, { inStock: true });
 * ```
 */
export function getBaseRow<T extends Record<string, any>, R extends { [K in keyof T]?: any } = { [K in keyof T]?: undefined }>(
  data: T[],
  defaults?: Partial<T>
): R {
  // Reject empty input up front, before any CSV instance is created.
  const hasNoRows = data.length === 0;
  if (hasNoRows) {
    throw new CSVError('Cannot create base row from empty data');
  }

  const table = CSV.fromData(data);
  return table.getBaseRow(defaults);
}
/**
 * Builds a new row that follows the structure of the data.
 *
 * @param data - Rows serving as the template.
 * @param rowData - Values to populate the row with (default: `{}`).
 * @returns A new object with all data fields.
 * @example
 * ```typescript
 * const newProduct = createRow(products, {
 *   id: 'P006',
 *   name: 'Headphones',
 *   price: 79.99
 * });
 * ```
 */
export function createRow<T extends Record<string, any>>(
  data: T[],
  rowData: Partial<T> = {}
): T {
  const table = CSV.fromData(data);
  return table.createRow(rowData);
}
/**
 * Maps each row to a new value with the supplied function.
 * Equivalent to calling `data.map()`; provided for API consistency.
 *
 * @param data - Rows to map over.
 * @param mapFn - Function receiving each row and its index.
 * @returns Array of the mapped values.
 * @example
 * ```typescript
 * const prices = mapData(products, p => p.price);
 * ```
 */
export function mapData<T extends Record<string, any>, R>(
  data: T[],
  mapFn: (row: T, index: number) => R
): R[] {
  const mapped: R[] = data.map(mapFn);
  return mapped;
}
/**
 * Keeps only the rows accepted by the supplied predicate.
 * Equivalent to calling `data.filter()`; provided for API consistency.
 *
 * @param data - Rows to filter.
 * @param predicate - Function receiving each row and its index; rows for
 * which it returns `true` are kept.
 * @returns Array of the rows that were kept.
 * @example
 * ```typescript
 * const availableProducts = filterData(products, p => p.stock > 0);
 * ```
 */
export function filterData<T extends Record<string, any>>(
  data: T[],
  predicate: (row: T, index: number) => boolean
): T[] {
  const kept: T[] = data.filter(predicate);
  return kept;
}
/**
 * Folds the rows into a single value.
 * Equivalent to calling `data.reduce()`; provided for API consistency.
 *
 * @param data - Rows to reduce.
 * @param reduceFn - Reducer receiving the accumulator, the row, and its index.
 * @param initialValue - Starting value of the accumulator.
 * @returns The final accumulator value.
 * @example
 * ```typescript
 * const totalValue = reduceData(
 *   products,
 *   (total, p) => total + (p.price * p.stock),
 *   0
 * );
 * ```
 */
export function reduceData<T extends Record<string, any>, R>(
  data: T[],
  reduceFn: (acc: R, row: T, index: number) => R,
  initialValue: R
): R {
  const folded: R = data.reduce(reduceFn, initialValue);
  return folded;
}
// Array transformation utilities, exposed through getters that read the
// corresponding member of CSVArrayUtils each time they are accessed.
export const arrayTransformations = {
  /**
   * Converts arrays into structured objects.
   * @param data - Array of arrays or objects
   * @param headerMap - Mapping configuration
   * @param headerRow - Optional header row
   * @returns Array of structured objects
   */
  get arrayToObjArray() {
    return CSVArrayUtils.arrayToObjArray;
  },

  /**
   * Converts structured objects into arrays.
   * @param data - Array of structured objects
   * @param headerMap - Mapping configuration
   * @param headers - Column headers
   * @param includeHeaders - Whether to include headers
   * @returns Array of arrays
   */
  get objArrayToArray() {
    return CSVArrayUtils.objArrayToArray;
  },

  /**
   * Groups objects by a field.
   * @param data - Array of objects
   * @param field - Field to group by
   * @returns Grouped objects
   */
  get groupByField() {
    return CSVArrayUtils.groupByField;
  }
};
/**
 * Reports how many rows the data holds.
 *
 * @param data - Rows to count.
 * @returns The number of rows, i.e. `data.length`.
 * @example
 * ```typescript
 * const rowCount = count(products);
 * ```
 */
export function count<T extends Record<string, any>>(
  data: T[]
): number {
  const { length } = data;
  return length;
}
/**
 * Serialises an array of objects to CSV text.
 * @param data - Array of objects to convert
 * @param options - Options handed straight to the CSV stringifier; when the
 * argument is omitted entirely, `{ header: true }` is used
 * @returns CSV content as a string
 * @throws {CSVError} When the stringifier throws; the original error is passed
 * along as the second constructor argument
 * @example
 * ```typescript
 * const csvData = toString(products, { header: true });
 * ```
 */
export function toString<T extends Record<string, any>>(
  data: T[],
  options: Parameters<typeof stringifyCSV>[1] = { header: true }
): string {
  let csvText: string;
  try {
    csvText = stringifyCSV(data, options);
  } catch (cause) {
    throw new CSVError('Failed to convert data to CSV string', cause);
  }
  return csvText;
}
/**
 * Runs the rows through schema validation.
 * Empty input short-circuits to an empty array without building a CSV
 * instance; otherwise the rows are wrapped with CSV.fromData, validated, and
 * unwrapped again.
 * @param data - Array of objects to validate
 * @param schema - The schema configuration to use for validation
 * @returns The validated data
 * @example
 * ```typescript
 * const validatedProducts = validate(products, {
 *   type: 'standard',
 *   version: 1,
 *   mode: 'strict',
 *   schema: {
 *     id: { type: 'string', required: true },
 *     price: { type: 'number', required: true }
 *   }
 * });
 * ```
 */
export function validate<T extends Record<string, any>, U extends Record<string, any> = T>(
  data: T[],
  schema: CSVSchemaConfig<U>
): U[] {
  const hasNoRows = data.length === 0;
  return hasNoRows ? [] : CSV.fromData(data).validate(schema).toArray();
}
/**
 * Invoke a callback once for every row, in order.
 * @param data - Array of objects to process
 * @param callback - Receives each row together with its index; its return value is ignored
 * @example
 * ```typescript
 * forEach(products, (product, index) => {
 *   console.log(`Product ${index}: ${product.name}`);
 * });
 * ```
 */
export function forEach<T extends Record<string, any>>(
  data: T[],
  callback: (row: T, index: number) => void
): void {
  data.forEach((row, index) => {
    callback(row, index);
  });
}
/**
 * Process rows with an async callback, optionally in concurrent batches.
 *
 * With the defaults every row is awaited before the next one starts. When
 * batching is enabled, rows are cut into batches of `batchSize`; up to
 * `batchConcurrency` batches are started together and all of them must settle
 * before the next group starts, so at most `batchSize * batchConcurrency`
 * callbacks are in flight at once.
 *
 * @param data - Array of objects to process
 * @param callback - Async function to process each row; receives the row and
 * its index in `data`
 * @param options - Options for batch processing
 * @param options.batchSize - Rows per batch (default 1). Floored to a whole
 * number; missing, zero, NaN or negative values are treated as 1
 * @param options.batchConcurrency - Batches started together (default 1).
 * Normalised the same way as `batchSize`
 * @returns Promise that resolves when processing is complete; it rejects with
 * the first error raised by a callback
 * @example
 * ```typescript
 * await forEachAsync(products, async (product) => {
 *   await api.updateProduct(product.id, product);
 * }, { batchSize: 5 });
 * ```
 */
export async function forEachAsync<T extends Record<string, any>>(
  data: T[],
  callback: (row: T, index: number) => Promise<void>,
  options: { batchSize?: number; batchConcurrency?: number } = {}
): Promise<void> {
  // Negative steps would make the loops below run forever and fractional
  // steps would yield fractional row indices, so clamp to a whole number >= 1.
  const toPositiveInt = (value: number | undefined): number =>
    Math.max(1, Math.floor(value || 1));

  const batchSize = toPositiveInt(options.batchSize);
  const batchConcurrency = toPositiveInt(options.batchConcurrency);

  if (batchSize === 1 && batchConcurrency === 1) {
    // Strictly sequential: one row at a time.
    for (let i = 0; i < data.length; i++) {
      await callback(data[i], i);
    }
    return;
  }

  // Snapshot the batches up front, remembering where each one starts so row
  // indices never have to be recomputed by multiplication.
  const batches: Array<{ start: number; rows: T[] }> = [];
  for (let start = 0; start < data.length; start += batchSize) {
    batches.push({ start, rows: data.slice(start, start + batchSize) });
  }

  // Run the batches group by group; a group finishes before the next begins.
  for (let i = 0; i < batches.length; i += batchConcurrency) {
    const group = batches.slice(i, i + batchConcurrency);
    await Promise.all(
      group.map(async ({ start, rows }) => {
        await Promise.all(rows.map((row, offset) => callback(row, start + offset)));
      })
    );
  }
}
/**
 * Map over rows asynchronously, one batch at a time.
 *
 * All transformers of a batch are started together and awaited as a group
 * before the next batch begins. Results are returned in input order.
 *
 * @param data - Array of objects to transform
 * @param transformer - Async function to transform each row; receives the row
 * and its index in `data`
 * @param options - Optional batch processing options
 * @param options.batchSize - Rows transformed concurrently (default 50).
 * Floored to a whole number; negative values are clamped to 1, and zero or
 * NaN fall back to the default
 * @returns Promise resolving to array of transformed results; it rejects with
 * the first error raised by a transformer
 * @example
 * ```typescript
 * const enrichedProducts = await mapAsync(products,
 *   async (product) => {
 *     const details = await api.getProductDetails(product.id);
 *     return { ...product, details };
 *   },
 *   { batchSize: 5 }
 * );
 * ```
 */
export async function mapAsync<T extends Record<string, any>, R>(
  data: T[],
  transformer: (row: T, index: number) => Promise<R>,
  options?: { batchSize?: number }
): Promise<R[]> {
  // A negative step would never terminate and a fractional one would hand
  // fractional indices to the transformer, so clamp to a whole number >= 1.
  const batchSize = Math.max(1, Math.floor(options?.batchSize || 50));
  const result: R[] = [];

  for (let start = 0; start < data.length; start += batchSize) {
    const batch = data.slice(start, start + batchSize);
    const batchResults = await Promise.all(
      batch.map((row, offset) => transformer(row, start + offset))
    );
    // Append one by one: spreading a very large batch into push() can exceed
    // the engine's argument-count limit.
    for (const value of batchResults) {
      result.push(value);
    }
  }

  return result;
}
/**
 * Reduce rows asynchronously.
 *
 * Two strategies are available:
 * - `'sequential'` (default): rows are folded one after another.
 * - `'mapreduce'`: rows are cut into batches of `batchSize`; every batch is
 *   folded concurrently starting from `initialValue`, then the per-batch
 *   results are folded together, again starting from `initialValue`.
 *   Because `initialValue` seeds every batch and the final fold, it should be
 *   an identity for the reduction (0 for sums, [] for concatenation, ...), and
 *   a mutable `initialValue` object is shared by all batches.
 *
 * For `'mapreduce'`, supply `options.combiner` to merge two accumulators.
 * Without it the per-batch results are passed back through `reducer` as if
 * they were rows (index -1), which only works when an accumulator has the same
 * shape as a row.
 *
 * @param data - Array of objects to reduce
 * @param reducer - Async reducer function
 * @param initialValue - Initial accumulator value
 * @param options - Optional processing options
 * @param options.strategy - `'sequential'` (default) or `'mapreduce'`
 * @param options.batchSize - Rows per batch for `'mapreduce'` (default 100).
 * Floored to a whole number; negative values are clamped to 1, and zero or
 * NaN fall back to the default
 * @param options.combiner - Merges a running total with one batch result
 * (`'mapreduce'` only)
 * @returns Promise resolving to final accumulated value
 * @example
 * ```typescript
 * const totalRevenue = await reduceAsync(
 *   orders,
 *   async (total, order) => {
 *     const exchangeRate = await getExchangeRate(order.currency);
 *     return total + (order.amount * exchangeRate);
 *   },
 *   0
 * );
 *
 * // Parallel variant: batches are summed concurrently, then added together
 * const totalRevenueParallel = await reduceAsync(
 *   orders,
 *   async (total, order) => total + order.amount,
 *   0,
 *   { strategy: 'mapreduce', batchSize: 50, combiner: (a, b) => a + b }
 * );
 * ```
 */
export async function reduceAsync<T extends Record<string, any>, R>(
  data: T[],
  reducer: (accumulator: R, row: T, index: number) => Promise<R>,
  initialValue: R,
  options?: {
    strategy?: 'sequential' | 'mapreduce';
    batchSize?: number;
    combiner?: (accumulator: R, batchResult: R) => R | Promise<R>;
  }
): Promise<R> {
  const strategy = options?.strategy || 'sequential';

  if (strategy === 'sequential') {
    // Simple sequential reduction
    let accumulator = initialValue;
    for (let i = 0; i < data.length; i++) {
      accumulator = await reducer(accumulator, data[i], i);
    }
    return accumulator;
  }

  // A negative step would never terminate and a fractional one would yield
  // fractional row indices, so clamp to a whole number >= 1.
  const batchSize = Math.max(1, Math.floor(options?.batchSize || 100));
  const combiner = options?.combiner;

  // Map phase: snapshot the batches, remembering each batch's first index.
  const batches: Array<{ start: number; rows: T[] }> = [];
  for (let start = 0; start < data.length; start += batchSize) {
    batches.push({ start, rows: data.slice(start, start + batchSize) });
  }

  // Fold every batch concurrently, each with its own accumulator.
  const batchResults = await Promise.all(
    batches.map(async ({ start, rows }) => {
      let batchAccumulator = initialValue;
      for (let i = 0; i < rows.length; i++) {
        batchAccumulator = await reducer(batchAccumulator, rows[i], start + i);
      }
      return batchAccumulator;
    })
  );

  // Reduce phase: merge the per-batch accumulators in batch order.
  let finalResult = initialValue;
  for (const batchResult of batchResults) {
    finalResult = combiner
      ? await combiner(finalResult, batchResult)
      // Legacy fallback kept for backward compatibility: treats the batch
      // accumulator as a row, which is only sound when R is shaped like T.
      : await reducer(finalResult, batchResult as unknown as T, -1);
  }
  return finalResult;
}
/**
 * Sorts rows by a column, returning the result through a Promise.
 *
 * Inputs of up to 10,000 rows are delegated to `sortBy`. Larger inputs are cut
 * into at most four chunks, each chunk is sorted on a copy, and the sorted
 * chunks are merged. No worker threads are used at present: all sorting runs
 * on the calling thread, and the chunking only marks where such parallelism
 * could be added.
 *
 * For the large-input path, two numeric values are compared numerically;
 * any other pair is compared via `String(value).localeCompare(...)`.
 * The input array is not modified on that path.
 *
 * @param data - Array of objects to sort
 * @param column - The column to sort by
 * @param direction - Sort direction (default: 'asc')
 * @returns Promise resolving to sorted array
 * @example
 * ```typescript
 * const sortedProducts = await sortByAsync(products, 'price', 'desc');
 * ```
 */
export async function sortByAsync<T extends Record<string, any>, K extends keyof T>(
  data: T[],
  column: K,
  direction: SortDirection = 'asc'
): Promise<T[]> {
  // For small datasets, just use the regular sort
  if (data.length <= 10000) {
    return sortBy(data, column, direction);
  }

  // Define the compare function based on column and direction
  const compare = (a: T, b: T): number => {
    const aVal = a[column];
    const bVal = b[column];
    // Handle numeric values
    if (typeof aVal === 'number' && typeof bVal === 'number') {
      return direction === 'asc' ? aVal - bVal : bVal - aVal;
    }
    // Default string comparison
    const comparison = String(aVal).localeCompare(String(bVal));
    return direction === 'asc' ? comparison : -comparison;
  };

  try {
    // `require` only exists in CommonJS builds. Probe for it instead of
    // letting a ReferenceError trigger the fallback (and its warning) on
    // every large sort in ESM or browser runtimes.
    const cpuCount: number =
      typeof require === 'function' ? require('os').cpus().length : 4;
    // Between 1 and 4 chunks; os.cpus() may report an empty list.
    const workerCount = Math.max(1, Math.min(cpuCount, 4));

    // Split into chunks
    const chunkSize = Math.ceil(data.length / workerCount);
    const chunks: T[][] = [];
    for (let i = 0; i < data.length; i += chunkSize) {
      chunks.push(data.slice(i, i + chunkSize));
    }

    // Sort each chunk on a copy (the step that could be moved into workers)
    const sortedChunks = chunks.map(chunk => [...chunk].sort(compare));

    // Merge the sorted chunks (k-way merge)
    return mergeKSortedArrays(sortedChunks, compare);
  } catch (error) {
    // Fallback to a single sort if something goes wrong
    console.warn('Parallel sort failed, falling back to synchronous sort:', error);
    return [...data].sort(compare);
  }
}
/**
 * Combines several already-sorted arrays into one sorted array.
 * The list is halved recursively and the halves are merged pairwise; when two
 * elements compare as equal, the one from the earlier array comes first.
 * An empty list yields a new empty array; a single array is returned as-is
 * (same reference, not a copy).
 * @param arrays - Array of sorted arrays
 * @param compare - Compare function the arrays were sorted with
 * @returns Single sorted array
 * @private
 */
function mergeKSortedArrays<T>(arrays: T[][], compare: (a: T, b: T) => number): T[] {
  switch (arrays.length) {
    case 0:
      return [];
    case 1:
      return arrays[0];
  }

  // Two-way merge; ties are resolved in favour of the left-hand array.
  const mergePair = (left: T[], right: T[]): T[] => {
    const merged: T[] = [];
    let li = 0;
    let ri = 0;
    while (li < left.length && ri < right.length) {
      merged.push(compare(left[li], right[ri]) <= 0 ? left[li++] : right[ri++]);
    }
    // At most one of these loops has anything left to copy.
    for (; li < left.length; li++) {
      merged.push(left[li]);
    }
    for (; ri < right.length; ri++) {
      merged.push(right[ri]);
    }
    return merged;
  };

  // Merge the arrays in the inclusive index range [lo, hi].
  const mergeRange = (lo: number, hi: number): T[] => {
    if (lo === hi) {
      return arrays[lo];
    }
    const mid = Math.floor((lo + hi) / 2);
    return mergePair(mergeRange(lo, mid), mergeRange(mid + 1, hi));
  };

  return mergeRange(0, arrays.length - 1);
}
/**
 * Return the leading rows of the data; a thin alias that forwards to `head`.
 * @param data - Array of objects
 * @param count - Number of rows to get (default 10); note that this parameter
 * shadows the module-level `count` function inside the body
 * @returns Whatever `head(data, count)` returns
 * @example
 * ```typescript
 * const topProducts = take(products, 5);
 * ```
 */
export function take<T extends Record<string, any>>(
  data: T[],
  count: number = 10
): T[] {
  const leadingRows = head(data, count);
  return leadingRows;
}
// Export all the functions as a default object.
// This bundles the module's standalone helpers (plus the
// `arrayTransformations` getter object) under a single default export, so
// callers can use either named imports or `utils.fn(...)` style access.
// The private helper `mergeKSortedArrays` is intentionally not listed.
// NOTE(review): the `toString` entry becomes an own property of this object
// and therefore shadows Object.prototype.toString; implicit string coercion of
// the default export would invoke the CSV `toString` with no data — confirm
// nothing relies on stringifying this object.
export default {
// Lookup and similarity search
findRow,
findRowByRegex,
findRows,
findRowWhere,
findRowsWhere,
findSimilarRows,
findMostSimilarRow,
groupBy,
update,
updateWhere,
updateColumn,
transform,
removeWhere,
append,
sortBy,
sortByAsync,
aggregate,
distinct,
pivot,
merge,
sample,
head,
tail,
take,
count,
toString,
validate,
// Iteration helpers (sync and async)
forEach,
forEachAsync,
mapAsync,
reduceAsync,
getBaseRow,
createRow,
mapData,
filterData,
reduceData,
// Column-level operations
addColumn,
removeColumn,
renameColumn,
reorderColumns,
castColumnType,
deduplicate,
split,
join,
unpivot,
fillMissingValues,
normalizeText,
trimWhitespace,
// Getter object forwarding to CSVArrayUtils
arrayTransformations
};