@atjsh/llmlingua-2

Version:

JavaScript/TypeScript Implementation of LLMLingua-2

89 lines • 3.56 kB

TypeScript

/**
 * @categoryDescription Adaptors
 * A collection of utility functions and types for model-specific token handling.
 *
 * @categoryDescription Utils
 * A collection of general-purpose utility functions.
 */
/**
 * Signature of a logger callback.
 *
 * Accepts any number of values of any type and returns nothing.
 *
 * @category Utils
 */
export type Logger = (...message: unknown[]) => void;
/**
 * Function signature corresponding to the `get_pure_token` function of the
 * original LLMLingua implementation.
 *
 * Takes a single token, which may be `null` or `undefined`, and always
 * returns a string.
 *
 * @see [Original Implementation](https://github.com/microsoft/LLMLingua/blob/e4e172afb42d8ae3c0b6cb271a3f5d6a812846a0/llmlingua/utils.py#L108)
 *
 * @category Adaptors
 */
export type GetPureTokenFunction = (token: string | null | undefined) => string;
/**
 * `GetPureTokenFunction` implementation for the "XLM-RoBERTa Large" model.
 *
 * @category Adaptors
 */
export declare const get_pure_tokens_xlm_roberta_large: GetPureTokenFunction;
/**
 * `GetPureTokenFunction` implementation for the "BERT Base Multilingual Cased" model.
 *
 * @category Adaptors
 */
export declare const get_pure_tokens_bert_base_multilingual_cased: GetPureTokenFunction;
/**
 * Function signature corresponding to the `is_begin_of_new_word` function of
 * the original LLMLingua implementation.
 *
 * Takes a token (which may be `null` or `undefined`), an optional list of
 * forced tokens, and an optional string-to-string token map, and returns a
 * boolean.
 *
 * @see [Original Implementation](https://github.com/microsoft/LLMLingua/blob/e4e172afb42d8ae3c0b6cb271a3f5d6a812846a0/llmlingua/utils.py#L81)
 *
 * @category Adaptors
 */
export type IsBeginOfNewWordFunction = (token: string | null | undefined, force_tokens?: string[], token_map?: Record<string, string>) => boolean;
/**
 * `IsBeginOfNewWordFunction` implementation for the "XLM-RoBERTa Large" model.
 *
 * @category Adaptors
 */
export declare const is_begin_of_new_word_xlm_roberta_large: IsBeginOfNewWordFunction;
/**
 * `IsBeginOfNewWordFunction` implementation for the "BERT Base Multilingual Cased" model.
 *
 * @category Adaptors
 */
export declare const is_begin_of_new_word_bert_base_multilingual_cased: IsBeginOfNewWordFunction;
/**
 * Counterpart of the `replace_added_token` function of the original
 * LLMLingua implementation.
 *
 * @see [Original Implementation](https://github.com/microsoft/LLMLingua/blob/e4e172afb42d8ae3c0b6cb271a3f5d6a812846a0/llmlingua/utils.py#L102)
 *
 * @param token - The token string to process.
 * @param token_map - A string-to-string mapping used for the replacement.
 * @returns The resulting token string.
 *
 * @category Utils
 */
export declare function replace_added_token(token: string, token_map: Record<string, string>): string;
/**
 * Calculate the **p-th percentile** of a numeric array.
 *
 * The function follows the "inclusive" linear-interpolation rule used by
 * Excel's `PERCENTILE.INC` and NumPy's default percentile implementation:
 *
 * 1. The input array is **copied and sorted** (ascending), so the caller's
 *    array keeps its original order.
 * 2. An index `k = (n − 1) × (p / 100)` is computed, where `n` is the array's
 *    length.
 * 3. If `k` is an integer, the element at that index is the percentile.
 *    Otherwise, the result is the linear interpolation between the two nearest
 *    ranks (`⌊k⌋` and `⌈k⌉`).
 *
 * @param arr - Source data. The function does **not** mutate it.
 * @param p - Desired percentile (0 ≤ `p` ≤ 100).
 * @returns The computed percentile value. If the array is empty, the function
 *          returns `0`.
 *
 * @throws {RangeError} If `p` is outside the 0–100 range.
 *
 * @example
 * const data = [7, 15, 36, 39, 40, 41];
 * percentile(data, 25); // → 20.25 (1st quartile: k = 1.25, between 15 and 36)
 * percentile(data, 50); // → 37.5  (median: k = 2.5, between 36 and 39)
 * percentile(data, 90); // → 40.5  (k = 4.5, between 40 and 41)
 *
 * @category Utils
 */
export declare function percentile(arr: number[], p: number): number;
//# sourceMappingURL=utils.d.ts.map