@atjsh/llmlingua-2

JavaScript/TypeScript Implementation of LLMLingua-2

prompt-compressor.d.ts:
/**
 * @categoryDescription Core
 * Class & functions for customized use of prompt compression
 */
import { PreTrainedModel, PreTrainedTokenizer } from "@huggingface/transformers";
import { Tiktoken } from "js-tiktoken/lite";
import { GetPureTokenFunction, IsBeginOfNewWordFunction, Logger } from "./utils.js";
/**
 * Options for compressing prompts.
 *
 * @category Core
 */
export interface CompressPromptOptions {
    /**
     * Float value between 0 and 1 indicating the rate of compression.
     * 0.1 means 10% of the original tokens will be kept.
     */
    rate: number;
    /**
     * Target number of tokens to keep after compression.
     * If set, this will override the `rate` option.
     *
     * @defaultValue `-1` (no target)
     */
    targetToken?: number;
    /**
     * How to convert token probabilities to word probabilities:
     * "mean" will average the probabilities of the tokens in a word;
     * "first" will take the probability of the first token in a word.
     *
     * @defaultValue `"mean"`
     */
    tokenToWord?: "mean" | "first";
    /**
     * List of tokens that must be kept in the compressed prompt.
     * These tokens will not be removed regardless of their probability.
     *
     * @defaultValue `[]`
     */
    forceTokens?: string[];
    /**
     * If true, reserve a digit for forced tokens.
     *
     * @defaultValue `false`
     */
    forceReserveDigit?: boolean;
    /**
     * If true, drop consecutive tokens that are forced.
     * This is useful to avoid keeping too many forced tokens in a row.
     *
     * @alpha
     * @defaultValue `false`
     */
    dropConsecutive?: boolean;
    /**
     * List of tokens that indicate the end of a chunk.
     * The context will be split into chunks at these tokens.
     *
     * @defaultValue `[".", "\n"]`
     */
    chunkEndTokens?: string[];
}
/**
 * Options for compressing prompts, with snake_case keys.
 *
 * @category Core
 */
export interface CompressPromptOptionsSnakeCase {
    /**
     * Float value between 0 and 1 indicating the rate of compression.
     * 0.1 means 10% of the original tokens will be kept.
     */
    rate: number;
    /**
     * Target number of tokens to keep after compression.
     * If set, this will override the `rate` option.
     *
     * @defaultValue `-1` (no target)
     */
    target_token?: number;
    /**
     * How to convert token probabilities to word probabilities:
     * "mean" will average the probabilities of the tokens in a word;
     * "first" will take the probability of the first token in a word.
     *
     * @defaultValue `"mean"`
     */
    token_to_word?: "mean" | "first";
    /**
     * List of tokens that must be kept in the compressed prompt.
     * These tokens will not be removed regardless of their probability.
     *
     * @defaultValue `[]`
     */
    force_tokens?: string[];
    /**
     * If true, reserve a digit for forced tokens.
     *
     * @defaultValue `false`
     */
    force_reserve_digit?: boolean;
    /**
     * If true, drop consecutive tokens that are forced.
     * This is useful to avoid keeping too many forced tokens in a row.
     *
     * @alpha
     * @defaultValue `false`
     */
    drop_consecutive?: boolean;
    /**
     * List of tokens that indicate the end of a chunk.
     * The context will be split into chunks at these tokens.
     *
     * @defaultValue `[".", "\n"]`
     */
    chunk_end_tokens?: string[];
}
/**
 * The TypeScript implementation of the original `PromptCompressor`, a class for compressing prompts using a language model.
 *
 * @see [Original Implementation](https://github.com/microsoft/LLMLingua/blob/e4e172afb42d8ae3c0b6cb271a3f5d6a812846a0/llmlingua/prompt_compressor.py)
 * @category Core
 */
export declare class PromptCompressorLLMLingua2 {
    /**
     * The pre-trained model to use for compression.
     */
    private readonly model;
    /**
     * The pre-trained tokenizer to use for compression.
     */
    private readonly tokenizer;
    /**
     * Function to get the pure token from a token.
     * This is used to normalize tokens before processing.
     */
    private readonly getPureToken;
    /**
     * Function to check if a token is the beginning of a new word.
     * This is used to determine how to merge tokens into words.
     */
    private readonly isBeginOfNewWord;
    /**
     * The tokenizer to use when calculating the compression rate.
     */
    private readonly oaiTokenizer;
    /**
     * Configuration for LLMLingua2.
     */
    private readonly llmlingua2Config;
    /**
     * Logger function to log messages.
     */
    private readonly logger;
    private addedTokens;
    private specialTokens;
    constructor(
    /**
     * The pre-trained model to use for compression.
     */
    model: PreTrainedModel, 
    /**
     * The pre-trained tokenizer to use for compression.
     */
    tokenizer: PreTrainedTokenizer, 
    /**
     * Function to get the pure token from a token.
     * This is used to normalize tokens before processing.
     */
    getPureToken: GetPureTokenFunction, 
    /**
     * Function to check if a token is the beginning of a new word.
     * This is used to determine how to merge tokens into words.
     */
    isBeginOfNewWord: IsBeginOfNewWordFunction, 
    /**
     * The tokenizer to use when calculating the compression rate.
     */
    oaiTokenizer: Tiktoken, 
    /**
     * Configuration for LLMLingua2.
     */
    llmlingua2Config?: {
        /**
         * Maximum batch size for processing prompts.
         * This is used to limit the number of prompts processed in a single batch.
         */
        max_batch_size: number;
        /**
         * Maximum number of tokens to force in the compression.
         * This is used to ensure that certain tokens are always included in the compressed prompt.
         */
        max_force_token: number;
        /**
         * Maximum sequence length for the model.
         * This is used to limit the length of the input sequences to the model.
         */
        max_seq_length: number;
    }, 
    /**
     * Logger function to log messages.
     */
    logger?: Logger);
    /**
     * Compresses a prompt based on the given options.
     */
    compress(context: string, { rate, targetToken, tokenToWord, forceTokens, forceReserveDigit, dropConsecutive, chunkEndTokens, }: CompressPromptOptions): Promise<string>;
    /**
     * Compresses a prompt based on the given options. Alias for `compress`, but uses snake_case for options.
     *
     * @alias compress
     */
    compress_prompt(context: string, options: CompressPromptOptionsSnakeCase): Promise<string>;
    private compressSingleContext;
    private chunkContext;
    private getTokenLength;
    private mergeTokenToWord;
    private tokenProbToWordProb;
    private compressContexts;
}
//# sourceMappingURL=prompt-compressor.d.ts.map
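
For orientation, a minimal usage sketch follows. It is not from the package's documentation: the model id, the assumption that ONNX weights for it are loadable by transformers.js, and the BERT-style word-boundary helpers (with assumed `(token: string)` signatures for `GetPureTokenFunction` and `IsBeginOfNewWordFunction`) are all unverified assumptions; adapt them to the tokenizer your model actually uses.

import { AutoModelForTokenClassification, AutoTokenizer } from "@huggingface/transformers";
import { Tiktoken } from "js-tiktoken/lite";
import cl100k_base from "js-tiktoken/ranks/cl100k_base";
import { PromptCompressorLLMLingua2 } from "@atjsh/llmlingua-2";

// Assumed model id; loading it requires ONNX weights usable by transformers.js.
const modelId = "microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank";
const model = await AutoModelForTokenClassification.from_pretrained(modelId);
const tokenizer = await AutoTokenizer.from_pretrained(modelId);

// Assumed helpers following BERT WordPiece conventions, where continuation
// pieces are prefixed with "##"; other tokenizers mark word boundaries differently.
const getPureToken = (token: string) => token.replace(/^##/, "");
const isBeginOfNewWord = (token: string) => !token.startsWith("##");

const compressor = new PromptCompressorLLMLingua2(
    model,
    tokenizer,
    getPureToken,
    isBeginOfNewWord,
    new Tiktoken(cl100k_base), // used only to measure the compression rate
);

const longPrompt = "..."; // your original prompt
const compressed = await compressor.compress(longPrompt, {
    rate: 0.33, // keep roughly a third of the tokens
    forceTokens: ["\n", ".", "!", "?"],
});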
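
`compress_prompt` performs the same operation with snake_case option keys, mirroring the original Python API:

// Equivalent call through the snake_case alias:
const alsoCompressed = await compressor.compress_prompt(longPrompt, {
    rate: 0.33,
    force_tokens: ["\n", ".", "!", "?"],
    chunk_end_tokens: [".", "\n"],
});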
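
Per the option docs, `targetToken` overrides `rate` when both are given:

// targetToken takes precedence over rate when set:
await compressor.compress(longPrompt, {
    rate: 0.5,        // ignored because targetToken is set
    targetToken: 200, // aim for about 200 tokens in the output
});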
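
The `tokenToWord` option decides how per-token keep probabilities are collapsed into one probability per word before low-probability words are dropped. A conceptual sketch of the two modes (not the library's internal `tokenProbToWordProb` code):

// Conceptual illustration of tokenToWord; not the library's implementation.
function wordProbability(tokenProbs: number[], mode: "mean" | "first"): number {
    if (mode === "first") return tokenProbs[0];
    return tokenProbs.reduce((sum, p) => sum + p, 0) / tokenProbs.length;
}

// A word tokenized as ["transform", "##ers"] with keep probabilities [0.9, 0.7]:
wordProbability([0.9, 0.7], "mean");  // 0.8
wordProbability([0.9, 0.7], "first"); // 0.9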