@atjsh/llmlingua-2
JavaScript/TypeScript Implementation of LLMLingua-2

prompt-compressor.d.ts (TypeScript)
/**
* @categoryDescription Core
* Class & functions for customized use of prompt compression
*/
import { PreTrainedModel, PreTrainedTokenizer } from "@huggingface/transformers";
import { Tiktoken } from "js-tiktoken/lite";
import { GetPureTokenFunction, IsBeginOfNewWordFunction, Logger } from "./utils.js";
/**
* Options for compressing prompts.
*
* @category Core
*/
export interface CompressPromptOptions {
/**
* Float value between 0 and 1 indicating the compression rate.
* For example, `0.1` means 10% of the original tokens will be kept.
*/
rate: number;
/**
* Target number of tokens to keep after compression.
* If set, this will override the `rate` option.
*
* @defaultValue `-1` (no target)
*/
targetToken?: number;
/**
* How to convert token probabilities to word probabilities.
* "mean" will average the probabilities of tokens in a word,
* "first" will take the probability of the first token in a word.
*
* @defaultValue `"mean"`
*/
tokenToWord?: "mean" | "first";
/**
* List of tokens that must be kept in the compressed prompt.
* These tokens will not be removed regardless of their probability.
*
* @defaultValue `[]`
*/
forceTokens?: string[];
/**
* If true, reserve a digit for forced tokens.
*
* @defaultValue `false`
*/
forceReserveDigit?: boolean;
/**
* If true, drop consecutive tokens that are forced.
* This is useful to avoid keeping too many forced tokens in a row.
*
* @alpha
* @defaultValue `false`
*/
dropConsecutive?: boolean;
/**
* List of tokens that indicate the end of a chunk.
* The context will be split into chunks at these tokens.
* @defaultValue `[".", "\n"]`
*/
chunkEndTokens?: string[];
}
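// A sketch of a plausible options object for `compress` (illustrative
// values, not defaults):
//
// const options: CompressPromptOptions = {
//   rate: 0.33,               // keep roughly a third of the tokens
//   forceTokens: ["\n", "?"], // never drop newlines or question marks
//   tokenToWord: "mean",      // average token probabilities per word
// };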
/**
* Options for compressing prompts, using snake_case option names.
*
* @category Core
*/
export interface CompressPromptOptionsSnakeCase {
/**
* Float value between 0 and 1 indicating the compression rate.
* For example, `0.1` means 10% of the original tokens will be kept.
*/
rate: number;
/**
* Target number of tokens to keep after compression.
* If set, this will override the `rate` option.
*
* @defaultValue `-1` (no target)
*/
target_token?: number;
/**
* How to convert token probabilities to word probabilities.
* "mean" will average the probabilities of tokens in a word,
* "first" will take the probability of the first token in a word.
*
* @defaultValue `"mean"`
*/
token_to_word?: "mean" | "first";
/**
* List of tokens that must be kept in the compressed prompt.
* These tokens will not be removed regardless of their probability.
*
* @defaultValue `[]`
*/
force_tokens?: string[];
/**
* If true, reserve a digit for forced tokens.
*
* @defaultValue `false`
*/
force_reserve_digit?: boolean;
/**
* If true, drop consecutive tokens that are forced.
* This is useful to avoid keeping too many forced tokens in a row.
*
* @alpha
* @defaultValue `false`
*/
drop_consecutive?: boolean;
/**
* List of tokens that indicate the end of a chunk.
* The context will be split into chunks at these tokens.
* @defaultValue `[".", "\n"]`
*/
chunk_end_tokens?: string[];
}
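// The snake_case variant mirrors the option names of the original Python
// `PromptCompressor`; an illustrative equivalent of the object above:
//
// const options: CompressPromptOptionsSnakeCase = {
//   rate: 0.33,
//   force_tokens: ["\n", "?"],
//   token_to_word: "mean",
// };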
/**
* The TypeScript implementation of the original `PromptCompressor`, a class that compresses prompts using a language model.
*
* @see [Original Implementation](https://github.com/microsoft/LLMLingua/blob/e4e172afb42d8ae3c0b6cb271a3f5d6a812846a0/llmlingua/prompt_compressor.py)
* @category Core
*/
export declare class PromptCompressorLLMLingua2 {
/**
* The pre-trained model to use for compression.
*/
private readonly model;
/**
* The pre-trained tokenizer to use for compression.
*/
private readonly tokenizer;
/**
* Function to get the pure token from a token.
* This is used to normalize tokens before processing.
*/
private readonly getPureToken;
/**
* Function to check if a token is the beginning of a new word.
* This is used to determine how to merge tokens into words.
*/
private readonly isBeginOfNewWord;
/**
* The tokenizer used to calculate the compression rate.
*/
private readonly oaiTokenizer;
/**
* Configuration for LLMLingua2.
*/
private readonly llmlingua2Config;
/**
* Logger function to log messages.
*/
private readonly logger;
/**
 * Tokens added to the tokenizer to support `forceTokens`.
 */
private addedTokens;
/**
 * The tokenizer's special tokens.
 */
private specialTokens;
constructor(
/**
* The pre-trained model to use for compression.
*/
model: PreTrainedModel,
/**
* The pre-trained tokenizer to use for compression.
*/
tokenizer: PreTrainedTokenizer,
/**
* Function to get the pure token from a token.
* This is used to normalize tokens before processing.
*/
getPureToken: GetPureTokenFunction,
/**
* Function to check if a token is the beginning of a new word.
* This is used to determine how to merge tokens into words.
*/
isBeginOfNewWord: IsBeginOfNewWordFunction,
/**
* The tokenizer used to calculate the compression rate.
*/
oaiTokenizer: Tiktoken,
/**
* Configuration for LLMLingua2.
*/
llmlingua2Config?: {
/**
* Maximum batch size for processing prompts.
* This is used to limit the number of prompts processed in a single batch.
*/
max_batch_size: number;
/**
* Maximum number of tokens to force in the compression.
* This is used to ensure that certain tokens are always included in the compressed prompt.
*/
max_force_token: number;
/**
* Maximum sequence length for the model.
* This is used to limit the length of the input sequences to the model.
*/
max_seq_length: number;
},
/**
* Logger function to log messages.
*/
logger?: Logger);
/**
* Compresses a prompt based on the given options.
*/
compress(context: string, { rate, targetToken, tokenToWord, forceTokens, forceReserveDigit, dropConsecutive, chunkEndTokens, }: CompressPromptOptions): Promise<string>;
/**
* Compresses a prompt based on the given options. Alias for `compress` that accepts snake_case option names.
*
* @alias compress
*/
compress_prompt(context: string, options: CompressPromptOptionsSnakeCase): Promise<string>;
private compressSingleContext;
private chunkContext;
private getTokenLength;
private mergeTokenToWord;
private tokenProbToWordProb;
private compressContexts;
}
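// A minimal usage sketch (not part of the declaration file). The model id
// and the `getPureToken` / `isBeginOfNewWord` helpers are assumptions: the
// package imports their types from "./utils.js", but the concrete exports
// may differ.
//
// import { AutoModel, AutoTokenizer } from "@huggingface/transformers";
// import { Tiktoken } from "js-tiktoken/lite";
// import cl100k_base from "js-tiktoken/ranks/cl100k_base";
//
// const modelId = "microsoft/llmlingua-2-xlm-roberta-large-meetingbank"; // assumed checkpoint
// const model = await AutoModel.from_pretrained(modelId);
// const tokenizer = await AutoTokenizer.from_pretrained(modelId);
//
// const compressor = new PromptCompressorLLMLingua2(
//   model,
//   tokenizer,
//   getPureToken,     // assumed helper of type GetPureTokenFunction
//   isBeginOfNewWord, // assumed helper of type IsBeginOfNewWordFunction
//   new Tiktoken(cl100k_base), // counts tokens for the compression rate
// );
//
// const compressed = await compressor.compress(longContext, {
//   rate: 0.33,                // keep roughly a third of the tokens
//   forceTokens: ["\n", "?"],
// });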
//# sourceMappingURL=prompt-compressor.d.ts.map