/**
* @module generation/configuration_utils
*/
import { pick } from "../utils/core.js";
/**
* Class that holds a configuration for a generation task.
*/
export class GenerationConfig {
// Parameters that control the length of the output
/**
* The maximum length the generated tokens can have.
* Corresponds to the length of the input prompt + `max_new_tokens`.
* Its effect is overridden by `max_new_tokens`, if also set.
* @type {number}
* @default 20
*/
max_length = 20;
/**
* The maximum number of tokens to generate, ignoring the number of tokens in the prompt.
* @type {number}
* @default null
*/
max_new_tokens = null;
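// Illustrative sketch (not part of the class): when both limits are set,
// `max_new_tokens` takes precedence over `max_length`, e.g.:
//
//   const cfg = new GenerationConfig({ max_length: 128, max_new_tokens: 40 });
//   // At most 40 new tokens are generated, regardless of the prompt length.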
/**
* The minimum length of the sequence to be generated.
* Corresponds to the length of the input prompt + `min_new_tokens`.
* Its effect is overridden by `min_new_tokens`, if also set.
* @type {number}
* @default 0
*/
min_length = 0;
/**
* The minimum number of tokens to generate, ignoring the number of tokens in the prompt.
* @type {number}
* @default null
*/
min_new_tokens = null;
/**
* Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
* - `true`, where the generation stops as soon as there are `num_beams` complete candidates;
* - `false`, where a heuristic is applied and the generation stops when it is very unlikely to find better candidates;
* - `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm).
* @type {boolean|"never"}
* @default false
*/
early_stopping = false;
/**
* The maximum amount of time that you allow the computation to run for, in seconds.
* Generation will still finish the current pass after the allotted time has elapsed.
* @type {number}
* @default null
*/
max_time = null;
// Parameters that control the generation strategy used
/**
* Whether or not to use sampling; use greedy decoding otherwise.
* @type {boolean}
* @default false
*/
do_sample = false;
/**
* Number of beams for beam search. 1 means no beam search.
* @type {number}
* @default 1
*/
num_beams = 1;
/**
* Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
* See [this paper](https://arxiv.org/pdf/1610.02424.pdf) for more details.
* @type {number}
* @default 1
*/
num_beam_groups = 1;
/**
* The value balances the model confidence and the degeneration penalty in contrastive search decoding.
* @type {number}
* @default null
*/
penalty_alpha = null;
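// Illustrative sketch (not part of the class): contrastive search is typically
// enabled by combining a positive `penalty_alpha` with a small `top_k`, e.g.:
//
//   const cfg = new GenerationConfig({ penalty_alpha: 0.6, top_k: 4 });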
/**
* Whether or not the model should use the past key/value attentions (if applicable to the model) to speed up decoding.
* @type {boolean}
* @default true
*/
use_cache = true;
// Parameters for manipulation of the model output logits
/**
* The value used to modulate the next token probabilities.
* @type {number}
* @default 1.0
*/
temperature = 1.0;
/**
* The number of highest probability vocabulary tokens to keep for top-k-filtering.
* @type {number}
* @default 50
*/
top_k = 50;
/**
* If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation.
* @type {number}
* @default 1.0
*/
top_p = 1.0;
/**
* Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated.
* If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to `typical_p` or higher are kept for generation.
* See [this paper](https://arxiv.org/pdf/2202.00666.pdf) for more details.
* @type {number}
* @default 1.0
*/
typical_p = 1.0;
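// Illustrative sketch (not part of the class): a typical sampling setup
// enables `do_sample` and combines temperature with top-k/top-p filtering:
//
//   const cfg = new GenerationConfig({ do_sample: true, temperature: 0.7, top_k: 50, top_p: 0.9 });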
/**
* If set to float strictly between 0 and 1, only tokens with a conditional probability greater than `epsilon_cutoff` will be sampled.
* In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model.
* See [Truncation Sampling as Language Model Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
* @type {number}
* @default 0.0
*/
epsilon_cutoff = 0.0;
/**
* Eta sampling is a hybrid of locally typical sampling and epsilon sampling.
* If set to float strictly between 0 and 1, a token is only considered if its probability is greater than either `eta_cutoff` or `sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits)))`.
* The latter term is intuitively the expected next token probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
* See [Truncation Sampling as Language Model Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
* @type {number}
* @default 0.0
*/
eta_cutoff = 0.0;
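// Minimal sketch of the eta threshold described above (illustrative only;
// `probs` is a hypothetical softmax distribution over the next-token logits).
// Per the eta-sampling paper, the effective cutoff is the smaller of the two terms:
//
//   const entropy = -probs.reduce((acc, p) => acc + (p > 0 ? p * Math.log(p) : 0), 0);
//   const threshold = Math.min(eta_cutoff, Math.sqrt(eta_cutoff) * Math.exp(-entropy));
//   // Only tokens whose probability exceeds `threshold` are kept for sampling.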
/**
* This value is subtracted from a beam's score if it generates a token that any beam from another group has generated at the same time step.
* Note that `diversity_penalty` is only effective if `group beam search` is enabled.
* @type {number}
* @default 0.0
*/
diversity_penalty = 0.0;
/**
* The parameter for repetition penalty. 1.0 means no penalty.
* See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
* @type {number}
* @default 1.0
*/
repetition_penalty = 1.0;
/**
* The parameter for encoder_repetition_penalty.
* An exponential penalty on sequences that are not in the original input.
* 1.0 means no penalty.
* @type {number}
* @default 1.0
*/
encoder_repetition_penalty = 1.0;
/**
* Exponential penalty to the length that is used with beam-based generation.
* It is applied as an exponent to the sequence length, which in turn is used to divide the score of the sequence.
* Since the score is the log likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while `length_penalty` < 0.0 encourages shorter sequences.
* @type {number}
* @default 1.0
*/
length_penalty = 1.0;
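// Worked example of the scoring described above (illustrative; the values are
// hypothetical): the sequence score is its log likelihood divided by
// `length ** length_penalty`.
//
//   const seqLength = 20;        // hypothetical generated length
//   const logLikelihood = -8.0;  // sum of token log-probs, always <= 0
//   const score = logLikelihood / Math.pow(seqLength, length_penalty);
//   // With length_penalty > 0, longer sequences divide by a larger number,
//   // making their negative scores less negative, i.e. favoring longer outputs.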
/**
* If set to int > 0, all ngrams of that size can only occur once.
* @type {number}
* @default 0
*/
no_repeat_ngram_size = 0;
/**
* List of token ids that are not allowed to be generated.
* In order to get the token ids of the words that should not appear in the generated text, use
* `tokenizer(bad_words, { add_prefix_space: true, add_special_tokens: false }).input_ids`.
* @type {number[][]}
* @default null
*/
bad_words_ids = null;
/**
* List of token ids that must be generated.
* If given a `number[][]`, this is treated as a simple list of words that must be included, the opposite of `bad_words_ids`.
* If given `number[][][]`, this triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one can allow different forms of each word.
* @type {number[][]|number[][][]}
* @default null
*/
force_words_ids = null;
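// Illustrative shapes (the token ids below are hypothetical): with `number[][]`,
// each inner list is one required word; with `number[][][]`, each entry lists
// alternative forms, of which only one must appear:
//
//   cfg.force_words_ids = [[5678], [901, 234]];              // both words required
//   cfg.force_words_ids = [[[5678], [5679]], [[901, 234]]];  // 5678 or 5679, plus the second word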
/**
* Whether to renormalize the logits after applying all the logits processors or warpers (including the custom ones).
* It's highly recommended to set this flag to `true` as the search algorithms suppose the score logits are normalized but some logit processors or warpers break the normalization.
* @type {boolean}
* @default false
*/
renormalize_logits = false;
/**
* Custom constraints that can be added to the generation to ensure that the output contains certain tokens, as defined by `Constraint` objects, in the most sensible way possible.
* @type {Object[]}
* @default null
*/
constraints = null;
/**
* The id of the token to force as the first generated token after the `decoder_start_token_id`.
* Useful for multilingual models like mBART where the first generated token needs to be the target language token.
* @type {number}
* @default null
*/
forced_bos_token_id = null;
/**
* The id of the token to force as the last generated token when `max_length` is reached.
* Optionally, use a list to set multiple *end-of-sequence* tokens.
* @type {number|number[]}
* @default null
*/
forced_eos_token_id = null;
/**
* Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method from crashing. Note that using `remove_invalid_values` can slow down generation.
* @type {boolean}
*/
remove_invalid_values = false;
/**
* This tuple adds an exponentially increasing length penalty after a certain number of tokens have been generated.
* The tuple shall consist of `(start_index, decay_factor)`, where `start_index` indicates where the penalty starts and `decay_factor` represents the factor of exponential decay.
* @type {[number, number]}
* @default null
*/
exponential_decay_length_penalty = null;
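// Illustrative example (hypothetical values): start penalizing after 50
// generated tokens, with an exponential decay factor of 1.05:
//
//   cfg.exponential_decay_length_penalty = [50, 1.05];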
/**
* A list of tokens that will be suppressed at generation.
* The `SuppressTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
* @type {number[]}
* @default null
*/
suppress_tokens = null;
/**
* A streamer that will be used to stream the generation.
* @type {import('./streamers.js').TextStreamer}
* @default null
*/
streamer = null;
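// Illustrative sketch (the `TextStreamer` options shown are assumptions about
// its API, not guaranteed by this module):
//
//   import { TextStreamer } from './streamers.js';
//   const cfg = new GenerationConfig({ max_new_tokens: 100 });
//   cfg.streamer = new TextStreamer(tokenizer, { skip_prompt: true });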
/**
* A list of tokens that will be suppressed at the beginning of the generation.
* The `SuppressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
* @type {number[]}
* @default null
*/
begin_suppress_tokens = null;
/**
* A list of integer pairs indicating a mapping from generation indices to token indices that will be forced before sampling.
* For example, `[[1, 123]]` means the second generated token will always be the token with index 123.
* @type {[number, number][]}
* @default null
*/
forced_decoder_ids = null;
/**
* The guidance scale for classifier-free guidance (CFG). CFG is enabled by setting `guidance_scale > 1`.
* A higher guidance scale encourages the model to generate samples that are more closely linked to the input
* prompt, usually at the expense of quality.
* @type {number}
* @default null
*/
guidance_scale = null;
// Parameters that define the output variables of `generate`
/**
* The number of independently computed returned sequences for each element in the batch.
* @type {number}
* @default 1
*/
num_return_sequences = 1;
/**
* Whether or not to return the attentions tensors of all attention layers.
* See `attentions` under returned tensors for more details.
* @type {boolean}
* @default false
*/
output_attentions = false;
/**
* Whether or not to return the hidden states of all layers.
* See `hidden_states` under returned tensors for more details.
* @type {boolean}
* @default false
*/
output_hidden_states = false;
/**
* Whether or not to return the prediction scores.
* See `scores` under returned tensors for more details.
* @type {boolean}
* @default false
*/
output_scores = false;
/**
* Whether or not to return a `ModelOutput` instead of a plain tuple.
* @type {boolean}
* @default false
*/
return_dict_in_generate = false;
// Special tokens that can be used at generation time
/**
* The id of the *padding* token.
* @type {number}
* @default null
*/
pad_token_id = null;
/**
* The id of the *beginning-of-sequence* token.
* @type {number}
* @default null
*/
bos_token_id = null;
/**
* The id of the *end-of-sequence* token.
* Optionally, use a list to set multiple *end-of-sequence* tokens.
* @type {number|number[]}
* @default null
*/
eos_token_id = null;
// Generation parameters exclusive to encoder-decoder models
/**
* If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`.
* @type {number}
* @default 0
*/
encoder_no_repeat_ngram_size = 0;
/**
* If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token.
* @type {number}
* @default null
*/
decoder_start_token_id = null;
// Wild card
/**
* Additional generation kwargs will be forwarded to the `generate` function of the model.
* Kwargs that are not present in `generate`'s signature will be used in the model forward pass.
* @type {Object}
* @default {}
*/
generation_kwargs = {};
/**
 * Create a new GenerationConfig, copying any known generation parameters from the given config object.
 * @param {GenerationConfig|import('../configs.js').PretrainedConfig} config The config to copy parameters from.
 */
constructor(config) {
// Only copy over the properties that this class declares; ignore anything else on `config`.
Object.assign(this, pick(config, Object.getOwnPropertyNames(this)));
}
}
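// Example usage (illustrative sketch; `model`, `inputs`, and the shape of the
// `generate` call are assumptions about the surrounding Transformers.js API):
//
//   const config = new GenerationConfig({
//       max_new_tokens: 50,
//       do_sample: true,
//       top_k: 40,
//   });
//   // const output = await model.generate({ ...inputs, generation_config: config });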