@huggingface/transformers

State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!

import { GenerationConfig } from "../../generation/configuration_utils.js";

export class WhisperGenerationConfig extends GenerationConfig {

    /**
     * Whether to return the timestamps with the text. This enables the `WhisperTimestampsLogitsProcessor`.
     * @type {boolean}
     */
    return_timestamps = null;

    /**
     * Whether to return token-level timestamps with the text. This can be used with or without
     * the `return_timestamps` option. To get word-level timestamps, use the tokenizer to group
     * the tokens into words.
     * @type {boolean}
     */
    return_token_timestamps = null;

    /**
     * The number of audio frames available in this chunk. This is only used when generating word-level timestamps.
     * @type {number}
     */
    num_frames = null;

    /**
     * Alignment heads to predict word-level timestamps. This is a list of [layer, head] pairs that
     * select the cross-attention heads that are highly correlated to word-level timing.
     * @type {[number, number][]}
     */
    alignment_heads = null;

    /**
     * Task to use for generation, either "translate" or "transcribe".
     * @type {string}
     */
    task = null;

    /**
     * Language token to use for generation, in the form of `<|en|>`, `en`, or `english`.
     * You can find all the possible language tokens in the `model.generation_config.lang_to_id` dictionary.
     * @type {string}
     */
    language = null;

    /**
     * The id of the `"<|notimestamps|>"` token.
     * @type {number}
     */
    no_timestamps_token_id = null;

    /**
     * Rank-1 list of token IDs created by passing text to [`~WhisperProcessor.get_prompt_ids`] that is
     * provided as a prompt to each chunk. This can be used to provide or "prompt-engineer" a context for
     * transcription, e.g. custom vocabularies or proper nouns, to make it more likely to predict those words
     * correctly. It cannot be used in conjunction with `decoder_start_token_id`, as it overwrites this value.
     * @type {number[]}
     */
    prompt_ids = null;

    /**
     * Whether the model is multilingual or not.
     * @type {boolean}
     */
    is_multilingual = null;

    /**
     * (Optional) A mapping from language tokens to their corresponding IDs.
     * Only required if the model is multilingual.
     * @type {Record<string, number>|null}
     */
    lang_to_id = null;

    /**
     * (Optional) A mapping from task tokens to their corresponding IDs.
     * @type {Record<string, number>|null}
     */
    task_to_id = null;

    /**
     * Used to set the maximum value of the initial timestamp. This is used to prevent the model from
     * predicting timestamps that are too far in the future.
     * @type {number}
     */
    max_initial_timestamp_index = 1;
}

/**
 * @typedef {import('../../generation/parameters.js').GenerationFunctionParameters & {generation_config: WhisperGenerationConfig} & WhisperGenerationConfig} WhisperGenerationFunctionParameters
 */
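In practice these options are rarely set by constructing a WhisperGenerationConfig by hand; they are usually passed as generation options to an automatic-speech-recognition pipeline, which forwards them to the Whisper model. A minimal sketch of that flow follows; the model id and audio URL are placeholders, not part of this file.

import { pipeline } from "@huggingface/transformers";

// Placeholder checkpoint: any Whisper ONNX model on the Hub follows the same pattern.
const transcriber = await pipeline(
    "automatic-speech-recognition",
    "onnx-community/whisper-tiny",
);

// Placeholder audio source (a URL or a Float32Array of audio samples).
const audio = "https://example.com/sample.wav";

// `return_timestamps`, `language`, and `task` are forwarded to WhisperGenerationConfig.
const output = await transcriber(audio, {
    return_timestamps: true, // segment-level timestamps
    language: "french",      // only meaningful for multilingual checkpoints
    task: "transcribe",      // or "translate"
});

// Word-level timestamps rely on `return_token_timestamps` and `alignment_heads`
// internally, so the checkpoint must provide alignment heads in its generation config.
const words = await transcriber(audio, { return_timestamps: "word" });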