@huggingface/inference

TypeScript client for the Hugging Face Inference Providers and Inference Endpoints

import type { TextGenerationInput } from "@huggingface/tasks";
import { resolveProvider } from "../../lib/getInferenceProviderMapping";
import { getProviderHelper } from "../../lib/getProviderHelper";
import type { BaseArgs, Options } from "../../types";
import { innerStreamingRequest } from "../../utils/request";

export interface TextGenerationStreamToken {
	/** Token ID from the model tokenizer */
	id: number;
	/** Token text */
	text: string;
	/** Logprob */
	logprob: number;
	/**
	 * Is the token a special token
	 * Can be used to ignore tokens when concatenating
	 */
	special: boolean;
}

export interface TextGenerationStreamPrefillToken {
	/** Token ID from the model tokenizer */
	id: number;
	/** Token text */
	text: string;
	/**
	 * Logprob
	 * Optional since the logprob of the first token cannot be computed
	 */
	logprob?: number;
}

export interface TextGenerationStreamBestOfSequence {
	/** Generated text */
	generated_text: string;
	/** Generation finish reason */
	finish_reason: TextGenerationStreamFinishReason;
	/** Number of generated tokens */
	generated_tokens: number;
	/** Sampling seed if sampling was activated */
	seed?: number;
	/** Prompt tokens */
	prefill: TextGenerationStreamPrefillToken[];
	/** Generated tokens */
	tokens: TextGenerationStreamToken[];
}

export type TextGenerationStreamFinishReason =
	/** number of generated tokens == `max_new_tokens` */
	| "length"
	/** the model generated its end of sequence token */
	| "eos_token"
	/** the model generated a text included in `stop_sequences` */
	| "stop_sequence";

export interface TextGenerationStreamDetails {
	/** Generation finish reason */
	finish_reason: TextGenerationStreamFinishReason;
	/** Number of generated tokens */
	generated_tokens: number;
	/** Sampling seed if sampling was activated */
	seed?: number;
	/** Prompt tokens */
	prefill: TextGenerationStreamPrefillToken[];
	/** Generated tokens */
	tokens: TextGenerationStreamToken[];
	/** Additional sequences when using the `best_of` parameter */
	best_of_sequences?: TextGenerationStreamBestOfSequence[];
}

export interface TextGenerationStreamOutput {
	index?: number;
	/** Generated token, one at a time */
	token: TextGenerationStreamToken;
	/**
	 * Complete generated text
	 * Only available when the generation is finished
	 */
	generated_text: string | null;
	/**
	 * Generation details
	 * Only available when the generation is finished
	 */
	details: TextGenerationStreamDetails | null;
}

/**
 * Use to continue text from a prompt. Same as `textGeneration` but returns a generator that can be read one token at a time.
 */
export async function* textGenerationStream(
	args: BaseArgs & TextGenerationInput,
	options?: Options
): AsyncGenerator<TextGenerationStreamOutput> {
	const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
	const providerHelper = getProviderHelper(provider, "text-generation");
	yield* innerStreamingRequest<TextGenerationStreamOutput>(args, providerHelper, {
		...options,
		task: "text-generation",
	});
}
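
For context, a minimal consumption sketch (not part of the file above): it assumes the function is re-exported from the package root as `textGenerationStream`, and it runs in an ES module with top-level await. The access token and model ID are placeholders; each yielded chunk is a TextGenerationStreamOutput, and the final chunk carries `generated_text` and `details`.

import { textGenerationStream } from "@huggingface/inference";

// Iterate the async generator and print token text as it arrives,
// skipping special tokens (e.g. end-of-sequence markers).
for await (const output of textGenerationStream({
	accessToken: "hf_***", // placeholder token
	model: "HuggingFaceH4/zephyr-7b-beta", // placeholder model ID
	inputs: "The capital of France is",
	parameters: { max_new_tokens: 32 },
})) {
	if (!output.token.special) {
		process.stdout.write(output.token.text);
	}
}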