// @huggingface/inference
// TypeScript client for the Hugging Face Inference Providers and Inference Endpoints
import type { TextGenerationInput } from "@huggingface/tasks";
import { resolveProvider } from "../../lib/getInferenceProviderMapping";
import { getProviderHelper } from "../../lib/getProviderHelper";
import type { BaseArgs, Options } from "../../types";
import { innerStreamingRequest } from "../../utils/request";
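/**
 * A single token emitted while streaming.
 * Illustrative shape only (values are made up, not real model output):
 * `{ id: 1027, text: " blue", logprob: -0.34, special: false }`
 */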
export interface TextGenerationStreamToken {
/** Token ID from the model tokenizer */
id: number;
/** Token text */
text: string;
/** Logprob */
logprob: number;
/**
* Is the token a special token
* Can be used to ignore tokens when concatenating
*/
special: boolean;
}
export interface TextGenerationStreamPrefillToken {
/** Token ID from the model tokenizer */
id: number;
/** Token text */
text: string;
/**
* Logprob
* Optional since the logprob of the first token cannot be computed
*/
logprob?: number;
}
export interface TextGenerationStreamBestOfSequence {
/** Generated text */
generated_text: string;
/** Generation finish reason */
finish_reason: TextGenerationStreamFinishReason;
/** Number of generated tokens */
generated_tokens: number;
/** Sampling seed if sampling was activated */
seed?: number;
/** Prompt tokens */
prefill: TextGenerationStreamPrefillToken[];
/** Generated tokens */
tokens: TextGenerationStreamToken[];
}
export type TextGenerationStreamFinishReason =
/** number of generated tokens == `max_new_tokens` */
| "length"
/** the model generated its end of sequence token */
| "eos_token"
/** the model generated a text included in `stop_sequences` */
| "stop_sequence";
export interface TextGenerationStreamDetails {
/** Generation finish reason */
finish_reason: TextGenerationStreamFinishReason;
/** Number of generated tokens */
generated_tokens: number;
/** Sampling seed if sampling was activated */
seed?: number;
/** Prompt tokens */
prefill: TextGenerationStreamPrefillToken[];
/** Generated tokens */
tokens: TextGenerationStreamToken[];
/** Additional sequences when using the `best_of` parameter */
best_of_sequences?: TextGenerationStreamBestOfSequence[];
}
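/**
 * One chunk of the stream returned by `textGenerationStream`.
 * `generated_text` and `details` are `null` until the generation is finished.
 *
 * Illustrative final chunk (values are made up, shown for shape only):
 * `{ token: { id: 264, text: " blue", logprob: -0.12, special: false }, generated_text: "The sky is blue", details: { finish_reason: "eos_token", generated_tokens: 4, prefill: [...], tokens: [...] } }`
 */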
export interface TextGenerationStreamOutput {
index?: number;
/** Generated token, one at a time */
token: TextGenerationStreamToken;
/**
* Complete generated text
* Only available when the generation is finished
*/
generated_text: string | null;
/**
* Generation details
* Only available when the generation is finished
*/
details: TextGenerationStreamDetails | null;
}
/**
 * Use to continue text from a prompt. Same as `textGeneration`, but returns an async generator that can be read one token at a time.
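 *
 * Minimal usage sketch (the access token, model name, and prompt below are illustrative placeholders, not values defined in this file):
 * @example
 * for await (const output of textGenerationStream({
 *   accessToken: "hf_...",
 *   model: "HuggingFaceH4/zephyr-7b-beta",
 *   inputs: "The answer to the universe is",
 *   parameters: { max_new_tokens: 20 },
 * })) {
 *   console.log(output.token.text);
 * }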
*/
export async function* textGenerationStream(
args: BaseArgs & TextGenerationInput,
options?: Options
): AsyncGenerator<TextGenerationStreamOutput> {
const provider = await resolveProvider(args.provider, args.model, args.endpointUrl);
const providerHelper = getProviderHelper(provider, "text-generation");
yield* innerStreamingRequest<TextGenerationStreamOutput>(args, providerHelper, {
...options,
task: "text-generation",
});
}