@huggingface/inference
TypeScript client for the Hugging Face Inference Providers and Inference Endpoints
import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks";
import type { InferenceProviderModelMapping } from "./lib/getInferenceProviderMapping";
/**
 * HF model id, like "meta-llama/Llama-3.3-70B-Instruct"
 */
export type ModelId = string;
export interface Options {
	/**
	 * (Default: true) Boolean. If a request fails with a 503, it will be retried with the same parameters.
	 */
	retry_on_error?: boolean;
	/**
	 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
	 */
	fetch?: typeof fetch;
	/**
	 * AbortController signal to use for request interruption.
	 */
	signal?: AbortSignal;
	/**
	 * (Default: "same-origin") String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all.
	 */
	includeCredentials?: string | boolean;
	/**
	 * The billing account to use for the requests.
	 *
	 * By default the requests are billed on the user's account.
	 * Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
	 */
	billTo?: string;
}
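/**
 * Illustrative sketch (not part of the published API surface): how an Options
 * value might be populated. The organization name in `billTo` is a
 * hypothetical placeholder.
 */
const exampleOptions: Options = {
	retry_on_error: false, // fail fast instead of retrying on a 503
	signal: new AbortController().signal, // lets the caller cancel the request
	includeCredentials: "include", // forwarded verbatim to fetch() credentials
	billTo: "my-enterprise-org", // hypothetical Enterprise Hub organization
};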
export type InferenceTask = Exclude<PipelineType, "other"> | "conversational";
export const INFERENCE_PROVIDERS = [
	"black-forest-labs",
	"cerebras",
	"cohere",
	"fal-ai",
	"featherless-ai",
	"fireworks-ai",
	"groq",
	"hf-inference",
	"hyperbolic",
	"nebius",
	"novita",
	"nscale",
	"openai",
	"ovhcloud",
	"replicate",
	"sambanova",
	"together",
] as const;
export const PROVIDERS_OR_POLICIES = [...INFERENCE_PROVIDERS, "auto"] as const;
export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];
export type InferenceProviderOrPolicy = (typeof PROVIDERS_OR_POLICIES)[number];
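/**
 * Illustrative sketch: `isInferenceProvider` is a hypothetical type guard
 * (not exported by this module) that narrows an arbitrary string to the
 * InferenceProvider union derived above.
 */
function isInferenceProvider(value: string): value is InferenceProvider {
	return (INFERENCE_PROVIDERS as readonly string[]).includes(value);
}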
export interface BaseArgs {
	/**
	 * The access token to use. Without it, you'll get rate-limited quickly.
	 *
	 * Can be created for free in hf.co/settings/token
	 *
	 * You can also pass an external Inference provider's key if you intend to call a compatible provider like Sambanova, Together, Replicate...
	 */
	accessToken?: string;
	/**
	 * The HF model to use.
	 *
	 * If not specified, will call huggingface.co/api/tasks to get the default model for the task.
	 *
	 * /!\ Legacy behavior allows this to be a URL, but this is deprecated and will be removed in the future.
	 * Use the `endpointUrl` parameter instead.
	 */
	model?: ModelId;
	/**
	 * The URL of the endpoint to use.
	 *
	 * If not specified, will call the default router.huggingface.co Inference Providers endpoint.
	 */
	endpointUrl?: string;
	/**
	 * Set an Inference provider to run this model on.
	 *
	 * Defaults to "auto", i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
	 */
	provider?: InferenceProviderOrPolicy;
}
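/**
 * Illustrative sketch: a typical BaseArgs value. The token is a placeholder,
 * the model id reuses the ModelId example above, and `provider` may be
 * omitted entirely to default to "auto".
 */
const exampleBaseArgs: BaseArgs = {
	accessToken: "hf_xxx", // placeholder token from hf.co/settings/token
	model: "meta-llama/Llama-3.3-70B-Instruct",
	provider: "together", // any entry of PROVIDERS_OR_POLICIES works here
};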
export type RequestArgs = BaseArgs &
	(
		| { data: Blob | ArrayBuffer }
		| { inputs: unknown }
		| { prompt: string }
		| { text: string }
		| { audio_url: string }
		| ChatCompletionInput
	) & {
		parameters?: Record<string, unknown>;
	};
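/**
 * Illustrative sketch: RequestArgs is BaseArgs plus exactly one of the input
 * shapes above. This example uses the `{ inputs: unknown }` variant; the
 * parameter name `max_new_tokens` is a hypothetical task parameter.
 */
const exampleRequest: RequestArgs = {
	accessToken: "hf_xxx", // placeholder token
	model: "meta-llama/Llama-3.3-70B-Instruct",
	inputs: "Hello, world!", // satisfies the `inputs` branch of the union
	parameters: { max_new_tokens: 20 }, // free-form, provider-specific
};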
export type AuthMethod = "none" | "hf-token" | "credentials-include" | "provider-key";
export interface HeaderParams {
	accessToken?: string;
	authMethod: AuthMethod;
}
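/**
 * Illustrative sketch (hypothetical helper, not part of this module): one way
 * HeaderParams could map onto an Authorization header. "none" and
 * "credentials-include" are assumed to send no Authorization header at all.
 */
function exampleBuildHeaders(params: HeaderParams): Record<string, string> {
	if ((params.authMethod === "hf-token" || params.authMethod === "provider-key") && params.accessToken) {
		return { Authorization: `Bearer ${params.accessToken}` }; // standard Bearer scheme
	}
	return {};
}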
export interface UrlParams {
	authMethod: AuthMethod;
	model: string;
	task?: InferenceTask;
}
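/**
 * Illustrative sketch: a UrlParams value as a routing helper might receive
 * it. The task value "conversational" comes from the InferenceTask type
 * defined above.
 */
const exampleUrlParams: UrlParams = {
	authMethod: "hf-token",
	model: "meta-llama/Llama-3.3-70B-Instruct",
	task: "conversational",
};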
export interface BodyParams<T extends Record<string, unknown> = Record<string, unknown>> {
	args: T;
	model: string;
	mapping?: InferenceProviderModelMapping | undefined;
	task?: InferenceTask;
}
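/**
 * Illustrative sketch (hypothetical helper): how a provider integration might
 * turn BodyParams into an upstream JSON payload, assuming the imported
 * InferenceProviderModelMapping type exposes a provider-side model id as
 * `providerId`.
 */
function exampleMakeBody(params: BodyParams): Record<string, unknown> {
	return {
		...params.args,
		// prefer the provider-specific model id when a mapping exists
		model: params.mapping?.providerId ?? params.model,
	};
}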