@huggingface/inference

Version:

Typescript client for the Hugging Face Inference Providers and Inference Endpoints

76 lines (68 loc) • 2.6 kB

text/typescript

/**
 * See the registered mapping of HF model ID => OVHcloud model ID here:
 *
 * https://huggingface.co/api/partners/ovhcloud/models
 *
 * This is a publicly available mapping.
 *
 * If you want to try to run inference for a new model locally before it's registered on huggingface.co,
 * you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
 *
 * - If you work at OVHcloud and want to update this mapping, please use the model mapping API we provide on huggingface.co
 * - If you're a community member and want to add a new supported HF model to OVHcloud, please open an issue on the present repo
 * and we will tag OVHcloud team members.
 *
 * Thanks!
 */
import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper";
import type {
	ChatCompletionOutput,
	TextGenerationInput,
	TextGenerationOutput,
	TextGenerationOutputFinishReason,
} from "@huggingface/tasks";
import { InferenceOutputError } from "../lib/InferenceOutputError";
import type { BodyParams } from "../types";
import { omit } from "../utils/omit";

/** Base URL handed to the task helper constructors below. */
const OVHCLOUD_API_BASE_URL = "https://oai.endpoints.kepler.ai.cloud.ovh.net";

/**
 * Response shape expected by {@link OvhCloudTextGenerationTask.getResponse}:
 * the chat-completion output type with `choices` replaced by plain-text completions.
 */
interface OvhCloudTextCompletionOutput extends Omit<ChatCompletionOutput, "choices"> {
	choices: Array<{
		text: string;
		finish_reason: TextGenerationOutputFinishReason;
		logprobs: unknown;
		index: number;
	}>;
}

/** Conversational task helper registered under the "ovhcloud" provider name. */
export class OvhCloudConversationalTask extends BaseConversationalTask {
	constructor() {
		super("ovhcloud", OVHCLOUD_API_BASE_URL);
	}
}

/** Text-generation task helper registered under the "ovhcloud" provider name. */
export class OvhCloudTextGenerationTask extends BaseTextGenerationTask {
	constructor() {
		super("ovhcloud", OVHCLOUD_API_BASE_URL);
	}

	/**
	 * Builds the request body from text-generation arguments.
	 *
	 * `inputs` is sent as `prompt`; the entries of `parameters` are flattened into the top level of the
	 * payload, with `max_new_tokens` renamed to `max_tokens`.
	 *
	 * @param params - Model id and caller arguments for the request.
	 * @returns The payload object to serialize as the request body.
	 */
	override preparePayload(params: BodyParams<TextGenerationInput>): Record<string, unknown> {
		const parameters = params.args.parameters as Record<string, unknown> | undefined;
		return {
			model: params.model,
			...omit(params.args, ["inputs", "parameters"]),
			...(parameters
				? {
						max_tokens: parameters.max_new_tokens,
						...omit(parameters, "max_new_tokens"),
				  }
				: undefined),
			// Placed last so it cannot be overridden by a caller-supplied key of the same name.
			prompt: params.args.inputs,
		};
	}

	/**
	 * Extracts the generated text from the first choice of a text-completion response.
	 *
	 * @param response - Parsed response body.
	 * @returns The text of the first choice as `generated_text`.
	 * @throws InferenceOutputError if the body is not an object with a string `model` and a `choices`
	 * array whose first entry carries a string `text` (this includes `null` bodies and empty `choices`).
	 */
	override async getResponse(response: OvhCloudTextCompletionOutput): Promise<TextGenerationOutput> {
		if (
			typeof response === "object" &&
			// `typeof null` is "object"; without this guard the `in` check below throws a TypeError.
			response !== null &&
			"choices" in response &&
			Array.isArray(response.choices) &&
			typeof response.model === "string"
		) {
			// `choices` may be empty, in which case there is no completion to read.
			const completion = response.choices[0];
			if (typeof completion?.text === "string") {
				return {
					generated_text: completion.text,
				};
			}
		}
		throw new InferenceOutputError("Expected OVHcloud text generation response format");
	}
}