
@huggingface/inference


TypeScript client for the Hugging Face Inference Providers and Inference Endpoints
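
A minimal usage sketch of the public client (assuming a package version that exports InferenceClient and the chatCompletion task method; the token, provider, and model below are placeholders):

import { InferenceClient } from "@huggingface/inference";

// Placeholder credentials and model, for illustration only.
const client = new InferenceClient(process.env.HF_TOKEN);

const result = await client.chatCompletion({
    provider: "hf-inference",
    model: "meta-llama/Llama-3.1-8B-Instruct",
    messages: [{ role: "user", content: "Hello!" }],
});
console.log(result.choices[0].message.content);

Task methods like this one ultimately funnel through the request primitives in the compiled module below.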

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.innerRequest = innerRequest; exports.innerStreamingRequest = innerStreamingRequest; const makeRequestOptions_js_1 = require("../lib/makeRequestOptions.js"); const parse_js_1 = require("../vendor/fetch-event-source/parse.js"); const errors_js_1 = require("../errors.js"); function requestArgsToJson(args) { // Convert the entire args object to a JSON-serializable format const argsWithData = args; return JSON.parse(JSON.stringify({ ...argsWithData, data: argsWithData.data ? "[Blob or ArrayBuffer]" : null, })); } /** * Primitive to make custom calls to the inference provider */ async function innerRequest(args, providerHelper, options) { const { url, info } = await (0, makeRequestOptions_js_1.makeRequestOptions)(args, providerHelper, options); const response = await (options?.fetch ?? fetch)(url, info); const requestContext = { url, info }; if (options?.retry_on_error !== false && response.status === 503) { return innerRequest(args, providerHelper, options); } if (!response.ok) { const contentType = response.headers.get("Content-Type"); if (["application/json", "application/problem+json"].some((ct) => contentType?.startsWith(ct))) { const output = await response.json(); if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) { throw new errors_js_1.InferenceClientProviderApiError(`Provider ${args.provider} does not seem to support chat completion for model ${args.model} . Error: ${JSON.stringify(output.error)}`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }); } if (typeof output.error === "string" || typeof output.detail === "string" || typeof output.message === "string") { throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail ?? output.message}`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }); } else { throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: an HTTP error occurred when requesting the provider.`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }); } } const message = contentType?.startsWith("text/plain;") ? await response.text() : undefined; throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${message ?? "an HTTP error occurred when requesting the provider"}`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: message ?? 
"" }); } if (response.headers.get("Content-Type")?.startsWith("application/json")) { const data = (await response.json()); return { data, requestContext }; } const blob = (await response.blob()); return { data: blob, requestContext }; } /** * Primitive to make custom inference calls that expect server-sent events, and returns the response through a generator */ async function* innerStreamingRequest(args, providerHelper, options) { const { url, info } = await (0, makeRequestOptions_js_1.makeRequestOptions)({ ...args, stream: true }, providerHelper, options); const response = await (options?.fetch ?? fetch)(url, info); if (options?.retry_on_error !== false && response.status === 503) { return yield* innerStreamingRequest(args, providerHelper, options); } if (!response.ok) { if (response.headers.get("Content-Type")?.startsWith("application/json")) { const output = await response.json(); if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) { throw new errors_js_1.InferenceClientProviderApiError(`Provider ${args.provider} does not seem to support chat completion for model ${args.model} . Error: ${JSON.stringify(output.error)}`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }); } if (typeof output.error === "string") { throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error}`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }); } if (output.error && "message" in output.error && typeof output.error.message === "string") { /// OpenAI errors throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error.message}`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }); } // Sambanova errors if (typeof output.message === "string") { throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.message}`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output }); } } throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: an HTTP error occurred when requesting the provider.`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: "" }); } if (!response.headers.get("content-type")?.startsWith("text/event-stream")) { throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: server does not support event stream content type, it returned ` + response.headers.get("content-type"), { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? 
"", status: response.status, body: "" }); } if (!response.body) { return; } const reader = response.body.getReader(); let events = []; const onEvent = (event) => { // accumulate events in array events.push(event); }; const onChunk = (0, parse_js_1.getLines)((0, parse_js_1.getMessages)(() => { }, () => { }, onEvent)); try { while (true) { const { done, value } = await reader.read(); if (done) { return; } onChunk(value); for (const event of events) { if (event.data.length > 0) { if (event.data === "[DONE]") { return; } const data = JSON.parse(event.data); if (typeof data === "object" && data !== null && "error" in data) { const errorStr = typeof data.error === "string" ? data.error : typeof data.error === "object" && data.error && "message" in data.error && typeof data.error.message === "string" ? data.error.message : JSON.stringify(data.error); throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: an occurred while streaming the response: ${errorStr}`, { url, method: info.method ?? "GET", headers: info.headers, body: requestArgsToJson(args), }, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: data }); } yield data; } } events = []; } } finally { reader.releaseLock(); } }