@huggingface/inference
TypeScript client for the Hugging Face Inference Providers and Inference Endpoints
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.innerRequest = innerRequest;
exports.innerStreamingRequest = innerStreamingRequest;
const makeRequestOptions_js_1 = require("../lib/makeRequestOptions.js");
const parse_js_1 = require("../vendor/fetch-event-source/parse.js");
const errors_js_1 = require("../errors.js");
function requestArgsToJson(args) {
// Convert the entire args object to a JSON-serializable format
const argsWithData = args;
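// Binary payloads (Blob/ArrayBuffer) are not JSON-serializable, so they are replaced with a placeholder string below.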
return JSON.parse(JSON.stringify({
...argsWithData,
data: argsWithData.data ? "[Blob or ArrayBuffer]" : null,
}));
}
/**
* Primitive to make custom calls to the inference provider
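*
* @example
* // Hypothetical usage sketch: the argument shapes and the providerHelper value are assumed, not defined in this file.
* const { data, requestContext } = await innerRequest(
*     { accessToken: "hf_...", model: "my-model", inputs: "Hello" },
*     providerHelper,
*     { fetch: customFetch, retry_on_error: false }
* );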
*/
async function innerRequest(args, providerHelper, options) {
const { url, info } = await (0, makeRequestOptions_js_1.makeRequestOptions)(args, providerHelper, options);
const response = await (options?.fetch ?? fetch)(url, info);
const requestContext = { url, info };
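// Retry when the provider responds with 503 (service unavailable), unless the caller opted out via retry_on_error.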
if (options?.retry_on_error !== false && response.status === 503) {
return innerRequest(args, providerHelper, options);
}
if (!response.ok) {
const contentType = response.headers.get("Content-Type");
if (["application/json", "application/problem+json"].some((ct) => contentType?.startsWith(ct))) {
const output = await response.json();
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
throw new errors_js_1.InferenceClientProviderApiError(`Provider ${args.provider} does not seem to support chat completion for model ${args.model}. Error: ${JSON.stringify(output.error)}`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
}
if (typeof output.error === "string" || typeof output.detail === "string" || typeof output.message === "string") {
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error ?? output.detail ?? output.message}`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
}
else {
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: an HTTP error occurred when requesting the provider.`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
}
}
const message = contentType?.startsWith("text/plain;") ? await response.text() : undefined;
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${message ?? "an HTTP error occurred when requesting the provider"}`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: message ?? "" });
}
if (response.headers.get("Content-Type")?.startsWith("application/json")) {
const data = (await response.json());
return { data, requestContext };
}
const blob = (await response.blob());
return { data: blob, requestContext };
}
/**
* Primitive to make custom inference calls that expect server-sent events, and returns the response through a generator
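*
* @example
* // Hypothetical usage sketch: the argument shapes and the providerHelper value are assumed, not defined in this file.
* for await (const event of innerStreamingRequest(
*     { accessToken: "hf_...", model: "my-model", messages: [{ role: "user", content: "Hello" }] },
*     providerHelper,
*     { chatCompletion: true }
* )) {
*     console.log(event);
* }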
*/
async function* innerStreamingRequest(args, providerHelper, options) {
const { url, info } = await (0, makeRequestOptions_js_1.makeRequestOptions)({ ...args, stream: true }, providerHelper, options);
const response = await (options?.fetch ?? fetch)(url, info);
if (options?.retry_on_error !== false && response.status === 503) {
return yield* innerStreamingRequest(args, providerHelper, options);
}
if (!response.ok) {
if (response.headers.get("Content-Type")?.startsWith("application/json")) {
const output = await response.json();
if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) {
throw new errors_js_1.InferenceClientProviderApiError(`Provider ${args.provider} does not seem to support chat completion for model ${args.model}. Error: ${JSON.stringify(output.error)}`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
}
if (typeof output.error === "string") {
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error}`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
}
if (output.error && "message" in output.error && typeof output.error.message === "string") {
/// OpenAI errors
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.error.message}`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
}
// Sambanova errors
if (typeof output.message === "string") {
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: ${output.message}`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: output });
}
}
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: an HTTP error occurred when requesting the provider.`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: "" });
}
if (!response.headers.get("content-type")?.startsWith("text/event-stream")) {
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: server does not support event stream content type, it returned ` +
response.headers.get("content-type"), {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: "" });
}
if (!response.body) {
return;
}
const reader = response.body.getReader();
let events = [];
const onEvent = (event) => {
// accumulate events in array
events.push(event);
};
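// getLines splits the raw byte chunks into lines; getMessages reassembles those lines into complete SSE events and forwards each one to onEvent.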
const onChunk = (0, parse_js_1.getLines)((0, parse_js_1.getMessages)(() => { }, () => { }, onEvent));
try {
while (true) {
const { done, value } = await reader.read();
if (done) {
return;
}
onChunk(value);
for (const event of events) {
if (event.data.length > 0) {
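// An OpenAI-style SSE stream signals completion with a literal "[DONE]" event.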
if (event.data === "[DONE]") {
return;
}
const data = JSON.parse(event.data);
if (typeof data === "object" && data !== null && "error" in data) {
const errorStr = typeof data.error === "string"
? data.error
: typeof data.error === "object" &&
data.error &&
"message" in data.error &&
typeof data.error.message === "string"
? data.error.message
: JSON.stringify(data.error);
throw new errors_js_1.InferenceClientProviderApiError(`Failed to perform inference: an error occurred while streaming the response: ${errorStr}`, {
url,
method: info.method ?? "GET",
headers: info.headers,
body: requestArgsToJson(args),
}, { requestId: response.headers.get("x-request-id") ?? "", status: response.status, body: data });
}
yield data;
}
}
events = [];
}
}
finally {
reader.releaseLock();
}
}