UNPKG

@huggingface/inference

Version:

TypeScript client for the Hugging Face Inference Providers and Inference Endpoints

49 lines (48 loc) 1.91 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getDefaultTask = getDefaultTask;
const config_js_1 = require("../config.js");
const isUrl_js_1 = require("./isUrl.js");
/**
 * We want to make calls to the huggingface hub the least possible, eg if
 * someone is calling Inference Endpoints 1000 times per second, we don't want
 * to make 1000 calls to the hub to get the task name.
 */
const taskCache = new Map();
const CACHE_DURATION = 10 * 60 * 1000; // 10 minutes, in milliseconds
const MAX_CACHE_ITEMS = 1000;
/**
 * Get the default task. Use a LRU cache of 1000 items with 10 minutes expiration
 * to avoid making too many calls to the HF hub.
 *
 * @param {string} model - Model id on the hub, or a URL (URLs are not resolvable).
 * @param {string | undefined} accessToken - Optional HF token; part of the cache key
 *   because it can affect model visibility.
 * @param {{ fetch?: typeof fetch } | undefined} options - Optional custom `fetch`.
 * @returns The default task for the model, or `null` if it was impossible to get it
 */
async function getDefaultTask(model, accessToken, options) {
    // A URL points at a custom endpoint; its task cannot be looked up on the hub.
    if ((0, isUrl_js_1.isUrl)(model)) {
        return null;
    }
    const key = `${model}:${accessToken}`;
    let cachedTask = taskCache.get(key);
    // Expire entries older than CACHE_DURATION.
    if (cachedTask && cachedTask.date < new Date(Date.now() - CACHE_DURATION)) {
        taskCache.delete(key);
        cachedTask = undefined;
    }
    if (cachedTask === undefined) {
        const modelTask = await (options?.fetch ?? fetch)(`${config_js_1.HF_HUB_URL}/api/models/${model}?expand[]=pipeline_tag`, {
            headers: accessToken ? { Authorization: `Bearer ${accessToken}` } : {},
        })
            .then((resp) => resp.json())
            .then((json) => json.pipeline_tag)
            .catch(() => null);
        // Any failure (network, auth, missing pipeline_tag) resolves to null:
        // best-effort lookup, never throw, never cache the failure.
        if (!modelTask) {
            return null;
        }
        cachedTask = { task: modelTask, date: new Date() };
        taskCache.set(key, cachedTask);
        if (taskCache.size > MAX_CACHE_ITEMS) {
            // Map iterates in insertion order, so the first key is the
            // least-recently-used one (hits are re-inserted below).
            taskCache.delete(taskCache.keys().next().value);
        }
    }
    else {
        // LRU recency refresh: re-insert the hit so it moves to the end of the
        // Map's insertion order. Without this, eviction above would be FIFO,
        // contradicting the documented LRU behavior.
        taskCache.delete(key);
        taskCache.set(key, cachedTask);
    }
    return cachedTask.task;
}