UNPKG

@lobehub/chat

Version:

Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.

105 lines (92 loc) 3.86 kB
import { HfInference } from '@huggingface/inference'; import urlJoin from 'url-join'; import type { ChatModelCard } from '@/types/llm'; import { AgentRuntimeErrorType } from '../error'; import { ModelProvider } from '../types'; import { createOpenAICompatibleRuntime } from '../utils/openaiCompatibleFactory'; import { convertIterableToStream } from '../utils/streams'; export interface HuggingFaceModelCard { id: string; tags: string[]; } export const LobeHuggingFaceAI = createOpenAICompatibleRuntime({ chatCompletion: { handleStreamBizErrorType: (error) => { // e.g.: Server meta-llama/Meta-Llama-3.1-8B-Instruct does not seem to support chat completion. Error: Model requires a Pro subscription; check out hf.co/pricing to learn more. Make sure to include your HF token in your query. if (error.message?.includes('Model requires a Pro subscription')) { return AgentRuntimeErrorType.PermissionDenied; } // e.g.: Server meta-llama/Meta-Llama-3.1-8B-Instruct does not seem to support chat completion. Error: Authorization header is correct, but the token seems invalid if (error.message?.includes('the token seems invalid')) { return AgentRuntimeErrorType.InvalidProviderAPIKey; } }, }, customClient: { createChatCompletionStream: (client: HfInference, payload, instance) => { const { max_tokens = 4096 } = payload; const hfRes = client.chatCompletionStream({ endpointUrl: instance.baseURL ? urlJoin(instance.baseURL, payload.model) : instance.baseURL, max_tokens: max_tokens, messages: payload.messages, model: payload.model, stream: true, temperature: payload.temperature, // `top_p` must be > 0.0 and < 1.0 top_p: payload?.top_p ? payload?.top_p >= 1 ? 0.99 : payload?.top_p <= 0 ? 0.01 : payload?.top_p : undefined, }); return convertIterableToStream(hfRes); }, createClient: (options) => new HfInference(options.apiKey), }, debug: { chatCompletion: () => process.env.DEBUG_HUGGINGFACE_CHAT_COMPLETION === '1', }, models: async () => { const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels'); const visionKeywords = ['image-text-to-text', 'multimodal', 'vision']; const reasoningKeywords = ['deepseek-r1', 'qvq', 'qwq']; // ref: https://huggingface.co/docs/hub/api const url = 'https://huggingface.co/api/models'; const response = await fetch(url, { method: 'GET', }); const json = await response.json(); const modelList: HuggingFaceModelCard[] = json; return modelList .map((model) => { const knownModel = LOBE_DEFAULT_MODEL_LIST.find( (m) => model.id.toLowerCase() === m.id.toLowerCase(), ); return { contextWindowTokens: knownModel?.contextWindowTokens ?? undefined, displayName: knownModel?.displayName ?? undefined, enabled: knownModel?.enabled || false, functionCall: model.tags.some((tag) => tag.toLowerCase().includes('function-calling')) || knownModel?.abilities?.functionCall || false, id: model.id, reasoning: model.tags.some((tag) => tag.toLowerCase().includes('reasoning')) || reasoningKeywords.some((keyword) => model.id.toLowerCase().includes(keyword)) || knownModel?.abilities?.reasoning || false, vision: model.tags.some((tag) => visionKeywords.some((keyword) => tag.toLowerCase().includes(keyword)), ) || knownModel?.abilities?.vision || false, }; }) .filter(Boolean) as ChatModelCard[]; }, provider: ModelProvider.HuggingFace, });