multi-llm
Version:
A unified TypeScript/JavaScript package for using LLMs across all platforms, with support for 17 major providers, streaming, MCP tools, and intelligent response parsing
JavaScript
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.HuggingFaceProvider = void 0;
const axios_1 = __importDefault(require("axios"));
const provider_1 = require("../provider");
const llm_1 = require("../llm");
const parser_1 = require("../utils/parser");
class HuggingFaceProvider extends provider_1.Provider {
    constructor(apiKey, baseUrl) {
        super(apiKey, baseUrl);
        // Default to the hosted Inference API when no base URL is supplied
        this.baseUrl = baseUrl || 'https://api-inference.huggingface.co';
    }
    async getModels() {
        // Return popular text-generation models available on the HuggingFace Inference API
        return [
            {
                id: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
                name: 'Mixtral 8x7B Instruct v0.1',
                contextWindow: 32768,
                maxOutputTokens: 4096,
                description: 'Mixture-of-experts model by Mistral AI',
                pricing: { input: 0.5, output: 0.5, currency: 'USD' }
            },
            {
                id: 'mistralai/Mistral-7B-Instruct-v0.2',
                name: 'Mistral 7B Instruct v0.2',
                contextWindow: 32768,
                maxOutputTokens: 4096,
                description: 'Mistral 7B fine-tuned for instruction following',
                pricing: { input: 0.2, output: 0.2, currency: 'USD' }
            },
            {
                id: 'meta-llama/Llama-2-70b-chat-hf',
                name: 'Llama 2 70B Chat HF',
                contextWindow: 4096,
                maxOutputTokens: 4096,
                description: 'Meta Llama 2 70B optimized for dialogue use cases',
                pricing: { input: 1.0, output: 1.0, currency: 'USD' }
            },
            {
                id: 'meta-llama/Llama-2-13b-chat-hf',
                name: 'Llama 2 13B Chat HF',
                contextWindow: 4096,
                maxOutputTokens: 4096,
                description: 'Meta Llama 2 13B optimized for dialogue use cases',
                pricing: { input: 0.3, output: 0.3, currency: 'USD' }
            },
            {
                id: 'meta-llama/Llama-2-7b-chat-hf',
                name: 'Llama 2 7B Chat HF',
                contextWindow: 4096,
                maxOutputTokens: 4096,
                description: 'Meta Llama 2 7B optimized for dialogue use cases',
                pricing: { input: 0.1, output: 0.1, currency: 'USD' }
            },
            {
                id: 'microsoft/DialoGPT-large',
                name: 'DialoGPT Large',
                contextWindow: 1024,
                maxOutputTokens: 1024,
                description: 'Large-scale pretrained dialogue response generation model',
                pricing: { input: 0.1, output: 0.1, currency: 'USD' }
            },
            {
                id: 'microsoft/DialoGPT-medium',
                name: 'DialoGPT Medium',
                contextWindow: 1024,
                maxOutputTokens: 1024,
                description: 'Medium-scale pretrained dialogue response generation model',
                pricing: { input: 0.05, output: 0.05, currency: 'USD' }
            }
        ];
    }
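    // Wrap this provider and a model id in the package's LLM convenience class.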
    createLLM(modelId) {
        return new llm_1.LLM(this, modelId);
    }
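    // Send a chat request to the given model. If a stream callback is provided,
    // the response is delivered via simulated streaming (see streamChat below).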
    async chat(modelId, messages, options, streamCallback) {
        try {
            // Convert messages to a single prompt string for HuggingFace
            const prompt = this.messagesToPrompt(messages);
            const payload = {
                inputs: prompt,
                parameters: {
                    // Use ?? rather than || so explicit 0 values are respected
                    temperature: options.temperature ?? 0.7,
                    max_new_tokens: options.maxTokens ?? 512,
                    top_p: options.topP ?? 0.9,
                    top_k: options.topK ?? 50,
                    repetition_penalty: options.repetitionPenalty ?? 1.0,
                    do_sample: true,
                    return_full_text: false
                },
                options: {
                    wait_for_model: true
                }
            };
            if (streamCallback) {
                return this.streamChat(modelId, payload, streamCallback);
            }
            else {
                const response = await axios_1.default.post(`${this.baseUrl}/models/${modelId}`, payload, {
                    headers: {
                        'Authorization': `Bearer ${this.apiKey}`,
                        'Content-Type': 'application/json'
                    }
                });
                // HuggingFace returns an array of objects with a generated_text field
                const generatedText = Array.isArray(response.data) ? response.data[0]?.generated_text : response.data.generated_text;
                const content = generatedText || '';
                const parsed = parser_1.ResponseParser.parseResponse(content);
                return {
                    raw: response.data,
                    parsed,
                    usage: {
                        inputTokens: 0, // HuggingFace doesn't provide detailed token counts in the free tier
                        outputTokens: 0,
                        totalTokens: 0
                    }
                };
            }
        }
        catch (error) {
            throw new Error(`HuggingFace chat failed: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    async streamChat(modelId, payload, streamCallback) {
        // The HuggingFace Inference API doesn't support streaming in the free tier,
        // so we make a regular request and replay the response in chunks.
        const response = await axios_1.default.post(`${this.baseUrl}/models/${modelId}`, payload, {
            headers: {
                'Authorization': `Bearer ${this.apiKey}`,
                'Content-Type': 'application/json'
            }
        });
        const generatedText = Array.isArray(response.data) ? response.data[0]?.generated_text : response.data.generated_text;
        const content = generatedText || '';
        // Simulate streaming by sending the content in chunks
        await this.simulateStreaming(content, streamCallback);
        const parsed = parser_1.ResponseParser.parseResponse(content);
        return {
            raw: response.data,
            parsed,
            usage: undefined
        };
    }
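    // Emit the response word by word with a short delay to mimic token streaming.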
    async simulateStreaming(content, streamCallback) {
        const words = content.split(' ');
        for (let i = 0; i < words.length; i++) {
            // Re-attach the space that split() removed, except after the last word
            const word = words[i] + (i < words.length - 1 ? ' ' : '');
            streamCallback(word);
            // Small delay between chunks to simulate streaming
            await new Promise(resolve => setTimeout(resolve, 50));
        }
    }
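    // Flatten the chat history into a single prompt string using
    // "System:" / "Human:" / "Assistant:" markers.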
    messagesToPrompt(messages) {
        let prompt = '';
        const systemMessage = messages.find(m => m.role === 'system');
        if (systemMessage) {
            prompt += `System: ${systemMessage.content}\n\n`;
        }
        const conversationMessages = messages.filter(m => m.role !== 'system');
        for (let i = 0; i < conversationMessages.length; i++) {
            const message = conversationMessages[i];
            if (message.role === 'user') {
                prompt += `Human: ${message.content}\n\n`;
            }
            else if (message.role === 'assistant') {
                prompt += `Assistant: ${message.content}\n\n`;
            }
        }
        // Add the assistant prompt to indicate where the response should start
        if (conversationMessages[conversationMessages.length - 1]?.role === 'user') {
            prompt += 'Assistant: ';
        }
        return prompt;
    }
}
exports.HuggingFaceProvider = HuggingFaceProvider;
//# sourceMappingURL=huggingface.js.map
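
For context, here is a minimal usage sketch of the provider above. It is an assumption-laden illustration rather than package documentation: the require path, the environment variable name, and the option values are hypothetical, inferred from the constructor and chat() signatures in this file.

// Hypothetical usage sketch. The require path below is an assumption about
// where this compiled file lives inside the package; adjust it to the
// package's real entry point.
const { HuggingFaceProvider } = require('multi-llm/dist/providers/huggingface');

async function main() {
    // apiKey comes from the environment; baseUrl falls back to the hosted Inference API
    const provider = new HuggingFaceProvider(process.env.HF_API_KEY);

    const messages = [
        { role: 'system', content: 'You are a concise assistant.' },
        { role: 'user', content: 'Explain mixture-of-experts in one sentence.' }
    ];

    // Non-streaming call; the options mirror the parameters read in chat()
    const result = await provider.chat('mistralai/Mistral-7B-Instruct-v0.2', messages, {
        temperature: 0.7,
        maxTokens: 256
    });
    console.log(result.parsed);

    // Simulated streaming: the callback receives the response word by word
    await provider.chat('mistralai/Mistral-7B-Instruct-v0.2', messages, { maxTokens: 256 },
        chunk => process.stdout.write(chunk));
}

main().catch(console.error);

With these messages, messagesToPrompt() would build the prompt "System: You are a concise assistant.\n\nHuman: Explain mixture-of-experts in one sentence.\n\nAssistant: ", leaving the trailing "Assistant: " marker for the model to complete.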