multi-llm
Version:
A unified TypeScript/JavaScript package for using LLMs across all platforms, with support for 17 major providers, streaming, MCP tools, and intelligent response parsing
JavaScript
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.HuggingFaceProvider = void 0;
const axios_1 = __importDefault(require("axios"));
const provider_1 = require("../provider");
const llm_1 = require("../llm");
const parser_1 = require("../utils/parser");
class HuggingFaceProvider extends provider_1.Provider {
    constructor(apiKey, baseUrl) {
        super(apiKey, baseUrl);
        // Default to the hosted Inference API when no base URL is supplied
        this.baseUrl = baseUrl || 'https://api-inference.huggingface.co';
    }
    async getModels() {
        // Return popular text-generation models available on the HuggingFace Inference API
        return [
            {
                id: 'mistralai/Mixtral-8x7B-Instruct-v0.1',
                name: 'Mixtral 8x7B Instruct v0.1',
                contextWindow: 32768,
                maxOutputTokens: 4096,
                description: 'Mixture-of-experts model by Mistral AI',
                pricing: { input: 0.5, output: 0.5, currency: 'USD' }
            },
            {
                id: 'mistralai/Mistral-7B-Instruct-v0.2',
                name: 'Mistral 7B Instruct v0.2',
                contextWindow: 32768,
                maxOutputTokens: 4096,
                description: 'Mistral 7B fine-tuned for instruction following',
                pricing: { input: 0.2, output: 0.2, currency: 'USD' }
            },
            {
                id: 'meta-llama/Llama-2-70b-chat-hf',
                name: 'Llama 2 70B Chat HF',
                contextWindow: 4096,
                maxOutputTokens: 4096,
                description: 'Meta Llama 2 70B optimized for dialogue use cases',
                pricing: { input: 1.0, output: 1.0, currency: 'USD' }
            },
            {
                id: 'meta-llama/Llama-2-13b-chat-hf',
                name: 'Llama 2 13B Chat HF',
                contextWindow: 4096,
                maxOutputTokens: 4096,
                description: 'Meta Llama 2 13B optimized for dialogue use cases',
                pricing: { input: 0.3, output: 0.3, currency: 'USD' }
            },
            {
                id: 'meta-llama/Llama-2-7b-chat-hf',
                name: 'Llama 2 7B Chat HF',
                contextWindow: 4096,
                maxOutputTokens: 4096,
                description: 'Meta Llama 2 7B optimized for dialogue use cases',
                pricing: { input: 0.1, output: 0.1, currency: 'USD' }
            },
            {
                id: 'microsoft/DialoGPT-large',
                name: 'DialoGPT Large',
                contextWindow: 1024,
                maxOutputTokens: 1024,
                description: 'Large-scale pretrained dialogue response generation model',
                pricing: { input: 0.1, output: 0.1, currency: 'USD' }
            },
            {
                id: 'microsoft/DialoGPT-medium',
                name: 'DialoGPT Medium',
                contextWindow: 1024,
                maxOutputTokens: 1024,
                description: 'Medium-scale pretrained dialogue response generation model',
                pricing: { input: 0.05, output: 0.05, currency: 'USD' }
            }
        ];
    }
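    // Wrap this provider and a model id in the package's LLM convenience class.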
    createLLM(modelId) {
        return new llm_1.LLM(this, modelId);
    }
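    // Send a chat request to the given model. If a stream callback is provided,
    // the response is delivered via simulated streaming (see streamChat below).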
    async chat(modelId, messages, options, streamCallback) {
        try {
            // Convert messages to a single prompt string for HuggingFace
            const prompt = this.messagesToPrompt(messages);
            const payload = {
                inputs: prompt,
                parameters: {
                    // Use ?? rather than || so explicit 0 values are respected
                    temperature: options.temperature ?? 0.7,
                    max_new_tokens: options.maxTokens ?? 512,
                    top_p: options.topP ?? 0.9,
                    top_k: options.topK ?? 50,
                    repetition_penalty: options.repetitionPenalty ?? 1.0,
                    do_sample: true,
                    return_full_text: false
                },
                options: {
                    wait_for_model: true
                }
            };
            if (streamCallback) {
                return this.streamChat(modelId, payload, streamCallback);
            }
            else {
                const response = await axios_1.default.post(`${this.baseUrl}/models/${modelId}`, payload, {
                    headers: {
                        'Authorization': `Bearer ${this.apiKey}`,
                        'Content-Type': 'application/json'
                    }
                });
                // HuggingFace returns an array of objects with a generated_text field
                const generatedText = Array.isArray(response.data) ? response.data[0]?.generated_text : response.data.generated_text;
                const content = generatedText || '';
                const parsed = parser_1.ResponseParser.parseResponse(content);
                return {
                    raw: response.data,
                    parsed,
                    usage: {
                        inputTokens: 0, // HuggingFace doesn't provide detailed token counts in the free tier
                        outputTokens: 0,
                        totalTokens: 0
                    }
                };
            }
        }
        catch (error) {
            throw new Error(`HuggingFace chat failed: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    async streamChat(modelId, payload, streamCallback) {
        // The HuggingFace Inference API doesn't support streaming in the free tier,
        // so we make a regular request and replay the response in chunks.
        const response = await axios_1.default.post(`${this.baseUrl}/models/${modelId}`, payload, {
            headers: {
                'Authorization': `Bearer ${this.apiKey}`,
                'Content-Type': 'application/json'
            }
        });
        const generatedText = Array.isArray(response.data) ? response.data[0]?.generated_text : response.data.generated_text;
        const content = generatedText || '';
        // Simulate streaming by sending the content in chunks
        await this.simulateStreaming(content, streamCallback);
        const parsed = parser_1.ResponseParser.parseResponse(content);
        return {
            raw: response.data,
            parsed,
            usage: undefined
        };
    }
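    // Emit the response word by word with a short delay to mimic token streaming.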
    async simulateStreaming(content, streamCallback) {
        const words = content.split(' ');
        for (let i = 0; i < words.length; i++) {
            // Re-attach the space that split() removed, except after the last word
            const word = words[i] + (i < words.length - 1 ? ' ' : '');
            streamCallback(word);
            // Small delay between chunks to simulate streaming
            await new Promise(resolve => setTimeout(resolve, 50));
        }
    }
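    // Flatten the chat history into a single prompt string using
    // "System:" / "Human:" / "Assistant:" markers.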
    messagesToPrompt(messages) {
        let prompt = '';
        const systemMessage = messages.find(m => m.role === 'system');
        if (systemMessage) {
            prompt += `System: ${systemMessage.content}\n\n`;
        }
        const conversationMessages = messages.filter(m => m.role !== 'system');
        for (let i = 0; i < conversationMessages.length; i++) {
            const message = conversationMessages[i];
            if (message.role === 'user') {
                prompt += `Human: ${message.content}\n\n`;
            }
            else if (message.role === 'assistant') {
                prompt += `Assistant: ${message.content}\n\n`;
            }
        }
        // Add the assistant prompt to indicate where the response should start
        if (conversationMessages[conversationMessages.length - 1]?.role === 'user') {
            prompt += 'Assistant: ';
        }
        return prompt;
    }
}
exports.HuggingFaceProvider = HuggingFaceProvider;
//# sourceMappingURL=huggingface.js.map
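
For context, here is a minimal usage sketch of the provider above. It is an assumption-laden illustration rather than package documentation: the require path, the environment variable name, and the option values are hypothetical, inferred from the constructor and chat() signatures in this file.

// Hypothetical usage sketch. The require path below is an assumption about
// where this compiled file lives inside the package; adjust it to the
// package's real entry point.
const { HuggingFaceProvider } = require('multi-llm/dist/providers/huggingface');

async function main() {
    // apiKey comes from the environment; baseUrl falls back to the hosted Inference API
    const provider = new HuggingFaceProvider(process.env.HF_API_KEY);

    const messages = [
        { role: 'system', content: 'You are a concise assistant.' },
        { role: 'user', content: 'Explain mixture-of-experts in one sentence.' }
    ];

    // Non-streaming call; the options mirror the parameters read in chat()
    const result = await provider.chat('mistralai/Mistral-7B-Instruct-v0.2', messages, {
        temperature: 0.7,
        maxTokens: 256
    });
    console.log(result.parsed);

    // Simulated streaming: the callback receives the response word by word
    await provider.chat('mistralai/Mistral-7B-Instruct-v0.2', messages, { maxTokens: 256 },
        chunk => process.stdout.write(chunk));
}

main().catch(console.error);

With these messages, messagesToPrompt() would build the prompt "System: You are a concise assistant.\n\nHuman: Explain mixture-of-experts in one sentence.\n\nAssistant: ", leaving the trailing "Assistant: " marker for the model to complete.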