abyss-ai
An enhanced OpenCode fork with autonomous AI coding-agent capabilities
import { Log } from "../util/log"
import { Global } from "../global"
import * as path from "path"
export namespace HuggingFaceModels {
const log = Log.create({ service: "huggingface-models" })
const cachePath = path.join(Global.Path.cache, "huggingface-models.json")
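// Minimal shapes for entries returned by the HF router's /v1/models endpoint
// (only the fields this module reads).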
interface HFProvider {
provider: string
vendor: string
}
interface HFModelResponse {
id: string
name: string
pipeline_tag: string
providers: HFProvider[]
context_length?: number
}
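// Fetch the model list from the HF router, normalize each entry into the
// catalog shape used by the rest of the app, and cache the result to disk.
// On failure, fall back to the on-disk cache, then to the static defaults.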
export async function refreshModels(): Promise<Record<string, any>> {
try {
log.info("Refreshing Hugging Face models from router API")
const response = await fetch("https://router.huggingface.co/v1/models", {
headers: {
"User-Agent": "abyss-ai/1.0 (enhanced opencode fork)"
}
})
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
}
const rawModels: HFModelResponse[] = await response.json()
// Filter and process models
const processedModels = rawModels
.filter(model =>
model.pipeline_tag === "text-generation" ||
model.pipeline_tag === "conversational" ||
model.pipeline_tag === "text2text-generation"
)
.slice(0, 100) // Keep only the first 100 models so the cached catalog stays manageable
.reduce((acc, model) => {
const providersString = model.providers
?.map(p => p.vendor || p.provider)
.filter(Boolean)
.join(", ") || "HuggingFace"
// Heuristic categorization based on keywords in the model id
const modelLower = model.id.toLowerCase()
const isReasoning = ['reasoning', 'r1', 'qwq', 'think', 'cot'].some(keyword =>
modelLower.includes(keyword)
)
const isCoding = ['code', 'coder', 'codestral', 'deepseek-coder', 'qwen-coder'].some(keyword =>
modelLower.includes(keyword)
)
const isVision = ['vision', 'multimodal', 'vl'].some(keyword =>
modelLower.includes(keyword)
)
acc[model.id] = {
id: model.id,
name: model.name || model.id.split('/').pop() || model.id,
attachment: isVision,
reasoning: isReasoning,
temperature: true,
tool_call: !isReasoning, // Reasoning models often don't support tool calling
release_date: "2024-01-01", // Default since HF doesn't provide this
last_updated: new Date().toISOString().split('T')[0],
modalities: {
input: isVision ? ["text", "image"] : ["text"],
output: ["text"]
},
open_weights: true, // Most HF models are open
cost: {
input: 0, // HF router pricing varies by provider
output: 0,
cache_read: 0,
cache_write: 0
},
limit: {
context: model.context_length || (isReasoning ? 128000 : 32768),
output: Math.min(model.context_length || 32768, 16384)
},
providers: providersString,
options: {},
// Add metadata for better categorization
metadata: {
category: isCoding ? "coding" : isReasoning ? "reasoning" : isVision ? "multimodal" : "general",
provider_count: model.providers?.length || 1,
is_popular: (model.providers?.length ?? 0) > 2
}
}
return acc
}, {} as Record<string, any>)
// Cache the processed results to disk
const cacheFile = Bun.file(cachePath)
await Bun.write(cacheFile, JSON.stringify(processedModels, null, 2))
log.info(`Successfully cached ${Object.keys(processedModels).length} HF models`)
return processedModels
} catch (error) {
log.error("Failed to refresh HF models", { error })
// Try to load from cache
try {
const cached = await Bun.file(cachePath).json()
log.info("Using cached HF models due to refresh failure")
return cached
} catch {
// Return enhanced fallback models
return getFallbackModels()
}
}
}
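// Static fallback catalog used when both the network fetch and the on-disk
// cache are unavailable.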
function getFallbackModels() {
return {
"meta-llama/Llama-3.1-8B-Instruct": {
id: "meta-llama/Llama-3.1-8B-Instruct",
name: "Llama 3.1 8B Instruct",
attachment: false,
reasoning: false,
temperature: true,
tool_call: true,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 128000, output: 8192 },
providers: "AWS, Azure, Together",
metadata: { category: "general", provider_count: 3, is_popular: true }
},
"meta-llama/Llama-3.1-70B-Instruct": {
id: "meta-llama/Llama-3.1-70B-Instruct",
name: "Llama 3.1 70B Instruct",
attachment: false,
reasoning: false,
temperature: true,
tool_call: true,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 128000, output: 8192 },
providers: "AWS, Azure, Together",
metadata: { category: "general", provider_count: 3, is_popular: true }
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": {
id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
name: "DeepSeek R1 Distill Qwen 14B",
attachment: false,
reasoning: true,
temperature: true,
tool_call: false,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 65536, output: 8192 },
providers: "HuggingFace",
metadata: { category: "reasoning", provider_count: 1, is_popular: true }
},
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": {
id: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
name: "DeepSeek Coder V2 Lite",
attachment: false,
reasoning: false,
temperature: true,
tool_call: true,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 163840, output: 8192 },
providers: "HuggingFace",
metadata: { category: "coding", provider_count: 1, is_popular: true }
},
"Qwen/Qwen2.5-VL-7B-Instruct": {
id: "Qwen/Qwen2.5-VL-7B-Instruct",
name: "Qwen 2.5 VL 7B Instruct",
attachment: true,
reasoning: false,
temperature: true,
tool_call: true,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 32768, output: 8192 },
providers: "HuggingFace",
metadata: { category: "multimodal", provider_count: 1, is_popular: true }
}
}
}
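// Return the cached catalog. A cache miss triggers a blocking refresh; a
// stale cache (older than one hour) triggers a non-blocking background refresh.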
export async function getModels() {
const file = Bun.file(cachePath)
const exists = await file.exists()
if (!exists) {
return await refreshModels()
}
// Refresh in background if cache is older than 1 hour
// BunFile exposes the file's mtime (ms since epoch) as `lastModified`
const oneHourAgo = Date.now() - 60 * 60 * 1000
if (file.lastModified < oneHourAgo) {
// Don't await - refresh in background
refreshModels().catch(err =>
log.error("Background refresh failed", { error: err })
)
}
try {
return await file.json()
} catch {
// If cache is corrupted, refresh
return await refreshModels()
}
}
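// Hand-picked defaults, one per category. Each also appears in the fallback
// catalog so a recommendation resolves even when offline.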
export function getRecommendedModels() {
return {
reasoning: "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
coding: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
general: "meta-llama/Llama-3.1-70B-Instruct",
fast: "meta-llama/Llama-3.1-8B-Instruct",
multimodal: "Qwen/Qwen2.5-VL-7B-Instruct"
}
}
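// Keyword lists for lightweight, substring-based categorization of model
// ids and names.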
export namespace ModelCategories {
export const REASONING_MODELS = [
"deepseek-r1", "qwq", "reasoning", "think", "cot", "r1"
]
export const CODING_MODELS = [
"code", "coder", "codestral", "deepseek-coder", "qwen-coder", "starcoder"
]
export const LARGE_CONTEXT = [
"longwriter", "yarn", "longalpaca", "longchat", "128k", "200k"
]
export const MULTIMODAL = [
"vision", "multimodal", "vl", "image", "visual"
]
export function categorizeModel(modelId: string, modelName: string) {
const text = `${modelId} ${modelName}`.toLowerCase()
return {
reasoning: REASONING_MODELS.some(keyword => text.includes(keyword)),
coding: CODING_MODELS.some(keyword => text.includes(keyword)),
largeContext: LARGE_CONTEXT.some(keyword => text.includes(keyword)),
multimodal: MULTIMODAL.some(keyword => text.includes(keyword))
}
}
}
}
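
A minimal usage sketch (assumptions: Bun runtime with top-level await, and an import path inferred from this file's relative imports such as "../util/log"; adjust to wherever the module lives in the fork):

// Usage sketch - the import path below is an assumption, not confirmed by this file.
import { HuggingFaceModels } from "./provider/huggingface-models"

// Load the catalog (a cold cache triggers a blocking refresh from the router).
const models = await HuggingFaceModels.getModels()

// Prefer the recommended coding model, falling back to the general default.
const rec = HuggingFaceModels.getRecommendedModels()
const pick = models[rec.coding] ?? models[rec.general]
if (pick) console.log(`Selected ${pick.id} (context: ${pick.limit.context} tokens)`)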