abyss-ai
An enhanced OpenCode fork with autonomous AI coding-agent capabilities
import { Log } from "../util/log"
import { Global } from "../global"
import * as path from "path"
export namespace HuggingFaceModels {
const log = Log.create({ service: "huggingface-models" })
const cachePath = path.join(Global.Path.cache, "huggingface-models.json")
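// Minimal shapes for entries returned by the HF router's /v1/models endpoint
// (only the fields this module reads).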
interface HFProvider {
provider: string
vendor: string
}
interface HFModelResponse {
id: string
name: string
pipeline_tag: string
providers: HFProvider[]
context_length?: number
}
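// Fetch the model list from the HF router, normalize each entry into the
// catalog shape used by the rest of the app, and cache the result to disk.
// On failure, fall back to the on-disk cache, then to the static defaults.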
export async function refreshModels(): Promise<Record<string, any>> {
try {
log.info("Refreshing Hugging Face models from router API")
const response = await fetch("https://router.huggingface.co/v1/models", {
headers: {
"User-Agent": "abyss-ai/1.0 (enhanced opencode fork)"
}
})
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
}
const rawModels: HFModelResponse[] = await response.json()
// Filter and process models
const processedModels = rawModels
.filter(model =>
model.pipeline_tag === "text-generation" ||
model.pipeline_tag === "conversational" ||
model.pipeline_tag === "text2text-generation"
)
.slice(0, 100) // Keep only the first 100 models so the cached catalog stays manageable
.reduce((acc, model) => {
const providersString = model.providers
?.map(p => p.vendor || p.provider)
.filter(Boolean)
.join(", ") || "HuggingFace"
// Heuristic categorization based on keywords in the model id
const modelLower = model.id.toLowerCase()
const isReasoning = ['reasoning', 'r1', 'qwq', 'think', 'cot'].some(keyword =>
modelLower.includes(keyword)
)
const isCoding = ['code', 'coder', 'codestral', 'deepseek-coder', 'qwen-coder'].some(keyword =>
modelLower.includes(keyword)
)
const isVision = ['vision', 'multimodal', 'vl'].some(keyword =>
modelLower.includes(keyword)
)
acc[model.id] = {
id: model.id,
name: model.name || model.id.split('/').pop() || model.id,
attachment: isVision,
reasoning: isReasoning,
temperature: true,
tool_call: !isReasoning, // Reasoning models often don't support tool calling
release_date: "2024-01-01", // Default since HF doesn't provide this
last_updated: new Date().toISOString().split('T')[0],
modalities: {
input: isVision ? ["text", "image"] : ["text"],
output: ["text"]
},
open_weights: true, // Most HF models are open
cost: {
input: 0, // HF router pricing varies by provider
output: 0,
cache_read: 0,
cache_write: 0
},
limit: {
context: model.context_length || (isReasoning ? 128000 : 32768),
output: Math.min(model.context_length || 32768, 16384)
},
providers: providersString,
options: {},
// Add metadata for better categorization
metadata: {
category: isCoding ? "coding" : isReasoning ? "reasoning" : isVision ? "multimodal" : "general",
provider_count: model.providers?.length || 1,
is_popular: (model.providers?.length ?? 0) > 2
}
}
return acc
}, {} as Record<string, any>)
// Cache the processed results to disk
const cacheFile = Bun.file(cachePath)
await Bun.write(cacheFile, JSON.stringify(processedModels, null, 2))
log.info(`Successfully cached ${Object.keys(processedModels).length} HF models`)
return processedModels
} catch (error) {
log.error("Failed to refresh HF models", { error })
// Try to load from cache
try {
const cached = await Bun.file(cachePath).json()
log.info("Using cached HF models due to refresh failure")
return cached
} catch {
// Return enhanced fallback models
return getFallbackModels()
}
}
}
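// Static fallback catalog used when both the network fetch and the on-disk
// cache are unavailable.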
function getFallbackModels() {
return {
"meta-llama/Llama-3.1-8B-Instruct": {
id: "meta-llama/Llama-3.1-8B-Instruct",
name: "Llama 3.1 8B Instruct",
attachment: false,
reasoning: false,
temperature: true,
tool_call: true,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 128000, output: 8192 },
providers: "AWS, Azure, Together",
metadata: { category: "general", provider_count: 3, is_popular: true }
},
"meta-llama/Llama-3.1-70B-Instruct": {
id: "meta-llama/Llama-3.1-70B-Instruct",
name: "Llama 3.1 70B Instruct",
attachment: false,
reasoning: false,
temperature: true,
tool_call: true,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 128000, output: 8192 },
providers: "AWS, Azure, Together",
metadata: { category: "general", provider_count: 3, is_popular: true }
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": {
id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
name: "DeepSeek R1 Distill Qwen 14B",
attachment: false,
reasoning: true,
temperature: true,
tool_call: false,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 65536, output: 8192 },
providers: "HuggingFace",
metadata: { category: "reasoning", provider_count: 1, is_popular: true }
},
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": {
id: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
name: "DeepSeek Coder V2 Lite",
attachment: false,
reasoning: false,
temperature: true,
tool_call: true,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 163840, output: 8192 },
providers: "HuggingFace",
metadata: { category: "coding", provider_count: 1, is_popular: true }
},
"Qwen/Qwen2.5-VL-7B-Instruct": {
id: "Qwen/Qwen2.5-VL-7B-Instruct",
name: "Qwen 2.5 VL 7B Instruct",
attachment: true,
reasoning: false,
temperature: true,
tool_call: true,
cost: { input: 0, output: 0, cache_read: 0, cache_write: 0 },
limit: { context: 32768, output: 8192 },
providers: "HuggingFace",
metadata: { category: "multimodal", provider_count: 1, is_popular: true }
}
}
}
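// Return the cached catalog. A cache miss triggers a blocking refresh; a
// stale cache (older than one hour) triggers a non-blocking background refresh.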
export async function getModels() {
const file = Bun.file(cachePath)
const exists = await file.exists()
if (!exists) {
return await refreshModels()
}
// Refresh in background if cache is older than 1 hour
// BunFile exposes the file's mtime (ms since epoch) as `lastModified`
const oneHourAgo = Date.now() - 60 * 60 * 1000
if (file.lastModified < oneHourAgo) {
// Don't await - refresh in background
refreshModels().catch(err =>
log.error("Background refresh failed", { error: err })
)
}
try {
return await file.json()
} catch {
// If cache is corrupted, refresh
return await refreshModels()
}
}
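// Hand-picked defaults, one per category. Each also appears in the fallback
// catalog so a recommendation resolves even when offline.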
export function getRecommendedModels() {
return {
reasoning: "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
coding: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
general: "meta-llama/Llama-3.1-70B-Instruct",
fast: "meta-llama/Llama-3.1-8B-Instruct",
multimodal: "Qwen/Qwen2.5-VL-7B-Instruct"
}
}
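// Keyword lists for lightweight, substring-based categorization of model
// ids and names.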
export namespace ModelCategories {
export const REASONING_MODELS = [
"deepseek-r1", "qwq", "reasoning", "think", "cot", "r1"
]
export const CODING_MODELS = [
"code", "coder", "codestral", "deepseek-coder", "qwen-coder", "starcoder"
]
export const LARGE_CONTEXT = [
"longwriter", "yarn", "longalpaca", "longchat", "128k", "200k"
]
export const MULTIMODAL = [
"vision", "multimodal", "vl", "image", "visual"
]
export function categorizeModel(modelId: string, modelName: string) {
const text = `${modelId} ${modelName}`.toLowerCase()
return {
reasoning: REASONING_MODELS.some(keyword => text.includes(keyword)),
coding: CODING_MODELS.some(keyword => text.includes(keyword)),
largeContext: LARGE_CONTEXT.some(keyword => text.includes(keyword)),
multimodal: MULTIMODAL.some(keyword => text.includes(keyword))
}
}
}
}
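
A minimal usage sketch (assumptions: Bun runtime with top-level await, and an import path inferred from this file's relative imports such as "../util/log"; adjust to wherever the module lives in the fork):

// Usage sketch - the import path below is an assumption, not confirmed by this file.
import { HuggingFaceModels } from "./provider/huggingface-models"

// Load the catalog (a cold cache triggers a blocking refresh from the router).
const models = await HuggingFaceModels.getModels()

// Prefer the recommended coding model, falling back to the general default.
const rec = HuggingFaceModels.getRecommendedModels()
const pick = models[rec.coding] ?? models[rec.general]
if (pick) console.log(`Selected ${pick.id} (context: ${pick.limit.context} tokens)`)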