@nanocollective/nanocoder
A local-first CLI coding agent that brings the power of agentic coding tools like Claude Code and Gemini CLI to local models or controlled APIs like OpenRouter
/**
* API client for models.dev
* Fetches and caches model metadata
*/
import { request } from 'undici';
import { TIMEOUT_HTTP_BODY_MS, TIMEOUT_HTTP_HEADERS_MS } from '../constants.js';
import { formatError } from '../utils/error-formatter.js';
import { getLogger } from '../utils/logging/index.js';
import { readCache, writeCache } from './models-cache.js';
const MODELS_DEV_API_URL = 'https://models.dev/api.json';
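/**
 * Expected shape of the models.dev payload, inferred from how the data is
 * consumed below (not an official schema; the API may carry extra fields
 * that this client simply ignores):
 *
 *   {
 *     [providerId]: {
 *       name: string,
 *       models: {
 *         [modelId]: {
 *           id: string,
 *           name: string,
 *           limit?: { context?: number, output?: number },
 *           tool_call?: boolean,
 *           cost?: { input?: number, output?: number },
 *         },
 *       },
 *     },
 *   }
 */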
/**
* Fallback context limits for common Ollama model architectures
* Used when models.dev doesn't have the model data
*/
const OLLAMA_MODEL_CONTEXT_LIMITS = {
// Llama 3.2 models (not on models.dev)
'llama3.2': 128000,
'llama3.2:1b': 128000,
'llama3.2:3b': 128000,
// Llama 3.1 models (base matches wrong model on models.dev)
'llama3.1': 128000,
'llama3.1:8b': 128000,
'llama3.1:70b': 128000,
'llama3.1:405b': 128000,
// Llama 3 models (size variants not on models.dev)
'llama3:8b': 8192,
'llama3:70b': 8192,
// Llama 2 models (not on models.dev)
llama2: 4096,
'llama2:7b': 4096,
'llama2:13b': 4096,
'llama2:70b': 4096,
// Mistral models (base matches wrong model on models.dev)
mistral: 32000,
'mistral:7b': 32000,
'mixtral:8x7b': 32000,
'mixtral:8x22b': 64000,
'ministral:3b': 256000,
'ministral:8b': 256000,
// Qwen models (base names match wrong models on models.dev)
qwen: 32000,
'qwen:7b': 32000,
'qwen:14b': 32000,
qwen2: 32000,
'qwen2:7b': 32000,
'qwen2.5': 128000,
'qwen2.5:7b': 128000,
qwen3: 128000,
'qwen3:7b': 128000,
'qwen3:14b': 128000,
'qwen3:32b': 128000,
// Gemma models (base matches wrong model on models.dev)
gemma: 8192,
'gemma:2b': 8192,
'gemma:7b': 8192,
'gemma2:9b': 8192,
'gemma2:27b': 8192,
// DeepSeek models (base matches wrong model on models.dev)
'deepseek-coder': 16000,
'deepseek-coder-v2': 128000,
// Phi models (not on models.dev)
phi3: 128000,
'phi3:mini': 128000,
'phi3:medium': 128000,
// Moonshot AI models (kimi-for-coding is a provider, not a model ID)
'kimi-for-coding': 256000,
};
/**
* Extract base model architecture from Ollama model name
* e.g., "llama3.1:8b-instruct-q4_0" -> "llama3.1:8b"
*/
function extractOllamaModelBase(modelName) {
const lower = modelName.toLowerCase();
// Sort keys by length descending so longer/more specific keys match first
// e.g., "qwen3-coder:480b" matches before "qwen3", "mixtral:8x22b" before "mixtral"
const sortedKeys = Object.keys(OLLAMA_MODEL_CONTEXT_LIMITS).sort((a, b) => b.length - a.length);
// Try exact and prefix matches
for (const key of sortedKeys) {
if (lower === key ||
lower.startsWith(`${key}-`) ||
lower.startsWith(`${key}:`)) {
return key;
}
}
// Try to match base architecture (also sorted by specificity)
for (const key of sortedKeys) {
if (lower.includes(key)) {
return key;
}
}
return null;
}
/**
* Get fallback context limit for Ollama models
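* @example
* // Values traced from the fallback table above (illustrative):
* getOllamaFallbackContextLimit('llama3.1:8b-instruct-q4_0'); // -> 128000
* getOllamaFallbackContextLimit('some-custom-finetune');      // -> null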
*/
function getOllamaFallbackContextLimit(modelName) {
const baseModel = extractOllamaModelBase(modelName);
if (!baseModel) {
return null;
}
return OLLAMA_MODEL_CONTEXT_LIMITS[baseModel] || null;
}
/**
* Fetch models data from models.dev API
* Falls back to cache if API is unavailable
*/
async function fetchModelsData() {
try {
const response = await request(MODELS_DEV_API_URL, {
method: 'GET',
headersTimeout: TIMEOUT_HTTP_HEADERS_MS,
bodyTimeout: TIMEOUT_HTTP_BODY_MS,
});
if (response.statusCode !== 200) {
throw new Error(`Failed to fetch models data: HTTP ${response.statusCode}`);
}
const data = await response.body.json();
// Cache the successful response
await writeCache(data);
return data;
}
catch (error) {
const logger = getLogger();
logger.warn({ error: formatError(error) }, 'Failed to fetch from models.dev');
// Try to use cached data as fallback
const cached = await readCache();
if (cached) {
logger.info('Using cached models data');
return cached.data;
}
return null;
}
}
/**
* Get models data, preferring cache if valid
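* Flow: readCache() is consulted first; on a cache miss, fetchModelsData()
* hits the network, writes the cache on success, and falls back to any
* readable cached data if the fetch fails.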
*/
async function getModelsData() {
// Try cache first
const cached = await readCache();
if (cached) {
return cached.data;
}
// Fetch fresh data if cache is invalid
return fetchModelsData();
}
/**
* Find a model by ID across all providers
* Returns the model info and provider name
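* When several providers list the same model ID, the entry reporting the
* highest context limit is preferred.
* @example
* // Hedged sketch; concrete values depend on the live models.dev dataset:
* const info = await findModelById('gpt-4o');
* // -> { id, name, provider, contextLimit, outputLimit, supportsToolCalls, cost } or null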
*/
async function findModelById(modelId) {
const data = await getModelsData();
if (!data) {
return null;
}
let bestMatch = null;
// Search through all providers, picking the match with highest context limit
for (const [_providerId, provider] of Object.entries(data)) {
// Skip malformed provider entries
if (!provider || typeof provider !== 'object' || !provider.models) {
continue;
}
const model = provider.models[modelId];
if (model) {
const contextLimit = model.limit?.context ?? null;
if (!bestMatch ||
(contextLimit !== null &&
(bestMatch.contextLimit === null ||
contextLimit > bestMatch.contextLimit))) {
bestMatch = {
id: model.id,
name: model.name,
provider: provider.name,
contextLimit,
outputLimit: model.limit?.output ?? null,
supportsToolCalls: model.tool_call ?? false,
cost: {
input: model.cost?.input ?? 0,
output: model.cost?.output ?? 0,
},
};
}
}
}
return bestMatch;
}
/**
* Find a model by partial name match
* Useful for local models where exact ID might not match
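* Matching is scored: an exact ID match returns immediately; otherwise an ID
* prefix scores 3, a name prefix scores 2, and a substring hit on the ID or
* name scores 1, and the highest-scoring candidate is returned.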
*/
async function findModelByName(modelName) {
// Empty string matches everything with .includes(), so return null early
if (!modelName) {
return null;
}
const data = await getModelsData();
if (!data) {
return null;
}
const lowerName = modelName.toLowerCase();
let bestMatch = null;
let bestScore = 0;
// Search through all providers with scored matching
for (const [_providerId, provider] of Object.entries(data)) {
// Skip malformed provider entries
if (!provider || typeof provider !== 'object' || !provider.models) {
continue;
}
for (const [_modelId, model] of Object.entries(provider.models)) {
// Skip malformed model entries
if (!model || typeof model !== 'object') {
continue;
}
const modelIdLower = model.id?.toLowerCase() ?? '';
const modelNameLower = model.name?.toLowerCase() ?? '';
let score = 0;
// Exact ID match → return immediately
if (modelIdLower === lowerName) {
return {
id: model.id,
name: model.name,
provider: provider.name,
contextLimit: model.limit?.context ?? null,
outputLimit: model.limit?.output ?? null,
supportsToolCalls: model.tool_call ?? false,
cost: {
input: model.cost?.input ?? 0,
output: model.cost?.output ?? 0,
},
};
}
// ID starts with search term → high score
if (modelIdLower.startsWith(lowerName)) {
score = 3;
}
// Name starts with search term → medium score
else if (modelNameLower.startsWith(lowerName)) {
score = 2;
}
// ID or Name contains search term → low score
else if (modelIdLower.includes(lowerName) ||
modelNameLower.includes(lowerName)) {
score = 1;
}
if (score > bestScore) {
bestScore = score;
bestMatch = {
id: model.id,
name: model.name,
provider: provider.name,
contextLimit: model.limit?.context ?? null,
outputLimit: model.limit?.output ?? null,
supportsToolCalls: model.tool_call ?? false,
cost: {
input: model.cost?.input ?? 0,
output: model.cost?.output ?? 0,
},
};
}
}
}
return bestMatch;
}
/**
* Singleton class for managing session-level context limit overrides.
* Allows users to manually set a context limit via /context-max command.
*/
class ContextLimitSessionManager {
_contextLimit = null;
get() {
return this._contextLimit;
}
set(limit) {
if (limit !== null && limit > 0) {
this._contextLimit = limit;
}
else {
this._contextLimit = null;
}
}
reset() {
this._contextLimit = null;
}
}
// Singleton instance
const contextLimitSession = new ContextLimitSessionManager();
export function setSessionContextLimit(limit) {
contextLimitSession.set(limit);
}
export function getSessionContextLimit() {
return contextLimitSession.get();
}
export function resetSessionContextLimit() {
contextLimitSession.reset();
}
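// Usage sketch for the session override (illustrative; mirrors the /context-max command):
//   setSessionContextLimit(32768);   // overrides automatic resolution for this session
//   getSessionContextLimit();        // -> 32768
//   setSessionContextLimit(0);       // non-positive or null values clear the override
//   resetSessionContextLimit();      // back to env / models.dev resolution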
/**
* Get context limit for a model
* Resolution order:
* 1. Session override (from /context-max command)
* 2. NANOCODER_CONTEXT_LIMIT env variable
* 3. models.dev lookup / hardcoded Ollama defaults
* 4. null (unknown)
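* @example
* // Hedged sketch; assumes no session or NANOCODER_CONTEXT_LIMIT override is set
* // and that models.dev has no entry for the tag, so the Ollama table applies:
* await getModelContextLimit('llama3.2:3b-instruct-q4_K_M'); // -> 128000 (fallback table)
* await getModelContextLimit('entirely-unknown-model');      // -> null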
*/
export async function getModelContextLimit(modelId) {
try {
// Check session override first (highest priority)
const sessionLimit = contextLimitSession.get();
if (sessionLimit !== null) {
return sessionLimit;
}
// Check environment variable fallback
const envLimit = process.env.NANOCODER_CONTEXT_LIMIT;
if (envLimit) {
const parsed = Number.parseInt(envLimit, 10);
if (!Number.isNaN(parsed) && parsed > 0) {
return parsed;
}
}
// Strip :cloud or -cloud suffix if present (Ollama cloud models)
const normalizedModelId = modelId.endsWith(':cloud') || modelId.endsWith('-cloud')
? modelId.slice(0, -6) // Remove ":cloud" or "-cloud"
: modelId;
// Try models.dev exact ID match first (primary source)
let modelInfo = await findModelById(normalizedModelId);
// Try models.dev partial name match if exact match fails
if (!modelInfo) {
modelInfo = await findModelByName(normalizedModelId);
}
// If found in models.dev, return that
if (modelInfo) {
return modelInfo.contextLimit;
}
// Fall back to hardcoded Ollama model defaults (offline fallback)
// Try original model ID first (handles entries like "kimi-k2:1t-cloud")
const ollamaLimitOriginal = getOllamaFallbackContextLimit(modelId);
if (ollamaLimitOriginal) {
return ollamaLimitOriginal;
}
// Try normalized ID (without cloud suffix)
const ollamaLimit = getOllamaFallbackContextLimit(normalizedModelId);
if (ollamaLimit) {
return ollamaLimit;
}
// No context limit found
return null;
}
catch (error) {
// Log error but don't crash - just return null
const logger = getLogger();
logger.error({ error: formatError(error), modelId }, 'Error getting model context limit');
return null;
}
}
//# sourceMappingURL=models-dev-client.js.map