UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

461 lines (460 loc) 17.2 kB
/**
 * Context Window Registry
 *
 * Accurate per-provider, per-model context window sizes (INPUT token limits).
 * These are distinct from OUTPUT token limits in tokens.ts.
 *
 * Sources:
 * - Anthropic: https://docs.anthropic.com/en/docs/about-claude/models
 * - OpenAI: https://platform.openai.com/docs/models
 * - Google: https://ai.google.dev/gemini-api/docs/models
 * - Others: Provider documentation as of Feb 2026
 */
import { DynamicModelProvider } from "../core/dynamicModels.js";
import { logger } from "../utils/logger.js";

/** Default context window when provider/model is unknown */
export const DEFAULT_CONTEXT_WINDOW = 128_000;

/** Maximum output reserve when maxTokens not specified */
export const MAX_DEFAULT_OUTPUT_RESERVE = 64_000;

/** Default output reserve ratio (35% of context) */
export const DEFAULT_OUTPUT_RESERVE_RATIO = 0.35;

/**
 * Per-provider, per-model context window sizes.
 * The "_default" key is the fallback for unknown models within a provider.
 */
export const MODEL_CONTEXT_WINDOWS = {
    deepseek: {
        _default: 64_000,
        "deepseek-chat": 64_000,
        "deepseek-reasoner": 64_000,
    },
    "nvidia-nim": {
        _default: 128_000,
        "meta/llama-3.3-70b-instruct": 128_000,
        "meta/llama-3.1-405b-instruct": 128_000,
        "meta/llama-3.1-70b-instruct": 128_000,
        "meta/llama-3.2-90b-vision-instruct": 128_000,
        "meta/llama-3.2-11b-vision-instruct": 128_000,
        "nvidia/llama-3.3-nemotron-super-49b-v1": 128_000,
        "nvidia/llama-3.1-nemotron-nano-8b-v1": 128_000,
        "nvidia/llama-3.1-nemotron-70b-instruct": 128_000,
        "deepseek-ai/deepseek-r1": 128_000,
        "deepseek-ai/deepseek-r1-distill-llama-70b": 128_000,
        "mistralai/mixtral-8x22b-instruct-v0.1": 65_536,
        "mistralai/mixtral-8x7b-instruct-v0.1": 32_768,
        "microsoft/phi-4": 16_384,
        "google/gemma-3-27b-it": 8_192,
    },
    "lm-studio": {
        _default: 8_192,
    },
    llamacpp: {
        _default: 8_192,
    },
    xai: {
        _default: 131_072,
        "grok-3": 131_072,
        "grok-3-mini": 131_072,
        "grok-2-latest": 131_072,
        "grok-2-vision-latest": 32_768,
        "grok-beta": 131_072,
    },
    groq: {
        _default: 128_000,
        "llama-3.3-70b-versatile": 131_072,
        "llama-3.1-8b-instant": 128_000,
        "llama-3.2-90b-vision-preview": 128_000,
        "llama-3.2-11b-vision-preview": 128_000,
        "llama-guard-3-8b": 8_192,
        "gemma2-9b-it": 8_192,
        "mixtral-8x7b-32768": 32_768,
    },
    cohere: {
        _default: 128_000,
        "command-r-plus": 128_000,
        "command-r": 128_000,
        "command-r7b-12-2024": 128_000,
    },
    "together-ai": {
        _default: 128_000,
        "meta-llama/Llama-3.3-70B-Instruct-Turbo": 128_000,
        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 128_000,
        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": 128_000,
        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 128_000,
        "mistralai/Mixtral-8x22B-Instruct-v0.1": 65_536,
        "mistralai/Mixtral-8x7B-Instruct-v0.1": 32_768,
        "Qwen/Qwen2.5-72B-Instruct-Turbo": 32_768,
        "Qwen/Qwen2.5-Coder-32B-Instruct": 32_768,
        "deepseek-ai/DeepSeek-R1": 64_000,
        "deepseek-ai/DeepSeek-V3": 64_000,
        "google/gemma-2-27b-it": 8_192,
        "microsoft/WizardLM-2-8x22B": 65_536,
    },
    fireworks: {
        _default: 128_000,
        "accounts/fireworks/models/llama-v3p1-70b-instruct": 131_072,
        "accounts/fireworks/models/llama-v3p1-405b-instruct": 128_000,
        "accounts/fireworks/models/llama-v3p1-8b-instruct": 128_000,
        "accounts/fireworks/models/llama-v3p3-70b-instruct": 128_000,
        "accounts/fireworks/models/mixtral-8x22b-instruct": 65_536,
        "accounts/fireworks/models/qwen2p5-72b-instruct": 32_768,
        "accounts/fireworks/models/qwen2p5-coder-32b-instruct": 32_768,
        "accounts/fireworks/models/deepseek-v3": 64_000,
    },
    perplexity: {
        _default: 127_000,
        sonar: 127_000,
        "sonar-pro": 200_000,
        "sonar-reasoning": 127_000,
        "sonar-reasoning-pro": 127_000,
        "sonar-deep-research": 200_000,
    },
    cloudflare: {
        _default: 8_192,
        "@cf/meta/llama-3.3-70b-instruct-fp8-fast": 24_000,
        "@cf/meta/llama-3.1-70b-instruct": 24_000,
        "@cf/meta/llama-3.1-8b-instruct-fast": 24_000,
        "@cf/meta/llama-3.2-11b-vision-instruct": 24_000,
        "@cf/mistral/mistral-7b-instruct-v0.2": 32_768,
        "@cf/qwen/qwen1.5-14b-chat-awq": 7_500,
        "@cf/google/gemma-2b-it-lora": 4_096,
    },
    replicate: {
        // Per-model — Replicate hosts arbitrary models; sensible default.
        _default: 32_768,
    },
    voyage: {
        // Voyage embeddings: max input tokens vary 16K-32K per model
        _default: 32_000,
        "voyage-3.5": 32_000,
        "voyage-3.5-lite": 32_000,
        "voyage-3-large": 32_000,
        "voyage-code-3": 32_000,
        "voyage-finance-2": 32_000,
        "voyage-law-2": 16_000,
        "voyage-multilingual-2": 32_000,
    },
    jina: {
        // Jina embeddings: 8K input tokens for v3; 8K for v2; 32K for ColBERT-v2
        _default: 8_192,
        "jina-embeddings-v3": 8_192,
        "jina-embeddings-v2-base-en": 8_192,
        "jina-embeddings-v2-small-en": 8_192,
        "jina-colbert-v2": 32_000,
    },
    stability: {
        // Image-gen — context is prompt length only; ~2000 char limit
        _default: 2_000,
    },
    ideogram: {
        _default: 2_000,
    },
    recraft: {
        _default: 2_000,
    },
    anthropic: {
        _default: 200_000,
        // Claude 4.6 (Feb 2026) — 1M context window
        "claude-opus-4-6": 1_000_000,
        "claude-sonnet-4-6": 1_000_000,
        // Claude 4.5
        "claude-opus-4-5-20251101": 200_000,
        "claude-sonnet-4-5-20250929": 200_000,
        "claude-haiku-4-5-20251001": 200_000,
        // Claude 4.x
        "claude-opus-4-1-20250805": 200_000,
        "claude-opus-4-20250514": 200_000,
        "claude-sonnet-4-20250514": 200_000,
        // Claude 3.x
        "claude-3-7-sonnet-20250219": 200_000,
        "claude-3-5-sonnet-20241022": 200_000,
        "claude-3-5-haiku-20241022": 200_000,
        "claude-3-opus-20240229": 200_000,
        "claude-3-sonnet-20240229": 200_000,
        "claude-3-haiku-20240307": 200_000,
    },
    openai: {
        _default: 128_000,
        // GPT-5.4 family — 1.05M context
        "gpt-5.4": 1_050_000,
        "gpt-5.4-mini": 400_000,
        "gpt-5.4-nano": 400_000,
        "gpt-5.4-pro": 1_050_000,
        // GPT-5.x family — 400K context
        "gpt-5.3-codex": 400_000,
        "gpt-5.2": 400_000,
        "gpt-5.2-pro": 400_000,
        "gpt-5.2-codex": 400_000,
        "gpt-5.2-chat-latest": 128_000,
        "gpt-5.1": 400_000,
        "gpt-5.1-codex": 400_000,
        "gpt-5.1-codex-max": 400_000,
        "gpt-5.1-codex-mini": 400_000,
        "gpt-5.1-chat-latest": 128_000,
        "gpt-5": 400_000,
        "gpt-5-mini": 400_000,
        "gpt-5-nano": 400_000,
        "gpt-5-pro": 400_000,
        "gpt-5-codex": 400_000,
        "gpt-5-chat-latest": 128_000,
        // GPT Open Source
        "gpt-oss-120b": 128_000,
        "gpt-oss-20b": 128_000,
        // GPT-4.1 family — 1M context
        "gpt-4.1": 1_047_576,
        "gpt-4.1-mini": 1_047_576,
        "gpt-4.1-nano": 1_047_576,
        // GPT-4o
        "gpt-4o": 128_000,
        "gpt-4o-mini": 128_000,
        // O-series reasoning — 200K context
        o1: 200_000,
        "o1-mini": 128_000,
        "o1-pro": 200_000,
        o3: 200_000,
        "o3-mini": 200_000,
        "o3-pro": 200_000,
        "o4-mini": 200_000,
        // Legacy
        "gpt-4-turbo": 128_000,
        "gpt-4": 8_192,
        "gpt-3.5-turbo": 16_385,
    },
    "google-ai": {
        _default: 1_048_576,
        // Gemini 3.1 Series (all require -preview suffix)
        "gemini-3.1-pro-preview": 1_048_576,
        "gemini-3.1-flash-lite-preview": 1_048_576,
        "gemini-3.1-flash-image-preview": 1_048_576,
        "gemini-3.1-pro-preview-customtools": 1_048_576,
        // Gemini 3 Series
        "gemini-3-flash-preview": 1_048_576,
        "gemini-3-pro-image-preview": 65_536,
        /** @deprecated SHUT DOWN March 9, 2026. Migrate to gemini-3.1-pro-preview. */
        "gemini-3-pro-preview": 1_048_576,
        "gemini-2.5-pro": 1_048_576,
        "gemini-2.5-flash": 1_048_576,
        "gemini-2.5-flash-lite": 1_048_576,
        "gemini-2.5-flash-image": 32_768,
        "gemini-2.0-flash": 1_048_576,
        "gemini-1.5-pro": 2_097_152,
        "gemini-1.5-flash": 1_048_576,
    },
    vertex: {
        _default: 1_048_576,
        // Claude on Vertex
        "claude-opus-4-6": 1_000_000,
        "claude-sonnet-4-6": 1_000_000,
        "claude-sonnet-4-5": 200_000,
        "claude-opus-4-5": 200_000,
        "claude-haiku-4-5": 200_000,
        "claude-sonnet-4": 200_000,
        "claude-sonnet-4-20250514": 200_000,
        "claude-opus-4-20250514": 200_000,
        "claude-opus-4": 200_000,
        // Gemini 3.1 on Vertex (all require -preview suffix)
        "gemini-3.1-pro-preview": 1_048_576,
        "gemini-3.1-flash-lite-preview": 1_048_576,
        "gemini-3.1-flash-image-preview": 1_048_576,
        "gemini-3.1-pro-preview-customtools": 1_048_576,
        // Gemini 3 on Vertex
        "gemini-3-flash-preview": 1_048_576,
        "gemini-3-pro-image-preview": 65_536,
        /** @deprecated SHUT DOWN March 9, 2026. Migrate to gemini-3.1-pro-preview. */
        "gemini-3-pro-preview": 1_048_576,
        // Gemini 2.x on Vertex
        "gemini-2.5-pro": 1_048_576,
        "gemini-2.5-flash": 1_048_576,
        "gemini-2.0-flash": 1_048_576,
        "gemini-1.5-pro": 2_097_152,
        "gemini-1.5-flash": 1_048_576,
    },
    bedrock: {
        _default: 200_000,
        // Claude 4.6
        "anthropic.claude-opus-4-6-v1:0": 1_000_000,
        "anthropic.claude-sonnet-4-6": 1_000_000,
        // Claude 4.5
        "anthropic.claude-opus-4-5-20251124-v1:0": 200_000,
        "anthropic.claude-sonnet-4-5-20250929-v1:0": 200_000,
        "anthropic.claude-haiku-4-5-20251001-v1:0": 200_000,
        // Claude legacy
        "anthropic.claude-3-5-sonnet-20241022-v1:0": 200_000,
        "anthropic.claude-3-5-haiku-20241022-v1:0": 200_000,
        "anthropic.claude-3-opus-20240229-v1:0": 200_000,
        "anthropic.claude-3-sonnet-20240229-v1:0": 200_000,
        "anthropic.claude-3-haiku-20240307-v1:0": 200_000,
        // Amazon Nova
        "amazon.nova-pro-v1:0": 300_000,
        "amazon.nova-lite-v1:0": 300_000,
        "amazon.nova-2-lite-v1:0": 1_000_000,
        // Writer
        "writer.palmyra-x5-v1:0": 1_000_000,
        "writer.palmyra-x4-v1:0": 128_000,
        // NVIDIA
        "nvidia.nemotron-nano-3-30b": 256_000,
    },
    azure: {
        _default: 128_000,
        // GPT-5.4
        "gpt-5.4": 1_050_000,
        "gpt-5.4-mini": 400_000,
        "gpt-5.4-nano": 400_000,
        "gpt-5.4-pro": 1_050_000,
        // GPT-5.x
        "gpt-5.2": 400_000,
        "gpt-5.2-pro": 400_000,
        "gpt-5.2-codex": 400_000,
        "gpt-5.1": 400_000,
        "gpt-5": 400_000,
        "gpt-5-mini": 400_000,
        // GPT-4.1
        "gpt-4.1": 1_047_576,
        "gpt-4.1-mini": 1_047_576,
        // GPT-4o
        "gpt-4o": 128_000,
        "gpt-4o-mini": 128_000,
        // O-series
        o3: 200_000,
        "o3-mini": 200_000,
        "o4-mini": 200_000,
        // Legacy
        "gpt-4-turbo": 128_000,
        "gpt-4": 8_192,
    },
    mistral: {
        _default: 128_000,
        "mistral-large-latest": 256_000,
        "mistral-large-2512": 256_000,
        "mistral-medium-latest": 128_000,
        "mistral-small-latest": 128_000,
        "codestral-latest": 256_000,
        "codestral-2508": 256_000,
        "devstral-2512": 256_000,
        "devstral-small-2512": 256_000,
        "magistral-medium-latest": 128_000,
        "mistral-small-2603": 256_000,
    },
    ollama: {
        _default: 128_000,
    },
    litellm: {
        _default: 128_000,
    },
    huggingface: {
        _default: 32_000,
    },
    sagemaker: {
        _default: 128_000,
        // NVIDIA Nemotron 3 Nano (February 2026) — 1M context
        "nvidia-nemotron-3-nano-30b": 1_000_000,
        // Qwen3 VL — 32K context
        "qwen3-vl-8b-instruct": 32_768,
    },
};

/**
 * Map of provider aliases to canonical MODEL_CONTEXT_WINDOWS keys.
 *
 * Callers reach `getContextWindowSize` via the unnormalized form on
 * `options.provider` (e.g. CLI `--provider lmstudio`, alias `llama.cpp`),
 * and `ProviderFactory.normalizeProviderName` runs only at instantiation —
 * its output never reaches budget calculations. Without this normalization
 * those alias forms miss the table and fall back to `DEFAULT_CONTEXT_WINDOW`.
 *
 * The keys here are the result of lowercasing and stripping non-alpha
 * characters, so `lm-studio` -> `lmstudio`, `nvidia-nim` -> `nvidianim`,
 * `llama.cpp` -> `llamacpp`. Every value MUST be an existing key of
 * MODEL_CONTEXT_WINDOWS; an alias that points at a missing key silently
 * degrades to the global default.
 *
 * Plain case/separator variants of a table key ("Together-AI", "Google AI")
 * do not need an entry here — `normalizeProviderForLookup` matches them
 * against the table keys directly. Only true aliases (different words) do.
 */
const PROVIDER_ALIAS_MAP = {
    // The table key is "google-ai"; there is no "google-ai-studio" entry.
    googleaistudio: "google-ai",
    lmstudio: "lm-studio",
    llamacpp: "llamacpp",
    nvidianim: "nvidia-nim",
    nim: "nvidia-nim",
    nvidia: "nvidia-nim",
    // "Vertex AI" / "vertex-ai" -> the "vertex" table entry.
    vertexai: "vertex",
    deepseek: "deepseek",
};

/**
 * Read an own property, ignoring anything inherited from Object.prototype.
 * Provider and model names come from user configuration, so a plain
 * `table[name]` would hand back functions for names such as "constructor".
 *
 * @param {object} table - Object to read from
 * @param {string} key - Property name
 * @returns {*} The own value, or undefined when `key` is not an own property
 */
function getOwnEntry(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Lowercase a provider name and drop every character outside a-z.
 *
 * @param {string} name - Raw provider name
 * @returns {string} Letters-only lowercase form used for alias comparison
 */
function stripProviderName(name) {
    return name.toLowerCase().replace(/[^a-z]/g, "");
}

/**
 * Turn a caller-supplied provider name into a MODEL_CONTEXT_WINDOWS key.
 *
 * Resolution order:
 * 1. Explicit alias in PROVIDER_ALIAS_MAP
 * 2. A table key whose stripped form equals the stripped input, so case and
 *    separator variants ("OpenAI", "open-ai", "Together-AI") reach their
 *    entry even when the canonical key contains a hyphen
 * 3. The stripped input itself (will miss the table; caller falls back)
 *
 * @param {string} provider - Raw provider name
 * @returns {string} Canonical table key, or the stripped name when unknown
 */
function normalizeProviderForLookup(provider) {
    const stripped = stripProviderName(provider);
    const alias = getOwnEntry(PROVIDER_ALIAS_MAP, stripped);
    if (alias !== undefined) {
        return alias;
    }
    const tableKey = Object.keys(MODEL_CONTEXT_WINDOWS).find((key) => stripProviderName(key) === stripped);
    return tableKey ?? stripped;
}

/**
 * Resolve context window size for a provider/model combination.
 *
 * Priority:
 * 0. Dynamic model registry (DynamicModelProvider) — resolves cross-provider
 *    models (e.g. Claude on Vertex) that the static table cannot handle
 * 1. Exact model match under provider in static registry
 * 2. Longest prefix match under provider in static registry
 * 3. Provider's _default in static registry
 * 4. Global DEFAULT_CONTEXT_WINDOW
 *
 * @param {string} provider - Provider name; aliases and case/separator variants are accepted
 * @param {string} [model] - Model identifier; may carry a version/date suffix
 * @returns {number} Context window size in tokens
 */
export function getContextWindowSize(provider, model) {
    // Step 0: Check dynamic model registry first.
    // This resolves cases where the runtime provider differs from the model's
    // origin (e.g. Claude running via Vertex would hit Vertex's Gemini default
    // in the static table). The dynamic registry knows the actual model metadata.
    if (model) {
        try {
            const dynamicProvider = DynamicModelProvider.getInstance();
            const modelConfig = dynamicProvider.resolveModel(provider, model);
            if (modelConfig?.contextWindow) {
                logger.debug(`[ContextWindow] Resolved via dynamic registry: provider=${provider}, model=${model}, contextWindow=${modelConfig.contextWindow}`);
                return modelConfig.contextWindow;
            }
        }
        catch {
            // Dynamic registry not initialized yet — fall through to static lookup
        }
    }
    // A missing or non-string provider cannot be normalized; use the global default
    // instead of throwing from toLowerCase().
    if (typeof provider !== "string") {
        return DEFAULT_CONTEXT_WINDOW;
    }
    // Static fallback chain — normalize aliases first so "lmstudio" / "llama.cpp" /
    // "nvidianim" find their canonical entries instead of falling back to default.
    const canonical = normalizeProviderForLookup(provider);
    const providerWindows = getOwnEntry(MODEL_CONTEXT_WINDOWS, canonical) ??
        getOwnEntry(MODEL_CONTEXT_WINDOWS, provider);
    if (!providerWindows) {
        return DEFAULT_CONTEXT_WINDOW;
    }
    if (typeof model === "string" && model.length > 0) {
        const exact = getOwnEntry(providerWindows, model);
        if (exact !== undefined) {
            return exact;
        }
        // Prefix match for suffixed model names. Pick the LONGEST matching key so
        // "o1-mini-2024-09-12" resolves to "o1-mini" rather than "o1", regardless
        // of the order in which entries appear in the table.
        let bestKey;
        for (const key of Object.keys(providerWindows)) {
            if (key === "_default" || !model.startsWith(key)) {
                continue;
            }
            if (bestKey === undefined || key.length > bestKey.length) {
                bestKey = key;
            }
        }
        if (bestKey !== undefined) {
            return providerWindows[bestKey];
        }
    }
    return providerWindows._default ?? DEFAULT_CONTEXT_WINDOW;
}

/**
 * Calculate output token reserve for a given context window.
 *
 * Returns the *real* token count that will be reserved for output so callers
 * (`getAvailableInputTokens`, `BudgetChecker`, conversation-memory pruning, file
 * summarisation) compute input budget against the actual outgoing maxTokens.
 *
 * @param {number} contextWindow - Total context window size
 * @param {number} [maxTokens] - Explicit maxTokens from user config (if set)
 * @returns {number} Number of tokens reserved for output (matches what's sent upstream)
 */
export function getOutputReserve(contextWindow, maxTokens) {
    // An explicit positive maxTokens is returned as-is, uncapped.
    if (maxTokens !== undefined && maxTokens > 0) {
        return maxTokens;
    }
    // Otherwise reserve a fixed share of the window, capped at the default maximum.
    return Math.min(MAX_DEFAULT_OUTPUT_RESERVE, Math.ceil(contextWindow * DEFAULT_OUTPUT_RESERVE_RATIO));
}

/**
 * Calculate available input tokens for a given provider/model.
 *
 * available = max(0, contextWindow - outputReserve)
 *
 * The result is clamped at zero: an explicit maxTokens larger than the
 * model's context window leaves no input budget rather than a negative one.
 *
 * @param {string} provider - Provider name
 * @param {string} [model] - Model identifier
 * @param {number} [maxTokens] - Explicit maxTokens from user config (if set)
 * @returns {number} Non-negative number of tokens available for input
 */
export function getAvailableInputTokens(provider, model, maxTokens) {
    const contextWindow = getContextWindowSize(provider, model);
    const outputReserve = getOutputReserve(contextWindow, maxTokens);
    return Math.max(0, contextWindow - outputReserve);
}