UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications...

495 lines (494 loc) 22.9 kB
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
import { stepCountIs, streamText } from "ai";
import { NvidiaNimModels } from "../constants/enums.js";
import { BaseProvider } from "../core/baseProvider.js";
import { DEFAULT_MAX_STEPS } from "../core/constants.js";
import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
import { isNeuroLink } from "../neurolink.js";
import { createProxyFetch, maskProxyUrl } from "../proxy/proxyFetch.js";
import { tracers, ATTR, withClientStreamSpan } from "../telemetry/index.js";
import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js";
import { logger } from "../utils/logger.js";
import { createNvidiaNimConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
import { resolveToolChoice } from "../utils/toolChoice.js";
import { toAnalyticsStreamResult } from "./providerTypeUtils.js";

/**
 * Lower-case phrases that mark a 400 body as an explicit rejection of a
 * request field. `unknown` and `invalid` are only accepted together with
 * `field` / `parameter` / `argument`; a bare "invalid" does not count.
 */
const NIM_REJECTION_KEYWORDS = [
    "unsupported",
    "not supported",
    "does not support",
    "unrecognized",
    "unknown field",
    "unknown parameter",
    "unknown argument",
    "invalid field",
    "invalid parameter",
    "invalid argument",
];

/** Number of characters inspected on each side of a field mention. */
const NIM_REJECTION_WINDOW = 80;

/** Characters that continue an identifier such as `chat_template_kwargs`. */
const IDENTIFIER_CHAR = /[a-z0-9_]/;

/**
 * Find the next mention of `lowerField` in `lowerText` that is not merely the
 * prefix of a longer identifier.
 *
 * Without this check `chat_template` would also match every occurrence of
 * `chat_template_kwargs`, and a complaint about the kwargs object would lead
 * to the unrelated `chat_template` field being stripped. Only the character
 * AFTER the match is checked: the text may be raw JSON, where an escaped
 * newline (`\n`) directly in front of the field name is plausible.
 *
 * @param {string} lowerText - Lower-cased text to search.
 * @param {string} lowerField - Lower-cased field name.
 * @param {number} [from=0] - Index to start searching from.
 * @returns {number} Index of the mention, or -1 when there is none.
 */
const findFieldMention = (lowerText, lowerField, from = 0) => {
    let idx = lowerText.indexOf(lowerField, from);
    while (idx !== -1) {
        const next = lowerText[idx + lowerField.length];
        if (next === undefined || !IDENTIFIER_CHAR.test(next)) {
            return idx;
        }
        idx = lowerText.indexOf(lowerField, idx + lowerField.length);
    }
    return -1;
};

/**
 * Decide whether a NIM 400 response body is a rejection of the named field
 * (as opposed to an unrelated 400 that happens to mention the field name,
 * e.g. when the user's prompt is echoed back inside the error envelope).
 *
 * A rejection requires both:
 *   - a standalone mention of the field name in the body, and
 *   - one of `NIM_REJECTION_KEYWORDS` within `NIM_REJECTION_WINDOW`
 *     characters before or after that mention.
 *
 * The comparison is case-insensitive.
 *
 * @param {string} body - Response body text; empty/nullish yields `false`.
 * @param {string} field - Request field name to look for.
 * @returns {boolean} `true` when the body reads as a rejection of `field`.
 */
const isNimFieldRejection = (body, field) => {
    if (!body) {
        return false;
    }
    const lower = body.toLowerCase();
    const fieldLower = field.toLowerCase();
    let idx = findFieldMention(lower, fieldLower);
    while (idx !== -1) {
        const windowStart = Math.max(0, idx - NIM_REJECTION_WINDOW);
        const windowEnd = Math.min(lower.length, idx + fieldLower.length + NIM_REJECTION_WINDOW);
        const slice = lower.slice(windowStart, windowEnd);
        if (NIM_REJECTION_KEYWORDS.some((kw) => slice.includes(kw))) {
            return true;
        }
        idx = findFieldMention(lower, fieldLower, idx + fieldLower.length);
    }
    return false;
};

/**
 * Strip an offending field from a JSON request body and return the rebuilt
 * stringified body.
 *
 * `chat_template` is removed from the top level; `reasoning_budget` is
 * removed from `chat_template_kwargs`, and that object is dropped as well
 * when it becomes empty.
 *
 * @param {string} body - JSON-encoded request body.
 * @param {string} field - `"chat_template"` or `"reasoning_budget"`.
 * @returns {string|null} The rewritten body, or `null` when the body is not
 *   JSON-parseable or the field is not present (signal: nothing to retry).
 */
const stripFieldFromJsonBody = (body, field) => {
    try {
        const parsed = JSON.parse(body);
        let mutated = false;
        if (field === "chat_template" && "chat_template" in parsed) {
            delete parsed.chat_template;
            mutated = true;
        }
        if (field === "reasoning_budget") {
            const kwargs = parsed.chat_template_kwargs;
            if (kwargs && "reasoning_budget" in kwargs) {
                delete kwargs.reasoning_budget;
                mutated = true;
                if (Object.keys(kwargs).length === 0) {
                    delete parsed.chat_template_kwargs;
                }
            }
        }
        return mutated ? JSON.stringify(parsed) : null;
    }
    catch {
        // Unparseable (or non-object) body: there is nothing we can strip.
        return null;
    }
};

/**
 * Build the `fetch` implementation handed to the OpenAI-compatible client.
 *
 * It wraps the fetch returned by `createProxyFetch()` and adds:
 *   1. a single retry on HTTP 400 when the response body reads as a
 *      rejection of `reasoning_budget` or `chat_template` (the field is
 *      removed from the JSON request body before retrying), and
 *   2. a warning log for every non-OK response, with the URL passed through
 *      `maskProxyUrl`.
 *
 * @param {string} provider - Label used as the log prefix.
 * @returns {Function} A fetch-compatible async function `(input, init)`.
 */
const makeLoggingFetch = (provider) => {
    const base = createProxyFetch();
    return (async (input, init) => {
        const url = typeof input === "string"
            ? input
            : input instanceof URL
                ? input.toString()
                : input.url;
        const reqSize = init?.body && typeof init.body === "string" ? init.body.length : 0;
        let response = await base(input, init);
        // Fetch-level retry-strip, so every transport that goes through this
        // fetch gets it. We require BOTH the offending field name AND a
        // rejection keyword close to it. Without the keyword guard, an
        // unrelated 400 whose body merely mentions `chat_template` (e.g. an
        // echoed user prompt) would make us silently strip a field the caller
        // actually wanted sent.
        if (response.status === 400 && typeof init?.body === "string" && init.body.length > 0) {
            const cloned = response.clone();
            const body = await cloned.text().catch(() => "");
            let retryBody = null;
            let stripped = null;
            if (isNimFieldRejection(body, "reasoning_budget")) {
                retryBody = stripFieldFromJsonBody(init.body, "reasoning_budget");
                stripped = "reasoning_budget";
            }
            else if (isNimFieldRejection(body, "chat_template")) {
                retryBody = stripFieldFromJsonBody(init.body, "chat_template");
                stripped = "chat_template";
            }
            if (retryBody !== null && stripped !== null) {
                logger.warn(`[${provider}] NIM rejected ${stripped}; retrying with field stripped`);
                // Best-effort: discard the rejected response's unread body
                // before it is replaced by the retry response.
                try {
                    await response.body?.cancel?.();
                }
                catch {
                    // Ignored on purpose; the retry must not depend on this.
                }
                response = await base(input, { ...init, body: retryBody });
            }
        }
        if (!response.ok) {
            // If maskProxyUrl can't safely sanitize the URL (returns null),
            // don't log the raw URL - that defeats the redaction. Use a
            // placeholder so operators still get the warning.
            const safeUrl = maskProxyUrl(url) ?? "<redacted>";
            if (process.env.NEUROLINK_DEBUG_HTTP === "1") {
                const clone = response.clone();
                const body = await clone.text().catch(() => "<unreadable>");
                logger.warn(`[${provider}] upstream ${response.status}`, {
                    url: safeUrl,
                    body: body.slice(0, 800),
                    reqSize,
                });
            }
            else {
                logger.warn(`[${provider}] upstream ${response.status} url=${safeUrl} reqSize=${reqSize}`);
            }
        }
        return response;
    });
};

/** Base URL used when neither credentials nor the environment provide one. */
const NVIDIA_NIM_DEFAULT_BASE_URL = "https://integrate.api.nvidia.com/v1";

/**
 * Read an integer from the environment.
 *
 * @param {string} k - Environment variable name.
 * @returns {number|undefined} The parsed base-10 integer, or `undefined`
 *   when the variable is unset, empty, or does not parse to a finite number.
 */
const envInt = (k) => {
    const raw = process.env[k];
    if (!raw) {
        return undefined;
    }
    const parsed = Number.parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : undefined;
};

/**
 * Read a floating-point number from the environment.
 *
 * @param {string} k - Environment variable name.
 * @returns {number|undefined} The parsed number, or `undefined` when the
 *   variable is unset, empty, or does not parse to a finite number.
 */
const envFloat = (k) => {
    const raw = process.env[k];
    if (!raw) {
        return undefined;
    }
    const parsed = Number.parseFloat(raw);
    return Number.isFinite(parsed) ? parsed : undefined;
};

/**
 * Return the first candidate that is a non-blank string, trimmed.
 *
 * @param {...*} candidates - Values to inspect in priority order.
 * @returns {string|undefined} The trimmed winner, or `undefined` if none.
 */
const firstNonBlank = (...candidates) => {
    for (const candidate of candidates) {
        if (typeof candidate === "string") {
            const trimmed = candidate.trim();
            if (trimmed.length > 0) {
                return trimmed;
            }
        }
    }
    return undefined;
};

/**
 * Assemble the NIM-specific request-body extras from environment variables
 * and the thinking settings.
 *
 * Environment values equal to the sentinels checked below (`top_k` -1,
 * `min_p` 0, `repetition_penalty` 1, `min_tokens` 0) are omitted.
 *
 * @param {boolean} thinkingEnabled - Adds `chat_template_kwargs` when true.
 * @param {number} [maxTokens] - When truthy and thinking is enabled, sent as
 *   `chat_template_kwargs.reasoning_budget`.
 * @returns {object} Extra body fields (possibly empty).
 */
const buildNvidiaNimExtraBody = (thinkingEnabled, maxTokens) => {
    const extra = {};
    const topK = envInt("NVIDIA_NIM_TOP_K");
    if (topK !== undefined && topK !== -1) {
        extra.top_k = topK;
    }
    const minP = envFloat("NVIDIA_NIM_MIN_P");
    if (minP !== undefined && minP !== 0) {
        extra.min_p = minP;
    }
    const repPenalty = envFloat("NVIDIA_NIM_REPETITION_PENALTY");
    if (repPenalty !== undefined && repPenalty !== 1) {
        extra.repetition_penalty = repPenalty;
    }
    const minTokens = envInt("NVIDIA_NIM_MIN_TOKENS");
    if (minTokens !== undefined && minTokens !== 0) {
        extra.min_tokens = minTokens;
    }
    const chatTemplate = process.env.NVIDIA_NIM_CHAT_TEMPLATE;
    if (chatTemplate) {
        extra.chat_template = chatTemplate;
    }
    if (thinkingEnabled) {
        extra.chat_template_kwargs = {
            thinking: true,
            enable_thinking: true,
            ...(maxTokens ? { reasoning_budget: maxTokens } : {}),
        };
    }
    return extra;
};

/**
 * Return a shallow copy of `body` without
 * `chat_template_kwargs.reasoning_budget`; an emptied
 * `chat_template_kwargs` is removed entirely. The input is not mutated.
 *
 * @param {object} body - Extra-body object.
 * @returns {object} The copy.
 */
const stripReasoningBudget = (body) => {
    const cloned = { ...body };
    if (cloned.chat_template_kwargs) {
        const { reasoning_budget: _ignored, ...rest } = cloned.chat_template_kwargs;
        cloned.chat_template_kwargs = rest;
        if (Object.keys(cloned.chat_template_kwargs).length === 0) {
            delete cloned.chat_template_kwargs;
        }
    }
    return cloned;
};

/**
 * Return a shallow copy of `body` without the top-level `chat_template`.
 *
 * @param {object} body - Extra-body object.
 * @returns {object} The copy.
 */
const stripChatTemplate = (body) => {
    const { chat_template: _ignored, ...rest } = body;
    return rest;
};

/** Resolve the API key via `validateApiKey(createNvidiaNimConfig())`. */
const getNimApiKey = () => {
    return validateApiKey(createNvidiaNimConfig());
};

/** Resolve the default model via `getProviderModel("NVIDIA_NIM_MODEL", ...)`. */
const getDefaultNimModel = () => {
    return getProviderModel("NVIDIA_NIM_MODEL", NvidiaNimModels.LLAMA_3_3_70B_INSTRUCT);
};

/**
 * NVIDIA NIM Provider
 * Wraps NVIDIA's hosted (or self-hosted) inference endpoints via OpenAI-compat.
 * Passes NIM-specific extras (top_k, min_p, repetition_penalty,
 * chat_template_kwargs.reasoning_budget) via providerOptions.openai.body.
 * Implements one-retry-on-400 to drop unsupported extras gracefully.
 */
export class NvidiaNimProvider extends BaseProvider {
    model;
    apiKey;
    baseURL;

    /**
     * @param {string} modelName - Model identifier, forwarded to BaseProvider.
     * @param {*} sdk - Forwarded to BaseProvider only when `isNeuroLink(sdk)`.
     * @param {*} _region - Unused.
     * @param {{apiKey?: string, baseURL?: string}} [credentials] - Optional
     *   per-instance overrides for the API key and base URL.
     */
    constructor(modelName, sdk, _region, credentials) {
        const validatedNeurolink = isNeuroLink(sdk) ? sdk : undefined;
        super(modelName, "nvidia-nim", validatedNeurolink);
        // Trim the override before applying precedence. A blank/whitespace
        // `credentials.apiKey` should NOT bypass `getNimApiKey()` - that would
        // build a client with an unusable bearer token and fail at request
        // time with a confusing 401 instead of at construction time.
        const overrideApiKey = credentials?.apiKey?.trim();
        this.apiKey =
            overrideApiKey && overrideApiKey.length > 0
                ? overrideApiKey
                : getNimApiKey();
        // Same rule for the base URL: a blank override or an empty
        // `NVIDIA_NIM_BASE_URL=` entry must fall through to the next
        // candidate instead of being used as the endpoint.
        this.baseURL =
            firstNonBlank(credentials?.baseURL, process.env.NVIDIA_NIM_BASE_URL) ??
                NVIDIA_NIM_DEFAULT_BASE_URL;
        // We deliberately use `@ai-sdk/openai-compatible` rather than
        // `@ai-sdk/openai`. Two upstream behaviors of `@ai-sdk/openai` break us:
        //   1. It always sends `response_format: { type: "json_schema" }` when a
        //      schema is provided. Most NIM-served chat models don't enforce
        //      json_schema strictly - the schema goes through but `result.object`
        //      stays empty because the SDK never gets the typed response back.
        //   2. It does not parse the `reasoning_content` field that NIM-hosted
        //      reasoning models (deepseek-r1, qwq, llama-nemotron-ultra) emit,
        //      so chain-of-thought is silently dropped.
        // `@ai-sdk/openai-compatible` honors `supportsStructuredOutputs: false`
        // (falls back to `{ type: "json_object" }` and injects the schema into
        // the prompt - works across the entire NIM model fleet) and parses both
        // `choice.message.reasoning_content` and `delta.reasoning_content` into
        // the SDK-standard `reasoning` part. NIM-specific extras (`min_tokens`,
        // `chat_template_kwargs.reasoning_budget`, `chat_template`) are still
        // injected via `providerOptions.openai.body` in `executeStreamInner`.
        const nim = createOpenAICompatible({
            name: "nvidia-nim",
            apiKey: this.apiKey,
            baseURL: this.baseURL,
            fetch: makeLoggingFetch("nvidia-nim"),
            supportsStructuredOutputs: false,
            includeUsage: true,
        });
        this.model = nim.chatModel(this.modelName);
        logger.debug("NVIDIA NIM Provider initialized", {
            modelName: this.modelName,
            providerName: this.providerName,
            baseURL: this.baseURL,
        });
    }

    /**
     * Run `executeStreamInner` inside a `neurolink.provider.stream` span.
     *
     * @param {object} options - Stream options.
     * @param {*} _analysisSchema - Unused.
     * @returns {Promise<object>} The stream result with its `stream` wrapped
     *   by `withClientStreamSpan`.
     */
    async executeStream(options, _analysisSchema) {
        return withClientStreamSpan({
            name: "neurolink.provider.stream",
            tracer: tracers.provider,
            attributes: {
                [ATTR.GEN_AI_SYSTEM]: "nvidia-nim",
                [ATTR.GEN_AI_MODEL]: this.modelName,
                [ATTR.GEN_AI_OPERATION]: "stream",
                [ATTR.NL_STREAM_MODE]: true,
            },
        }, async () => this.executeStreamInner(options), (r) => r.stream, (r, wrapped) => ({ ...r, stream: wrapped }));
    }

    /**
     * Build the request, call `streamText`, and retry once without
     * `reasoning_budget` or `chat_template` when the call throws a 400 whose
     * message names one of them.
     *
     * @param {object} options - Stream options.
     * @returns {Promise<object>} `{ stream, provider, model, analytics, metadata }`.
     * @throws The result of `handleProviderError(error)` on failure.
     */
    async executeStreamInner(options) {
        this.validateStreamOptions(options);
        const startTime = Date.now();
        const timeout = this.getTimeout(options);
        const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
        try {
            const shouldUseTools = !options.disableTools && this.supportsTools();
            const tools = shouldUseTools
                ? options.tools || (await this.getAllTools())
                : {};
            const messages = await this.buildMessagesForStream(options);
            const model = await this.getAISDKModelWithMiddleware(options);
            // Callers pass `thinkingLevel` directly on generate/stream options
            // (matching Anthropic / Gemini 2.5+ / Gemini 3 conventions). Fall
            // back to the legacy `thinkingConfig.thinkingLevel` shape for
            // compatibility.
            const tl = options.thinkingLevel ?? options.thinkingConfig?.thinkingLevel;
            const thinkingEnabled = tl !== undefined && tl !== "minimal";
            let extraBody = buildNvidiaNimExtraBody(thinkingEnabled, options.maxTokens);
            // `stripped` lists the extras to drop on a retry; the only values
            // used are "reasoning_budget" and "chat_template".
            const callStream = (body, stripped = []) => streamText({
                model,
                messages,
                temperature: options.temperature,
                maxOutputTokens: options.maxTokens,
                tools,
                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
                providerOptions: (() => {
                    // Merge the caller's providerOptions with the NIM extras
                    // instead of overwriting any per-call openai.body.
                    const callerBase = options.providerOptions ?? {};
                    const callerOpenai = callerBase.openai ?? {};
                    const callerBody = callerOpenai.body ?? {};
                    // Per-call overrides win over env/NIM defaults - defaults
                    // first, overrides last. chat_template_kwargs is merged
                    // shallowly too so a request that only sets
                    // `reasoning_budget` doesn't drop the default
                    // `thinking: true` flag (and vice versa).
                    const defaultsBody = body;
                    const mergedBody = {
                        ...defaultsBody,
                        ...callerBody,
                    };
                    const mergedKwargs = {
                        ...(defaultsBody.chat_template_kwargs ?? {}),
                        ...(callerBody.chat_template_kwargs ?? {}),
                    };
                    // Apply retry-strip AFTER merging so caller-supplied
                    // copies of the offending field are also dropped
                    // (otherwise the retry would re-send the field that NIM
                    // just rejected).
                    if (stripped.includes("chat_template")) {
                        delete mergedBody.chat_template;
                    }
                    if (stripped.includes("reasoning_budget")) {
                        delete mergedKwargs.reasoning_budget;
                    }
                    if (Object.keys(mergedKwargs).length > 0) {
                        mergedBody.chat_template_kwargs = mergedKwargs;
                    }
                    else {
                        delete mergedBody.chat_template_kwargs;
                    }
                    if (Object.keys(callerBase).length === 0 &&
                        Object.keys(mergedBody).length === 0) {
                        return undefined;
                    }
                    return {
                        ...callerBase,
                        openai: {
                            ...callerOpenai,
                            body: mergedBody,
                        },
                    };
                })(),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                experimental_repairToolCall: this.getToolCallRepairFn(options),
                onStepFinish: ({ toolCalls, toolResults }) => {
                    emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                        logger.warn("[NvidiaNimProvider] Failed to store tool executions", {
                            provider: this.providerName,
                            error: error instanceof Error ? error.message : String(error),
                        });
                    });
                },
            });
            let result;
            try {
                result = await callStream(extraBody);
            }
            catch (error) {
                const errMsg = error instanceof Error ? error.message : String(error);
                const status = error?.statusCode;
                if (status !== 400) {
                    throw error;
                }
                const lower = errMsg.toLowerCase();
                // Standalone mentions only: a complaint about
                // `chat_template_kwargs` must not be read as a rejection of
                // `chat_template`.
                if (findFieldMention(lower, "reasoning_budget") !== -1) {
                    logger.warn("NIM rejected reasoning_budget; retrying without it");
                    extraBody = stripReasoningBudget(extraBody);
                    result = await callStream(extraBody, ["reasoning_budget"]);
                }
                else if (findFieldMention(lower, "chat_template") !== -1) {
                    logger.warn("NIM rejected chat_template; retrying without it");
                    extraBody = stripChatTemplate(extraBody);
                    result = await callStream(extraBody, ["chat_template"]);
                }
                else {
                    throw error;
                }
            }
            timeoutController?.cleanup();
            const transformedStream = this.createTextStream(result);
            const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, toAnalyticsStreamResult(result), Date.now() - startTime, {
                requestId: `nvidia-nim-stream-${Date.now()}`,
                streamingMode: true,
            });
            return {
                stream: transformedStream,
                provider: this.providerName,
                model: this.modelName,
                analytics: analyticsPromise,
                metadata: { startTime, streamId: `nvidia-nim-${Date.now()}` },
            };
        }
        catch (error) {
            timeoutController?.cleanup();
            throw this.handleProviderError(error);
        }
    }

    /** @returns {string} The provider name held by the base class. */
    getProviderName() {
        return this.providerName;
    }

    /** @returns {string} The default NIM model (see `getDefaultNimModel`). */
    getDefaultModel() {
        return getDefaultNimModel();
    }

    /** @returns {object} The chat model created in the constructor. */
    getAISDKModel() {
        return this.model;
    }

    /**
     * Map a raw error onto one of the typed provider errors.
     *
     * Classification uses the error's numeric `statusCode` when it has one,
     * in addition to substring matches on the message, so an error whose
     * message does not spell out the status is still classified.
     *
     * @param {*} error - Error thrown by the SDK / transport.
     * @returns {Error} NetworkError, AuthenticationError, RateLimitError,
     *   InvalidModelError, or ProviderError.
     */
    formatProviderError(error) {
        if (error instanceof TimeoutError) {
            return new NetworkError(`Request timed out: ${error.message}`, "nvidia-nim");
        }
        const errorRecord = error;
        const message = typeof errorRecord?.message === "string"
            ? errorRecord.message
            : "Unknown error";
        const status = typeof errorRecord?.statusCode === "number"
            ? errorRecord.statusCode
            : undefined;
        // NIM canonically returns HTTP 401/Unauthorized for invalid API keys,
        // but its OpenAI-compatible gateway sometimes surfaces a bare 400 +
        // "Bad Request" with no body details for both malformed-credentials
        // and bad-parameter cases. Because the two are indistinguishable, we
        // DON'T promote bare 400/Bad Request to "invalid key" here - that
        // would mis-classify legitimate parameter errors (e.g. unsupported
        // `reasoning_budget`, unsupported `chat_template`) as auth failures.
        if (status === 401 ||
            message.includes("Invalid API key") ||
            message.includes("401") ||
            message.includes("Unauthorized")) {
            return new AuthenticationError("Invalid NVIDIA NIM API key. Get one at https://build.nvidia.com/settings/api-keys", "nvidia-nim");
        }
        if (status === 429 || message.includes("rate limit") || message.includes("429")) {
            return new RateLimitError("NVIDIA NIM rate limit exceeded", "nvidia-nim");
        }
        if (status === 404 || message.includes("404") || message.includes("model_not_found")) {
            return new InvalidModelError(`NVIDIA NIM model '${this.modelName}' not available. Browse the catalog at https://build.nvidia.com/models`, "nvidia-nim");
        }
        if (status === 403 || message.includes("quota") || message.includes("403")) {
            return new ProviderError("NVIDIA NIM quota exceeded for your account", "nvidia-nim");
        }
        return new ProviderError(`NVIDIA NIM error: ${message}`, "nvidia-nim");
    }

    /**
     * @returns {Promise<boolean>} `true` when a non-blank API key is set.
     */
    async validateConfiguration() {
        return typeof this.apiKey === "string" && this.apiKey.trim().length > 0;
    }

    /**
     * @returns {{provider: string, model: string, defaultModel: string, baseURL: string}}
     *   A snapshot of the provider settings (the API key is not included).
     */
    getConfiguration() {
        return {
            provider: this.providerName,
            model: this.modelName,
            defaultModel: getDefaultNimModel(),
            baseURL: this.baseURL,
        };
    }
}

export default NvidiaNimProvider;