@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
570 lines • 27.6 kB
JavaScript
import { createOpenAI } from "@ai-sdk/openai";
import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
import { NoOutputGeneratedError, Output, stepCountIs, streamText, } from "ai";
import { BaseProvider } from "../core/baseProvider.js";
import { DEFAULT_MAX_STEPS } from "../core/constants.js";
import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
import { createProxyFetch } from "../proxy/proxyFetch.js";
import { AuthenticationError, InvalidModelError, ModelAccessDeniedError, NetworkError, ProviderError, RateLimitError, isModelAccessDeniedMessage, parseAllowedModels, } from "../types/index.js";
import { isAbortError } from "../utils/errorHandling.js";
import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
import { logger } from "../utils/logger.js";
import { buildNoOutputSentinel, detectPostStreamNoOutput, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
import { calculateCost } from "../utils/pricing.js";
import { getProviderModel } from "../utils/providerConfig.js";
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
import { resolveToolChoice } from "../utils/toolChoice.js";
import { getModelId } from "./providerTypeUtils.js";
const streamTracer = trace.getTracer("neurolink.provider.litellm");
// Configuration helpers
const getLiteLLMConfig = () => {
return {
baseURL: process.env.LITELLM_BASE_URL || "http://localhost:4000",
apiKey: process.env.LITELLM_API_KEY || "sk-anything",
};
};
/**
* Returns the default model name for LiteLLM.
*
* LiteLLM uses a 'provider/model' format for model names.
* For example:
* - 'openai/gpt-4o-mini'
* - 'openai/gpt-3.5-turbo'
* - 'anthropic/claude-3-sonnet-20240229'
* - 'google/gemini-pro'
*
* You can override the default by setting the LITELLM_MODEL environment variable.
*/
const getDefaultLiteLLMModel = () => {
return getProviderModel("LITELLM_MODEL", "openai/gpt-4o-mini");
};
/**
* LiteLLM Provider - BaseProvider Implementation
* Provides access to 100+ models via LiteLLM proxy server
*/
export class LiteLLMProvider extends BaseProvider {
model;
credentials;
// Cache for available models to avoid repeated API calls
static modelsCache = [];
static modelsCacheTime = 0;
static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
constructor(modelName, sdk, _region, credentials) {
super(modelName, "litellm", sdk);
// Store per-request credentials for use in embed/embedMany/fetchModelsFromAPI
this.credentials = credentials;
// Initialize LiteLLM using OpenAI SDK with explicit configuration
const config = getLiteLLMConfig();
// Create OpenAI SDK instance configured for LiteLLM proxy
// LiteLLM acts as a proxy server that implements the OpenAI-compatible API.
// To communicate with LiteLLM instead of the default OpenAI endpoint, we use createOpenAI
// with a custom baseURL and apiKey. This ensures all requests are routed through the LiteLLM
// proxy, allowing access to multiple models and custom authentication.
const customOpenAI = createOpenAI({
baseURL: credentials?.baseURL ?? config.baseURL,
apiKey: credentials?.apiKey ?? config.apiKey,
fetch: createProxyFetch(),
});
this.model = customOpenAI.chat(this.modelName || getDefaultLiteLLMModel());
logger.debug("LiteLLM Provider initialized", {
modelName: this.modelName,
provider: this.providerName,
baseURL: config.baseURL,
});
}
getProviderName() {
return "litellm";
}
getDefaultModel() {
return getDefaultLiteLLMModel();
}
/**
* Returns the Vercel AI SDK model instance for LiteLLM
*/
getAISDKModel() {
return this.model;
}
formatProviderError(error) {
if (error instanceof TimeoutError) {
return new NetworkError(`Request timed out: ${error.message}`, this.providerName);
}
// Check for timeout by error name and message as fallback
const errorRecord = error;
if (errorRecord?.name === "TimeoutError" ||
(typeof errorRecord?.message === "string" &&
errorRecord.message.toLowerCase().includes("timeout"))) {
return new NetworkError(`Request timed out: ${errorRecord?.message || "Unknown timeout"}`, this.providerName);
}
if (typeof errorRecord?.message === "string") {
if (errorRecord.message.includes("ECONNREFUSED") ||
errorRecord.message.includes("Failed to fetch")) {
return new NetworkError("LiteLLM proxy server not available. Please start the LiteLLM proxy server at " +
`${process.env.LITELLM_BASE_URL || "http://localhost:4000"}`, this.providerName);
}
// Curator P1-1: detect "team not allowed to access model" responses
// and surface as ModelAccessDeniedError with the allowed_models array
// parsed from the body. Must run before the generic "API key" check
// because LiteLLM phrases this as a 403 distinct from auth.
if (isModelAccessDeniedMessage(errorRecord.message)) {
return new ModelAccessDeniedError(errorRecord.message, {
provider: this.providerName,
requestedModel: this.modelName,
allowedModels: parseAllowedModels(errorRecord.message),
});
}
if (errorRecord.message.includes("API_KEY_INVALID") ||
errorRecord.message.includes("Invalid API key")) {
return new AuthenticationError("Invalid LiteLLM configuration. Please check your LITELLM_API_KEY environment variable.", this.providerName);
}
if (errorRecord.message.toLowerCase().includes("rate limit")) {
return new RateLimitError("LiteLLM rate limit exceeded. Please try again later.", this.providerName);
}
if (errorRecord.message.toLowerCase().includes("model") &&
errorRecord.message.toLowerCase().includes("not found")) {
return new InvalidModelError(`Model '${this.modelName}' not available in LiteLLM proxy. ` +
"Please check your LiteLLM configuration and ensure the model is configured.", this.providerName);
}
}
return new ProviderError(`LiteLLM error: ${errorRecord?.message || "Unknown error"}`, this.providerName);
}
/**
* LiteLLM supports tools for compatible models
*/
supportsTools() {
return true;
}
/**
* Provider-specific streaming implementation
* Note: This is only used when tools are disabled
*/
async executeStream(options, analysisSchema) {
this.validateStreamOptions(options);
const startTime = Date.now();
let chunkCount = 0; // Track chunk count for debugging
// Reviewer follow-up: capture upstream provider errors via onError so
// the post-stream NoOutput detect can propagate the *real* cause
// (content_filter, provider crash, etc.) into the sentinel's
// providerError / modelResponseRaw instead of "No output generated".
let capturedProviderError;
const timeout = this.getTimeout(options);
const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
try {
// Build message array from options with multimodal support
// Using protected helper from BaseProvider to eliminate code duplication
const messages = await this.buildMessagesForStream(options);
const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
// Get tools - options.tools is pre-merged by BaseProvider.stream()
const shouldUseTools = !options.disableTools && this.supportsTools();
const tools = shouldUseTools
? options.tools || (await this.getAllTools())
: {};
logger.debug(`LiteLLM: Tools for streaming`, {
shouldUseTools,
toolCount: Object.keys(tools).length,
toolNames: Object.keys(tools),
});
// Model-specific maxTokens handling - Gemini 2.5 models have issues with maxTokens
const modelName = this.modelName || getDefaultLiteLLMModel();
const isGemini25Model = modelName.includes("gemini-2.5") || modelName.includes("gemini/2.5");
const maxTokens = isGemini25Model ? undefined : options.maxTokens;
if (isGemini25Model && options.maxTokens) {
logger.debug(`LiteLLM: Skipping maxTokens for Gemini 2.5 model (known compatibility issue)`, {
modelName,
requestedMaxTokens: options.maxTokens,
});
}
// Build complete stream options with proper typing - matching Vertex pattern
let streamOptions = {
model: model,
messages: messages,
temperature: options.temperature,
...(maxTokens && { maxTokens }), // Conditionally include maxTokens
...(shouldUseTools &&
Object.keys(tools).length > 0 && {
tools,
toolChoice: resolveToolChoice(options, tools, shouldUseTools),
stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
}),
abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
experimental_repairToolCall: this.getToolCallRepairFn(options),
onError: (event) => {
const error = event.error;
const errorMessage = error instanceof Error ? error.message : String(error);
// Reviewer follow-up: propagate the captured error to the
// post-stream NoOutput sentinel so telemetry sees the real
// provider cause instead of "No output generated".
capturedProviderError = error;
logger.error(`LiteLLM: Stream error`, {
provider: this.providerName,
modelName: this.modelName,
error: errorMessage,
chunkCount,
});
},
onFinish: (event) => {
logger.debug(`LiteLLM: Stream finished`, {
finishReason: event.finishReason,
totalChunks: chunkCount,
});
},
onChunk: () => {
chunkCount++;
},
onStepFinish: ({ toolCalls, toolResults }) => {
emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
logger.info("Tool execution completed", { toolResults, toolCalls });
for (const toolCall of toolCalls) {
collectedToolCalls.push({
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
args: toolCall.args ??
toolCall.input ??
toolCall
.parameters ??
{},
});
}
for (const toolResult of toolResults) {
const rawToolResult = toolResult;
collectedToolResults.push({
toolName: toolResult.toolName,
status: rawToolResult.error ? "failure" : "success",
output: (rawToolResult.output ??
rawToolResult.result) ??
undefined,
error: rawToolResult.error,
id: rawToolResult.toolCallId ?? toolResult.toolName,
});
}
this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
logger.warn("[LiteLLMProvider] Failed to store tool executions", {
provider: this.providerName,
error: error instanceof Error ? error.message : String(error),
});
});
},
};
// Add analysisSchema support if provided
if (analysisSchema) {
try {
streamOptions = {
...streamOptions,
experimental_output: Output.object({
schema: analysisSchema,
}),
};
}
catch (error) {
logger.warn("Schema application failed, continuing without schema", {
error: String(error),
});
}
}
// Wrap streamText in an OTel span to capture provider-level latency, token usage, and cost
const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
kind: SpanKind.CLIENT,
attributes: {
"gen_ai.system": "litellm",
"gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
},
});
let result;
const collectedToolCalls = [];
const collectedToolResults = [];
try {
result = streamText(streamOptions);
}
catch (streamError) {
streamSpan.setStatus({
code: SpanStatusCode.ERROR,
message: streamError instanceof Error
? streamError.message
: String(streamError),
});
streamSpan.end();
throw streamError;
}
// Collect token usage, cost, and finish reason asynchronously when the stream completes,
// then end the span. This avoids blocking the stream consumer.
Promise.resolve(result.usage)
.then((usage) => {
streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
const cost = calculateCost(this.providerName, this.modelName, {
input: usage.inputTokens || 0,
output: usage.outputTokens || 0,
total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
});
if (cost && cost > 0) {
streamSpan.setAttribute("neurolink.cost", cost);
}
})
.catch(() => {
// Usage may not be available if the stream is aborted
});
Promise.resolve(result.finishReason)
.then((reason) => {
streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
})
.catch(() => {
// Finish reason may not be available if the stream is aborted
});
Promise.resolve(result.text)
.then(() => {
streamSpan.end();
})
.catch((err) => {
streamSpan.setStatus({
code: SpanStatusCode.ERROR,
message: err instanceof Error ? err.message : String(err),
});
streamSpan.end();
});
timeoutController?.cleanup();
const transformedStream = this.createLiteLLMTransformedStream(result, () => capturedProviderError);
// Create analytics promise that resolves after stream completion
const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName, result, Date.now() - startTime, {
requestId: options.requestId ??
`litellm-stream-${Date.now()}`,
streamingMode: true,
});
return {
stream: transformedStream,
provider: this.providerName,
model: this.modelName,
...(shouldUseTools && {
toolCalls: collectedToolCalls,
toolResults: collectedToolResults,
}),
analytics: analyticsPromise,
metadata: {
startTime,
streamId: `litellm-${Date.now()}`,
},
};
}
catch (error) {
timeoutController?.cleanup();
throw this.handleProviderError(error);
}
}
async *createLiteLLMTransformedStream(result, getCapturedProviderError) {
// Reviewer follow-up: gate the post-stream NoOutput detect on
// *content yielded*, not raw chunk count. AI SDK fullStream emits
// control events ({ type: "start" }, "step-start", etc.) before any
// text-delta — those incremented chunkCount and made the post-stream
// detect dead even when zero text was produced.
let contentYielded = 0;
try {
const streamToUse = result.fullStream || result.textStream;
for await (const chunk of streamToUse) {
if (chunk && typeof chunk === "object") {
if ("type" in chunk && chunk.type === "error") {
const errorChunk = chunk;
logger.error(`LiteLLM: Error chunk received:`, {
errorType: errorChunk.type,
errorDetails: errorChunk.error,
});
throw this.formatProviderError(new Error(`LiteLLM streaming error: ${errorChunk.error?.message || "Unknown error"}`));
}
if ("textDelta" in chunk) {
const textDelta = chunk.textDelta;
if (textDelta) {
contentYielded++;
yield { content: textDelta };
}
}
else if ("type" in chunk &&
chunk.type === "tool-call" &&
"toolCallId" in chunk) {
logger.debug("LiteLLM: Tool call", {
toolCallId: String(chunk.toolCallId),
toolName: "toolName" in chunk ? String(chunk.toolName) : "unknown",
});
}
}
else if (typeof chunk === "string") {
contentYielded++;
yield { content: chunk };
}
}
}
catch (streamError) {
if (NoOutputGeneratedError.isInstance(streamError)) {
logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from textStream");
// Yield the enriched sentinel so downstream telemetry has
// finishReason / usage / providerError. Match the other
// providers' pattern: yield + return (no throw). NeuroLink's
// iteration fallback at neurolink.ts only fires for
// looksLikeModelAccessDenied errors, so a NoOutput throw here
// would NOT trigger any fallback — and it would mask the
// already-yielded sentinel from consumers expecting a clean
// stream. The sentinel itself signals the no-output condition.
const sentinel = await buildNoOutputSentinel(streamError, result, getCapturedProviderError?.());
stampNoOutputSpan(sentinel);
yield sentinel;
return;
}
throw streamError;
}
// Curator P3-6 (round-2 fix): production trigger sets the error on
// result.finishReason rejection (NOT thrown from textStream).
// Surface that path here, matching the catch above (yield + return).
if (contentYielded === 0) {
const detected = await detectPostStreamNoOutput(result, getCapturedProviderError?.());
if (detected) {
logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — caught from finishReason rejection");
stampNoOutputSpan(detected.sentinel);
yield detected.sentinel;
}
}
}
/**
* Generate an embedding for a single text input
* Uses the LiteLLM proxy with OpenAI-compatible embedding API
*/
async embed(text, modelName) {
const { embed: aiEmbed } = await import("ai");
const { createOpenAI } = await import("@ai-sdk/openai");
const config = getLiteLLMConfig();
const embeddingModelName = modelName ||
process.env.LITELLM_EMBEDDING_MODEL ||
"gemini-embedding-001";
const customOpenAI = createOpenAI({
baseURL: this.credentials?.baseURL ?? config.baseURL,
apiKey: this.credentials?.apiKey ?? config.apiKey,
fetch: createProxyFetch(),
});
const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
const result = await aiEmbed({ model: embeddingModel, value: text });
return result.embedding;
}
/**
* Generate embeddings for multiple text inputs
* Uses the LiteLLM proxy with OpenAI-compatible embedding API
*/
async embedMany(texts, modelName) {
const { embedMany: aiEmbedMany } = await import("ai");
const { createOpenAI } = await import("@ai-sdk/openai");
const config = getLiteLLMConfig();
const embeddingModelName = modelName ||
process.env.LITELLM_EMBEDDING_MODEL ||
"gemini-embedding-001";
const customOpenAI = createOpenAI({
baseURL: this.credentials?.baseURL ?? config.baseURL,
apiKey: this.credentials?.apiKey ?? config.apiKey,
fetch: createProxyFetch(),
});
const embeddingModel = customOpenAI.textEmbeddingModel(embeddingModelName);
const result = await aiEmbedMany({ model: embeddingModel, values: texts });
return result.embeddings;
}
/**
* Get available models from LiteLLM proxy server
* Dynamically fetches from /v1/models endpoint with caching and fallback
*/
async getAvailableModels() {
const functionTag = "LiteLLMProvider.getAvailableModels";
const now = Date.now();
// Check if cached models are still valid
if (LiteLLMProvider.modelsCache.length > 0 &&
now - LiteLLMProvider.modelsCacheTime <
LiteLLMProvider.MODELS_CACHE_DURATION) {
logger.debug(`[${functionTag}] Using cached models`, {
cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
modelCount: LiteLLMProvider.modelsCache.length,
});
return LiteLLMProvider.modelsCache;
}
// Try to fetch models dynamically
try {
const dynamicModels = await this.fetchModelsFromAPI();
if (dynamicModels.length > 0) {
// Cache successful result
LiteLLMProvider.modelsCache = dynamicModels;
LiteLLMProvider.modelsCacheTime = now;
logger.debug(`[${functionTag}] Successfully fetched models from API`, {
modelCount: dynamicModels.length,
});
return dynamicModels;
}
}
catch (error) {
logger.warn(`[${functionTag}] Failed to fetch models from API, using fallback`, {
error: error instanceof Error ? error.message : String(error),
});
}
// Fallback to hardcoded list if API fetch fails
const fallbackModels = process.env.LITELLM_FALLBACK_MODELS?.split(",")
.map((m) => m.trim())
.filter((m) => m.length > 0) || [
"openai/gpt-4o", // minimal safe baseline
"anthropic/claude-3-haiku",
"meta-llama/llama-3.1-8b-instruct",
"google/gemini-2.5-flash",
];
logger.debug(`[${functionTag}] Using fallback model list`, {
modelCount: fallbackModels.length,
});
return fallbackModels;
}
/**
* Fetch available models from LiteLLM proxy /v1/models endpoint
* @private
*/
async fetchModelsFromAPI() {
const functionTag = "LiteLLMProvider.fetchModelsFromAPI";
const config = getLiteLLMConfig();
const resolvedBaseURL = this.credentials?.baseURL ?? config.baseURL;
const resolvedApiKey = this.credentials?.apiKey ?? config.apiKey;
const modelsUrl = `${resolvedBaseURL}/v1/models`;
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 5000); // 5 second timeout
try {
logger.debug(`[${functionTag}] Fetching models from ${modelsUrl}`);
const proxyFetch = createProxyFetch();
const response = await proxyFetch(modelsUrl, {
method: "GET",
headers: {
Authorization: `Bearer ${resolvedApiKey}`,
"Content-Type": "application/json",
},
signal: controller.signal,
});
clearTimeout(timeoutId);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const data = await response.json();
// Parse OpenAI-compatible models response
if (data && Array.isArray(data.data)) {
const models = data.data
.map((model) => typeof model === "object" &&
model !== null &&
"id" in model &&
typeof model.id === "string"
? model.id
: undefined)
.filter((id) => typeof id === "string" && id.length > 0)
.sort();
logger.debug(`[${functionTag}] Successfully parsed models`, {
totalModels: models.length,
sampleModels: models.slice(0, 5),
});
return models;
}
else {
throw new Error("Invalid response format: expected data.data array");
}
}
catch (error) {
clearTimeout(timeoutId);
if (isAbortError(error)) {
throw new NetworkError("Request timed out after 5 seconds", this.providerName);
}
throw error;
}
}
}
//# sourceMappingURL=litellm.js.map