UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

github.com/juspay/neurolink

juspay/neurolink

1,087 lines (1,086 loc) • 77.9 kB

JavaScript

import { createAnalytics } from "../core/analytics.js"; import { BaseProvider } from "../core/baseProvider.js"; import { DEFAULT_MAX_STEPS } from "../core/constants.js"; import { modelConfig } from "../core/modelConfiguration.js"; import { createProxyFetch } from "../proxy/proxyFetch.js"; import { logger } from "../utils/logger.js"; import { buildMultimodalMessagesArray } from "../utils/messageBuilder.js"; import { buildMultimodalOptions } from "../utils/multimodalOptionsBuilder.js"; import { estimateTokens } from "../utils/tokenEstimation.js"; import { InvalidModelError, NetworkError, ProviderError, } from "../types/index.js"; import { tracers, ATTR, withClientStreamSpan } from "../telemetry/index.js"; import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js"; import { TimeoutError } from "../utils/timeout.js"; // Model version constants (configurable via environment) const DEFAULT_OLLAMA_MODEL = process.env.OLLAMA_MODEL || "llama3.1:8b"; const FALLBACK_OLLAMA_MODEL = "llama3.2:latest"; // Used when primary model fails // Configuration helpers const getOllamaBaseUrl = () => { return process.env.OLLAMA_BASE_URL || "http://localhost:11434"; }; const isOpenAICompatibleMode = () => { // Enable OpenAI-compatible API mode (/v1/chat/completions) instead of native Ollama API (/api/generate) // Useful for Ollama deployments that only support OpenAI-compatible routes (e.g., breezehq.dev) return process.env.OLLAMA_OPENAI_COMPATIBLE === "true"; }; // Create AbortController with timeout for better compatibility const createAbortSignalWithTimeout = (timeoutMs) => { const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), timeoutMs); // Clear timeout if signal is aborted through other means controller.signal.addEventListener("abort", () => { clearTimeout(timeoutId); }); return controller.signal; }; const getDefaultOllamaModel = () => { return process.env.OLLAMA_MODEL || DEFAULT_OLLAMA_MODEL; }; const getOllamaTimeout = () => { 
// Increased default timeout to 240000ms (4 minutes) to support slower native API responses // especially for larger models like aliafshar/gemma3-it-qat-tools:latest (12.2B parameters) return parseInt(process.env.OLLAMA_TIMEOUT || "240000", 10); }; function isOllamaHttpError(error) { return (error instanceof ProviderError && typeof error.statusCode === "number" && typeof error.responseBody === "string"); } async function createOllamaHttpError(response) { let responseBody = ""; try { responseBody = (await response.text()).trim(); } catch { // Ignore unreadable bodies } const suffix = responseBody ? ` - ${responseBody.slice(0, 500)}` : ""; const error = new ProviderError(`Ollama API error: ${response.status} ${response.statusText}${suffix}`, "ollama"); error.statusCode = response.status; error.statusText = response.statusText; error.responseBody = responseBody; return error; } // Create proxy-aware fetch instance const proxyFetch = createProxyFetch(); // Custom LanguageModel implementation for Ollama class OllamaLanguageModel { /** * Specification version for the AI SDK LanguageModel interface. * Uses "v2" for structural compatibility with AI SDK v6's `LanguageModelV2`. * The AI SDK checks this field to determine which interface version to use. */ specificationVersion = "v2"; provider = "ollama"; modelId; maxTokens; supportsStreaming = true; defaultObjectGenerationMode = "json"; /** * Supported URL patterns by media type. * Ollama runs locally and does not natively download URLs, so this is empty. * Required by the LanguageModelV2 interface. 
*/ supportedUrls = {}; baseUrl; timeout; constructor(modelId, baseUrl, timeout) { this.modelId = modelId; this.baseUrl = baseUrl; this.timeout = timeout; } estimateTokenCount(text) { return estimateTokens(text, "ollama"); } convertMessagesToPrompt(messages) { return messages .map((msg) => { if (typeof msg.content === "string") { return `${msg.role}: ${msg.content}`; } return `${msg.role}: ${JSON.stringify(msg.content)}`; }) .join("\n"); } async doGenerate(options) { // Vercel AI SDK passes messages via options.messages (same as stream mode) // Check options.messages first, then fall back to options.prompt for backward compatibility const messages = options .messages || options .prompt || []; // Check if we should use OpenAI-compatible API const useOpenAIMode = isOpenAICompatibleMode(); if (useOpenAIMode) { // OpenAI-compatible mode: Use /v1/chat/completions const requestBody = { model: this.modelId, messages, temperature: options.temperature, max_tokens: options.maxTokens, stream: false, }; if (logger.shouldLog("debug")) { logger.debug("[OllamaLanguageModel] Using OpenAI-compatible API with messages:", JSON.stringify(messages, null, 2)); } const response = await proxyFetch(`${this.baseUrl}/v1/chat/completions`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody), signal: createAbortSignalWithTimeout(this.timeout), }); if (!response.ok) { throw await createOllamaHttpError(response); } const data = await response.json(); logger.debug("[OllamaLanguageModel] OpenAI API Response:", JSON.stringify(data, null, 2)); const text = data.choices?.[0]?.message?.content || ""; const usage = data.usage || {}; const promptTokens = usage.prompt_tokens ?? this.estimateTokenCount(JSON.stringify(messages)); const completionTokens = usage.completion_tokens ?? this.estimateTokenCount(text); return { content: text ? 
[{ type: "text", text }] : [], text, usage: { inputTokens: promptTokens, outputTokens: completionTokens, promptTokens, completionTokens, totalTokens: usage.total_tokens ?? promptTokens + completionTokens, }, finishReason: data.choices?.[0]?.finish_reason ?? "stop", warnings: [], request: { body: JSON.stringify(requestBody), }, response: { id: data.id, modelId: data.model, timestamp: new Date(), headers: {}, body: data, }, rawCall: { rawPrompt: messages, rawSettings: { model: this.modelId, temperature: options.temperature, max_tokens: options.maxTokens, }, }, rawResponse: { headers: {}, }, }; } else { // Native Ollama mode: Use /api/generate const prompt = this.convertMessagesToPrompt(messages); logger.debug("[OllamaLanguageModel] Using native API with prompt:", JSON.stringify(prompt)); const response = await proxyFetch(`${this.baseUrl}/api/generate`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ model: this.modelId, prompt, stream: false, system: messages.find((m) => m.role === "system")?.content, options: { temperature: options.temperature, num_predict: options.maxTokens, }, }), signal: createAbortSignalWithTimeout(this.timeout), }); if (!response.ok) { throw await createOllamaHttpError(response); } const data = await response.json(); logger.debug("[OllamaLanguageModel] Native API Response:", JSON.stringify(data, null, 2)); const text = String(data.response ?? ""); const promptTokens = data.prompt_eval_count ?? this.estimateTokenCount(prompt); const completionTokens = data.eval_count ?? this.estimateTokenCount(text); const requestBody = { model: this.modelId, prompt, stream: false, system: messages.find((m) => m.role === "system")?.content, options: { temperature: options.temperature, num_predict: options.maxTokens, }, }; return { content: text ? 
[{ type: "text", text }] : [], text, usage: { inputTokens: promptTokens, outputTokens: completionTokens, promptTokens, completionTokens, totalTokens: promptTokens + completionTokens, }, finishReason: data.done_reason ?? "stop", warnings: [], request: { body: JSON.stringify(requestBody), }, response: { id: data.created_at, modelId: this.modelId, timestamp: data.created_at ? new Date(data.created_at) : new Date(), headers: {}, body: data, }, rawCall: { rawPrompt: prompt, rawSettings: { model: this.modelId, temperature: options.temperature, num_predict: options.maxTokens, }, }, rawResponse: { headers: {}, }, }; } } async doStream(options) { const messages = options .messages || []; // Check if we should use OpenAI-compatible API const useOpenAIMode = isOpenAICompatibleMode(); if (useOpenAIMode) { // OpenAI-compatible mode: Use /v1/chat/completions const requestUrl = `${this.baseUrl}/v1/chat/completions`; const requestBody = { model: this.modelId, messages, temperature: options.temperature, max_tokens: options.maxTokens, stream: true, }; logger.debug("[OllamaLanguageModel] doStream: Using OpenAI-compatible API", { url: requestUrl, baseUrl: this.baseUrl, modelId: this.modelId, requestBody: JSON.stringify(requestBody), }); const response = await proxyFetch(requestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody), signal: createAbortSignalWithTimeout(this.timeout), }); logger.debug("[OllamaLanguageModel] doStream: Response received", { status: response.status, statusText: response.statusText, ok: response.ok, }); if (!response.ok) { throw await createOllamaHttpError(response); } const self = this; return { stream: new ReadableStream({ async start(controller) { try { for await (const chunk of self.parseOpenAIStreamResponse(response, messages)) { controller.enqueue(chunk); } controller.close(); } catch (error) { controller.error(error); } }, }), rawCall: { rawPrompt: messages, rawSettings: { model: this.modelId, 
temperature: options.temperature, max_tokens: options.maxTokens, }, }, rawResponse: { headers: {}, }, }; } else { // Native Ollama mode: Use /api/generate const prompt = this.convertMessagesToPrompt(messages); const requestUrl = `${this.baseUrl}/api/generate`; const requestBody = { model: this.modelId, prompt, stream: true, system: messages.find((m) => m.role === "system")?.content, options: { temperature: options.temperature, num_predict: options.maxTokens, }, }; logger.debug("[OllamaLanguageModel] doStream: Using native API", { url: requestUrl, baseUrl: this.baseUrl, modelId: this.modelId, requestBody: JSON.stringify(requestBody), }); const response = await proxyFetch(requestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody), signal: createAbortSignalWithTimeout(this.timeout), }); logger.debug("[OllamaLanguageModel] doStream: Response received", { status: response.status, statusText: response.statusText, ok: response.ok, }); if (!response.ok) { throw await createOllamaHttpError(response); } const self = this; return { stream: new ReadableStream({ async start(controller) { try { for await (const chunk of self.parseStreamResponse(response)) { controller.enqueue(chunk); } controller.close(); } catch (error) { controller.error(error); } }, }), rawCall: { rawPrompt: messages, rawSettings: { model: this.modelId, temperature: options.temperature, num_predict: options.maxTokens, }, }, rawResponse: { headers: {}, }, }; } } async *parseStreamResponse(response) { const reader = response.body?.getReader(); if (!reader) { throw new Error("No response body"); } const decoder = new TextDecoder(); let buffer = ""; try { while (true) { const { done, value } = await reader.read(); if (done) { break; } buffer += decoder.decode(value, { stream: true }); const lines = buffer.split("\n"); buffer = lines.pop() || ""; for (const line of lines) { if (line.trim()) { try { const data = JSON.parse(line); if (data.response) { yield { 
type: "text-delta", textDelta: data.response, }; } if (data.done) { yield { type: "finish", finishReason: "stop", usage: { promptTokens: data.prompt_eval_count || this.estimateTokenCount(data.context || ""), completionTokens: data.eval_count || 0, }, }; return; } } catch (error) { logger.error("Error parsing Ollama stream response", { error, }); } } } } } finally { reader.releaseLock(); } } async *parseOpenAIStreamResponse(response, messages) { const reader = response.body?.getReader(); if (!reader) { throw new Error("No response body"); } const decoder = new TextDecoder(); let buffer = ""; // Estimate prompt tokens from messages (matches non-streaming behavior) const totalPromptTokens = this.estimateTokenCount(JSON.stringify(messages)); // Accumulate full completion text; estimate tokens once at the end to avoid // per-chunk rounding inflation that occurs when estimateTokenCount is called // on every delta and the results are summed. let completionText = ""; try { while (true) { const { done, value } = await reader.read(); if (done) { break; } buffer += decoder.decode(value, { stream: true }); const lines = buffer.split("\n"); buffer = lines.pop() || ""; for (const line of lines) { const trimmed = line.trim(); if (trimmed === "" || trimmed === "data: [DONE]") { continue; } if (trimmed.startsWith("data: ")) { try { const jsonStr = trimmed.slice(6); // Remove "data: " prefix const data = JSON.parse(jsonStr); // Extract content delta const content = data.choices?.[0]?.delta?.content; if (content) { yield { type: "text-delta", textDelta: content, }; completionText += content; } // Check for finish const finishReason = data.choices?.[0]?.finish_reason; if (finishReason === "stop") { // Prefer server-reported usage; fall back to a single estimate over // the full accumulated text (avoids per-chunk rounding inflation). 
const promptTokens = data.usage?.prompt_tokens || totalPromptTokens; const completionTokens = data.usage?.completion_tokens || this.estimateTokenCount(completionText); yield { type: "finish", finishReason: "stop", usage: { promptTokens, completionTokens, }, }; return; } } catch (error) { logger.error("Error parsing OpenAI stream response", { error, line: trimmed, }); } } } } // If loop exits without explicit finish, yield final finish yield { type: "finish", finishReason: "stop", usage: { promptTokens: totalPromptTokens, completionTokens: this.estimateTokenCount(completionText), }, }; } finally { reader.releaseLock(); } } } /** * Ollama Provider v2 - BaseProvider Implementation * * PHASE 3.7: BaseProvider wrap around existing custom Ollama implementation * * Features: * - Extends BaseProvider for shared functionality * - Preserves custom OllamaLanguageModel implementation * - Local model management and health checking * - Enhanced error handling with Ollama-specific guidance */ export class OllamaProvider extends BaseProvider { ollamaModel; baseUrl; timeout; constructor(modelName, credentials) { super(modelName, "ollama"); this.baseUrl = credentials?.baseURL ?? getOllamaBaseUrl(); this.timeout = getOllamaTimeout(); // Initialize Ollama model this.ollamaModel = new OllamaLanguageModel(this.modelName || getDefaultOllamaModel(), this.baseUrl, this.timeout); logger.debug("Ollama BaseProvider v2 initialized", { modelName: this.modelName, baseUrl: this.baseUrl, timeout: this.timeout, provider: this.providerName, }); } getProviderName() { return "ollama"; } getDefaultModel() { return getDefaultOllamaModel(); } /** * Returns the Vercel AI SDK model instance for Ollama. * * OllamaLanguageModel implements OllamaAsLanguageModel which is structurally * compatible with LanguageModelV2 (specificationVersion "v2", modelId, provider, * supportedUrls, doGenerate, doStream). 
*/ getAISDKModel() { const model = this.ollamaModel; return model; } /** * Ollama Tool Calling Support (Enhanced 2025) * * Uses configurable model list from ModelConfiguration instead of hardcoded values. * Tool-capable models can be configured via OLLAMA_TOOL_CAPABLE_MODELS environment variable. * * **Configuration Options:** * - Environment variable: OLLAMA_TOOL_CAPABLE_MODELS (comma-separated list) * - Configuration file: providers.ollama.modelBehavior.toolCapableModels * - Fallback: Default list of known tool-capable models * * **Implementation Features:** * - Direct Ollama API integration (/v1/chat/completions) * - Automatic tool schema conversion to Ollama format * - Streaming tool calls with incremental response parsing * - Model compatibility validation and fallback handling * * @returns true for supported models, false for unsupported models */ supportsTools() { const modelName = (this.modelName ?? getDefaultOllamaModel()).toLowerCase(); // Get tool-capable models from configuration const ollamaConfig = modelConfig.getProviderConfiguration("ollama"); const toolCapableModels = ollamaConfig?.modelBehavior?.toolCapableModels || []; // Only disable tools if we have positive evidence the model doesn't support them // If toolCapableModels config is empty, assume tools are supported (don't make assumptions) if (toolCapableModels.length === 0) { logger.debug("Ollama tool calling enabled", { model: this.modelName, reason: "No tool-capable config defined, assuming tools supported", baseUrl: this.baseUrl, }); return true; } // Config exists - check if current model matches tool-capable model patterns const isToolCapable = toolCapableModels.some((capableModel) => modelName.includes(capableModel.toLowerCase())); if (isToolCapable) { logger.debug("Ollama tool calling enabled", { model: this.modelName, reason: "Model in tool-capable list", baseUrl: this.baseUrl, configuredModels: toolCapableModels.length, }); return true; } // Config exists and model is NOT in list - 
disable tools logger.debug("Ollama tool calling disabled", { model: this.modelName, reason: "Model not in tool-capable list", suggestion: "Consider using llama3.1:8b-instruct, mistral:7b-instruct, or hermes3:8b for tool calling", availableToolModels: toolCapableModels.slice(0, 3), // Show first 3 for brevity }); return false; } /** * Extract images from multimodal messages for Ollama API * Returns array of base64-encoded images */ extractImagesFromMessages(messages) { const images = []; for (const msg of messages) { if (Array.isArray(msg.content)) { for (const content of msg.content) { const typedContent = content; if (typedContent.type === "image" && typedContent.image) { const imageData = typeof typedContent.image === "string" ? typedContent.image.replace(/^data:image\/\w+;base64,/, "") : Buffer.from(typedContent.image).toString("base64"); images.push(imageData); } } } } return images; } /** * Convert multimodal messages to Ollama chat format * Extracts text content and handles images separately */ convertToOllamaMessages(messages) { return messages.map((msg) => { let textContent = ""; const images = []; if (typeof msg.content === "string") { textContent = msg.content; } else if (Array.isArray(msg.content)) { for (const content of msg.content) { const typedContent = content; if (typedContent.type === "text" && typedContent.text) { textContent += typedContent.text; } else if (typedContent.type === "image" && typedContent.image) { const imageData = typeof typedContent.image === "string" ? typedContent.image.replace(/^data:image\/\w+;base64,/, "") : Buffer.from(typedContent.image).toString("base64"); images.push(imageData); } } } const ollamaMsg = { role: (msg.role === "system" ? 
"system" : msg.role), content: textContent, }; if (images.length > 0) { ollamaMsg.images = images; } return ollamaMsg; }); } // executeGenerate removed - BaseProvider handles all generation with tools async executeStream(options, analysisSchema) { try { this.validateStreamOptions(options); await this.checkOllamaHealth(); // Check if tools are supported and provided const modelSupportsTools = this.supportsTools(); const hasTools = options.tools && Object.keys(options.tools).length > 0; if (modelSupportsTools && hasTools) { // Use chat API with tools for tool-capable models return this.executeStreamWithTools(options, analysisSchema); } else { // Use generate API for non-tool scenarios return this.executeStreamWithoutTools(options, analysisSchema); } } catch (error) { throw this.handleProviderError(error); } } /** * Execute streaming with Ollama's function calling support * Uses conversation loop to handle multi-step tool execution */ async executeStreamWithTools(options, _analysisSchema) { return withClientStreamSpan({ name: "neurolink.provider.stream", tracer: tracers.provider, attributes: { [ATTR.GEN_AI_SYSTEM]: "ollama", [ATTR.GEN_AI_MODEL]: this.modelName || FALLBACK_OLLAMA_MODEL, [ATTR.GEN_AI_OPERATION]: "stream", [ATTR.NL_HAS_TOOLS]: true, [ATTR.NL_STREAM_MODE]: true, }, }, async (span) => { const startTime = Date.now(); const maxIterations = options.maxSteps || DEFAULT_MAX_STEPS; let iteration = 0; // Get all available tools (direct + MCP + external) // BaseProvider.stream() pre-merges base tools + external tools into options.tools const allTools = options.tools || (await this.getAllTools()); // Convert tools to Ollama format const ollamaTools = this.convertToolsToOllamaFormat(allTools); span.setAttribute(ATTR.NL_TOOL_COUNT, ollamaTools.length); // Validate that PDFs are not provided if (options.input?.pdfFiles && options.input.pdfFiles.length > 0) { throw this.handleProviderError(new Error("PDF inputs are not supported by OllamaProvider. 
" + "Please remove PDFs or use a supported provider (OpenAI, Anthropic, Google Vertex AI, etc.).")); } // Initialize conversation history const conversationHistory = []; // Build initial messages const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length || options.input?.files?.length || options.input?.csvFiles?.length); if (hasMultimodalInput) { logger.debug(`Ollama: Detected multimodal input, using multimodal message builder`, { hasImages: !!options.input?.images?.length, imageCount: options.input?.images?.length || 0, }); const multimodalOptions = buildMultimodalOptions(options, this.providerName, this.modelName); const multimodalMessages = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName); conversationHistory.push(...this.convertToOllamaMessages(multimodalMessages)); } else { if (options.systemPrompt) { conversationHistory.push({ role: "system", content: options.systemPrompt, }); } conversationHistory.push({ role: "user", content: options.input.text ?? "", }); } // Capture instance references before the stream for use in the finally block. const ollamaNeurolink = this.neurolink; const ollamaProviderName = this.providerName; const ollamaModelName = this.modelName || FALLBACK_OLLAMA_MODEL; // Conversation loop for multi-step tool execution let totalInputTokens = 0; let totalOutputTokens = 0; let lastFinishReason; let ollamaStreamErrored = false; const stream = new ReadableStream({ start: async (controller) => { try { while (iteration < maxIterations) { logger.debug(`[OllamaProvider] Conversation iteration ${iteration + 1}/${maxIterations}`); // Make API request — request usage in stream_options so // Pipeline B gets real token counts for Langfuse cost dashboards. 
const response = await proxyFetch(`${this.baseUrl}/v1/chat/completions`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ model: this.modelName || FALLBACK_OLLAMA_MODEL, messages: conversationHistory, tools: ollamaTools, tool_choice: "auto", stream: true, stream_options: { include_usage: true }, temperature: options.temperature, max_tokens: options.maxTokens, }), signal: createAbortSignalWithTimeout(this.timeout), }); if (!response.ok) { throw this.handleProviderError(await createOllamaHttpError(response)); } // Process response stream const { content, toolCalls, finishReason, usage } = await this.processOllamaResponse(response, controller); // Accumulate usage across iterations for Pipeline B if (usage) { totalInputTokens += usage.input; totalOutputTokens += usage.output; } if (finishReason) { lastFinishReason = finishReason; } // Add assistant message to history const assistantMessage = { role: "assistant", content: content || "", }; if (toolCalls && toolCalls.length > 0) { assistantMessage.tool_calls = toolCalls; } conversationHistory.push(assistantMessage); // Check finish reason if (finishReason === "stop" || !finishReason) { // Conversation complete span.setAttribute(ATTR.GEN_AI_FINISH_REASON, finishReason || "stop"); controller.close(); break; } else if (finishReason === "tool_calls" && toolCalls && toolCalls.length > 0) { // Execute tools logger.debug(`[OllamaProvider] Executing ${toolCalls.length} tools`); for (const tc of toolCalls) { span.addEvent("tool_call", { [ATTR.GEN_AI_TOOL_NAME]: tc.function.name, }); } const toolResults = await this.executeOllamaTools(toolCalls, options); // Add tool results to conversation const toolMessage = { role: "tool", content: JSON.stringify(toolResults), }; conversationHistory.push(toolMessage); iteration++; } else if (finishReason === "length") { // Max tokens reached, continue conversation logger.debug(`[OllamaProvider] Max tokens reached, continuing`); 
conversationHistory.push({ role: "user", content: "Please continue.", }); iteration++; } else { // Unknown finish reason, end conversation logger.warn(`[OllamaProvider] Unknown finish reason: ${finishReason}`); span.setAttribute(ATTR.GEN_AI_FINISH_REASON, finishReason); controller.close(); break; } } if (iteration >= maxIterations) { ollamaStreamErrored = true; controller.error(new Error(`Ollama conversation exceeded maximum iterations (${maxIterations})`)); } } catch (error) { ollamaStreamErrored = true; controller.error(error); } finally { // Resolve analytics with accumulated token counts so Pipeline A // and Pipeline B both get real usage data from Ollama. const aggregatedUsage = { input: totalInputTokens, output: totalOutputTokens, total: totalInputTokens + totalOutputTokens, }; resolveAnalytics(createAnalytics(this.providerName, this.modelName || FALLBACK_OLLAMA_MODEL, { usage: aggregatedUsage }, Date.now() - startTime, { requestId: `ollama-stream-${Date.now()}`, streamingMode: true, iterations: iteration, })); // Emit generation:end so Pipeline B (Langfuse) creates a GENERATION // observation. Ollama bypasses the Vercel AI SDK so // experimental_telemetry is never injected; we emit manually. const ollamaEmitter = ollamaNeurolink?.getEventEmitter(); if (ollamaEmitter) { // Collect accumulated text from conversation history const accumulatedContent = conversationHistory .filter((m) => m.role === "assistant") .map((m) => m.content) .join(""); ollamaEmitter.emit("generation:end", { provider: ollamaProviderName, responseTime: Date.now() - startTime, timestamp: Date.now(), result: { content: accumulatedContent, usage: aggregatedUsage, model: ollamaModelName, provider: ollamaProviderName, finishReason: ollamaStreamErrored ? "error" : (lastFinishReason ?? "stop"), }, success: !ollamaStreamErrored, }); } } }, }); // Defer analytics resolution until the stream's start callback finishes. 
// This ensures responseTime and iteration reflect the actual completed values // rather than values captured before the tool-loop executes. let resolveAnalytics; const analyticsPromise = new Promise((resolve) => { resolveAnalytics = resolve; }); return { stream: this.convertToAsyncIterable(stream), provider: this.providerName, model: this.modelName || FALLBACK_OLLAMA_MODEL, analytics: analyticsPromise, metadata: { startTime, streamId: `ollama-${Date.now()}`, }, }; }, (r) => r.stream, (r, wrapped) => ({ ...r, stream: wrapped })); } /** * Execute streaming without tools using the generate API * Fallback for non-tool scenarios or when chat API is unavailable */ async executeStreamWithoutTools(options, _analysisSchema) { return withClientStreamSpan({ name: "neurolink.provider.stream", tracer: tracers.provider, attributes: { [ATTR.GEN_AI_SYSTEM]: "ollama", [ATTR.GEN_AI_MODEL]: this.modelName || FALLBACK_OLLAMA_MODEL, [ATTR.GEN_AI_OPERATION]: "stream", [ATTR.NL_HAS_TOOLS]: false, [ATTR.NL_STREAM_MODE]: true, }, }, async () => { // Validate that PDFs are not provided if (options.input?.pdfFiles && options.input.pdfFiles.length > 0) { throw this.handleProviderError(new Error("PDF inputs are not supported by OllamaProvider. 
" + "Please remove PDFs or use a supported provider (OpenAI, Anthropic, Google Vertex AI, etc.).")); } // Check for multimodal input const hasMultimodalInput = !!(options.input?.images?.length || options.input?.content?.length || options.input?.files?.length || options.input?.csvFiles?.length); const useOpenAIMode = isOpenAICompatibleMode(); if (useOpenAIMode) { // OpenAI-compatible mode: Use /v1/chat/completions with messages logger.debug(`Ollama (OpenAI mode): Building messages for streaming`); const messages = []; if (options.systemPrompt) { messages.push({ role: "system", content: options.systemPrompt }); } if (hasMultimodalInput) { const multimodalOptions = buildMultimodalOptions(options, this.providerName, this.modelName); const multimodalMessages = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName); // Convert multimodal messages to text (OpenAI-compatible mode doesn't support images in /v1/chat/completions for Ollama) const content = multimodalMessages .map((msg) => typeof msg.content === "string" ? msg.content : "") .join("\n"); messages.push({ role: "user", content }); } else { messages.push({ role: "user", content: options.input.text ?? 
"" }); } const requestUrl = `${this.baseUrl}/v1/chat/completions`; const requestBody = { model: this.modelName || FALLBACK_OLLAMA_MODEL, messages, temperature: options.temperature, max_tokens: options.maxTokens, stream: true, }; logger.debug(`[Ollama OpenAI Mode] About to fetch:`, { url: requestUrl, baseUrl: this.baseUrl, modelName: this.modelName, requestBody: JSON.stringify(requestBody), }); const response = await proxyFetch(requestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody), signal: createAbortSignalWithTimeout(this.timeout), }); logger.debug(`[Ollama OpenAI Mode] Response received:`, { status: response.status, statusText: response.statusText, ok: response.ok, }); if (!response.ok) { throw this.handleProviderError(await createOllamaHttpError(response)); } // Transform to async generator for OpenAI-compatible format const self = this; const transformedStream = async function* () { const generator = self.createOpenAIStream(response); for await (const chunk of generator) { yield chunk; } }; return { stream: transformedStream(), provider: self.providerName, model: self.modelName, }; } else { // Native Ollama mode: Use /api/generate let prompt = options.input.text; let images; if (hasMultimodalInput) { logger.debug(`Ollama (native mode): Detected multimodal input`, { hasImages: !!options.input?.images?.length, imageCount: options.input?.images?.length || 0, }); const multimodalOptions = buildMultimodalOptions(options, this.providerName, this.modelName); const multimodalMessages = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName); // Extract text from messages for prompt prompt = multimodalMessages .map((msg) => typeof msg.content === "string" ? 
msg.content : "") .join("\n"); // Extract images images = this.extractImagesFromMessages(multimodalMessages); } const requestBody = { model: this.modelName || FALLBACK_OLLAMA_MODEL, prompt, system: options.systemPrompt, stream: true, options: { temperature: options.temperature, num_predict: options.maxTokens, }, }; if (images && images.length > 0) { requestBody.images = images; } const requestUrl = `${this.baseUrl}/api/generate`; logger.debug(`[Ollama Native Mode] About to fetch:`, { url: requestUrl, baseUrl: this.baseUrl, modelName: this.modelName, requestBody: JSON.stringify(requestBody), }); const response = await proxyFetch(requestUrl, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(requestBody), signal: createAbortSignalWithTimeout(this.timeout), }); logger.debug(`[Ollama Native Mode] Response received:`, { status: response.status, statusText: response.statusText, ok: response.ok, }); if (!response.ok) { throw this.handleProviderError(await createOllamaHttpError(response)); } // Transform to async generator to match other providers const self = this; const transformedStream = async function* () { const generator = self.createOllamaStream(response); for await (const chunk of generator) { yield chunk; } }; return { stream: transformedStream(), provider: this.providerName, model: this.modelName, }; } }, (r) => r.stream, (r, wrapped) => ({ ...r, stream: wrapped })); } /** * Convert AI SDK tools format to Ollama's function calling format */ convertToolsToOllamaFormat(tools) { if (!tools || typeof tools !== "object") { return []; } const toolsArray = Array.isArray(tools) ? tools : Object.values(tools); return toolsArray.map((tool) => ({ type: "function", function: { name: tool.name || tool.function?.name, description: tool.description || tool.function?.description, parameters: tool.parameters || tool.function?.parameters || { type: "object", properties: {}, required: [],