@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
1,087 lines (1,086 loc) • 77.9 kB
JavaScript
import { createAnalytics } from "../core/analytics.js";
import { BaseProvider } from "../core/baseProvider.js";
import { DEFAULT_MAX_STEPS } from "../core/constants.js";
import { modelConfig } from "../core/modelConfiguration.js";
import { createProxyFetch } from "../proxy/proxyFetch.js";
import { logger } from "../utils/logger.js";
import { buildMultimodalMessagesArray } from "../utils/messageBuilder.js";
import { buildMultimodalOptions } from "../utils/multimodalOptionsBuilder.js";
import { estimateTokens } from "../utils/tokenEstimation.js";
import { InvalidModelError, NetworkError, ProviderError, } from "../types/index.js";
import { tracers, ATTR, withClientStreamSpan } from "../telemetry/index.js";
import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
import { TimeoutError } from "../utils/timeout.js";
// Model version constants (configurable via environment)
// DEFAULT_OLLAMA_MODEL is resolved once, when this module is loaded: the OLLAMA_MODEL
// environment variable when it is set and non-empty, otherwise "llama3.1:8b".
const DEFAULT_OLLAMA_MODEL = process.env.OLLAMA_MODEL || "llama3.1:8b";
// NOTE(review): in the code visible here this value is substituted whenever the provider's
// modelName is falsy (request bodies, telemetry attributes) - confirm whether it is also
// used after a primary-model failure, as the trailing comment suggests.
const FALLBACK_OLLAMA_MODEL = "llama3.2:latest"; // Used when primary model fails
// Configuration helpers
/**
 * Resolves the base URL of the Ollama server.
 *
 * @returns {string} The OLLAMA_BASE_URL environment variable when it is set and
 *   non-empty, otherwise "http://localhost:11434".
 */
const getOllamaBaseUrl = () => {
    const { OLLAMA_BASE_URL: configuredUrl } = process.env;
    if (configuredUrl) {
        return configuredUrl;
    }
    return "http://localhost:11434";
};
/**
 * Reports whether requests should go to the OpenAI-compatible route
 * (/v1/chat/completions) instead of the native Ollama route (/api/generate).
 * Useful for Ollama deployments that only expose OpenAI-compatible routes.
 *
 * @returns {boolean} true only when OLLAMA_OPENAI_COMPATIBLE is exactly "true".
 */
const isOpenAICompatibleMode = () => {
    const { OLLAMA_OPENAI_COMPATIBLE: flag } = process.env;
    return flag === "true";
};
/**
 * Creates an AbortSignal that is aborted once `timeoutMs` milliseconds have elapsed.
 *
 * Only the signal is returned; the controller stays private to this helper.
 *
 * @param {number} timeoutMs - Delay in milliseconds before the signal is aborted.
 * @returns {AbortSignal} Signal to pass to fetch.
 */
const createAbortSignalWithTimeout = (timeoutMs) => {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    // A request usually settles long before the timeout. Without unref() the pending
    // timer would keep the Node.js event loop (and therefore the process) alive until
    // it fires. Browser timers are plain numbers, hence the feature check.
    if (timeoutId !== null && typeof timeoutId === "object" && typeof timeoutId.unref === "function") {
        timeoutId.unref();
    }
    // Release the timer as soon as the signal aborts; the listener is needed only once.
    controller.signal.addEventListener("abort", () => {
        clearTimeout(timeoutId);
    }, { once: true });
    return controller.signal;
};
/**
 * Resolves the model name to use when the caller did not choose one.
 *
 * The environment is re-read on every call, so a value assigned to OLLAMA_MODEL
 * after module load is honoured; otherwise the module-level default applies.
 *
 * @returns {string} OLLAMA_MODEL when set and non-empty, else DEFAULT_OLLAMA_MODEL.
 */
const getDefaultOllamaModel = () => {
    const { OLLAMA_MODEL: envModel } = process.env;
    return envModel ? envModel : DEFAULT_OLLAMA_MODEL;
};
/**
 * Resolves the per-request timeout in milliseconds.
 *
 * The default is 240000 ms (4 minutes) to support slower native API responses,
 * especially for larger models like aliafshar/gemma3-it-qat-tools:latest (12.2B parameters).
 *
 * @returns {number} OLLAMA_TIMEOUT parsed as a base-10 integer when it is a positive
 *   number, otherwise 240000.
 */
const getOllamaTimeout = () => {
    const defaultTimeoutMs = 240000;
    const parsed = Number.parseInt(process.env.OLLAMA_TIMEOUT || "", 10);
    // A non-numeric, zero or negative value would make the abort timer fire almost
    // immediately and cancel every request, so such values fall back to the default.
    return Number.isFinite(parsed) && parsed > 0 ? parsed : defaultTimeoutMs;
};
/**
 * Tells whether `error` has the shape produced for a failed Ollama HTTP response:
 * a ProviderError carrying a numeric `statusCode` and a string `responseBody`.
 *
 * @param {unknown} error - Value to inspect.
 * @returns {boolean} true when all three conditions hold.
 */
function isOllamaHttpError(error) {
    if (!(error instanceof ProviderError)) {
        return false;
    }
    const { statusCode, responseBody } = error;
    return typeof statusCode === "number" && typeof responseBody === "string";
}
/**
 * Builds a ProviderError describing a non-OK HTTP response.
 *
 * The message contains the status, the status text and at most the first 500
 * characters of the trimmed body; the complete trimmed body is kept on the error.
 *
 * @param {Response} response - The failed response; its body is consumed here.
 * @returns {Promise<ProviderError>} Error with `statusCode`, `statusText` and
 *   `responseBody` attached. The error is returned, not thrown.
 */
async function createOllamaHttpError(response) {
    const readTrimmedBody = async () => {
        try {
            const raw = await response.text();
            return raw.trim();
        }
        catch {
            // An unreadable body is treated as empty.
            return "";
        }
    };
    const responseBody = await readTrimmedBody();
    const detail = responseBody ? ` - ${responseBody.slice(0, 500)}` : "";
    const message = `Ollama API error: ${response.status} ${response.statusText}${detail}`;
    return Object.assign(new ProviderError(message, "ollama"), {
        statusCode: response.status,
        statusText: response.statusText,
        responseBody,
    });
}
// Create proxy-aware fetch instance.
// Built once, when this module is loaded, and used for every HTTP request issued
// by the code in this file.
const proxyFetch = createProxyFetch();
// Custom LanguageModel implementation for Ollama
/**
 * Language model object that talks to an Ollama server over HTTP.
 *
 * Depending on isOpenAICompatibleMode() each call goes either to the
 * OpenAI-compatible route (/v1/chat/completions) or to the native route
 * (/api/generate). Both a one-shot (doGenerate) and a streaming (doStream)
 * entry point are provided.
 */
class OllamaLanguageModel {
    /**
     * Specification version for the AI SDK LanguageModel interface.
     * Uses "v2" for structural compatibility with AI SDK v6's `LanguageModelV2`.
     * The AI SDK checks this field to determine which interface version to use.
     */
    specificationVersion = "v2";
    provider = "ollama";
    modelId;
    maxTokens;
    supportsStreaming = true;
    defaultObjectGenerationMode = "json";
    /**
     * Supported URL patterns by media type.
     * Ollama runs locally and does not natively download URLs, so this is empty.
     * Required by the LanguageModelV2 interface.
     */
    supportedUrls = {};
    baseUrl;
    timeout;
    /**
     * @param {string} modelId - Model name sent in every request body.
     * @param {string} baseUrl - Server base URL; route paths are appended to it.
     * @param {number} timeout - Per-request timeout in milliseconds.
     */
    constructor(modelId, baseUrl, timeout) {
        this.modelId = modelId;
        this.baseUrl = baseUrl;
        this.timeout = timeout;
    }
    /**
     * Estimates the token count of `text` with the shared estimator.
     * @param {string} text
     * @returns {number}
     */
    estimateTokenCount(text) {
        return estimateTokens(text, "ollama");
    }
    /**
     * Flattens chat messages into one prompt string, one "role: content" line per
     * message. Non-string content is serialised with JSON.stringify.
     * @param {Array<{role: string, content: unknown}>} messages
     * @returns {string}
     */
    convertMessagesToPrompt(messages) {
        return messages
            .map((msg) => {
            if (typeof msg.content === "string") {
                return `${msg.role}: ${msg.content}`;
            }
            return `${msg.role}: ${JSON.stringify(msg.content)}`;
        })
            .join("\n");
    }
    /**
     * Picks the message list out of the call options: `options.messages` first,
     * then `options.prompt`, then an empty list. Shared by doGenerate and doStream
     * so both entry points accept the same inputs.
     * @param {object} options
     * @returns {Array<object>}
     */
    resolveMessages(options) {
        return options.messages || options.prompt || [];
    }
    /**
     * POSTs an already serialised JSON body with the configured timeout.
     * The response is returned as-is; callers check `response.ok` themselves.
     * @param {string} url
     * @param {string} serializedBody
     * @returns {Promise<Response>}
     */
    async postJson(url, serializedBody) {
        return proxyFetch(url, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: serializedBody,
            signal: createAbortSignalWithTimeout(this.timeout),
        });
    }
    /**
     * Wraps an async iterable of stream parts in a ReadableStream. Every part is
     * enqueued; the stream closes when the iterable ends and errors when it throws.
     * @param {AsyncIterable<object>} parts
     * @returns {ReadableStream}
     */
    createPartStream(parts) {
        return new ReadableStream({
            async start(controller) {
                try {
                    for await (const part of parts) {
                        controller.enqueue(part);
                    }
                    controller.close();
                }
                catch (error) {
                    controller.error(error);
                }
            },
        });
    }
    /**
     * Performs a single non-streaming generation.
     *
     * @param {object} options - Reads `messages` (or `prompt`), `temperature`, `maxTokens`.
     * @returns {Promise<object>} Text, usage, finish reason and request/response metadata.
     * @throws The error built by createOllamaHttpError when the response is not OK.
     */
    async doGenerate(options) {
        // Vercel AI SDK passes messages via options.messages (same as stream mode);
        // options.prompt is accepted for backward compatibility.
        const messages = this.resolveMessages(options);
        const debugEnabled = logger.shouldLog("debug");
        if (isOpenAICompatibleMode()) {
            // OpenAI-compatible mode: Use /v1/chat/completions
            const requestBody = {
                model: this.modelId,
                messages,
                temperature: options.temperature,
                max_tokens: options.maxTokens,
                stream: false,
            };
            const serializedBody = JSON.stringify(requestBody);
            if (debugEnabled) {
                logger.debug("[OllamaLanguageModel] Using OpenAI-compatible API with messages:", JSON.stringify(messages, null, 2));
            }
            const response = await this.postJson(`${this.baseUrl}/v1/chat/completions`, serializedBody);
            if (!response.ok) {
                throw await createOllamaHttpError(response);
            }
            const data = await response.json();
            if (debugEnabled) {
                logger.debug("[OllamaLanguageModel] OpenAI API Response:", JSON.stringify(data, null, 2));
            }
            const text = data.choices?.[0]?.message?.content || "";
            const usage = data.usage || {};
            // Server-reported counts win; estimates are used only when they are absent.
            const promptTokens = usage.prompt_tokens ??
                this.estimateTokenCount(JSON.stringify(messages));
            const completionTokens = usage.completion_tokens ?? this.estimateTokenCount(text);
            return {
                content: text ? [{ type: "text", text }] : [],
                text,
                usage: {
                    inputTokens: promptTokens,
                    outputTokens: completionTokens,
                    promptTokens,
                    completionTokens,
                    totalTokens: usage.total_tokens ?? promptTokens + completionTokens,
                },
                finishReason: data.choices?.[0]?.finish_reason ?? "stop",
                warnings: [],
                request: {
                    body: serializedBody,
                },
                response: {
                    id: data.id,
                    modelId: data.model,
                    timestamp: new Date(),
                    headers: {},
                    body: data,
                },
                rawCall: {
                    rawPrompt: messages,
                    rawSettings: {
                        model: this.modelId,
                        temperature: options.temperature,
                        max_tokens: options.maxTokens,
                    },
                },
                rawResponse: {
                    headers: {},
                },
            };
        }
        // Native Ollama mode: Use /api/generate
        const prompt = this.convertMessagesToPrompt(messages);
        if (debugEnabled) {
            logger.debug("[OllamaLanguageModel] Using native API with prompt:", JSON.stringify(prompt));
        }
        // Built and serialised once: the same string is sent to the server and
        // reported back in `request.body`.
        const requestBody = {
            model: this.modelId,
            prompt,
            stream: false,
            system: messages.find((m) => m.role === "system")?.content,
            options: {
                temperature: options.temperature,
                num_predict: options.maxTokens,
            },
        };
        const serializedBody = JSON.stringify(requestBody);
        const response = await this.postJson(`${this.baseUrl}/api/generate`, serializedBody);
        if (!response.ok) {
            throw await createOllamaHttpError(response);
        }
        const data = await response.json();
        if (debugEnabled) {
            logger.debug("[OllamaLanguageModel] Native API Response:", JSON.stringify(data, null, 2));
        }
        const text = String(data.response ?? "");
        const promptTokens = data.prompt_eval_count ?? this.estimateTokenCount(prompt);
        const completionTokens = data.eval_count ?? this.estimateTokenCount(text);
        return {
            content: text ? [{ type: "text", text }] : [],
            text,
            usage: {
                inputTokens: promptTokens,
                outputTokens: completionTokens,
                promptTokens,
                completionTokens,
                totalTokens: promptTokens + completionTokens,
            },
            finishReason: data.done_reason ?? "stop",
            warnings: [],
            request: {
                body: serializedBody,
            },
            response: {
                id: data.created_at,
                modelId: this.modelId,
                timestamp: data.created_at ? new Date(data.created_at) : new Date(),
                headers: {},
                body: data,
            },
            rawCall: {
                rawPrompt: prompt,
                rawSettings: {
                    model: this.modelId,
                    temperature: options.temperature,
                    num_predict: options.maxTokens,
                },
            },
            rawResponse: {
                headers: {},
            },
        };
    }
    /**
     * Starts a streaming generation.
     *
     * @param {object} options - Reads `messages` (or `prompt`), `temperature`, `maxTokens`.
     * @returns {Promise<object>} `stream` (ReadableStream of "text-delta" / "finish"
     *   parts) together with rawCall / rawResponse metadata.
     * @throws The error built by createOllamaHttpError when the response is not OK.
     */
    async doStream(options) {
        // Same resolution as doGenerate, so callers that pass the conversation as
        // options.prompt do not end up streaming with an empty message list.
        const messages = this.resolveMessages(options);
        if (isOpenAICompatibleMode()) {
            // OpenAI-compatible mode: Use /v1/chat/completions
            const requestUrl = `${this.baseUrl}/v1/chat/completions`;
            const requestBody = {
                model: this.modelId,
                messages,
                temperature: options.temperature,
                max_tokens: options.maxTokens,
                stream: true,
            };
            const serializedBody = JSON.stringify(requestBody);
            logger.debug("[OllamaLanguageModel] doStream: Using OpenAI-compatible API", {
                url: requestUrl,
                baseUrl: this.baseUrl,
                modelId: this.modelId,
                requestBody: serializedBody,
            });
            const response = await this.postJson(requestUrl, serializedBody);
            logger.debug("[OllamaLanguageModel] doStream: Response received", {
                status: response.status,
                statusText: response.statusText,
                ok: response.ok,
            });
            if (!response.ok) {
                throw await createOllamaHttpError(response);
            }
            return {
                stream: this.createPartStream(this.parseOpenAIStreamResponse(response, messages)),
                rawCall: {
                    rawPrompt: messages,
                    rawSettings: {
                        model: this.modelId,
                        temperature: options.temperature,
                        max_tokens: options.maxTokens,
                    },
                },
                rawResponse: {
                    headers: {},
                },
            };
        }
        // Native Ollama mode: Use /api/generate
        const prompt = this.convertMessagesToPrompt(messages);
        const requestUrl = `${this.baseUrl}/api/generate`;
        const requestBody = {
            model: this.modelId,
            prompt,
            stream: true,
            system: messages.find((m) => m.role === "system")?.content,
            options: {
                temperature: options.temperature,
                num_predict: options.maxTokens,
            },
        };
        const serializedBody = JSON.stringify(requestBody);
        logger.debug("[OllamaLanguageModel] doStream: Using native API", {
            url: requestUrl,
            baseUrl: this.baseUrl,
            modelId: this.modelId,
            requestBody: serializedBody,
        });
        const response = await this.postJson(requestUrl, serializedBody);
        logger.debug("[OllamaLanguageModel] doStream: Response received", {
            status: response.status,
            statusText: response.statusText,
            ok: response.ok,
        });
        if (!response.ok) {
            throw await createOllamaHttpError(response);
        }
        return {
            stream: this.createPartStream(this.parseStreamResponse(response)),
            rawCall: {
                rawPrompt: messages,
                rawSettings: {
                    model: this.modelId,
                    temperature: options.temperature,
                    num_predict: options.maxTokens,
                },
            },
            rawResponse: {
                headers: {},
            },
        };
    }
    /**
     * Reads a response body and yields its non-empty lines, trimmed.
     *
     * A final line that is not terminated by a newline is yielded as well once the
     * body ends. The reader lock is released when iteration stops, including when
     * the consumer stops early.
     *
     * @param {Response} response
     * @yields {string}
     * @throws {Error} "No response body" when the response has no readable body.
     */
    async *readResponseLines(response) {
        const reader = response.body?.getReader();
        if (!reader) {
            throw new Error("No response body");
        }
        const decoder = new TextDecoder();
        let buffer = "";
        try {
            while (true) {
                const { done, value } = await reader.read();
                if (done) {
                    break;
                }
                buffer += decoder.decode(value, { stream: true });
                const lines = buffer.split("\n");
                // The last element is a partial line (or ""); keep it for the next chunk.
                buffer = lines.pop() || "";
                for (const line of lines) {
                    const trimmed = line.trim();
                    if (trimmed) {
                        yield trimmed;
                    }
                }
            }
            // Flush the decoder and whatever is left in the buffer.
            const tail = (buffer + decoder.decode()).trim();
            if (tail) {
                yield tail;
            }
        }
        finally {
            reader.releaseLock();
        }
    }
    /**
     * Parses a native /api/generate stream (one JSON object per line).
     *
     * Yields a "text-delta" part for every object with a non-empty `response` and a
     * single "finish" part for the first object with `done` set, after which parsing
     * stops. A line that cannot be processed is logged and skipped.
     *
     * @param {Response} response
     * @yields {object}
     */
    async *parseStreamResponse(response) {
        for await (const line of this.readResponseLines(response)) {
            try {
                const data = JSON.parse(line);
                if (data.response) {
                    yield {
                        type: "text-delta",
                        textDelta: data.response,
                    };
                }
                if (data.done) {
                    yield {
                        type: "finish",
                        finishReason: "stop",
                        usage: {
                            promptTokens: data.prompt_eval_count ||
                                this.estimateTokenCount(data.context || ""),
                            completionTokens: data.eval_count || 0,
                        },
                    };
                    return;
                }
            }
            catch (error) {
                logger.error("Error parsing Ollama stream response", {
                    error,
                });
            }
        }
    }
    /**
     * Parses an OpenAI-compatible server-sent-event stream ("data: {...}" lines).
     *
     * Yields a "text-delta" part per content delta and exactly one "finish" part:
     * either when a chunk reports finish_reason "stop" or, failing that, when the
     * body ends.
     *
     * @param {Response} response
     * @param {Array<object>} messages - Request messages, used to estimate prompt tokens.
     * @yields {object}
     */
    async *parseOpenAIStreamResponse(response, messages) {
        // Estimate prompt tokens from messages (matches non-streaming behavior)
        const totalPromptTokens = this.estimateTokenCount(JSON.stringify(messages));
        // Accumulate the full completion text and estimate once at the end; summing
        // per-delta estimates would inflate the count through rounding.
        let completionText = "";
        for await (const trimmed of this.readResponseLines(response)) {
            if (trimmed === "data: [DONE]" || !trimmed.startsWith("data: ")) {
                continue;
            }
            try {
                const data = JSON.parse(trimmed.slice(6)); // Remove "data: " prefix
                const content = data.choices?.[0]?.delta?.content;
                if (content) {
                    yield {
                        type: "text-delta",
                        textDelta: content,
                    };
                    completionText += content;
                }
                const finishReason = data.choices?.[0]?.finish_reason;
                if (finishReason === "stop") {
                    // Prefer server-reported usage; fall back to a single estimate
                    // over the full accumulated text.
                    const promptTokens = data.usage?.prompt_tokens || totalPromptTokens;
                    const completionTokens = data.usage?.completion_tokens ||
                        this.estimateTokenCount(completionText);
                    yield {
                        type: "finish",
                        finishReason: "stop",
                        usage: {
                            promptTokens,
                            completionTokens,
                        },
                    };
                    return;
                }
            }
            catch (error) {
                logger.error("Error parsing OpenAI stream response", {
                    error,
                    line: trimmed,
                });
            }
        }
        // The body ended without an explicit finish chunk: emit a final finish.
        yield {
            type: "finish",
            finishReason: "stop",
            usage: {
                promptTokens: totalPromptTokens,
                completionTokens: this.estimateTokenCount(completionText),
            },
        };
    }
}
/**
* Ollama Provider v2 - BaseProvider Implementation
*
* PHASE 3.7: BaseProvider wrapper around the existing custom Ollama implementation
*
* Features:
* - Extends BaseProvider for shared functionality
* - Preserves custom OllamaLanguageModel implementation
* - Local model management and health checking
* - Enhanced error handling with Ollama-specific guidance
*/
export class OllamaProvider extends BaseProvider {
ollamaModel;
baseUrl;
timeout;
constructor(modelName, credentials) {
super(modelName, "ollama");
this.baseUrl = credentials?.baseURL ?? getOllamaBaseUrl();
this.timeout = getOllamaTimeout();
// Initialize Ollama model
this.ollamaModel = new OllamaLanguageModel(this.modelName || getDefaultOllamaModel(), this.baseUrl, this.timeout);
logger.debug("Ollama BaseProvider v2 initialized", {
modelName: this.modelName,
baseUrl: this.baseUrl,
timeout: this.timeout,
provider: this.providerName,
});
}
getProviderName() {
return "ollama";
}
getDefaultModel() {
return getDefaultOllamaModel();
}
/**
* Returns the Vercel AI SDK model instance for Ollama.
*
* OllamaLanguageModel implements OllamaAsLanguageModel which is structurally
* compatible with LanguageModelV2 (specificationVersion "v2", modelId, provider,
* supportedUrls, doGenerate, doStream).
*/
getAISDKModel() {
const model = this.ollamaModel;
return model;
}
/**
* Ollama Tool Calling Support (Enhanced 2025)
*
* Uses configurable model list from ModelConfiguration instead of hardcoded values.
* Tool-capable models can be configured via OLLAMA_TOOL_CAPABLE_MODELS environment variable.
*
* **Configuration Options:**
* - Environment variable: OLLAMA_TOOL_CAPABLE_MODELS (comma-separated list)
* - Configuration file: providers.ollama.modelBehavior.toolCapableModels
* - Fallback: Default list of known tool-capable models
*
* **Implementation Features:**
* - Direct Ollama API integration (/v1/chat/completions)
* - Automatic tool schema conversion to Ollama format
* - Streaming tool calls with incremental response parsing
* - Model compatibility validation and fallback handling
*
* @returns true for supported models, false for unsupported models
*/
supportsTools() {
const modelName = (this.modelName ?? getDefaultOllamaModel()).toLowerCase();
// Get tool-capable models from configuration
const ollamaConfig = modelConfig.getProviderConfiguration("ollama");
const toolCapableModels = ollamaConfig?.modelBehavior?.toolCapableModels || [];
// Only disable tools if we have positive evidence the model doesn't support them
// If toolCapableModels config is empty, assume tools are supported (don't make assumptions)
if (toolCapableModels.length === 0) {
logger.debug("Ollama tool calling enabled", {
model: this.modelName,
reason: "No tool-capable config defined, assuming tools supported",
baseUrl: this.baseUrl,
});
return true;
}
// Config exists - check if current model matches tool-capable model patterns
const isToolCapable = toolCapableModels.some((capableModel) => modelName.includes(capableModel.toLowerCase()));
if (isToolCapable) {
logger.debug("Ollama tool calling enabled", {
model: this.modelName,
reason: "Model in tool-capable list",
baseUrl: this.baseUrl,
configuredModels: toolCapableModels.length,
});
return true;
}
// Config exists and model is NOT in list - disable tools
logger.debug("Ollama tool calling disabled", {
model: this.modelName,
reason: "Model not in tool-capable list",
suggestion: "Consider using llama3.1:8b-instruct, mistral:7b-instruct, or hermes3:8b for tool calling",
availableToolModels: toolCapableModels.slice(0, 3), // Show first 3 for brevity
});
return false;
}
/**
* Extract images from multimodal messages for Ollama API
* Returns array of base64-encoded images
*/
extractImagesFromMessages(messages) {
const images = [];
for (const msg of messages) {
if (Array.isArray(msg.content)) {
for (const content of msg.content) {
const typedContent = content;
if (typedContent.type === "image" && typedContent.image) {
const imageData = typeof typedContent.image === "string"
? typedContent.image.replace(/^data:image\/\w+;base64,/, "")
: Buffer.from(typedContent.image).toString("base64");
images.push(imageData);
}
}
}
}
return images;
}
/**
* Convert multimodal messages to Ollama chat format
* Extracts text content and handles images separately
*/
convertToOllamaMessages(messages) {
return messages.map((msg) => {
let textContent = "";
const images = [];
if (typeof msg.content === "string") {
textContent = msg.content;
}
else if (Array.isArray(msg.content)) {
for (const content of msg.content) {
const typedContent = content;
if (typedContent.type === "text" && typedContent.text) {
textContent += typedContent.text;
}
else if (typedContent.type === "image" && typedContent.image) {
const imageData = typeof typedContent.image === "string"
? typedContent.image.replace(/^data:image\/\w+;base64,/, "")
: Buffer.from(typedContent.image).toString("base64");
images.push(imageData);
}
}
}
const ollamaMsg = {
role: (msg.role === "system" ? "system" : msg.role),
content: textContent,
};
if (images.length > 0) {
ollamaMsg.images = images;
}
return ollamaMsg;
});
}
// executeGenerate removed - BaseProvider handles all generation with tools
async executeStream(options, analysisSchema) {
try {
this.validateStreamOptions(options);
await this.checkOllamaHealth();
// Check if tools are supported and provided
const modelSupportsTools = this.supportsTools();
const hasTools = options.tools && Object.keys(options.tools).length > 0;
if (modelSupportsTools && hasTools) {
// Use chat API with tools for tool-capable models
return this.executeStreamWithTools(options, analysisSchema);
}
else {
// Use generate API for non-tool scenarios
return this.executeStreamWithoutTools(options, analysisSchema);
}
}
catch (error) {
throw this.handleProviderError(error);
}
}
/**
* Execute streaming with Ollama's function calling support
* Uses conversation loop to handle multi-step tool execution
*/
async executeStreamWithTools(options, _analysisSchema) {
return withClientStreamSpan({
name: "neurolink.provider.stream",
tracer: tracers.provider,
attributes: {
[ATTR.GEN_AI_SYSTEM]: "ollama",
[ATTR.GEN_AI_MODEL]: this.modelName || FALLBACK_OLLAMA_MODEL,
[ATTR.GEN_AI_OPERATION]: "stream",
[ATTR.NL_HAS_TOOLS]: true,
[ATTR.NL_STREAM_MODE]: true,
},
}, async (span) => {
const startTime = Date.now();
const maxIterations = options.maxSteps || DEFAULT_MAX_STEPS;
let iteration = 0;
// Get all available tools (direct + MCP + external)
// BaseProvider.stream() pre-merges base tools + external tools into options.tools
const allTools = options.tools ||
(await this.getAllTools());
// Convert tools to Ollama format
const ollamaTools = this.convertToolsToOllamaFormat(allTools);
span.setAttribute(ATTR.NL_TOOL_COUNT, ollamaTools.length);
// Validate that PDFs are not provided
if (options.input?.pdfFiles && options.input.pdfFiles.length > 0) {
throw this.handleProviderError(new Error("PDF inputs are not supported by OllamaProvider. " +
"Please remove PDFs or use a supported provider (OpenAI, Anthropic, Google Vertex AI, etc.)."));
}
// Initialize conversation history
const conversationHistory = [];
// Build initial messages
const hasMultimodalInput = !!(options.input?.images?.length ||
options.input?.content?.length ||
options.input?.files?.length ||
options.input?.csvFiles?.length);
if (hasMultimodalInput) {
logger.debug(`Ollama: Detected multimodal input, using multimodal message builder`, {
hasImages: !!options.input?.images?.length,
imageCount: options.input?.images?.length || 0,
});
const multimodalOptions = buildMultimodalOptions(options, this.providerName, this.modelName);
const multimodalMessages = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
conversationHistory.push(...this.convertToOllamaMessages(multimodalMessages));
}
else {
if (options.systemPrompt) {
conversationHistory.push({
role: "system",
content: options.systemPrompt,
});
}
conversationHistory.push({
role: "user",
content: options.input.text ?? "",
});
}
// Capture instance references before the stream for use in the finally block.
const ollamaNeurolink = this.neurolink;
const ollamaProviderName = this.providerName;
const ollamaModelName = this.modelName || FALLBACK_OLLAMA_MODEL;
// Conversation loop for multi-step tool execution
let totalInputTokens = 0;
let totalOutputTokens = 0;
let lastFinishReason;
let ollamaStreamErrored = false;
const stream = new ReadableStream({
start: async (controller) => {
try {
while (iteration < maxIterations) {
logger.debug(`[OllamaProvider] Conversation iteration ${iteration + 1}/${maxIterations}`);
// Make API request — request usage in stream_options so
// Pipeline B gets real token counts for Langfuse cost dashboards.
const response = await proxyFetch(`${this.baseUrl}/v1/chat/completions`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
model: this.modelName || FALLBACK_OLLAMA_MODEL,
messages: conversationHistory,
tools: ollamaTools,
tool_choice: "auto",
stream: true,
stream_options: { include_usage: true },
temperature: options.temperature,
max_tokens: options.maxTokens,
}),
signal: createAbortSignalWithTimeout(this.timeout),
});
if (!response.ok) {
throw this.handleProviderError(await createOllamaHttpError(response));
}
// Process response stream
const { content, toolCalls, finishReason, usage } = await this.processOllamaResponse(response, controller);
// Accumulate usage across iterations for Pipeline B
if (usage) {
totalInputTokens += usage.input;
totalOutputTokens += usage.output;
}
if (finishReason) {
lastFinishReason = finishReason;
}
// Add assistant message to history
const assistantMessage = {
role: "assistant",
content: content || "",
};
if (toolCalls && toolCalls.length > 0) {
assistantMessage.tool_calls = toolCalls;
}
conversationHistory.push(assistantMessage);
// Check finish reason
if (finishReason === "stop" || !finishReason) {
// Conversation complete
span.setAttribute(ATTR.GEN_AI_FINISH_REASON, finishReason || "stop");
controller.close();
break;
}
else if (finishReason === "tool_calls" &&
toolCalls &&
toolCalls.length > 0) {
// Execute tools
logger.debug(`[OllamaProvider] Executing ${toolCalls.length} tools`);
for (const tc of toolCalls) {
span.addEvent("tool_call", {
[ATTR.GEN_AI_TOOL_NAME]: tc.function.name,
});
}
const toolResults = await this.executeOllamaTools(toolCalls, options);
// Add tool results to conversation
const toolMessage = {
role: "tool",
content: JSON.stringify(toolResults),
};
conversationHistory.push(toolMessage);
iteration++;
}
else if (finishReason === "length") {
// Max tokens reached, continue conversation
logger.debug(`[OllamaProvider] Max tokens reached, continuing`);
conversationHistory.push({
role: "user",
content: "Please continue.",
});
iteration++;
}
else {
// Unknown finish reason, end conversation
logger.warn(`[OllamaProvider] Unknown finish reason: ${finishReason}`);
span.setAttribute(ATTR.GEN_AI_FINISH_REASON, finishReason);
controller.close();
break;
}
}
if (iteration >= maxIterations) {
ollamaStreamErrored = true;
controller.error(new Error(`Ollama conversation exceeded maximum iterations (${maxIterations})`));
}
}
catch (error) {
ollamaStreamErrored = true;
controller.error(error);
}
finally {
// Resolve analytics with accumulated token counts so Pipeline A
// and Pipeline B both get real usage data from Ollama.
const aggregatedUsage = {
input: totalInputTokens,
output: totalOutputTokens,
total: totalInputTokens + totalOutputTokens,
};
resolveAnalytics(createAnalytics(this.providerName, this.modelName || FALLBACK_OLLAMA_MODEL, { usage: aggregatedUsage }, Date.now() - startTime, {
requestId: `ollama-stream-${Date.now()}`,
streamingMode: true,
iterations: iteration,
}));
// Emit generation:end so Pipeline B (Langfuse) creates a GENERATION
// observation. Ollama bypasses the Vercel AI SDK so
// experimental_telemetry is never injected; we emit manually.
const ollamaEmitter = ollamaNeurolink?.getEventEmitter();
if (ollamaEmitter) {
// Collect accumulated text from conversation history
const accumulatedContent = conversationHistory
.filter((m) => m.role === "assistant")
.map((m) => m.content)
.join("");
ollamaEmitter.emit("generation:end", {
provider: ollamaProviderName,
responseTime: Date.now() - startTime,
timestamp: Date.now(),
result: {
content: accumulatedContent,
usage: aggregatedUsage,
model: ollamaModelName,
provider: ollamaProviderName,
finishReason: ollamaStreamErrored
? "error"
: (lastFinishReason ?? "stop"),
},
success: !ollamaStreamErrored,
});
}
}
},
});
// Defer analytics resolution until the stream's start callback finishes.
// This ensures responseTime and iteration reflect the actual completed values
// rather than values captured before the tool-loop executes.
let resolveAnalytics;
const analyticsPromise = new Promise((resolve) => {
resolveAnalytics = resolve;
});
return {
stream: this.convertToAsyncIterable(stream),
provider: this.providerName,
model: this.modelName || FALLBACK_OLLAMA_MODEL,
analytics: analyticsPromise,
metadata: {
startTime,
streamId: `ollama-${Date.now()}`,
},
};
}, (r) => r.stream, (r, wrapped) => ({ ...r, stream: wrapped }));
}
/**
* Execute streaming without tools using the generate API
* Fallback for non-tool scenarios or when chat API is unavailable
*/
async executeStreamWithoutTools(options, _analysisSchema) {
return withClientStreamSpan({
name: "neurolink.provider.stream",
tracer: tracers.provider,
attributes: {
[ATTR.GEN_AI_SYSTEM]: "ollama",
[ATTR.GEN_AI_MODEL]: this.modelName || FALLBACK_OLLAMA_MODEL,
[ATTR.GEN_AI_OPERATION]: "stream",
[ATTR.NL_HAS_TOOLS]: false,
[ATTR.NL_STREAM_MODE]: true,
},
}, async () => {
// Validate that PDFs are not provided
if (options.input?.pdfFiles && options.input.pdfFiles.length > 0) {
throw this.handleProviderError(new Error("PDF inputs are not supported by OllamaProvider. " +
"Please remove PDFs or use a supported provider (OpenAI, Anthropic, Google Vertex AI, etc.)."));
}
// Check for multimodal input
const hasMultimodalInput = !!(options.input?.images?.length ||
options.input?.content?.length ||
options.input?.files?.length ||
options.input?.csvFiles?.length);
const useOpenAIMode = isOpenAICompatibleMode();
if (useOpenAIMode) {
// OpenAI-compatible mode: Use /v1/chat/completions with messages
logger.debug(`Ollama (OpenAI mode): Building messages for streaming`);
const messages = [];
if (options.systemPrompt) {
messages.push({ role: "system", content: options.systemPrompt });
}
if (hasMultimodalInput) {
const multimodalOptions = buildMultimodalOptions(options, this.providerName, this.modelName);
const multimodalMessages = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
// Convert multimodal messages to text (OpenAI-compatible mode doesn't support images in /v1/chat/completions for Ollama)
const content = multimodalMessages
.map((msg) => typeof msg.content === "string" ? msg.content : "")
.join("\n");
messages.push({ role: "user", content });
}
else {
messages.push({ role: "user", content: options.input.text ?? "" });
}
const requestUrl = `${this.baseUrl}/v1/chat/completions`;
const requestBody = {
model: this.modelName || FALLBACK_OLLAMA_MODEL,
messages,
temperature: options.temperature,
max_tokens: options.maxTokens,
stream: true,
};
logger.debug(`[Ollama OpenAI Mode] About to fetch:`, {
url: requestUrl,
baseUrl: this.baseUrl,
modelName: this.modelName,
requestBody: JSON.stringify(requestBody),
});
const response = await proxyFetch(requestUrl, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(requestBody),
signal: createAbortSignalWithTimeout(this.timeout),
});
logger.debug(`[Ollama OpenAI Mode] Response received:`, {
status: response.status,
statusText: response.statusText,
ok: response.ok,
});
if (!response.ok) {
throw this.handleProviderError(await createOllamaHttpError(response));
}
// Transform to async generator for OpenAI-compatible format
const self = this;
const transformedStream = async function* () {
const generator = self.createOpenAIStream(response);
for await (const chunk of generator) {
yield chunk;
}
};
return {
stream: transformedStream(),
provider: self.providerName,
model: self.modelName,
};
}
else {
// Native Ollama mode: Use /api/generate
let prompt = options.input.text;
let images;
if (hasMultimodalInput) {
logger.debug(`Ollama (native mode): Detected multimodal input`, {
hasImages: !!options.input?.images?.length,
imageCount: options.input?.images?.length || 0,
});
const multimodalOptions = buildMultimodalOptions(options, this.providerName, this.modelName);
const multimodalMessages = await buildMultimodalMessagesArray(multimodalOptions, this.providerName, this.modelName);
// Extract text from messages for prompt
prompt = multimodalMessages
.map((msg) => typeof msg.content === "string" ? msg.content : "")
.join("\n");
// Extract images
images = this.extractImagesFromMessages(multimodalMessages);
}
const requestBody = {
model: this.modelName || FALLBACK_OLLAMA_MODEL,
prompt,
system: options.systemPrompt,
stream: true,
options: {
temperature: options.temperature,
num_predict: options.maxTokens,
},
};
if (images && images.length > 0) {
requestBody.images = images;
}
const requestUrl = `${this.baseUrl}/api/generate`;
logger.debug(`[Ollama Native Mode] About to fetch:`, {
url: requestUrl,
baseUrl: this.baseUrl,
modelName: this.modelName,
requestBody: JSON.stringify(requestBody),
});
const response = await proxyFetch(requestUrl, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(requestBody),
signal: createAbortSignalWithTimeout(this.timeout),
});
logger.debug(`[Ollama Native Mode] Response received:`, {
status: response.status,
statusText: response.statusText,
ok: response.ok,
});
if (!response.ok) {
throw this.handleProviderError(await createOllamaHttpError(response));
}
// Transform to async generator to match other providers
const self = this;
const transformedStream = async function* () {
const generator = self.createOllamaStream(response);
for await (const chunk of generator) {
yield chunk;
}
};
return {
stream: transformedStream(),
provider: this.providerName,
model: this.modelName,
};
}
}, (r) => r.stream, (r, wrapped) => ({ ...r, stream: wrapped }));
}
/**
* Convert AI SDK tools format to Ollama's function calling format
*/
convertToolsToOllamaFormat(tools) {
if (!tools || typeof tools !== "object") {
return [];
}
const toolsArray = Array.isArray(tools) ? tools : Object.values(tools);
return toolsArray.map((tool) => ({
type: "function",
function: {
name: tool.name || tool.function?.name,
description: tool.description || tool.function?.description,
parameters: tool.parameters ||
tool.function?.parameters || {
type: "object",
properties: {},
required: [],