UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

273 lines (272 loc) 13.6 kB
import { createOpenAI } from "@ai-sdk/openai";
import { stepCountIs, streamText } from "ai";
import { BaseProvider } from "../core/baseProvider.js";
import { DEFAULT_MAX_STEPS } from "../core/constants.js";
import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
import { isNeuroLink } from "../neurolink.js";
import { createProxyFetch } from "../proxy/proxyFetch.js";
import { createLoggingFetch } from "../utils/loggingFetch.js";
import { tracers, ATTR, withClientStreamSpan } from "../telemetry/index.js";
import { InvalidModelError, NetworkError, ProviderError, } from "../types/index.js";
import { logger } from "../utils/logger.js";
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
import { resolveToolChoice } from "../utils/toolChoice.js";
import { toAnalyticsStreamResult } from "./providerTypeUtils.js";

const LM_STUDIO_DEFAULT_BASE_URL = "http://localhost:1234/v1";
const LM_STUDIO_PLACEHOLDER_KEY = "lm-studio";
const FALLBACK_MODEL = "local-model";
// Upper bound (ms) for a single `/models` probe so a hung server cannot stall
// model discovery or health checks.
const MODEL_DISCOVERY_TIMEOUT_MS = 5000;

/**
 * Resolve the LM Studio base URL from the environment.
 * An empty `LM_STUDIO_BASE_URL` is treated as unset (hence `||`, not `??`).
 *
 * @returns {string} The configured base URL, or the localhost default.
 */
const getLmStudioBaseURL = () => {
    return process.env.LM_STUDIO_BASE_URL || LM_STUDIO_DEFAULT_BASE_URL;
};

/**
 * Build the model-listing endpoint URL for a base URL, tolerating a single
 * trailing slash on the base.
 *
 * @param {string} baseURL - Base URL such as `http://localhost:1234/v1`.
 * @returns {string} `<baseURL>/models`.
 */
const buildModelsUrl = (baseURL) => `${baseURL.replace(/\/$/, "")}/models`;

/**
 * Build the request headers for the `/models` probe.
 * The placeholder key only exists to satisfy `createOpenAI()`, so it is never
 * sent; a real key is forwarded as a Bearer token.
 *
 * @param {string | undefined} apiKey - Configured API key.
 * @returns {{ Authorization: string } | undefined} Headers, or `undefined`
 *   when no real key is configured.
 */
const buildAuthHeaders = (apiKey) => apiKey && apiKey !== LM_STUDIO_PLACEHOLDER_KEY
    ? { Authorization: `Bearer ${apiKey}` }
    : undefined;

/**
 * Extract usable model ids from a `/models` response payload.
 * Entries whose `id` is missing, non-string, or blank are dropped so callers
 * never treat such an entry as a loaded model.
 *
 * @param {unknown} payload - Parsed JSON body (may be `null` or malformed).
 * @returns {string[]} Model ids in payload order; empty when `payload.data`
 *   is not an array.
 */
const extractModelIds = (payload) => {
    const entries = payload?.data;
    if (!Array.isArray(entries)) {
        return [];
    }
    return entries
        .map((entry) => entry?.id)
        .filter((id) => typeof id === "string" && id.trim().length > 0);
};

/**
 * LM Studio Provider
 * Wraps the LM Studio local server (https://lmstudio.ai/) which exposes an
 * OpenAI-compatible API at http://localhost:1234/v1 by default.
 * Auto-discovers the loaded model via /v1/models if no model specified.
 */
export class LMStudioProvider extends BaseProvider {
    model;
    // The model name passed by the caller — never overwritten by auto-discovery,
    // so a discovery-miss FALLBACK_MODEL never poisons the next call's branch
    // through `if (explicit && explicit.trim() !== "")`.
    requestedModelName;
    baseURL;
    apiKey;
    discoveredModel;
    lmstudioClient;

    /**
     * @param {string | undefined} modelName - Explicit model id; empty or
     *   undefined enables auto-discovery.
     * @param {unknown} sdk - Forwarded to the base class only when
     *   `isNeuroLink(sdk)` accepts it.
     * @param {unknown} _region - Unused.
     * @param {{ baseURL?: string, apiKey?: string } | undefined} credentials -
     *   Optional overrides for the base URL and API key.
     */
    constructor(modelName, sdk, _region, credentials) {
        const validatedNeurolink = isNeuroLink(sdk) ? sdk : undefined;
        super(modelName, "lm-studio", validatedNeurolink);
        this.requestedModelName = modelName;
        this.baseURL = credentials?.baseURL ?? getLmStudioBaseURL();
        // LM Studio's local server doesn't authenticate, but the AI SDK's
        // createOpenAI() requires an apiKey. Allow override via credentials/env
        // for users who run LM Studio behind an auth-proxying reverse-proxy.
        this.apiKey =
            credentials?.apiKey ??
                process.env.LM_STUDIO_API_KEY ??
                LM_STUDIO_PLACEHOLDER_KEY;
        this.lmstudioClient = createOpenAI({
            baseURL: this.baseURL,
            apiKey: this.apiKey,
            fetch: createLoggingFetch("lm-studio"),
        });
        logger.debug("LM Studio Provider initialized", {
            modelName: this.modelName,
            providerName: this.providerName,
            baseURL: this.baseURL,
        });
    }

    /**
     * List the ids of the models reported by `<baseURL>/models`.
     *
     * @param {AbortSignal} [callerSignal] - Optional caller signal; combined
     *   with the fixed discovery timeout.
     * @returns {Promise<string[]>} Non-blank string model ids, in server order.
     * @throws {Error} When the response is not OK or the payload has no
     *   `data` array. Fetch/abort errors propagate unchanged.
     */
    async getAvailableModels(callerSignal) {
        // Use the proxy-aware fetch + bearer auth header so users running LM
        // Studio behind an auth-proxying reverse-proxy can still discover models.
        // Compose the caller's request signal (per-request timeout / abort) with
        // a fixed discovery cap so cancellation propagates AND a hung server
        // can't stall provider initialization.
        const proxyFetch = createProxyFetch();
        const discoveryTimeout = AbortSignal.timeout(MODEL_DISCOVERY_TIMEOUT_MS);
        const composedSignal = callerSignal
            ? AbortSignal.any([callerSignal, discoveryTimeout])
            : discoveryTimeout;
        const response = await proxyFetch(buildModelsUrl(this.baseURL), {
            headers: buildAuthHeaders(this.apiKey),
            signal: composedSignal,
        });
        if (!response.ok) {
            throw new Error(`LM Studio /v1/models returned ${response.status}: ${response.statusText}`);
        }
        const payload = await response.json();
        // Fail with a descriptive message instead of an opaque TypeError when
        // the server answers 200 with something that is not a model list.
        if (!Array.isArray(payload?.data)) {
            throw new Error("LM Studio /v1/models returned an unexpected payload (missing `data` array)");
        }
        // Drop entries without a usable id so a blank/undefined id can never be
        // picked (and memoized) as the discovered model.
        return extractModelIds(payload);
    }

    /**
     * Resolve the AI SDK chat model for this provider.
     * Uses the caller-supplied model name when present; otherwise probes
     * `/models` and takes the first id, falling back to FALLBACK_MODEL when
     * discovery fails or returns nothing. The result is memoized only when the
     * model was explicit or discovery succeeded.
     *
     * @param {AbortSignal} [signal] - Forwarded to the discovery probe.
     * @returns {Promise<unknown>} The model returned by `lmstudioClient.chat()`.
     */
    async getAISDKModel(signal) {
        if (this.model) {
            return this.model;
        }
        let modelToUse;
        let discoverySucceeded = false;
        // Use requestedModelName, not this.modelName — refreshHandlersForModel()
        // mutates this.modelName, so on a retry after a discovery miss the
        // FALLBACK_MODEL would look like an explicit user choice and we'd never
        // re-attempt /v1/models. The constructor-captured name preserves intent.
        const explicit = this.requestedModelName;
        if (explicit && explicit.trim() !== "") {
            modelToUse = explicit;
            discoverySucceeded = true; // explicit user choice — treat as success
        }
        else {
            try {
                const models = await this.getAvailableModels(signal);
                if (models.length > 0) {
                    this.discoveredModel = models[0];
                    modelToUse = this.discoveredModel;
                    discoverySucceeded = true;
                    logger.info(`LM Studio auto-discovered model: ${modelToUse} (${models.length} loaded)`);
                }
                else {
                    modelToUse = FALLBACK_MODEL;
                    logger.warn("LM Studio /v1/models returned no models. Load a model in the LM Studio app.");
                }
            }
            catch (error) {
                logger.warn(`LM Studio model auto-discovery failed: ${error instanceof Error ? error.message : String(error)}`);
                modelToUse = FALLBACK_MODEL;
            }
        }
        // Persist resolved model on the instance and rebuild the composed
        // handlers (TelemetryHandler, MessageBuilder, etc.) so pricing /
        // telemetry / span attributes report the discovered model name. Plain
        // assignment to `this.modelName` is not enough — handlers cached the
        // pre-discovery value at construction time.
        this.refreshHandlersForModel(modelToUse);
        // .chat() — LM Studio exposes /v1/chat/completions, not /v1/responses
        const resolvedModel = this.lmstudioClient.chat(modelToUse);
        // Only memoize on actual success. After a discovery miss (server down,
        // empty /v1/models, /models 5xx), starting LM Studio or loading a model
        // should let the next call re-attempt discovery instead of being stuck
        // on FALLBACK_MODEL for the lifetime of this provider instance.
        if (discoverySucceeded) {
            this.model = resolvedModel;
        }
        return resolvedModel;
    }

    /**
     * Stream entry point: resolves the model, then runs `executeStreamInner`
     * inside a client stream span.
     *
     * @param {object} options - Stream options (see `executeStreamInner`).
     * @param {unknown} _analysisSchema - Unused.
     * @returns {Promise<object>} The inner result with `stream` replaced by the
     *   span-wrapped stream.
     */
    async executeStream(options, _analysisSchema) {
        // Resolve the LM Studio model BEFORE opening the span so OTEL
        // attributes, MessageBuilder, and downstream image/tool adapters all see
        // the discovered model id rather than the empty pre-discovery placeholder.
        // Pass the caller's abort signal so user cancellation / per-request
        // timeouts are honored during the discovery probe (not just after it).
        await this.getAISDKModel(options.abortSignal);
        return withClientStreamSpan({
            name: "neurolink.provider.stream",
            tracer: tracers.provider,
            attributes: {
                [ATTR.GEN_AI_SYSTEM]: "lm-studio",
                [ATTR.GEN_AI_MODEL]: this.modelName || this.discoveredModel || FALLBACK_MODEL,
                [ATTR.GEN_AI_OPERATION]: "stream",
                [ATTR.NL_STREAM_MODE]: true,
            },
        }, async () => this.executeStreamInner(options), (r) => r.stream, (r, wrapped) => ({ ...r, stream: wrapped }));
    }

    /**
     * Build messages and tools, call `streamText`, and package the result.
     *
     * @param {object} options - Stream options; this method reads
     *   `disableTools`, `tools`, `temperature`, `maxTokens`, `maxSteps` and
     *   `abortSignal`, and passes the whole object to base-class helpers.
     * @returns {Promise<{ stream: unknown, provider: string, model: string,
     *   analytics: unknown, metadata: { startTime: number, streamId: string } }>}
     * @throws The value returned by `handleProviderError(error)` on failure.
     */
    async executeStreamInner(options) {
        this.validateStreamOptions(options);
        const startTime = Date.now();
        const timeout = this.getTimeout(options);
        const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
        try {
            const shouldUseTools = !options.disableTools && this.supportsTools();
            const tools = shouldUseTools
                ? options.tools || (await this.getAllTools())
                : {};
            // Resolve the AI SDK model BEFORE building messages so message/image
            // adapters see the same handlers/model that streamText will use. Without
            // this, a fallback warm-up + late-server-start pattern could build
            // messages under FALLBACK_MODEL handlers and stream under a different
            // discovered model — and pay an extra `/v1/models` probe each time.
            const model = await this.getAISDKModelWithMiddleware(options);
            const messages = await this.buildMessagesForStream(options);
            const result = await streamText({
                model,
                messages,
                temperature: options.temperature,
                maxOutputTokens: options.maxTokens,
                tools,
                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
                experimental_repairToolCall: this.getToolCallRepairFn(options),
                onStepFinish: ({ toolCalls, toolResults }) => {
                    emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
                    // Storage is best-effort: a failure is logged, never thrown
                    // into the stream.
                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
                        logger.warn("[LMStudioProvider] Failed to store tool executions", {
                            provider: this.providerName,
                            error: error instanceof Error ? error.message : String(error),
                        });
                    });
                },
            });
            // NOTE(review): cleanup runs as soon as streamText() returns, which is
            // presumably before the stream has been consumed — confirm the timeout
            // is not meant to bound the streaming phase as well.
            timeoutController?.cleanup();
            const transformedStream = this.createTextStream(result);
            const analyticsPromise = streamAnalyticsCollector.createAnalytics(this.providerName, this.modelName || this.discoveredModel || FALLBACK_MODEL, toAnalyticsStreamResult(result), Date.now() - startTime, {
                requestId: `lmstudio-stream-${Date.now()}`,
                streamingMode: true,
            });
            return {
                stream: transformedStream,
                provider: this.providerName,
                model: this.modelName || this.discoveredModel || FALLBACK_MODEL,
                analytics: analyticsPromise,
                metadata: { startTime, streamId: `lmstudio-${Date.now()}` },
            };
        }
        catch (error) {
            timeoutController?.cleanup();
            throw this.handleProviderError(error);
        }
    }

    /** @returns {string} The provider name held by the instance. */
    getProviderName() {
        return this.providerName;
    }

    /**
     * @returns {string} `LM_STUDIO_MODEL` from the environment, or an empty
     *   string when it is unset or empty.
     */
    getDefaultModel() {
        return process.env.LM_STUDIO_MODEL || "";
    }

    /**
     * Map a raw error onto one of the typed provider errors.
     *
     * @param {unknown} error - The error to classify.
     * @returns {NetworkError | InvalidModelError | ProviderError} NetworkError
     *   for timeouts and unreachable-server signatures, InvalidModelError for
     *   `model_not_found` / `404` messages, ProviderError otherwise.
     */
    formatProviderError(error) {
        if (error instanceof TimeoutError) {
            return new NetworkError(`Request timed out: ${error.message}`, "lm-studio");
        }
        const errorRecord = error;
        const message = typeof errorRecord?.message === "string"
            ? errorRecord.message
            : "Unknown error";
        const cause = errorRecord?.cause ?? {};
        // The code may sit on the error itself or on its `cause`.
        const code = (errorRecord?.code ?? cause?.code);
        if (code === "ECONNREFUSED" ||
            message.includes("ECONNREFUSED") ||
            message.includes("Failed to fetch") ||
            message.includes("fetch failed")) {
            return new NetworkError(`LM Studio server not reachable at ${this.baseURL}. ` +
                `Open the LM Studio app, load a model, and click "Start Server".`, "lm-studio");
        }
        if (message.includes("model_not_found") || message.includes("404")) {
            // Same fallback chain as telemetry/analytics so the message never
            // shows an empty model name when modelName has not been resolved.
            const modelLabel = this.modelName || this.discoveredModel || FALLBACK_MODEL;
            return new InvalidModelError(`LM Studio model '${modelLabel}' is not loaded. Load it in the LM Studio app first.`, "lm-studio");
        }
        return new ProviderError(`LM Studio error: ${message}`, "lm-studio");
    }

    /**
     * Health check: probe `/models` and report whether at least one model with
     * a non-blank string id is listed.
     *
     * @returns {Promise<boolean>} `false` on any non-OK response, network
     *   error, timeout, unparsable body, or empty model list.
     */
    async validateConfiguration() {
        try {
            const proxyFetch = createProxyFetch();
            const r = await proxyFetch(buildModelsUrl(this.baseURL), {
                headers: buildAuthHeaders(this.apiKey),
                signal: AbortSignal.timeout(MODEL_DISCOVERY_TIMEOUT_MS),
            });
            if (!r.ok) {
                return false;
            }
            // A 200 with an empty data array means LM Studio is up but no model is
            // loaded — `getAISDKModel()` will fall back to FALLBACK_MODEL and the
            // first real request will fail. Require at least one loaded model so
            // health checks honestly reflect whether the provider is usable.
            const data = (await r.json().catch(() => null));
            return extractModelIds(data).length > 0;
        }
        catch {
            // Deliberate best-effort: any failure means "not usable".
            return false;
        }
    }

    /**
     * @returns {{ provider: string, model: string, defaultModel: string,
     *   baseURL: string }} Snapshot of the provider's current settings.
     */
    getConfiguration() {
        return {
            provider: this.providerName,
            model: this.modelName || this.discoveredModel || FALLBACK_MODEL,
            defaultModel: this.getDefaultModel(),
            baseURL: this.baseURL,
        };
    }
}

export default LMStudioProvider;