@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
273 lines (272 loc) • 13.6 kB
JavaScript
import { createOpenAI } from "@ai-sdk/openai";
import { stepCountIs, streamText } from "ai";
import { BaseProvider } from "../core/baseProvider.js";
import { DEFAULT_MAX_STEPS } from "../core/constants.js";
import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
import { isNeuroLink } from "../neurolink.js";
import { createProxyFetch } from "../proxy/proxyFetch.js";
import { createLoggingFetch } from "../utils/loggingFetch.js";
import { tracers, ATTR, withClientStreamSpan } from "../telemetry/index.js";
import { InvalidModelError, NetworkError, ProviderError, } from "../types/index.js";
import { logger } from "../utils/logger.js";
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
import { resolveToolChoice } from "../utils/toolChoice.js";
import { toAnalyticsStreamResult } from "./providerTypeUtils.js";
// Endpoint used when neither credentials nor the environment supply one.
const LM_STUDIO_DEFAULT_BASE_URL = "http://localhost:1234/v1";
// Stand-in API key used when no real key is configured; the provider compares
// against it to decide whether an Authorization header should be sent.
const LM_STUDIO_PLACEHOLDER_KEY = "lm-studio";
// Model id used when no model was requested and auto-discovery found nothing.
const FALLBACK_MODEL = "local-model";
/**
 * Resolve the LM Studio base URL from the environment.
 *
 * @returns {string} `LM_STUDIO_BASE_URL` when it is set to a non-empty value,
 *   otherwise the built-in default endpoint.
 */
const getLmStudioBaseURL = () => {
  const configuredURL = process.env.LM_STUDIO_BASE_URL;
  if (configuredURL) {
    return configuredURL;
  }
  return LM_STUDIO_DEFAULT_BASE_URL;
};
/**
 * LM Studio Provider
 * Wraps the LM Studio local server (https://lmstudio.ai/) which exposes an
 * OpenAI-compatible API at http://localhost:1234/v1 by default.
 * Auto-discovers the loaded model via /v1/models if no model is specified
 * (neither by the caller nor through the LM_STUDIO_MODEL environment variable).
 */
export class LMStudioProvider extends BaseProvider {
  // Memoized AI SDK chat model. Only set after a successful resolution so a
  // discovery miss can be retried on the next call.
  model;
  // The model name passed by the caller — never overwritten by auto-discovery,
  // so a discovery-miss FALLBACK_MODEL never poisons the next call's
  // "was a model explicitly requested?" check.
  requestedModelName;
  // Base URL of the OpenAI-compatible endpoint (credentials > env > default).
  baseURL;
  // API key handed to the AI SDK client; may be the placeholder key.
  apiKey;
  // First usable model id returned by the last successful /models probe.
  discoveredModel;
  // AI SDK OpenAI-compatible client bound to `baseURL` / `apiKey`.
  lmstudioClient;

  /**
   * @param {string} [modelName] - Model requested by the caller; empty or
   *   missing means "use the default / auto-discover".
   * @param {unknown} [sdk] - Passed to the base class only when `isNeuroLink`
   *   accepts it.
   * @param {unknown} [_region] - Unused; kept for signature compatibility.
   * @param {{ baseURL?: string, apiKey?: string }} [credentials] - Optional
   *   per-instance overrides for the endpoint and API key.
   */
  constructor(modelName, sdk, _region, credentials) {
    const validatedNeurolink = isNeuroLink(sdk) ? sdk : undefined;
    super(modelName, "lm-studio", validatedNeurolink);
    this.requestedModelName = modelName;
    this.baseURL = credentials?.baseURL ?? getLmStudioBaseURL();
    // LM Studio's local server doesn't authenticate, but the AI SDK's
    // createOpenAI() requires an apiKey. Allow override via credentials/env
    // for users who run LM Studio behind an auth-proxying reverse-proxy.
    this.apiKey =
      credentials?.apiKey ??
      process.env.LM_STUDIO_API_KEY ??
      LM_STUDIO_PLACEHOLDER_KEY;
    this.lmstudioClient = createOpenAI({
      baseURL: this.baseURL,
      apiKey: this.apiKey,
      fetch: createLoggingFetch("lm-studio"),
    });
    logger.debug("LM Studio Provider initialized", {
      modelName: this.modelName,
      providerName: this.providerName,
      baseURL: this.baseURL,
    });
  }

  /**
   * Build the model-listing URL from `baseURL`.
   * All trailing slashes are stripped so `.../v1/` and `.../v1//` both yield
   * `.../v1/models`.
   *
   * @returns {string} Absolute URL of the `/models` endpoint.
   */
  lmStudioModelsEndpoint() {
    return `${this.baseURL.replace(/\/+$/, "")}/models`;
  }

  /**
   * Headers for the model-listing probe.
   *
   * @returns {{ Authorization: string } | undefined} A bearer header when a
   *   real (non-placeholder) API key is configured, otherwise `undefined`.
   */
  lmStudioDiscoveryHeaders() {
    const hasRealKey =
      Boolean(this.apiKey) && this.apiKey !== LM_STUDIO_PLACEHOLDER_KEY;
    return hasRealKey ? { Authorization: `Bearer ${this.apiKey}` } : undefined;
  }

  /**
   * Model name that should be used without probing the server: the name the
   * caller passed to the constructor, else the `getDefaultModel()` value
   * (LM_STUDIO_MODEL). Blank / whitespace-only names count as "not specified".
   *
   * @returns {string | undefined} The explicit model name, or `undefined` when
   *   auto-discovery should run.
   */
  lmStudioPreferredModelName() {
    const isUsable = (name) => typeof name === "string" && name.trim() !== "";
    if (isUsable(this.requestedModelName)) {
      return this.requestedModelName;
    }
    const configuredDefault = this.getDefaultModel();
    return isUsable(configuredDefault) ? configuredDefault : undefined;
  }

  /**
   * Model name to report in spans, analytics and configuration output.
   *
   * @returns {string} `modelName`, else the discovered model, else the
   *   fallback model id.
   */
  lmStudioModelLabel() {
    return this.modelName || this.discoveredModel || FALLBACK_MODEL;
  }

  /**
   * List the ids of the models the server reports on its `/models` endpoint.
   *
   * @param {AbortSignal} [callerSignal] - Optional caller signal; aborting it
   *   cancels the probe.
   * @returns {Promise<string[]>} Non-blank string model ids, in server order.
   *   Entries without a usable string `id` are dropped.
   * @throws {Error} When the request fails, the status is not OK, or the
   *   payload has no `data` array.
   */
  async getAvailableModels(callerSignal) {
    // Use the proxy-aware fetch + bearer auth header so users running LM
    // Studio behind an auth-proxying reverse-proxy can still discover models.
    // Compose the caller's request signal (per-request timeout / abort) with
    // a fixed 5s discovery cap so cancellation propagates AND a hung server
    // can't stall provider initialization.
    const proxyFetch = createProxyFetch();
    const discoveryTimeout = AbortSignal.timeout(5000);
    const composedSignal = callerSignal
      ? AbortSignal.any([callerSignal, discoveryTimeout])
      : discoveryTimeout;
    const response = await proxyFetch(this.lmStudioModelsEndpoint(), {
      headers: this.lmStudioDiscoveryHeaders(),
      signal: composedSignal,
    });
    if (!response.ok) {
      throw new Error(`LM Studio /v1/models returned ${response.status}: ${response.statusText}`);
    }
    const payload = await response.json();
    if (!Array.isArray(payload?.data)) {
      // Fail with a readable message instead of a TypeError from `.map`.
      throw new Error("LM Studio /v1/models returned an unexpected payload (missing `data` array)");
    }
    // Same usability rule as the health check: an id must be a non-blank
    // string, otherwise it cannot be sent as a model name.
    return payload.data
      .map((entry) => entry?.id)
      .filter((id) => typeof id === "string" && id.trim().length > 0);
  }

  /**
   * Resolve (and, on success, memoize) the AI SDK chat model.
   *
   * Resolution order: memoized model → caller-requested name → configured
   * default (LM_STUDIO_MODEL) → first model reported by `/models` →
   * FALLBACK_MODEL.
   *
   * @param {AbortSignal} [signal] - Forwarded to the discovery probe.
   * @returns {Promise<unknown>} The chat model created by the AI SDK client.
   */
  async getAISDKModel(signal) {
    if (this.model) {
      return this.model;
    }
    // Use the constructor-captured name, not this.modelName —
    // refreshHandlersForModel() mutates this.modelName, so on a retry after a
    // discovery miss the FALLBACK_MODEL would look like an explicit user
    // choice and we'd never re-attempt /v1/models.
    const explicit = this.lmStudioPreferredModelName();
    let modelToUse = explicit;
    // An explicit choice is treated as a successful resolution.
    let resolutionSucceeded = explicit !== undefined;
    if (!resolutionSucceeded) {
      modelToUse = FALLBACK_MODEL;
      try {
        const models = await this.getAvailableModels(signal);
        if (models.length > 0) {
          this.discoveredModel = models[0];
          modelToUse = this.discoveredModel;
          resolutionSucceeded = true;
          logger.info(`LM Studio auto-discovered model: ${modelToUse} (${models.length} loaded)`);
        } else {
          logger.warn("LM Studio /v1/models returned no models. Load a model in the LM Studio app.");
        }
      } catch (error) {
        // Best-effort probe: fall through to FALLBACK_MODEL and let the next
        // call retry discovery.
        logger.warn(`LM Studio model auto-discovery failed: ${error instanceof Error ? error.message : String(error)}`);
      }
    }
    // Persist resolved model on the instance and rebuild the composed
    // handlers (TelemetryHandler, MessageBuilder, etc.) so pricing /
    // telemetry / span attributes report the discovered model name. Plain
    // assignment to `this.modelName` is not enough — handlers cached the
    // pre-discovery value at construction time.
    this.refreshHandlersForModel(modelToUse);
    // .chat() — LM Studio exposes /v1/chat/completions, not /v1/responses
    const resolvedModel = this.lmstudioClient.chat(modelToUse);
    // Only memoize on actual success. After a discovery miss (server down,
    // empty /v1/models, /models 5xx), starting LM Studio or loading a model
    // should let the next call re-attempt discovery instead of being stuck
    // on FALLBACK_MODEL for the lifetime of this provider instance.
    if (resolutionSucceeded) {
      this.model = resolvedModel;
    }
    return resolvedModel;
  }

  /**
   * Stream entry point: resolves the model, then runs the stream inside a
   * provider tracing span.
   *
   * @param {object} options - Stream options; `abortSignal` is honored during
   *   model discovery as well.
   * @param {unknown} [_analysisSchema] - Unused.
   * @returns {Promise<object>} The result of `executeStreamInner` with its
   *   `stream` replaced by the span-wrapped stream.
   */
  async executeStream(options, _analysisSchema) {
    // Resolve the LM Studio model BEFORE opening the span so OTEL
    // attributes, MessageBuilder, and downstream image/tool adapters all see
    // the discovered model id rather than the empty pre-discovery placeholder.
    // Pass the caller's abort signal so user cancellation / per-request
    // timeouts are honored during the discovery probe (not just after it).
    await this.getAISDKModel(options.abortSignal);
    return withClientStreamSpan(
      {
        name: "neurolink.provider.stream",
        tracer: tracers.provider,
        attributes: {
          [ATTR.GEN_AI_SYSTEM]: "lm-studio",
          [ATTR.GEN_AI_MODEL]: this.lmStudioModelLabel(),
          [ATTR.GEN_AI_OPERATION]: "stream",
          [ATTR.NL_STREAM_MODE]: true,
        },
      },
      async () => this.executeStreamInner(options),
      (r) => r.stream,
      (r, wrapped) => ({ ...r, stream: wrapped }),
    );
  }

  /**
   * Build messages and tools, start `streamText`, and package the result.
   *
   * @param {object} options - Stream options (tools, temperature, maxTokens,
   *   maxSteps, abortSignal, ...).
   * @returns {Promise<{ stream: unknown, provider: string, model: string,
   *   analytics: unknown, metadata: { startTime: number, streamId: string } }>}
   * @throws The error produced by `handleProviderError` for any failure.
   */
  async executeStreamInner(options) {
    this.validateStreamOptions(options);
    const startTime = Date.now();
    const timeout = this.getTimeout(options);
    const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
    try {
      const shouldUseTools = !options.disableTools && this.supportsTools();
      const tools = shouldUseTools
        ? options.tools || (await this.getAllTools())
        : {};
      // Resolve the AI SDK model BEFORE building messages so message/image
      // adapters see the same handlers/model that streamText will use. Without
      // this, a fallback warm-up + late-server-start pattern could build
      // messages under FALLBACK_MODEL handlers and stream under a different
      // discovered model — and pay an extra `/v1/models` probe each time.
      const model = await this.getAISDKModelWithMiddleware(options);
      const messages = await this.buildMessagesForStream(options);
      const result = await streamText({
        model,
        messages,
        temperature: options.temperature,
        maxOutputTokens: options.maxTokens,
        tools,
        stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
        toolChoice: resolveToolChoice(options, tools, shouldUseTools),
        abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
        experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
        experimental_repairToolCall: this.getToolCallRepairFn(options),
        onStepFinish: ({ toolCalls, toolResults }) => {
          emitToolEndFromStepFinish(this.neurolink?.getEventEmitter(), toolResults);
          // Storage is fire-and-forget; a failure is logged, never thrown.
          this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
            logger.warn("[LMStudioProvider] Failed to store tool executions", {
              provider: this.providerName,
              error: error instanceof Error ? error.message : String(error),
            });
          });
        },
      });
      const transformedStream = this.createTextStream(result);
      const modelLabel = this.lmStudioModelLabel();
      const analyticsPromise = streamAnalyticsCollector.createAnalytics(
        this.providerName,
        modelLabel,
        toAnalyticsStreamResult(result),
        Date.now() - startTime,
        {
          requestId: `lmstudio-stream-${Date.now()}`,
          streamingMode: true,
        },
      );
      return {
        stream: transformedStream,
        provider: this.providerName,
        model: modelLabel,
        analytics: analyticsPromise,
        metadata: { startTime, streamId: `lmstudio-${Date.now()}` },
      };
    } catch (error) {
      throw this.handleProviderError(error);
    } finally {
      // Single cleanup point for both the success and the failure path.
      // NOTE(review): cleanup runs when the stream object is handed back, so
      // the timeout presumably bounds setup only, not stream consumption —
      // confirm this is intended.
      timeoutController?.cleanup();
    }
  }

  /** @returns {string} The provider name held by the base class. */
  getProviderName() {
    return this.providerName;
  }

  /**
   * @returns {string} `LM_STUDIO_MODEL` from the environment, or an empty
   *   string when unset (meaning "auto-discover").
   */
  getDefaultModel() {
    return process.env.LM_STUDIO_MODEL || "";
  }

  /**
   * Map a raw failure onto one of the provider error types.
   *
   * @param {unknown} error - Anything thrown while talking to LM Studio.
   * @returns {Error} `NetworkError` for timeouts and unreachable servers,
   *   `InvalidModelError` for missing models, `ProviderError` otherwise.
   */
  formatProviderError(error) {
    if (error instanceof TimeoutError) {
      return new NetworkError(`Request timed out: ${error.message}`, "lm-studio");
    }
    const errorRecord = error;
    const message =
      typeof errorRecord?.message === "string"
        ? errorRecord.message
        : "Unknown error";
    // The connection error code may sit on the error or on its `cause`.
    const cause = errorRecord?.cause ?? {};
    const code = errorRecord?.code ?? cause?.code;
    const isUnreachable =
      code === "ECONNREFUSED" ||
      message.includes("ECONNREFUSED") ||
      message.includes("Failed to fetch") ||
      message.includes("fetch failed");
    if (isUnreachable) {
      return new NetworkError(
        `LM Studio server not reachable at ${this.baseURL}. Open the LM Studio app, load a model, and click "Start Server".`,
        "lm-studio",
      );
    }
    if (message.includes("model_not_found") || message.includes("404")) {
      return new InvalidModelError(`LM Studio model '${this.modelName}' is not loaded. Load it in the LM Studio app first.`, "lm-studio");
    }
    return new ProviderError(`LM Studio error: ${message}`, "lm-studio");
  }

  /**
   * Health check: the provider is usable only when the server answers and
   * reports at least one model with a usable id.
   *
   * A 200 with an empty list means LM Studio is up but no model is loaded —
   * `getAISDKModel()` would fall back to FALLBACK_MODEL and the first real
   * request would fail, so that case reports `false`.
   *
   * @returns {Promise<boolean>} `true` when at least one model is loaded;
   *   `false` on any network, status or payload problem. Never throws.
   */
  async validateConfiguration() {
    try {
      const models = await this.getAvailableModels();
      return models.length > 0;
    } catch {
      return false;
    }
  }

  /**
   * @returns {{ provider: string, model: string, defaultModel: string,
   *   baseURL: string }} Snapshot of the effective provider configuration.
   */
  getConfiguration() {
    return {
      provider: this.providerName,
      model: this.lmStudioModelLabel(),
      defaultModel: this.getDefaultModel(),
      baseURL: this.baseURL,
    };
  }
}
export default LMStudioProvider;