@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

github.com/juspay/neurolink

juspay/neurolink

1,190 lines (1,189 loc) • 43.7 kB

TypeScript

import type { Schema, Tool, ToolChoice, StepResult, LanguageModel } from "ai"; import type { AIProviderName } from "../constants/enums.js"; import type { RAGConfig } from "./rag.js"; import type { AnalyticsData, TokenUsage } from "./analytics.js"; import type { JsonValue } from "./common.js"; import type { Content, ImageWithAltText } from "./content.js"; import type { ChatMessage, ConversationMemoryConfig } from "./conversation.js"; import type { EvaluationData } from "./evaluation.js"; import type { MiddlewareFactoryOptions, OnFinishCallback, OnErrorCallback } from "./middleware.js"; import type { DirectorModeOptions, DirectorSegment, VideoGenerationResult, VideoOutputOptions } from "./multimodal.js"; import type { PPTGenerationResult, PPTOutputOptions } from "./ppt.js"; import type { TTSOptions, TTSResult } from "./tts.js"; import type { STTOptions, STTResult } from "./stt.js"; import type { AvatarOptions, AvatarResult } from "./avatar.js"; import type { MusicOptions, MusicResult } from "./music.js"; import type { StandardRecord, ValidationSchema, ZodUnknownSchema } from "./aliases.js"; import type { NeurolinkCredentials } from "./providers.js"; import type { FileWithMetadata } from "./file.js"; import type { WorkflowConfig } from "./workflow.js"; /** * Generate function options type - Primary method for content generation * Supports multimodal content while maintaining backward compatibility */ export type GenerateOptions = { /** * Input content for generation. Optional for media-only modes (avatar, music, * video) where all configuration lives in `output`; the SDK synthesises an * empty `input` automatically when this field is omitted. */ input?: { /** Prompt text. Optional for media-only modes (avatar, music) that are driven by uploaded files rather than a prompt. */ text?: string; /** * Images to include in the request. * Supports simple image data (Buffer, string) or objects with alt text for accessibility. 
* * @example Simple usage * ```typescript * images: [imageBuffer, "https://example.com/image.jpg"] * ``` * * @example With alt text for accessibility * ```typescript * images: [ * { data: imageBuffer, altText: "Product screenshot showing main dashboard" }, * { data: "https://example.com/chart.png", altText: "Sales chart for Q3 2024" } * ] * ``` */ images?: Array<Buffer | string | ImageWithAltText>; csvFiles?: Array<Buffer | string>; pdfFiles?: Array<Buffer | string>; videoFiles?: Array<Buffer | string>; files?: Array<Buffer | string | FileWithMetadata>; content?: Content[]; /** * Director Mode segments. When provided, Director Mode is activated automatically. * Each segment contains its own prompt and image. * Must contain 2-10 segments. */ segments?: DirectorSegment[]; }; /** * Output configuration options * * @example Text output (default) * ```typescript * output: { format: "text" } * ``` * * @example Video generation with Veo 3.1 * ```typescript * output: { * mode: "video", * video: { * resolution: "1080p", * length: 8, * aspectRatio: "16:9", * audio: true * } * } * ``` */ output?: { /** Output format for text generation */ format?: "text" | "structured" | "json"; /** * Output mode - determines the type of content generated * - "text": Standard text generation (default) * - "video": Video generation using models like Veo 3.1 * - "ppt": PowerPoint presentation generation * - "avatar": Talking-head / lip-sync video (D-ID, HeyGen, Replicate-MuseTalk) * - "music": Music / sound generation (Beatoven, ElevenLabs Music, Lyria, Replicate) */ mode?: "text" | "video" | "ppt" | "avatar" | "music"; /** * Video generation configuration (used when mode is "video") * Requires an input image and text prompt */ video?: VideoOutputOptions; /** * PowerPoint generation configuration (used when mode is "ppt") * Generates slides based on text prompt */ ppt?: PPTOutputOptions; /** * Director Mode configuration (only used when input.segments is provided) * Controls transition prompts, 
durations, and concurrency. */ director?: DirectorModeOptions; /** * Avatar generation configuration (used when mode is "avatar") * Combines a portrait image with audio (or text via TTS) to produce * a lip-synced talking-head video. */ avatar?: AvatarOptions; /** * Music generation configuration (used when mode is "music") * Generates music / sound from a text prompt. */ music?: MusicOptions; }; csvOptions?: { maxRows?: number; formatStyle?: "raw" | "markdown" | "json"; includeHeaders?: boolean; }; videoOptions?: { frames?: number; quality?: number; format?: "jpeg" | "png"; transcribeAudio?: boolean; }; /** * Text-to-Speech (TTS) configuration * * Enable audio generation from the text response. The generated audio will be * returned in the result's `audio` field as a TTSResult object. * * @example Basic TTS * ```typescript * const result = await neurolink.generate({ * input: { text: "Tell me a story" }, * provider: "google-ai", * tts: { enabled: true, voice: "en-US-Neural2-C" } * }); * console.log(result.audio?.buffer); // Audio Buffer * ``` * * @example Advanced TTS with options * ```typescript * const result = await neurolink.generate({ * input: { text: "Speak slowly and clearly" }, * provider: "google-ai", * tts: { * enabled: true, * voice: "en-US-Neural2-D", * speed: 0.8, * pitch: 2.0, * format: "mp3", * quality: "standard" * } * }); * ``` */ tts?: TTSOptions; /** * Speech-to-Text (STT) configuration * * Enable audio transcription. When enabled, the audio provided via `stt.audio` * will be transcribed to text and used as the prompt. 
* * @example * ```typescript * const neurolink = new NeuroLink(); * const result = await neurolink.generate({ * input: { text: "" }, * provider: "openai", * stt: { enabled: true, provider: "whisper", language: "en-US", audio: audioBuffer } * }); * // STT transcribes the audio, result.transcription contains the transcription * ``` */ stt?: STTOptions & { provider?: string; audio?: Buffer | ArrayBuffer; }; /** * Thinking/reasoning configuration for extended thinking models * * Enables extended thinking capabilities for supported models. * * **Gemini 3 Models** (gemini-3.1-pro-preview, gemini-3-flash-preview): * Use `thinkingLevel` to control reasoning depth: * - `minimal` - Near-zero thinking (Flash only) * - `low` - Fast reasoning for simple tasks * - `medium` - Balanced reasoning/latency * - `high` - Maximum reasoning depth (default for Pro) * * **Anthropic Claude** (claude-3-7-sonnet, etc.): * Use `budgetTokens` to set token budget for thinking. * * @example Gemini 3 with thinking level * ```typescript * const result = await neurolink.generate({ * input: { text: "Solve this complex problem..." }, * provider: "google-ai", * model: "gemini-3.1-pro-preview", * thinkingConfig: { * thinkingLevel: "high" * } * }); * ``` * * @example Anthropic with budget tokens * ```typescript * const result = await neurolink.generate({ * input: { text: "Solve this complex math problem..." }, * provider: "anthropic", * model: "claude-3-7-sonnet-20250219", * thinkingConfig: { * enabled: true, * budgetTokens: 10000 * } * }); * ``` */ thinkingConfig?: { enabled?: boolean; type?: "enabled" | "disabled"; /** Token budget for thinking (Anthropic models) */ budgetTokens?: number; /** Thinking level for Gemini 3 models: minimal, low, medium, high */ thinkingLevel?: "minimal" | "low" | "medium" | "high"; }; provider?: AIProviderName | string; model?: string; region?: string; temperature?: number; maxTokens?: number; /** Top-p (nucleus) sampling parameter. Controls diversity of generated tokens. 
*/ topP?: number; /** Top-k sampling parameter. Limits the number of tokens considered. (Google/Gemini models only) */ topK?: number; /** Stop sequences that will halt generation when encountered. */ stopSequences?: string[]; systemPrompt?: string; /** * Zod schema for structured output validation * * @important Google Gemini Limitation * Google Vertex AI and Google AI Studio cannot combine function calling with * structured output. You MUST use `disableTools: true` when using schemas with * Google providers. * * Error without disableTools: "Function calling with a response mime type: * 'application/json' is unsupported" * * This is a documented Google API limitation, not a NeuroLink bug. * All frameworks (LangChain, Vercel AI SDK, Agno, Instructor) use this approach. * * @example * ```typescript * // ✅ Correct for Google providers * const result = await neurolink.generate({ * schema: MySchema, * provider: "vertex", * disableTools: true // Required for Google * }); * * // ✅ No restriction for other providers * const result = await neurolink.generate({ * schema: MySchema, * provider: "openai" // Works without disableTools * }); * ``` * * @see https://ai.google.dev/gemini-api/docs/function-calling */ schema?: ValidationSchema; tools?: Record<string, Tool>; /** * Filter available tools by name. * Only tools with names in this array will be made available. * Used by dynamic arguments to dynamically select which tools to enable. 
* * @example * ```typescript * await neurolink.generate({ * input: { text: "Search for information" }, * enabledToolNames: ["websearchGrounding", "readFile"] * }); * ``` */ enabledToolNames?: string[]; timeout?: number | string; /** AbortSignal for external cancellation of the AI call */ abortSignal?: AbortSignal; /** * Disable tool execution (including built-in tools) * * @required For Google Gemini providers when using schemas * Google Vertex AI and Google AI Studio require this flag when using * structured output (schemas) due to Google API limitations. * * @example * ```typescript * // Required for Google providers with schemas * await neurolink.generate({ * schema: MySchema, * provider: "vertex", * disableTools: true * }); * ``` */ disableTools?: boolean; /** Include only these tools by name (whitelist). If set, only matching tools are available. */ toolFilter?: string[]; /** Exclude these tools by name (blacklist). Applied after toolFilter. */ excludeTools?: string[]; /** * Skip injecting tool schemas into the system prompt. * When true, tools are ONLY passed natively via the provider's `tools` parameter, * avoiding duplicate tool definitions (~30K tokens savings per call). * Default: false (backward compatible — tool schemas are injected into system prompt). */ skipToolPromptInjection?: boolean; /** Disable tool result caching for this request (overrides global mcp.cache.enabled) */ disableToolCache?: boolean; /** Maximum number of tool execution steps (default: 200) */ maxSteps?: number; /** * Tool choice configuration for the generation. * Controls whether and which tools the model must call. * * - `"auto"` (default): the model can choose whether and which tools to call * - `"none"`: no tool calls allowed * - `"required"`: the model must call at least one tool * - `{ type: "tool", toolName: string }`: the model must call the specified tool * * Note: When used without `prepareStep`, this applies to **every step** in the * `maxSteps` loop. 
Using `"required"` or `{ type: "tool" }` without `prepareStep` * will cause infinite tool calls until `maxSteps` is exhausted. */ toolChoice?: ToolChoice<Record<string, Tool>>; /** * Optional callback that runs before each step in a multi-step generation. * Allows dynamically changing `toolChoice` and available tools per step. * * This is the recommended way to enforce specific tool calls on certain steps * while allowing the model freedom on others. * * Maps to Vercel AI SDK's `experimental_prepareStep`. * * @example Force a specific tool on step 0, then switch to auto: * ```typescript * prepareStep: ({ stepNumber, steps }) => { * if (stepNumber === 0) { * return { * toolChoice: { type: 'tool', toolName: 'myTool' } * }; * } * return { toolChoice: 'auto' }; * } * ``` * * @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters */ prepareStep?: (options: { steps: StepResult<Record<string, Tool>>[]; stepNumber: number; maxSteps: number; model: LanguageModel; }) => PromiseLike<{ model?: LanguageModel; toolChoice?: ToolChoice<Record<string, Tool>>; experimental_activeTools?: string[]; } | undefined>; enableEvaluation?: boolean; enableAnalytics?: boolean; context?: StandardRecord; evaluationDomain?: string; toolUsageContext?: string; /** * @deprecated Use `conversationMessages` instead. This field uses a simple `{role, content}` shape * that is not consumed by `buildMessagesArray()` — messages passed here will NOT reach the AI model * as proper conversation turns. `conversationMessages` uses the full `ChatMessage` type and is * correctly wired through the entire generate pipeline. */ conversationHistory?: Array<{ role: string; content: string; }>; /** * Previous conversation as a ChatMessage array. * Messages are injected as proper multi-turn conversation history before the current prompt, * so the AI model sees them as real prior exchanges (not text dumped into the prompt). * Used by task continuation mode and available to external callers. 
*/ conversationMessages?: ChatMessage[]; factoryConfig?: { domainType?: string; domainConfig?: StandardRecord; enhancementType?: "domain-configuration" | "streaming-optimization" | "mcp-integration" | "legacy-migration" | "context-conversion"; preserveLegacyFields?: boolean; validateDomainData?: boolean; }; streaming?: { enabled?: boolean; chunkSize?: number; bufferSize?: number; enableProgress?: boolean; fallbackToGenerate?: boolean; }; workflow?: string; workflowConfig?: WorkflowConfig; /** * RAG (Retrieval-Augmented Generation) configuration. * * When provided, NeuroLink automatically loads the specified files, chunks them, * generates embeddings, and creates a search tool that the AI model can invoke * on demand to find relevant context before answering. * * @example Basic RAG * ```typescript * const result = await neurolink.generate({ * input: { text: "What is RAG?" }, * provider: "vertex", * rag: { * files: ["./docs/guide.md"], * } * }); * ``` * * @example Advanced RAG with options * ```typescript * const result = await neurolink.generate({ * input: { text: "Explain chunking strategies" }, * provider: "vertex", * rag: { * files: ["./docs/guide.md", "./docs/api.md"], * strategy: "markdown", * chunkSize: 512, * topK: 5, * } * }); * ``` */ rag?: RAGConfig; /** * Maximum budget in USD for this session. When the accumulated cost of all * generate() calls on this NeuroLink instance exceeds this value, subsequent * calls will throw a budget-exceeded error before making the API request. * * @example * ```typescript * const result = await neurolink.generate({ * input: { text: "Summarize this" }, * maxBudgetUsd: 1.00 * }); * ``` */ maxBudgetUsd?: number; /** * Optional request identifier for observability and log correlation. * When provided, this ID is forwarded to spans, logs, and telemetry so * callers can correlate generation traces back to their own request lifecycle. */ requestId?: string; /** * File reference registry for on-demand file processing. 
* * When set, files above the "tiny" size tier (>10KB) will be registered * as lightweight references instead of being fully loaded into the prompt. * The LLM can then access file content on-demand via file tools * (list_attached_files, read_file_section, search_in_file). * * @internal Set by NeuroLink SDK — not typically used directly by consumers. */ fileRegistry?: unknown; /** Per-call middleware configuration. */ middleware?: MiddlewareFactoryOptions; /** Callback invoked when generation completes successfully. */ onFinish?: OnFinishCallback; /** Callback invoked when generation encounters an error. */ onError?: OnErrorCallback; /** Pre-validated user context for the request */ requestContext?: Record<string, unknown>; /** Raw auth token — validated by configured auth provider */ auth?: { token: string; }; /** * Per-provider credential overrides for this request. * Overrides instance-level credentials set in `new NeuroLink({ credentials })`. * Unset providers fall through to instance credentials, then environment variables. */ credentials?: NeurolinkCredentials; /** * Curator P2-3: per-call fallback callback. Overrides any * instance-level `providerFallback` set on `new NeuroLink({...})`. */ providerFallback?: (error: unknown) => Promise<{ provider?: string; model?: string; } | null>; /** * Curator P2-3: per-call ordered model chain. Overrides any * instance-level `modelChain`. Tried in order on model-access-denied. */ modelChain?: string[]; /** * Per-call memory control. * * Override the global memory SDK behavior for this specific call. * All flags default to `true` when the global memory SDK is enabled. * If the global memory SDK is disabled, these flags have no effect. * */ memory?: { /** Master toggle for this call. When false, both read and write are skipped. Defaults to true. */ enabled?: boolean; /** Whether to read condensed memory and prepend to prompt. Defaults to true. 
*/ read?: boolean; /** Whether to write (add/condense) the conversation into memory after completion. Defaults to true. */ write?: boolean; /** * Additional users whose memory should be retrieved/stored alongside the primary user. * Each entry can override the condensation prompt and maxWords for that user. * Primary user is still determined by context.userId. */ additionalUsers?: AdditionalMemoryUser[]; }; }; /** * Represents an additional user whose memory should be included in a generate/stream call. * Allows per-user prompt overrides for different memory condensation strategies * (e.g. personal preferences vs org-level policies). */ export type AdditionalMemoryUser = { /** The user/owner ID to retrieve or store memory for. */ userId: string; /** * Human-readable label used in the formatted memory context. * E.g. "Organization Policy", "Team Context", "User Preferences". * If not provided, defaults to userId. */ label?: string; /** Whether to read this user's memory and include in context. Defaults to true. */ read?: boolean; /** Whether to write conversation into this user's memory. Defaults to true. */ write?: boolean; /** Custom condensation prompt for this user. Overrides the default Hippocampus prompt. */ prompt?: string; /** Max words for this user's condensed memory. Overrides the default maxWords. */ maxWords?: number; }; /** * Generate function result type - Primary output format * Future-ready for multi-modal outputs while maintaining text focus */ export type GenerateResult = { content: string; outputs?: { text: string; }; /** * Text-to-Speech audio result * * Contains the generated audio buffer and metadata when TTS is enabled. * Generated by TTSProcessor.synthesize() using the specified provider. 
* * @example Accessing TTS audio * ```typescript * const result = await neurolink.generate({ * input: { text: "Hello world" }, * provider: "google-ai", * tts: { enabled: true, voice: "en-US-Neural2-C" } * }); * * if (result.audio) { * console.log(`Audio size: ${result.audio.size} bytes`); * console.log(`Format: ${result.audio.format}`); * if (result.audio.duration) { * console.log(`Duration: ${result.audio.duration}s`); * } * if (result.audio.voice) { * console.log(`Voice: ${result.audio.voice}`); * } * // Save or play the audio buffer * fs.writeFileSync('output.mp3', result.audio.buffer); * } * ``` */ audio?: TTSResult; /** * Video generation result * * Contains the generated video buffer and metadata when video mode is enabled. * Present when `output.mode` is set to "video" in GenerateOptions. * * @example Accessing generated video * ```typescript * const result = await neurolink.generate({ * input: { text: "Product showcase", images: [imageBuffer] }, * provider: "vertex", * model: "veo-3.1", * output: { mode: "video", video: { resolution: "1080p" } } * }); * * if (result.video) { * fs.writeFileSync('output.mp4', result.video.data); * console.log(`Duration: ${result.video.metadata?.duration}s`); * console.log(`Dimensions: ${result.video.metadata?.dimensions?.width}x${result.video.metadata?.dimensions?.height}`); * } * ``` */ video?: VideoGenerationResult; /** * Avatar (talking-head) generation result (present when output.mode is "avatar") */ avatar?: AvatarResult; /** * Music generation result (present when output.mode is "music") */ music?: MusicResult; /** * PowerPoint generation result (present when output.mode is "ppt") * * @example * ```typescript * const result = await neurolink.generate({ * input: { text: "Introducing Our New Product" }, * model: "gemini-pro", * output: { mode: "ppt", ppt: { pages: 10, theme: "modern" } } * }); * * if (result.ppt) { * console.log(`Generated ${result.ppt.totalSlides} slides`); * console.log(`Saved at: 
${result.ppt.filePath}`); * } * ``` */ ppt?: PPTGenerationResult; imageOutput?: { base64: string; } | null; provider?: string; model?: string; finishReason?: string; usage?: TokenUsage; responseTime?: number; toolCalls?: Array<{ toolCallId: string; toolName: string; args: StandardRecord; }>; toolResults?: unknown[]; toolsUsed?: string[]; toolExecutions?: Array<{ name: string; input: StandardRecord; output: unknown; }>; enhancedWithTools?: boolean; availableTools?: Array<{ name: string; description: string; parameters: StandardRecord; }>; analytics?: AnalyticsData; evaluation?: EvaluationData; factoryMetadata?: { enhancementApplied: boolean; enhancementType?: string; domainType?: string; processingTime?: number; configurationUsed?: StandardRecord; migrationPerformed?: boolean; legacyFieldsPreserved?: boolean; }; streamingMetadata?: { streamingUsed: boolean; fallbackToGenerate?: boolean; chunkCount?: number; streamingDuration?: number; streamId?: string; bufferOptimization?: boolean; }; workflow?: { originalResponse: string; processedResponse: string; ensembleResponses: Array<{ provider: string; model: string; content: string; responseTime: number; status: "success" | "failure" | "timeout" | "partial"; error?: string; }>; judgeScores?: { scores: Record<string, number>; reasoning?: string; selectedModel: string; }; selectedModel: string; metrics: { totalTime: number; ensembleTime: number; judgeTime?: number; conditioningTime?: number; }; workflowId: string; workflowName: string; }; /** Thinking/reasoning text from provider (Anthropic thinking blocks, Gemini thought parts) */ reasoning?: string; /** Token count for reasoning content */ reasoningTokens?: number; /** STT transcription result (present when stt.enabled is true and audio input was provided) */ transcription?: STTResult; retries?: { count: number; errors: Array<{ code: string; message: string; }>; }; }; /** * Unified options for both generation and streaming * Supports factory patterns and domain 
configuration */ export type UnifiedGenerationOptions = GenerateOptions & { preferStreaming?: boolean; streamingFallback?: boolean; }; /** * Enhanced provider type with generate method */ export type EnhancedProvider = { generate(options: GenerateOptions): Promise<GenerateResult>; getName(): string; isAvailable(): Promise<boolean>; }; /** * Factory-enhanced provider type * Supports domain configuration and streaming optimizations */ export type FactoryEnhancedProvider = EnhancedProvider & { generateWithFactory(options: UnifiedGenerationOptions): Promise<GenerateResult>; getDomainSupport(): string[]; getStreamingCapabilities(): { supportsStreaming: boolean; maxChunkSize: number; bufferOptimizations: boolean; }; }; /** * Text generation options type (consolidated from core types) * Extended to support video generation mode */ export type TextGenerationOptions = { prompt?: string; /** * Alternative input format for multimodal SDK operations. * * NOTE: This field is only used by the higher-level `generate()` API * (NeuroLink.generate, BaseProvider.generate). Legacy `generateText()` * callers must still use the `prompt` field directly. * * Supports text, images, and other multimodal inputs. */ input?: { /** Prompt text. Optional for media-only modes (avatar, music) that are driven by uploaded files rather than a prompt. */ text?: string; /** * Images to include in the request. * For video generation, the first image is used as the source frame. */ images?: Array<Buffer | string | ImageWithAltText>; pdfFiles?: Array<Buffer | string>; files?: Array<Buffer | string | FileWithMetadata>; /** Director Mode segments (2-10). When provided, Director Mode is activated. */ segments?: DirectorSegment[]; }; provider?: AIProviderName; model?: string; region?: string; temperature?: number; maxTokens?: number; /** Top-p (nucleus) sampling parameter. Controls diversity of generated tokens. */ topP?: number; /** Top-k sampling parameter. Limits the number of tokens considered. 
(Google/Gemini models only) */ topK?: number; /** Stop sequences that will halt generation when encountered. */ stopSequences?: string[]; systemPrompt?: string; schema?: ZodUnknownSchema | Schema<unknown>; /** * Output configuration options * * @example Video generation * ```typescript * output: { * mode: "video", * video: { resolution: "1080p", length: 8 } * } * ``` */ output?: { format?: "text" | "structured" | "json"; /** * Output mode - determines the type of content generated * - "text": Standard text generation (default) * - "video": Video generation using models like Veo 3.1 * - "ppt": PowerPoint presentation generation * - "avatar": Talking-head / lip-sync video (D-ID, HeyGen, Replicate-MuseTalk) * - "music": Music / sound generation (Beatoven, ElevenLabs Music, Lyria, Replicate) */ mode?: "text" | "video" | "ppt" | "avatar" | "music"; /** * Video generation configuration (used when mode is "video") */ video?: VideoOutputOptions; /** * PowerPoint generation configuration (used when mode is "ppt") */ ppt?: PPTOutputOptions; /** * Director Mode configuration (only used when input.segments is provided) */ director?: DirectorModeOptions; /** * Avatar generation configuration (used when mode is "avatar") */ avatar?: AvatarOptions; /** * Music generation configuration (used when mode is "music") */ music?: MusicOptions; }; tools?: Record<string, Tool>; /** * Filter available tools by name. * Only tools with names in this array will be made available. * Used by dynamic arguments to dynamically select which tools to enable. * Merged into `toolFilter` before tool filtering runs. 
* * @example * ```typescript * await neurolink.generate({ * input: { text: "Search for information" }, * enabledToolNames: ["websearchGrounding", "readFile"] * }); * ``` */ enabledToolNames?: string[]; timeout?: number | string; /** AbortSignal for external cancellation of the AI call */ abortSignal?: AbortSignal; disableTools?: boolean; maxSteps?: number; /** Include only these tools by name (whitelist). If set, only matching tools are available. */ toolFilter?: string[]; /** Exclude these tools by name (blacklist). Applied after toolFilter. */ excludeTools?: string[]; /** Disable tool result caching for this request (overrides global mcp.cache.enabled) */ disableToolCache?: boolean; /** * Tool choice configuration for the generation. * Controls whether and which tools the model must call. * * - `"auto"` (default): the model can choose whether and which tools to call * - `"none"`: no tool calls allowed * - `"required"`: the model must call at least one tool * - `{ type: "tool", toolName: string }`: the model must call the specified tool * * Note: When used without `prepareStep`, this applies to **every step** in the * `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep` * will cause infinite tool calls until `maxSteps` is exhausted. */ toolChoice?: ToolChoice<Record<string, Tool>>; /** * Optional callback that runs before each step in a multi-step generation. * Allows dynamically changing `toolChoice` and available tools per step. * * This is the recommended way to enforce specific tool calls on certain steps * while allowing the model freedom on others. * * Maps to Vercel AI SDK's `experimental_prepareStep`. 
* * @example Force a specific tool on step 0, then switch to auto: * ```typescript * prepareStep: ({ stepNumber, steps }) => { * if (stepNumber === 0) { * return { * toolChoice: { type: 'tool', toolName: 'myTool' } * }; * } * return { toolChoice: 'auto' }; * } * ``` * * @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters */ prepareStep?: (options: { steps: StepResult<Record<string, Tool>>[]; stepNumber: number; maxSteps: number; model: LanguageModel; }) => PromiseLike<{ model?: LanguageModel; toolChoice?: ToolChoice<Record<string, Tool>>; experimental_activeTools?: string[]; } | undefined>; /** * Text-to-Speech (TTS) configuration * * Enable audio generation from text. Behavior depends on useAiResponse flag: * - When useAiResponse is false/undefined (default): TTS synthesizes the input text directly * - When useAiResponse is true: TTS synthesizes the AI-generated response * * @example Using input text (default) * ```typescript * const neurolink = new NeuroLink(); * const result = await neurolink.generate({ * input: { text: "Hello world" }, * provider: "google-ai", * tts: { enabled: true, voice: "en-US-Neural2-C" } * }); * // TTS synthesizes "Hello world" directly, no AI generation * ``` * * @example Using AI response * ```typescript * const neurolink = new NeuroLink(); * const result = await neurolink.generate({ * input: { text: "Tell me a joke" }, * provider: "google-ai", * tts: { enabled: true, useAiResponse: true, voice: "en-US-Neural2-C" } * }); * // AI generates the joke, then TTS synthesizes the AI's response * ``` */ tts?: TTSOptions; /** * Speech-to-Text (STT) configuration * * Enable audio transcription. When enabled, the audio provided via `stt.audio` * will be transcribed to text and used as the prompt. 
* * @example * ```typescript * const neurolink = new NeuroLink(); * const result = await neurolink.generate({ * input: { text: "" }, * provider: "openai", * stt: { enabled: true, provider: "whisper", language: "en-US", audio: audioBuffer } * }); * // STT transcribes the audio, result.transcription contains the transcription * ``` */ stt?: STTOptions & { provider?: string; audio?: Buffer | ArrayBuffer; }; enableEvaluation?: boolean; enableAnalytics?: boolean; context?: Record<string, JsonValue>; evaluationDomain?: string; toolUsageContext?: string; conversationHistory?: Array<{ role: string; content: string; }>; conversationMessages?: ChatMessage[]; conversationMemoryConfig?: Partial<ConversationMemoryConfig>; originalPrompt?: string; middleware?: MiddlewareFactoryOptions; onFinish?: OnFinishCallback; onError?: OnErrorCallback; expectedOutcome?: string; evaluationCriteria?: string[]; csvOptions?: { maxRows?: number; formatStyle?: "raw" | "markdown" | "json"; includeHeaders?: boolean; }; enableSummarization?: boolean; /** * File reference registry for on-demand file processing (internal). * * When set, files above the "tiny" size tier (>10KB) will be registered * as lightweight references instead of being fully loaded into the prompt. * The LLM can then access file content on-demand via file tools. * * @internal Set by NeuroLink SDK — not typically used directly by consumers. */ fileRegistry?: unknown; /** * Skip injecting tool schemas into the system prompt. * When true, tools are ONLY passed natively via the provider's `tools` parameter, * avoiding duplicate tool definitions (~30K tokens savings per call). * Default: false (backward compatible — tool schemas are injected into system prompt). */ skipToolPromptInjection?: boolean; /** * ## Extended Thinking Options * * NeuroLink provides multiple ways to configure extended thinking/reasoning. * These options interact as follows: * * ### Option Hierarchy (Priority: thinkingConfig > individual options) * * 1. 
**`thinkingConfig`** (recommended) - Full configuration object, highest priority * 2. **`thinking`**, **`thinkingBudget`**, **`thinkingLevel`** - Simplified CLI-friendly options * * When both are provided, `thinkingConfig` takes precedence. The simplified options * are automatically merged into `thinkingConfig` internally. * * ### Provider-Specific Behavior * * **Anthropic Claude (claude-3-7-sonnet, etc.):** * - Use `thinkingConfig.budgetTokens` or `thinkingBudget` * - Range: 5000-100000 tokens * - `thinkingLevel` is ignored for Anthropic * * **Google Gemini 3 (gemini-3.1-pro-preview, gemini-3-flash-preview):** * - Use `thinkingConfig.thinkingLevel` or `thinkingLevel` * - Levels: minimal, low, medium, high * - `budgetTokens` is ignored for Gemini (uses level-based allocation) * * ### Option Compatibility Matrix * * | Option | Anthropic | Gemini 3 | Other Providers | * |----------------|-----------|----------|-----------------| * | thinking | Yes | Yes | Ignored | * | thinkingBudget | Yes | Ignored | Ignored | * | thinkingLevel | Ignored | Yes | Ignored | * | thinkingConfig | Yes | Yes | Ignored | * * ### Examples * * ```typescript * // Simplified (CLI-friendly) - Anthropic * { thinking: true, thinkingBudget: 10000 } * * // Simplified (CLI-friendly) - Gemini 3 * { thinking: true, thinkingLevel: "high" } * * // Full config (recommended for SDK) * { thinkingConfig: { enabled: true, budgetTokens: 10000 } } // Anthropic * { thinkingConfig: { thinkingLevel: "high" } } // Gemini 3 * ``` */ /** * Enable extended thinking capability (simplified option). * Equivalent to `thinkingConfig.enabled = true`. * Works with both Anthropic and Gemini 3 models. */ thinking?: boolean; /** * Token budget for thinking (Anthropic models only). * Equivalent to `thinkingConfig.budgetTokens`. * Range: 5000-100000 tokens. Ignored for Gemini models. */ thinkingBudget?: number; /** * Thinking level for Gemini 3 models only. * Equivalent to `thinkingConfig.thinkingLevel`. 
* - `minimal` - Near-zero thinking (Flash only) * - `low` - Light reasoning * - `medium` - Balanced reasoning/latency * - `high` - Deep reasoning (Pro default) * Ignored for Anthropic models. */ thinkingLevel?: "minimal" | "low" | "medium" | "high"; /** * Full thinking/reasoning configuration (recommended for SDK usage). * Takes precedence over simplified options (thinking, thinkingBudget, thinkingLevel). * * @see Above documentation for provider-specific behavior and option compatibility. */ thinkingConfig?: { /** Enable extended thinking. Default: false */ enabled?: boolean; /** Explicit enable/disable type. Alternative to `enabled` boolean. */ type?: "enabled" | "disabled"; /** Token budget for thinking (Anthropic: 5000-100000). Ignored for Gemini. */ budgetTokens?: number; /** Thinking level (Gemini 3: minimal|low|medium|high). Ignored for Anthropic. */ thinkingLevel?: "minimal" | "low" | "medium" | "high"; }; /** * Per-provider credential overrides for this request. * Overrides instance-level credentials set in `new NeuroLink({ credentials })`. * Unset providers fall through to instance credentials, then environment variables. */ credentials?: NeurolinkCredentials; /** * Optional request identifier for observability and log correlation. * When provided, this ID is forwarded to spans, logs, and telemetry so * callers can correlate generation traces back to their own request lifecycle. 
*/ requestId?: string; };
/**
 * Text generation result (consolidated from core types).
 *
 * `content` is the only mandatory member; every other member is optional and
 * may be absent from a given result.
 */
export type TextGenerationResult = {
    /** Generated text content. */
    content: string;
    /** Finish reason, as a free-form string. */
    finishReason?: string;
    /** Provider name associated with this result. */
    provider?: string;
    /** Model name associated with this result. */
    model?: string;
    /** Token usage figures (see `TokenUsage`). */
    usage?: TokenUsage;
    /** Response time. NOTE(review): unit is not stated in this file; confirm. */
    responseTime?: number;
    /** Names of the tools that were used. */
    toolsUsed?: string[];
    /** One record per tool execution: tool name, execution time, success flag and optional server id. */
    toolExecutions?: {
        toolName: string;
        executionTime: number;
        success: boolean;
        serverId?: string;
    }[];
    /** Flag indicating the result was enhanced with tools. */
    enhancedWithTools?: boolean;
    /** Tool descriptors: name, description, owning server and optional category. */
    availableTools?: {
        name: string;
        description: string;
        server: string;
        category?: string;
    }[];
    /** Analytics data attached to the result. */
    analytics?: AnalyticsData;
    /** Evaluation data attached to the result. */
    evaluation?: EvaluationData;
    /** TTS result (see `TTSResult`). */
    audio?: TTSResult;
    /** STT transcription result (present when stt input was processed). */
    transcription?: STTResult;
    /** Video generation result. */
    video?: VideoGenerationResult;
    /** Avatar (talking-head) generation result. */
    avatar?: AvatarResult;
    /** Music generation result. */
    music?: MusicResult;
    /** PowerPoint generation result. */
    ppt?: PPTGenerationResult;
    /** Image generation output; the type permits an explicit `null`. */
    imageOutput?: {
        base64: string;
    } | null;
    /** Gemini 3 thought signature for reasoning continuity across turns. */
    thoughtSignature?: string;
    /**
     * Thinking/reasoning text from the provider (Anthropic thinking blocks,
     * Gemini thought parts, DeepSeek/NIM reasoning_content).
     */
    reasoning?: string;
    /** Token count for reasoning content. */
    reasoningTokens?: number;
    /** Retry summary: a count plus the code and message of each recorded error. */
    retries?: {
        count: number;
        errors: {
            code: string;
            message: string;
        }[];
    };
};

/**
 * Enhanced result type: `GenerateResult` combined with optional analytics
 * and evaluation data.
 */
export type EnhancedGenerateResult = GenerateResult & {
    analytics?: AnalyticsData;
    evaluation?: EvaluationData;
};

/**
 * NL-004: Model alias/deprecation configuration.
 *
 * Allows mapping deprecated model names to their replacements. `aliases` is
 * keyed by model name; each entry names a `target`, an `action`
 * ("warn", "redirect" or "block") and an optional human-readable `reason`.
 * NOTE(review): how each `action` is enforced is not visible in this file.
 */
export type ModelAliasConfig = {
    aliases: Record<string, {
        target: string;
        action: "warn" | "redirect" | "block";
        reason?: string;
    }>;
};

/**
 * Internal alias used by messageBuilder helpers after the entry-point
 * (`buildMultimodalMessagesArray`) has guaranteed that `input` is non-null.
 *
 * Private helper functions that receive post-normalised options should accept
 * this type so they do not have to repeat a null check on every `input.*`
 * access.
 */
export type GenerateOptionsNormalized = GenerateOptions & {
    input: NonNullable<GenerateOptions["input"]>;
};