@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…
340 lines (339 loc) • 14.3 kB
TypeScript
import type { ZodType } from "zod";
import { type Schema, type LanguageModel } from "ai";
import { AIProviderName } from "../constants/enums.js";
import { BaseProvider } from "../core/baseProvider.js";
import type { EnhancedGenerateResult, TextGenerationOptions, StreamOptions, StreamResult } from "../types/index.js";
/**
* Resolve the effective Vertex region for a given model.
*
* Policy (matches the bugfixes-suite contract):
* - Every Gemini model (`gemini-*`) is force-routed to the `global` endpoint,
* regardless of any caller-supplied region. Regional endpoints return 404 for
* Gemini 3.x previews, and regional/global behaviour for 2.x is consistent
* enough that pinning all Gemini traffic to `global` is the safe default.
* The legacy `GLOBAL_LOCATION_MODELS` allowlist is kept as a
* defence-in-depth fallback so that any non-`gemini-` identifiers which
* still need `global` (e.g. image-gen aliases) keep working.
* - Non-Gemini models (Claude on Vertex, embeddings, custom models) keep
* the caller-supplied region or fall back to env-derived defaults.
*
* NOTE(review): `modelName` may be `undefined`; how that case is resolved is
* not visible from this declaration — presumably it follows the non-Gemini
* path. Confirm against the implementation.
*
* @param modelName - The target model identifier.
* @param configuredLocation - Caller-provided region (e.g. `options.region`).
* Used as the fallback for non-Gemini models; ignored for Gemini.
* @returns The region string to pass to the @google/genai client.
*
* @example Expected results under the policy described above
* ```typescript
* resolveVertexLocation("gemini-2.5-flash", "us-central1"); // "global"
* resolveVertexLocation("claude-3-5-sonnet-20241022", "us-east5"); // "us-east5"
* ```
*/
export declare const resolveVertexLocation: (modelName: string | undefined, configuredLocation?: string) => string;
/**
* Google Vertex AI Provider v2 - BaseProvider Implementation
*
* Features:
* - Extends BaseProvider for shared functionality
* - Preserves existing Google Cloud authentication
* - Maintains Anthropic model support via dynamic imports
* - Fresh model creation for each request
* - Enhanced error handling with setup guidance
* - Tool registration and context management
*
* @remarks
* Tools + schema support (fixed): Gemini models on Vertex AI now support
* combining function calling (tools) with structured output (JSON schema)
* simultaneously. The fix works by NOT setting
* `responseMimeType: "application/json"` when tools are present, which was
* causing the Google API error.
*
* The `responseSchema` is still set to guide the output structure, allowing
* tools to execute AND the final output to follow the schema format.
*
* "Too many states for serving" errors can still occur with very complex
* schemas + tools. If this occurs, simplify the schema or reduce the number
* of tools.
*
* @example Gemini models with tools + schemas
* ```typescript
* const provider = new GoogleVertexProvider("gemini-2.5-flash");
* const result = await provider.generate({
* input: { text: "Analyze data using tools" },
* schema: MySchema,
* output: { format: "json" },
* // No need for disableTools: true anymore!
* });
* ```
*
* @example Claude models (always supported both)
* ```typescript
* const provider = new GoogleVertexProvider("claude-3-5-sonnet-20241022");
* const result = await provider.generate({
* input: { text: "Analyze data" },
* schema: MySchema,
* output: { format: "json" }
* });
* ```
*
* @see https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models
*/
export declare class GoogleVertexProvider extends BaseProvider {
/** Google Cloud project identifier. NOTE(review): type and origin are not visible in this declaration — confirm. */
private projectId;
/** Vertex region used by this instance. NOTE(review): presumably derived from the constructor's `region` argument — confirm. */
private location;
/** Tools added through {@link registerTool}. NOTE(review): container type is not visible here — confirm. */
private registeredTools;
/** Context exposed through {@link setToolContext} / {@link getToolContext}. */
private toolContext;
/** Static model-configuration cache; its size is reported as `modelConfigCacheSize` by {@link getCacheStats}. */
private static modelConfigCache;
/** Timestamp bookkeeping for `modelConfigCache`. NOTE(review): unit/epoch not visible here — confirm. */
private static modelConfigCacheTime;
/** Cache lifetime constant. NOTE(review): value and unit are not visible in this declaration — confirm. */
private static readonly CACHE_DURATION;
/** Upper bound on cache entries; reported as `maxCacheSize` by {@link getCacheStats}. */
private static readonly MAX_CACHE_SIZE;
/** Static maxTokens-decision cache; its size is reported as `maxTokensCacheSize` by {@link getCacheStats}. */
private static maxTokensCache;
/** Timestamp bookkeeping for `maxTokensCache`. NOTE(review): unit/epoch not visible here — confirm. */
private static maxTokensCacheTime;
/**
* Create a Vertex provider instance.
*
* @param modelName - Target model identifier. NOTE(review): when omitted, presumably
* falls back to {@link getDefaultModel} — confirm.
* @param _providerName - NOTE(review): the underscore prefix suggests this argument is
* accepted for signature compatibility and not used — confirm.
* @param sdk - Opaque SDK handle (typed `unknown`); its use is not visible in this declaration.
* @param region - Caller-provided Vertex region. NOTE(review): presumably the
* "configured location" consulted for non-Gemini models — confirm.
* @param credentials - Optional credentials object. NOTE(review): expected keys are
* not visible in this declaration — confirm.
*/
constructor(modelName?: string, _providerName?: string, sdk?: unknown, region?: string, credentials?: Record<string, unknown>);
/**
* Identify this provider to the BaseProvider machinery.
* @returns The `AIProviderName` enum member for this provider.
*/
protected getProviderName(): AIProviderName;
/**
* @returns The model identifier used when no model name is supplied.
* NOTE(review): the concrete default is not visible in this declaration.
*/
protected getDefaultModel(): string;
/**
* Returns the Vercel AI SDK model instance for Google Vertex
* Creates fresh model instances for each request
*/
protected getAISDKModel(): Promise<LanguageModel>;
/**
* Initialize model creation tracking
*/
private initializeModelCreationLogging;
/**
* Check if model is Anthropic-based and attempt creation
*/
private attemptAnthropicModelCreation;
/**
* Create Google Vertex model with comprehensive logging and error handling
*/
private createGoogleVertexModel;
/**
* @deprecated This method is no longer used. All models now use native SDKs.
*/
private createVertexInstance;
/**
* Gets the appropriate model instance (Google or Anthropic)
* Uses dual provider architecture for proper model routing
* Creates fresh instances for each request to ensure proper authentication
*/
private getModel;
/**
* Validate stream options
*/
private validateStreamOptionsOnly;
/**
* Provider-specific streaming implementation.
*
* @param options - Stream request options.
* @param _analysisSchema - Optional Zod or AI SDK schema. NOTE(review): the underscore
* prefix suggests it is not used by this implementation — confirm.
* @returns A promise resolving to the stream result.
*/
protected executeStream(options: StreamOptions, _analysisSchema?: ZodType<unknown> | Schema<unknown>): Promise<StreamResult>;
/**
* Emit `stream:end` so the Pipeline B observability listener creates a
* `model.generation` span for native Vertex stream traffic. Mirrors
* `emitGenerationEnd` (used by `generate()`).
*/
private emitStreamEnd;
/**
* Create @google/genai client configured for Vertex AI
*/
private createVertexGenAIClient;
/**
* Execute stream using native @google/genai SDK for Gemini 3 models on Vertex AI
* This bypasses @ai-sdk/google-vertex to properly handle thought_signature
*/
private executeNativeGemini3Stream;
/**
* Execute generate using native @google/genai SDK for Gemini 3 models on Vertex AI
* This bypasses @ai-sdk/google-vertex to properly handle thought_signature
*/
private executeNativeGemini3Generate;
/**
* Create native AnthropicVertex client for Claude models
*/
private createAnthropicVertexClient;
/**
* Execute stream using native @anthropic-ai/vertex-sdk for Claude models on Vertex AI
* This bypasses @ai-sdk/google-vertex completely and uses Anthropic's native SDK
*/
private executeNativeAnthropicStream;
/**
* Execute generate using native @anthropic-ai/vertex-sdk for Claude models on Vertex AI
*/
private executeNativeAnthropicGenerate;
/**
* Process CSV files and append content to options.input.text
* This ensures CSV data is available in the prompt for native Gemini 3 SDK calls
* Returns a new options object with modified input (immutable pattern)
*/
private processCSVFilesForNativeSDK;
/**
* Override stream to handle image generation models
* Image models don't support streaming, so we fall back to generate
*
* @param optionsOrPrompt - Either full stream options or a bare prompt string.
* @returns A promise resolving to the stream result.
*/
stream(optionsOrPrompt: StreamOptions | string): Promise<StreamResult>;
/**
* Override generate to route ALL models to native SDKs
* No more @ai-sdk/google-vertex dependency
*
* @param optionsOrPrompt - Either full generation options or a bare prompt string.
* @returns A promise resolving to the generation result, or `null`.
* NOTE(review): the conditions that yield `null` are not visible in this declaration;
* callers must handle it.
*/
generate(optionsOrPrompt: TextGenerationOptions | string): Promise<EnhancedGenerateResult | null>;
/**
* Invoke `options.onFinish` with the lifecycle payload shape consumers
* (and `test:middleware`) expect. Pulled out so generate / image-gen /
* Anthropic / Gemini code paths share one implementation. Errors thrown
* by the user's callback are swallowed so they cannot poison the
* primary generate path — same contract as the AI SDK middleware
* wrapGenerate uses.
*/
private fireGenerateOnFinish;
/**
* Invoke `options.onError` with the lifecycle payload shape consumers
* (and `test:middleware`) expect. Mirrors {@link fireGenerateOnFinish}.
*/
private fireGenerateOnError;
/**
* Wrap a {@link StreamResult} so each text chunk drives `options.onChunk`
* and the final yield drives `options.onFinish`. Pipeline A providers get
* this for free via the AI SDK `wrapStream` middleware; native @google/genai
* bypasses that wrapper, so native consumers need their lifecycle
* callbacks invoked from here.
*/
private wrapStreamResultWithLifecycle;
/**
* Attach `gen_ai.usage.*` and `neurolink.cost` attributes to a span.
* Pulled out so the generate / stream / image-gen paths share one
* implementation, and so observability/tracing tests find consistent
* attributes regardless of which native sub-route fulfilled the request.
*/
private attachUsageAndCostAttributes;
/**
* Emit `generation:end` so the Pipeline B observability listener creates
* the corresponding `model.generation` span. Vertex bypasses the AI SDK
* (and therefore the experimental_telemetry plumbing), so this hand-off
* is the only way native Vertex calls show up in Langfuse / Pipeline B
* exporters. Mirrors the Bedrock + Ollama pattern.
*/
private emitGenerationEnd;
/**
* Convert an arbitrary thrown value into an `Error` for this provider.
*
* @param error - The caught value (typed `unknown`).
* @returns An `Error` instance. NOTE(review): presumably carries the setup guidance
* mentioned in the class overview — confirm against the implementation.
*/
protected formatProviderError(error: unknown): Error;
/**
* Memory-safe cache management for model configurations
* Implements LRU eviction to prevent memory leaks in long-running processes
*/
private static evictLRUCacheEntries;
/**
* Access and refresh cache entry (moves to end for LRU)
*/
private static accessCacheEntry;
/**
* Memory-safe cached check for whether maxTokens should be set for the given model
* Optimized for streaming performance with LRU eviction to prevent memory leaks
*/
private shouldSetMaxTokensCached;
/**
* Memory-safe check if model has maxTokens issues using configuration-based approach
* This replaces hardcoded model-specific logic with configurable behavior
* Includes LRU caching to avoid repeated configuration lookups during streaming
*/
private modelHasMaxTokensIssues;
/**
* Check if Anthropic models are available
* @returns Promise<boolean> indicating if Anthropic support is available
*/
hasAnthropicSupport(): Promise<boolean>;
/**
* @deprecated This method is no longer used. Claude models now use native @anthropic-ai/vertex-sdk
* via executeNativeAnthropicStream and executeNativeAnthropicGenerate.
*/
createAnthropicModel(_modelName: string): Promise<LanguageModel | null>;
/**
* Validate Vertex AI authentication configuration
*/
private validateVertexAuthentication;
/**
* Validate Vertex AI project configuration
*/
private validateVertexProjectConfiguration;
/**
* Check if the specified region supports Anthropic models
*/
private checkVertexRegionalSupport;
/**
* Validate Anthropic model name format and availability
*/
private validateAnthropicModelName;
/**
* Analyze Anthropic model creation errors for detailed troubleshooting
*/
private analyzeAnthropicCreationError;
/**
* Get detailed troubleshooting steps based on error analysis
*/
private getAnthropicTroubleshootingSteps;
/**
* Register a tool with the AI provider
* @param name The name of the tool
* @param schema The Zod schema defining the tool's parameters
* @param description A description of what the tool does
* @param handler The function to execute when the tool is called
*/
registerTool(name: string, schema: ZodType<unknown>, description: string, handler: (params: Record<string, unknown>) => Promise<unknown>): void;
/**
* Set the context for tool execution
* @param context The context to use for tool execution
*/
setToolContext(context: Record<string, unknown>): void;
/**
* Get the current tool execution context
* @returns The current tool execution context
*/
getToolContext(): Record<string, unknown>;
/**
* Set the tool executor function for custom tool execution
* This method is called by BaseProvider.setupToolExecutor()
* @param executor Function to execute tools by name
*/
setToolExecutor(executor: (toolName: string, params: unknown) => Promise<unknown>): void;
/**
* Clear all static caches - useful for testing and memory cleanup
* Public method to allow external cache management
*/
static clearCaches(): void;
/**
* Get cache statistics for monitoring and debugging
*
* @returns A snapshot describing the two static caches. NOTE(review): the unit of the
* `cacheAge` values is not visible in this declaration — presumably milliseconds; confirm.
*/
static getCacheStats(): {
// Number of entries currently held in the model-configuration cache.
modelConfigCacheSize: number;
// Number of entries currently held in the maxTokens cache.
maxTokensCacheSize: number;
// Configured upper bound on cache entries.
maxCacheSize: number;
// Age of each cache, keyed by cache name.
cacheAge: {
modelConfig: number;
maxTokens: number;
};
};
/**
* Detect image MIME type from buffer
*/
private detectImageType;
/**
* Estimate token count from text (simple character-based estimation)
*/
private estimateTokenCount;
// NOTE(review): a doc block reading "Build image parts for multimodal content" sat here with no
// member attached; presumably its member is not emitted in this declaration file — confirm.
/**
* Overrides the BaseProvider's image generation method to implement it for Vertex AI.
* Uses REST API approach with google-auth-library for authentication.
* Supports PDF input for image generation with gemini-3-pro-image-preview (Nano Banana Pro).
* @param options The generation options containing the prompt and optional PDF files.
* @returns A promise that resolves to the generation result, including the image data.
*/
protected executeImageGeneration(options: TextGenerationOptions): Promise<EnhancedGenerateResult>;
/**
* Get model suggestions when a model is not found
*/
private getModelSuggestions;
/**
* Generate an embedding for `text` using Vertex via @google/genai.
*
* Replaces the previous `@ai-sdk/google-vertex` text embedding model
* path. Without this, RAG indexing falls through to BaseProvider.embed()
* which throws "Embedding generation is not supported by the vertex
* provider", and `neurolink rag index --provider=vertex` fails even
* though the SDK conceptually supports it.
*
* @param text - The string to embed.
* @param modelName - Optional embedding model identifier. NOTE(review): the default
* used when omitted is not visible in this declaration — confirm.
* @returns A promise resolving to the embedding vector.
*/
embed(text: string, modelName?: string): Promise<number[]>;
/**
* Batch-embed an array of strings via Vertex @google/genai.
* Mirrors {@link embed} but returns one vector per input string.
*
* @param texts - The strings to embed.
* @param modelName - Optional embedding model identifier. NOTE(review): the default
* used when omitted is not visible in this declaration — confirm.
* @returns A promise resolving to one embedding vector per entry of `texts`.
*/
embedMany(texts: string[], modelName?: string): Promise<number[][]>;
}
// The provider class is exposed both as the module's default export and
// under the named alias `GoogleVertexAI`; both names refer to the same class.
export default GoogleVertexProvider;
export { GoogleVertexProvider as GoogleVertexAI };