UNPKG

@llumiverse/drivers

Version:

LLM driver implementations. Currently supported are: openai, huggingface, bedrock, replicate.

github.com/vertesia/llumiverse

vertesia/llumiverse

644 lines • 29.4 kB

JavaScript

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.VertexAIDriver = void 0; exports.trimModelName = trimModelName; const vertex_sdk_1 = require("@anthropic-ai/vertex-sdk"); const aiplatform_1 = require("@google-cloud/aiplatform"); const genai_1 = require("@google/genai"); const core_1 = require("@llumiverse/core"); const api_fetch_client_1 = require("@vertesia/api-fetch-client"); const google_auth_library_1 = require("google-auth-library"); const embeddings_image_js_1 = require("./embeddings/embeddings-image.js"); const embeddings_text_js_1 = require("./embeddings/embeddings-text.js"); const models_js_1 = require("./models.js"); const claude_js_1 = require("./models/claude.js"); const imagen_js_1 = require("./models/imagen.js"); function trimModelName(model) { const i = model.lastIndexOf("@"); return i > -1 ? model.substring(0, i) : model; } class VertexAIDriver extends core_1.AbstractDriver { static PROVIDER = "vertexai"; provider = VertexAIDriver.PROVIDER; aiplatform; anthropicClient; fetchClient; googleGenAI; googleGenAIRegion; googleGenAIFlex; llamaClient; modelGarden; imagenClient; googleAuth; authClientPromise; constructor(options) { super(options); this.aiplatform = undefined; this.anthropicClient = undefined; this.fetchClient = undefined; this.googleGenAI = undefined; this.googleGenAIRegion = undefined; this.googleGenAIFlex = undefined; this.modelGarden = undefined; this.llamaClient = undefined; this.imagenClient = undefined; this.googleAuth = new google_auth_library_1.GoogleAuth(options.googleAuthOptions); this.authClientPromise = undefined; } async getAuthClient() { if (!this.authClientPromise) { this.authClientPromise = this.googleAuth.getClient(); } return this.authClientPromise; } getGoogleGenAIClient(region = this.options.region, flex = false) { if (this.googleGenAI && this.googleGenAIRegion === region && this.googleGenAIFlex === flex) { // Return existing client if region and flex settings match return this.googleGenAI; } this.googleGenAI = this.buildGoogleGenAIClient(region, flex); this.googleGenAIRegion = region; this.googleGenAIFlex = flex; return this.googleGenAI; } buildGoogleGenAIClient(region, flex) { return new genai_1.GoogleGenAI({ project: this.options.project, location: region, vertexai: true, googleAuthOptions: this.options.googleAuthOptions || { scopes: ["https://www.googleapis.com/auth/cloud-platform"], }, ...(flex ? { httpOptions: { headers: { "X-Vertex-AI-LLM-Request-Type": "shared", "X-Vertex-AI-LLM-Shared-Request-Type": "flex", } } } : {}), }); } getFetchClient() { //Lazy initialization if (!this.fetchClient) { this.fetchClient = createFetchClient({ region: this.options.region, project: this.options.project, }).withAuthCallback(async () => { const token = await this.googleAuth.getAccessToken(); return `Bearer ${token}`; }); } return this.fetchClient; } getLLamaClient(region = "us-central1") { //Lazy initialization if (!this.llamaClient || this.llamaClient["region"] !== region) { this.llamaClient = createFetchClient({ region: region, project: this.options.project, apiVersion: "v1beta1", }).withAuthCallback(async () => { const token = await this.googleAuth.getAccessToken(); return `Bearer ${token}`; }); // Store the region for potential client reuse this.llamaClient["region"] = region; } return this.llamaClient; } async getAnthropicClient(region = this.options.region) { // Extract region prefix and map if it exists in ANTHROPIC_REGIONS, otherwise use as-is const getRegionPrefix = (r) => r.split('-')[0]; const regionPrefix = getRegionPrefix(region); const mappedRegion = claude_js_1.ANTHROPIC_REGIONS[regionPrefix] || region; const defaultRegionPrefix = getRegionPrefix(this.options.region); const defaultMappedRegion = claude_js_1.ANTHROPIC_REGIONS[defaultRegionPrefix] || this.options.region; // Get auth client to avoid version mismatch with GoogleAuth generic types const authClient = await this.getAuthClient(); // If mapped region is different from default mapped region, create one-off client if (mappedRegion !== defaultMappedRegion) { return new vertex_sdk_1.AnthropicVertex({ timeout: 20 * 60 * 10000, // Set to 20 minutes, 10 minute default, setting this disables long request error: https://github.com/anthropics/anthropic-sdk-typescript?#long-requests region: mappedRegion, projectId: this.options.project, authClient: authClient, }); } //Lazy initialization for default region if (!this.anthropicClient) { this.anthropicClient = new vertex_sdk_1.AnthropicVertex({ timeout: 20 * 60 * 10000, // Set to 20 minutes, 10 minute default, setting this disables long request error: https://github.com/anthropics/anthropic-sdk-typescript?#long-requests region: mappedRegion, projectId: this.options.project, authClient: authClient, }); } return this.anthropicClient; } async getAIPlatformClient() { //Lazy initialization if (!this.aiplatform) { const authClient = await this.getAuthClient(); this.aiplatform = new aiplatform_1.v1beta1.ModelServiceClient({ projectId: this.options.project, apiEndpoint: `${this.options.region}-${API_BASE_PATH}`, authClient, }); } return this.aiplatform; } async getModelGardenClient() { //Lazy initialization if (!this.modelGarden) { const authClient = await this.getAuthClient(); this.modelGarden = new aiplatform_1.v1beta1.ModelGardenServiceClient({ projectId: this.options.project, apiEndpoint: `${this.options.region}-${API_BASE_PATH}`, authClient, }); } return this.modelGarden; } async getImagenClient() { //Lazy initialization if (!this.imagenClient) { // TODO: make location configurable, fixed to us-central1 for now const authClient = await this.getAuthClient(); this.imagenClient = new aiplatform_1.PredictionServiceClient({ projectId: this.options.project, apiEndpoint: `us-central1-${API_BASE_PATH}`, authClient, }); } return this.imagenClient; } validateResult(result, options) { // Optionally preprocess the result before validation const modelDef = (0, models_js_1.getModelDefinition)(options.model); if (typeof modelDef.preValidationProcessing === "function") { const processed = modelDef.preValidationProcessing(result, options); result = processed.result; options = processed.options; } super.validateResult(result, options); } canStream(options) { if (this.isImageModel(options.model)) { return Promise.resolve(false); } return Promise.resolve((0, models_js_1.getModelDefinition)(options.model).model.can_stream === true); } isImageModel(model) { return model.includes("imagen"); } createPrompt(segments, options) { if (this.isImageModel(options.model)) { return new imagen_js_1.ImagenModelDefinition(options.model).createPrompt(this, segments, options); } return (0, models_js_1.getModelDefinition)(options.model).createPrompt(this, segments, options); } async requestTextCompletion(prompt, options) { return (0, models_js_1.getModelDefinition)(options.model).requestTextCompletion(this, prompt, options); } async requestTextCompletionStream(prompt, options) { return (0, models_js_1.getModelDefinition)(options.model).requestTextCompletionStream(this, prompt, options); } /** * Build conversation context after streaming completion. * Reconstructs the assistant message from accumulated results and applies stripping. * Handles both Gemini (Content[]) and Claude (ClaudePrompt) formats. */ buildStreamingConversation(prompt, result, toolUse, options) { // Handle Claude-style prompts (has 'messages' array) if ('messages' in prompt && Array.isArray(prompt.messages)) { return this.buildClaudeStreamingConversation(prompt, result, toolUse, options); } // Only handle Gemini-style prompts with contents array if (!('contents' in prompt) || !Array.isArray(prompt.contents)) { return undefined; } const completionResults = result; // Convert accumulated results to text content for assistant message const textContent = completionResults .map(r => { switch (r.type) { case 'text': return r.value; case 'json': return typeof r.value === 'string' ? r.value : JSON.stringify(r.value); case 'image': // Skip images in conversation - they're in the result return ''; default: return String(r.value || ''); } }) .join(''); // Build parts array for assistant message const parts = []; if (textContent) { parts.push({ text: textContent }); } // Add function calls if present (Gemini format) if (toolUse && toolUse.length > 0) { for (const tool of toolUse) { const functionCallPart = { functionCall: { name: tool.tool_name, args: tool.tool_input, } }; // Include thought_signature for Gemini thinking models (2.5+/3.0+) // This must be preserved in the conversation for subsequent API calls if (tool.thought_signature) { functionCallPart.thoughtSignature = tool.thought_signature; } parts.push(functionCallPart); } } // prompt.contents already includes the conversation history // (merged in requestTextCompletionStream via updateConversation), // so we use it directly — do NOT prepend options.conversation again. let conversation = [ ...prompt.contents, ]; // Only add assistant message if there's actual content // (Empty text parts can cause API errors) if (parts.length > 0) { conversation.push({ role: 'model', parts: parts }); } // Increment turn counter conversation = (0, core_1.incrementConversationTurn)(conversation); // Apply stripping based on options const currentTurn = (0, core_1.getConversationMeta)(conversation).turnNumber; const stripOptions = { keepForTurns: options.stripImagesAfterTurns ?? Infinity, currentTurn, textMaxTokens: options.stripTextMaxTokens }; let processedConversation = (0, core_1.stripBase64ImagesFromConversation)(conversation, stripOptions); processedConversation = (0, core_1.truncateLargeTextInConversation)(processedConversation, stripOptions); processedConversation = (0, core_1.stripHeartbeatsFromConversation)(processedConversation, { keepForTurns: options.stripHeartbeatsAfterTurns ?? 1, currentTurn, }); // Preserve system instruction in conversation for Gemini multi-turn support. // The Gemini API takes system as a separate parameter (not in contents), // so we must store it in the conversation wrapper to survive serialization. const geminiPrompt = prompt; if (geminiPrompt.system) { if (typeof processedConversation === 'object' && processedConversation !== null) { processedConversation = { ...processedConversation, _llumiverse_system: geminiPrompt.system }; } } return processedConversation; } /** * Build conversation for Claude streaming. * Creates assistant message with tool_use blocks in Claude's ContentBlock format. */ buildClaudeStreamingConversation(prompt, result, toolUse, options) { const completionResults = result; // Convert accumulated results to text content const textContent = completionResults .map(r => { switch (r.type) { case 'text': return r.value; case 'json': return typeof r.value === 'string' ? r.value : JSON.stringify(r.value); case 'image': return ''; default: return String(r.value || ''); } }) .join(''); // Build Claude-style ContentBlock array for assistant message const content = []; // Add text block if there's text content if (textContent) { content.push({ type: 'text', text: textContent }); } // Add tool_use blocks in Claude format if (toolUse && toolUse.length > 0) { for (const tool of toolUse) { content.push({ type: 'tool_use', id: tool.id, name: tool.tool_name, input: tool.tool_input ?? {} }); } } // Claude's requestTextCompletionStream does NOT mutate prompt.messages // to include history, so we must prepend options.conversation here. const existingMessages = options.conversation?.messages ?? []; const existingSystem = options.conversation?.system ?? prompt.system; // Build the new messages array const newMessages = [ ...existingMessages, ...prompt.messages, ]; // Only add assistant message if there's actual content // (Claude API rejects empty text content blocks) if (content.length > 0) { newMessages.push({ role: 'assistant', content: content }); } // Build the new conversation in ClaudePrompt format const conversation = { messages: newMessages, system: existingSystem }; // Increment turn counter const withTurn = (0, core_1.incrementConversationTurn)(conversation); // Apply stripping based on options const currentTurn = (0, core_1.getConversationMeta)(withTurn).turnNumber; const stripOptions = { keepForTurns: options.stripImagesAfterTurns ?? Infinity, currentTurn, textMaxTokens: options.stripTextMaxTokens }; let processedConversation = (0, core_1.stripBase64ImagesFromConversation)(withTurn, stripOptions); processedConversation = (0, core_1.truncateLargeTextInConversation)(processedConversation, stripOptions); processedConversation = (0, core_1.stripHeartbeatsFromConversation)(processedConversation, { keepForTurns: options.stripHeartbeatsAfterTurns ?? 1, currentTurn, }); return processedConversation; } async requestImageGeneration(_prompt, _options) { const splits = _options.model.split("/"); const modelName = trimModelName(splits[splits.length - 1]); return new imagen_js_1.ImagenModelDefinition(modelName).requestImageGeneration(this, _prompt, _options); } async getGenAIModelsArray(client) { const models = []; const pager = await client.models.list(); for await (const item of pager) { models.push(item); } return models; } async listModels(_params) { // Get clients const modelGarden = await this.getModelGardenClient(); const aiplatform = await this.getAIPlatformClient(); const globalGenAiClient = this.getGoogleGenAIClient("global"); let models = []; //Model Garden Publisher models - Pretrained models const publishers = ["google", "anthropic", "meta"]; // Meta "maas" models are LLama Models-As-A-Service. Non-maas models are not pre-deployed. const supportedModels = { google: ["gemini", "imagen"], anthropic: ["claude"], meta: ["maas"] }; // Additional models not in the listings, but we want to include // TODO: Remove once the models are available in the listing API, or no longer needed const additionalModels = { google: [ "imagen-3.0-fast-generate-001", ], anthropic: [], meta: [ "llama-4-maverick-17b-128e-instruct-maas", "llama-4-scout-17b-16e-instruct-maas", "llama-3.3-70b-instruct-maas", "llama-3.2-90b-vision-instruct-maas", "llama-3.1-405b-instruct-maas", "llama-3.1-70b-instruct-maas", "llama-3.1-8b-instruct-maas", ], }; //Used to exclude retired models that are still in the listing API but not available for use. //Or models we do not support yet const unsupportedModelsByPublisher = { google: ["gemini-pro", "gemini-ultra", "imagen-product-recontext-preview", "embedding", "gemini-live-2.5-flash-preview-native-audio", "computer-use-preview"], anthropic: [], meta: [], }; // Start all network requests in parallel const aiplatformPromise = aiplatform.listModels({ parent: `projects/${this.options.project}/locations/${this.options.region}`, }); const publisherPromises = publishers.map(async (publisher) => { const [response] = await modelGarden.listPublisherModels({ parent: `publishers/${publisher}`, orderBy: "name", listAllVersions: true, }); return { publisher, response }; }); const globalGooglePromise = this.getGenAIModelsArray(globalGenAiClient); // Await all network requests const [aiplatformResult, globalGoogleResult, ...publisherResults] = await Promise.all([ aiplatformPromise, globalGooglePromise, ...publisherPromises, ]); // Process aiplatform models, project specific models const [response] = aiplatformResult; models = models.concat(response.map((model) => ({ id: model.name?.split("/").pop() ?? "", name: model.displayName ?? "", provider: "vertexai" }))); // Process global google models from GenAI models = models.concat(globalGoogleResult.map((model) => { const modelCapability = (0, core_1.getModelCapabilities)(model.name ?? '', "vertexai"); return { id: "locations/global/" + model.name, name: "Global " + model.name?.split('/').pop(), provider: "vertexai", owner: "google", input_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.input), output_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.output), tool_support: modelCapability.tool_support, }; })); // Process publisher models for (const result of publisherResults) { const { publisher, response } = result; const modelFamily = supportedModels[publisher]; const retiredModels = unsupportedModelsByPublisher[publisher]; models = models.concat(response.filter((model) => { const modelName = model.name ?? ""; // Exclude retired models if (retiredModels.some(retiredModel => modelName.includes(retiredModel))) { return false; } // Check if the model belongs to the supported model families if (modelFamily.some(family => modelName.includes(family))) { return true; } return false; }).map(model => { const modelCapability = (0, core_1.getModelCapabilities)(model.name ?? '', "vertexai"); return { id: model.name ?? '', name: model.name?.split('/').pop() ?? '', provider: 'vertexai', owner: publisher, input_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.input), output_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.output), tool_support: modelCapability.tool_support, }; })); // Create global google gemini models for Gemini 2.5 and later, if missing from GenAI listing if (publisher === 'google') { const globalGeminiModels = response.filter((model) => { const modelName = model.name ?? ""; if (retiredModels.some(retiredModel => modelName.includes(retiredModel))) { return false; } if (modelFamily.some(family => modelName.includes(family))) { const versionMatch = modelName.match(/gemini-(\d+(?:\.\d+)?)/); if (versionMatch) { const version = parseFloat(versionMatch[1]); if (version >= 2.5) { // Check if already present const shortName = modelName.split('/').pop(); const globalName = "Global " + shortName; if (models.some(m => m.name === globalName)) { return false; } return true; } } return false; } return false; }).map(model => { const modelCapability = (0, core_1.getModelCapabilities)(model.name ?? '', "vertexai"); return { id: "locations/global/" + model.name, name: "Global " + model.name?.split('/').pop(), provider: 'vertexai', owner: publisher, input_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.input), output_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.output), tool_support: modelCapability.tool_support, }; }); models = models.concat(globalGeminiModels); } // Create global anthropic models for those not in NON_GLOBAL_ANTHROPIC_MODELS if (publisher === 'anthropic') { const globalAnthropicModels = response.filter((model) => { const modelName = model.name ?? ""; if (retiredModels.some(retiredModel => modelName.includes(retiredModel))) { return false; } if (modelFamily.some(family => modelName.includes(family))) { if (modelName.includes("claude-3-7")) { return true; } return !claude_js_1.NON_GLOBAL_ANTHROPIC_MODELS.some(nonGlobalModel => modelName.includes(nonGlobalModel)); } return false; }).map(model => { const modelCapability = (0, core_1.getModelCapabilities)(model.name ?? '', "vertexai"); return { id: "locations/global/" + model.name, name: "Global " + model.name?.split('/').pop(), provider: 'vertexai', owner: publisher, input_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.input), output_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.output), tool_support: modelCapability.tool_support, }; }); models = models.concat(globalAnthropicModels); } // Add additional models that are not in the listing for (const additionalModel of additionalModels[publisher]) { const publisherModelName = `publishers/${publisher}/models/${additionalModel}`; const modelCapability = (0, core_1.getModelCapabilities)(additionalModel, "vertexai"); models.push({ id: publisherModelName, name: additionalModel, provider: 'vertexai', owner: publisher, input_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.input), output_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.output), tool_support: modelCapability.tool_support, }); } } //Remove duplicates const uniqueModels = Array.from(new Set(models.map(a => a.id))) .map(id => { return models.find(a => a.id === id) ?? {}; }).sort((a, b) => a.id.localeCompare(b.id)); return uniqueModels; } validateConnection() { throw new Error("Method not implemented."); } async generateEmbeddings(options) { if (options.image || options.model?.includes("multimodal")) { if (options.text && options.image) { throw new Error("Text and Image simultaneous embedding not implemented. Submit separately"); } return (0, embeddings_image_js_1.getEmbeddingsForImages)(this, options); } const text_options = { content: options.text ?? "", model: options.model, }; return (0, embeddings_text_js_1.getEmbeddingsForText)(this, text_options); } /** * Cleanup Google Cloud clients when the driver is evicted from the cache. */ destroy() { this.aiplatform?.close(); this.modelGarden?.close(); this.imagenClient?.close(); } /** * Format VertexAI errors by routing to model-specific error handlers. * Each model definition (Gemini, Claude, Llama) can provide custom error parsing * based on their specific SDK error structures. * * @param error - The error from the VertexAI/model SDK * @param context - Context about where the error occurred * @returns A standardized LlumiverseError */ formatLlumiverseError(error, context) { // Get the model definition for this request const modelDef = (0, models_js_1.getModelDefinition)(context.model); // If the model definition provides custom error handling, use it if (modelDef.formatLlumiverseError) { try { return modelDef.formatLlumiverseError(this, error, context); } catch (formattingError) { // If model-specific handler throws, fall through to default handling // This allows model handlers to explicitly opt out for certain errors } } // Fall back to default AbstractDriver error handling return super.formatLlumiverseError(error, context); } } exports.VertexAIDriver = VertexAIDriver; //'us-central1-aiplatform.googleapis.com', const API_BASE_PATH = "aiplatform.googleapis.com"; function createFetchClient({ region, project, apiEndpoint, apiVersion = "v1", }) { const vertexBaseEndpoint = apiEndpoint ?? `${region}-${API_BASE_PATH}`; return new api_fetch_client_1.FetchClient(`https://${vertexBaseEndpoint}/${apiVersion}/projects/${project}/locations/${region}`).withHeaders({ "Content-Type": "application/json", }); } //# sourceMappingURL=index.js.map