@llumiverse/drivers
Version:
LLM driver implementations. Currently supported are: openai, huggingface, bedrock, replicate.
644 lines • 29.4 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.VertexAIDriver = void 0;
exports.trimModelName = trimModelName;
const vertex_sdk_1 = require("@anthropic-ai/vertex-sdk");
const aiplatform_1 = require("@google-cloud/aiplatform");
const genai_1 = require("@google/genai");
const core_1 = require("@llumiverse/core");
const api_fetch_client_1 = require("@vertesia/api-fetch-client");
const google_auth_library_1 = require("google-auth-library");
const embeddings_image_js_1 = require("./embeddings/embeddings-image.js");
const embeddings_text_js_1 = require("./embeddings/embeddings-text.js");
const models_js_1 = require("./models.js");
const claude_js_1 = require("./models/claude.js");
const imagen_js_1 = require("./models/imagen.js");
/**
 * Remove the trailing "@..." suffix from a model name.
 *
 * Only the last "@" is considered: everything from that character to the end
 * of the string is dropped. A name without any "@" is returned unchanged.
 *
 * @param {string} model - Model name, optionally ending in "@<suffix>".
 * @returns {string} The name up to (not including) the last "@".
 */
function trimModelName(model) {
    const separatorIndex = model.lastIndexOf("@");
    if (separatorIndex === -1) {
        return model;
    }
    return model.slice(0, separatorIndex);
}
/**
 * Driver that routes completion, image generation, embedding and model
 * listing requests to the models reachable through Google Cloud Vertex AI.
 *
 * SDK clients are created lazily by their `get*Client` methods and cached on
 * the instance. Model-specific behavior (prompt creation, completion,
 * error formatting) is delegated to the definition returned by
 * `getModelDefinition(options.model)`.
 */
class VertexAIDriver extends core_1.AbstractDriver {
    static PROVIDER = "vertexai";
    provider = VertexAIDriver.PROVIDER;
    // Lazily created clients; each stays undefined until its getter is first called.
    aiplatform;
    anthropicClient;
    fetchClient;
    googleGenAI;
    // Region and flex flag the cached `googleGenAI` client was built with.
    googleGenAIRegion;
    googleGenAIFlex;
    llamaClient;
    modelGarden;
    imagenClient;
    googleAuth;
    // Promise returned by `googleAuth.getClient()`, shared by every caller.
    authClientPromise;
    /**
     * @param options - Driver options. This class reads `project`, `region`
     *   and `googleAuthOptions` from it; the whole object is also passed to
     *   the base class constructor.
     */
    constructor(options) {
        super(options);
        this.aiplatform = undefined;
        this.anthropicClient = undefined;
        this.fetchClient = undefined;
        this.googleGenAI = undefined;
        this.googleGenAIRegion = undefined;
        this.googleGenAIFlex = undefined;
        this.modelGarden = undefined;
        this.llamaClient = undefined;
        this.imagenClient = undefined;
        this.googleAuth = new google_auth_library_1.GoogleAuth(options.googleAuthOptions);
        this.authClientPromise = undefined;
    }
    /**
     * Return the auth client from `googleAuth.getClient()`, requesting it only once.
     *
     * The pending promise is cached so concurrent callers share one request.
     * If that request rejects, the cache is cleared so the next call retries
     * instead of receiving the same rejection forever.
     *
     * @returns A promise for the auth client.
     */
    async getAuthClient() {
        if (!this.authClientPromise) {
            const pending = this.googleAuth.getClient().catch((error) => {
                // Do not keep a rejected promise around: a transient auth failure
                // would otherwise break every later call on this driver.
                if (this.authClientPromise === pending) {
                    this.authClientPromise = undefined;
                }
                throw error;
            });
            this.authClientPromise = pending;
        }
        return this.authClientPromise;
    }
    /**
     * Return a GoogleGenAI client for the given region and flex setting.
     *
     * Only one client is cached. Asking for a different region or flex value
     * replaces the cached client with a newly built one.
     *
     * @param region - Location passed to the client; defaults to `options.region`.
     * @param flex - When true the client sends the flex request headers.
     */
    getGoogleGenAIClient(region = this.options.region, flex = false) {
        if (this.googleGenAI &&
            this.googleGenAIRegion === region &&
            this.googleGenAIFlex === flex) {
            // Return existing client if region and flex settings match
            return this.googleGenAI;
        }
        this.googleGenAI = this.buildGoogleGenAIClient(region, flex);
        this.googleGenAIRegion = region;
        this.googleGenAIFlex = flex;
        return this.googleGenAI;
    }
    /**
     * Build a new GoogleGenAI client in Vertex AI mode.
     *
     * Uses `options.googleAuthOptions` when set, otherwise falls back to the
     * cloud-platform scope. With `flex` enabled, two `X-Vertex-AI-LLM-*`
     * request headers are added through `httpOptions`.
     */
    buildGoogleGenAIClient(region, flex) {
        return new genai_1.GoogleGenAI({
            project: this.options.project,
            location: region,
            vertexai: true,
            googleAuthOptions: this.options.googleAuthOptions || {
                scopes: ["https://www.googleapis.com/auth/cloud-platform"],
            },
            ...(flex ? {
                httpOptions: {
                    headers: {
                        "X-Vertex-AI-LLM-Request-Type": "shared",
                        "X-Vertex-AI-LLM-Shared-Request-Type": "flex",
                    }
                }
            } : {}),
        });
    }
    /**
     * Return the cached fetch client for `options.region`, creating it on first use.
     * Each request is authorized with a bearer token from `googleAuth.getAccessToken()`.
     */
    getFetchClient() {
        //Lazy initialization
        if (!this.fetchClient) {
            this.fetchClient = createFetchClient({
                region: this.options.region,
                project: this.options.project,
            }).withAuthCallback(async () => {
                const token = await this.googleAuth.getAccessToken();
                return `Bearer ${token}`;
            });
        }
        return this.fetchClient;
    }
    /**
     * Return a fetch client targeting the "v1beta1" API in the given region.
     *
     * The client is cached together with the region it was built for and is
     * rebuilt whenever a different region is requested.
     *
     * @param region - Region to target; defaults to "us-central1".
     */
    getLLamaClient(region = "us-central1") {
        //Lazy initialization
        if (!this.llamaClient || this.llamaClient["region"] !== region) {
            this.llamaClient = createFetchClient({
                region: region,
                project: this.options.project,
                apiVersion: "v1beta1",
            }).withAuthCallback(async () => {
                const token = await this.googleAuth.getAccessToken();
                return `Bearer ${token}`;
            });
            // Store the region for potential client reuse
            this.llamaClient["region"] = region;
        }
        return this.llamaClient;
    }
    /**
     * Return an AnthropicVertex client for the given region.
     *
     * The region prefix (text before the first "-") is looked up in
     * ANTHROPIC_REGIONS; when there is no entry the region is used as-is.
     * A client for the default mapped region is cached; any other region gets
     * a new, uncached client on every call.
     *
     * @param region - Requested region; defaults to `options.region`.
     */
    async getAnthropicClient(region = this.options.region) {
        // Extract region prefix and map if it exists in ANTHROPIC_REGIONS, otherwise use as-is
        const mapRegion = (r) => claude_js_1.ANTHROPIC_REGIONS[r.split('-')[0]] || r;
        const mappedRegion = mapRegion(region);
        const defaultMappedRegion = mapRegion(this.options.region);
        // Get auth client to avoid version mismatch with GoogleAuth generic types
        const authClient = await this.getAuthClient();
        const createClient = () => new vertex_sdk_1.AnthropicVertex({
            // 20 minutes in milliseconds (10 minute default). The previous value,
            // 20 * 60 * 10000, was 200 minutes and contradicted this intent.
            // Setting a timeout disables the long request error:
            // https://github.com/anthropics/anthropic-sdk-typescript?#long-requests
            timeout: 20 * 60 * 1000,
            region: mappedRegion,
            projectId: this.options.project,
            authClient: authClient,
        });
        // If mapped region is different from default mapped region, create one-off client
        if (mappedRegion !== defaultMappedRegion) {
            return createClient();
        }
        //Lazy initialization for default region
        if (!this.anthropicClient) {
            this.anthropicClient = createClient();
        }
        return this.anthropicClient;
    }
    /**
     * Return the cached v1beta1 ModelServiceClient for `options.region`,
     * creating it on first use.
     */
    async getAIPlatformClient() {
        //Lazy initialization
        if (!this.aiplatform) {
            const authClient = await this.getAuthClient();
            this.aiplatform = new aiplatform_1.v1beta1.ModelServiceClient({
                projectId: this.options.project,
                apiEndpoint: `${this.options.region}-${API_BASE_PATH}`,
                authClient,
            });
        }
        return this.aiplatform;
    }
    /**
     * Return the cached v1beta1 ModelGardenServiceClient for `options.region`,
     * creating it on first use.
     */
    async getModelGardenClient() {
        //Lazy initialization
        if (!this.modelGarden) {
            const authClient = await this.getAuthClient();
            this.modelGarden = new aiplatform_1.v1beta1.ModelGardenServiceClient({
                projectId: this.options.project,
                apiEndpoint: `${this.options.region}-${API_BASE_PATH}`,
                authClient,
            });
        }
        return this.modelGarden;
    }
    /**
     * Return the cached PredictionServiceClient used for Imagen, creating it
     * on first use. The endpoint is always us-central1, regardless of
     * `options.region`.
     */
    async getImagenClient() {
        //Lazy initialization
        if (!this.imagenClient) {
            // TODO: make location configurable, fixed to us-central1 for now
            const authClient = await this.getAuthClient();
            this.imagenClient = new aiplatform_1.PredictionServiceClient({
                projectId: this.options.project,
                apiEndpoint: `us-central1-${API_BASE_PATH}`,
                authClient,
            });
        }
        return this.imagenClient;
    }
    /**
     * Validate a result, first letting the model definition rewrite the
     * result and options when it provides `preValidationProcessing`.
     */
    validateResult(result, options) {
        // Optionally preprocess the result before validation
        const modelDef = (0, models_js_1.getModelDefinition)(options.model);
        if (typeof modelDef.preValidationProcessing === "function") {
            const processed = modelDef.preValidationProcessing(result, options);
            result = processed.result;
            options = processed.options;
        }
        super.validateResult(result, options);
    }
    /**
     * Resolve to whether streaming is available for `options.model`.
     * Image models never stream; otherwise the model definition's
     * `model.can_stream` flag must be exactly `true`.
     */
    canStream(options) {
        if (this.isImageModel(options.model)) {
            return Promise.resolve(false);
        }
        return Promise.resolve((0, models_js_1.getModelDefinition)(options.model).model.can_stream === true);
    }
    /** Return true when the model name contains "imagen". */
    isImageModel(model) {
        return model.includes("imagen");
    }
    /**
     * Create the prompt for `options.model`, using ImagenModelDefinition for
     * image models and the regular model definition otherwise.
     */
    createPrompt(segments, options) {
        if (this.isImageModel(options.model)) {
            return new imagen_js_1.ImagenModelDefinition(options.model).createPrompt(this, segments, options);
        }
        return (0, models_js_1.getModelDefinition)(options.model).createPrompt(this, segments, options);
    }
    /** Delegate a text completion to the model definition for `options.model`. */
    async requestTextCompletion(prompt, options) {
        return (0, models_js_1.getModelDefinition)(options.model).requestTextCompletion(this, prompt, options);
    }
    /** Delegate a streaming text completion to the model definition for `options.model`. */
    async requestTextCompletionStream(prompt, options) {
        return (0, models_js_1.getModelDefinition)(options.model).requestTextCompletionStream(this, prompt, options);
    }
    /**
     * Build conversation context after streaming completion.
     * Reconstructs the assistant message from accumulated results and applies stripping.
     * Handles both Gemini (Content[]) and Claude (ClaudePrompt) formats.
     *
     * @param prompt - Prompt that was sent; its shape selects the format
     *   (`messages` array for Claude, `contents` array for Gemini).
     * @param result - Accumulated completion results (objects with `type` and `value`).
     * @param toolUse - Optional tool calls to append to the assistant message.
     * @param options - Execution options; the `strip*` settings are read here.
     * @returns The updated conversation, or `undefined` when the prompt has
     *   neither a `messages` nor a `contents` array.
     */
    buildStreamingConversation(prompt, result, toolUse, options) {
        // Handle Claude-style prompts (has 'messages' array)
        if ('messages' in prompt && Array.isArray(prompt.messages)) {
            return this.buildClaudeStreamingConversation(prompt, result, toolUse, options);
        }
        // Only handle Gemini-style prompts with contents array
        if (!('contents' in prompt) || !Array.isArray(prompt.contents)) {
            return undefined;
        }
        const completionResults = result;
        // Convert accumulated results to text content for assistant message
        const textContent = completionResults
            .map(r => {
                switch (r.type) {
                    case 'text':
                        return r.value;
                    case 'json':
                        return typeof r.value === 'string' ? r.value : JSON.stringify(r.value);
                    case 'image':
                        // Skip images in conversation - they're in the result
                        return '';
                    default:
                        return String(r.value || '');
                }
            })
            .join('');
        // Build parts array for assistant message
        const parts = [];
        if (textContent) {
            parts.push({ text: textContent });
        }
        // Add function calls if present (Gemini format)
        if (toolUse && toolUse.length > 0) {
            for (const tool of toolUse) {
                const functionCallPart = {
                    functionCall: {
                        name: tool.tool_name,
                        args: tool.tool_input,
                    }
                };
                // Include thought_signature for Gemini thinking models (2.5+/3.0+)
                // This must be preserved in the conversation for subsequent API calls
                if (tool.thought_signature) {
                    functionCallPart.thoughtSignature = tool.thought_signature;
                }
                parts.push(functionCallPart);
            }
        }
        // prompt.contents already includes the conversation history
        // (merged in requestTextCompletionStream via updateConversation),
        // so we use it directly — do NOT prepend options.conversation again.
        let conversation = [
            ...prompt.contents,
        ];
        // Only add assistant message if there's actual content
        // (Empty text parts can cause API errors)
        if (parts.length > 0) {
            conversation.push({
                role: 'model',
                parts: parts
            });
        }
        // Increment turn counter
        conversation = (0, core_1.incrementConversationTurn)(conversation);
        // Apply stripping based on options
        const currentTurn = (0, core_1.getConversationMeta)(conversation).turnNumber;
        const stripOptions = {
            keepForTurns: options.stripImagesAfterTurns ?? Infinity,
            currentTurn,
            textMaxTokens: options.stripTextMaxTokens
        };
        let processedConversation = (0, core_1.stripBase64ImagesFromConversation)(conversation, stripOptions);
        processedConversation = (0, core_1.truncateLargeTextInConversation)(processedConversation, stripOptions);
        processedConversation = (0, core_1.stripHeartbeatsFromConversation)(processedConversation, {
            keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
            currentTurn,
        });
        // Preserve system instruction in conversation for Gemini multi-turn support.
        // The Gemini API takes system as a separate parameter (not in contents),
        // so we must store it in the conversation wrapper to survive serialization.
        const geminiPrompt = prompt;
        if (geminiPrompt.system) {
            if (typeof processedConversation === 'object' && processedConversation !== null) {
                processedConversation = { ...processedConversation, _llumiverse_system: geminiPrompt.system };
            }
        }
        return processedConversation;
    }
    /**
     * Build conversation for Claude streaming.
     * Creates assistant message with tool_use blocks in Claude's ContentBlock format.
     *
     * @param prompt - Claude prompt with a `messages` array and optional `system`.
     * @param result - Accumulated completion results (objects with `type` and `value`).
     * @param toolUse - Optional tool calls, emitted as `tool_use` blocks.
     * @param options - Execution options; `conversation` and the `strip*` settings are read.
     * @returns The updated conversation with `messages` and `system`.
     */
    buildClaudeStreamingConversation(prompt, result, toolUse, options) {
        const completionResults = result;
        // Convert accumulated results to text content
        const textContent = completionResults
            .map(r => {
                switch (r.type) {
                    case 'text':
                        return r.value;
                    case 'json':
                        return typeof r.value === 'string' ? r.value : JSON.stringify(r.value);
                    case 'image':
                        return '';
                    default:
                        return String(r.value || '');
                }
            })
            .join('');
        // Build Claude-style ContentBlock array for assistant message
        const content = [];
        // Add text block if there's text content
        if (textContent) {
            content.push({
                type: 'text',
                text: textContent
            });
        }
        // Add tool_use blocks in Claude format
        if (toolUse && toolUse.length > 0) {
            for (const tool of toolUse) {
                content.push({
                    type: 'tool_use',
                    id: tool.id,
                    name: tool.tool_name,
                    input: tool.tool_input ?? {}
                });
            }
        }
        // Claude's requestTextCompletionStream does NOT mutate prompt.messages
        // to include history, so we must prepend options.conversation here.
        const existingMessages = options.conversation?.messages ?? [];
        const existingSystem = options.conversation?.system ?? prompt.system;
        // Build the new messages array
        const newMessages = [
            ...existingMessages,
            ...prompt.messages,
        ];
        // Only add assistant message if there's actual content
        // (Claude API rejects empty text content blocks)
        if (content.length > 0) {
            newMessages.push({
                role: 'assistant',
                content: content
            });
        }
        // Build the new conversation in ClaudePrompt format
        const conversation = {
            messages: newMessages,
            system: existingSystem
        };
        // Increment turn counter
        const withTurn = (0, core_1.incrementConversationTurn)(conversation);
        // Apply stripping based on options
        const currentTurn = (0, core_1.getConversationMeta)(withTurn).turnNumber;
        const stripOptions = {
            keepForTurns: options.stripImagesAfterTurns ?? Infinity,
            currentTurn,
            textMaxTokens: options.stripTextMaxTokens
        };
        let processedConversation = (0, core_1.stripBase64ImagesFromConversation)(withTurn, stripOptions);
        processedConversation = (0, core_1.truncateLargeTextInConversation)(processedConversation, stripOptions);
        processedConversation = (0, core_1.stripHeartbeatsFromConversation)(processedConversation, {
            keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
            currentTurn,
        });
        return processedConversation;
    }
    /**
     * Generate images with Imagen. The model name is the last "/" segment of
     * `_options.model` with any "@..." suffix removed.
     */
    async requestImageGeneration(_prompt, _options) {
        const splits = _options.model.split("/");
        const modelName = trimModelName(splits[splits.length - 1]);
        return new imagen_js_1.ImagenModelDefinition(modelName).requestImageGeneration(this, _prompt, _options);
    }
    /** Collect every item yielded by `client.models.list()` into an array. */
    async getGenAIModelsArray(client) {
        const models = [];
        const pager = await client.models.list();
        for await (const item of pager) {
            models.push(item);
        }
        return models;
    }
    /**
     * List the models available to this driver.
     *
     * Combines four sources: models of the configured project and region,
     * models listed by the "global" GenAI client, Model Garden publisher
     * models (google, anthropic, meta) filtered by supported family, and a
     * hard-coded list of additional models. "Global" variants are added for
     * Gemini 2.5+ and eligible Claude models.
     *
     * Note: this calls `getGoogleGenAIClient("global")`, which replaces the
     * single cached GenAI client with one for the "global" location.
     *
     * @param _params - Unused.
     * @returns Model descriptors, unique by `id` and sorted by `id`.
     */
    async listModels(_params) {
        // Get clients
        const modelGarden = await this.getModelGardenClient();
        const aiplatform = await this.getAIPlatformClient();
        const globalGenAiClient = this.getGoogleGenAIClient("global");
        let models = [];
        //Model Garden Publisher models - Pretrained models
        const publishers = ["google", "anthropic", "meta"];
        // Meta "maas" models are LLama Models-As-A-Service. Non-maas models are not pre-deployed.
        const supportedModels = { google: ["gemini", "imagen"], anthropic: ["claude"], meta: ["maas"] };
        // Additional models not in the listings, but we want to include
        // TODO: Remove once the models are available in the listing API, or no longer needed
        const additionalModels = {
            google: [
                "imagen-3.0-fast-generate-001",
            ],
            anthropic: [],
            meta: [
                "llama-4-maverick-17b-128e-instruct-maas",
                "llama-4-scout-17b-16e-instruct-maas",
                "llama-3.3-70b-instruct-maas",
                "llama-3.2-90b-vision-instruct-maas",
                "llama-3.1-405b-instruct-maas",
                "llama-3.1-70b-instruct-maas",
                "llama-3.1-8b-instruct-maas",
            ],
        };
        //Used to exclude retired models that are still in the listing API but not available for use.
        //Or models we do not support yet
        const unsupportedModelsByPublisher = {
            google: ["gemini-pro", "gemini-ultra", "imagen-product-recontext-preview", "embedding", "gemini-live-2.5-flash-preview-native-audio", "computer-use-preview"],
            anthropic: [],
            meta: [],
        };
        // Build one model descriptor; capabilities are looked up by `capabilityName`.
        const describeModel = (id, name, owner, capabilityName) => {
            const modelCapability = (0, core_1.getModelCapabilities)(capabilityName, "vertexai");
            return {
                id,
                name,
                provider: "vertexai",
                owner,
                input_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.input),
                output_modalities: (0, core_1.modelModalitiesToArray)(modelCapability.output),
                tool_support: modelCapability.tool_support,
            };
        };
        // Descriptor for a listed model under its own name.
        const toModel = (model, owner) => describeModel(model.name ?? "", model.name?.split("/").pop() ?? "", owner, model.name ?? "");
        // Descriptor for the "global" location variant of a listed model.
        const toGlobalModel = (model, owner) => describeModel(`locations/global/${model.name}`, `Global ${model.name?.split("/").pop()}`, owner, model.name ?? "");
        // Start all network requests in parallel
        const aiplatformPromise = aiplatform.listModels({
            parent: `projects/${this.options.project}/locations/${this.options.region}`,
        });
        const publisherPromises = publishers.map(async (publisher) => {
            const [response] = await modelGarden.listPublisherModels({
                parent: `publishers/${publisher}`,
                orderBy: "name",
                listAllVersions: true,
            });
            return { publisher, response };
        });
        const globalGooglePromise = this.getGenAIModelsArray(globalGenAiClient);
        // Await all network requests
        const [aiplatformResult, globalGoogleResult, ...publisherResults] = await Promise.all([
            aiplatformPromise,
            globalGooglePromise,
            ...publisherPromises,
        ]);
        // Process aiplatform models, project specific models
        const [response] = aiplatformResult;
        models = models.concat(response.map((model) => ({
            id: model.name?.split("/").pop() ?? "",
            name: model.displayName ?? "",
            provider: "vertexai"
        })));
        // Process global google models from GenAI
        models = models.concat(globalGoogleResult.map((model) => toGlobalModel(model, "google")));
        // Process publisher models
        for (const result of publisherResults) {
            const { publisher, response } = result;
            const modelFamily = supportedModels[publisher];
            const retiredModels = unsupportedModelsByPublisher[publisher];
            // A model is kept when it is not retired and belongs to a supported family.
            const isSupported = (modelName) => !retiredModels.some(retiredModel => modelName.includes(retiredModel)) &&
                modelFamily.some(family => modelName.includes(family));
            models = models.concat(response
                .filter((model) => isSupported(model.name ?? ""))
                .map(model => toModel(model, publisher)));
            // Create global google gemini models for Gemini 2.5 and later, if missing from GenAI listing
            if (publisher === 'google') {
                // Names collected so far; `models` is not modified while filtering,
                // so one snapshot replaces a scan of the array per candidate.
                const existingNames = new Set(models.map(m => m.name));
                const globalGeminiModels = response.filter((model) => {
                    const modelName = model.name ?? "";
                    if (!isSupported(modelName)) {
                        return false;
                    }
                    const versionMatch = modelName.match(/gemini-(\d+(?:\.\d+)?)/);
                    if (!versionMatch || parseFloat(versionMatch[1]) < 2.5) {
                        return false;
                    }
                    // Check if already present
                    const shortName = modelName.split('/').pop();
                    return !existingNames.has("Global " + shortName);
                }).map(model => toGlobalModel(model, publisher));
                models = models.concat(globalGeminiModels);
            }
            // Create global anthropic models for those not in NON_GLOBAL_ANTHROPIC_MODELS
            if (publisher === 'anthropic') {
                const globalAnthropicModels = response.filter((model) => {
                    const modelName = model.name ?? "";
                    if (!isSupported(modelName)) {
                        return false;
                    }
                    // "claude-3-7" names are always given a global variant.
                    if (modelName.includes("claude-3-7")) {
                        return true;
                    }
                    return !claude_js_1.NON_GLOBAL_ANTHROPIC_MODELS.some(nonGlobalModel => modelName.includes(nonGlobalModel));
                }).map(model => toGlobalModel(model, publisher));
                models = models.concat(globalAnthropicModels);
            }
            // Add additional models that are not in the listing
            for (const additionalModel of additionalModels[publisher]) {
                const publisherModelName = `publishers/${publisher}/models/${additionalModel}`;
                models.push(describeModel(publisherModelName, additionalModel, publisher, additionalModel));
            }
        }
        //Remove duplicates, keeping the first entry seen for each id
        const modelsById = new Map();
        for (const model of models) {
            if (!modelsById.has(model.id)) {
                modelsById.set(model.id, model);
            }
        }
        return Array.from(modelsById.values()).sort((a, b) => a.id.localeCompare(b.id));
    }
    /** Not implemented for this driver; always throws. */
    validateConnection() {
        throw new Error("Method not implemented.");
    }
    /**
     * Generate embeddings for text or for an image.
     *
     * The image path is used when `options.image` is set or the model name
     * contains "multimodal"; otherwise text embeddings are requested with
     * `options.text` (empty string when missing).
     *
     * @throws {Error} When both `options.text` and `options.image` are given
     *   on the image path.
     */
    async generateEmbeddings(options) {
        if (options.image || options.model?.includes("multimodal")) {
            if (options.text && options.image) {
                throw new Error("Text and Image simultaneous embedding not implemented. Submit separately");
            }
            return (0, embeddings_image_js_1.getEmbeddingsForImages)(this, options);
        }
        const text_options = {
            content: options.text ?? "",
            model: options.model,
        };
        return (0, embeddings_text_js_1.getEmbeddingsForText)(this, text_options);
    }
    /**
     * Cleanup Google Cloud clients when the driver is evicted from the cache.
     * Only the clients that were actually created are closed.
     */
    destroy() {
        // NOTE(review): the values returned by close() are not awaited or
        // observed here; if close() returns a promise, a rejection would go
        // unhandled — confirm against the client library.
        this.aiplatform?.close();
        this.modelGarden?.close();
        this.imagenClient?.close();
    }
    /**
     * Format VertexAI errors by routing to model-specific error handlers.
     * Each model definition (Gemini, Claude, Llama) can provide custom error parsing
     * based on their specific SDK error structures.
     *
     * @param error - The error from the VertexAI/model SDK
     * @param context - Context about where the error occurred
     * @returns A standardized LlumiverseError
     */
    formatLlumiverseError(error, context) {
        // Get the model definition for this request
        const modelDef = (0, models_js_1.getModelDefinition)(context.model);
        // If the model definition provides custom error handling, use it
        if (modelDef.formatLlumiverseError) {
            try {
                return modelDef.formatLlumiverseError(this, error, context);
            }
            catch {
                // If model-specific handler throws, fall through to default handling
                // This allows model handlers to explicitly opt out for certain errors
            }
        }
        // Fall back to default AbstractDriver error handling
        return super.formatLlumiverseError(error, context);
    }
}
exports.VertexAIDriver = VertexAIDriver;
// Host suffix of the Vertex AI endpoints. Callers in this file prepend
// "<region>-" to it, e.g. 'us-central1-aiplatform.googleapis.com'.
const API_BASE_PATH = "aiplatform.googleapis.com";
/**
 * Create a FetchClient rooted at a project/location path of the Vertex AI REST API.
 *
 * The base URL is `https://<host>/<apiVersion>/projects/<project>/locations/<region>`,
 * where the host is `apiEndpoint` when provided (not null/undefined) and
 * `<region>-<API_BASE_PATH>` otherwise. The client is returned with a JSON
 * "Content-Type" header configured.
 *
 * @param {object} params
 * @param {string} params.region - Location used in the URL path and the default host.
 * @param {string} params.project - Project id used in the URL path.
 * @param {string} [params.apiEndpoint] - Host that overrides the regional default.
 * @param {string} [params.apiVersion="v1"] - API version path segment.
 * @returns The configured FetchClient.
 */
function createFetchClient({ region, project, apiEndpoint, apiVersion = "v1", }) {
    let host = apiEndpoint;
    if (host === undefined || host === null) {
        host = `${region}-${API_BASE_PATH}`;
    }
    const baseUrl = `https://${host}/${apiVersion}/projects/${project}/locations/${region}`;
    const client = new api_fetch_client_1.FetchClient(baseUrl);
    return client.withHeaders({
        "Content-Type": "application/json",
    });
}
//# sourceMappingURL=index.js.map