@genkit-ai/vertexai
Version:
Genkit AI framework plugin for Google Cloud Vertex AI APIs including Gemini APIs, Imagen, and more.
256 lines • 8.5 kB
JavaScript
import { z } from "genkit";
import {
embedderRef
} from "genkit/embedder";
import { predictModel } from "./predict.js";
/**
 * Zod enum of the task types accepted by the `taskType` field of
 * VertexEmbeddingConfigSchema. For text-only embedders the selected value is
 * sent on each request instance as `task_type` (see defineVertexAIEmbedder).
 */
const TaskTypeSchema = z.enum([
"RETRIEVAL_DOCUMENT",
"RETRIEVAL_QUERY",
"SEMANTIC_SIMILARITY",
"CLASSIFICATION",
"CLUSTERING"
]);
/**
 * Zod schema for the per-request configuration accepted by the Vertex AI
 * embedders declared in this file. Every field is optional.
 */
const VertexEmbeddingConfigSchema = z.object({
/**
 * The `task_type` parameter is defined as the intended downstream application
 * to help the model produce better quality embeddings.
 * For text-only embedders, defineVertexAIEmbedder copies it onto each request
 * instance as `task_type`.
 **/
taskType: TaskTypeSchema.optional(),
// For text-only embedders, copied onto each request instance as `title`.
title: z.string().optional(),
// Overrides the plugin-level `location` for this request; when unset (or
// empty) defineVertexAIEmbedder falls back to the plugin options' location.
location: z.string().optional(),
// NOTE(review): not read anywhere in this file — presumably a model version
// selector; confirm where (or whether) it is consumed.
version: z.string().optional(),
/**
 * The `outputDimensionality` parameter allows you to specify the dimensionality of the embedding output.
 * By default, the model generates embeddings with 768 dimensions. Models such as
 * `text-embedding-004`, `text-embedding-005`, and `text-multilingual-embedding-002`
 * allow the output dimensionality to be adjusted between 1 and 768.
 * By selecting a smaller output dimensionality, users can save memory and storage space, leading to more efficient computations.
 * The value is forwarded as a request-level parameter by defineVertexAIEmbedder.
 **/
outputDimensionality: z.number().min(1).max(768).optional()
});
/**
 * Builds the embedder reference for a Vertex AI embedding model.
 *
 * @param {string} name - Bare model name; it is prefixed with `vertexai/` to
 *   form the reference name and used verbatim in the label.
 * @param {string[]} [input] - Supported input modalities. Falls back to
 *   `["text"]` when `null` or `undefined`.
 * @returns The value returned by `embedderRef` for this model.
 */
function commonRef(name, input) {
  const supportedInput = input ?? ["text"];
  const info = {
    // NOTE(review): every reference built here reports 768 dimensions,
    // including the multimodal one — confirm against the model documentation.
    dimensions: 768,
    label: `Vertex AI - ${name}`,
    supports: { input: supportedInput }
  };
  return embedderRef({
    name: `vertexai/${name}`,
    configSchema: VertexEmbeddingConfigSchema,
    info
  });
}
// Text-only embedders: no modality list is passed, so commonRef falls back to
// its default supported input of ["text"].
const textEmbeddingGecko003 = commonRef("textembedding-gecko@003");
const textEmbedding004 = commonRef("text-embedding-004");
const textEmbedding005 = commonRef("text-embedding-005");
const textEmbeddingGeckoMultilingual001 = commonRef("textembedding-gecko-multilingual@001");
const textMultilingualEmbedding002 = commonRef("text-multilingual-embedding-002");

// Multimodal embedder: explicitly declares text, image and video input.
const multimodalEmbedding001 = commonRef(
  "multimodalembedding@001",
  ["text", "image", "video"]
);
/**
 * Lookup table from bare model name (without the `vertexai/` prefix) to its
 * embedder reference. defineVertexAIEmbedder indexes this table with its
 * `name` argument.
 */
const SUPPORTED_EMBEDDER_MODELS = {
"textembedding-gecko@003": textEmbeddingGecko003,
"text-embedding-004": textEmbedding004,
"text-embedding-005": textEmbedding005,
"textembedding-gecko-multilingual@001": textEmbeddingGeckoMultilingual001,
"text-multilingual-embedding-002": textMultilingualEmbedding002,
"multimodalembedding@001": multimodalEmbedding001
};
/**
 * Tells whether a value is a non-null object (arrays included, functions not).
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` when `typeof value` is `"object"` and it is not `null`.
 */
function isObject(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Structural check for a multimodal embedding prediction.
 *
 * A value passes when it is an object that has at least one truthy
 * `textEmbedding`, `imageEmbedding` or `videoEmbeddings` field, every such
 * truthy field is an array, and each entry of `videoEmbeddings` is an object
 * carrying an `embedding` array.
 *
 * @param {unknown} value - Candidate prediction.
 * @returns {boolean} Whether the value has the multimodal prediction shape.
 */
function isMultimodalEmbeddingPrediction(value) {
  if (!isObject(value)) {
    return false;
  }
  const { textEmbedding, imageEmbedding, videoEmbeddings } = value;
  const provided = [textEmbedding, imageEmbedding, videoEmbeddings].filter(Boolean);
  // At least one embedding field must be present, and all present ones must be arrays.
  if (provided.length === 0 || !provided.every((field) => Array.isArray(field))) {
    return false;
  }
  if (!videoEmbeddings) {
    return true;
  }
  // Spread first so holes in a sparse array are visited (as `undefined`) and rejected.
  return [...videoEmbeddings].every(
    (segment) => isObject(segment) && Array.isArray(segment.embedding)
  );
}
/**
 * Reports whether an embedder declares support for both text and image input.
 *
 * @param {object} embedder - Embedder reference; `info.supports.input` is read if present.
 * @returns {boolean} `true` only when the declared input includes both `"text"` and `"image"`.
 */
function isMultiModal(embedder) {
  const modalities = embedder.info?.supports?.input;
  if (!modalities) {
    // No declared input modalities: treated as not multimodal.
    return false;
  }
  const handlesTextAndImage = modalities.includes("text") && modalities.includes("image");
  return handlesTextAndImage || false;
}
/**
 * Validates that a document can be sent to the given multimodal embedder.
 *
 * For `vertexai/multimodalembedding@001` a document must contain either text
 * with no media parts, or exactly one media part with no text.
 *
 * @param {object} embedder - Embedder reference (`name` and `info` are read).
 * @param {object} doc - Document exposing `text` and a `media` array.
 * @returns {true} Always `true` when the document is valid.
 * @throws {Error} "Not implemented" when the embedder is not multimodal,
 *   "Unknown multimodal embedder: …" for any other multimodal embedder, and a
 *   descriptive error when the document mixes text and media or carries more
 *   than one media part.
 */
function checkValidDocument(embedder, doc) {
  const mediaCount = doc.media.length;
  const isTextOnly = Boolean(doc.text) && mediaCount === 0;
  const isSingleMediaOnly = !doc.text && mediaCount === 1;

  if (!isMultiModal(embedder)) {
    throw new Error("Not implemented");
  }
  if (embedder.name !== "vertexai/multimodalembedding@001") {
    throw new Error(`Unknown multimodal embedder: ${embedder.name}`);
  }
  if (!isTextOnly && !isSingleMediaOnly) {
    throw new Error(
      "Documents for multimodalembedding@001 must be either only text or a single media part."
    );
  }
  return true;
}
/**
 * Chooses the request field that carries a media URL.
 *
 * URLs starting with `http` or `gs://` are sent as `gcsUri`; anything else is
 * sent unchanged as `bytesBase64Encoded`.
 * NOTE(review): `http(s)` URLs are forwarded under `gcsUri` too — confirm the
 * API accepts non-GCS URLs in that field.
 *
 * @param {string} url - The media part's `url`.
 * @returns {{gcsUri: string} | {bytesBase64Encoded: string}}
 */
function toMediaSource(url) {
  const isRemote = url.startsWith("http") || url.startsWith("gs://");
  return isRemote ? { gcsUri: url } : { bytesBase64Encoded: url };
}

/**
 * Builds the request instance for a document sent to a multimodal embedder.
 *
 * @param {object} doc - Document exposing `text`, `media` and optional `metadata`.
 * @returns {object} Instance with optional `text`, `image` and `video` fields.
 * @throws {Error} When a media part is malformed or is neither image nor video.
 */
function toMultimodalInstance(doc) {
  const instance = {};
  if (doc.text) {
    instance.text = doc.text;
  }
  for (const media of doc.media) {
    if (!isObject(media) || typeof media.url !== "string" || typeof media.contentType !== "string") {
      throw new Error("Invalid media specified.");
    }
    const { url, contentType } = media;
    if (contentType.startsWith("image/")) {
      instance.image = { ...toMediaSource(url), mimeType: contentType };
    } else if (contentType.startsWith("video/")) {
      instance.video = toMediaSource(url);
      // Segmenting options travel on the document metadata, not on the media part.
      if (doc.metadata?.videoSegmentConfig) {
        instance.video.videoSegmentConfig = doc.metadata.videoSegmentConfig;
      }
    } else {
      throw new Error(`Unsupported contentType: '${contentType}'`);
    }
  }
  return instance;
}

/**
 * Builds the request instance for a document sent to a text-only embedder.
 *
 * @param {object} doc - Document exposing `text`.
 * @param {object} [config] - Per-request config (`taskType`, `title`).
 * @returns {{content: string, task_type: (string|undefined), title: (string|undefined)}}
 */
function toTextInstance(doc, config) {
  return {
    content: doc.text,
    task_type: config?.taskType,
    title: config?.title
  };
}

/**
 * Converts one prediction into zero or more embedding results.
 *
 * Multimodal predictions yield one result per non-empty image/text embedding
 * (image first) and one per non-empty video segment; each is tagged with
 * `metadata.embedType`, and video results also keep the segment's remaining
 * fields as metadata. Any other prediction yields a single result taken from
 * `prediction.embeddings.values`.
 *
 * @param {object} prediction - One entry of the response's `predictions`.
 * @returns {Array<{embedding: number[], metadata?: object}>}
 */
function toEmbeddings(prediction) {
  if (!isMultimodalEmbeddingPrediction(prediction)) {
    return [{ embedding: prediction.embeddings.values }];
  }
  const results = [];
  if (prediction.imageEmbedding?.length) {
    results.push({
      embedding: prediction.imageEmbedding,
      metadata: { embedType: "imageEmbedding" }
    });
  }
  if (prediction.textEmbedding?.length) {
    results.push({
      embedding: prediction.textEmbedding,
      metadata: { embedType: "textEmbedding" }
    });
  }
  if (prediction.videoEmbeddings?.length) {
    for (const segment of prediction.videoEmbeddings) {
      if (segment.embedding?.length) {
        const { embedding, ...metadata } = segment;
        metadata.embedType = "videoEmbedding";
        results.push({ embedding, metadata });
      }
    }
  }
  return results;
}

/**
 * Registers one of the supported Vertex AI embedders on a Genkit instance.
 *
 * @param {object} ai - Object exposing `defineEmbedder(definition, runner)`.
 * @param {string} name - Key into SUPPORTED_EMBEDDER_MODELS (bare model name).
 * @param {object} client - Passed through to `predictModel` unchanged.
 * @param {object} options - Plugin options; `location` is the default request
 *   location and the whole object is forwarded to `predictModel`.
 * @returns Whatever `ai.defineEmbedder` returns.
 * @throws {Error} When `name` is not a supported embedder model.
 */
function defineVertexAIEmbedder(ai, name, client, options) {
  const embedder = SUPPORTED_EMBEDDER_MODELS[name];
  if (!embedder) {
    throw new Error(`Unsupported Vertex AI embedder model: '${name}'`);
  }

  // One predict client per location, created on first use. A Map is used
  // because the key comes from request config and must not collide with
  // Object.prototype members such as "constructor".
  const predictClients = new Map();
  const getPredictClient = (config) => {
    // `||` (not `??`) so an empty-string location also falls back to the default.
    const requestLocation = config?.location || options.location;
    let predictClient = predictClients.get(requestLocation);
    if (!predictClient) {
      predictClient = predictModel(
        client,
        { ...options, location: requestLocation },
        name
      );
      predictClients.set(requestLocation, predictClient);
    }
    return predictClient;
  };

  return ai.defineEmbedder(
    {
      name: embedder.name,
      configSchema: embedder.configSchema,
      info: embedder.info
    },
    async (input, config) => {
      const predictClient = getPredictClient(config);
      const multimodal = isMultiModal(embedder);
      const instances = input.map((doc) =>
        // checkValidDocument either returns true or throws.
        multimodal && checkValidDocument(embedder, doc)
          ? toMultimodalInstance(doc)
          : toTextInstance(doc, config)
      );
      const response = await predictClient(instances, {
        outputDimensionality: config?.outputDimensionality
      });
      return {
        embeddings: response.predictions.flatMap((prediction) => toEmbeddings(prediction))
      };
    }
  );
}
export {
SUPPORTED_EMBEDDER_MODELS,
TaskTypeSchema,
VertexEmbeddingConfigSchema,
defineVertexAIEmbedder,
multimodalEmbedding001,
textEmbedding004,
textEmbedding005,
textEmbeddingGecko003,
textEmbeddingGeckoMultilingual001,
textMultilingualEmbedding002
};
//# sourceMappingURL=embedder.mjs.map