UNPKG

@genkit-ai/vertexai

Version:

Genkit AI framework plugin for Google Cloud Vertex AI APIs including Gemini APIs, Imagen, and more.

1 lines 18.3 kB
{"version":3,"sources":["../src/embedder.ts"],"sourcesContent":["/**\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport { Document, Genkit, z } from 'genkit';\nimport {\n EmbedderAction,\n EmbedderReference,\n embedderRef,\n} from 'genkit/embedder';\nimport { GoogleAuth } from 'google-auth-library';\nimport { PluginOptions } from './common/types.js';\nimport { PredictClient, predictModel } from './predict.js';\n\nexport const TaskTypeSchema = z.enum([\n 'RETRIEVAL_DOCUMENT',\n 'RETRIEVAL_QUERY',\n 'SEMANTIC_SIMILARITY',\n 'CLASSIFICATION',\n 'CLUSTERING',\n]);\n\nexport type TaskType = z.infer<typeof TaskTypeSchema>;\n\nexport const VertexEmbeddingConfigSchema = z.object({\n /**\n * The `task_type` parameter is defined as the intended downstream application\n * to help the model produce better quality embeddings.\n **/\n taskType: TaskTypeSchema.optional(),\n title: z.string().optional(),\n location: z.string().optional(),\n version: z.string().optional(),\n /**\n * The `outputDimensionality` parameter allows you to specify the dimensionality of the embedding output.\n * By default, the model generates embeddings with 768 dimensions. Models such as\n * `text-embedding-004`, `text-embedding-005`, and `text-multilingual-embedding-002`\n * allow the output dimensionality to be adjusted between 1 and 768.\n * By selecting a smaller output dimensionality, users can save memory and storage space, leading to more efficient computations.\n **/\n outputDimensionality: z.number().min(1).max(768).optional(),\n});\n\nexport type VertexEmbeddingConfig = z.infer<typeof VertexEmbeddingConfigSchema>;\n\ntype InputType = 'text' | 'image' | 'video';\n\nfunction commonRef(\n name: string,\n input?: InputType[]\n): EmbedderReference<typeof VertexEmbeddingConfigSchema> {\n return embedderRef({\n name: `vertexai/${name}`,\n configSchema: VertexEmbeddingConfigSchema,\n info: {\n dimensions: 768,\n label: `Vertex AI - ${name}`,\n supports: {\n input: input ?? ['text'],\n },\n },\n });\n}\n\nexport const textEmbeddingGecko003 = commonRef('textembedding-gecko@003');\nexport const textEmbedding004 = commonRef('text-embedding-004');\nexport const textEmbedding005 = commonRef('text-embedding-005');\nexport const textEmbeddingGeckoMultilingual001 = commonRef(\n 'textembedding-gecko-multilingual@001'\n);\nexport const textMultilingualEmbedding002 = commonRef(\n 'text-multilingual-embedding-002'\n);\nexport const multimodalEmbedding001 = commonRef('multimodalembedding@001', [\n 'text',\n 'image',\n 'video',\n]);\n\nexport const SUPPORTED_EMBEDDER_MODELS: Record<string, EmbedderReference> = {\n 'textembedding-gecko@003': textEmbeddingGecko003,\n 'text-embedding-004': textEmbedding004,\n 'text-embedding-005': textEmbedding005,\n 'textembedding-gecko-multilingual@001': textEmbeddingGeckoMultilingual001,\n 'text-multilingual-embedding-002': textMultilingualEmbedding002,\n 'multimodalembedding@001': multimodalEmbedding001,\n};\n\n// https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings-api#request_body\ninterface MultimodalEmbeddingInstance {\n text?: string;\n image?: {\n // Union field can only be one of the following:\n bytesBase64Encoded?: string;\n gcsUri?: string;\n // End of list of possible types for union field.\n mimeType?: string;\n };\n video?: {\n // Union field can only be one of the following:\n bytesBase64Encoded?: string;\n gcsUri?: string;\n // End of list of possible types for union field.\n videoSegmentConfig?: {\n startOffsetSec: number;\n endOffsetSec: number;\n intervalSec: number;\n };\n };\n parameters?: {\n dimension: number;\n };\n}\n\ninterface VideoEmbedding {\n startOffsetSec: number;\n endOffsetSec: number;\n embedding: number[];\n}\n\ninterface MultimodalEmbeddingPrediction {\n textEmbedding?: number[];\n imageEmbedding?: number[];\n videoEmbeddings?: VideoEmbedding[];\n}\n\nfunction isObject(value: unknown): value is Record<string, unknown> {\n return typeof value === 'object' && value !== null;\n}\n\nfunction isMultimodalEmbeddingPrediction(\n value: unknown\n): value is MultimodalEmbeddingPrediction {\n if (!isObject(value)) {\n return false;\n }\n if (!value.textEmbedding && !value.imageEmbedding && !value.videoEmbeddings) {\n return false;\n }\n if (value.textEmbedding && !Array.isArray(value.textEmbedding)) {\n return false;\n }\n if (value.imageEmbedding && !Array.isArray(value.imageEmbedding)) {\n return false;\n }\n if (value.videoEmbeddings && !Array.isArray(value.videoEmbeddings)) {\n return false;\n }\n if (value.videoEmbeddings) {\n for (const emb of value.videoEmbeddings as Array<unknown>) {\n if (!isObject(emb)) {\n return false;\n }\n if (!emb.embedding || !Array.isArray(emb.embedding)) {\n return false;\n }\n }\n }\n\n return true;\n}\n\n// https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#request_body\ninterface TextEmbeddingInstance {\n task_type?: TaskType;\n content: string;\n title?: string;\n}\n\ninterface TextEmbeddingPrediction {\n embeddings: {\n statistics: {\n truncated: boolean;\n token_count: number;\n };\n values: number[];\n };\n}\n\ntype EmbeddingInstance = TextEmbeddingInstance | MultimodalEmbeddingInstance;\ntype EmbeddingPrediction =\n | TextEmbeddingPrediction\n | MultimodalEmbeddingPrediction;\n\nfunction isMultiModal(embedder: EmbedderReference): boolean {\n const input = embedder.info?.supports?.input || '';\n return (input.includes('text') && input.includes('image')) || false;\n}\n\n/**\n * Determines if a document is valid for a particular embedder or not.\n * This is only used for multimodal embedders.\n * @param embedder the embedder name e.g. 'vertexai/multimodalembedding@001'\n * @param doc The document to check\n */\nfunction checkValidDocument(\n embedder: EmbedderReference,\n doc: Document\n): boolean {\n const isTextOnly = doc.text && doc.media.length == 0;\n const isSingleMediaOnly = !doc.text && doc.media.length == 1;\n if (isMultiModal(embedder)) {\n if (embedder.name == 'vertexai/multimodalembedding@001') {\n // We restrict which Document structure can be sent for this embedder because\n // while it could accept multiple text and image and video parts in a single\n // Document, it would return separate embeddings for each of those parts,\n // essentially just batching them. This is not consistent with our \"one\n // embedding per Document\" design. Since the same batching can be achieved by\n // sending multiple Documents with one part each, there seems to be no reason\n // to change the design.\n\n if (!isTextOnly && !isSingleMediaOnly) {\n throw new Error(\n 'Documents for multimodalembedding@001 must be either only text or a single media part.'\n );\n }\n return true;\n }\n throw new Error('Unknown multimodal embedder: ' + embedder.name);\n } else {\n // Not multimodal - unexpected usage.\n // Currently text-only embedders just ignore media.\n throw new Error('Not implemented');\n }\n}\n\ntype EmbeddingResult = {\n embedding: number[];\n metadata?: Record<string, unknown>;\n};\n\nexport function defineVertexAIEmbedder(\n ai: Genkit,\n name: string,\n client: GoogleAuth,\n options: PluginOptions\n): EmbedderAction<any> {\n const embedder = SUPPORTED_EMBEDDER_MODELS[name];\n const predictClients: Record<\n string,\n PredictClient<EmbeddingInstance, EmbeddingPrediction>\n > = {};\n const predictClientFactory = (\n config: VertexEmbeddingConfig\n ): PredictClient<EmbeddingInstance, EmbeddingPrediction> => {\n const requestLocation = config?.location || options.location;\n if (!predictClients[requestLocation]) {\n // TODO: Figure out how to allow different versions while still\n // sharing a single implementation.\n predictClients[requestLocation] = predictModel<\n EmbeddingInstance,\n EmbeddingPrediction\n >(\n client,\n {\n ...options,\n location: requestLocation,\n },\n name\n );\n }\n return predictClients[requestLocation];\n };\n\n return ai.defineEmbedder(\n {\n name: embedder.name,\n configSchema: embedder.configSchema,\n info: embedder.info!,\n },\n async (input, options) => {\n const predictClient = predictClientFactory(options);\n const response = await predictClient(\n input.map((doc: Document) => {\n let instance: EmbeddingInstance;\n if (isMultiModal(embedder) && checkValidDocument(embedder, doc)) {\n instance = {};\n if (doc.text) {\n instance.text = doc.text;\n }\n for (var media of doc.media) {\n if (\n isObject(media) &&\n typeof media.url === 'string' &&\n typeof media.contentType === 'string'\n ) {\n if (media.contentType?.startsWith('image/')) {\n if (\n media.url.startsWith('http') ||\n media.url.startsWith('gs://')\n ) {\n instance.image = {\n gcsUri: media.url,\n mimeType: media.contentType,\n };\n } else {\n instance.image = {\n bytesBase64Encoded: media.url,\n mimeType: media.contentType,\n };\n }\n } else if (media.contentType.startsWith('video/')) {\n if (\n media.url.startsWith('http') ||\n media.url.startsWith('gs://')\n ) {\n instance.video = {\n gcsUri: media.url,\n };\n } else {\n instance.video = {\n bytesBase64Encoded: media.url,\n };\n }\n if (\n instance.video &&\n doc.metadata &&\n doc.metadata.videoSegmentConfig\n ) {\n instance.video.videoSegmentConfig =\n doc.metadata.videoSegmentConfig;\n }\n } else {\n throw new Error(\n `Unsupported contentType: '${media.contentType}`\n );\n }\n } else {\n // It needs to be a {url:string, contentType:string} object.\n throw new Error('Invalid media specified.');\n }\n }\n } else {\n // Text only embedder\n instance = {\n content: doc.text,\n task_type: options?.taskType,\n title: options?.title,\n };\n }\n return instance;\n }),\n { outputDimensionality: options?.outputDimensionality }\n );\n return {\n embeddings: response.predictions\n .map((p: EmbeddingPrediction) => {\n if (isMultimodalEmbeddingPrediction(p)) {\n const eArray: EmbeddingResult[] = [];\n if (p.imageEmbedding?.length) {\n const imageResult: EmbeddingResult = {\n embedding: p.imageEmbedding,\n metadata: { embedType: 'imageEmbedding' },\n };\n eArray.push(imageResult);\n }\n if (p.textEmbedding?.length) {\n const textResult: EmbeddingResult = {\n embedding: p.textEmbedding,\n metadata: { embedType: 'textEmbedding' },\n };\n eArray.push(textResult);\n }\n if (p.videoEmbeddings?.length) {\n for (const ve of p.videoEmbeddings) {\n if (ve.embedding?.length) {\n const { embedding, ...metadata } = ve;\n (metadata as Record<string, unknown>).embedType =\n 'videoEmbedding';\n const videoResult: EmbeddingResult = {\n embedding,\n metadata,\n };\n eArray.push(videoResult);\n }\n }\n }\n return eArray;\n } else {\n return [\n {\n embedding: p.embeddings.values,\n },\n ];\n }\n })\n .reduce((accumulator, value) => {\n return accumulator.concat(value);\n }, []),\n };\n }\n );\n}\n"],"mappings":"AAgBA,SAA2B,SAAS;AACpC;AAAA,EAGE;AAAA,OACK;AAGP,SAAwB,oBAAoB;AAErC,MAAM,iBAAiB,EAAE,KAAK;AAAA,EACnC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAIM,MAAM,8BAA8B,EAAE,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA,EAKlD,UAAU,eAAe,SAAS;AAAA,EAClC,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQ7B,sBAAsB,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,GAAG,EAAE,SAAS;AAC5D,CAAC;AAMD,SAAS,UACP,MACA,OACuD;AACvD,SAAO,YAAY;AAAA,IACjB,MAAM,YAAY,IAAI;AAAA,IACtB,cAAc;AAAA,IACd,MAAM;AAAA,MACJ,YAAY;AAAA,MACZ,OAAO,eAAe,IAAI;AAAA,MAC1B,UAAU;AAAA,QACR,OAAO,SAAS,CAAC,MAAM;AAAA,MACzB;AAAA,IACF;AAAA,EACF,CAAC;AACH;AAEO,MAAM,wBAAwB,UAAU,yBAAyB;AACjE,MAAM,mBAAmB,UAAU,oBAAoB;AACvD,MAAM,mBAAmB,UAAU,oBAAoB;AACvD,MAAM,oCAAoC;AAAA,EAC/C;AACF;AACO,MAAM,+BAA+B;AAAA,EAC1C;AACF;AACO,MAAM,yBAAyB,UAAU,2BAA2B;AAAA,EACzE;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,MAAM,4BAA+D;AAAA,EAC1E,2BAA2B;AAAA,EAC3B,sBAAsB;AAAA,EACtB,sBAAsB;AAAA,EACtB,wCAAwC;AAAA,EACxC,mCAAmC;AAAA,EACnC,2BAA2B;AAC7B;AAwCA,SAAS,SAAS,OAAkD;AAClE,SAAO,OAAO,UAAU,YAAY,UAAU;AAChD;AAEA,SAAS,gCACP,OACwC;AACxC,MAAI,CAAC,SAAS,KAAK,GAAG;AACpB,WAAO;AAAA,EACT;AACA,MAAI,CAAC,MAAM,iBAAiB,CAAC,MAAM,kBAAkB,CAAC,MAAM,iBAAiB;AAC3E,WAAO;AAAA,EACT;AACA,MAAI,MAAM,iBAAiB,CAAC,MAAM,QAAQ,MAAM,aAAa,GAAG;AAC9D,WAAO;AAAA,EACT;AACA,MAAI,MAAM,kBAAkB,CAAC,MAAM,QAAQ,MAAM,cAAc,GAAG;AAChE,WAAO;AAAA,EACT;AACA,MAAI,MAAM,mBAAmB,CAAC,MAAM,QAAQ,MAAM,eAAe,GAAG;AAClE,WAAO;AAAA,EACT;AACA,MAAI,MAAM,iBAAiB;AACzB,eAAW,OAAO,MAAM,iBAAmC;AACzD,UAAI,CAAC,SAAS,GAAG,GAAG;AAClB,eAAO;AAAA,MACT;AACA,UAAI,CAAC,IAAI,aAAa,CAAC,MAAM,QAAQ,IAAI,SAAS,GAAG;AACnD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAwBA,SAAS,aAAa,UAAsC;AAC1D,QAAM,QAAQ,SAAS,MAAM,UAAU,SAAS;AAChD,SAAQ,MAAM,SAAS,MAAM,KAAK,MAAM,SAAS,OAAO,KAAM;AAChE;AAQA,SAAS,mBACP,UACA,KACS;AACT,QAAM,aAAa,IAAI,QAAQ,IAAI,MAAM,UAAU;AACnD,QAAM,oBAAoB,CAAC,IAAI,QAAQ,IAAI,MAAM,UAAU;AAC3D,MAAI,aAAa,QAAQ,GAAG;AAC1B,QAAI,SAAS,QAAQ,oCAAoC;AASvD,UAAI,CAAC,cAAc,CAAC,mBAAmB;AACrC,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AACA,aAAO;AAAA,IACT;AACA,UAAM,IAAI,MAAM,kCAAkC,SAAS,IAAI;AAAA,EACjE,OAAO;AAGL,UAAM,IAAI,MAAM,iBAAiB;AAAA,EACnC;AACF;AAOO,SAAS,uBACd,IACA,MACA,QACA,SACqB;AACrB,QAAM,WAAW,0BAA0B,IAAI;AAC/C,QAAM,iBAGF,CAAC;AACL,QAAM,uBAAuB,CAC3B,WAC0D;AAC1D,UAAM,kBAAkB,QAAQ,YAAY,QAAQ;AACpD,QAAI,CAAC,eAAe,eAAe,GAAG;AAGpC,qBAAe,eAAe,IAAI;AAAA,QAIhC;AAAA,QACA;AAAA,UACE,GAAG;AAAA,UACH,UAAU;AAAA,QACZ;AAAA,QACA;AAAA,MACF;AAAA,IACF;AACA,WAAO,eAAe,eAAe;AAAA,EACvC;AAEA,SAAO,GAAG;AAAA,IACR;AAAA,MACE,MAAM,SAAS;AAAA,MACf,cAAc,SAAS;AAAA,MACvB,MAAM,SAAS;AAAA,IACjB;AAAA,IACA,OAAO,OAAOA,aAAY;AACxB,YAAM,gBAAgB,qBAAqBA,QAAO;AAClD,YAAM,WAAW,MAAM;AAAA,QACrB,MAAM,IAAI,CAAC,QAAkB;AAC3B,cAAI;AACJ,cAAI,aAAa,QAAQ,KAAK,mBAAmB,UAAU,GAAG,GAAG;AAC/D,uBAAW,CAAC;AACZ,gBAAI,IAAI,MAAM;AACZ,uBAAS,OAAO,IAAI;AAAA,YACtB;AACA,qBAAS,SAAS,IAAI,OAAO;AAC3B,kBACE,SAAS,KAAK,KACd,OAAO,MAAM,QAAQ,YACrB,OAAO,MAAM,gBAAgB,UAC7B;AACA,oBAAI,MAAM,aAAa,WAAW,QAAQ,GAAG;AAC3C,sBACE,MAAM,IAAI,WAAW,MAAM,KAC3B,MAAM,IAAI,WAAW,OAAO,GAC5B;AACA,6BAAS,QAAQ;AAAA,sBACf,QAAQ,MAAM;AAAA,sBACd,UAAU,MAAM;AAAA,oBAClB;AAAA,kBACF,OAAO;AACL,6BAAS,QAAQ;AAAA,sBACf,oBAAoB,MAAM;AAAA,sBAC1B,UAAU,MAAM;AAAA,oBAClB;AAAA,kBACF;AAAA,gBACF,WAAW,MAAM,YAAY,WAAW,QAAQ,GAAG;AACjD,sBACE,MAAM,IAAI,WAAW,MAAM,KAC3B,MAAM,IAAI,WAAW,OAAO,GAC5B;AACA,6BAAS,QAAQ;AAAA,sBACf,QAAQ,MAAM;AAAA,oBAChB;AAAA,kBACF,OAAO;AACL,6BAAS,QAAQ;AAAA,sBACf,oBAAoB,MAAM;AAAA,oBAC5B;AAAA,kBACF;AACA,sBACE,SAAS,SACT,IAAI,YACJ,IAAI,SAAS,oBACb;AACA,6BAAS,MAAM,qBACb,IAAI,SAAS;AAAA,kBACjB;AAAA,gBACF,OAAO;AACL,wBAAM,IAAI;AAAA,oBACR,6BAA6B,MAAM,WAAW;AAAA,kBAChD;AAAA,gBACF;AAAA,cACF,OAAO;AAEL,sBAAM,IAAI,MAAM,0BAA0B;AAAA,cAC5C;AAAA,YACF;AAAA,UACF,OAAO;AAEL,uBAAW;AAAA,cACT,SAAS,IAAI;AAAA,cACb,WAAWA,UAAS;AAAA,cACpB,OAAOA,UAAS;AAAA,YAClB;AAAA,UACF;AACA,iBAAO;AAAA,QACT,CAAC;AAAA,QACD,EAAE,sBAAsBA,UAAS,qBAAqB;AAAA,MACxD;AACA,aAAO;AAAA,QACL,YAAY,SAAS,YAClB,IAAI,CAAC,MAA2B;AAC/B,cAAI,gCAAgC,CAAC,GAAG;AACtC,kBAAM,SAA4B,CAAC;AACnC,gBAAI,EAAE,gBAAgB,QAAQ;AAC5B,oBAAM,cAA+B;AAAA,gBACnC,WAAW,EAAE;AAAA,gBACb,UAAU,EAAE,WAAW,iBAAiB;AAAA,cAC1C;AACA,qBAAO,KAAK,WAAW;AAAA,YACzB;AACA,gBAAI,EAAE,eAAe,QAAQ;AAC3B,oBAAM,aAA8B;AAAA,gBAClC,WAAW,EAAE;AAAA,gBACb,UAAU,EAAE,WAAW,gBAAgB;AAAA,cACzC;AACA,qBAAO,KAAK,UAAU;AAAA,YACxB;AACA,gBAAI,EAAE,iBAAiB,QAAQ;AAC7B,yBAAW,MAAM,EAAE,iBAAiB;AAClC,oBAAI,GAAG,WAAW,QAAQ;AACxB,wBAAM,EAAE,WAAW,GAAG,SAAS,IAAI;AACnC,kBAAC,SAAqC,YACpC;AACF,wBAAM,cAA+B;AAAA,oBACnC;AAAA,oBACA;AAAA,kBACF;AACA,yBAAO,KAAK,WAAW;AAAA,gBACzB;AAAA,cACF;AAAA,YACF;AACA,mBAAO;AAAA,UACT,OAAO;AACL,mBAAO;AAAA,cACL;AAAA,gBACE,WAAW,EAAE,WAAW;AAAA,cAC1B;AAAA,YACF;AAAA,UACF;AAAA,QACF,CAAC,EACA,OAAO,CAAC,aAAa,UAAU;AAC9B,iBAAO,YAAY,OAAO,KAAK;AAAA,QACjC,GAAG,CAAC,CAAC;AAAA,MACT;AAAA,IACF;AAAA,EACF;AACF;","names":["options"]}