UNPKG

genkitx-pinecone

Version:

Genkit AI framework plugin for Pinecone vector database.

1 lines 16.3 kB
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport {\n Pinecone,\n type CreateIndexOptions,\n type PineconeConfiguration,\n type RecordMetadata,\n} from '@pinecone-database/pinecone';\nimport { z, type Genkit } from 'genkit';\nimport { genkitPlugin, type GenkitPlugin } from 'genkit/plugin';\n\nimport type { EmbedderArgument, Embedding } from 'genkit/embedder';\nimport {\n CommonRetrieverOptionsSchema,\n Document,\n indexerRef,\n retrieverRef,\n} from 'genkit/retriever';\nimport { Md5 } from 'ts-md5';\n\nconst SparseVectorSchema = z\n .object({\n indices: z.number().array(),\n values: z.number().array(),\n })\n .refine(\n (input) => {\n return input.indices.length === input.values.length;\n },\n {\n message: 'Indices and values must be of the same length',\n }\n );\n\nconst PineconeRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({\n k: z.number().max(1000),\n namespace: z.string().optional(),\n filter: z.record(z.string(), z.any()).optional(),\n // includeValues is always false\n // includeMetadata is always true\n sparseVector: SparseVectorSchema.optional(),\n});\n\nconst PineconeIndexerOptionsSchema = z.object({\n namespace: z.string().optional(),\n});\n\nconst CONTENT_KEY = '_content';\nconst CONTENT_TYPE = '_contentType';\n\n/**\n * pineconeRetrieverRef function creates a retriever for Pinecone.\n * @param params The params for the new Pinecone retriever\n * @param params.indexId The indexId for the Pinecone retriever\n * @param params.displayName A display name for the retriever.\nIf not specified, the default label will be `Pinecone - <indexId>`\n * @returns A reference to a Pinecone retriever.\n */\nexport const pineconeRetrieverRef = (params: {\n indexId: string;\n displayName?: string;\n}) => {\n return retrieverRef({\n name: `pinecone/${params.indexId}`,\n info: {\n label: params.displayName ?? `Pinecone - ${params.indexId}`,\n },\n configSchema: PineconeRetrieverOptionsSchema,\n });\n};\n\n/**\n * pineconeIndexerRef function creates an indexer for Pinecone.\n * @param params The params for the new Pinecone indexer.\n * @param params.indexId The indexId for the Pinecone indexer.\n * @param params.displayName A display name for the indexer.\nIf not specified, the default label will be `Pinecone - <indexId>`\n * @returns A reference to a Pinecone indexer.\n */\nexport const pineconeIndexerRef = (params: {\n indexId: string;\n displayName?: string;\n}) => {\n return indexerRef({\n name: `pinecone/${params.indexId}`,\n info: {\n label: params.displayName ?? `Pinecone - ${params.indexId}`,\n },\n configSchema: PineconeIndexerOptionsSchema.optional(),\n });\n};\n\n/**\n * Pinecone plugin that provides a Pinecone retriever and indexer\n * @param params An array of params to set up Pinecone retrievers and indexers\n * @param params.clientParams PineconeConfiguration containing the\nPINECONE_API_KEY. If not set, the PINECONE_API_KEY environment variable will\nbe used instead.\n * @param params.indexId The name of the index\n * @param params.embedder The embedder to use for the indexer and retriever\n * @param params.embedderOptions Options to customize the embedder\n * @returns The Pinecone Genkit plugin\n */\nexport function pinecone<EmbedderCustomOptions extends z.ZodTypeAny>(\n params: {\n clientParams?: PineconeConfiguration;\n indexId: string;\n contentKey?: string;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }[]\n): GenkitPlugin {\n return genkitPlugin('pinecone', async (ai: Genkit) => {\n params.map((i) => configurePineconeRetriever(ai, i));\n params.map((i) => configurePineconeIndexer(ai, i));\n });\n}\n\nexport default pinecone;\n\n/**\n * Configures a Pinecone retriever.\n * @param ai A Genkit instance\n * @param params The params for the retriever\n * @param params.indexId The name of the retriever\n * @param params.clientParams PineconeConfiguration containing the\nPINECONE_API_KEY. If not set, the PINECONE_API_KEY environment variable will\nbe used instead.\n * @param params.textKey Deprecated. Please use contentKey.\n * @param params.contentKey The metadata key that contains the\ncontent. If not specified, the value '_content' is used by default.\n * @param params.embedder The embedder to use for the retriever\n * @param params.embedderOptions Options to customize the embedder\n * @returns A Pinecone retriever\n */\nexport function configurePineconeRetriever<\n EmbedderCustomOptions extends z.ZodTypeAny,\n>(\n ai: Genkit,\n params: {\n indexId: string;\n clientParams?: PineconeConfiguration;\n /**\n * @deprecated use contentKey instead.\n */\n textKey?: string;\n contentKey?: string;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { indexId, embedder, embedderOptions } = {\n ...params,\n };\n const pineconeConfig = params.clientParams ?? getDefaultConfig();\n const contentKey = params.contentKey ?? params.textKey ?? CONTENT_KEY;\n const pinecone = new Pinecone(pineconeConfig);\n const index = pinecone.index(indexId);\n\n return ai.defineRetriever(\n {\n name: `pinecone/${params.indexId}`,\n configSchema: PineconeRetrieverOptionsSchema,\n },\n async (content, options) => {\n const queryEmbeddings = await ai.embed({\n embedder,\n content,\n options: embedderOptions,\n });\n const scopedIndex = !!options.namespace\n ? index.namespace(options.namespace)\n : index;\n const response = await scopedIndex.query({\n topK: options.k,\n vector: queryEmbeddings[0].embedding,\n includeValues: false,\n includeMetadata: true,\n });\n return {\n documents: response.matches\n .map((m) => m.metadata)\n .filter((m): m is RecordMetadata => !!m)\n .map((m) => {\n const metadata = m;\n return Document.fromData(\n metadata[contentKey] as string,\n metadata[CONTENT_TYPE] as string,\n metadata.docMetadata\n ? (JSON.parse(metadata.docMetadata as string) as Record<\n string,\n unknown\n >)\n : undefined\n );\n }),\n };\n }\n );\n}\n\n/**\n * Configures a Pinecone indexer.\n * @param ai A Genkit instance\n * @param params The params for the indexer\n * @param params.indexId The name of the indexer\n * @param params.clientParams PineconeConfiguration containing the\nPINECONE_API_KEY. If not set, the PINECONE_API_KEY environment variable will\nbe used instead.\n * @param params.textKey Deprecated. Please use contentKey.\n * @param params.contentKey The metadata key that contains the\ncontent. If not specified, the value '_content' is used by default.\n * @param params.embedder The embedder to use for the retriever\n * @param params.embedderOptions Options to customize the embedder\n * @returns A Genkit indexer\n */\nexport function configurePineconeIndexer<\n EmbedderCustomOptions extends z.ZodTypeAny,\n>(\n ai: Genkit,\n params: {\n indexId: string;\n clientParams?: PineconeConfiguration;\n /**\n * @deprecated use contentKey instead.\n */\n textKey?: string;\n contentKey?: string;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { indexId, embedder, embedderOptions } = {\n ...params,\n };\n const pineconeConfig = params.clientParams ?? getDefaultConfig();\n const contentKey = params.contentKey ?? params.textKey ?? CONTENT_KEY;\n const pinecone = new Pinecone(pineconeConfig);\n const index = pinecone.index(indexId);\n\n return ai.defineIndexer(\n {\n name: `pinecone/${params.indexId}`,\n configSchema: PineconeIndexerOptionsSchema.optional(),\n },\n async (docs, options) => {\n const scopedIndex = !!options?.namespace\n ? index.namespace(options.namespace)\n : index;\n\n const embeddings = await Promise.all(\n docs.map((doc) =>\n ai.embed({\n embedder,\n content: doc,\n options: embedderOptions,\n })\n )\n );\n await scopedIndex.upsert(\n embeddings\n .map((value, i) => {\n const doc = docs[i];\n // The array of embeddings for this document\n const docEmbeddings: Embedding[] = value;\n\n // Create one doc per docEmbedding so we can store them 1:1.\n // They should be unique because the embedding metadata is\n // added to the new docs.\n const embeddingDocs = doc.getEmbeddingDocuments(docEmbeddings);\n\n return docEmbeddings.map((docEmbedding, j) => {\n const metadata: RecordMetadata = {\n docMetadata: JSON.stringify(embeddingDocs[j].metadata),\n };\n metadata[contentKey] = embeddingDocs[j].data;\n metadata[CONTENT_TYPE] = embeddingDocs[j].dataType || '';\n const id = Md5.hashStr(JSON.stringify(embeddingDocs[j]));\n return {\n id,\n values: docEmbedding.embedding,\n metadata,\n };\n });\n })\n .reduce((acc, val) => {\n return acc.concat(val);\n }, [])\n );\n }\n );\n}\n\n/**\n * Helper function for creating a Pinecone index.\n * @param params The params for creating a Pinecone index\n * @param params.clientParams The params to initialize Pinecone.\n * @param params.options The options for creating the index.\n * @returns A Pinecone index.\n */\nexport async function createPineconeIndex(params: {\n clientParams?: PineconeConfiguration;\n options: CreateIndexOptions;\n}) {\n const pineconeConfig = params.clientParams ?? getDefaultConfig();\n const pinecone = new Pinecone(pineconeConfig);\n return await pinecone.createIndex(params.options);\n}\n\n/**\n * Helper function to describe a Pinecone index. Use it to check if a newly created index is ready for use.\n * @param params The params for describing a Pinecone index.\n * @param params.clientParams The params to initialize Pinecone.\n * @param params.name The name of the Pinecone index to describe.\n * @return A description of the Pinecone index.\n */\nexport async function describePineconeIndex(params: {\n clientParams?: PineconeConfiguration;\n name: string;\n}) {\n const pineconeConfig = params.clientParams ?? getDefaultConfig();\n const pinecone = new Pinecone(pineconeConfig);\n return await pinecone.describeIndex(params.name);\n}\n\n/**\n * Helper function for deleting pinecone indices.\n * @param params The params for deleting a Pinecone index.\n * @param params.clientParams The params to initialize Pinecone.\n * @param params.name The name of the Pinecone index to delete.\n * @returns a void Promise that is fulfilled when the index has been deleted.\n */\nexport async function deletePineconeIndex(params: {\n clientParams?: PineconeConfiguration;\n name: string;\n}) {\n const pineconeConfig = params.clientParams ?? getDefaultConfig();\n const pinecone = new Pinecone(pineconeConfig);\n return await pinecone.deleteIndex(params.name);\n}\n\nfunction getDefaultConfig() {\n const maybeApiKey = process.env.PINECONE_API_KEY;\n if (!maybeApiKey)\n throw new Error(\n 'Please pass in the API key or set PINECONE_API_KEY environment variable.\\n' +\n 'For more details see https://genkit.dev/docs/plugins/pinecone'\n );\n return { apiKey: maybeApiKey } as PineconeConfiguration;\n}\n"],"mappings":"AAgBA;AAAA,EACE;AAAA,OAIK;AACP,SAAS,SAAsB;AAC/B,SAAS,oBAAuC;AAGhD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,WAAW;AAEpB,MAAM,qBAAqB,EACxB,OAAO;AAAA,EACN,SAAS,EAAE,OAAO,EAAE,MAAM;AAAA,EAC1B,QAAQ,EAAE,OAAO,EAAE,MAAM;AAC3B,CAAC,EACA;AAAA,EACC,CAAC,UAAU;AACT,WAAO,MAAM,QAAQ,WAAW,MAAM,OAAO;AAAA,EAC/C;AAAA,EACA;AAAA,IACE,SAAS;AAAA,EACX;AACF;AAEF,MAAM,iCAAiC,6BAA6B,OAAO;AAAA,EACzE,GAAG,EAAE,OAAO,EAAE,IAAI,GAAI;AAAA,EACtB,WAAW,EAAE,OAAO,EAAE,SAAS;AAAA,EAC/B,QAAQ,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,IAAI,CAAC,EAAE,SAAS;AAAA;AAAA;AAAA,EAG/C,cAAc,mBAAmB,SAAS;AAC5C,CAAC;AAED,MAAM,+BAA+B,EAAE,OAAO;AAAA,EAC5C,WAAW,EAAE,OAAO,EAAE,SAAS;AACjC,CAAC;AAED,MAAM,cAAc;AACpB,MAAM,eAAe;AAUd,MAAM,uBAAuB,CAAC,WAG/B;AACJ,SAAO,aAAa;AAAA,IAClB,MAAM,YAAY,OAAO,OAAO;AAAA,IAChC,MAAM;AAAA,MACJ,OAAO,OAAO,eAAe,cAAc,OAAO,OAAO;AAAA,IAC3D;AAAA,IACA,cAAc;AAAA,EAChB,CAAC;AACH;AAUO,MAAM,qBAAqB,CAAC,WAG7B;AACJ,SAAO,WAAW;AAAA,IAChB,MAAM,YAAY,OAAO,OAAO;AAAA,IAChC,MAAM;AAAA,MACJ,OAAO,OAAO,eAAe,cAAc,OAAO,OAAO;AAAA,IAC3D;AAAA,IACA,cAAc,6BAA6B,SAAS;AAAA,EACtD,CAAC;AACH;AAaO,SAAS,SACd,QAOc;AACd,SAAO,aAAa,YAAY,OAAO,OAAe;AACpD,WAAO,IAAI,CAAC,MAAM,2BAA2B,IAAI,CAAC,CAAC;AACnD,WAAO,IAAI,CAAC,MAAM,yBAAyB,IAAI,CAAC,CAAC;AAAA,EACnD,CAAC;AACH;AAEA,IAAO,gBAAQ;AAiBR,SAAS,2BAGd,IACA,QAWA;AACA,QAAM,EAAE,SAAS,UAAU,gBAAgB,IAAI;AAAA,IAC7C,GAAG;AAAA,EACL;AACA,QAAM,iBAAiB,OAAO,gBAAgB,iBAAiB;AAC/D,QAAM,aAAa,OAAO,cAAc,OAAO,WAAW;AAC1D,QAAMA,YAAW,IAAI,SAAS,cAAc;AAC5C,QAAM,QAAQA,UAAS,MAAM,OAAO;AAEpC,SAAO,GAAG;AAAA,IACR;AAAA,MACE,MAAM,YAAY,OAAO,OAAO;AAAA,MAChC,cAAc;AAAA,IAChB;AAAA,IACA,OAAO,SAAS,YAAY;AAC1B,YAAM,kBAAkB,MAAM,GAAG,MAAM;AAAA,QACrC;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACX,CAAC;AACD,YAAM,cAAc,CAAC,CAAC,QAAQ,YAC1B,MAAM,UAAU,QAAQ,SAAS,IACjC;AACJ,YAAM,WAAW,MAAM,YAAY,MAAM;AAAA,QACvC,MAAM,QAAQ;AAAA,QACd,QAAQ,gBAAgB,CAAC,EAAE;AAAA,QAC3B,eAAe;AAAA,QACf,iBAAiB;AAAA,MACnB,CAAC;AACD,aAAO;AAAA,QACL,WAAW,SAAS,QACjB,IAAI,CAAC,MAAM,EAAE,QAAQ,EACrB,OAAO,CAAC,MAA2B,CAAC,CAAC,CAAC,EACtC,IAAI,CAAC,MAAM;AACV,gBAAM,WAAW;AACjB,iBAAO,SAAS;AAAA,YACd,SAAS,UAAU;AAAA,YACnB,SAAS,YAAY;AAAA,YACrB,SAAS,cACJ,KAAK,MAAM,SAAS,WAAqB,IAI1C;AAAA,UACN;AAAA,QACF,CAAC;AAAA,MACL;AAAA,IACF;AAAA,EACF;AACF;AAiBO,SAAS,yBAGd,IACA,QAWA;AACA,QAAM,EAAE,SAAS,UAAU,gBAAgB,IAAI;AAAA,IAC7C,GAAG;AAAA,EACL;AACA,QAAM,iBAAiB,OAAO,gBAAgB,iBAAiB;AAC/D,QAAM,aAAa,OAAO,cAAc,OAAO,WAAW;AAC1D,QAAMA,YAAW,IAAI,SAAS,cAAc;AAC5C,QAAM,QAAQA,UAAS,MAAM,OAAO;AAEpC,SAAO,GAAG;AAAA,IACR;AAAA,MACE,MAAM,YAAY,OAAO,OAAO;AAAA,MAChC,cAAc,6BAA6B,SAAS;AAAA,IACtD;AAAA,IACA,OAAO,MAAM,YAAY;AACvB,YAAM,cAAc,CAAC,CAAC,SAAS,YAC3B,MAAM,UAAU,QAAQ,SAAS,IACjC;AAEJ,YAAM,aAAa,MAAM,QAAQ;AAAA,QAC/B,KAAK;AAAA,UAAI,CAAC,QACR,GAAG,MAAM;AAAA,YACP;AAAA,YACA,SAAS;AAAA,YACT,SAAS;AAAA,UACX,CAAC;AAAA,QACH;AAAA,MACF;AACA,YAAM,YAAY;AAAA,QAChB,WACG,IAAI,CAAC,OAAO,MAAM;AACjB,gBAAM,MAAM,KAAK,CAAC;AAElB,gBAAM,gBAA6B;AAKnC,gBAAM,gBAAgB,IAAI,sBAAsB,aAAa;AAE7D,iBAAO,cAAc,IAAI,CAAC,cAAc,MAAM;AAC5C,kBAAM,WAA2B;AAAA,cAC/B,aAAa,KAAK,UAAU,cAAc,CAAC,EAAE,QAAQ;AAAA,YACvD;AACA,qBAAS,UAAU,IAAI,cAAc,CAAC,EAAE;AACxC,qBAAS,YAAY,IAAI,cAAc,CAAC,EAAE,YAAY;AACtD,kBAAM,KAAK,IAAI,QAAQ,KAAK,UAAU,cAAc,CAAC,CAAC,CAAC;AACvD,mBAAO;AAAA,cACL;AAAA,cACA,QAAQ,aAAa;AAAA,cACrB;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,CAAC,EACA,OAAO,CAAC,KAAK,QAAQ;AACpB,iBAAO,IAAI,OAAO,GAAG;AAAA,QACvB,GAAG,CAAC,CAAC;AAAA,MACT;AAAA,IACF;AAAA,EACF;AACF;AASA,eAAsB,oBAAoB,QAGvC;AACD,QAAM,iBAAiB,OAAO,gBAAgB,iBAAiB;AAC/D,QAAMA,YAAW,IAAI,SAAS,cAAc;AAC5C,SAAO,MAAMA,UAAS,YAAY,OAAO,OAAO;AAClD;AASA,eAAsB,sBAAsB,QAGzC;AACD,QAAM,iBAAiB,OAAO,gBAAgB,iBAAiB;AAC/D,QAAMA,YAAW,IAAI,SAAS,cAAc;AAC5C,SAAO,MAAMA,UAAS,cAAc,OAAO,IAAI;AACjD;AASA,eAAsB,oBAAoB,QAGvC;AACD,QAAM,iBAAiB,OAAO,gBAAgB,iBAAiB;AAC/D,QAAMA,YAAW,IAAI,SAAS,cAAc;AAC5C,SAAO,MAAMA,UAAS,YAAY,OAAO,IAAI;AAC/C;AAEA,SAAS,mBAAmB;AAC1B,QAAM,cAAc,QAAQ,IAAI;AAChC,MAAI,CAAC;AACH,UAAM,IAAI;AAAA,MACR;AAAA,IAEF;AACF,SAAO,EAAE,QAAQ,YAAY;AAC/B;","names":["pinecone"]}