@genkit-ai/dev-local-vectorstore
Version:
Genkit AI framework plugin for temporary local vector database.
1 lines • 9.23 kB
Source Map (JSON)
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport similarity from 'compute-cosine-similarity';\nimport * as fs from 'fs';\nimport { z, type Embedding, type Genkit } from 'genkit';\nimport type { EmbedderArgument } from 'genkit/embedder';\nimport { genkitPlugin, type GenkitPlugin } from 'genkit/plugin';\nimport {\n CommonRetrieverOptionsSchema,\n Document,\n indexerRef,\n retrieverRef,\n type DocumentData,\n} from 'genkit/retriever';\nimport { Md5 } from 'ts-md5';\n\nconst _LOCAL_FILESTORE = '__db_{INDEX_NAME}.json';\n\ninterface DbValue {\n doc: DocumentData;\n embedding: Embedding;\n}\n\nfunction loadFilestore(indexName: string) {\n let existingData = {};\n const indexFileName = _LOCAL_FILESTORE.replace('{INDEX_NAME}', indexName);\n if (fs.existsSync(indexFileName)) {\n existingData = JSON.parse(fs.readFileSync(indexFileName).toString());\n }\n return existingData;\n}\n\nfunction addDocument(\n embedding: Embedding,\n doc: Document,\n contents: Record<string, DbValue>\n) {\n const id = Md5.hashStr(JSON.stringify(doc));\n if (!(id in contents)) {\n contents[id] = { doc, embedding };\n } else {\n console.debug(`Skipping ${id} since it is already present`);\n }\n}\n\ninterface Params<EmbedderCustomOptions extends z.ZodTypeAny> {\n indexName: string;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n}\n\n/**\n * Local file-based vectorstore plugin that provides retriever and indexer.\n *\n * NOT INTENDED FOR USE IN PRODUCTION\n */\nexport function devLocalVectorstore<EmbedderCustomOptions extends z.ZodTypeAny>(\n params: Params<EmbedderCustomOptions>[]\n): GenkitPlugin {\n return genkitPlugin('devLocalVectorstore', async (ai) => {\n params.map((p) => configureDevLocalRetriever(ai, p));\n params.map((p) => configureDevLocalIndexer(ai, p));\n });\n}\n\nexport default devLocalVectorstore;\n\n/**\n * Local file-based vectorstore retriever reference\n */\nexport function devLocalRetrieverRef(indexName: string) {\n return retrieverRef({\n name: `devLocalVectorstore/${indexName}`,\n info: {\n label: `Local file-based Retriever - ${indexName}`,\n },\n configSchema: CommonRetrieverOptionsSchema.optional(),\n });\n}\n\n/**\n * Local file-based indexer reference\n */\nexport function devLocalIndexerRef(indexName: string) {\n return indexerRef({\n name: `devLocalVectorstore/${indexName}`,\n info: {\n label: `Local file-based Indexer - ${indexName}`,\n },\n configSchema: z.null().optional(),\n });\n}\n\nasync function importDocumentsToLocalVectorstore<\n EmbedderCustomOptions extends z.ZodTypeAny,\n>(\n ai: Genkit,\n params: {\n indexName: string;\n docs: Array<Document>;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { docs, embedder, embedderOptions } = { ...params };\n const data = loadFilestore(params.indexName);\n\n await Promise.all(\n docs.map(async (doc) => {\n const embeddings = await ai.embed({\n embedder,\n content: doc,\n options: embedderOptions,\n });\n const embeddingDocs = doc.getEmbeddingDocuments(embeddings);\n for (const i in embeddingDocs) {\n addDocument(embeddings[i], embeddingDocs[i], data);\n }\n })\n );\n\n // Update the file\n fs.writeFileSync(\n _LOCAL_FILESTORE.replace('{INDEX_NAME}', params.indexName),\n JSON.stringify(data, null, 2)\n );\n}\n\nasync function getClosestDocuments<\n I extends z.ZodTypeAny,\n EmbedderCustomOptions extends z.ZodTypeAny,\n>(params: {\n queryEmbeddings: Array<number>;\n db: Record<string, DbValue>;\n k: number;\n}): Promise<Document[]> {\n const scoredDocs: { score: number; doc: Document }[] = [];\n // Very dumb way to check for similar docs.\n for (const value of Object.values(params.db)) {\n const thisEmbedding = value.embedding.embedding;\n const score = similarity(params.queryEmbeddings, thisEmbedding) ?? 0;\n scoredDocs.push({\n score,\n doc: new Document(value.doc),\n });\n }\n\n scoredDocs.sort((a, b) => (a.score > b.score ? -1 : 1));\n return scoredDocs.slice(0, params.k).map((o) => o.doc);\n}\n\n/**\n * Configures a local vectorstore retriever\n */\nfunction configureDevLocalRetriever<EmbedderCustomOptions extends z.ZodTypeAny>(\n ai: Genkit,\n params: {\n indexName: string;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { embedder, embedderOptions } = params;\n const vectorstore = ai.defineRetriever(\n {\n name: `devLocalVectorstore/${params.indexName}`,\n configSchema: CommonRetrieverOptionsSchema,\n },\n async (content, options) => {\n const db = loadFilestore(params.indexName);\n const embeddings = await ai.embed({\n embedder,\n content,\n options: embedderOptions,\n });\n return {\n documents: await getClosestDocuments({\n k: options?.k ?? 3,\n queryEmbeddings: embeddings[0].embedding,\n db,\n }),\n };\n }\n );\n return vectorstore;\n}\n\n/**\n * Configures a local vectorstore indexer.\n */\nfunction configureDevLocalIndexer<EmbedderCustomOptions extends z.ZodTypeAny>(\n ai: Genkit,\n params: {\n indexName: string;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { embedder, embedderOptions } = params;\n const vectorstore = ai.defineIndexer(\n { name: `devLocalVectorstore/${params.indexName}` },\n async (docs) => {\n await importDocumentsToLocalVectorstore(ai, {\n indexName: params.indexName,\n docs,\n embedder,\n embedderOptions: embedderOptions,\n });\n }\n );\n return vectorstore;\n}\n"],"mappings":"AAgBA,OAAO,gBAAgB;AACvB,YAAY,QAAQ;AACpB,SAAS,SAAsC;AAE/C,SAAS,oBAAuC;AAChD;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OAEK;AACP,SAAS,WAAW;AAEpB,MAAM,mBAAmB;AAOzB,SAAS,cAAc,WAAmB;AACxC,MAAI,eAAe,CAAC;AACpB,QAAM,gBAAgB,iBAAiB,QAAQ,gBAAgB,SAAS;AACxE,MAAI,GAAG,WAAW,aAAa,GAAG;AAChC,mBAAe,KAAK,MAAM,GAAG,aAAa,aAAa,EAAE,SAAS,CAAC;AAAA,EACrE;AACA,SAAO;AACT;AAEA,SAAS,YACP,WACA,KACA,UACA;AACA,QAAM,KAAK,IAAI,QAAQ,KAAK,UAAU,GAAG,CAAC;AAC1C,MAAI,EAAE,MAAM,WAAW;AACrB,aAAS,EAAE,IAAI,EAAE,KAAK,UAAU;AAAA,EAClC,OAAO;AACL,YAAQ,MAAM,YAAY,EAAE,8BAA8B;AAAA,EAC5D;AACF;AAaO,SAAS,oBACd,QACc;AACd,SAAO,aAAa,uBAAuB,OAAO,OAAO;AACvD,WAAO,IAAI,CAAC,MAAM,2BAA2B,IAAI,CAAC,CAAC;AACnD,WAAO,IAAI,CAAC,MAAM,yBAAyB,IAAI,CAAC,CAAC;AAAA,EACnD,CAAC;AACH;AAEA,IAAO,gBAAQ;AAKR,SAAS,qBAAqB,WAAmB;AACtD,SAAO,aAAa;AAAA,IAClB,MAAM,uBAAuB,SAAS;AAAA,IACtC,MAAM;AAAA,MACJ,OAAO,gCAAgC,SAAS;AAAA,IAClD;AAAA,IACA,cAAc,6BAA6B,SAAS;AAAA,EACtD,CAAC;AACH;AAKO,SAAS,mBAAmB,WAAmB;AACpD,SAAO,WAAW;AAAA,IAChB,MAAM,uBAAuB,SAAS;AAAA,IACtC,MAAM;AAAA,MACJ,OAAO,8BAA8B,SAAS;AAAA,IAChD;AAAA,IACA,cAAc,EAAE,KAAK,EAAE,SAAS;AAAA,EAClC,CAAC;AACH;AAEA,eAAe,kCAGb,IACA,QAMA;AACA,QAAM,EAAE,MAAM,UAAU,gBAAgB,IAAI,EAAE,GAAG,OAAO;AACxD,QAAM,OAAO,cAAc,OAAO,SAAS;AAE3C,QAAM,QAAQ;AAAA,IACZ,KAAK,IAAI,OAAO,QAAQ;AACtB,YAAM,aAAa,MAAM,GAAG,MAAM;AAAA,QAChC;AAAA,QACA,SAAS;AAAA,QACT,SAAS;AAAA,MACX,CAAC;AACD,YAAM,gBAAgB,IAAI,sBAAsB,UAAU;AAC1D,iBAAW,KAAK,eAAe;AAC7B,oBAAY,WAAW,CAAC,GAAG,cAAc,CAAC,GAAG,IAAI;AAAA,MACnD;AAAA,IACF,CAAC;AAAA,EACH;AAGA,KAAG;AAAA,IACD,iBAAiB,QAAQ,gBAAgB,OAAO,SAAS;AAAA,IACzD,KAAK,UAAU,MAAM,MAAM,CAAC;AAAA,EAC9B;AACF;AAEA,eAAe,oBAGb,QAIsB;AACtB,QAAM,aAAiD,CAAC;AAExD,aAAW,SAAS,OAAO,OAAO,OAAO,EAAE,GAAG;AAC5C,UAAM,gBAAgB,MAAM,UAAU;AACtC,UAAM,QAAQ,WAAW,OAAO,iBAAiB,aAAa,KAAK;AACnE,eAAW,KAAK;AAAA,MACd;AAAA,MACA,KAAK,IAAI,SAAS,MAAM,GAAG;AAAA,IAC7B,CAAC;AAAA,EACH;AAEA,aAAW,KAAK,CAAC,GAAG,MAAO,EAAE,QAAQ,EAAE,QAAQ,KAAK,CAAE;AACtD,SAAO,WAAW,MAAM,GAAG,OAAO,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,GAAG;AACvD;AAKA,SAAS,2BACP,IACA,QAKA;AACA,QAAM,EAAE,UAAU,gBAAgB,IAAI;AACtC,QAAM,cAAc,GAAG;AAAA,IACrB;AAAA,MACE,MAAM,uBAAuB,OAAO,SAAS;AAAA,MAC7C,cAAc;AAAA,IAChB;AAAA,IACA,OAAO,SAAS,YAAY;AAC1B,YAAM,KAAK,cAAc,OAAO,SAAS;AACzC,YAAM,aAAa,MAAM,GAAG,MAAM;AAAA,QAChC;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACX,CAAC;AACD,aAAO;AAAA,QACL,WAAW,MAAM,oBAAoB;AAAA,UACnC,GAAG,SAAS,KAAK;AAAA,UACjB,iBAAiB,WAAW,CAAC,EAAE;AAAA,UAC/B;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAKA,SAAS,yBACP,IACA,QAKA;AACA,QAAM,EAAE,UAAU,gBAAgB,IAAI;AACtC,QAAM,cAAc,GAAG;AAAA,IACrB,EAAE,MAAM,uBAAuB,OAAO,SAAS,GAAG;AAAA,IAClD,OAAO,SAAS;AACd,YAAM,kCAAkC,IAAI;AAAA,QAC1C,WAAW,OAAO;AAAA,QAClB;AAAA,QACA;AAAA,QACA;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AACA,SAAO;AACT;","names":[]}