UNPKG

genkitx-hnsw

Version:

Firebase Genkit AI framework plugin for HNSW vector database. Get AI response enriched with additional context and knowledge with HNSW Vector Database using RAG Implementation

1 lines 4.87 kB
{"version":3,"sources":["../../src/indexer/index.ts"],"sourcesContent":["/**\n * Copyright 2024 Bloom Labs Inc\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport { GoogleGenerativeAIEmbeddings } from '@langchain/google-genai';\nimport { glob } from 'glob';\nimport fs from 'fs';\nimport { CharacterTextSplitter } from 'langchain/text_splitter';\nimport { HNSWLib } from 'langchain/vectorstores';\nimport { TaskType } from '@google/generative-ai';\n\nimport { IndexerFlowOptions, PluginOptions } from '../interfaces';\nimport {\n EMBEDDING_MODEL_NAME,\n EMBEDDING_MODEL,\n EMBEDDING_TITLE,\n} from '../constants';\n\nconst getFilesData = (files: string[]): string[] => {\n console.log(\n `Added ${files.length} files to data. Splitting text into chunks...`\n );\n const filesData: string[] = [];\n for (const file of files) {\n filesData.push(fs.readFileSync(file, 'utf-8'));\n }\n return filesData;\n};\n\nconst getFiles = async (input: string): Promise<string[]> => {\n try {\n return glob(input, { ignore: 'node_modules/**' });\n } catch (error) {\n console.error('Error fetching files:', error);\n throw error;\n }\n};\n\nconst getSplitter = (\n chunkSize: number | undefined,\n separator: string | undefined\n) => {\n return new CharacterTextSplitter({\n chunkSize: chunkSize || 12720,\n separator: separator || '\\n',\n });\n};\n\nconst saveVectorStore = async (\n docs: string[],\n apiKey: string | undefined,\n output: string\n) => {\n console.log('Initializing Store...');\n const store = await HNSWLib.fromTexts(\n docs,\n docs.map((_: any, i: any) => ({ id: i })),\n new GoogleGenerativeAIEmbeddings({\n apiKey: apiKey || process.env.GOOGLE_API_KEY,\n model: EMBEDDING_MODEL,\n modelName: EMBEDDING_MODEL_NAME,\n taskType: TaskType.RETRIEVAL_DOCUMENT,\n title: EMBEDDING_TITLE,\n })\n );\n console.log('Saving Vectorstore');\n await store.save(output);\n return `VectorStore saved to ${output}`;\n};\n\nconst getVectorDocument = (\n filesData: string[],\n textSplitter: { splitText: (arg0: any) => any }\n) => {\n let docs: string[] = [];\n for (const d of filesData) {\n const docOutput = textSplitter.splitText(d);\n docs = [...docs, ...docOutput];\n }\n return docs.splice(docs.length - 4, 4);\n};\n\nconst saveVectorIndexer = async (\n flowOptions: IndexerFlowOptions,\n pluginOptions: PluginOptions\n) => {\n const { dataPath, indexOutputPath, chunkSize, separator } = flowOptions;\n const { apiKey } = pluginOptions;\n\n const files: string[] = await getFiles(dataPath);\n const filesData = getFilesData(files);\n const textSplitter = getSplitter(chunkSize, separator);\n const vectorDocument = getVectorDocument(filesData, textSplitter);\n\n return saveVectorStore(vectorDocument, apiKey, indexOutputPath);\n};\n\nexport { saveVectorIndexer };\n"],"mappings":"AAgBA,SAAS,oCAAoC;AAC7C,SAAS,YAAY;AACrB,OAAO,QAAQ;AACf,SAAS,6BAA6B;AACtC,SAAS,eAAe;AACxB,SAAS,gBAAgB;AAGzB;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,MAAM,eAAe,CAAC,UAA8B;AAClD,UAAQ;AAAA,IACN,SAAS,MAAM,MAAM;AAAA,EACvB;AACA,QAAM,YAAsB,CAAC;AAC7B,aAAW,QAAQ,OAAO;AACxB,cAAU,KAAK,GAAG,aAAa,MAAM,OAAO,CAAC;AAAA,EAC/C;AACA,SAAO;AACT;AAEA,MAAM,WAAW,OAAO,UAAqC;AAC3D,MAAI;AACF,WAAO,KAAK,OAAO,EAAE,QAAQ,kBAAkB,CAAC;AAAA,EAClD,SAAS,OAAO;AACd,YAAQ,MAAM,yBAAyB,KAAK;AAC5C,UAAM;AAAA,EACR;AACF;AAEA,MAAM,cAAc,CAClB,WACA,cACG;AACH,SAAO,IAAI,sBAAsB;AAAA,IAC/B,WAAW,aAAa;AAAA,IACxB,WAAW,aAAa;AAAA,EAC1B,CAAC;AACH;AAEA,MAAM,kBAAkB,OACtB,MACA,QACA,WACG;AACH,UAAQ,IAAI,uBAAuB;AACnC,QAAM,QAAQ,MAAM,QAAQ;AAAA,IAC1B;AAAA,IACA,KAAK,IAAI,CAAC,GAAQ,OAAY,EAAE,IAAI,EAAE,EAAE;AAAA,IACxC,IAAI,6BAA6B;AAAA,MAC/B,QAAQ,UAAU,QAAQ,IAAI;AAAA,MAC9B,OAAO;AAAA,MACP,WAAW;AAAA,MACX,UAAU,SAAS;AAAA,MACnB,OAAO;AAAA,IACT,CAAC;AAAA,EACH;AACA,UAAQ,IAAI,oBAAoB;AAChC,QAAM,MAAM,KAAK,MAAM;AACvB,SAAO,wBAAwB,MAAM;AACvC;AAEA,MAAM,oBAAoB,CACxB,WACA,iBACG;AACH,MAAI,OAAiB,CAAC;AACtB,aAAW,KAAK,WAAW;AACzB,UAAM,YAAY,aAAa,UAAU,CAAC;AAC1C,WAAO,CAAC,GAAG,MAAM,GAAG,SAAS;AAAA,EAC/B;AACA,SAAO,KAAK,OAAO,KAAK,SAAS,GAAG,CAAC;AACvC;AAEA,MAAM,oBAAoB,OACxB,aACA,kBACG;AACH,QAAM,EAAE,UAAU,iBAAiB,WAAW,UAAU,IAAI;AAC5D,QAAM,EAAE,OAAO,IAAI;AAEnB,QAAM,QAAkB,MAAM,SAAS,QAAQ;AAC/C,QAAM,YAAY,aAAa,KAAK;AACpC,QAAM,eAAe,YAAY,WAAW,SAAS;AACrD,QAAM,iBAAiB,kBAAkB,WAAW,YAAY;AAEhE,SAAO,gBAAgB,gBAAgB,QAAQ,eAAe;AAChE;","names":[]}