UNPKG

chunk-match

Version:

A Node.js library that semantically chunks text and matches the resulting chunks against a user query using cosine similarity, for precise and relevant text retrieval.

37 lines (32 loc) 1.32 kB
/**
 * Example script: reads the ten text fixtures located next to this module,
 * hands them to `matchChunks` together with a search query, and prints the
 * returned results along with the elapsed time.
 */
import { matchChunks } from '../chunk-match.js';
import fs from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';

// Directory containing this module; the fixture files are resolved against it.
const fixturesDir = dirname(fileURLToPath(import.meta.url));

const textFiles = [
  'text-1.txt',
  'text-2.txt',
  'text-3.txt',
  'text-4.txt',
  'text-5.txt',
  'text-6.txt',
  'text-7.txt',
  'text-8.txt',
  'text-9.txt',
  'text-10.txt',
];

/**
 * Read one fixture as UTF-8 and wrap it in the record shape given to
 * `matchChunks`.
 *
 * @param {string} fileName - File name relative to this module's directory.
 * @returns {Promise<{document_name: string, document_text: string}>}
 */
async function loadDocument(fileName) {
  const filePath = join(fixturesDir, fileName);
  const contents = await fs.promises.readFile(filePath, 'utf8');
  return { document_name: fileName, document_text: contents };
}

// All reads are started together; any single failure rejects the whole batch.
const documents = await Promise.all(
  textFiles.map((fileName) => loadDocument(fileName)),
);

// Options are forwarded to matchChunks unchanged.
// NOTE(review): the meaning of each key is defined by the library, not here —
// confirm against the chunk-match documentation before tuning these values.
const chunkingOptions = {
  maxTokenSize: 500,
  similarityThreshold: 0.5,
  dynamicThresholdLowerBound: 0.4,
  dynamicThresholdUpperBound: 0.8,
  numSimilaritySentencesLookahead: 3,
  combineChunks: true,
  combineChunksSimilarityThreshold: 0.7,
  onnxEmbeddingModel: "Xenova/all-MiniLM-L6-v2",
  dtype: "q8",
  localModelPath: "./models",
  modelCacheDir: "./models",
};

const options = {
  maxResults: 15,
  minSimilarity: 0.5,
  chunkingOptions,
};

const query = 'cosine similarity LLM RAG vector embeddings';
const timerLabel = 'matchChunksDuration';

// Time only the matching call and the printing of its results; file loading
// has already completed by this point.
console.time(timerLabel);
const results = await matchChunks(documents, query, options);
console.log(results);
console.timeEnd(timerLabel);