UNPKG

generator-begcode

Version:

Spring Boot + Angular/React/Vue in one handy generator

121 lines (120 loc) 5.21 kB
import path from 'path-browserify';
import { normalize, normalizedCosineSimilarity } from './utils.js';
import { LocalDocumentStore } from './LocalDocumentStore.js';
import { getTextFromNextChunks, getTextFromPriorChunks, sortDocumentsByIndex } from '../chunking/utils.js';

/**
 * Comparator that orders `{ score }` entries from highest to lowest score.
 */
const byDescendingScore = (a, b) => b.score - a.score;

/**
 * Resolves the embedding vector for a single query.
 *
 * Calls `embeddingApi.createEmbeddings(query)` and returns the `embedding`
 * of the first result. Throws if the API yields no results.
 *
 * @param {object} embeddingApi - Object exposing `createEmbeddings`.
 * @param {*} query - Query forwarded unchanged to `createEmbeddings`.
 * @returns {Promise<*>} The `embedding` field of the first result.
 */
async function embedQuery(embeddingApi, query) {
  const embeddingResults = await embeddingApi.createEmbeddings(query);
  return embeddingResults[0].embedding;
}

/**
 * Scores every document against a query vector, preserving input order.
 *
 * The query vector is normalized once; each document's vector is read via
 * `document.vector()`, normalized, and compared with
 * `normalizedCosineSimilarity`.
 *
 * @param {Array} documents - Documents exposing a `vector()` method.
 * @param {*} queryVector - Query embedding.
 * @returns {Array<{document: *, score: number}>} One entry per document, unsorted.
 */
function scoreDocuments(documents, queryVector) {
  const normalizedQueryVector = normalize(queryVector);
  return documents.map(document => {
    const vector = document.vector();
    const normalizedVector = normalize(vector);
    const score = normalizedCosineSimilarity(queryVector, normalizedQueryVector, vector, normalizedVector);
    return { document, score };
  });
}

/**
 * Returns the documents ordered from highest to lowest score for a query vector.
 *
 * @param {Array} documents - Documents exposing a `vector()` method.
 * @param {*} queryVector - Query embedding.
 * @returns {Array} The documents, best score first.
 */
function rankDocuments(documents, queryVector) {
  return scoreDocuments(documents, queryVector)
    .sort(byDescendingScore)
    .map(({ document }) => document);
}

/**
 * A collection of embedded documents stored under a workspace URI, searchable
 * by similarity score against a query embedding.
 */
export class LocalCollection {
  uri;
  embeddingApi;
  workspace;
  documentStore;

  /**
   * @param {string} uri - Location of the collection inside the workspace.
   * @param {object} embeddingApi - Object exposing `createEmbeddings`.
   * @param {object} workspace - Workspace used for storage (`mkdirSync`/`rmdirSync` are called on it).
   */
  constructor(uri, embeddingApi, workspace) {
    this.uri = uri;
    this.embeddingApi = embeddingApi;
    this.workspace = workspace;
    this.documentStore = new LocalDocumentStore(this.workspace, uri);
  }

  /** The collection name: the base name of `uri`. */
  get name() {
    return path.basename(this.uri);
  }

  /**
   * Embeds the given items and adds one document per embedding result to the store.
   *
   * @param {Array} items - Inputs passed to `createEmbeddings`; nothing happens when empty.
   * @param {Array} [metadatas] - Optional metadata, matched to results by position.
   * @returns {Promise<void>}
   */
  async add(items, metadatas) {
    if (!items.length) {
      return;
    }
    const results = await this.embeddingApi.createEmbeddings(items);
    let idx = 0;
    // `for await` is kept from the original; it also iterates plain arrays and
    // awaits each entry. NOTE(review): a plain `for...of` is probably enough —
    // confirm what `createEmbeddings` returns.
    for await (const result of results) {
      this.documentStore.add({
        text: result.input,
        vector: result.embedding,
        // Missing or shorter metadata lists simply yield `undefined`.
        metadata: metadatas?.[idx],
      });
      idx += 1;
    }
  }

  /**
   * Returns the stored documents ordered by descending similarity to `query`.
   *
   * @param {*} query - Query forwarded to `createEmbeddings`.
   * @param {number} [limit] - Maximum number of documents; a falsy value returns all.
   * @returns {Promise<Array>} Documents, best match first.
   */
  async search(query, limit) {
    const queryVector = await embedQuery(this.embeddingApi, query);
    const ranked = rankDocuments(this.documentStore.list(), queryVector);
    return limit ? ranked.slice(0, limit) : ranked;
  }

  /**
   * Async generator yielding the stored documents from best to worst match.
   *
   * @param {*} queryOrVector - A string query (embedded first) or an
   *   already-computed query vector (used as is).
   * @yields Documents in descending score order.
   */
  async *iterativeSearch(queryOrVector) {
    const queryVector = typeof queryOrVector === 'string'
      ? await embedQuery(this.embeddingApi, queryOrVector)
      : queryOrVector;
    yield* rankDocuments(this.documentStore.list(), queryVector);
  }

  /**
   * Searches and returns each match together with text from neighbouring chunks.
   *
   * Half of `surroundingCharacters` (rounded down) is requested before the
   * match and half after it, via `getTextFromPriorChunks` and
   * `getTextFromNextChunks`.
   *
   * @param {*} query - Query forwarded to `search`.
   * @param {object} opts
   * @param {number} opts.surroundingCharacters - Total character budget for surrounding text.
   * @param {number} [opts.overlap] - Overlap passed to the chunk helpers; defaults to 0.
   * @param {number} [opts.limit] - Maximum number of matches; a falsy value returns all.
   * @returns {Promise<Array<{match: *, withSurrounding: string}>>}
   */
  async searchWithSurroundingContext(query, opts) {
    const { surroundingCharacters, overlap, limit } = opts;
    const halfSurroundChars = Math.floor(surroundingCharacters / 2);
    const results = await this.search(query);
    // NOTE(review): if `sortDocumentsByIndex` sorts in place, `results` is
    // reordered here as well — confirm. Call order is kept as in the original.
    const resultsSortedByIndex = sortDocumentsByIndex(results);
    // Apply the limit before building context so that surrounding text is only
    // assembled for the matches that are actually returned.
    const selected = limit ? results.slice(0, limit) : results;
    return selected.map(result => {
      const resultIndex = result.metadata().index;
      const textBehind = getTextFromPriorChunks({
        sortedElements: resultsSortedByIndex,
        currentIndex: resultIndex,
        overlap: overlap ?? 0,
        characterLimit: halfSurroundChars,
      });
      const textForward = getTextFromNextChunks({
        sortedElements: resultsSortedByIndex,
        currentIndex: resultIndex,
        overlap: overlap ?? 0,
        characterLimit: halfSurroundChars,
      });
      const withSurrounding = [textBehind, result.text(), textForward].join('');
      return {
        match: result,
        withSurrounding,
      };
    });
  }

  /**
   * Like `search`, but returns at most one document per distinct `text()` value.
   *
   * When several documents share the same text, the one listed last by the
   * document store is kept.
   *
   * @param {*} query - Query forwarded to `createEmbeddings`.
   * @param {number} [limit] - Maximum number of documents; `undefined` returns all.
   * @returns {Promise<Array>} Unique documents, best match first.
   */
  async searchUnique(query, limit) {
    const queryVector = await embedQuery(this.embeddingApi, query);
    const scores = scoreDocuments(this.documentStore.list(), queryVector);
    // A Map keyed by text keeps the last entry seen for each distinct text.
    const uniqueByText = new Map(scores.map(entry => [entry.document.text(), entry]));
    return [...uniqueByText.values()]
      .sort(byDescendingScore)
      .slice(0, limit)
      .map(({ document }) => document);
  }

  /** Creates the collection directory at `uri` (recursively) in the workspace. */
  save() {
    this.workspace.mkdirSync(this.uri, { recursive: true });
  }

  /** Removes the collection directory at `uri` (recursively) from the workspace. */
  delete() {
    this.workspace.rmdirSync(this.uri, { recursive: true });
  }
}