UNPKG

@restnfeel/agentc-starter-kit

Version:

한국어 기업용 CMS 모듈 - Task Master AI와 함께 빠르게 웹사이트를 구현할 수 있는 재사용 가능한 컴포넌트 시스템

243 lines (206 loc) 7.31 kB
import type {
  VectorStore,
  Document,
  DocumentChunk,
  SearchResult,
  EmbeddingModel,
} from "../types";
import * as fs from "fs/promises";
import * as path from "path";

/** One embedded chunk held in memory, together with the document it came from. */
interface StoredVector {
  id: string;
  vector: number[];
  chunk: DocumentChunk;
  document: Document;
}

/**
 * In-memory vector store that mirrors its contents to `<storePath>/vectors.json`.
 *
 * Search is a brute-force cosine-similarity scan over every stored vector.
 * Persistence is best-effort: a failed write is logged with `console.warn`
 * and does not reject the calling operation.
 */
export class MemoryVectorStore implements VectorStore {
  /** File name, inside `storePath`, that holds the persisted vectors. */
  private static readonly VECTORS_FILE = "vectors.json";

  private vectors: StoredVector[] = [];
  private readonly embeddingModel: EmbeddingModel;
  private readonly storePath: string;

  /**
   * @param embeddingModel Model used to embed chunk text and search queries.
   * @param storePath Directory that holds `vectors.json`.
   */
  constructor(embeddingModel: EmbeddingModel, storePath: string = "./store") {
    this.embeddingModel = embeddingModel;
    this.storePath = storePath;
  }

  /** Creates the store directory if needed and loads any previously saved vectors. */
  async initialize(): Promise<void> {
    await fs.mkdir(this.storePath, { recursive: true });
    await this.loadVectors();
  }

  /**
   * Embeds every chunk of the given documents, appends the results to the
   * store and persists it. Documents without chunks are skipped.
   *
   * Nothing is added if embedding any document fails.
   *
   * @throws Error if the model returns a different number of embeddings than
   *   chunks for a document.
   */
  async addDocuments(documents: Document[]): Promise<void> {
    const fresh = await this.buildVectors(documents);

    // Push one at a time: `push(...fresh)` passes every element as a call
    // argument and can exceed the engine's argument limit on large batches.
    for (const entry of fresh) {
      this.vectors.push(entry);
    }

    await this.saveVectors();
  }

  /**
   * Returns up to `k` stored chunks ranked by cosine similarity to `query`,
   * best match first.
   *
   * @param query Text to embed and compare against the stored vectors.
   * @param k Maximum number of results; values that are not positive yield `[]`.
   * @throws Error if a stored vector's dimension differs from the query embedding's.
   */
  async search(query: string, k: number = 5): Promise<SearchResult[]> {
    // `!(k > 0)` also covers NaN. A negative k would otherwise make
    // `slice(0, k)` return all but the last |k| hits.
    if (this.vectors.length === 0 || !(k > 0)) {
      return [];
    }

    const queryEmbedding = await this.embeddingModel.embed(query);

    const ranked = this.vectors
      .map((entry) => ({
        entry,
        score: this.cosineSimilarity(queryEmbedding, entry.vector),
      }))
      .sort((a, b) => b.score - a.score);

    return ranked.slice(0, k).map(({ entry, score }) => ({
      chunk: entry.chunk,
      score,
      document: entry.document,
    }));
  }

  /** Removes every vector belonging to `documentId` and persists the change. */
  async delete(documentId: string): Promise<void> {
    this.vectors = this.vectors.filter(
      (entry) => entry.document.id !== documentId
    );
    await this.saveVectors();
  }

  /**
   * Replaces all vectors of `document` with freshly embedded ones.
   *
   * The new embeddings are computed before the old entries are dropped, so a
   * failing embedding call leaves the existing vectors untouched.
   *
   * @throws Error if the model returns a different number of embeddings than chunks.
   */
  async update(document: Document): Promise<void> {
    const replacement = await this.buildVectors([document]);

    this.vectors = this.vectors
      .filter((entry) => entry.document.id !== document.id)
      .concat(replacement);

    await this.saveVectors();
  }

  /** Removes every vector and persists the empty store. */
  async clear(): Promise<void> {
    this.vectors = [];
    await this.saveVectors();
  }

  /** Number of chunk vectors currently held in memory. */
  getCount(): number {
    return this.vectors.length;
  }

  /**
   * Embeds the chunks of each document and returns the resulting entries
   * without touching the store. Each chunk's `embedding` field is set as a
   * side effect.
   */
  private async buildVectors(documents: Document[]): Promise<StoredVector[]> {
    const built: StoredVector[] = [];

    for (const document of documents) {
      const chunks = document.chunks;
      if (!chunks || chunks.length === 0) continue;

      const embeddings = await this.embeddingModel.embedBatch(
        chunks.map((chunk) => chunk.content)
      );

      // A short result would store `undefined` vectors that only blow up
      // later, inside search().
      if (embeddings.length !== chunks.length) {
        throw new Error(
          `Embedding model returned ${embeddings.length} embeddings for ${chunks.length} chunks`
        );
      }

      chunks.forEach((chunk, index) => {
        const embedding = embeddings[index];
        chunk.embedding = embedding;
        built.push({ id: chunk.id, vector: embedding, chunk, document });
      });
    }

    return built;
  }

  /**
   * Cosine similarity of two equal-length vectors; 0 when either has zero magnitude.
   *
   * @throws Error if the vectors differ in length.
   */
  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) {
      throw new Error("Vectors must have the same dimension");
    }

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }

    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    return magnitude === 0 ? 0 : dotProduct / magnitude;
  }

  /**
   * Writes the store to `vectors.json`. Document content is not written; only
   * the id, metadata and source of each document are. Failures are logged and
   * swallowed so that persistence problems never reject the caller.
   */
  private async saveVectors(): Promise<void> {
    try {
      const vectorsPath = path.join(
        this.storePath,
        MemoryVectorStore.VECTORS_FILE
      );
      const data = {
        vectors: this.vectors.map((v) => ({
          id: v.id,
          vector: v.vector,
          chunk: {
            id: v.chunk.id,
            content: v.chunk.content,
            metadata: v.chunk.metadata,
          },
          document: {
            id: v.document.id,
            metadata: v.document.metadata,
            source: v.document.source,
          },
          // Flat copies of the document id/source; loadVectors() reads these.
          documentId: v.document.id,
          documentSource: v.document.source,
        })),
        timestamp: new Date().toISOString(),
      };

      await fs.writeFile(vectorsPath, JSON.stringify(data, null, 2));
    } catch (error) {
      console.warn("Failed to save vectors to disk:", error);
    }
  }

  /**
   * Loads `vectors.json` into memory. A missing file yields an empty store
   * silently; an unreadable or unparsable file yields an empty store with a
   * warning. Individual records that cannot be restored are skipped.
   */
  private async loadVectors(): Promise<void> {
    try {
      const vectorsPath = path.join(
        this.storePath,
        MemoryVectorStore.VECTORS_FILE
      );
      const raw = await fs.readFile(vectorsPath, "utf-8");
      const parsed = JSON.parse(raw);
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped JSON from disk
      const saved: any[] = Array.isArray(parsed?.vectors) ? parsed.vectors : [];

      this.vectors = saved
        .map((record) => this.restoreVector(record))
        .filter((entry): entry is StoredVector => entry !== null);
    } catch (error) {
      this.vectors = [];
      // A missing file is the normal first-run state, not a failure.
      if (!MemoryVectorStore.isMissingFileError(error)) {
        console.warn("Failed to load vectors, starting fresh:", error);
      }
    }
  }

  /**
   * Rebuilds one in-memory entry from its persisted record, or returns `null`
   * when the record has no id or no numeric vector.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped JSON from disk
  private restoreVector(saved: any): StoredVector | null {
    if (
      saved == null ||
      saved.id == null ||
      !Array.isArray(saved.vector) ||
      !saved.vector.every((n: unknown) => typeof n === "number")
    ) {
      return null;
    }

    const savedMeta = saved.document?.metadata ?? {};
    const savedSource: string | undefined =
      typeof saved.documentSource === "string" && saved.documentSource !== ""
        ? saved.documentSource
        : undefined;
    const source = savedSource ?? "unknown";
    const documentId = saved.documentId || saved.id;

    const document: Document = {
      id: documentId,
      content: "", // Full content is never persisted, only a reference.
      metadata: {
        // Saved metadata goes first so the normalized fields below win. JSON
        // stores dates as strings, and spreading last would put those strings
        // back over the rebuilt Date objects.
        ...savedMeta,
        title:
          savedMeta.title || savedSource?.split("/").pop() || "Unknown Document",
        createdAt: MemoryVectorStore.toDate(savedMeta.createdAt),
        updatedAt: MemoryVectorStore.toDate(savedMeta.updatedAt),
        fileType: savedMeta.fileType || "unknown",
        fileSize: savedMeta.fileSize || 0,
        source: savedMeta.source ?? source,
      },
      source,
    };

    const chunkMeta = saved.chunk?.metadata ?? {};
    const chunk: DocumentChunk = {
      id: saved.id,
      content: saved.chunk?.content || "",
      metadata: {
        documentId,
        chunkIndex: chunkMeta.chunkIndex || 0,
        startOffset: chunkMeta.startOffset || 0,
        endOffset: chunkMeta.endOffset || 0,
        tokens: chunkMeta.tokens || 0,
        source,
      },
      embedding: saved.vector,
    };

    return { id: saved.id, vector: saved.vector, chunk, document };
  }

  /** Converts a persisted date value to a `Date`, falling back to "now" when absent or invalid. */
  private static toDate(value: unknown): Date {
    if (typeof value === "string" || typeof value === "number") {
      const date = new Date(value);
      if (!Number.isNaN(date.getTime())) {
        return date;
      }
    }
    return new Date();
  }

  /** True when `error` carries the `ENOENT` code, i.e. the file does not exist. */
  private static isMissingFileError(error: unknown): boolean {
    return (
      typeof error === "object" &&
      error !== null &&
      (error as { code?: unknown }).code === "ENOENT"
    );
  }
}