@restnfeel/agentc-starter-kit
Version:
한국어 기업용 CMS 모듈 - Task Master AI와 함께 빠르게 웹사이트를 구현할 수 있는 재사용 가능한 컴포넌트 시스템
243 lines (206 loc) • 7.31 kB
text/typescript
import {
VectorStore,
Document,
DocumentChunk,
SearchResult,
EmbeddingModel,
} from "../types";
import * as fs from "fs/promises";
import * as path from "path";
/**
 * One entry in the in-memory index: a chunk's embedding vector together
 * with the chunk itself and the document it came from.
 */
interface StoredVector {
  // Unique id for this entry (mirrors chunk.id in addDocuments).
  id: string;
  // Embedding vector computed from the chunk's content.
  vector: number[];
  // The chunk this vector was computed from.
  chunk: DocumentChunk;
  // Parent document the chunk belongs to.
  document: Document;
}
/**
 * In-memory vector store with JSON file persistence.
 *
 * Chunk embeddings are held in a flat array and searched by brute-force
 * cosine similarity. After every mutation the store is mirrored to
 * `<storePath>/vectors.json` so it can be reloaded across restarts.
 */
export class MemoryVectorStore implements VectorStore {
  private vectors: StoredVector[] = [];
  private embeddingModel: EmbeddingModel;
  private storePath: string;

  /**
   * @param embeddingModel Model used to embed chunk texts and queries.
   * @param storePath Directory in which `vectors.json` is persisted.
   */
  constructor(embeddingModel: EmbeddingModel, storePath: string = "./store") {
    this.embeddingModel = embeddingModel;
    this.storePath = storePath;
  }

  /**
   * Ensures the persistence directory exists and loads any previously
   * saved vectors from disk. Call once before using the store.
   */
  async initialize(): Promise<void> {
    await fs.mkdir(this.storePath, { recursive: true });
    await this.loadVectors();
  }

  /**
   * Embeds every chunk of the given documents, adds the resulting
   * vectors to the store, and persists the store to disk.
   *
   * Each chunk is mutated in place to carry its embedding. Documents
   * with no chunks are skipped.
   */
  async addDocuments(documents: Document[]): Promise<void> {
    const newVectors: StoredVector[] = [];
    for (const document of documents) {
      // Skip documents with a missing OR empty chunk list; this also
      // avoids calling embedBatch([]) on backends that reject empty input.
      if (!document.chunks || document.chunks.length === 0) continue;
      const chunkTexts = document.chunks.map((chunk) => chunk.content);
      const embeddings = await this.embeddingModel.embedBatch(chunkTexts);
      for (let i = 0; i < document.chunks.length; i++) {
        const chunk = document.chunks[i];
        const embedding = embeddings[i];
        // Keep the embedding on the chunk too, so callers holding the
        // Document see it populated.
        chunk.embedding = embedding;
        newVectors.push({ id: chunk.id, vector: embedding, chunk, document });
      }
    }
    this.vectors.push(...newVectors);
    await this.saveVectors();
  }

  /**
   * Returns the top-`k` chunks most similar to `query`, ranked by cosine
   * similarity (highest first). Returns [] for an empty store.
   */
  async search(query: string, k: number = 5): Promise<SearchResult[]> {
    if (this.vectors.length === 0) {
      return [];
    }
    const queryEmbedding = await this.embeddingModel.embed(query);
    const scored = this.vectors.map((stored) => ({
      stored,
      score: this.cosineSimilarity(queryEmbedding, stored.vector),
    }));
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, k).map((item) => ({
      chunk: item.stored.chunk,
      score: item.score,
      document: item.stored.document,
    }));
  }

  /** Removes every vector belonging to `documentId` and persists. */
  async delete(documentId: string): Promise<void> {
    this.vectors = this.vectors.filter(
      (stored) => stored.document.id !== documentId
    );
    await this.saveVectors();
  }

  /** Re-indexes `document`: deletes its old vectors, then re-adds it. */
  async update(document: Document): Promise<void> {
    await this.delete(document.id);
    await this.addDocuments([document]);
  }

  /** Empties the store and persists the empty state. */
  async clear(): Promise<void> {
    this.vectors = [];
    await this.saveVectors();
  }

  /** Number of stored chunk vectors (not documents). */
  getCount(): number {
    return this.vectors.length;
  }

  /**
   * Cosine similarity of two equal-length vectors; returns 0 when either
   * vector has zero magnitude.
   * @throws Error when the dimensions differ.
   */
  private cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) {
      throw new Error("Vectors must have the same dimension");
    }
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    if (magnitude === 0) {
      return 0;
    }
    return dotProduct / magnitude;
  }

  /**
   * Serializes the store to `<storePath>/vectors.json`. Full document
   * content is intentionally not persisted — only chunk text plus enough
   * document metadata to reconstruct search results.
   * Best-effort: failures are logged, never thrown.
   */
  private async saveVectors(): Promise<void> {
    try {
      const vectorsPath = path.join(this.storePath, "vectors.json");
      const data = {
        vectors: this.vectors.map((v) => ({
          id: v.id,
          vector: v.vector,
          chunk: {
            id: v.chunk.id,
            content: v.chunk.content,
            metadata: v.chunk.metadata,
          },
          document: {
            id: v.document.id,
            metadata: v.document.metadata,
            source: v.document.source,
          },
          // Flat duplicates kept for backward compatibility with older
          // snapshots that loadVectors() may still encounter.
          documentId: v.document.id,
          documentSource: v.document.source,
        })),
        timestamp: new Date().toISOString(),
      };
      await fs.writeFile(vectorsPath, JSON.stringify(data, null, 2));
    } catch (error) {
      console.warn("Failed to save vectors to disk:", error);
    }
  }

  /**
   * Loads vectors from `<storePath>/vectors.json`, reconstructing
   * Document and DocumentChunk objects from the persisted snapshot.
   * A missing or corrupt file is tolerated: the store starts empty.
   */
  private async loadVectors(): Promise<void> {
    try {
      const vectorsPath = path.join(this.storePath, "vectors.json");
      const data = await fs.readFile(vectorsPath, "utf-8");
      const parsed = JSON.parse(data);
      if (parsed.vectors && Array.isArray(parsed.vectors)) {
        this.vectors = parsed.vectors.map((savedVector: any) => {
          const rawMeta = savedVector.document?.metadata ?? {};
          const source =
            savedVector.documentSource ||
            savedVector.document?.source ||
            "unknown";
          const document: Document = {
            id: savedVector.documentId || savedVector.id,
            content: "", // full content is not persisted, only a reference
            metadata: {
              // Spread the raw persisted metadata FIRST so the normalized
              // values below win. (Spreading it last would clobber the
              // revived Date objects with the raw ISO strings from JSON.)
              ...rawMeta,
              title:
                rawMeta.title || source.split("/").pop() || "Unknown Document",
              createdAt: rawMeta.createdAt
                ? new Date(rawMeta.createdAt)
                : new Date(),
              updatedAt: rawMeta.updatedAt
                ? new Date(rawMeta.updatedAt)
                : new Date(),
              fileType: rawMeta.fileType || "unknown",
              fileSize: rawMeta.fileSize || 0,
              source,
            },
            source,
          };
          const chunk: DocumentChunk = {
            id: savedVector.id,
            content: savedVector.chunk?.content || "",
            metadata: {
              documentId: savedVector.documentId || savedVector.id,
              chunkIndex: savedVector.chunk?.metadata?.chunkIndex || 0,
              startOffset: savedVector.chunk?.metadata?.startOffset || 0,
              endOffset: savedVector.chunk?.metadata?.endOffset || 0,
              tokens: savedVector.chunk?.metadata?.tokens || 0,
              source,
            },
            embedding: savedVector.vector,
          };
          return {
            id: savedVector.id,
            vector: savedVector.vector,
            chunk,
            document,
          };
        });
      } else {
        this.vectors = [];
      }
    } catch (error) {
      // File doesn't exist or is corrupted — start fresh.
      this.vectors = [];
      console.warn("Failed to load vectors, starting fresh:", error);
    }
  }
}