// @escher-dbai/rag-module
// Version: (not recorded)
// Enterprise RAG module with chat context storage, vector search, and session management. Provides complete chat history retrieval and streaming content extraction for Electron apps.
// 477 lines (384 loc) • 13.3 kB
// JavaScript
const fs = require('fs-extra');
const path = require('path');
const { EventEmitter } = require('events');
const { v4: uuidv4 } = require('uuid');
// Metadata fields that get an inverted index, paired with the name of the
// bucket map that holds them inside the search-index object.
const INDEXED_FIELDS = Object.freeze([
  ['service', 'byService'],
  ['region', 'byRegion'],
  ['cloud', 'byCloud'],
]);

/**
 * Local File-Based Vector Store
 *
 * Pure JavaScript vector store that needs no external database service: all
 * state lives in two JSON files under `<basePath>/data` (read and written
 * through `fs-extra`):
 *   - `documents.json`    every document (content, embedding, metadata)
 *   - `search-index.json` inverted index mapping a metadata value to the ids
 *                         of the documents that carry it
 *
 * Every mutating call loads the whole documents file, changes it in memory and
 * writes it back. Similarity search is a linear cosine-similarity scan.
 *
 * Events emitted: `initialized`, `document-inserted`, `document-updated`,
 * `document-deleted`, and `error` (with a message string). `error` is only
 * emitted when at least one listener is attached, because EventEmitter throws
 * on an unhandled `error` event and that would mask the real failure.
 */
class LocalFileVectorStore extends EventEmitter {
  /**
   * @param {string} basePath - Directory under which the `data` sub-directory
   *   with the JSON files is created.
   * @param {*} configManager - Stored on the instance; not read by any method
   *   of this class.
   * @param {*} encryptionService - Stored on the instance; not read by any
   *   method of this class.
   */
  constructor(basePath, configManager, encryptionService) {
    super();
    this.basePath = basePath;
    this.configManager = configManager;
    this.encryptionService = encryptionService;
    this.dataPath = path.join(basePath, 'data');
    this.documentsFile = path.join(this.dataPath, 'documents.json');
    this.searchIndexFile = path.join(this.dataPath, 'search-index.json');
    this.dimensions = null;
    this.initialized = false;
    // In-memory cache for performance. A Map iterates in insertion order,
    // which _updateCache relies on to find the least recently used entry.
    this.documentCache = new Map();
    this.searchIndex = null;
    this.maxCacheSize = 1000;
  }

  /**
   * Records the embedding dimensionality. The value is written into a newly
   * created documents file and reported by getStats(); it is not used to
   * validate embeddings.
   *
   * @param {number} dimensions
   */
  setDimensions(dimensions) {
    this.dimensions = dimensions;
  }

  /**
   * Creates the data directory, seeds the two JSON files when they do not
   * exist yet, and loads the search index into memory.
   *
   * @returns {Promise<boolean>} Always `true` on success.
   * @throws Rethrows any filesystem error after reporting it via `error`.
   */
  async initialize() {
    try {
      await fs.ensureDir(this.dataPath);

      // Initialize documents file if it doesn't exist
      if (!(await fs.pathExists(this.documentsFile))) {
        await this._saveDocuments(this._emptyDocumentData());
      }

      // Initialize search index file if it doesn't exist
      if (!(await fs.pathExists(this.searchIndexFile))) {
        await this._saveSearchIndex(this._emptySearchIndex());
      }

      // Load search index into memory
      this.searchIndex = await this._loadSearchIndex();
      this.initialized = true;
      this.emit('initialized', 'Local file vector store initialized');
      return true;
    } catch (error) {
      this._emitError(`Failed to initialize local file vector store: ${error.message}`);
      throw error;
    }
  }

  /**
   * Inserts a document, or replaces the stored document with the same id.
   * On replacement the original `created_at` timestamp is kept.
   *
   * @param {{id: *, content: *, embedding: *, metadata: (Object|undefined)}} document
   * @returns {Promise<string>} A freshly generated UUID. It is not stored
   *   anywhere by this class; the document stays addressable by `document.id`.
   * @throws Rethrows any load/save error after reporting it via `error`.
   */
  async insertDocument(document) {
    this._ensureInitialized();
    try {
      const data = await this._loadDocuments();
      const now = new Date().toISOString();

      // Check if document already exists (update case)
      const existingIndex = data.documents.findIndex((doc) => doc.id === document.id);
      const existing = existingIndex === -1 ? null : data.documents[existingIndex];
      const previousCreatedAt = existing && existing.metadata && existing.metadata.created_at;

      const newDocument = {
        id: document.id,
        content: document.content,
        embedding: document.embedding,
        metadata: {
          ...document.metadata,
          created_at: previousCreatedAt || now,
          updated_at: now,
        },
      };

      if (existing) {
        data.documents[existingIndex] = newDocument;
      } else {
        data.documents.push(newDocument);
      }

      // Update search index
      await this._updateSearchIndex(newDocument);

      // Save documents
      data.count = data.documents.length;
      data.lastModified = now;
      await this._saveDocuments(data);

      // Update cache
      this._updateCache(newDocument);
      this.emit('document-inserted', { id: document.id });
      return uuidv4(); // Return a unique vector ID
    } catch (error) {
      this._emitError(`Failed to insert document: ${error.message}`);
      throw error;
    }
  }

  /**
   * Ranks stored documents by cosine similarity to `queryEmbedding`.
   * Documents that have no embedding are skipped.
   *
   * @param {number[]} queryEmbedding
   * @param {Object} [options]
   * @param {number} [options.limit=10] - Maximum number of results.
   * @param {number} [options.scoreThreshold=0.5] - Minimum similarity to keep.
   * @param {Object} [options.filter] - Optional `service` / `region` / `cloud`
   *   metadata filter.
   * @returns {Promise<Array<{id: *, content: *, metadata: Object, score: number}>>}
   *   Matches sorted by descending score.
   * @throws If an embedding's length differs from the query's, or loading fails.
   */
  async search(queryEmbedding, options = {}) {
    this._ensureInitialized();
    try {
      const { limit = 10, scoreThreshold = 0.5, filter } = options;
      const documents = await this._getFilteredDocuments(filter);

      const results = [];
      for (const doc of documents) {
        // A document stored without an embedding cannot be scored; skipping it
        // keeps one such record from failing every search.
        if (doc.embedding == null) {
          continue;
        }
        const similarity = this._cosineSimilarity(queryEmbedding, doc.embedding);
        if (similarity >= scoreThreshold) {
          results.push({
            id: doc.id,
            content: doc.content,
            metadata: doc.metadata,
            score: similarity,
          });
        }
      }

      // Sort by similarity (highest first) and limit
      return results.sort((a, b) => b.score - a.score).slice(0, limit);
    } catch (error) {
      this._emitError(`Search failed: ${error.message}`);
      throw error;
    }
  }

  /**
   * Looks a document up by id, consulting the in-memory cache first.
   *
   * @param {*} id
   * @returns {Promise<Object|null>} The document, or `null` when it does not
   *   exist or loading failed (the failure is reported via `error`).
   */
  async getById(id) {
    this._ensureInitialized();
    try {
      // Check cache first; re-inserting marks the entry as recently used.
      if (this.documentCache.has(id)) {
        const cached = this.documentCache.get(id);
        this._updateCache(cached);
        return cached;
      }

      // Load from file
      const data = await this._loadDocuments();
      const document = data.documents.find((doc) => doc.id === id);
      if (document) {
        this._updateCache(document);
        return document;
      }
      return null;
    } catch (error) {
      this._emitError(`Failed to get document by ID: ${error.message}`);
      return null;
    }
  }

  /**
   * Lists documents without their embeddings.
   *
   * @param {Object} [options]
   * @param {number} [options.limit=50] - Maximum number of documents.
   * @param {Object} [options.filter] - Optional `service` / `region` / `cloud`
   *   metadata filter.
   * @returns {Promise<Array<{id: *, content: *, metadata: Object}>>}
   * @throws Rethrows any load error after reporting it via `error`.
   */
  async listDocuments(options = {}) {
    this._ensureInitialized();
    try {
      const { limit = 50, filter } = options;
      const documents = await this._getFilteredDocuments(filter);
      return documents.slice(0, limit).map((doc) => ({
        id: doc.id,
        content: doc.content,
        metadata: doc.metadata,
      }));
    } catch (error) {
      this._emitError(`Failed to list documents: ${error.message}`);
      throw error;
    }
  }

  /**
   * Shallow-merges `updates` into the stored document. `updates.metadata` is
   * merged key by key into the existing metadata and `updated_at` is refreshed.
   * The document id is never changed, even if `updates` carries an `id`,
   * because the search index and cache are keyed by it.
   *
   * @param {*} id
   * @param {Object} updates
   * @returns {Promise<boolean>} `true` when updated; `false` when the id is
   *   unknown or the update failed (the failure is reported via `error`).
   */
  async updateDocument(id, updates) {
    this._ensureInitialized();
    try {
      const data = await this._loadDocuments();
      const docIndex = data.documents.findIndex((doc) => doc.id === id);
      if (docIndex === -1) {
        return false;
      }

      const now = new Date().toISOString();
      const current = data.documents[docIndex];
      const updated = {
        ...current,
        ...updates,
        id: current.id,
        metadata: {
          ...current.metadata,
          ...updates.metadata,
          updated_at: now,
        },
      };
      data.documents[docIndex] = updated;

      // Update search index if metadata changed
      if (updates.metadata) {
        await this._updateSearchIndex(updated);
      }

      // Save changes
      data.lastModified = now;
      await this._saveDocuments(data);

      // Update cache
      this._updateCache(updated);
      this.emit('document-updated', { id });
      return true;
    } catch (error) {
      this._emitError(`Failed to update document: ${error.message}`);
      return false;
    }
  }

  /**
   * Removes a document from the documents file, the search index and the cache.
   *
   * @param {*} id
   * @returns {Promise<boolean>} `true` when deleted; `false` when the id is
   *   unknown or the deletion failed (the failure is reported via `error`).
   */
  async deleteDocument(id) {
    this._ensureInitialized();
    try {
      const data = await this._loadDocuments();
      const remaining = data.documents.filter((doc) => doc.id !== id);
      if (remaining.length === data.documents.length) {
        return false; // Document not found
      }
      data.documents = remaining;

      // Remove from search index
      await this._removeFromSearchIndex(id);

      // Save changes
      data.count = remaining.length;
      data.lastModified = new Date().toISOString();
      await this._saveDocuments(data);

      // Remove from cache
      this.documentCache.delete(id);
      this.emit('document-deleted', { id });
      return true;
    } catch (error) {
      this._emitError(`Failed to delete document: ${error.message}`);
      return false;
    }
  }

  /**
   * Reports store statistics.
   *
   * @returns {Promise<Object>} `{ totalPoints, dimensions, storageType,
   *   lastModified }`. When the documents file cannot be read, `totalPoints`
   *   is 0 and an `error` message replaces `lastModified`.
   */
  async getStats() {
    this._ensureInitialized();
    try {
      const data = await this._loadDocuments();
      return {
        totalPoints: data.count,
        dimensions: this.dimensions,
        storageType: 'local-files',
        lastModified: data.lastModified,
      };
    } catch (error) {
      return {
        totalPoints: 0,
        dimensions: this.dimensions,
        storageType: 'local-files',
        error: error.message,
      };
    }
  }

  // Private methods

  /** Builds the content of a brand-new, empty documents file. */
  _emptyDocumentData() {
    const now = new Date().toISOString();
    return {
      documents: [],
      count: 0,
      dimensions: this.dimensions,
      created: now,
      lastModified: now,
    };
  }

  /** Builds the content of a brand-new, empty search index. */
  _emptySearchIndex() {
    return {
      byService: {},
      byRegion: {},
      byCloud: {},
      byKeywords: {},
      lastUpdated: new Date().toISOString(),
    };
  }

  /**
   * Reads the documents file.
   *
   * Only a missing file is treated as an empty store. Any other failure
   * (corrupt JSON, permission problem) is rethrown: returning an empty dataset
   * there would let the next save overwrite every stored document.
   *
   * @returns {Promise<Object>} The parsed documents file.
   */
  async _loadDocuments() {
    try {
      return await fs.readJson(this.documentsFile);
    } catch (error) {
      if (error && error.code === 'ENOENT') {
        return this._emptyDocumentData();
      }
      throw error;
    }
  }

  /** Writes the documents file (pretty-printed with two spaces). */
  async _saveDocuments(data) {
    await fs.writeJson(this.documentsFile, data, { spaces: 2 });
  }

  /**
   * Reads the search index, falling back to an empty index when the file
   * cannot be read. The index is derived data used only to pre-filter
   * candidates, so this load stays best-effort.
   *
   * @returns {Promise<Object>} The parsed index or an empty one.
   */
  async _loadSearchIndex() {
    try {
      return await fs.readJson(this.searchIndexFile);
    } catch (error) {
      return this._emptySearchIndex();
    }
  }

  /** Writes the search index file (pretty-printed with two spaces). */
  async _saveSearchIndex(index) {
    await fs.writeJson(this.searchIndexFile, index, { spaces: 2 });
  }

  /**
   * Returns the id list stored for `key` in the named index, or `null`.
   * The own-property check keeps inherited names such as "constructor" from
   * being mistaken for a bucket.
   */
  _getIndexBucket(indexName, key) {
    const index = this.searchIndex[indexName];
    if (index && Object.prototype.hasOwnProperty.call(index, key) && Array.isArray(index[key])) {
      return index[key];
    }
    return null;
  }

  /** Removes `documentId` from every in-memory index bucket, dropping buckets left empty. */
  _unindexDocument(documentId) {
    for (const [, indexName] of INDEXED_FIELDS) {
      const index = this.searchIndex[indexName];
      if (!index) {
        continue;
      }
      for (const key of Object.keys(index)) {
        const bucket = index[key];
        if (!Array.isArray(bucket)) {
          continue;
        }
        const position = bucket.indexOf(documentId);
        if (position !== -1) {
          bucket.splice(position, 1);
        }
        if (bucket.length === 0) {
          delete index[key];
        }
      }
    }
  }

  /**
   * Re-indexes a document under its current `service`, `region` and `cloud`
   * metadata and persists the index. Entries from earlier metadata values are
   * removed first so a changed value does not leave a stale id behind.
   *
   * @param {{id: *, metadata: (Object|undefined)}} document
   */
  async _updateSearchIndex(document) {
    const metadata = document.metadata || {};
    this._unindexDocument(document.id);

    for (const [field, indexName] of INDEXED_FIELDS) {
      const value = metadata[field];
      if (!value) {
        continue;
      }
      if (!this.searchIndex[indexName]) {
        this.searchIndex[indexName] = {};
      }
      let bucket = this._getIndexBucket(indexName, value);
      if (!bucket) {
        bucket = [];
        this.searchIndex[indexName][value] = bucket;
      }
      bucket.push(document.id);
    }

    this.searchIndex.lastUpdated = new Date().toISOString();
    await this._saveSearchIndex(this.searchIndex);
  }

  /** Removes a document id from all indices and persists the index. */
  async _removeFromSearchIndex(documentId) {
    this._unindexDocument(documentId);
    this.searchIndex.lastUpdated = new Date().toISOString();
    await this._saveSearchIndex(this.searchIndex);
  }

  /**
   * Loads the documents that satisfy `filter`.
   *
   * Only the truthy `service`, `region` and `cloud` keys of `filter` are
   * criteria. Index buckets narrow the candidates, and each candidate's own
   * metadata is then checked against every criterion. A criterion whose value
   * has no bucket therefore still applies instead of being silently dropped.
   *
   * @param {Object} [filter]
   * @returns {Promise<Object[]>} Matching documents in stored order.
   */
  async _getFilteredDocuments(filter) {
    const data = await this._loadDocuments();
    if (!filter || Object.keys(filter).length === 0) {
      return data.documents;
    }

    const criteria = INDEXED_FIELDS.filter(([field]) => filter[field]);

    // Intersect the index buckets that exist for the requested values.
    let candidateIds = null;
    for (const [field, indexName] of criteria) {
      const bucket = this._getIndexBucket(indexName, filter[field]);
      if (!bucket) {
        continue;
      }
      const narrowed = candidateIds === null ? bucket : bucket.filter((id) => candidateIds.has(id));
      candidateIds = new Set(narrowed);
    }

    return data.documents.filter((doc) => {
      if (candidateIds !== null && !candidateIds.has(doc.id)) {
        return false;
      }
      const metadata = doc.metadata || {};
      // Compare as strings because index buckets are keyed by stringified values.
      return criteria.every(([field]) => {
        const value = metadata[field];
        return value != null && String(value) === String(filter[field]);
      });
    });
  }

  /**
   * Cosine similarity of two equally long numeric vectors.
   *
   * @param {number[]} vecA
   * @param {number[]} vecB
   * @returns {number} Similarity, or 0 when either vector has zero magnitude.
   * @throws {Error} When the vectors differ in length.
   */
  _cosineSimilarity(vecA, vecB) {
    if (vecA.length !== vecB.length) {
      throw new Error('Vectors must have the same dimensions');
    }
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < vecA.length; i++) {
      dotProduct += vecA[i] * vecB[i];
      normA += vecA[i] * vecA[i];
      normB += vecB[i] * vecB[i];
    }
    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
    return magnitude === 0 ? 0 : dotProduct / magnitude;
  }

  /**
   * Stores a document in the LRU cache as its most recently used entry.
   * An existing entry is refreshed in place; the least recently used entry is
   * evicted only when a new id arrives while the cache is full.
   *
   * @param {{id: *}} document
   */
  _updateCache(document) {
    if (this.documentCache.has(document.id)) {
      // Delete first so the following set() moves the key to the newest slot.
      this.documentCache.delete(document.id);
    } else if (this.documentCache.size >= this.maxCacheSize) {
      const oldestKey = this.documentCache.keys().next().value;
      this.documentCache.delete(oldestKey);
    }
    this.documentCache.set(document.id, document);
  }

  /**
   * Emits `error` with `message` only when someone is listening. Emitting an
   * unhandled `error` event makes EventEmitter throw, which would replace the
   * original failure or break the null/false return contract.
   *
   * @param {string} message
   */
  _emitError(message) {
    if (this.listenerCount('error') > 0) {
      this.emit('error', message);
    }
  }

  /** @throws {Error} When initialize() has not completed successfully. */
  _ensureInitialized() {
    if (!this.initialized) {
      throw new Error('Local file vector store must be initialized before use');
    }
  }
}
// CommonJS export: the class itself is this module's only export.
module.exports = LocalFileVectorStore;