@escher-dbai/rag-module
Version:
Enterprise RAG module with chat context storage, vector search, and session management. Complete chat history retrieval and streaming content extraction for Electron apps.
78 lines (64 loc) • 2.03 kB
JavaScript
/**
 * Indexing Service - Handles document processing and embedding generation.
 *
 * Splits document text into overlapping, fixed-size chunks and delegates
 * embedding generation to the injected embedding service.
 */
class IndexingService {
  /**
   * @param {Object} embeddingService - Object exposing `embed(text)`; used by generateEmbedding
   * @param {Object} encryptionService - Stored on the instance; not referenced by the methods defined here
   */
  constructor(embeddingService, encryptionService) {
    this.embeddingService = embeddingService;
    this.encryptionService = encryptionService;

    // Default chunking configuration (sizes are in string length units)
    this.chunkSize = 1024;
    this.overlapSize = 200;
  }

  /**
   * Generate embedding for text
   * @param {string} text - Text to embed
   * @returns {Promise<number[]>} - Embedding vector, as produced by `embeddingService.embed`
   */
  async generateEmbedding(text) {
    return await this.embeddingService.embed(text);
  }

  /**
   * Process and chunk document
   *
   * Consecutive chunks share `overlapSize` characters. Content that is empty
   * (or otherwise falsy) or no longer than `chunkSize` is returned unchanged
   * as a single chunk with index 0.
   *
   * @param {string} content - Document content
   * @param {Object} options - Chunking options
   * @param {number} [options.chunkSize] - Maximum chunk length; defaults to `this.chunkSize`
   * @param {number} [options.overlapSize] - Characters shared between neighbouring chunks; defaults to `this.overlapSize`
   * @returns {Array<{content: string, index: number}>} - Document chunks
   * @throws {RangeError} If the content must be split and `chunkSize` is not a
   *   positive finite number, or `overlapSize` is not in the range [0, chunkSize)
   */
  chunkDocument(content, options = {}) {
    // `options = {}` only covers `undefined`; also tolerate an explicit `null`.
    const { chunkSize = this.chunkSize, overlapSize = this.overlapSize } = options || {};

    if (!content || content.length <= chunkSize) {
      return [{ content, index: 0 }];
    }

    // Validate only once splitting is actually required, so short documents
    // keep working even when a small chunkSize is combined with the default overlap.
    if (!Number.isFinite(chunkSize) || chunkSize <= 0) {
      throw new RangeError(`chunkSize must be a positive number, got ${chunkSize}`);
    }
    if (!Number.isFinite(overlapSize) || overlapSize < 0 || overlapSize >= chunkSize) {
      // An overlap >= chunkSize would never advance the window (endless loop);
      // a negative overlap would silently skip characters between chunks.
      throw new RangeError(
        `overlapSize must be >= 0 and smaller than chunkSize (${chunkSize}), got ${overlapSize}`
      );
    }

    // Distance the window moves per chunk; strictly positive after validation.
    const step = chunkSize - overlapSize;
    const chunks = [];

    for (let start = 0; ; start += step) {
      const end = Math.min(start + chunkSize, content.length);
      chunks.push({
        content: content.substring(start, end),
        index: chunks.length
      });

      // Stop once a chunk reaches the end of the content, so no trailing
      // chunk consisting only of already-emitted overlap is produced.
      if (end >= content.length) break;
    }

    return chunks;
  }

  /**
   * Generate embeddings for multiple chunks
   *
   * Chunks are embedded one at a time, in order; each result is a shallow
   * copy of the input chunk with an added `embedding` property.
   *
   * @param {Array} chunks - Document chunks (each must have a `content` property)
   * @returns {Promise<Array>} - Chunks with embeddings
   */
  async generateChunkEmbeddings(chunks) {
    const chunksWithEmbeddings = [];

    for (const chunk of chunks) {
      const embedding = await this.generateEmbedding(chunk.content);
      chunksWithEmbeddings.push({
        ...chunk,
        embedding
      });
    }

    return chunksWithEmbeddings;
  }
}
// CommonJS export: the IndexingService class itself is the value of this module.
module.exports = IndexingService;