
rag-system-pgvector


A complete Retrieval-Augmented Generation (RAG) system using pgvector, LangChain, and LangGraph for Node.js applications, with dynamic embedding and model providers, structured data queries, and chat history. Supports OpenAI, Anthropic, HuggingFace, Azure, and Google.

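The DocumentStore module below assumes a Postgres database with the pgvector extension and three tables: documents, document_chunks, and search_sessions. The DDL is not part of this file; the sketch that follows is reconstructed from the queries the module runs, so the table and column names are grounded in the code, while the vector dimension (1536, typical of OpenAI embeddings) and the HNSW index are assumptions to adjust for your provider.

import pg from 'pg';

const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL });

// Hypothetical setup script: table/column names mirror the queries in this file;
// the embedding dimension and the index choice are assumptions.
await pool.query(`
  CREATE EXTENSION IF NOT EXISTS vector;

  CREATE TABLE IF NOT EXISTS documents (
    id UUID PRIMARY KEY,
    title TEXT,
    content TEXT,
    file_path TEXT,
    file_type TEXT,
    metadata JSONB,
    created_at TIMESTAMPTZ DEFAULT now()
  );

  CREATE TABLE IF NOT EXISTS document_chunks (
    id BIGSERIAL PRIMARY KEY,
    document_id UUID REFERENCES documents(id),
    chunk_index INTEGER,
    content TEXT,
    embedding vector(1536), -- dimension must match your embedding model
    metadata JSONB
  );

  CREATE TABLE IF NOT EXISTS search_sessions (
    id BIGSERIAL PRIMARY KEY,
    query TEXT,
    results JSONB,
    created_at TIMESTAMPTZ DEFAULT now()
  );

  -- Optional approximate-nearest-neighbor index (pgvector 0.5+)
  CREATE INDEX IF NOT EXISTS document_chunks_embedding_idx
    ON document_chunks USING hnsw (embedding vector_cosine_ops);
`);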
import { v4 as uuidv4 } from 'uuid';

// Persists documents and their embedded chunks in Postgres via pgvector.
class DocumentStore {
  constructor(config = {}) {
    if (!config.database?.pool) {
      throw new Error('Database pool is required in config.database.pool');
    }
    this.pool = config.database.pool;
  }

  async saveDocument(documentData) {
    const client = await this.pool.connect();
    try {
      await client.query('BEGIN');

      // Insert document
      const documentId = uuidv4();
      const documentQuery = `
        INSERT INTO documents (id, title, content, file_path, file_type, metadata)
        VALUES ($1, $2, $3, $4, $5, $6)
        RETURNING id;
      `;
      await client.query(documentQuery, [
        documentId,
        documentData.title,
        documentData.content,
        documentData.filePath,
        documentData.fileType,
        documentData.metadata,
      ]);

      // Insert document chunks with embeddings
      for (const chunk of documentData.chunks) {
        const chunkQuery = `
          INSERT INTO document_chunks (document_id, chunk_index, content, embedding, metadata)
          VALUES ($1, $2, $3, $4, $5);
        `;
        await client.query(chunkQuery, [
          documentId,
          chunk.index,
          chunk.content,
          JSON.stringify(chunk.embedding), // pgvector accepts the '[1,2,3]' string format
          chunk.metadata,
        ]);
      }

      await client.query('COMMIT');
      console.log(`✓ Saved document ${documentData.title} with ${documentData.chunks.length} chunks`);
      return documentId;
    } catch (error) {
      await client.query('ROLLBACK');
      console.error('Error saving document:', error);
      throw error;
    } finally {
      client.release();
    }
  }

  async searchSimilarChunks(queryEmbedding, limit = 10, threshold = 0.5) {
    const client = await this.pool.connect();
    try {
      // <=> is pgvector's cosine distance operator, so 1 - distance is cosine similarity.
      const query = `
        SELECT
          dc.id,
          dc.content,
          dc.chunk_index,
          dc.metadata,
          d.title,
          d.file_path,
          d.file_type,
          1 - (dc.embedding <=> $1::vector) as similarity
        FROM document_chunks dc
        JOIN documents d ON dc.document_id = d.id
        WHERE 1 - (dc.embedding <=> $1::vector) > $2
        ORDER BY dc.embedding <=> $1::vector
        LIMIT $3;
      `;
      const result = await client.query(query, [
        JSON.stringify(queryEmbedding),
        threshold,
        limit,
      ]);
      return result.rows;
    } catch (error) {
      console.error('Error searching similar chunks:', error);
      throw error;
    } finally {
      client.release();
    }
  }

  async getDocumentById(documentId) {
    const client = await this.pool.connect();
    try {
      const documentQuery = `
        SELECT * FROM documents WHERE id = $1;
      `;
      const chunksQuery = `
        SELECT * FROM document_chunks
        WHERE document_id = $1
        ORDER BY chunk_index;
      `;
      const [documentResult, chunksResult] = await Promise.all([
        client.query(documentQuery, [documentId]),
        client.query(chunksQuery, [documentId]),
      ]);

      if (documentResult.rows.length === 0) {
        return null;
      }

      return {
        ...documentResult.rows[0],
        chunks: chunksResult.rows,
      };
    } catch (error) {
      console.error('Error getting document:', error);
      throw error;
    } finally {
      client.release();
    }
  }

  async getAllDocuments() {
    const client = await this.pool.connect();
    try {
      const query = `
        SELECT d.*, COUNT(dc.id) as chunk_count
        FROM documents d
        LEFT JOIN document_chunks dc ON d.id = dc.document_id
        GROUP BY d.id
        ORDER BY d.created_at DESC;
      `;
      const result = await client.query(query);
      return result.rows;
    } catch (error) {
      console.error('Error getting all documents:', error);
      throw error;
    } finally {
      client.release();
    }
  }

  async deleteDocument(documentId) {
    const client = await this.pool.connect();
    try {
      await client.query('BEGIN');

      // Delete chunks first (due to foreign key constraint)
      await client.query('DELETE FROM document_chunks WHERE document_id = $1', [documentId]);

      // Delete document
      const result = await client.query('DELETE FROM documents WHERE id = $1 RETURNING *', [documentId]);

      await client.query('COMMIT');
      return result.rows[0];
    } catch (error) {
      await client.query('ROLLBACK');
      console.error('Error deleting document:', error);
      throw error;
    } finally {
      client.release();
    }
  }

  async saveSearchSession(query, results) {
    const client = await this.pool.connect();
    try {
      const searchQuery = `
        INSERT INTO search_sessions (query, results)
        VALUES ($1, $2)
        RETURNING id;
      `;
      const result = await client.query(searchQuery, [query, results]);
      return result.rows[0].id;
    } catch (error) {
      console.error('Error saving search session:', error);
      throw error;
    } finally {
      client.release();
    }
  }
}

export { DocumentStore };
export default DocumentStore;
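A rough usage sketch, assuming the package's default export path and using placeholder embeddings (a real application would get vectors from the configured provider, and they must match the embedding column's declared dimension):

import pg from 'pg';
import { DocumentStore } from 'rag-system-pgvector'; // import specifier is an assumption

const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL });
const store = new DocumentStore({ database: { pool } });

// Placeholder 3-dimensional vectors, for illustration only.
const docId = await store.saveDocument({
  title: 'Example doc',
  content: 'Full document text...',
  filePath: '/docs/example.md',
  fileType: 'markdown',
  metadata: { source: 'demo' },
  chunks: [
    { index: 0, content: 'First chunk of text', embedding: [0.1, 0.2, 0.3], metadata: {} },
  ],
});

const hits = await store.searchSimilarChunks([0.1, 0.2, 0.3], 5, 0.5);
console.log(hits.map((h) => ({ title: h.title, similarity: h.similarity })));

await store.saveSearchSession('example query', JSON.stringify(hits));

Because saveDocument wraps the document row and all of its chunk inserts in one transaction, a failure partway through rolls back cleanly and leaves no orphaned chunks; deleteDocument uses the same pattern in reverse, removing chunks before the parent row to satisfy the foreign key constraint.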