rag-system-pgvector
A complete Retrieval-Augmented Generation (RAG) system using pgvector, LangChain, and LangGraph for Node.js applications, with dynamic embedding and model providers, structured data queries, and chat history. Supports OpenAI, Anthropic, HuggingFace, Azure, and Google.
JavaScript
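// The queries in this class assume a pgvector schema along the following
// lines. This DDL is a sketch, not part of this file: the table and column
// names are taken from the queries below, while the vector dimension (1536,
// typical for OpenAI embeddings) and constraint details are assumptions.
//
//   CREATE EXTENSION IF NOT EXISTS vector;
//
//   CREATE TABLE documents (
//     id         UUID PRIMARY KEY,
//     title      TEXT,
//     content    TEXT,
//     file_path  TEXT,
//     file_type  TEXT,
//     metadata   JSONB,
//     created_at TIMESTAMPTZ DEFAULT now()
//   );
//
//   CREATE TABLE document_chunks (
//     id          BIGSERIAL PRIMARY KEY,
//     document_id UUID NOT NULL REFERENCES documents(id),
//     chunk_index INT,
//     content     TEXT,
//     embedding   vector(1536),
//     metadata    JSONB
//   );
//
//   CREATE TABLE search_sessions (
//     id         BIGSERIAL PRIMARY KEY,
//     query      TEXT,
//     results    JSONB,
//     created_at TIMESTAMPTZ DEFAULT now()
//   );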
import { v4 as uuidv4 } from 'uuid';

class DocumentStore {
  constructor(config = {}) {
    if (!config.database?.pool) {
      throw new Error('Database pool is required in config.database.pool');
    }
    this.pool = config.database.pool;
  }
  async saveDocument(documentData) {
    const client = await this.pool.connect();
    try {
      await client.query('BEGIN');

      // Insert the parent document row
      const documentId = uuidv4();
      const documentQuery = `
        INSERT INTO documents (id, title, content, file_path, file_type, metadata)
        VALUES ($1, $2, $3, $4, $5, $6)
        RETURNING id;
      `;
      await client.query(documentQuery, [
        documentId,
        documentData.title,
        documentData.content,
        documentData.filePath,
        documentData.fileType,
        documentData.metadata,
      ]);

      // Insert document chunks with embeddings
      const chunkQuery = `
        INSERT INTO document_chunks (document_id, chunk_index, content, embedding, metadata)
        VALUES ($1, $2, $3, $4, $5);
      `;
      for (const chunk of documentData.chunks) {
        await client.query(chunkQuery, [
          documentId,
          chunk.index,
          chunk.content,
          JSON.stringify(chunk.embedding), // pgvector accepts the '[x,y,...]' text format, which JSON.stringify produces for a numeric array
          chunk.metadata,
        ]);
      }

      await client.query('COMMIT');
      console.log(`✓ Saved document ${documentData.title} with ${documentData.chunks.length} chunks`);
      return documentId;
    } catch (error) {
      await client.query('ROLLBACK');
      console.error('Error saving document:', error);
      throw error;
    } finally {
      client.release();
    }
  }
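  // A sketch of how `documentData` might be assembled with LangChain before
  // calling saveDocument. The splitter and embedding classes come from the
  // @langchain/textsplitters and @langchain/openai packages; the chunking
  // parameters and file fields here are illustrative, not prescribed by
  // this class:
  //
  //   import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
  //   import { OpenAIEmbeddings } from '@langchain/openai';
  //
  //   const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });
  //   const texts = await splitter.splitText(rawText);
  //   const vectors = await new OpenAIEmbeddings().embedDocuments(texts);
  //
  //   const documentData = {
  //     title: 'example.pdf',
  //     content: rawText,
  //     filePath: '/uploads/example.pdf',
  //     fileType: 'pdf',
  //     metadata: {},
  //     chunks: texts.map((content, index) => ({
  //       index,
  //       content,
  //       embedding: vectors[index],
  //       metadata: {},
  //     })),
  //   };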
  async searchSimilarChunks(queryEmbedding, limit = 10, threshold = 0.5) {
    const client = await this.pool.connect();
    try {
      // <=> is pgvector's cosine-distance operator; 1 - distance gives cosine similarity
      const query = `
        SELECT
          dc.id,
          dc.content,
          dc.chunk_index,
          dc.metadata,
          d.title,
          d.file_path,
          d.file_type,
          1 - (dc.embedding <=> $1::vector) AS similarity
        FROM document_chunks dc
        JOIN documents d ON dc.document_id = d.id
        WHERE 1 - (dc.embedding <=> $1::vector) > $2
        ORDER BY dc.embedding <=> $1::vector
        LIMIT $3;
      `;
      const result = await client.query(query, [
        JSON.stringify(queryEmbedding),
        threshold,
        limit,
      ]);
      return result.rows;
    } catch (error) {
      console.error('Error searching similar chunks:', error);
      throw error;
    } finally {
      client.release();
    }
  }
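  // On larger tables this query benefits from an approximate nearest-neighbor
  // index. A sketch (pgvector 0.5+; the index type and parameters depend on
  // the workload, and vector_cosine_ops matches the <=> operator used above):
  //
  //   CREATE INDEX ON document_chunks USING hnsw (embedding vector_cosine_ops);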
  async getDocumentById(documentId) {
    const client = await this.pool.connect();
    try {
      const documentQuery = `
        SELECT * FROM documents WHERE id = $1;
      `;
      const chunksQuery = `
        SELECT * FROM document_chunks
        WHERE document_id = $1
        ORDER BY chunk_index;
      `;
      const [documentResult, chunksResult] = await Promise.all([
        client.query(documentQuery, [documentId]),
        client.query(chunksQuery, [documentId]),
      ]);
      if (documentResult.rows.length === 0) {
        return null;
      }
      return {
        ...documentResult.rows[0],
        chunks: chunksResult.rows,
      };
    } catch (error) {
      console.error('Error getting document:', error);
      throw error;
    } finally {
      client.release();
    }
  }
  async getAllDocuments() {
    const client = await this.pool.connect();
    try {
      const query = `
        SELECT
          d.*,
          COUNT(dc.id) AS chunk_count
        FROM documents d
        LEFT JOIN document_chunks dc ON d.id = dc.document_id
        GROUP BY d.id
        ORDER BY d.created_at DESC;
      `;
      const result = await client.query(query);
      return result.rows;
    } catch (error) {
      console.error('Error getting all documents:', error);
      throw error;
    } finally {
      client.release();
    }
  }
  async deleteDocument(documentId) {
    const client = await this.pool.connect();
    try {
      await client.query('BEGIN');
      // Delete chunks first (due to the foreign key constraint)
      await client.query('DELETE FROM document_chunks WHERE document_id = $1', [documentId]);
      // Delete the document itself
      const result = await client.query('DELETE FROM documents WHERE id = $1 RETURNING *', [documentId]);
      await client.query('COMMIT');
      return result.rows[0];
    } catch (error) {
      await client.query('ROLLBACK');
      console.error('Error deleting document:', error);
      throw error;
    } finally {
      client.release();
    }
  }
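  // Note: declaring document_chunks.document_id with ON DELETE CASCADE would
  // make the explicit chunk delete above unnecessary; the two-statement
  // transaction here works with a plain foreign key.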
  async saveSearchSession(query, results) {
    const client = await this.pool.connect();
    try {
      const searchQuery = `
        INSERT INTO search_sessions (query, results)
        VALUES ($1, $2)
        RETURNING id;
      `;
      // Stringify explicitly: node-postgres serializes JS arrays as Postgres
      // arrays rather than JSON, so an array of result rows would otherwise
      // fail against a jsonb column
      const result = await client.query(searchQuery, [query, JSON.stringify(results)]);
      return result.rows[0].id;
    } catch (error) {
      console.error('Error saving search session:', error);
      throw error;
    } finally {
      client.release();
    }
  }
}
export { DocumentStore };
export default DocumentStore;
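
// Example usage (a sketch: the file name, connection string, and embedding
// provider below are assumptions; `pg` is the node-postgres package):
//
//   import pg from 'pg';
//   import { OpenAIEmbeddings } from '@langchain/openai';
//   import { DocumentStore } from './DocumentStore.js';
//
//   const pool = new pg.Pool({ connectionString: process.env.DATABASE_URL });
//   const store = new DocumentStore({ database: { pool } });
//
//   const question = 'What does pgvector do?';
//   const queryEmbedding = await new OpenAIEmbeddings().embedQuery(question);
//   const hits = await store.searchSimilarChunks(queryEmbedding, 5, 0.6);
//   await store.saveSearchSession(question, hits);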