UNPKG

@coworker-agency/rag

Version:

Retrieval Augmented Generation (RAG) library for document indexing, vector storage, and AI-powered question answering

120 lines (99 loc) 4.59 kB
/**
 * Context-Aware Vector Generation System
 *
 * This module provides utilities for creating context-aware vector embeddings
 * from document content, optimized for RAG (Retrieval Augmented Generation) systems.
 */

import { recursiveSplit } from './splitter.js';
import { fixChunks, extractChunkSummary } from './chunkFixer.js';
import { generateEmbeddings } from './embeddings.js';

// Share of the excerpt budget taken from the start of the document, and again from its end.
const EXCERPT_EDGE_SHARE = 0.4;
// The middle samples are drawn from the 40%..60% span of the document.
const MIDDLE_SPAN_START = 0.4;
const MIDDLE_SPAN_END = 0.6;
// Width, in characters, of each sample taken from the middle span.
const MIDDLE_SAMPLE_WIDTH = 100;
// Marker inserted wherever text has been elided from the excerpt.
const ELISION_MARKER = ' ... ';

/**
 * Extract a representative sample of the document for context.
 *
 * Documents no longer than `maxLength` are returned unchanged. Longer documents
 * are condensed into: the first 40% of the budget taken from the start of the
 * document, evenly spaced 100-character samples from the middle (40%..60%) of
 * the document filling the remaining budget, and the last 40% of the budget
 * taken from the end of the document.
 *
 * Note: the ' ... ' markers between the parts and after every middle sample are
 * added on top of the budget, so the returned excerpt can be somewhat longer
 * than `maxLength`.
 *
 * @param {string} documentContent - Full document content
 * @param {number} maxLength - Character budget for the excerpt
 * @returns {string} Document excerpt
 */
export function extractRelevantContext(documentContent, maxLength = 10000) {
  if (documentContent.length <= maxLength) {
    return documentContent;
  }

  // Split the budget: 40% beginning, 40% end, whatever remains for the middle.
  const beginLength = Math.floor(maxLength * EXCERPT_EDGE_SHARE);
  const endLength = Math.floor(maxLength * EXCERPT_EDGE_SHARE);
  const middleLength = maxLength - beginLength - endLength;

  const beginning = documentContent.substring(0, beginLength);
  const end = documentContent.substring(documentContent.length - endLength);

  const middleStart = Math.floor(documentContent.length * MIDDLE_SPAN_START);
  const middleEnd = Math.floor(documentContent.length * MIDDLE_SPAN_END);
  const middleSection = documentContent.substring(middleStart, middleEnd);

  // Stride between sample start positions, chosen so that roughly
  // middleLength / 100 samples are spread across the middle section.
  const sampleSize = Math.floor(middleSection.length / (middleLength / MIDDLE_SAMPLE_WIDTH));

  let middleSample = '';
  for (
    let i = 0;
    i < middleSection.length && middleSample.length < middleLength;
    i += sampleSize
  ) {
    middleSample += middleSection.substring(i, i + MIDDLE_SAMPLE_WIDTH) + ELISION_MARKER;
  }

  return beginning + ELISION_MARKER + middleSample + ELISION_MARKER + end;
}

/**
 * Generate context-aware vectors from document content.
 *
 * Pipeline:
 *   1. Split the document into chunks with `recursiveSplit`.
 *   2. Pass the chunks, together with a document excerpt, to `fixChunks`.
 *   3. Pass the fixed chunks to `generateEmbeddings`.
 *
 * Documents longer than 100,000 characters are treated as "large": the batch
 * size is capped at `maxLlmBatchSize`. Documents longer than 500,000 characters
 * additionally set `skipLlmRefinement` in the options handed to `fixChunks`
 * (how that flag is honoured is up to `fixChunks`).
 *
 * @param {string} documentContent - Text content of the document
 * @param {string} openaiApiKey - OpenAI API key
 * @param {Object} options - Options for the chunking and embedding process
 * @param {string} [options.modelName='gpt-4o'] - Model name forwarded to `fixChunks`
 * @param {string} [options.embeddingsModel='text-embedding-3-small'] - Model name forwarded to `generateEmbeddings`
 * @param {number} [options.chunkSize=1000] - Chunk size forwarded to `recursiveSplit`
 * @param {number} [options.chunkOverlap=200] - Chunk overlap forwarded to `recursiveSplit`
 * @param {number} [options.maxDocumentExcerptLength=10000] - Budget passed to `extractRelevantContext`
 * @param {number} [options.batchSize=10] - Batch size for both the LLM and embedding steps
 * @param {number} [options.maxLlmBatchSize=5] - Upper bound on the batch size for large documents
 * @returns {Promise<Array<{context: string, content: string, vector: number[]}>>} Array of chunks with context and embeddings
 * @throws {Error} If `fixChunks` yields no chunks; any error raised by a pipeline step is logged and rethrown
 */
export async function getContextAwareVectors(documentContent, openaiApiKey, options = {}) {
  const {
    modelName = 'gpt-4o',
    embeddingsModel = 'text-embedding-3-small',
    chunkSize = 1000,
    chunkOverlap = 200,
    maxDocumentExcerptLength = 10000,
    batchSize = 10,
    maxLlmBatchSize = 5
  } = options;

  try {
    console.log('Step 1: Creating chunks using recursive splitter...');

    // Size thresholds (in characters) that switch on special handling.
    const isLargeDocument = documentContent.length > 100000;
    const skipLlmRefinement = documentContent.length > 500000;

    // Step 1: Split document into chunks
    const chunks = recursiveSplit(documentContent, chunkSize, chunkOverlap);
    console.log(`Created ${chunks.length} initial chunks`);

    console.log('Step 2: Fixing chunks using LLM...');

    // Condensed view of the whole document, handed to fixChunks alongside the chunks.
    const documentExcerpt = extractRelevantContext(documentContent, maxDocumentExcerptLength);

    // Large documents use a batch size no greater than maxLlmBatchSize.
    const effectiveBatchSize = isLargeDocument
      ? Math.min(batchSize, maxLlmBatchSize)
      : batchSize;

    // Step 2: Fix chunks and add context using LLM
    const fixedChunks = await fixChunks(chunks, documentExcerpt, openaiApiKey, {
      modelName,
      batchSize: effectiveBatchSize,
      skipLlmRefinement,
      isLargeDocument
    });
    console.log(`Processed ${fixedChunks.length} chunks with context`);

    // An empty result means there is nothing to embed.
    if (!fixedChunks.length) {
      throw new Error('Failed to fix chunks using LLM');
    }

    console.log('Step 3: Generating embeddings for chunks...');

    // Step 3: Generate embeddings for each chunk
    const chunksWithVectors = await generateEmbeddings(fixedChunks, openaiApiKey, {
      embeddingsModel,
      batchSize: effectiveBatchSize
    });
    console.log(`Generated embeddings for ${chunksWithVectors.length} chunks`);

    return chunksWithVectors;
  } catch (error) {
    console.error('Error in getContextAwareVectors:', error);
    throw error;
  }
}

// extractRelevantContext and getContextAwareVectors are already exported at
// their declarations; listing them again here would be a "Duplicate export"
// SyntaxError, so only the re-export of extractChunkSummary remains.
export { extractChunkSummary };