@coworker-agency/rag
Version:
Retrieval Augmented Generation (RAG) library for document indexing, vector storage, and AI-powered question answering
120 lines (99 loc) • 4.59 kB
JavaScript
/**
* Context-Aware Vector Generation System
*
* This module provides utilities for creating context-aware vector embeddings
* from document content, optimized for RAG (Retrieval Augmented Generation) systems.
*/
import { recursiveSplit } from './splitter.js';
import { fixChunks, extractChunkSummary } from './chunkFixer.js';
import { generateEmbeddings } from './embeddings.js';
/**
 * Extract a representative sample of the document for context.
 *
 * Documents that already fit within `maxLength` are returned unchanged.
 * Longer documents are condensed into three parts joined by ' ... ' markers:
 * the opening text, short samples taken at even intervals from the middle
 * fifth of the document (the 40%-60% span), and the closing text.
 * The returned excerpt never exceeds `maxLength` characters, separators
 * included.
 *
 * @param {string} documentContent - Full document content
 * @param {number} maxLength - Maximum length of excerpt
 * @returns {string} Document excerpt
 */
export function extractRelevantContext(documentContent, maxLength = 10000) {
  const SEPARATOR = ' ... ';
  const SAMPLE_LENGTH = 100;

  if (documentContent.length <= maxLength) {
    return documentContent;
  }

  // Normalise the limit to a non-negative integer (NaN and negatives become 0).
  const limit = Math.max(0, Math.floor(maxLength) || 0);

  // The excerpt always carries two outer separators. When the limit cannot
  // hold those plus at least one character of text, a plain prefix is the
  // only output that still honours the limit.
  const textBudget = limit - 2 * SEPARATOR.length;
  if (textBudget <= 0) {
    return documentContent.substring(0, limit);
  }

  // Split the text budget: beginning (40%), end (40%), middle gets the rest.
  const beginLength = Math.floor(textBudget * 0.4);
  const endLength = Math.floor(textBudget * 0.4);
  const middleLength = textBudget - beginLength - endLength;

  const beginning = documentContent.substring(0, beginLength);
  const end = documentContent.substring(documentContent.length - endLength);

  // Samples are drawn from the 40%-60% span of the document.
  const middleStart = Math.floor(documentContent.length * 0.4);
  const middleEnd = Math.floor(documentContent.length * 0.6);
  const middleSection = documentContent.substring(middleStart, middleEnd);

  // Choose how many samples fit once the separators between them are counted:
  // count * sampleLength + (count - 1) * SEPARATOR.length <= middleLength.
  const sampleLength = Math.min(SAMPLE_LENGTH, middleLength);
  const sampleCount = Math.floor(
    (middleLength + SEPARATOR.length) / (sampleLength + SEPARATOR.length)
  );

  // Spread the sample start offsets evenly across the middle section.
  const stride = middleSection.length / sampleCount;
  const samples = Array.from({ length: sampleCount }, (_, index) => {
    const start = Math.floor(index * stride);
    return middleSection.substring(start, start + sampleLength);
  });

  return `${beginning}${SEPARATOR}${samples.join(SEPARATOR)}${SEPARATOR}${end}`;
}
/**
 * Build context-aware embeddings for a document in three stages:
 *   1. split the text with `recursiveSplit`,
 *   2. pass the chunks plus a document excerpt to `fixChunks` (LLM stage),
 *   3. pass the result to `generateEmbeddings`.
 *
 * Documents longer than 100,000 characters are flagged as large, which caps
 * the batch size at `maxLlmBatchSize`; documents longer than 500,000
 * characters additionally set the `skipLlmRefinement` flag handed to
 * `fixChunks`. Any failure is logged with `console.error` and rethrown.
 *
 * @param {string} documentContent - Text content of the document
 * @param {string} openaiApiKey - OpenAI API key
 * @param {Object} options - Options for the chunking and embedding process
 * @param {string} [options.modelName='gpt-4o'] - Model name forwarded to `fixChunks`
 * @param {string} [options.embeddingsModel='text-embedding-3-small'] - Model name forwarded to `generateEmbeddings`
 * @param {number} [options.chunkSize=1000] - Chunk size forwarded to `recursiveSplit`
 * @param {number} [options.chunkOverlap=200] - Chunk overlap forwarded to `recursiveSplit`
 * @param {number} [options.maxDocumentExcerptLength=10000] - Limit passed to `extractRelevantContext`
 * @param {number} [options.batchSize=10] - Batch size for both the LLM and embedding stages
 * @param {number} [options.maxLlmBatchSize=5] - Upper bound on the batch size for large documents
 * @returns {Promise<Array<{context: string, content: string, vector: number[]}>>} Array of chunks with context and embeddings
 * @throws {Error} When `fixChunks` yields no chunks, or when any stage fails
 */
export async function getContextAwareVectors(documentContent, openaiApiKey, options = {}) {
  const {
    modelName: llmModel = 'gpt-4o',
    embeddingsModel: embeddingModel = 'text-embedding-3-small',
    chunkSize: targetChunkSize = 1000,
    chunkOverlap: overlapSize = 200,
    maxDocumentExcerptLength: excerptLimit = 10000,
    batchSize: requestedBatchSize = 10,
    maxLlmBatchSize: largeDocumentBatchCap = 5
  } = options;

  try {
    console.log('Step 1: Creating chunks using recursive splitter...');

    // Size thresholds (in characters) that switch on large-document handling.
    const totalChars = documentContent.length;
    const isLargeDocument = totalChars > 100000;
    const skipLlmRefinement = totalChars > 500000;

    // Stage 1: raw chunks.
    const rawChunks = recursiveSplit(documentContent, targetChunkSize, overlapSize);
    console.log(`Created ${rawChunks.length} initial chunks`);

    console.log('Step 2: Fixing chunks using LLM...');

    // A condensed view of the whole document accompanies the chunks.
    const excerpt = extractRelevantContext(documentContent, excerptLimit);

    // Large documents never use a batch bigger than the configured cap.
    let perBatch = requestedBatchSize;
    if (isLargeDocument) {
      perBatch = Math.min(requestedBatchSize, largeDocumentBatchCap);
    }

    // Stage 2: LLM pass over the chunks.
    const llmOptions = {
      modelName: llmModel,
      batchSize: perBatch,
      skipLlmRefinement: skipLlmRefinement,
      isLargeDocument: isLargeDocument
    };
    const contextualChunks = await fixChunks(rawChunks, excerpt, openaiApiKey, llmOptions);
    console.log(`Processed ${contextualChunks.length} chunks with context`);

    // An empty result from the LLM stage is treated as a failure.
    if (!contextualChunks.length) {
      throw new Error('Failed to fix chunks using LLM');
    }

    console.log('Step 3: Generating embeddings for chunks...');

    // Stage 3: embeddings, using the same effective batch size.
    const embeddingOptions = {
      embeddingsModel: embeddingModel,
      batchSize: perBatch
    };
    const embeddedChunks = await generateEmbeddings(contextualChunks, openaiApiKey, embeddingOptions);
    console.log(`Generated embeddings for ${embeddedChunks.length} chunks`);

    return embeddedChunks;
  } catch (error) {
    console.error('Error in getContextAwareVectors:', error);
    throw error;
  }
}
// Export functions
export { extractRelevantContext, getContextAwareVectors, extractChunkSummary };