// ruvector-extensions
// Version:
// Advanced features for ruvector: embeddings, UI, exports, temporal tracking, and persistence
// 353 lines (345 loc) • 15.5 kB
// JavaScript
/**
* @fileoverview Comprehensive examples for the embeddings integration module
*
* This file demonstrates all features of the ruvector-extensions embeddings module:
* - Multiple embedding providers (OpenAI, Cohere, Anthropic, HuggingFace)
* - Batch processing
* - Error handling and retry logic
* - Integration with VectorDB
* - Search functionality
*
* @author ruv.io Team <info@ruv.io>
* @license MIT
*/
import { OpenAIEmbeddings, CohereEmbeddings, AnthropicEmbeddings, HuggingFaceEmbeddings, } from '../embeddings.js';
// ============================================================================
// Example 1: OpenAI Embeddings - Basic Usage
// ============================================================================
/**
 * Demonstrates the OpenAI provider: embedding one text, then a small batch,
 * and reading back dimensions, token usage, and provider metadata.
 */
async function example1_OpenAIBasic() {
    console.log('\n=== Example 1: OpenAI Embeddings - Basic Usage ===\n');
    // Provider falls back to a placeholder key when OPENAI_API_KEY is unset.
    const provider = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY || 'sk-...',
        model: 'text-embedding-3-small', // 1536 dimensions
    });
    // Single-text embedding.
    const vector = await provider.embedText('Hello, world!');
    console.log('Single embedding dimension:', vector.length);
    console.log('First 5 values:', vector.slice(0, 5));
    // Batch embedding of several texts in one call.
    const samples = [
        'Machine learning is fascinating',
        'Deep learning uses neural networks',
        'Natural language processing is important',
    ];
    const batch = await provider.embedTexts(samples);
    console.log('\nBatch embeddings:');
    console.log('Total embeddings:', batch.embeddings.length);
    console.log('Total tokens used:', batch.totalTokens);
    console.log('Provider:', batch.metadata?.provider);
}
// ============================================================================
// Example 2: OpenAI with Custom Dimensions
// ============================================================================
/**
 * Shows shrinking text-embedding-3-large output with the `dimensions` option
 * and verifying the actual vector length against `getDimension()`.
 */
async function example2_OpenAICustomDimensions() {
    console.log('\n=== Example 2: OpenAI with Custom Dimensions ===\n');
    // Request 1024-dimensional vectors instead of the model's larger default.
    const provider = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY || 'sk-...',
        model: 'text-embedding-3-large',
        dimensions: 1024, // Reduce from default 3072 to 1024
    });
    const vector = await provider.embedText('Custom dimension embedding');
    console.log('Embedding dimension:', vector.length);
    console.log('Expected:', provider.getDimension());
}
// ============================================================================
// Example 3: Cohere Embeddings with Search Types
// ============================================================================
/**
 * Demonstrates Cohere's asymmetric search embeddings: documents are embedded
 * with inputType 'search_document' (for storage) and queries with
 * 'search_query' (for lookup).
 *
 * Fix: removed an unused CohereEmbeddings instance that was constructed at
 * the top of the function but never referenced.
 */
async function example3_CohereSearchTypes() {
    console.log('\n=== Example 3: Cohere Embeddings with Search Types ===\n');
    // Embed documents (for storage)
    const documentEmbedder = new CohereEmbeddings({
        apiKey: process.env.COHERE_API_KEY || 'your-key',
        model: 'embed-english-v3.0',
        inputType: 'search_document',
    });
    const documents = [
        'The Eiffel Tower is in Paris',
        'The Statue of Liberty is in New York',
        'The Great Wall is in China',
    ];
    const docResult = await documentEmbedder.embedTexts(documents);
    console.log('Document embeddings created:', docResult.embeddings.length);
    // Embed query (for searching)
    const queryEmbedder = new CohereEmbeddings({
        apiKey: process.env.COHERE_API_KEY || 'your-key',
        model: 'embed-english-v3.0',
        inputType: 'search_query',
    });
    const queryEmbedding = await queryEmbedder.embedText('famous landmarks in France');
    console.log('Query embedding dimension:', queryEmbedding.length);
}
// ============================================================================
// Example 4: Anthropic/Voyage Embeddings
// ============================================================================
/**
 * Batch-embeds two texts with the Voyage-backed Anthropic provider and prints
 * the count and dimension.
 */
async function example4_AnthropicVoyage() {
    console.log('\n=== Example 4: Anthropic/Voyage Embeddings ===\n');
    // Uses VOYAGE_API_KEY from the environment, with a placeholder fallback.
    const voyage = new AnthropicEmbeddings({
        apiKey: process.env.VOYAGE_API_KEY || 'your-voyage-key',
        model: 'voyage-2',
        inputType: 'document',
    });
    const samples = [
        'Anthropic develops Claude AI',
        'Voyage AI provides embedding models',
    ];
    const batch = await voyage.embedTexts(samples);
    console.log('Embeddings created:', batch.embeddings.length);
    console.log('Dimension:', voyage.getDimension());
}
// ============================================================================
// Example 5: HuggingFace Local Embeddings
// ============================================================================
/**
 * Generates embeddings locally with a HuggingFace model — no API key needed.
 */
async function example5_HuggingFaceLocal() {
    console.log('\n=== Example 5: HuggingFace Local Embeddings ===\n');
    // Local model: normalized vectors, batches of 32 inputs at a time.
    const localModel = new HuggingFaceEmbeddings({
        model: 'Xenova/all-MiniLM-L6-v2',
        normalize: true,
        batchSize: 32,
    });
    const samples = [
        'Local embeddings are fast',
        'No API calls required',
        'Privacy-friendly solution',
    ];
    console.log('Processing locally...');
    const batch = await localModel.embedTexts(samples);
    console.log('Local embeddings created:', batch.embeddings.length);
    console.log('Dimension:', localModel.getDimension());
}
// ============================================================================
// Example 6: Batch Processing Large Datasets
// ============================================================================
/**
 * Embeds a 1000-document synthetic corpus and reports wall-clock timing,
 * per-text average, and total token usage.
 */
async function example6_BatchProcessing() {
    console.log('\n=== Example 6: Batch Processing Large Datasets ===\n');
    const provider = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY || 'sk-...',
    });
    // Generate 1000 sample texts.
    const corpus = Array.from(
        { length: 1000 },
        (_, idx) => `Document ${idx}: Sample text for embedding`,
    );
    console.log('Processing 1000 texts...');
    const startedAt = Date.now();
    const batch = await provider.embedTexts(corpus);
    const elapsedMs = Date.now() - startedAt;
    console.log(`Processed ${batch.embeddings.length} texts in ${elapsedMs}ms`);
    console.log(`Average: ${(elapsedMs / batch.embeddings.length).toFixed(2)}ms per text`);
    console.log(`Total tokens: ${batch.totalTokens}`);
}
// ============================================================================
// Example 7: Error Handling and Retry Logic
// ============================================================================
/**
 * Configures custom retry behavior (5 attempts, exponential backoff from 2s
 * up to a 30s cap) and reports failure details if all retries are exhausted.
 */
async function example7_ErrorHandling() {
    console.log('\n=== Example 7: Error Handling and Retry Logic ===\n');
    const provider = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY || 'sk-...',
        retryConfig: {
            maxRetries: 5,
            initialDelay: 2000,
            maxDelay: 30000,
            backoffMultiplier: 2,
        },
    });
    try {
        // Retries are applied on rate limits or temporary errors.
        const batch = await provider.embedTexts(['Test text']);
        console.log('Success! Embeddings created:', batch.embeddings.length);
    } catch (err) {
        console.error('Failed after retries:', err.message);
        console.error('Retryable:', err.retryable);
    }
}
// ============================================================================
// Example 8: Integration with VectorDB - Insert
// ============================================================================
/**
 * Prepares sample documents for embedding and insertion into a VectorDB.
 * The actual insert call is left commented out because VectorDB must be
 * imported and initialized by the caller first.
 */
async function example8_VectorDBInsert() {
    console.log('\n=== Example 8: Integration with VectorDB - Insert ===\n');
    // Note: This example assumes VectorDB is available;
    // import and initialize VectorDB before uncommenting the usage below.
    const provider = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY || 'sk-...',
    });
    // Sample documents to embed and insert.
    const sampleDocs = [
        {
            id: 'doc1',
            text: 'Machine learning enables computers to learn from data',
            metadata: { category: 'AI', author: 'John Doe' },
        },
        {
            id: 'doc2',
            text: 'Deep learning uses neural networks with multiple layers',
            metadata: { category: 'AI', author: 'Jane Smith' },
        },
        {
            id: 'doc3',
            text: 'Natural language processing helps computers understand text',
            metadata: { category: 'NLP', author: 'John Doe' },
        },
    ];
    // Example usage (uncomment when VectorDB is available):
    /*
    const { VectorDB } = await import('ruvector');
    const db = new VectorDB({ dimension: provider.getDimension() });
    const insertedIds = await embedAndInsert(db, provider, sampleDocs, {
        overwrite: true,
        onProgress: (current, total) => {
            console.log(`Progress: ${current}/${total} documents inserted`);
        },
    });
    console.log('Inserted document IDs:', insertedIds);
    */
    console.log('Documents prepared:', sampleDocs.length);
    console.log('Ready for insertion when VectorDB is initialized');
}
// ============================================================================
// Example 9: Integration with VectorDB - Search
// ============================================================================
/**
 * Sketches semantic search against a VectorDB: embed a query, retrieve the
 * top matches above a similarity threshold, optionally filtered by metadata.
 * The live calls are commented out until VectorDB is initialized.
 */
async function example9_VectorDBSearch() {
    console.log('\n=== Example 9: Integration with VectorDB - Search ===\n');
    const provider = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY || 'sk-...',
    });
    // Example usage (uncomment when VectorDB is available):
    /*
    const { VectorDB } = await import('ruvector');
    const db = new VectorDB({ dimension: provider.getDimension() });
    // First, insert some documents (see example 8)
    // ...
    // Now search for similar documents
    const results = await embedAndSearch(
        db,
        provider,
        'What is deep learning?',
        {
            topK: 5,
            threshold: 0.7,
            filter: { category: 'AI' },
        }
    );
    console.log('Search results:');
    results.forEach((result, i) => {
        console.log(`${i + 1}. ${result.id} (similarity: ${result.score})`);
        console.log(`   Text: ${result.metadata?.text}`);
    });
    */
    console.log('Search functionality ready when VectorDB is initialized');
}
// ============================================================================
// Example 10: Comparing Multiple Providers
// ============================================================================
/**
 * Instantiates one provider per backend and compares their reported vector
 * dimensions and maximum batch sizes side by side.
 */
async function example10_CompareProviders() {
    console.log('\n=== Example 10: Comparing Multiple Providers ===\n');
    const sampleText = 'Artificial intelligence is transforming the world';
    // One instance per backend; the HuggingFace provider runs locally.
    const openaiProvider = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY || 'sk-...',
    });
    const cohereProvider = new CohereEmbeddings({
        apiKey: process.env.COHERE_API_KEY || 'your-key',
    });
    const hfProvider = new HuggingFaceEmbeddings();
    // Compare vector dimensions.
    console.log('Provider dimensions:');
    console.log('- OpenAI:', openaiProvider.getDimension());
    console.log('- Cohere:', cohereProvider.getDimension());
    console.log('- HuggingFace:', hfProvider.getDimension());
    // Compare maximum batch sizes.
    console.log('\nMax batch sizes:');
    console.log('- OpenAI:', openaiProvider.getMaxBatchSize());
    console.log('- Cohere:', cohereProvider.getMaxBatchSize());
    console.log('- HuggingFace:', hfProvider.getMaxBatchSize());
    // Generate embeddings (uncomment to actually run):
    /*
    console.log('\nGenerating embeddings...');
    const [openaiResult, cohereResult, hfResult] = await Promise.all([
        openaiProvider.embedText(sampleText),
        cohereProvider.embedText(sampleText),
        hfProvider.embedText(sampleText),
    ]);
    console.log('All embeddings generated successfully!');
    */
}
// ============================================================================
// Example 11: Progressive Loading with Progress Tracking
// ============================================================================
/**
 * Prepares 50 sample documents and a console progress-bar callback for use
 * with embedAndInsert (live call commented out until VectorDB is available).
 *
 * Fixes: removed an unused `processed` counter, and floored the filled-segment
 * count once so the rendered bar is always exactly 50 characters wide —
 * String.prototype.repeat truncates fractional counts, so the previous code
 * (truncating `percentage / 2` and `50 - percentage / 2` independently)
 * produced a 49-character bar for odd percentages.
 */
async function example11_ProgressiveLoading() {
    console.log('\n=== Example 11: Progressive Loading with Progress ===\n');
    const openai = new OpenAIEmbeddings({
        apiKey: process.env.OPENAI_API_KEY || 'sk-...',
    });
    const documents = Array.from({ length: 50 }, (_, i) => ({
        id: `doc${i}`,
        text: `Document ${i}: This is sample content for embedding`,
        metadata: { index: i, batch: Math.floor(i / 10) },
    }));
    // Render a fixed-width console progress bar (50 segments, 2% each).
    const progressBar = (current, total) => {
        const percentage = Math.round((current / total) * 100);
        const filled = Math.floor(percentage / 2);
        const bar = '█'.repeat(filled) + '░'.repeat(50 - filled);
        console.log(`[${bar}] ${percentage}% (${current}/${total})`);
    };
    // Example usage (uncomment when VectorDB is available):
    /*
    const { VectorDB } = await import('ruvector');
    const db = new VectorDB({ dimension: openai.getDimension() });
    await embedAndInsert(db, openai, documents, {
        onProgress: progressBar,
    });
    */
    console.log('Ready to process', documents.length, 'documents with progress tracking');
}
// ============================================================================
// Main Function - Run All Examples
// ============================================================================
/**
 * Entry point for the example suite. The individual examples stay commented
 * out by default because most require provider API keys in the environment
 * (OPENAI_API_KEY, COHERE_API_KEY, VOYAGE_API_KEY); uncomment the ones to run.
 */
async function runAllExamples() {
    console.log('╔════════════════════════════════════════════════════════════╗');
    console.log('║ RUVector Extensions - Embeddings Integration Examples ║');
    console.log('╚════════════════════════════════════════════════════════════╝');
    try {
        // await example1_OpenAIBasic();
        // await example2_OpenAICustomDimensions();
        // await example3_CohereSearchTypes();
        // await example4_AnthropicVoyage();
        // await example5_HuggingFaceLocal();
        // await example6_BatchProcessing();
        // await example7_ErrorHandling();
        // await example8_VectorDBInsert();
        // await example9_VectorDBSearch();
        // await example10_CompareProviders();
        // await example11_ProgressiveLoading();
        console.log('\n✓ All examples completed successfully!');
    } catch (err) {
        console.error('\n✗ Error running examples:', err);
    }
}
// Run if executed directly: compare this module's URL to the CLI entry script
// (process.argv[1]) so the examples auto-run only under
// `node embeddings-example.js`, not when this module is imported.
if (import.meta.url === `file://${process.argv[1]}`) {
// Fire-and-forget is safe here: runAllExamples catches its own errors.
runAllExamples();
}
// Export every example individually so other modules can run them selectively.
export { example1_OpenAIBasic, example2_OpenAICustomDimensions, example3_CohereSearchTypes, example4_AnthropicVoyage, example5_HuggingFaceLocal, example6_BatchProcessing, example7_ErrorHandling, example8_VectorDBInsert, example9_VectorDBSearch, example10_CompareProviders, example11_ProgressiveLoading, };
//# sourceMappingURL=embeddings-example.js.map