cns-mcp-server

Central Nervous System MCP Server for Autonomous Multi-Agent Orchestration with free local embeddings

/**
 * Embedding providers for semantic search
 */
import { logger } from '../utils/logger.js';
import { CNSError } from '../utils/error-handler.js';
import { pipeline, env } from '@xenova/transformers';
import { homedir } from 'os';
import { join } from 'path';

/**
 * OpenAI Embedding Provider
 * Uses OpenAI's text-embedding-3-small model
 */
export class OpenAIEmbeddingProvider {
    apiKey;
    model;
    dimension;
    baseUrl;
    constructor(apiKey, model = 'text-embedding-3-small', dimension = 1536, baseUrl = 'https://api.openai.com/v1') {
        this.apiKey = apiKey;
        this.model = model;
        this.dimension = dimension;
        this.baseUrl = baseUrl;
    }
    getDimension() {
        return this.dimension;
    }
    getName() {
        return `OpenAI-${this.model}`;
    }
    async generateEmbedding(text) {
        if (!text.trim()) {
            throw new CNSError('Text cannot be empty for embedding generation', 'EMPTY_TEXT', {}, false);
        }
        try {
            const response = await fetch(`${this.baseUrl}/embeddings`, {
                method: 'POST',
                headers: {
                    'Authorization': `Bearer ${this.apiKey}`,
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({
                    input: text,
                    model: this.model,
                    dimensions: this.dimension
                }),
            });
            if (!response.ok) {
                const error = await response.text();
                throw new Error(`OpenAI API error: ${response.status} - ${error}`);
            }
            const data = await response.json();
            if (!data.data?.[0]?.embedding) {
                throw new Error('Invalid response format from OpenAI API');
            }
            const embedding = data.data[0].embedding;
            if (embedding.length !== this.dimension) {
                throw new Error(`Expected embedding dimension ${this.dimension}, got ${embedding.length}`);
            }
            logger.info('Generated embedding via OpenAI', {
                model: this.model,
                textLength: text.length,
                embeddingDimension: embedding.length
            });
            return embedding;
        }
        catch (error) {
            logger.error('Failed to generate embedding via OpenAI', { error, textPreview: text.substring(0, 100) });
            throw new CNSError('OpenAI embedding generation failed', 'OPENAI_EMBEDDING_ERROR', {
                error: error instanceof Error ? error.message : error
            }, true // Retryable
            );
        }
    }
}

/**
 * Mock Embedding Provider for testing
 * Generates random embeddings for development/testing
 */
export class MockEmbeddingProvider {
    dimension;
    constructor(dimension = 1536) {
        this.dimension = dimension;
    }
    getDimension() {
        return this.dimension;
    }
    getName() {
        return 'Mock-Provider';
    }
    async generateEmbedding(text) {
        if (!text.trim()) {
            throw new CNSError('Text cannot be empty for embedding generation', 'EMPTY_TEXT', {}, false);
        }
        // Generate deterministic "embedding" based on text content for consistency in testing
        const hash = this.simpleHash(text);
        const embedding = Array.from({ length: this.dimension }, (_, i) => {
            return Math.sin(hash + i) * Math.cos(hash * i * 0.1);
        });
        // Normalize to unit vector
        const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
        const normalizedEmbedding = embedding.map(val => val / magnitude);
        logger.info('Generated mock embedding', {
            textLength: text.length,
            embeddingDimension: normalizedEmbedding.length
        });
        return normalizedEmbedding;
    }
    simpleHash(str) {
        let hash = 0;
        for (let i = 0; i < str.length; i++) {
            const char = str.charCodeAt(i);
            hash = ((hash << 5) - hash) + char;
            hash = hash & hash; // Convert to 32-bit integer
        }
        return Math.abs(hash) / 1000000; // Normalize
    }
}

/**
 * Transformers.js Embedding Provider
 * Uses local models for free, offline embeddings
 */
export class TransformersEmbeddingProvider {
    pipeline = null;
    model;
    dimension;
    initPromise = null;
    constructor(model = 'Xenova/all-MiniLM-L6-v2', dimension = 384) {
        this.model = model;
        this.dimension = dimension;
        // Set model cache directory to ~/.cns/models/
        const cnsDir = join(homedir(), '.cns', 'models');
        env.cacheDir = cnsDir;
        logger.info('Transformers.js embedding provider initialized', {
            model: this.model,
            dimension: this.dimension,
            cachePath: cnsDir
        });
    }
    async initialize() {
        if (this.pipeline)
            return;
        if (!this.initPromise) {
            this.initPromise = this._doInitialize();
        }
        await this.initPromise;
    }
    async _doInitialize() {
        try {
            logger.info('Loading Transformers.js model...', { model: this.model });
            // Create the pipeline - model will be downloaded on first use
            this.pipeline = await pipeline('feature-extraction', this.model, {
                quantized: true, // Use quantized model for smaller size and faster inference
            });
            logger.info('Transformers.js model loaded successfully', { model: this.model });
        }
        catch (error) {
            logger.error('Failed to load Transformers.js model', {
                model: this.model,
                error: error instanceof Error ? error.message : error
            });
            throw new CNSError('Failed to initialize Transformers.js model', 'TRANSFORMERS_INIT_ERROR', {
                model: this.model,
                error: error instanceof Error ? error.message : error
            });
        }
    }
    getDimension() {
        return this.dimension;
    }
    getName() {
        return `Transformers-${this.model.split('/').pop()}`;
    }
    async generateEmbedding(text) {
        if (!text.trim()) {
            throw new CNSError('Text cannot be empty for embedding generation', 'EMPTY_TEXT', {}, false);
        }
        await this.initialize();
        try {
            // Generate embeddings using the pipeline
            const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
            // Convert tensor to array
            const embedding = Array.from(output.data);
            if (embedding.length !== this.dimension) {
                logger.warn(`Expected dimension ${this.dimension}, got ${embedding.length}. Using actual dimension.`);
            }
            logger.debug('Generated embedding via Transformers.js', {
                model: this.model,
                textLength: text.length,
                embeddingDimension: embedding.length
            });
            return embedding;
        }
        catch (error) {
            logger.error('Failed to generate embedding via Transformers.js', { error, textPreview: text.substring(0, 100) });
            throw new CNSError('Transformers.js embedding generation failed', 'TRANSFORMERS_EMBEDDING_ERROR', {
                error: error instanceof Error ? error.message : error
            }, true // Retryable
            );
        }
    }
}

/**
 * Factory function to create embedding provider based on configuration
 */
export function createEmbeddingProvider(config) {
    const providerType = config.embedding_provider || process.env.EMBEDDING_PROVIDER || 'transformers';
    switch (providerType.toLowerCase()) {
        case 'transformers':
            logger.info('Using Transformers.js embedding provider (free, local)');
            return new TransformersEmbeddingProvider(config.embedding_model || process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2', config.embedding_dimension || parseInt(process.env.EMBEDDING_DIMENSION || '384'));
        case 'openai': {
            const apiKey = config.openai_api_key || process.env.OPENAI_API_KEY;
            if (!apiKey) {
                logger.warn('OpenAI API key not found, falling back to Transformers.js');
                return new TransformersEmbeddingProvider();
            }
            return new OpenAIEmbeddingProvider(apiKey, config.embedding_model || process.env.EMBEDDING_MODEL || 'text-embedding-3-small', config.embedding_dimension || parseInt(process.env.EMBEDDING_DIMENSION || '1536'));
        }
        case 'mock':
            logger.info('Using mock embedding provider for development/testing');
            return new MockEmbeddingProvider(config.embedding_dimension || parseInt(process.env.EMBEDDING_DIMENSION || '384'));
        case 'none':
            logger.info('No embedding provider configured, semantic search disabled');
            return null;
        default:
            logger.info('Unknown provider type, using Transformers.js as default');
            return new TransformersEmbeddingProvider();
    }
}
//# sourceMappingURL=embedding-providers.js.map
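
The module's public surface is small: createEmbeddingProvider returns a provider (or null for 'none'), and every provider implements generateEmbedding, getDimension, and getName. Below is a minimal usage sketch, not part of the package: it assumes an ES module context, a relative import path of './embedding-providers.js', and that the caller compares the normalized vectors with cosine similarity for semantic search. The config keys mirror the ones read by the factory above; the sample strings and the cosineSimilarity helper are purely illustrative.

// usage-sketch.mjs (hypothetical caller, not shipped with cns-mcp-server)
import { createEmbeddingProvider } from './embedding-providers.js';

// Plain cosine similarity; the providers already return (near-)unit vectors,
// so this mainly guards against un-normalized inputs.
function cosineSimilarity(a, b) {
    let dot = 0, normA = 0, normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

const provider = createEmbeddingProvider({
    embedding_provider: 'transformers', // or 'openai', 'mock', 'none'
    embedding_model: 'Xenova/all-MiniLM-L6-v2',
    embedding_dimension: 384,
});

if (provider) {
    const [a, b] = await Promise.all([
        provider.generateEmbedding('agents coordinate through shared memory'),
        provider.generateEmbedding('multi-agent orchestration with shared state'),
    ]);
    console.log(provider.getName(), cosineSimilarity(a, b).toFixed(3));
}

With the default 'transformers' provider this downloads the model into ~/.cns/models/ on first use and then runs fully offline; switching the config to 'openai' or 'mock' changes only which provider the factory instantiates, not the calling code.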