UNPKG

homeschool

Version:

🏠 Teach AI to understand natural language like a patient tutor. Advanced embedding-based function calling with semantic understanding, confidence scoring, and natural language parameter extraction.

github.com/trentbrew/homeschool

trentbrew/homeschool

641 lines (634 loc) • 20.3 kB

JavaScript

/**
 * Homeschool - Teach AI to understand natural language like a patient tutor.
 * Advanced embedding-based function calling with semantic understanding,
 * confidence scoring, and natural language parameter extraction.
 */
import { pipeline } from '@xenova/transformers';

/**
 * Options passed to every embedder call.
 *
 * Without pooling, the feature-extraction pipeline returns one embedding per
 * token, so the flattened vector length depends on how many tokens the text
 * has. Two texts of different token counts would then never be comparable
 * (cosineSimilarity returns 0 on a length mismatch). Mean pooling yields one
 * fixed-size sentence vector per text.
 */
const EMBEDDING_OPTIONS = Object.freeze({ pooling: 'mean', normalize: true });

/** Prompts describing text that is meant to be shown to the user. */
const CONTENT_PROMPTS = [
  'this is a message to display',
  'this is content to show',
  'this is text to output',
  'this is information for the user',
];

/** Prompts describing text that is an instruction rather than content. */
const COMMAND_PROMPTS = [
  'this is a command instruction',
  'this is an action to perform',
  'this is a system directive',
];

/**
 * Embed a text and return the result as a plain array of numbers.
 *
 * @param {Function} embedder - Async function `(text, options) => { data }`.
 *   Embedders that ignore the second argument keep working unchanged.
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The embedding vector.
 */
async function embedText(embedder, text) {
  const output = await embedder(text, EMBEDDING_OPTIONS);
  return Array.from(output.data);
}

/**
 * Return the label of the candidate whose text is most similar to the query.
 *
 * @param {number[]} queryVector - Embedding of the user query.
 * @param {{label: string, text: string}[]} candidates - `text` is embedded,
 *   `label` is what gets returned when that text wins.
 * @param {Function} embedder - See {@link embedText}.
 * @param {string} fallback - Returned when no candidate scores above -1
 *   (in practice: when there are no candidates).
 * @returns {Promise<string>} The winning label or the fallback.
 */
async function pickBestLabel(queryVector, candidates, embedder, fallback) {
  let bestLabel = fallback;
  let bestScore = -1;

  // Sequential on purpose: the embedder is a single model instance.
  for (const { label, text } of candidates) {
    const candidateVector = await embedText(embedder, text);
    const score = cosineSimilarity(queryVector, candidateVector);
    if (score > bestScore) {
      bestScore = score;
      bestLabel = label;
    }
  }

  return bestLabel;
}

/**
 * Score one matching layer (intents or contexts) of a tool against the query.
 *
 * @param {Function} embed - Async function `(text) => number[]`.
 * @param {number[]} queryVector - Embedding of the user query.
 * @param {string[]|undefined} texts - Phrases of this layer; may be missing.
 * @param {string} type - Layer name recorded on every match entry.
 * @returns {Promise<{best: number, matches: object[]}>} Highest score
 *   (never below 0) and one match entry per phrase.
 */
async function scoreLayer(embed, queryVector, texts, type) {
  const matches = [];
  let best = 0;

  for (const text of texts || []) {
    const score = cosineSimilarity(queryVector, await embed(text));
    if (score > best) {
      best = score;
    }
    matches.push({ type, text, score });
  }

  return { best, matches };
}

/**
 * Cosine similarity utility with safety checks.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Similarity in [-1, 1]; 0 when either vector is missing,
 *   the lengths differ, a magnitude is zero, or the result is NaN.
 */
function cosineSimilarity(a, b) {
  if (!a || !b || a.length !== b.length) return 0;

  // Single pass instead of three separate reductions.
  let dotProduct = 0;
  let squaredSumA = 0;
  let squaredSumB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    squaredSumA += a[i] * a[i];
    squaredSumB += b[i] * b[i];
  }

  const magnitudeA = Math.sqrt(squaredSumA);
  const magnitudeB = Math.sqrt(squaredSumB);

  // Prevent division by zero
  if (magnitudeA === 0 || magnitudeB === 0) return 0;

  const similarity = dotProduct / (magnitudeA * magnitudeB);

  // Ensure result is a valid number
  return Number.isNaN(similarity) ? 0 : similarity;
}

/**
 * Batch cosine similarity calculation.
 *
 * @param {ArrayLike<number>} query - Vector compared against every entry.
 * @param {ArrayLike<number>[]} vectors - Vectors to score.
 * @returns {number[]} One similarity per entry of `vectors`, in order.
 */
function batchCosineSimilarity(query, vectors) {
  return vectors.map((vector) => cosineSimilarity(query, vector));
}

/**
 * Semantic color extraction using embeddings.
 *
 * Every plain color is tested as `color <name>`; when modifiers are
 * configured, every `<modifier> <color>` pair is tested as well
 * (light blue, dark red, etc.).
 *
 * @param {string} query - User query.
 * @param {object} config - Parameter config with optional
 *   `semanticCandidates`, `modifierCandidates` and `fallback`.
 * @param {Function} embedder - See {@link embedText}.
 * @returns {Promise<string>} Best matching color, or `fallback` / 'blue'.
 */
async function extractSemanticColor(query, config, embedder) {
  const { semanticCandidates, modifierCandidates, fallback } = config;
  const queryVector = await embedText(embedder, query);

  const candidates = [];
  if (semanticCandidates) {
    for (const color of semanticCandidates) {
      candidates.push({ label: color, text: `color ${color}` });
    }

    if (modifierCandidates) {
      for (const modifier of modifierCandidates) {
        for (const color of semanticCandidates) {
          const modifiedColor = `${modifier} ${color}`;
          candidates.push({ label: modifiedColor, text: modifiedColor });
        }
      }
    }
  }

  return pickBestLabel(queryVector, candidates, embedder, fallback || 'blue');
}

/**
 * Semantic category extraction using embeddings.
 *
 * Each category is tested under several phrasings; the category owning the
 * single best-scoring phrase wins.
 *
 * @param {string} query - User query.
 * @param {object} config - Parameter config with optional
 *   `semanticCandidates` and `fallback`.
 * @param {Function} embedder - See {@link embedText}.
 * @returns {Promise<string>} Best matching category, or `fallback` / 'general'.
 */
async function extractSemanticCategory(query, config, embedder) {
  const { semanticCandidates, fallback } = config;
  const queryVector = await embedText(embedder, query);

  const candidates = [];
  for (const category of semanticCandidates || []) {
    // Test multiple phrasings for better matching
    candidates.push(
      { label: category, text: `${category} related` },
      { label: category, text: `this is about ${category}` },
      { label: category, text: `${category} category` },
      { label: category, text: `${category} topic` },
    );
  }

  return pickBestLabel(queryVector, candidates, embedder, fallback || 'general');
}

/**
 * Content isolation using semantic boundaries.
 *
 * Every contiguous word span of the query is scored by how close it stays to
 * itself when framed as content versus when framed as a command; the span
 * with the highest net score is returned.
 *
 * NOTE: the number of spans grows quadratically with the word count and each
 * span costs several embedder calls, so long queries are expensive.
 *
 * @param {string} query - User query.
 * @param {Function} embedder - See {@link embedText}.
 * @returns {Promise<string>} The most content-like span, or 'Hello!' when no
 *   span scores above -1.
 */
async function extractSemanticContent(query, embedder) {
  // Split on any whitespace run so repeated spaces do not create empty words.
  const words = query.trim().split(/\s+/).filter(Boolean);

  // Generate all possible contiguous segments; the Set drops duplicates that
  // repeated words produce while keeping first-seen order.
  const segments = new Set();
  for (let start = 0; start < words.length; start++) {
    for (let end = start + 1; end <= words.length; end++) {
      segments.add(words.slice(start, end).join(' '));
    }
  }

  let bestContent = 'Hello!';
  let bestContentScore = -1;

  for (const segment of segments) {
    if (segment.length < 2) continue; // Skip very short segments

    // The segment embedding does not depend on the prompt: compute it once.
    const segmentVector = await embedText(embedder, segment);

    let contentScore = 0;
    for (const contentPrompt of CONTENT_PROMPTS) {
      const framedVector = await embedText(embedder, `"${segment}" - ${contentPrompt}`);
      contentScore += cosineSimilarity(framedVector, segmentVector);
    }

    let commandScore = 0;
    for (const commandPrompt of COMMAND_PROMPTS) {
      const framedVector = await embedText(embedder, `"${segment}" - ${commandPrompt}`);
      commandScore += cosineSimilarity(framedVector, segmentVector);
    }

    // Prefer segments that are more content-like than command-like
    const netContentScore = contentScore - commandScore;
    if (netContentScore > bestContentScore) {
      bestContentScore = netContentScore;
      bestContent = segment;
    }
  }

  return bestContent;
}

/**
 * Main class for semantic function calling.
 *
 * Tools are plain objects with `name`, `description`, optional
 * `intentPatterns` / `contexts` arrays and an optional `parameters` map
 * (see `exampleTools`).
 */
class SemanticFunctionCaller {
  /**
   * @param {object} [config] - Overrides for `embeddingModel`,
   *   `defaultConfidenceThreshold`, `enableCaching` and `verbose`.
   */
  constructor(config = {}) {
    this.embedder = null;
    this.tools = [];
    // Prototype-less object: texts such as "constructor" or "toString" must
    // not collide with inherited Object.prototype members.
    this.embeddingCache = Object.create(null);
    this.config = {
      embeddingModel: 'Xenova/all-MiniLM-L6-v2',
      defaultConfidenceThreshold: 0.25,
      enableCaching: true,
      verbose: false,
      ...config,
    };
  }

  /**
   * Initialize the embedding model. Does nothing when an embedder is
   * already set.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.embedder) return;

    if (this.config.verbose) {
      console.log(`Loading embedding model: ${this.config.embeddingModel}`);
    }
    this.embedder = await pipeline('feature-extraction', this.config.embeddingModel);
  }

  /**
   * Register tools for function calling. Appends to the existing list.
   *
   * @param {object[]} tools - Tool definitions to add.
   */
  registerTools(tools) {
    this.tools = [...this.tools, ...tools];

    if (this.config.verbose) {
      console.log(`Registered ${tools.length} tools:`, tools.map((t) => t.name));
    }
  }

  /**
   * Clear all registered tools and the embedding cache.
   */
  clearTools() {
    this.tools = [];
    this.embeddingCache = Object.create(null);
  }

  /**
   * Get embedding with caching.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} A fresh array; callers may mutate it without
   *   affecting the cache.
   */
  async getEmbedding(text) {
    const useCache = this.config.enableCaching;

    if (useCache && this.embeddingCache[text] !== undefined) {
      return Array.from(this.embeddingCache[text]);
    }

    // Public method: make sure the model is loaded even when called directly.
    await this.initialize();
    const vector = await embedText(this.embedder, text);

    if (useCache) {
      this.embeddingCache[text] = new Float32Array(vector);
    }
    return vector;
  }

  /**
   * Find the best tool match using multi-layer semantic analysis.
   *
   * The total score is `0.4 * best intent + 0.3 * best context +
   * 0.3 * description`.
   *
   * @param {string} query - User query.
   * @returns {Promise<{tool: string, totalScore: number, matches: object[]}|undefined>}
   *   The highest scoring tool (first one wins ties), with its matches sorted
   *   by descending score; `undefined` when no tools are registered.
   */
  async findToolBySemanticLayers(query) {
    await this.initialize();

    const queryVector = await this.getEmbedding(query);
    const embed = (text) => this.getEmbedding(text);
    let bestResult;

    for (const tool of this.tools) {
      // Layer 1: Intent matching - take best match
      const intents = await scoreLayer(embed, queryVector, tool.intentPatterns, 'intent');

      // Layer 2: Context matching - take best match
      const contexts = await scoreLayer(embed, queryVector, tool.contexts, 'context');

      // Layer 3: Description matching
      const descVector = await this.getEmbedding(tool.description);
      const descScore = cosineSimilarity(queryVector, descVector);

      const matches = [
        ...intents.matches,
        ...contexts.matches,
        { type: 'description', text: tool.description, score: descScore },
      ];

      // Calculate weighted total score
      const totalScore = intents.best * 0.4 + contexts.best * 0.3 + descScore * 0.3;

      // Strict comparison keeps the earliest registered tool on ties.
      if (!bestResult || totalScore > bestResult.totalScore) {
        bestResult = {
          tool: tool.name,
          totalScore,
          matches: matches.sort((x, y) => y.score - x.score),
        };
      }
    }

    return bestResult;
  }

  /**
   * Extract parameters using semantic analysis.
   *
   * Parameters whose `type` is not one of `semantic_color`,
   * `extracted_content` or `semantic_category` are left out of the result.
   *
   * @param {string} toolName - Name of a registered tool.
   * @param {string} query - User query.
   * @returns {Promise<object>} Map of parameter name to extracted value;
   *   empty when the tool is unknown or declares no parameters.
   */
  async extractParameters(toolName, query) {
    const tool = this.tools.find((t) => t.name === toolName);
    if (!tool) return {};

    await this.initialize();

    const result = {};
    for (const [paramName, paramConfig] of Object.entries(tool.parameters || {})) {
      switch (paramConfig.type) {
        case 'semantic_color':
          result[paramName] = await extractSemanticColor(query, paramConfig, this.embedder);
          break;
        case 'extracted_content':
          result[paramName] = await extractSemanticContent(query, this.embedder);
          break;
        case 'semantic_category':
          result[paramName] = await extractSemanticCategory(query, paramConfig, this.embedder);
          break;
        default:
          break;
      }
    }

    return result;
  }

  /**
   * Execute function calling with confidence scoring.
   *
   * @param {string} query - User query.
   * @param {object} [options] - `gutInstinct` (lowers the threshold to 0.1),
   *   `confidenceThreshold`, `mode` ('standard' or 'first_instinct', the
   *   latter skips the confidence check) and `verbose`.
   * @returns {Promise<object>} On success `{ success: true, tool, parameters,
   *   confidence, reasoning, mode }`; otherwise `{ success: false, reason,
   *   confidence }`.
   */
  async execute(query, options = {}) {
    const opts = {
      gutInstinct: false,
      confidenceThreshold: this.config.defaultConfidenceThreshold,
      mode: 'standard',
      verbose: this.config.verbose,
      ...options,
    };

    if (opts.verbose) {
      console.log('🔍 Semantic Function Calling Analysis:', {
        query,
        options: opts,
      });
    }

    // Multi-layer semantic matching
    const toolMatch = await this.findToolBySemanticLayers(query);

    // Nothing to choose from: report a failure instead of dereferencing
    // an undefined match.
    if (!toolMatch) {
      return {
        success: false,
        reason: 'No tools registered',
        confidence: 0,
      };
    }

    if (opts.verbose) {
      console.log('📊 Tool Analysis:', {
        selectedTool: toolMatch.tool,
        confidence: `${(toolMatch.totalScore * 100).toFixed(1)}%`,
        reasoning: toolMatch.matches
          .slice(0, 3)
          .map((m) => `${m.type}: "${m.text}" (${(m.score * 100).toFixed(1)}%)`),
      });
    }

    // Confidence checking (unless in first instinct mode)
    if (opts.mode !== 'first_instinct') {
      const threshold = opts.gutInstinct ? 0.1 : opts.confidenceThreshold;

      if (toolMatch.totalScore < threshold) {
        if (opts.verbose) {
          console.log('❌ Confidence too low, not executing');
        }
        return {
          success: false,
          reason: 'Low confidence in tool selection',
          confidence: toolMatch.totalScore,
        };
      }

      // Compare against the configured threshold, not a hard-coded copy of
      // its default value.
      if (opts.gutInstinct && toolMatch.totalScore < opts.confidenceThreshold && opts.verbose) {
        console.log('🎯 Trusting model intuition despite low confidence score');
      }
    }

    // Extract parameters
    const parameters = await this.extractParameters(toolMatch.tool, query);

    if (opts.verbose) {
      console.log('🎯 Extracted Parameters:', parameters);
    }

    return {
      success: true,
      tool: toolMatch.tool,
      parameters,
      confidence: toolMatch.totalScore,
      reasoning: toolMatch.matches,
      mode: opts.mode,
    };
  }

  /**
   * Execute with first instinct mode (no confidence checking).
   *
   * @param {string} query - User query.
   * @returns {Promise<object>} Same shape as {@link SemanticFunctionCaller#execute}.
   */
  async executeFirstInstinct(query) {
    return this.execute(query, {
      mode: 'first_instinct',
      verbose: this.config.verbose,
    });
  }

  /**
   * Get cached embedding count (for monitoring).
   *
   * @returns {number} Number of distinct texts currently cached.
   */
  getCacheSize() {
    return Object.keys(this.embeddingCache).length;
  }

  /**
   * Clear embedding cache.
   */
  clearCache() {
    this.embeddingCache = Object.create(null);
  }
}

/**
 * Default parameter value database for common semantic types.
 */
const defaultParameterDatabase = {
  colors: {
    basic: [
      'red',
      'blue',
      'green',
      'yellow',
      'purple',
      'orange',
      'pink',
      'brown',
      'black',
      'white',
      'gray',
    ],
    extended: [
      'crimson',
      'navy',
      'teal',
      'coral',
      'salmon',
      'turquoise',
      'indigo',
      'violet',
      'steelblue',
      'lime',
      'cyan',
      'magenta',
      'gold',
      'silver',
      'maroon',
      'olive',
      'aqua',
    ],
    modifiers: ['light', 'dark', 'bright', 'deep', 'pale', 'vivid', 'vibrant'],
  },
  emotions: [
    'happy',
    'sad',
    'excited',
    'calm',
    'energetic',
    'peaceful',
    'angry',
    'joyful',
    'frustrated',
    'content',
    'anxious',
    'relaxed',
  ],
  sizes: [
    'tiny',
    'small',
    'medium',
    'large',
    'huge',
    'massive',
    'mini',
    'big',
    'little',
    'enormous',
    'gigantic',
  ],
  directions: [
    'up',
    'down',
    'left',
    'right',
    'center',
    'top',
    'bottom',
    'north',
    'south',
    'east',
    'west',
    'forward',
    'backward',
  ],
  categories: [
    'general',
    'work',
    'personal',
    'ideas',
    'tasks',
    'reminders',
    'meeting',
    'project',
    'research',
    'thoughts',
    'quotes',
    'learning',
    'goals',
    'planning',
    'shopping',
    'health',
  ],
  numbers: [
    'zero',
    'one',
    'two',
    'three',
    'four',
    'five',
    'six',
    'seven',
    'eight',
    'nine',
    'ten',
    'first',
    'second',
    'third',
    'few',
    'many',
  ],
  booleans: [
    'yes',
    'no',
    'true',
    'false',
    'on',
    'off',
    'enable',
    'disable',
    'activate',
    'deactivate',
    'start',
    'stop',
    'begin',
    'end',
  ],
};

/**
 * Merge custom parameter database with defaults.
 *
 * Top-level keys of `custom` replace the defaults, except `colors`, whose
 * sub-keys are merged one level deep.
 *
 * @param {object} [custom] - Overrides; omitted or null means "defaults only".
 * @returns {object} A new object; the defaults are not modified.
 */
function mergeParameterDatabase(custom = {}) {
  const overrides = custom || {};

  return {
    ...defaultParameterDatabase,
    ...overrides,
    colors: {
      ...defaultParameterDatabase.colors,
      ...overrides.colors,
    },
  };
}

/**
 * Example tool definitions showing the shape the caller expects.
 */
const exampleTools = [
  {
    name: 'changeBackgroundColor',
    description: 'Changes the background color of the web page',
    contexts: [
      'visual styling and appearance',
      'color modification and theming',
      'page aesthetics and design',
    ],
    intentPatterns: [
      'user wants to modify visual appearance',
      'user wants to change colors',
      'user wants to style the page',
    ],
    parameters: {
      color: {
        type: 'semantic_color',
        semanticCandidates: [
          'red',
          'blue',
          'green',
          'yellow',
          'purple',
          'orange',
          'pink',
          'brown',
          'black',
          'white',
          'gray',
          'cyan',
          'magenta',
          'lime',
          'navy',
          'teal',
          'silver',
          'gold',
          'coral',
          'salmon',
          'crimson',
          'violet',
          'indigo',
          'turquoise',
        ],
        modifierCandidates: ['light', 'dark', 'bright', 'vibrant', 'pale'],
        fallback: 'blue',
      },
    },
  },
  {
    name: 'displayText',
    description: 'Shows a message or text to the user',
    contexts: [
      'communication and messaging',
      'information display and output',
      'user interaction and feedback',
    ],
    intentPatterns: [
      'user wants to show information',
      'user wants to communicate a message',
      'user wants to display content',
    ],
    parameters: {
      text: {
        type: 'extracted_content',
        extractionStrategy: 'semantic_content_isolation',
      },
    },
  },
  {
    name: 'takeNote',
    description: 'Saves a note or reminder for later reference',
    contexts: [
      'note taking and memory',
      'information storage and organization',
      'personal productivity and planning',
      'documentation and records',
      'task management and reminders',
    ],
    intentPatterns: [
      'user wants to remember something',
      'user wants to save information',
      'user wants to record a thought',
      'user wants to jot down details',
      'user wants to make a reminder',
      'user wants to take notes',
    ],
    parameters: {
      note: {
        type: 'extracted_content',
        extractionStrategy: 'semantic_content_isolation',
      },
      category: {
        type: 'semantic_category',
        semanticCandidates: [
          'general',
          'work',
          'personal',
          'ideas',
          'tasks',
          'reminders',
          'meeting',
          'project',
          'research',
          'thoughts',
          'quotes',
          'learning',
          'goals',
          'planning',
        ],
        fallback: 'general',
      },
    },
  },
];

// Version info
const version = '0.1.0';

export { SemanticFunctionCaller, batchCosineSimilarity, cosineSimilarity, defaultParameterDatabase, exampleTools, extractSemanticCategory, extractSemanticColor, extractSemanticContent, mergeParameterDatabase, version };
//# sourceMappingURL=index.esm.js.map