homeschool
Version:
🏠 Teach AI to understand natural language like a patient tutor. Advanced embedding-based function calling with semantic understanding, confidence scoring, and natural language parameter extraction.
641 lines (634 loc) • 20.3 kB
JavaScript
import { pipeline } from '@xenova/transformers';
/**
 * Computes the cosine similarity of two equal-length numeric vectors.
 *
 * Degenerate input never throws and never yields NaN; 0 is returned when
 * either vector is missing, the lengths differ, either magnitude is zero,
 * or the division produces NaN.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Cosine of the angle between `a` and `b`, or 0 for degenerate input.
 */
function cosineSimilarity(a, b) {
  const comparable = Boolean(a) && Boolean(b) && a.length === b.length;
  if (!comparable) {
    return 0;
  }

  // One pass accumulates the dot product and both squared norms.
  let dot = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (let idx = 0; idx < a.length; idx += 1) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    squaredNormA += x * x;
    squaredNormB += y * y;
  }

  const normA = Math.sqrt(squaredNormA);
  const normB = Math.sqrt(squaredNormB);
  if (normA === 0 || normB === 0) {
    // A zero vector has no direction; avoid dividing by zero.
    return 0;
  }

  const cosine = dot / (normA * normB);
  return Number.isNaN(cosine) ? 0 : cosine;
}
/**
 * Scores a single query vector against every vector in a list.
 *
 * @param {ArrayLike<number>} query - Vector to compare from.
 * @param {Array<ArrayLike<number>>} vectors - Vectors to compare against.
 * @returns {number[]} One cosineSimilarity() result per entry of `vectors`, in the same order.
 */
function batchCosineSimilarity(query, vectors) {
  const scores = [];
  for (const candidate of vectors) {
    scores.push(cosineSimilarity(query, candidate));
  }
  return scores;
}
/**
 * Semantic color extraction using embeddings.
 *
 * Embeds the query and every candidate phrase, then returns the candidate
 * whose embedding is most similar to the query. Plain colors are embedded as
 * `color <name>`; when modifiers are configured, every `<modifier> <color>`
 * combination is tried as well and is returned in that combined form.
 *
 * @param {string} query - Natural-language request to analyse.
 * @param {object} [config] - Parameter configuration.
 * @param {string[]} [config.semanticCandidates] - Color names to consider.
 * @param {string[]} [config.modifierCandidates] - Modifiers combined with every color.
 * @param {string} [config.fallback] - Returned when there are no candidates (default 'blue').
 * @param {Function} embedder - Async function `(text, options)` resolving to an
 *   object whose `data` property holds the embedding values.
 * @returns {Promise<string>} The best-matching color phrase, or the fallback.
 */
async function extractSemanticColor(query, config, embedder) {
  // A Transformers.js feature-extraction pipeline returns one vector per token
  // unless pooling is requested, so the flattened `data` length would depend on
  // the text length and cosineSimilarity() would score every length mismatch
  // as 0. Mean pooling yields one fixed-size sentence vector per text.
  const embedOptions = { pooling: 'mean', normalize: true };
  const embed = async (text) => {
    const output = await embedder(text, embedOptions);
    return Array.from(output.data);
  };

  const settings = config || {};
  const colors = settings.semanticCandidates || [];
  const modifiers = settings.modifierCandidates || [];
  const fallback = settings.fallback || 'blue';

  // `label` is what the caller receives, `phrase` is what gets embedded.
  const candidates = colors.map((color) => ({ label: color, phrase: `color ${color}` }));
  for (const modifier of modifiers) {
    for (const color of colors) {
      const phrase = `${modifier} ${color}`;
      candidates.push({ label: phrase, phrase });
    }
  }
  if (candidates.length === 0) {
    // Nothing to compare against, so skip embedding the query altogether.
    return fallback;
  }

  const queryVector = await embed(query);
  let bestColor = fallback;
  let bestScore = -1;
  // Sequential on purpose: ties keep the earliest candidate (strict `>`).
  for (const { label, phrase } of candidates) {
    const score = cosineSimilarity(queryVector, await embed(phrase));
    if (score > bestScore) {
      bestScore = score;
      bestColor = label;
    }
  }
  return bestColor;
}
/**
 * Semantic category extraction using embeddings.
 *
 * Each candidate category is embedded under four phrasings; the category
 * owning the phrasing most similar to the query is returned.
 *
 * @param {string} query - Natural-language request to analyse.
 * @param {object} [config] - Parameter configuration.
 * @param {string[]} [config.semanticCandidates] - Category names to consider.
 * @param {string} [config.fallback] - Returned when there are no candidates (default 'general').
 * @param {Function} embedder - Async function `(text, options)` resolving to an
 *   object whose `data` property holds the embedding values.
 * @returns {Promise<string>} The best-matching category, or the fallback.
 */
async function extractSemanticCategory(query, config, embedder) {
  // A Transformers.js feature-extraction pipeline returns one vector per token
  // unless pooling is requested; the flattened length then varies with the
  // text and cosineSimilarity() scores every length mismatch as 0.
  const embedOptions = { pooling: 'mean', normalize: true };
  const embed = async (text) => {
    const output = await embedder(text, embedOptions);
    return Array.from(output.data);
  };

  const settings = config || {};
  const categories = settings.semanticCandidates || [];
  const fallback = settings.fallback || 'general';
  if (categories.length === 0) {
    // Nothing to compare against, so skip embedding the query altogether.
    return fallback;
  }

  const queryVector = await embed(query);
  let bestCategory = fallback;
  let bestScore = -1;
  for (const category of categories) {
    // Test multiple phrasings for better matching.
    const phrasings = [
      `${category} related`,
      `this is about ${category}`,
      `${category} category`,
      `${category} topic`,
    ];
    for (const phrase of phrasings) {
      const score = cosineSimilarity(queryVector, await embed(phrase));
      // Strict `>` keeps the earliest category on ties.
      if (score > bestScore) {
        bestScore = score;
        bestCategory = category;
      }
    }
  }
  return bestCategory;
}
/**
 * Content isolation using semantic boundaries.
 *
 * Enumerates every contiguous run of words in the query and scores each run
 * by how similar it stays to itself when framed as "content" versus when
 * framed as a "command". The run with the highest (content - command) score
 * is returned.
 *
 * Note: the number of runs grows quadratically with the word count and each
 * run needs eight embedder calls, so long queries are expensive.
 *
 * @param {string} query - Natural-language request to analyse.
 * @param {Function} embedder - Async function `(text, options)` resolving to an
 *   object whose `data` property holds the embedding values.
 * @returns {Promise<string>} The most content-like segment, or 'Hello!' when no
 *   segment scores above the initial threshold of -1.
 */
async function extractSemanticContent(query, embedder) {
  // A Transformers.js feature-extraction pipeline returns one vector per token
  // unless pooling is requested; the flattened length then varies with the
  // text and cosineSimilarity() scores every length mismatch as 0.
  const embedOptions = { pooling: 'mean', normalize: true };
  const embed = async (text) => {
    const output = await embedder(text, embedOptions);
    return Array.from(output.data);
  };

  // Split on whitespace runs so repeated, leading or trailing spaces do not
  // create empty words and whitespace-padded segments.
  const words = query.trim().split(/\s+/);
  const segments = [];
  // Generate all possible contiguous segments.
  for (let start = 0; start < words.length; start += 1) {
    for (let end = start + 1; end <= words.length; end += 1) {
      segments.push(words.slice(start, end).join(' '));
    }
  }

  // Framings used to decide whether a segment is "content-like" or "command-like".
  const contentPrompts = [
    'this is a message to display',
    'this is content to show',
    'this is text to output',
    'this is information for the user',
  ];
  const commandPrompts = [
    'this is a command instruction',
    'this is an action to perform',
    'this is a system directive',
  ];

  let bestContent = 'Hello!';
  let bestContentScore = -1;
  for (const segment of segments) {
    if (segment.length < 2) {
      continue; // Skip very short segments.
    }
    // The segment's own embedding is the same for every prompt: compute it once.
    const segmentVector = await embed(segment);

    let contentScore = 0;
    for (const contentPrompt of contentPrompts) {
      const framedVector = await embed(`"${segment}" - ${contentPrompt}`);
      contentScore += cosineSimilarity(framedVector, segmentVector);
    }

    let commandScore = 0;
    for (const commandPrompt of commandPrompts) {
      const framedVector = await embed(`"${segment}" - ${commandPrompt}`);
      commandScore += cosineSimilarity(framedVector, segmentVector);
    }

    // Prefer segments that are more content-like than command-like.
    const netContentScore = contentScore - commandScore;
    if (netContentScore > bestContentScore) {
      bestContentScore = netContentScore;
      bestContent = segment;
    }
  }
  return bestContent;
}
/**
 * Main class for semantic function calling.
 *
 * Tools are registered with a description, intent patterns and contexts.
 * `execute()` embeds the query, scores it against every tool, applies a
 * confidence threshold and extracts the selected tool's parameters.
 */
class SemanticFunctionCaller {
  /**
   * @param {object} [config] - Overrides for the defaults below.
   * @param {string} [config.embeddingModel] - Model id handed to `pipeline()`.
   * @param {number} [config.defaultConfidenceThreshold] - Minimum total score for `execute()` to succeed.
   * @param {boolean} [config.enableCaching] - Cache embeddings by input text.
   * @param {boolean} [config.verbose] - Log progress with `console.log`.
   * @param {object} [config.embeddingOptions] - Options passed to the embedder on every call.
   */
  constructor(config = {}) {
    this.embedder = null;
    this.tools = [];
    // Null-prototype object: keys are arbitrary text, so inherited names such
    // as "constructor" must never look like cache hits.
    this.embeddingCache = Object.create(null);
    this.config = {
      embeddingModel: 'Xenova/all-MiniLM-L6-v2',
      defaultConfidenceThreshold: 0.25,
      enableCaching: true,
      verbose: false,
      // A feature-extraction pipeline returns one vector per token unless
      // pooling is requested; mean pooling gives one fixed-size vector per
      // text, which cosineSimilarity() needs (it scores length mismatches as 0).
      embeddingOptions: { pooling: 'mean', normalize: true },
      ...config,
    };
  }

  /**
   * Initialize the embedding model. Does nothing when an embedder is already set.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.embedder) {
      return;
    }
    if (this.config.verbose) {
      console.log(`Loading embedding model: ${this.config.embeddingModel}`);
    }
    this.embedder = await pipeline('feature-extraction', this.config.embeddingModel);
  }

  /**
   * Register tools for function calling. Tools are appended to those already registered.
   *
   * @param {object[]} tools - Tool definitions (`name`, `description`,
   *   optional `intentPatterns`, `contexts`, `parameters`).
   */
  registerTools(tools) {
    this.tools = [...this.tools, ...tools];
    if (this.config.verbose) {
      console.log(`Registered ${tools.length} tools:`, tools.map((t) => t.name));
    }
  }

  /**
   * Clear all registered tools and the embedding cache.
   */
  clearTools() {
    this.tools = [];
    this.embeddingCache = Object.create(null);
  }

  /**
   * Get the embedding of a text, using the cache when caching is enabled.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} A fresh array, so callers may mutate it.
   */
  async getEmbedding(text) {
    // Safe to call before initialize(): the model is loaded on demand.
    await this.initialize();
    const cached = this.config.enableCaching ? this.embeddingCache[text] : undefined;
    if (cached) {
      return Array.from(cached);
    }
    const embedding = await this.embedder(text, this.config.embeddingOptions);
    const vector = Array.from(embedding.data);
    if (this.config.enableCaching) {
      this.embeddingCache[text] = new Float32Array(vector);
    }
    return vector;
  }

  /**
   * Find the best tool match using multi-layer semantic analysis.
   *
   * Each tool gets `0.4 * bestIntent + 0.3 * bestContext + 0.3 * description`,
   * where the intent and context layers never contribute less than 0.
   *
   * @param {string} query - Natural-language request.
   * @returns {Promise<{tool: string, totalScore: number, matches: object[]}|undefined>}
   *   The highest-scoring tool, or `undefined` when no tools are registered.
   */
  async findToolBySemanticLayers(query) {
    await this.initialize();
    const queryVector = await this.getEmbedding(query);

    // Scores every text of one layer, records each match and returns the best
    // score. A missing layer (tool defined without it) contributes 0.
    const scoreLayer = async (type, texts, matches) => {
      let best = 0;
      for (const text of texts || []) {
        const score = cosineSimilarity(queryVector, await this.getEmbedding(text));
        if (score > best) {
          best = score;
        }
        matches.push({ type, text, score });
      }
      return best;
    };

    const results = [];
    for (const tool of this.tools) {
      const matches = [];
      // Layer 1: intent matching - take best match.
      const intentScore = await scoreLayer('intent', tool.intentPatterns, matches);
      // Layer 2: context matching - take best match.
      const contextScore = await scoreLayer('context', tool.contexts, matches);
      // Layer 3: description matching.
      const descVector = await this.getEmbedding(tool.description);
      const descScore = cosineSimilarity(queryVector, descVector);
      matches.push({
        type: 'description',
        text: tool.description,
        score: descScore,
      });
      // Calculate weighted total score.
      const totalScore = intentScore * 0.4 + contextScore * 0.3 + descScore * 0.3;
      results.push({
        tool: tool.name,
        totalScore,
        matches: matches.sort((a, b) => b.score - a.score),
      });
    }
    return results.sort((a, b) => b.totalScore - a.totalScore)[0];
  }

  /**
   * Extract parameters using semantic analysis.
   *
   * @param {string} toolName - Name of a registered tool.
   * @param {string} query - Natural-language request.
   * @returns {Promise<object>} Extracted values keyed by parameter name; empty
   *   when the tool is unknown or declares no parameters. Parameters of an
   *   unrecognised `type` are omitted.
   */
  async extractParameters(toolName, query) {
    const tool = this.tools.find((t) => t.name === toolName);
    if (!tool) {
      return {};
    }
    await this.initialize();
    // The extractors call the embedder with the text only, so bind the pooling
    // options here; otherwise they would receive per-token output.
    const pooledEmbedder = (text) => this.embedder(text, this.config.embeddingOptions);
    const result = {};
    for (const [paramName, paramConfig] of Object.entries(tool.parameters || {})) {
      if (paramConfig.type === 'semantic_color') {
        result[paramName] = await extractSemanticColor(query, paramConfig, pooledEmbedder);
      }
      else if (paramConfig.type === 'extracted_content') {
        result[paramName] = await extractSemanticContent(query, pooledEmbedder);
      }
      else if (paramConfig.type === 'semantic_category') {
        result[paramName] = await extractSemanticCategory(query, paramConfig, pooledEmbedder);
      }
    }
    return result;
  }

  /**
   * Execute function calling with confidence scoring.
   *
   * @param {string} query - Natural-language request.
   * @param {object} [options]
   * @param {boolean} [options.gutInstinct=false] - Lower the threshold to 0.1.
   * @param {number} [options.confidenceThreshold] - Defaults to `config.defaultConfidenceThreshold`.
   * @param {string} [options.mode='standard'] - `'first_instinct'` skips the confidence check.
   * @param {boolean} [options.verbose] - Defaults to `config.verbose`.
   * @returns {Promise<object>} On success `{ success: true, tool, parameters,
   *   confidence, reasoning, mode }`; otherwise `{ success: false, reason, confidence }`.
   */
  async execute(query, options = {}) {
    const opts = {
      gutInstinct: false,
      confidenceThreshold: this.config.defaultConfidenceThreshold,
      mode: 'standard',
      verbose: this.config.verbose,
      ...options,
    };
    if (opts.verbose) {
      console.log('🔍 Semantic Function Calling Analysis:', {
        query,
        options: opts,
      });
    }
    // Multi-layer semantic matching.
    const toolMatch = await this.findToolBySemanticLayers(query);
    if (!toolMatch) {
      // No tools registered: report a failure instead of dereferencing undefined.
      if (opts.verbose) {
        console.log('❌ No tools registered, nothing to match');
      }
      return {
        success: false,
        reason: 'No tools registered',
        confidence: 0,
      };
    }
    if (opts.verbose) {
      console.log('📊 Tool Analysis:', {
        selectedTool: toolMatch.tool,
        confidence: `${(toolMatch.totalScore * 100).toFixed(1)}%`,
        reasoning: toolMatch.matches
          .slice(0, 3)
          .map((m) => `${m.type}: "${m.text}" (${(m.score * 100).toFixed(1)}%)`),
      });
    }
    // Confidence checking (unless in first instinct mode).
    if (opts.mode !== 'first_instinct') {
      const threshold = opts.gutInstinct ? 0.1 : opts.confidenceThreshold;
      if (toolMatch.totalScore < threshold) {
        if (opts.verbose) {
          console.log('❌ Confidence too low, not executing');
        }
        return {
          success: false,
          reason: 'Low confidence in tool selection',
          confidence: toolMatch.totalScore,
        };
      }
      if (opts.gutInstinct && toolMatch.totalScore < 0.25) {
        if (opts.verbose) {
          console.log('🎯 Trusting model intuition despite low confidence score');
        }
      }
    }
    // Extract parameters.
    const parameters = await this.extractParameters(toolMatch.tool, query);
    if (opts.verbose) {
      console.log('🎯 Extracted Parameters:', parameters);
    }
    return {
      success: true,
      tool: toolMatch.tool,
      parameters,
      confidence: toolMatch.totalScore,
      reasoning: toolMatch.matches,
      mode: opts.mode,
    };
  }

  /**
   * Execute with first instinct mode (no confidence checking).
   *
   * @param {string} query - Natural-language request.
   * @returns {Promise<object>} Same shape as `execute()`.
   */
  async executeFirstInstinct(query) {
    return this.execute(query, {
      mode: 'first_instinct',
      verbose: this.config.verbose,
    });
  }

  /**
   * Get cached embedding count (for monitoring).
   *
   * @returns {number}
   */
  getCacheSize() {
    return Object.keys(this.embeddingCache).length;
  }

  /**
   * Clear embedding cache.
   */
  clearCache() {
    this.embeddingCache = Object.create(null);
  }
}
/**
 * Built-in vocabulary for common semantic parameter types.
 *
 * `colors` is split into `basic` names, `extended` names and `modifiers`;
 * every other entry is a flat list of candidate words.
 */
const defaultParameterDatabase = {
  colors: {
    // Everyday color names.
    basic: ['red', 'blue', 'green', 'yellow', 'purple', 'orange', 'pink', 'brown', 'black', 'white', 'gray'],
    // Less common color names.
    extended: [
      'crimson', 'navy', 'teal', 'coral', 'salmon', 'turquoise', 'indigo', 'violet', 'steelblue',
      'lime', 'cyan', 'magenta', 'gold', 'silver', 'maroon', 'olive', 'aqua',
    ],
    // Words that qualify a color.
    modifiers: ['light', 'dark', 'bright', 'deep', 'pale', 'vivid', 'vibrant'],
  },
  emotions: [
    'happy', 'sad', 'excited', 'calm', 'energetic', 'peaceful',
    'angry', 'joyful', 'frustrated', 'content', 'anxious', 'relaxed',
  ],
  sizes: ['tiny', 'small', 'medium', 'large', 'huge', 'massive', 'mini', 'big', 'little', 'enormous', 'gigantic'],
  directions: [
    'up', 'down', 'left', 'right', 'center', 'top', 'bottom',
    'north', 'south', 'east', 'west', 'forward', 'backward',
  ],
  categories: [
    'general', 'work', 'personal', 'ideas', 'tasks', 'reminders', 'meeting', 'project',
    'research', 'thoughts', 'quotes', 'learning', 'goals', 'planning', 'shopping', 'health',
  ],
  numbers: [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'first', 'second', 'third', 'few', 'many',
  ],
  booleans: [
    'yes', 'no', 'true', 'false', 'on', 'off', 'enable', 'disable',
    'activate', 'deactivate', 'start', 'stop', 'begin', 'end',
  ],
};
/**
 * Merge a custom parameter database with the defaults.
 *
 * Top-level entries of `custom` replace the default entry of the same name.
 * `colors` is merged one level deeper, so overriding e.g. `colors.basic`
 * keeps the default `colors.extended` and `colors.modifiers`.
 *
 * The merge is shallow: lists that are not overridden are the same array
 * instances as in the defaults, so callers should not mutate them.
 *
 * @param {object} [custom] - Overrides; omitted or null means "defaults only".
 * @returns {object} A new database object.
 */
function mergeParameterDatabase(custom) {
  // Tolerate a missing argument instead of throwing on `custom.colors`.
  const overrides = custom || {};
  return {
    ...defaultParameterDatabase,
    ...overrides,
    colors: {
      ...defaultParameterDatabase.colors,
      ...overrides.colors,
    },
  };
}
/**
* Homeschool - Teach AI to understand natural language like a patient tutor
* Advanced embedding-based function calling with semantic understanding,
* confidence scoring, and natural language parameter extraction
*/
/**
 * Sample tool definitions showing the shape accepted by
 * SemanticFunctionCaller#registerTools: a name, a description, context and
 * intent phrases used for matching, and typed parameter configurations.
 */
const exampleTools = [
  // Tool with a single color parameter chosen from a candidate list.
  {
    name: 'changeBackgroundColor',
    description: 'Changes the background color of the web page',
    contexts: ['visual styling and appearance', 'color modification and theming', 'page aesthetics and design'],
    intentPatterns: [
      'user wants to modify visual appearance',
      'user wants to change colors',
      'user wants to style the page',
    ],
    parameters: {
      color: {
        type: 'semantic_color',
        semanticCandidates: [
          'red', 'blue', 'green', 'yellow', 'purple', 'orange', 'pink', 'brown',
          'black', 'white', 'gray', 'cyan', 'magenta', 'lime', 'navy', 'teal',
          'silver', 'gold', 'coral', 'salmon', 'crimson', 'violet', 'indigo', 'turquoise',
        ],
        modifierCandidates: ['light', 'dark', 'bright', 'vibrant', 'pale'],
        fallback: 'blue',
      },
    },
  },
  // Tool whose only parameter is free text isolated from the query.
  {
    name: 'displayText',
    description: 'Shows a message or text to the user',
    contexts: ['communication and messaging', 'information display and output', 'user interaction and feedback'],
    intentPatterns: [
      'user wants to show information',
      'user wants to communicate a message',
      'user wants to display content',
    ],
    parameters: {
      text: { type: 'extracted_content', extractionStrategy: 'semantic_content_isolation' },
    },
  },
  // Tool combining free text with a category picked from a candidate list.
  {
    name: 'takeNote',
    description: 'Saves a note or reminder for later reference',
    contexts: [
      'note taking and memory',
      'information storage and organization',
      'personal productivity and planning',
      'documentation and records',
      'task management and reminders',
    ],
    intentPatterns: [
      'user wants to remember something',
      'user wants to save information',
      'user wants to record a thought',
      'user wants to jot down details',
      'user wants to make a reminder',
      'user wants to take notes',
    ],
    parameters: {
      note: { type: 'extracted_content', extractionStrategy: 'semantic_content_isolation' },
      category: {
        type: 'semantic_category',
        semanticCandidates: [
          'general', 'work', 'personal', 'ideas', 'tasks', 'reminders', 'meeting',
          'project', 'research', 'thoughts', 'quotes', 'learning', 'goals', 'planning',
        ],
        fallback: 'general',
      },
    },
  },
];
// Version info
const version = '0.1.0';
export { SemanticFunctionCaller, batchCosineSimilarity, cosineSimilarity, defaultParameterDatabase, exampleTools, extractSemanticCategory, extractSemanticColor, extractSemanticContent, mergeParameterDatabase, version };
//# sourceMappingURL=index.esm.js.map