UNPKG

@simonecoelhosfo/optimizely-mcp-server

Version:

Optimizely MCP Server for AI assistants with integrated CLI tools

262 lines 9.38 kB
/**
 * FuzzyMatcher - Handles typos and variations in natural language queries
 *
 * IMPLEMENTATION STATUS:
 * COMPLETE: Levenshtein distance, soundex, n-gram matching
 *
 * Last Updated: July 3, 2025
 */
import { ENTITY_PATTERNS } from '../patterns/EntityPatterns.js';
import { FIELD_PATTERNS } from '../patterns/FieldPatterns.js';

/**
 * Soundex digit for each coded consonant. Vowels, Y, H and W are deliberately
 * absent: they never contribute a digit to the code.
 */
const SOUNDEX_CODES = Object.freeze({
    B: '1', F: '1', P: '1', V: '1',
    C: '2', G: '2', J: '2', K: '2', Q: '2', S: '2', X: '2', Z: '2',
    D: '3', T: '3',
    L: '4',
    M: '5', N: '5',
    R: '6',
});

/** Stems ending in one of these take "es" (rather than "s") in the plural. */
const SIBILANT_ENDING = /(?:s|x|z|ch|sh)$/;

/**
 * Matches free-text terms against the entity names in ENTITY_PATTERNS and the
 * canonical field names in FIELD_PATTERNS, tolerating typos, plural forms and
 * similar-sounding spellings.
 *
 * Every successful lookup returns a match object of the shape
 * `{ original, matched, confidence, type, algorithm }`.
 */
export class FuzzyMatcher {
    maxDistance;
    minSimilarity;

    /**
     * @param {object} [config]
     * @param {number} [config.maxDistance=2] Stored on the instance; no method
     *   of this class currently reads it.
     * @param {number} [config.minSimilarity=0.7] Minimum Levenshtein / n-gram
     *   similarity (0..1) for a fuzzy candidate to be accepted.
     */
    constructor(config = {}) {
        // `??` rather than `||` so that an explicit 0 is honoured.
        this.maxDistance = config.maxDistance ?? 2;
        this.minSimilarity = config.minSimilarity ?? 0.7;
    }

    /**
     * Find best entity match using fuzzy matching.
     *
     * Resolution order:
     *   1. the entity's `primary` pattern matches the term (confidence 1.0);
     *   2. an entity name equals the term's singular or plural form (0.95);
     *   3. the highest-scoring candidate among Levenshtein similarity, Soundex
     *      equality (fixed 0.8) and bigram similarity.
     *
     * @param {string} term Raw term; lower-cased and trimmed before matching.
     * @returns {object|null} Match object, or null when nothing qualifies.
     */
    findEntity(term) {
        const normalizedTerm = term.toLowerCase().trim();
        const buildMatch = (matched, confidence, algorithm) => ({
            original: term,
            matched,
            confidence,
            type: 'entity',
            algorithm,
        });

        // 1. Exact match via each entity's primary pattern.
        for (const [entityName, pattern] of Object.entries(ENTITY_PATTERNS)) {
            if (pattern.primary.test(normalizedTerm)) {
                return buildMatch(entityName, 1.0, 'exact');
            }
        }

        const entityNames = Object.keys(ENTITY_PATTERNS);

        // 2. Singular/plural variation. The first matching entity wins
        //    outright, regardless of any fuzzy score.
        const singular = this.getSingular(normalizedTerm);
        const plural = this.getPlural(normalizedTerm);
        const inflected = entityNames.find((name) => name === singular || name === plural);
        if (inflected !== undefined) {
            return buildMatch(inflected, 0.95, 'exact');
        }

        // 3. Fuzzy candidates; a candidate replaces the current best only when
        //    its score is strictly higher, so earlier candidates win ties.
        const termSoundex = this.soundex(normalizedTerm);
        let bestMatch = null;
        let bestScore = 0;
        const consider = (entityName, score, algorithm) => {
            if (score > bestScore) {
                bestScore = score;
                bestMatch = buildMatch(entityName, score, algorithm);
            }
        };

        for (const entityName of entityNames) {
            // Levenshtein distance, normalised by the longer string.
            const distance = this.levenshteinDistance(normalizedTerm, entityName);
            const similarity = 1 - distance / Math.max(normalizedTerm.length, entityName.length);
            if (similarity >= this.minSimilarity) {
                consider(entityName, similarity, 'levenshtein');
            }

            // Soundex matching for phonetic similarity. An empty code (empty
            // term) must never count as a phonetic match.
            if (termSoundex !== '' && termSoundex === this.soundex(entityName)) {
                consider(entityName, 0.8, 'soundex');
            }

            // N-gram (bigram) similarity.
            const ngramScore = this.ngramSimilarity(normalizedTerm, entityName, 2);
            if (ngramScore >= this.minSimilarity) {
                consider(entityName, ngramScore, 'ngram');
            }
        }

        return bestMatch;
    }

    /**
     * Find best field match using fuzzy matching.
     *
     * A field pattern is skipped when it declares an `entity`, an
     * `entityContext` was supplied, and the two differ.
     *
     * @param {string} term Raw term; lower-cased and trimmed before matching.
     * @param {string} [entityContext] Entity to restrict entity-bound fields to.
     * @returns {object|null} Match object whose `matched` is the pattern's
     *   `canonical` name, or null when nothing qualifies.
     */
    findField(term, entityContext) {
        const normalizedTerm = term.toLowerCase().trim();
        const candidates = Object.values(FIELD_PATTERNS).filter(
            (pattern) => !(pattern.entity && entityContext && pattern.entity !== entityContext),
        );

        // Exact match: any of the field's patterns matches the term.
        for (const pattern of candidates) {
            for (const regex of pattern.patterns) {
                if (regex.test(normalizedTerm)) {
                    return {
                        original: term,
                        matched: pattern.canonical,
                        confidence: 1.0,
                        type: 'field',
                        algorithm: 'exact',
                    };
                }
            }
        }

        // Fuzzy match: Levenshtein similarity against the canonical name.
        let bestMatch = null;
        let bestScore = 0;
        for (const { canonical } of candidates) {
            const distance = this.levenshteinDistance(normalizedTerm, canonical);
            const similarity = 1 - distance / Math.max(normalizedTerm.length, canonical.length);
            if (similarity >= this.minSimilarity && similarity > bestScore) {
                bestScore = similarity;
                bestMatch = {
                    original: term,
                    matched: canonical,
                    confidence: similarity,
                    type: 'field',
                    algorithm: 'levenshtein',
                };
            }
        }
        return bestMatch;
    }

    /**
     * Levenshtein distance algorithm.
     *
     * Only two rows of the edit matrix are kept at a time.
     *
     * @param {string} a
     * @param {string} b
     * @returns {number} Minimum number of single-character insertions,
     *   deletions and substitutions turning `a` into `b`.
     */
    levenshteinDistance(a, b) {
        // Row 0: distance from the empty prefix of `b` to each prefix of `a`.
        let previousRow = Array.from({ length: a.length + 1 }, (_, j) => j);
        for (let i = 1; i <= b.length; i++) {
            const currentRow = [i];
            for (let j = 1; j <= a.length; j++) {
                const substitutionCost = b.charAt(i - 1) === a.charAt(j - 1) ? 0 : 1;
                currentRow[j] = Math.min(
                    previousRow[j - 1] + substitutionCost, // substitution (or match)
                    currentRow[j - 1] + 1, // insertion
                    previousRow[j] + 1, // deletion
                );
            }
            previousRow = currentRow;
        }
        return previousRow[a.length];
    }

    /**
     * Soundex algorithm for phonetic matching.
     *
     * The first character is kept as-is (upper-cased); following consonants
     * are replaced by digits. A digit is dropped when it repeats the digit of
     * the immediately preceding coded letter (including the first letter), or
     * when only H/W lie between the two. Vowels, Y and any unmapped character
     * contribute no digit but do separate repeated digits. The result is
     * truncated or zero-padded to four characters, e.g. "robert" -> "R163".
     *
     * @param {string} str
     * @returns {string} Four-character code, or '' for an empty string.
     */
    soundex(str) {
        const upper = str.toUpperCase();
        if (upper.length === 0) {
            return '';
        }
        let code = upper[0];
        // Seed with the first letter's own digit so an immediate duplicate
        // (as in "Pfister") is suppressed.
        let previousDigit = SOUNDEX_CODES[upper[0]] ?? '';
        for (let i = 1; i < upper.length && code.length < 4; i++) {
            const char = upper[i];
            if (char === 'H' || char === 'W') {
                continue; // H and W do not break a run of equal digits
            }
            const digit = SOUNDEX_CODES[char] ?? '';
            if (digit !== '' && digit !== previousDigit) {
                code += digit;
            }
            previousDigit = digit; // '' for vowels etc., which ends the run
        }
        return code.padEnd(4, '0');
    }

    /**
     * N-gram similarity calculation: the Jaccard index (shared distinct
     * n-grams divided by all distinct n-grams) of the two padded strings.
     *
     * @param {string} str1
     * @param {string} str2
     * @param {number} [n=2] N-gram length.
     * @returns {number} Similarity in the range 0..1.
     */
    ngramSimilarity(str1, str2, n = 2) {
        const grams1 = new Set(this.getNgrams(str1, n));
        const grams2 = new Set(this.getNgrams(str2, n));
        let sharedCount = 0;
        for (const gram of grams1) {
            if (grams2.has(gram)) {
                sharedCount += 1;
            }
        }
        const unionSize = grams1.size + grams2.size - sharedCount;
        return unionSize > 0 ? sharedCount / unionSize : 0;
    }

    /**
     * Generate n-grams from string. The string is padded with `n - 1`
     * underscores on both sides so leading and trailing characters each
     * appear in `n` grams.
     *
     * @param {string} str
     * @param {number} n N-gram length.
     * @returns {string[]} All n-grams in order of appearance (duplicates kept).
     */
    getNgrams(str, n) {
        const padding = '_'.repeat(n - 1);
        const padded = `${padding}${str}${padding}`;
        const gramCount = Math.max(0, padded.length - n + 1);
        return Array.from({ length: gramCount }, (_, start) => padded.slice(start, start + n));
    }

    /**
     * Get singular form of a word (simple rules).
     *
     *   - "...ies" -> "...y"
     *   - "...es"  -> drop "es" only when the remaining stem ends in
     *                 s, x, z, ch or sh ("boxes" -> "box"); otherwise drop
     *                 just the "s" ("pages" -> "page")
     *   - "...s"   -> drop the "s", unless the word ends in "ss"
     *
     * @param {string} word
     * @returns {string}
     */
    getSingular(word) {
        if (word.endsWith('ies')) {
            return `${word.slice(0, -3)}y`;
        }
        if (word.endsWith('es')) {
            const stem = word.slice(0, -2);
            return SIBILANT_ENDING.test(stem) ? stem : word.slice(0, -1);
        }
        if (word.endsWith('s') && !word.endsWith('ss')) {
            return word.slice(0, -1);
        }
        return word;
    }

    /**
     * Get plural form of a word (simple rules).
     *
     *   - consonant + "y"        -> "...ies"
     *   - ends in s, x, z, ch, sh -> append "es"
     *   - anything else           -> append "s"
     *
     * @param {string} word
     * @returns {string}
     */
    getPlural(word) {
        if (word.endsWith('y') && !/[aeiou]y$/i.test(word)) {
            return `${word.slice(0, -1)}ies`;
        }
        const takesEs = ['s', 'x', 'z', 'ch', 'sh'].some((suffix) => word.endsWith(suffix));
        return takesEs ? `${word}es` : `${word}s`;
    }

    /**
     * Find best matches for a phrase.
     *
     * Each whitespace-separated word is looked up as an entity and as a field
     * (without entity context); matches with confidence of at least
     * `minSimilarity` are kept.
     *
     * @param {string} phrase
     * @param {number} [maxResults=3] Maximum number of matches to return.
     * @returns {object[]} Matches sorted by descending confidence.
     */
    findBestMatches(phrase, maxResults = 3) {
        // Leading/trailing whitespace yields empty tokens; drop them.
        const words = phrase
            .toLowerCase()
            .split(/\s+/)
            .filter((word) => word.length > 0);

        const matches = [];
        for (const word of words) {
            const entityMatch = this.findEntity(word);
            if (entityMatch && entityMatch.confidence >= this.minSimilarity) {
                matches.push(entityMatch);
            }
            const fieldMatch = this.findField(word);
            if (fieldMatch && fieldMatch.confidence >= this.minSimilarity) {
                matches.push(fieldMatch);
            }
        }

        // Sort by confidence and return top results.
        return matches
            .sort((left, right) => right.confidence - left.confidence)
            .slice(0, maxResults);
    }
}
//# sourceMappingURL=FuzzyMatcher.js.map