// Package: @simonecoelhosfo/optimizely-mcp-server
// Version: (not given in this listing)
// Optimizely MCP Server for AI assistants with integrated CLI tools
// 262 lines • 9.38 kB
// JavaScript
/**
* FuzzyMatcher - Handles typos and variations in natural language queries
*
* IMPLEMENTATION STATUS:
* COMPLETE: Levenshtein distance, soundex, n-gram matching
*
* Last Updated: July 3, 2025
*/
import { ENTITY_PATTERNS } from '../patterns/EntityPatterns.js';
import { FIELD_PATTERNS } from '../patterns/FieldPatterns.js';
/**
 * Resolves free-form, possibly misspelled terms to known entity and field
 * names.
 *
 * Entity names are the keys of the imported ENTITY_PATTERNS table (each entry
 * is expected to expose a `primary` RegExp); field definitions are the values
 * of the imported FIELD_PATTERNS table (each entry is expected to expose
 * `canonical`, `patterns` and, optionally, `entity`).
 *
 * Every successful lookup returns a match record of the shape
 * `{ original, matched, confidence, type, algorithm }` where `confidence` is
 * in the range 0..1.
 */
export class FuzzyMatcher {
  /**
   * Soundex digit for each consonant. Vowels, H, W and Y are intentionally
   * absent: they carry no digit and are handled explicitly in `soundex`.
   */
  static #SOUNDEX_CODES = Object.freeze({
    B: '1', F: '1', P: '1', V: '1',
    C: '2', G: '2', J: '2', K: '2', Q: '2', S: '2', X: '2', Z: '2',
    D: '3', T: '3',
    L: '4',
    M: '5', N: '5',
    R: '6',
  });

  /** Configured maximum edit distance. Stored only; no method in this class reads it. */
  maxDistance;

  /** Minimum similarity (0..1) a Levenshtein or n-gram score must reach to count as a match. */
  minSimilarity;

  /**
   * @param {{ maxDistance?: number, minSimilarity?: number }} [config]
   *   Optional overrides. Only `null`/`undefined` fall back to the defaults
   *   (2 and 0.7), so an explicit `0` is honoured.
   */
  constructor(config = {}) {
    this.maxDistance = config.maxDistance ?? 2;
    this.minSimilarity = config.minSimilarity ?? 0.7;
  }

  /**
   * Find the best entity match for a term.
   *
   * Resolution order:
   *   1. the first entity whose `primary` pattern matches (confidence 1.0);
   *   2. the first entity equal to a singular/plural form of the term
   *      (confidence 0.95, reported with algorithm 'exact');
   *   3. the highest-scoring candidate among Levenshtein similarity, Soundex
   *      equality (fixed confidence 0.8) and bigram similarity.
   *
   * @param {string} term - Raw user term; it is lower-cased and trimmed before matching.
   * @returns {object|null} Match record with `type: 'entity'`, or null when nothing qualifies.
   */
  findEntity(term) {
    const normalizedTerm = term.toLowerCase().trim();

    for (const [entityName, pattern] of Object.entries(ENTITY_PATTERNS)) {
      if (pattern.primary.test(normalizedTerm)) {
        return this.#buildMatch(term, entityName, 1.0, 'entity', 'exact');
      }
    }

    // Nothing below can match an empty term; skip the work.
    if (normalizedTerm === '') {
      return null;
    }

    const entityNames = Object.keys(ENTITY_PATTERNS);

    // A singular/plural hit always wins over the fuzzy scorers, so resolve it
    // up front instead of re-deriving the word forms for every entity.
    const inflections = this.#inflectionCandidates(normalizedTerm);
    const inflected = entityNames.find((name) => inflections.has(name));
    if (inflected !== undefined) {
      return this.#buildMatch(term, inflected, 0.95, 'entity', 'exact');
    }

    const termSoundex = this.soundex(normalizedTerm);
    let bestMatch = null;
    let bestScore = 0;
    // Strict '>' keeps the earliest candidate on ties.
    const consider = (entityName, confidence, algorithm) => {
      if (confidence > bestScore) {
        bestScore = confidence;
        bestMatch = this.#buildMatch(term, entityName, confidence, 'entity', algorithm);
      }
    };

    for (const entityName of entityNames) {
      const similarity = this.#similarityRatio(normalizedTerm, entityName);
      if (similarity >= this.minSimilarity) {
        consider(entityName, similarity, 'levenshtein');
      }

      // An empty code means the term has no letters; never treat that as a phonetic match.
      if (termSoundex !== '' && termSoundex === this.soundex(entityName)) {
        consider(entityName, 0.8, 'soundex');
      }

      const bigramScore = this.ngramSimilarity(normalizedTerm, entityName, 2);
      if (bigramScore >= this.minSimilarity) {
        consider(entityName, bigramScore, 'ngram');
      }
    }
    return bestMatch;
  }

  /**
   * Find the best field match for a term.
   *
   * Field definitions bound to a different entity than `entityContext` are
   * ignored; definitions without an `entity`, or any definition when no
   * context is given, are always considered. The first regex hit returns
   * with confidence 1.0; otherwise the canonical name with the highest
   * Levenshtein similarity (at least `minSimilarity`) is returned.
   *
   * @param {string} term - Raw user term; it is lower-cased and trimmed before matching.
   * @param {string} [entityContext] - Entity name used to filter entity-specific fields.
   * @returns {object|null} Match record with `type: 'field'`, or null when nothing qualifies.
   */
  findField(term, entityContext) {
    const normalizedTerm = term.toLowerCase().trim();
    const candidates = Object.values(FIELD_PATTERNS).filter(
      (pattern) => !(pattern.entity && entityContext && pattern.entity !== entityContext),
    );

    for (const pattern of candidates) {
      for (const regex of pattern.patterns) {
        if (regex.test(normalizedTerm)) {
          return this.#buildMatch(term, pattern.canonical, 1.0, 'field', 'exact');
        }
      }
    }

    let bestMatch = null;
    let bestScore = 0;
    for (const { canonical } of candidates) {
      const similarity = this.#similarityRatio(normalizedTerm, canonical);
      if (similarity >= this.minSimilarity && similarity > bestScore) {
        bestScore = similarity;
        bestMatch = this.#buildMatch(term, canonical, similarity, 'field', 'levenshtein');
      }
    }
    return bestMatch;
  }

  /**
   * Levenshtein edit distance: the minimum number of single-character
   * insertions, deletions and substitutions that turn `a` into `b`.
   *
   * Only two rows of the dynamic-programming table are kept at a time.
   *
   * @param {string} a
   * @param {string} b
   * @returns {number} Edit distance (0 when the strings are equal).
   */
  levenshteinDistance(a, b) {
    if (a.length === 0) {
      return b.length;
    }
    if (b.length === 0) {
      return a.length;
    }

    let previousRow = Array.from({ length: a.length + 1 }, (_, j) => j);
    for (let i = 1; i <= b.length; i++) {
      const currentRow = [i];
      for (let j = 1; j <= a.length; j++) {
        const substitutionCost = b.charAt(i - 1) === a.charAt(j - 1) ? 0 : 1;
        currentRow[j] = Math.min(
          previousRow[j - 1] + substitutionCost, // substitution (or match)
          currentRow[j - 1] + 1, // insertion
          previousRow[j] + 1, // deletion
        );
      }
      previousRow = currentRow;
    }
    return previousRow[a.length];
  }

  /**
   * American Soundex code for phonetic comparison.
   *
   * The first letter is kept verbatim; following consonants are mapped to
   * digits, runs of the same digit collapse to one (H and W do not break a
   * run, vowels and Y do), vowels/H/W/Y emit nothing, and the result is
   * padded or truncated to one letter plus three digits.
   *
   * @param {string} str - Input text; characters other than A-Z (after upper-casing) are ignored.
   * @returns {string} Four-character code such as 'R163', or '' when `str` contains no letters.
   */
  soundex(str) {
    const letters = str.toUpperCase().replace(/[^A-Z]/g, '');
    if (letters.length === 0) {
      return '';
    }

    const codes = FuzzyMatcher.#SOUNDEX_CODES;
    let result = letters[0];
    // Seed with the first letter's digit so an immediately repeated sound is not emitted twice.
    let previousCode = codes[letters[0]] ?? '';

    for (let i = 1; i < letters.length && result.length < 4; i++) {
      const letter = letters[i];
      if (letter === 'H' || letter === 'W') {
        continue; // transparent: consonants on either side still count as adjacent
      }
      const code = codes[letter];
      if (code === undefined) {
        previousCode = ''; // vowel or Y: separates otherwise-adjacent duplicates
        continue;
      }
      if (code !== previousCode) {
        result += code;
      }
      previousCode = code;
    }
    return result.padEnd(4, '0');
  }

  /**
   * Jaccard similarity of the two strings' distinct n-gram sets
   * (|intersection| / |union|), always within 0..1.
   *
   * @param {string} str1
   * @param {string} str2
   * @param {number} [n=2] - N-gram length.
   * @returns {number} 1 for identical n-gram sets, 0 when nothing is shared.
   */
  ngramSimilarity(str1, str2, n = 2) {
    const grams1 = new Set(this.getNgrams(str1, n));
    const grams2 = new Set(this.getNgrams(str2, n));

    let shared = 0;
    for (const gram of grams1) {
      if (grams2.has(gram)) {
        shared += 1;
      }
    }
    const unionSize = grams1.size + grams2.size - shared;
    return unionSize > 0 ? shared / unionSize : 0;
  }

  /**
   * Split a string into overlapping n-grams. The string is padded with
   * `n - 1` underscores on each side so that leading and trailing characters
   * contribute as many n-grams as inner ones.
   *
   * @param {string} str
   * @param {number} n - N-gram length.
   * @returns {string[]} N-grams in order of appearance (duplicates preserved).
   */
  getNgrams(str, n) {
    const padding = '_'.repeat(n - 1);
    const padded = `${padding}${str}${padding}`;
    const ngrams = [];
    for (let start = 0; start + n <= padded.length; start++) {
      ngrams.push(padded.slice(start, start + n));
    }
    return ngrams;
  }

  /**
   * Heuristic singular form of an English word.
   *
   * '-ies' becomes '-y'; '-es' is removed only after a sibilant stem
   * (ss, x, zz, ch, sh) so that words like "pages" keep their final 'e';
   * otherwise a single trailing 's' is removed (but not from '-ss').
   *
   * @param {string} word - Lower-case word.
   * @returns {string} Singular guess, or the word unchanged when no rule applies.
   */
  getSingular(word) {
    if (word.endsWith('ies')) {
      return `${word.slice(0, -3)}y`;
    }
    if (/(?:ss|x|zz|ch|sh)es$/.test(word)) {
      return word.slice(0, -2);
    }
    if (word.endsWith('s') && !word.endsWith('ss')) {
      return word.slice(0, -1);
    }
    return word;
  }

  /**
   * Heuristic plural form of an English word.
   *
   * Consonant + 'y' becomes '-ies'; words ending in s, x, z, ch or sh take
   * '-es'; everything else takes '-s'.
   *
   * @param {string} word
   * @returns {string} Plural guess.
   */
  getPlural(word) {
    if (word.endsWith('y') && !/[aeiou]y$/i.test(word)) {
      return `${word.slice(0, -1)}ies`;
    }
    const takesEs = ['s', 'x', 'z', 'ch', 'sh'].some((suffix) => word.endsWith(suffix));
    return takesEs ? `${word}es` : `${word}s`;
  }

  /**
   * Match every whitespace-separated word of a phrase against both entities
   * and fields (fields without entity context) and return the strongest hits.
   *
   * @param {string} phrase - Free-form text; it is lower-cased before splitting.
   * @param {number} [maxResults=3] - Maximum number of matches to return.
   * @returns {object[]} Match records sorted by descending confidence.
   */
  findBestMatches(phrase, maxResults = 3) {
    // Leading/trailing whitespace would otherwise produce empty tokens.
    const words = phrase
      .toLowerCase()
      .split(/\s+/)
      .filter((word) => word !== '');

    const matches = [];
    for (const word of words) {
      const entityMatch = this.findEntity(word);
      if (entityMatch && entityMatch.confidence >= this.minSimilarity) {
        matches.push(entityMatch);
      }
      const fieldMatch = this.findField(word);
      if (fieldMatch && fieldMatch.confidence >= this.minSimilarity) {
        matches.push(fieldMatch);
      }
    }

    return matches
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, maxResults);
  }

  /** Assemble the match record returned by the find* methods. */
  #buildMatch(original, matched, confidence, type, algorithm) {
    return { original, matched, confidence, type, algorithm };
  }

  /**
   * Levenshtein distance normalised by the longer string's length.
   * Returns 0 when both strings are empty so the result is never NaN.
   */
  #similarityRatio(a, b) {
    const longest = Math.max(a.length, b.length);
    if (longest === 0) {
      return 0;
    }
    return 1 - this.levenshteinDistance(a, b) / longest;
  }

  /**
   * Word forms that should count as the "same" entity name: the singular and
   * plural guesses, plus, for '-es' words, both possible stems, because
   * "statuses" -> "status" and "pages" -> "page" cannot be told apart by rule.
   */
  #inflectionCandidates(word) {
    const forms = new Set([this.getSingular(word), this.getPlural(word)]);
    if (word.endsWith('es')) {
      forms.add(word.slice(0, -2));
      forms.add(word.slice(0, -1));
    }
    return forms;
  }
}
//# sourceMappingURL=FuzzyMatcher.js.map