/*
 * ms365-mcp-server
 * Version: (not specified)
 * Microsoft 365 MCP Server for managing Microsoft 365 email through natural-language
 * interactions, with full OAuth2 authentication support.
 * 515 lines (514 loc) • 19.8 kB
 * JavaScript
 */
import { logger } from './api.js';
/**
 * In-memory fuzzy search over a list of email objects.
 *
 * Each email is scored with up to five strategies (exact substring, typo-tolerant
 * fuzzy, term-proximity "semantic", synonym, phonetic/Soundex); the best-scoring
 * strategy per email is kept, then results are de-duplicated, optionally boosted
 * for recency, sorted and truncated.
 *
 * Email objects are read through these properties: `id`, `subject`, `bodyPreview`,
 * `from.{name,address}`, `toRecipients[].{name,address}`,
 * `ccRecipients[].{name,address}` and `receivedDateTime`. Missing text fields and
 * recipient lists are treated as empty rather than throwing.
 */
export class EnhancedFuzzySearch {
    /**
     * @param {object} ms365Operations - Stored on the instance; no method of this
     *   class reads it.
     */
    constructor(ms365Operations) {
        this.ms365Operations = ms365Operations;
    }

    /**
     * Enhanced fuzzy search with multiple matching strategies.
     *
     * @param {string} query - Raw user query.
     * @param {Array<object>} emails - Emails to score.
     * @param {object} [options] - Overrides merged over EnhancedFuzzySearch.DEFAULT_OPTIONS.
     * @returns {Promise<Array<object>>} Result objects
     *   `{ email, score, matchedFields, matchType, explanation }`, best first.
     */
    async search(query, emails, options = {}) {
        const opts = { ...EnhancedFuzzySearch.DEFAULT_OPTIONS, ...options };
        logger.log(`🔍 Enhanced fuzzy search for query: "${query}"`);
        // Normalize and preprocess the query once; it is the same for every email.
        const normalizedQuery = this.normalizeQuery(query);
        const queryTerms = this.extractQueryTerms(normalizedQuery);
        const expandedTerms = this.expandQueryWithSynonyms(queryTerms, opts);
        const results = [];
        for (const email of emails) {
            // Exact matching deliberately uses the raw query, not the normalized one.
            const candidates = this.matchEmail(email, query, queryTerms, expandedTerms, opts);
            if (candidates.length === 0) {
                continue;
            }
            // Keep only the highest-scoring strategy for this email.
            results.push(candidates.reduce((best, current) => (current.score > best.score ? current : best)));
        }
        const sortedResults = this.sortAndFilterResults(results, opts);
        logger.log(`🔍 Enhanced fuzzy search found ${sortedResults.length} results`);
        return sortedResults;
    }

    /**
     * Match an email against search criteria using multiple strategies.
     *
     * @param {object} email - Email to score.
     * @param {string} originalQuery - Raw query, used for the exact-substring check.
     * @param {string[]} queryTerms - Terms from extractQueryTerms.
     * @param {string[]} expandedTerms - Terms plus synonyms from expandQueryWithSynonyms.
     * @param {object} options - Fully merged options.
     * @returns {Array<object>} One result per strategy that cleared its threshold
     *   (possibly empty).
     */
    matchEmail(email, originalQuery, queryTerms, expandedTerms, options) {
        const results = [];
        const emailText = this.extractEmailText(email);
        const joinedTerms = queryTerms.join(' ');
        // 1. Exact matching
        if (this.hasExactMatch(emailText, originalQuery)) {
            results.push({
                email,
                score: 1.0,
                matchedFields: this.getMatchedFields(email, originalQuery),
                matchType: 'exact',
                explanation: `Exact match found for "${originalQuery}"`
            });
        }
        // 2. Fuzzy matching with typo tolerance
        if (options.enableTypoTolerance) {
            const fuzzyScore = this.calculateFuzzyScore(emailText, queryTerms, options.typoTolerance);
            if (fuzzyScore > 0.5) {
                results.push({
                    email,
                    score: fuzzyScore * 0.9, // Slightly lower than exact
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'fuzzy',
                    explanation: `Fuzzy match with ${Math.round(fuzzyScore * 100)}% similarity`
                });
            }
        }
        // 3. Semantic matching
        if (options.enableSemanticMatching) {
            const semanticScore = this.calculateSemanticScore(emailText, queryTerms, options.semanticThreshold);
            if (semanticScore > options.semanticThreshold) {
                results.push({
                    email,
                    score: semanticScore * 0.8,
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'semantic',
                    explanation: `Semantic match with ${Math.round(semanticScore * 100)}% relevance`
                });
            }
        }
        // 4. Synonym matching (only when expansion actually added something)
        if (options.enableSynonyms && expandedTerms.length > queryTerms.length) {
            const synonymScore = this.calculateSynonymScore(emailText, expandedTerms);
            if (synonymScore > 0.6) {
                results.push({
                    email,
                    score: synonymScore * 0.7,
                    matchedFields: this.getMatchedFields(email, expandedTerms.join(' ')),
                    matchType: 'synonym',
                    explanation: `Synonym match found`
                });
            }
        }
        // 5. Phonetic matching
        if (options.enablePhoneticMatching) {
            const phoneticScore = this.calculatePhoneticScore(emailText, queryTerms);
            if (phoneticScore > 0.7) {
                results.push({
                    email,
                    score: phoneticScore * 0.6,
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'phonetic',
                    explanation: `Phonetic match found`
                });
            }
        }
        return results;
    }

    /**
     * Normalize query for better matching: lowercase, trim, replace every character
     * that is not a word character, whitespace or hyphen with a space, then collapse
     * whitespace runs.
     *
     * @param {string} query
     * @returns {string}
     */
    normalizeQuery(query) {
        return query
            .toLowerCase()
            .trim()
            .replace(/[^\w\s-]/g, ' ')
            .replace(/\s+/g, ' ');
    }

    /**
     * Extract meaningful terms from a normalized query: whitespace-separated tokens
     * longer than two characters that are not stop words.
     *
     * @param {string} query - Output of normalizeQuery.
     * @returns {string[]}
     */
    extractQueryTerms(query) {
        const stopWords = new Set(['the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those']);
        return query
            .split(/\s+/)
            .filter((term) => term.length > 2 && !stopWords.has(term));
    }

    /**
     * Expand query terms with synonyms from EnhancedFuzzySearch.SYNONYMS.
     *
     * @param {string[]} terms
     * @param {object} options - Only `enableSynonyms` is read.
     * @returns {string[]} `terms` itself when synonyms are disabled, otherwise a new
     *   array of the terms followed by their synonyms (duplicates are not removed).
     */
    expandQueryWithSynonyms(terms, options) {
        if (!options.enableSynonyms) {
            return terms;
        }
        const table = EnhancedFuzzySearch.SYNONYMS;
        const expanded = [...terms];
        for (const term of terms) {
            // Own-property check: a term such as "constructor" or "valueof" must not
            // resolve to an inherited Object.prototype member, which is not iterable
            // and would make the spread below throw.
            if (Object.prototype.hasOwnProperty.call(table, term)) {
                expanded.push(...table[term]);
            }
        }
        return expanded;
    }

    /**
     * Internal helper: return a recipient list as an array, treating a missing or
     * non-array value as empty and dropping null/undefined entries.
     *
     * @param {Array<object>|undefined|null} list
     * @returns {Array<object>}
     */
    recipientList(list) {
        return Array.isArray(list) ? list.filter((entry) => entry != null) : [];
    }

    /**
     * Internal helper: case-insensitive substring test that never matches a missing
     * (null/undefined) field.
     *
     * @param {*} value - Field value; coerced with String() when present.
     * @param {string} lowerNeedle - Already lower-cased text to look for.
     * @returns {boolean}
     */
    fieldContains(value, lowerNeedle) {
        return value != null && String(value).toLowerCase().includes(lowerNeedle);
    }

    /**
     * Extract searchable text from email: subject, body preview, sender and all
     * To/CC recipients joined by single spaces and lower-cased. Missing fields
     * contribute an empty string.
     *
     * @param {object} email
     * @returns {string}
     */
    extractEmailText(email) {
        const sender = email.from || {};
        const describe = (recipient) => {
            const name = recipient.name == null ? '' : recipient.name;
            const address = recipient.address == null ? '' : recipient.address;
            return `${name} ${address}`;
        };
        // Array.prototype.join renders null/undefined elements as ''.
        return [
            email.subject,
            email.bodyPreview,
            sender.name,
            sender.address,
            ...this.recipientList(email.toRecipients).map(describe),
            ...this.recipientList(email.ccRecipients).map(describe)
        ].join(' ').toLowerCase();
    }

    /**
     * Check for exact matches: is the lower-cased query a substring of `text`?
     * An empty query is a substring of every text and therefore always matches.
     *
     * @param {string} text - Lower-cased email text.
     * @param {string} query
     * @returns {boolean}
     */
    hasExactMatch(text, query) {
        return text.includes(query.toLowerCase());
    }

    /**
     * Calculate fuzzy score with typo tolerance: the sum of per-term best-match
     * scores above 0.5, divided by the total number of query terms.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @param {number} tolerance - Minimum per-word similarity (see findBestTermMatch).
     * @returns {number} 0 when no term matched.
     */
    calculateFuzzyScore(text, queryTerms, tolerance) {
        let totalScore = 0;
        let matchedTerms = 0;
        for (const term of queryTerms) {
            const termScore = this.findBestTermMatch(text, term, tolerance);
            if (termScore > 0.5) {
                totalScore += termScore;
                matchedTerms += 1;
            }
        }
        return matchedTerms > 0 ? totalScore / queryTerms.length : 0;
    }

    /**
     * Find best match for a term in text: the highest similarity between `term` and
     * any whitespace-separated word that is at least `tolerance`.
     *
     * @param {string} text
     * @param {string} term
     * @param {number} tolerance
     * @returns {number} Best similarity, or 0 when no word reaches `tolerance`.
     */
    findBestTermMatch(text, term, tolerance) {
        let bestScore = 0;
        for (const word of text.split(/\s+/)) {
            const similarity = this.calculateStringSimilarity(word, term);
            if (similarity > bestScore && similarity >= tolerance) {
                bestScore = similarity;
                if (bestScore === 1) {
                    break; // Similarity is capped at 1; nothing can beat it.
                }
            }
        }
        return bestScore;
    }

    /**
     * Calculate semantic relevance score from term frequency and proximity: +0.1 for
     * every occurrence of a query term, +0.2 for every other query term found as an
     * exact word in the surrounding window. The total is capped at 1.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @param {number} threshold - Not used by this method; callers compare the
     *   returned score against it themselves.
     * @returns {number}
     */
    calculateSemanticScore(text, queryTerms, threshold) {
        const words = text.split(/\s+/);
        const windowSize = 10; // Look at words within 10 positions
        let score = 0;
        for (const term of queryTerms) {
            for (const pos of this.findTermPositions(words, term)) {
                // Score based on term frequency and context
                score += 0.1;
                // Bonus for terms appearing close to each other. The slice end is
                // exclusive, so the window covers pos-10 .. pos+9.
                const windowWords = words.slice(Math.max(0, pos - windowSize), Math.min(words.length, pos + windowSize));
                for (const otherTerm of queryTerms) {
                    if (otherTerm !== term && windowWords.includes(otherTerm)) {
                        score += 0.2;
                    }
                }
            }
        }
        return Math.min(1, score);
    }

    /**
     * Find positions of a term in word array: indices of words that contain the
     * term or are more than 0.8 similar to it.
     *
     * @param {string[]} words
     * @param {string} term
     * @returns {number[]}
     */
    findTermPositions(words, term) {
        const positions = [];
        words.forEach((word, index) => {
            if (word.includes(term) || this.calculateStringSimilarity(word, term) > 0.8) {
                positions.push(index);
            }
        });
        return positions;
    }

    /**
     * Calculate synonym matching score: the fraction of expanded terms that occur
     * as substrings of `text`.
     *
     * @param {string} text
     * @param {string[]} expandedTerms
     * @returns {number} 0 for an empty term list.
     */
    calculateSynonymScore(text, expandedTerms) {
        if (expandedTerms.length === 0) {
            return 0;
        }
        const matchCount = expandedTerms.filter((term) => text.includes(term)).length;
        return matchCount / expandedTerms.length;
    }

    /**
     * Calculate phonetic matching score: the fraction of query terms whose Soundex
     * code equals the Soundex code of at least one word in `text`.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @returns {number} 0 for an empty term list.
     */
    calculatePhoneticScore(text, queryTerms) {
        if (queryTerms.length === 0) {
            return 0;
        }
        // Encode every word once instead of once per query term.
        const wordCodes = new Set(text.split(/\s+/).map((word) => this.soundex(word)));
        let matched = 0;
        for (const term of queryTerms) {
            if (wordCodes.has(this.soundex(term))) {
                matched += 1;
            }
        }
        return matched / queryTerms.length;
    }

    /**
     * Soundex phonetic algorithm implementation (American Soundex).
     *
     * Keeps the first character, then appends up to three digit codes. A letter is
     * skipped when it has the same code as the previously coded letter, including
     * the first letter. Vowels (and any other uncoded character) break such a run,
     * while H and W do not. Examples: Robert -> R163, Pfister -> P236,
     * Tymczak -> T522, Ashcraft -> A261.
     *
     * @param {string} word
     * @returns {string} Four-character code, or '' for an empty/missing word.
     */
    soundex(word) {
        if (!word || word.length === 0) {
            return '';
        }
        const soundexMap = {
            'B': '1', 'F': '1', 'P': '1', 'V': '1',
            'C': '2', 'G': '2', 'J': '2', 'K': '2', 'Q': '2', 'S': '2', 'X': '2', 'Z': '2',
            'D': '3', 'T': '3',
            'L': '4',
            'M': '5', 'N': '5',
            'R': '6'
        };
        const first = word.charAt(0).toUpperCase();
        let soundexCode = first;
        // The first letter's own code takes part in duplicate suppression.
        let prevCode = soundexMap[first] || '';
        for (let i = 1; i < word.length && soundexCode.length < 4; i++) {
            const char = word.charAt(i).toUpperCase();
            const code = soundexMap[char] || '';
            if (code) {
                if (code !== prevCode) {
                    soundexCode += code;
                }
                prevCode = code;
            }
            else if (char !== 'H' && char !== 'W') {
                // A vowel or other separator ends the run, so the same code may repeat.
                prevCode = '';
            }
        }
        return soundexCode.padEnd(4, '0');
    }

    /**
     * Get fields that matched the query: which of subject / body / from / to / cc
     * contain the whole lower-cased query as a substring. Missing fields never match.
     *
     * @param {object} email
     * @param {string} query
     * @returns {string[]} Subset of ['subject', 'body', 'from', 'to', 'cc'], in that order.
     */
    getMatchedFields(email, query) {
        const lowerQuery = query.toLowerCase();
        const contactMatches = (contact) => this.fieldContains(contact.name, lowerQuery) ||
            this.fieldContains(contact.address, lowerQuery);
        const fields = [];
        if (this.fieldContains(email.subject, lowerQuery)) {
            fields.push('subject');
        }
        if (this.fieldContains(email.bodyPreview, lowerQuery)) {
            fields.push('body');
        }
        if (contactMatches(email.from || {})) {
            fields.push('from');
        }
        if (this.recipientList(email.toRecipients).some(contactMatches)) {
            fields.push('to');
        }
        // CC recipients are part of the searchable text, so report them as well.
        if (this.recipientList(email.ccRecipients).some(contactMatches)) {
            fields.push('cc');
        }
        return fields;
    }

    /**
     * Sort and filter results: de-duplicate by email id, optionally boost recent
     * emails, sort by score descending and keep the first `options.maxResults`.
     *
     * The recency boost multiplies `score` on the result objects in place and can
     * push a score above 1.
     *
     * @param {Array<object>} results
     * @param {object} options - Reads `boostRecentEmails` and `maxResults`.
     * @returns {Array<object>}
     */
    sortAndFilterResults(results, options) {
        const uniqueResults = this.removeDuplicateResults(results);
        if (options.boostRecentEmails) {
            for (const result of uniqueResults) {
                const daysOld = this.getDaysOld(result.email.receivedDateTime);
                if (daysOld < 7) {
                    result.score *= 1.2; // 20% boost for recent emails
                }
                else if (daysOld < 30) {
                    result.score *= 1.1; // 10% boost for emails within a month
                }
            }
        }
        uniqueResults.sort((a, b) => b.score - a.score);
        return uniqueResults.slice(0, options.maxResults);
    }

    /**
     * Remove duplicate results, keeping the first result seen for each `email.id`.
     *
     * @param {Array<object>} results
     * @returns {Array<object>} New array in original order.
     */
    removeDuplicateResults(results) {
        const seen = new Set();
        return results.filter((result) => {
            if (seen.has(result.email.id)) {
                return false;
            }
            seen.add(result.email.id);
            return true;
        });
    }

    /**
     * Get days old for an email: the absolute distance between now and `dateTime`,
     * rounded up to whole days. Because the absolute value is taken, a timestamp in
     * the future is measured the same way as one in the past. An unparsable date
     * yields NaN.
     *
     * @param {string|number|Date} dateTime
     * @returns {number}
     */
    getDaysOld(dateTime) {
        const msPerDay = 1000 * 60 * 60 * 24;
        const elapsed = Math.abs(Date.now() - new Date(dateTime).getTime());
        return Math.ceil(elapsed / msPerDay);
    }

    /**
     * Calculate string similarity as `1 - editDistance / longerLength`
     * (Levenshtein distance), so 1 means identical and 0 means nothing in common.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number} Value in [0, 1].
     */
    calculateStringSimilarity(str1, str2) {
        if (str1 === str2) {
            return 1;
        }
        const longest = Math.max(str1.length, str2.length);
        if (longest === 0) {
            return 1;
        }
        return (longest - this.levenshteinDistance(str1, str2)) / longest;
    }

    /**
     * Calculate Levenshtein distance (insertions, deletions and substitutions each
     * cost 1). Only two rows of the dynamic-programming table are kept.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number}
     */
    levenshteinDistance(str1, str2) {
        // previous[i] = distance between str1[0..i) and the str2 prefix handled so far.
        let previous = Array.from({ length: str1.length + 1 }, (_, i) => i);
        for (let j = 1; j <= str2.length; j++) {
            const current = [j];
            for (let i = 1; i <= str1.length; i++) {
                const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
                current[i] = Math.min(current[i - 1] + 1, previous[i] + 1, previous[i - 1] + indicator);
            }
            previous = current;
        }
        return previous[str1.length];
    }

    /**
     * Apply stemming to a word: the first rule in EnhancedFuzzySearch.STEMMING_RULES
     * whose suffix matches is applied; later rules are not considered.
     *
     * NOTE(review): no other method in this class calls stemWord, and the
     * `enableStemming` option is never read here.
     *
     * @param {string} word
     * @returns {string} The stemmed word, or `word` unchanged when no rule matches.
     */
    stemWord(word) {
        const rule = EnhancedFuzzySearch.STEMMING_RULES.find((candidate) => word.endsWith(candidate.suffix));
        return rule ? word.slice(0, -rule.suffix.length) + rule.replacement : word;
    }

    /**
     * Search with natural language understanding: extract time and sender hints
     * from the query, pre-filter the emails with them, then run `search` on what
     * is left of the query.
     *
     * NOTE(review): the parsed `importanceFilter` is not applied to the emails.
     *
     * @param {string} query
     * @param {Array<object>} emails
     * @param {object} [options] - Passed through to `search`.
     * @returns {Promise<Array<object>>}
     */
    async naturalLanguageSearch(query, emails, options = {}) {
        const parsedQuery = this.parseNaturalLanguageQuery(query);
        let filteredEmails = emails;
        if (parsedQuery.timeFilter) {
            filteredEmails = this.applyTimeFilter(emails, parsedQuery.timeFilter);
        }
        if (parsedQuery.senderFilter) {
            // senderFilter is already lower-case. Emails without a sender never match.
            filteredEmails = filteredEmails.filter((email) => {
                const sender = email.from || {};
                return this.fieldContains(sender.name, parsedQuery.senderFilter) ||
                    this.fieldContains(sender.address, parsedQuery.senderFilter);
            });
        }
        return await this.search(parsedQuery.cleanQuery, filteredEmails, options);
    }

    /**
     * Parse natural language query into a cleaned query plus detected filters.
     *
     * Recognized phrases are removed from the query and whitespace is collapsed
     * afterwards, so removing a phrase from the middle of the query does not leave
     * a double space that would defeat exact-substring matching.
     *
     * @param {string} query
     * @returns {{cleanQuery: string, timeFilter: (string|undefined),
     *   senderFilter: (string|undefined), importanceFilter: (string|undefined)}}
     */
    parseNaturalLanguageQuery(query) {
        const squeeze = (text) => text.replace(/\s+/g, ' ').trim();
        let cleanQuery = query.toLowerCase();
        let timeFilter;
        let senderFilter;
        let importanceFilter;
        // Time patterns: only the first pattern that matches is applied.
        const timePatterns = [
            { pattern: /\b(recent|recently|latest|new)\b/g, filter: 'recent' },
            { pattern: /\b(last week|past week|this week)\b/g, filter: 'last_week' },
            { pattern: /\b(last month|past month|this month)\b/g, filter: 'last_month' },
            { pattern: /\b(few weeks|several weeks|couple weeks)\b/g, filter: 'few_weeks' }
        ];
        for (const { pattern, filter } of timePatterns) {
            // Comparing the replaced text avoids the stateful lastIndex of
            // RegExp.prototype.test on a /g pattern.
            const stripped = cleanQuery.replace(pattern, '');
            if (stripped !== cleanQuery) {
                timeFilter = filter;
                cleanQuery = squeeze(stripped);
                break;
            }
        }
        // Sender patterns: "from <word>" or "by <word>"
        const senderMatch = cleanQuery.match(/\b(from|by)\s+(\w+)/);
        if (senderMatch) {
            senderFilter = senderMatch[2];
            cleanQuery = squeeze(cleanQuery.replace(senderMatch[0], ''));
        }
        // Importance patterns
        const withoutImportance = cleanQuery.replace(/\b(important|urgent|priority|critical)\b/g, '');
        if (withoutImportance !== cleanQuery) {
            importanceFilter = 'high';
            cleanQuery = squeeze(withoutImportance);
        }
        return {
            cleanQuery: squeeze(cleanQuery),
            timeFilter,
            senderFilter,
            importanceFilter
        };
    }

    /**
     * Apply time-based filters: keep emails whose `receivedDateTime` is within the
     * look-back window for `timeFilter` ('recent' and 'last_week': 7 days,
     * 'few_weeks': 21 days, 'last_month': 30 days). Any other value returns the
     * input array unchanged.
     *
     * @param {Array<object>} emails
     * @param {string} timeFilter
     * @returns {Array<object>}
     */
    applyTimeFilter(emails, timeFilter) {
        let windowDays;
        switch (timeFilter) {
            case 'recent':
            case 'last_week':
                windowDays = 7;
                break;
            case 'few_weeks':
                windowDays = 21; // 3 weeks
                break;
            case 'last_month':
                windowDays = 30;
                break;
            default:
                return emails;
        }
        const filterDate = new Date(Date.now() - windowDays * 24 * 60 * 60 * 1000);
        return emails.filter((email) => new Date(email.receivedDateTime) >= filterDate);
    }
}
// Default search options. `search` merges caller-supplied options over these
// (`{ ...DEFAULT_OPTIONS, ...options }`), so any key can be overridden per call.
EnhancedFuzzySearch.DEFAULT_OPTIONS = {
// Run the term-proximity ("semantic") strategy in matchEmail.
enableSemanticMatching: true,
// Run the edit-distance (fuzzy) strategy in matchEmail.
enableTypoTolerance: true,
// Expand query terms via SYNONYMS and run the synonym strategy.
enableSynonyms: true,
// NOTE(review): no method of the class reads this flag; stemWord is never called.
enableStemming: true,
// Run the Soundex (phonetic) strategy in matchEmail.
enablePhoneticMatching: true,
// Minimum per-word similarity (0..1) for a word to count as a fuzzy match of a term.
typoTolerance: 0.8,
// A semantic result is kept only when its score is strictly greater than this.
semanticThreshold: 0.6,
// Upper bound on the number of results returned by sortAndFilterResults.
maxResults: 100,
// Multiply scores by 1.2 (< 7 days old) or 1.1 (< 30 days old) before sorting.
boostRecentEmails: true
};
// Common synonyms for email search.
// Keys are looked up by expandQueryWithSynonyms using the query terms produced by
// extractQueryTerms (lower-cased, single words), so keys must be lower-case single
// words. Values are appended to the term list as-is and later tested with
// `text.includes(...)` by calculateSynonymScore, which is why multi-word entries
// such as 'high priority' or 'due date' work as substring matches.
EnhancedFuzzySearch.SYNONYMS = {
'important': ['urgent', 'critical', 'priority', 'asap', 'high priority'],
'meeting': ['call', 'conference', 'discussion', 'session', 'appointment'],
'document': ['file', 'paper', 'report', 'attachment', 'doc'],
'invoice': ['bill', 'receipt', 'payment', 'charge', 'statement'],
'order': ['purchase', 'request', 'requisition', 'procurement'],
'contract': ['agreement', 'deal', 'arrangement', 'pact'],
'issue': ['problem', 'bug', 'error', 'trouble', 'concern'],
'update': ['news', 'information', 'status', 'progress', 'report'],
'deadline': ['due date', 'expiry', 'cutoff', 'timeline'],
'review': ['evaluation', 'assessment', 'analysis', 'feedback'],
'approval': ['authorization', 'permission', 'consent', 'clearance'],
'budget': ['cost', 'expense', 'funding', 'financial'],
'project': ['initiative', 'task', 'assignment', 'work'],
'client': ['customer', 'consumer', 'user', 'patron'],
'vendor': ['supplier', 'provider', 'contractor', 'partner'],
'government': ['federal', 'state', 'local', 'municipal', 'public', 'official'],
'tax': ['irs', 'revenue', 'taxation', 'levy', 'duty'],
'notice': ['notification', 'alert', 'warning', 'announcement'],
'response': ['reply', 'answer', 'feedback', 'acknowledgment'],
'request': ['ask', 'inquiry', 'question', 'petition']
};
// Common stemming rules, consumed by stemWord.
// stemWord applies only the FIRST rule whose suffix matches and then returns, so
// order is significant: 'ies' must stay ahead of 'es' and 's', otherwise
// "parties" would become "parti" instead of "party". Each suffix is listed once;
// a repeated suffix further down the list could never be reached.
EnhancedFuzzySearch.STEMMING_RULES = [
    { suffix: 'ies', replacement: 'y' },
    { suffix: 'ied', replacement: 'y' },
    { suffix: 'ying', replacement: 'y' },
    { suffix: 'ing', replacement: '' },
    { suffix: 'ly', replacement: '' },
    { suffix: 'ed', replacement: '' },
    { suffix: 'es', replacement: '' },
    { suffix: 's', replacement: '' }
];