UNPKG

ms365-mcp-server

Version:

Microsoft 365 MCP Server for managing Microsoft 365 email through natural language interactions with full OAuth2 authentication support

515 lines (514 loc) 19.8 kB
import { logger } from './api.js';

/** Number of milliseconds in one day. */
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/** Soundex digit for each coded consonant; letters that are not listed carry no code. */
const SOUNDEX_CODES = Object.freeze({
    B: '1', F: '1', P: '1', V: '1',
    C: '2', G: '2', J: '2', K: '2', Q: '2', S: '2', X: '2', Z: '2',
    D: '3', T: '3',
    L: '4',
    M: '5', N: '5',
    R: '6'
});

/** Returns `value` when it is an array, otherwise an empty array. */
const asList = (value) => (Array.isArray(value) ? value : []);

/** Returns the lower-cased string, or '' when `value` is not a string. */
const lowerText = (value) => (typeof value === 'string' ? value.toLowerCase() : '');

/** Renders one recipient as "name address", tolerating a missing recipient or missing parts. */
const recipientText = (recipient) => {
    const entry = recipient || {};
    return `${entry.name || ''} ${entry.address || ''}`;
};

/**
 * In-memory email search that combines exact, fuzzy (edit distance), semantic
 * (term proximity), synonym and phonetic (Soundex) matching.
 *
 * Emails are read through these properties: `id`, `subject`, `bodyPreview`,
 * `from.{name,address}`, `toRecipients[]`, `ccRecipients[]`, `receivedDateTime`.
 */
export class EnhancedFuzzySearch {
    /**
     * @param {object} ms365Operations - Stored on the instance; not used by any
     *   method in this file.
     */
    constructor(ms365Operations) {
        this.ms365Operations = ms365Operations;
    }

    /**
     * Enhanced fuzzy search with multiple matching strategies.
     *
     * @param {string} query - Raw user query.
     * @param {Array<object>} emails - Emails to search.
     * @param {object} [options] - Overrides for `EnhancedFuzzySearch.DEFAULT_OPTIONS`.
     * @returns {Promise<Array<object>>} At most one result per email (its
     *   best-scoring strategy), sorted by descending score and truncated to
     *   `maxResults`.
     */
    async search(query, emails, options = {}) {
        const opts = { ...EnhancedFuzzySearch.DEFAULT_OPTIONS, ...options };
        logger.log(`🔍 Enhanced fuzzy search for query: "${query}"`);

        // Normalize and preprocess the query once, outside the per-email loop.
        const queryTerms = this.extractQueryTerms(this.normalizeQuery(query));
        const expandedTerms = this.expandQueryWithSynonyms(queryTerms, opts);

        const results = [];
        for (const email of emails) {
            const candidates = this.matchEmail(email, query, queryTerms, expandedTerms, opts);
            if (candidates.length > 0) {
                // Keep only the strongest strategy for this email.
                results.push(candidates.reduce((best, current) => (current.score > best.score ? current : best)));
            }
        }

        const sortedResults = this.sortAndFilterResults(results, opts);
        logger.log(`🔍 Enhanced fuzzy search found ${sortedResults.length} results`);
        return sortedResults;
    }

    /**
     * Match an email against the search criteria using every enabled strategy.
     *
     * @param {object} email - Email to test.
     * @param {string} originalQuery - Raw query, used for the exact-substring check.
     * @param {string[]} queryTerms - Terms from `extractQueryTerms`.
     * @param {string[]} expandedTerms - Terms plus synonyms.
     * @param {object} options - Fully merged options.
     * @returns {Array<object>} One entry per strategy that matched, each with
     *   `email`, `score`, `matchedFields`, `matchType` and `explanation`.
     */
    matchEmail(email, originalQuery, queryTerms, expandedTerms, options) {
        const results = [];
        const emailText = this.extractEmailText(email);
        const joinedTerms = queryTerms.join(' ');

        // 1. Exact matching
        if (this.hasExactMatch(emailText, originalQuery)) {
            results.push({
                email,
                score: 1.0,
                matchedFields: this.getMatchedFields(email, originalQuery),
                matchType: 'exact',
                explanation: `Exact match found for "${originalQuery}"`
            });
        }

        // 2. Fuzzy matching with typo tolerance
        if (options.enableTypoTolerance) {
            const fuzzyScore = this.calculateFuzzyScore(emailText, queryTerms, options.typoTolerance);
            if (fuzzyScore > 0.5) {
                results.push({
                    email,
                    score: fuzzyScore * 0.9, // Slightly lower than exact
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'fuzzy',
                    explanation: `Fuzzy match with ${Math.round(fuzzyScore * 100)}% similarity`
                });
            }
        }

        // 3. Semantic matching
        if (options.enableSemanticMatching) {
            const semanticScore = this.calculateSemanticScore(emailText, queryTerms, options.semanticThreshold);
            if (semanticScore > options.semanticThreshold) {
                results.push({
                    email,
                    score: semanticScore * 0.8,
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'semantic',
                    explanation: `Semantic match with ${Math.round(semanticScore * 100)}% relevance`
                });
            }
        }

        // 4. Synonym matching (only when expansion actually added terms)
        if (options.enableSynonyms && expandedTerms.length > queryTerms.length) {
            const synonymScore = this.calculateSynonymScore(emailText, expandedTerms);
            if (synonymScore > 0.6) {
                results.push({
                    email,
                    score: synonymScore * 0.7,
                    matchedFields: this.getMatchedFields(email, expandedTerms.join(' ')),
                    matchType: 'synonym',
                    explanation: `Synonym match found`
                });
            }
        }

        // 5. Phonetic matching
        if (options.enablePhoneticMatching) {
            const phoneticScore = this.calculatePhoneticScore(emailText, queryTerms);
            if (phoneticScore > 0.7) {
                results.push({
                    email,
                    score: phoneticScore * 0.6,
                    matchedFields: this.getMatchedFields(email, joinedTerms),
                    matchType: 'phonetic',
                    explanation: `Phonetic match found`
                });
            }
        }

        return results;
    }

    /**
     * Normalize a query: lower-case, trim, replace every character that is not a
     * word character, whitespace or hyphen with a space, then collapse whitespace runs.
     *
     * @param {string} query
     * @returns {string}
     */
    normalizeQuery(query) {
        return query
            .toLowerCase()
            .trim()
            .replace(/[^\w\s-]/g, ' ')
            .replace(/\s+/g, ' ');
    }

    /**
     * Extract meaningful terms from a normalized query: whitespace-separated
     * tokens longer than two characters that are not stop words.
     *
     * @param {string} query
     * @returns {string[]}
     */
    extractQueryTerms(query) {
        const stopWords = new Set(['the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those']);
        return query.split(/\s+/).filter((term) => term.length > 2 && !stopWords.has(term));
    }

    /**
     * Expand query terms with the synonyms listed in `EnhancedFuzzySearch.SYNONYMS`.
     *
     * @param {string[]} terms
     * @param {object} options - Only `enableSynonyms` is read.
     * @returns {string[]} `terms` itself when synonyms are disabled, otherwise a
     *   new array holding the terms followed by their synonyms.
     */
    expandQueryWithSynonyms(terms, options) {
        if (!options.enableSynonyms) {
            return terms;
        }
        const expanded = [...terms];
        for (const term of terms) {
            expanded.push(...(EnhancedFuzzySearch.SYNONYMS[term] || []));
        }
        return expanded;
    }

    /**
     * Extract the searchable, lower-cased text of an email: subject, body
     * preview, sender, and To/CC recipients joined by single spaces.
     *
     * Missing fields contribute an empty string rather than throwing or leaking
     * the literal word "undefined" into the searchable text.
     *
     * @param {object} email
     * @returns {string}
     */
    extractEmailText(email) {
        const sender = email.from || {};
        return [
            email.subject,
            email.bodyPreview,
            sender.name,
            sender.address,
            ...asList(email.toRecipients).map(recipientText),
            ...asList(email.ccRecipients).map(recipientText)
        ].join(' ').toLowerCase();
    }

    /**
     * Check whether the (already lower-cased) text contains the query as a substring.
     *
     * @param {string} text
     * @param {string} query
     * @returns {boolean}
     */
    hasExactMatch(text, query) {
        return text.includes(query.toLowerCase());
    }

    /**
     * Calculate the fuzzy score with typo tolerance.
     *
     * Sums the best per-term similarity of every term whose best match exceeds
     * 0.5 and divides by the total number of query terms, so unmatched terms
     * pull the score down.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @param {number} tolerance - Minimum similarity for a word to count.
     * @returns {number} 0 when no term matched.
     */
    calculateFuzzyScore(text, queryTerms, tolerance) {
        let totalScore = 0;
        let matchedTerms = 0;
        for (const term of queryTerms) {
            const termScore = this.findBestTermMatch(text, term, tolerance);
            if (termScore > 0.5) {
                totalScore += termScore;
                matchedTerms++;
            }
        }
        return matchedTerms > 0 ? totalScore / queryTerms.length : 0;
    }

    /**
     * Find the best similarity between a term and any whitespace-separated word of the text.
     *
     * @param {string} text
     * @param {string} term
     * @param {number} tolerance - Similarities below this value are ignored.
     * @returns {number} Best similarity that reached `tolerance`, or 0.
     */
    findBestTermMatch(text, term, tolerance) {
        let bestScore = 0;
        for (const word of text.split(/\s+/)) {
            const similarity = this.calculateStringSimilarity(word, term);
            if (similarity > bestScore && similarity >= tolerance) {
                bestScore = similarity;
            }
        }
        return bestScore;
    }

    /**
     * Calculate a simple relevance score from term occurrence and proximity.
     *
     * Each occurrence of a term adds 0.1; every other query term found as an
     * exact word within 10 positions of that occurrence adds 0.2. The total is
     * capped at 1.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @param {number} threshold - Accepted for interface compatibility; not
     *   read here (the caller compares the returned score against it).
     * @returns {number} Score in [0, 1].
     */
    calculateSemanticScore(text, queryTerms, threshold) {
        const windowSize = 10; // Look at words within 10 positions
        const words = text.split(/\s+/);
        let score = 0;
        for (const term of queryTerms) {
            for (const pos of this.findTermPositions(words, term)) {
                // Score based on term frequency and context
                score += 0.1;
                // Bonus for terms appearing close to each other
                const windowStart = Math.max(0, pos - windowSize);
                const windowEnd = Math.min(words.length, pos + windowSize);
                const windowWords = words.slice(windowStart, windowEnd);
                for (const otherTerm of queryTerms) {
                    if (otherTerm !== term && windowWords.includes(otherTerm)) {
                        score += 0.2;
                    }
                }
            }
        }
        return Math.min(1, score);
    }

    /**
     * Find the indexes of words that contain the term or are more than 0.8 similar to it.
     *
     * @param {string[]} words
     * @param {string} term
     * @returns {number[]}
     */
    findTermPositions(words, term) {
        const positions = [];
        for (let i = 0; i < words.length; i++) {
            if (words[i].includes(term) || this.calculateStringSimilarity(words[i], term) > 0.8) {
                positions.push(i);
            }
        }
        return positions;
    }

    /**
     * Calculate the fraction of expanded terms that occur in the text as substrings.
     *
     * @param {string} text
     * @param {string[]} expandedTerms
     * @returns {number} Value in [0, 1]; 0 for an empty term list.
     */
    calculateSynonymScore(text, expandedTerms) {
        if (expandedTerms.length === 0) {
            return 0;
        }
        const matchCount = expandedTerms.filter((term) => text.includes(term)).length;
        return matchCount / expandedTerms.length;
    }

    /**
     * Calculate the fraction of query terms whose Soundex code equals the code
     * of at least one word in the text.
     *
     * @param {string} text
     * @param {string[]} queryTerms
     * @returns {number} Value in [0, 1]; 0 for an empty term list.
     */
    calculatePhoneticScore(text, queryTerms) {
        if (queryTerms.length === 0) {
            return 0;
        }
        // Encode the text once instead of once per query term.
        const wordCodes = new Set(text.split(/\s+/).map((word) => this.soundex(word)));
        const matched = queryTerms.filter((term) => wordCodes.has(this.soundex(term))).length;
        return matched / queryTerms.length;
    }

    /**
     * Soundex phonetic algorithm (standard American Soundex).
     *
     * Keeps the first character, then appends the digit of each following coded
     * consonant, skipping a digit that repeats the one immediately before it.
     * The first letter's own code takes part in that comparison, H and W do not
     * break a run of equal codes, and any other uncoded character (vowels
     * included) does. The result is padded with '0' to four characters.
     *
     * @param {string} word
     * @returns {string} Four-character code, or '' for empty input.
     */
    soundex(word) {
        if (!word || word.length === 0) {
            return '';
        }
        const letters = word.toUpperCase();
        let soundexCode = letters.charAt(0);
        // Seed with the first letter's code so "Pf..." does not encode the F again.
        let prevCode = SOUNDEX_CODES[letters.charAt(0)] || '';
        for (let i = 1; i < letters.length && soundexCode.length < 4; i++) {
            const char = letters.charAt(i);
            if (char === 'H' || char === 'W') {
                continue; // Transparent: equal codes on both sides still collapse.
            }
            const code = SOUNDEX_CODES[char] || '';
            if (code && code !== prevCode) {
                soundexCode += code;
            }
            // Vowels reset prevCode, so equal codes separated by a vowel are both kept.
            prevCode = code;
        }
        return soundexCode.padEnd(4, '0');
    }

    /**
     * Get the email fields that contain the query as a case-insensitive substring.
     *
     * @param {object} email
     * @param {string} query
     * @returns {string[]} Any of 'subject', 'body', 'from', 'to', 'cc', in that
     *   order. Missing fields never match.
     */
    getMatchedFields(email, query) {
        const needle = query.toLowerCase();
        const contains = (value) => typeof value === 'string' && value.toLowerCase().includes(needle);
        const recipientMatches = (recipient) => Boolean(recipient) && (contains(recipient.name) || contains(recipient.address));
        const sender = email.from || {};

        const fields = [];
        if (contains(email.subject)) {
            fields.push('subject');
        }
        if (contains(email.bodyPreview)) {
            fields.push('body');
        }
        if (contains(sender.name) || contains(sender.address)) {
            fields.push('from');
        }
        if (asList(email.toRecipients).some(recipientMatches)) {
            fields.push('to');
        }
        // CC recipients are part of the searchable text, so report them as well.
        if (asList(email.ccRecipients).some(recipientMatches)) {
            fields.push('cc');
        }
        return fields;
    }

    /**
     * Deduplicate, optionally boost recent emails, sort by descending score and
     * truncate to `options.maxResults`.
     *
     * The boost multiplies `score` in place on the result objects, so a boosted
     * score can exceed 1.
     *
     * @param {Array<object>} results
     * @param {object} options - Reads `boostRecentEmails` and `maxResults`.
     * @returns {Array<object>}
     */
    sortAndFilterResults(results, options) {
        const uniqueResults = this.removeDuplicateResults(results);

        if (options.boostRecentEmails) {
            for (const result of uniqueResults) {
                const daysOld = this.getDaysOld(result.email.receivedDateTime);
                if (daysOld < 7) {
                    result.score *= 1.2; // 20% boost for recent emails
                } else if (daysOld < 30) {
                    result.score *= 1.1; // 10% boost for emails within a month
                }
            }
        }

        // Sort by score (descending)
        uniqueResults.sort((a, b) => b.score - a.score);

        // Apply result limit
        return uniqueResults.slice(0, options.maxResults);
    }

    /**
     * Remove duplicate results, keeping the first result seen for each `email.id`.
     *
     * @param {Array<object>} results
     * @returns {Array<object>}
     */
    removeDuplicateResults(results) {
        const seen = new Set();
        const unique = [];
        for (const result of results) {
            if (!seen.has(result.email.id)) {
                seen.add(result.email.id);
                unique.push(result);
            }
        }
        return unique;
    }

    /**
     * Get the absolute distance between now and `dateTime`, in days, rounded up.
     *
     * @param {string|number|Date} dateTime - Anything accepted by `new Date()`.
     * @returns {number} NaN when `dateTime` cannot be parsed.
     */
    getDaysOld(dateTime) {
        const elapsedMs = Math.abs(Date.now() - new Date(dateTime).getTime());
        return Math.ceil(elapsedMs / MS_PER_DAY);
    }

    /**
     * Calculate string similarity as `1 - editDistance / longerLength`.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number} 1 for identical strings, lower for more edits.
     */
    calculateStringSimilarity(str1, str2) {
        if (str1 === str2) {
            return 1;
        }
        const longer = str1.length > str2.length ? str1 : str2;
        const shorter = str1.length > str2.length ? str2 : str1;
        if (longer.length === 0) {
            return 1;
        }
        const editDistance = this.levenshteinDistance(longer, shorter);
        return (longer.length - editDistance) / longer.length;
    }

    /**
     * Calculate the Levenshtein distance (insertions, deletions and
     * substitutions, each costing 1) with a full dynamic-programming matrix.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number}
     */
    levenshteinDistance(str1, str2) {
        // matrix[j][i] = distance between the first i chars of str1 and the first j chars of str2.
        const matrix = Array(str2.length + 1).fill(null).map(() => Array(str1.length + 1).fill(null));
        for (let i = 0; i <= str1.length; i++) {
            matrix[0][i] = i;
        }
        for (let j = 0; j <= str2.length; j++) {
            matrix[j][0] = j;
        }
        for (let j = 1; j <= str2.length; j++) {
            for (let i = 1; i <= str1.length; i++) {
                const indicator = str1[i - 1] === str2[j - 1] ? 0 : 1;
                matrix[j][i] = Math.min(
                    matrix[j][i - 1] + 1, // insertion
                    matrix[j - 1][i] + 1, // deletion
                    matrix[j - 1][i - 1] + indicator // substitution
                );
            }
        }
        return matrix[str2.length][str1.length];
    }

    /**
     * Apply the first matching rule of `EnhancedFuzzySearch.STEMMING_RULES` to a word.
     *
     * NOTE(review): no method in this file calls `stemWord`, and the
     * `enableStemming` option is never read here — confirm whether stemming was
     * meant to be wired into `search`.
     *
     * @param {string} word
     * @returns {string} The stemmed word, or `word` unchanged when no rule applies.
     */
    stemWord(word) {
        const rule = EnhancedFuzzySearch.STEMMING_RULES.find(({ suffix }) => word.endsWith(suffix));
        return rule ? word.slice(0, -rule.suffix.length) + rule.replacement : word;
    }

    /**
     * Search with light natural-language understanding: time phrases and a
     * "from X" / "by X" sender are turned into pre-filters, and the remaining
     * text is passed to `search`.
     *
     * NOTE(review): `parseNaturalLanguageQuery` also returns `importanceFilter`,
     * but it is not applied here — confirm whether that is intentional.
     *
     * @param {string} query
     * @param {Array<object>} emails
     * @param {object} [options] - Forwarded to `search`.
     * @returns {Promise<Array<object>>}
     */
    async naturalLanguageSearch(query, emails, options = {}) {
        const parsedQuery = this.parseNaturalLanguageQuery(query);

        // Apply time-based filters if detected
        let filteredEmails = emails;
        if (parsedQuery.timeFilter) {
            filteredEmails = this.applyTimeFilter(emails, parsedQuery.timeFilter);
        }

        // Apply sender filter; emails without sender information cannot match it.
        if (parsedQuery.senderFilter) {
            const wanted = parsedQuery.senderFilter;
            filteredEmails = filteredEmails.filter((email) => {
                const sender = email.from || {};
                return lowerText(sender.name).includes(wanted) || lowerText(sender.address).includes(wanted);
            });
        }

        // Run enhanced search on filtered emails
        return await this.search(parsedQuery.cleanQuery, filteredEmails, options);
    }

    /**
     * Parse a natural-language query into filters plus the remaining search text.
     *
     * @param {string} query
     * @returns {{cleanQuery: string, timeFilter: (string|undefined), senderFilter: (string|undefined), importanceFilter: (string|undefined)}}
     *   `cleanQuery` is lower-cased, has the recognised phrases removed and has
     *   whitespace collapsed to single spaces.
     */
    parseNaturalLanguageQuery(query) {
        let cleanQuery = query.toLowerCase();
        let timeFilter;
        let senderFilter;
        let importanceFilter;

        // Time patterns: the first pattern that occurs wins.
        const timePatterns = [
            { pattern: /\b(recent|recently|latest|new)\b/g, filter: 'recent' },
            { pattern: /\b(last week|past week|this week)\b/g, filter: 'last_week' },
            { pattern: /\b(last month|past month|this month)\b/g, filter: 'last_month' },
            { pattern: /\b(few weeks|several weeks|couple weeks)\b/g, filter: 'few_weeks' }
        ];
        for (const { pattern, filter } of timePatterns) {
            // Detect a hit by comparing before/after instead of calling the
            // stateful `test()` on a global regex.
            const stripped = cleanQuery.replace(pattern, '');
            if (stripped !== cleanQuery) {
                timeFilter = filter;
                cleanQuery = stripped.trim();
                break;
            }
        }

        // Sender patterns
        const senderMatch = cleanQuery.match(/\b(from|by)\s+(\w+)/);
        if (senderMatch) {
            senderFilter = senderMatch[2];
            cleanQuery = cleanQuery.replace(senderMatch[0], '').trim();
        }

        // Importance patterns
        const importancePattern = /\b(important|urgent|priority|critical)\b/g;
        const withoutImportance = cleanQuery.replace(importancePattern, '');
        if (withoutImportance !== cleanQuery) {
            importanceFilter = 'high';
            cleanQuery = withoutImportance.trim();
        }

        // Removing a phrase from the middle of the query leaves a double space,
        // which would defeat the exact-substring match in `search`.
        cleanQuery = cleanQuery.replace(/\s+/g, ' ').trim();

        return {
            cleanQuery,
            timeFilter,
            senderFilter,
            importanceFilter
        };
    }

    /**
     * Keep only emails received on or after the cutoff implied by `timeFilter`.
     *
     * @param {Array<object>} emails
     * @param {string} timeFilter - 'recent' or 'last_week' (7 days),
     *   'few_weeks' (21 days), 'last_month' (30 days).
     * @returns {Array<object>} `emails` itself for an unrecognised filter.
     */
    applyTimeFilter(emails, timeFilter) {
        let days;
        switch (timeFilter) {
            case 'recent':
            case 'last_week':
                days = 7;
                break;
            case 'few_weeks':
                days = 21; // 3 weeks
                break;
            case 'last_month':
                days = 30;
                break;
            default:
                return emails;
        }
        const filterDate = new Date(Date.now() - days * MS_PER_DAY);
        return emails.filter((email) => new Date(email.receivedDateTime) >= filterDate);
    }
}

// Defaults merged under the caller's options in `search`.
EnhancedFuzzySearch.DEFAULT_OPTIONS = {
    enableSemanticMatching: true,
    enableTypoTolerance: true,
    enableSynonyms: true,
    enableStemming: true,
    enablePhoneticMatching: true,
    typoTolerance: 0.8,
    semanticThreshold: 0.6,
    maxResults: 100,
    boostRecentEmails: true
};

// Common synonyms for email search
EnhancedFuzzySearch.SYNONYMS = {
    'important': ['urgent', 'critical', 'priority', 'asap', 'high priority'],
    'meeting': ['call', 'conference', 'discussion', 'session', 'appointment'],
    'document': ['file', 'paper', 'report', 'attachment', 'doc'],
    'invoice': ['bill', 'receipt', 'payment', 'charge', 'statement'],
    'order': ['purchase', 'request', 'requisition', 'procurement'],
    'contract': ['agreement', 'deal', 'arrangement', 'pact'],
    'issue': ['problem', 'bug', 'error', 'trouble', 'concern'],
    'update': ['news', 'information', 'status', 'progress', 'report'],
    'deadline': ['due date', 'expiry', 'cutoff', 'timeline'],
    'review': ['evaluation', 'assessment', 'analysis', 'feedback'],
    'approval': ['authorization', 'permission', 'consent', 'clearance'],
    'budget': ['cost', 'expense', 'funding', 'financial'],
    'project': ['initiative', 'task', 'assignment', 'work'],
    'client': ['customer', 'consumer', 'user', 'patron'],
    'vendor': ['supplier', 'provider', 'contractor', 'partner'],
    'government': ['federal', 'state', 'local', 'municipal', 'public', 'official'],
    'tax': ['irs', 'revenue', 'taxation', 'levy', 'duty'],
    'notice': ['notification', 'alert', 'warning', 'announcement'],
    'response': ['reply', 'answer', 'feedback', 'acknowledgment'],
    'request': ['ask', 'inquiry', 'question', 'petition']
};

// Common stemming rules, applied first-match-wins, so longer suffixes come first.
EnhancedFuzzySearch.STEMMING_RULES = [
    { suffix: 'ies', replacement: 'y' },
    { suffix: 'ied', replacement: 'y' },
    { suffix: 'ying', replacement: 'y' },
    { suffix: 'ing', replacement: '' },
    { suffix: 'ly', replacement: '' },
    { suffix: 'ed', replacement: '' },
    { suffix: 'es', replacement: '' },
    { suffix: 's', replacement: '' }
];