UNPKG

ms365-mcp-server

Version:

Microsoft 365 MCP Server for managing Microsoft 365 email through natural language interactions with full OAuth2 authentication support

600 lines (599 loc) 27.3 kB
import { logger } from './api.js';

/**
 * Search strategy selector and executor for mailboxes of varying size.
 *
 * The class picks between a direct, a filtered and a multi-tier strategy based
 * on an estimated mailbox size, delegating message retrieval to
 * `ms365Operations.searchEmails` and relevance ranking to
 * `contextAwareSearch.search` / `intelligenceEngine.intelligentSearch`.
 */
export class LargeMailboxSearch {
    /**
     * @param {object} ms365Operations - Must expose `searchEmails(criteria)` and `getGraphClient()`.
     * @param {object} contextAwareSearch - Must expose `search(query, messages)`.
     * @param {object} intelligenceEngine - Must expose `intelligentSearch(query, messages, opts)`.
     * @param {object} proactiveIntelligence - Stored on the instance; not used by any method in this class.
     */
    constructor(ms365Operations, contextAwareSearch, intelligenceEngine, proactiveIntelligence) {
        this.ms365Operations = ms365Operations;
        this.contextAwareSearch = contextAwareSearch;
        this.intelligenceEngine = intelligenceEngine;
        this.proactiveIntelligence = proactiveIntelligence;
    }

    /**
     * Perform scalable search for large mailboxes (20k+ emails).
     *
     * The overall deadline is 60s, 120s (estimate > 20 000) or 180s
     * (estimate > 50 000). When the deadline fires the returned promise rejects;
     * the underlying search is not cancelled and keeps running in the background.
     *
     * @param {string} query - Search text.
     * @param {object} [options] - Overrides merged on top of `LargeMailboxSearch.DEFAULT_OPTIONS`.
     * @returns {Promise<object>} The search result object built by the selected strategy.
     * @throws {Error} When the deadline elapses or the selected strategy fails.
     */
    async search(query, options = {}) {
        const opts = { ...LargeMailboxSearch.DEFAULT_OPTIONS, ...options };
        logger.log(`🔍 Large mailbox search initiated for: "${query}"`);
        logger.log(`📊 Search options: ${JSON.stringify(opts)}`);

        // Dynamic timeout based on mailbox size estimation
        const estimatedSize = await this.quickMailboxSizeEstimate();
        const baseTimeout = 60000; // 60 seconds base
        let searchTimeout = baseTimeout;
        if (estimatedSize > 50000) {
            searchTimeout = 180000; // 3 minutes for very large mailboxes (50k+)
        } else if (estimatedSize > 20000) {
            searchTimeout = 120000; // 2 minutes for large mailboxes (20k+)
        }
        logger.log(`⏱️ Dynamic timeout set to ${searchTimeout / 1000} seconds based on estimated mailbox size: ${estimatedSize}`);

        // The estimate is forwarded so the progressive search does not have to
        // repeat the same Graph requests.
        const searchPromise = this.performProgressiveSearch(query, opts, searchTimeout, estimatedSize);
        let timeoutHandle;
        const timeoutPromise = new Promise((_, reject) => {
            timeoutHandle = setTimeout(() => {
                reject(new Error(`Large mailbox search timed out after ${searchTimeout / 1000} seconds. Try using more specific search terms, reducing the time window (timeWindowDays), or using smaller maxTotalResults.`));
            }, searchTimeout);
        });
        try {
            return await Promise.race([searchPromise, timeoutPromise]);
        } finally {
            // Without this the timer stays pending (and keeps the event loop
            // alive) for up to three minutes after the search has settled.
            clearTimeout(timeoutHandle);
        }
    }

    /**
     * Run a search without the progressive timeout handling, choosing the
     * strategy from `estimateMailboxSize()`.
     *
     * @param {string} query - Search text.
     * @param {object} opts - Fully populated options (see `DEFAULT_OPTIONS`).
     * @returns {Promise<object>} The populated result object.
     */
    async performActualSearch(query, opts) {
        const startTime = Date.now();
        const result = {
            query,
            totalEmailsInMailbox: 0,
            searchStrategy: '',
            tierResults: [],
            finalResults: [],
            confidence: 0,
            searchTime: 0,
            recommendations: []
        };

        // Get total mailbox size estimate
        result.totalEmailsInMailbox = await this.estimateMailboxSize();

        if (result.totalEmailsInMailbox < 1000) {
            // Small mailbox - use direct approach
            const direct = await this.performDirectSearch(query, opts, result);
            direct.searchTime = Date.now() - startTime;
            return direct;
        }
        if (result.totalEmailsInMailbox < 10000) {
            // Medium mailbox - use filtered approach
            const filtered = await this.performFilteredSearch(query, opts, result);
            filtered.searchTime = Date.now() - startTime;
            return filtered;
        }
        // Large mailbox (10k+ by this threshold) - use multi-tier approach
        return await this.performTieredSearch(query, opts, result, startTime);
    }

    /**
     * Multi-tier search strategy for large mailboxes.
     *
     * Tiers are executed sequentially; a failing tier is logged and skipped.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults`, `batchSize` and (via the tier builder) `timeWindowDays`.
     * @param {object} result - Result object to populate; mutated and returned.
     * @param {number} [startTime] - Epoch milliseconds the search started at; defaults to the time of this call.
     * @returns {Promise<object>} The same `result` object.
     */
    async performTieredSearch(query, options, result, startTime = Date.now()) {
        result.searchStrategy = 'multi-tier-large-mailbox';

        // Define search tiers in order of priority
        const tiers = this.buildSearchTiers(query, options);
        const allResults = [];
        const seenIds = new Set();
        let totalSearched = 0;

        for (const tier of tiers) {
            if (allResults.length >= options.maxTotalResults) {
                logger.log(`🎯 Reached target results (${options.maxTotalResults}), stopping search`);
                break;
            }
            const tierStartTime = Date.now();
            logger.log(`🔍 Executing tier: ${tier.name}`);
            try {
                // Use native MS365 search for initial filtering.
                // NOTE: `maxResults` here is listed after the spread, so it
                // overrides any `maxResults` carried by the tier's own criteria.
                const nativeResults = await this.ms365Operations.searchEmails({
                    ...tier.searchCriteria,
                    maxResults: Math.min(options.batchSize, 500)
                });
                const messages = nativeResults.messages || [];
                const emailsSearched = messages.length;
                totalSearched += emailsSearched;

                if (emailsSearched === 0) {
                    logger.log(`⚠️ Tier "${tier.name}": No emails found matching criteria`);
                    continue;
                }

                // Apply AI intelligence to filtered results
                let intelligentResults;
                if (tier.name.includes('context')) {
                    // Use context-aware search
                    const contextResults = await this.contextAwareSearch.search(query, messages);
                    intelligentResults = contextResults.emails || [];
                } else {
                    // Use fuzzy/intelligent search
                    const fuzzyResults = await this.intelligenceEngine.intelligentSearch(query, messages, { maxResults: 50 });
                    intelligentResults = fuzzyResults.results || [];
                }

                // Add to results (avoid duplicates)
                const newResults = this.collectNewResults(allResults, seenIds, intelligentResults);
                const tierTime = Date.now() - tierStartTime;
                result.tierResults.push({
                    tier: tier.name,
                    emailsSearched,
                    resultsFound: newResults.length,
                    processingTime: tierTime
                });
                logger.log(`✅ Tier "${tier.name}": ${emailsSearched} searched, ${newResults.length} found (${tierTime}ms)`);
            } catch (error) {
                logger.error(`❌ Error in tier "${tier.name}":`, error);
            }
        }

        // Limit final results and calculate confidence
        result.finalResults = allResults.slice(0, options.maxTotalResults);
        result.confidence = this.calculateSearchConfidence(result, totalSearched);
        result.searchTime = Date.now() - startTime;
        result.recommendations = this.generateLargeMailboxRecommendations(result, options);
        logger.log(`🎯 Large mailbox search completed: ${result.finalResults.length} results from ${totalSearched} emails searched`);
        return result;
    }

    /**
     * Append to `allResults` every candidate whose `id` was not seen in an
     * earlier batch, and record the appended ids in `seenIds`.
     *
     * @param {Array<object>} allResults - Accumulator, mutated in place.
     * @param {Set} seenIds - Ids already present in `allResults`, mutated in place.
     * @param {Array<object>} candidates - Results of the current tier.
     * @returns {Array<object>} The candidates that were appended.
     */
    collectNewResults(allResults, seenIds, candidates) {
        const fresh = candidates.filter(email => !seenIds.has(email.id));
        for (const email of fresh) {
            seenIds.add(email.id);
        }
        allResults.push(...fresh);
        return fresh;
    }

    /**
     * Build prioritized search tiers based on query analysis.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `timeWindowDays`.
     * @returns {Array<object>} Tier descriptors sorted by ascending `priority`.
     */
    buildSearchTiers(query, options) {
        const tiers = [];
        const currentDate = new Date();
        const dayMs = 24 * 60 * 60 * 1000;

        // Tier 1: Recent emails with query terms (highest priority)
        const recentDays = Math.min(options.timeWindowDays / 4, 30); // Last 30 days max
        const recentDate = new Date(currentDate.getTime() - recentDays * dayMs);
        tiers.push({
            name: 'recent-context-search',
            searchCriteria: {
                query: query,
                after: recentDate.toISOString().split('T')[0],
                folder: 'inbox'
            },
            priority: 1,
            estimatedResults: 100
        });

        // Tier 2: Subject line matches (medium-high priority)
        tiers.push({
            name: 'subject-fuzzy-search',
            searchCriteria: {
                subject: query,
                maxResults: 300
            },
            priority: 2,
            estimatedResults: 150
        });

        // Tier 3: Sender-based search if query contains person/email
        if (this.queryContainsPerson(query)) {
            const personName = this.extractPersonFromQuery(query);
            // Skip the tier rather than issue a search with an empty sender.
            if (personName) {
                tiers.push({
                    name: 'sender-intelligent-search',
                    searchCriteria: {
                        from: personName,
                        maxResults: 200
                    },
                    priority: 3,
                    estimatedResults: 100
                });
            }
        }

        // Tier 4: Extended time range for important keywords
        if (this.queryContainsImportantKeywords(query)) {
            const extendedDate = new Date(currentDate.getTime() - options.timeWindowDays * dayMs);
            tiers.push({
                name: 'extended-context-search',
                searchCriteria: {
                    query: query,
                    after: extendedDate.toISOString().split('T')[0],
                    maxResults: 400
                },
                priority: 4,
                estimatedResults: 200
            });
        }

        // Tier 5: Attachment-restricted search for document-related queries
        if (this.queryContainsDocumentKeywords(query)) {
            tiers.push({
                name: 'attachment-search',
                searchCriteria: {
                    query: query,
                    hasAttachment: true,
                    maxResults: 200
                },
                priority: 5,
                estimatedResults: 80
            });
        }

        // Sort by priority and return
        return tiers.sort((a, b) => a.priority - b.priority);
    }

    /**
     * Estimate total mailbox size by sampling up to 50 messages from each of
     * the 'inbox', 'sent' and 'archive' folders.
     *
     * @returns {Promise<number>} An estimate that is never below 1000.
     */
    async estimateMailboxSize() {
        try {
            // Get sample from different folders to estimate total
            const folders = ['inbox', 'sent', 'archive'];
            let totalEstimate = 0;
            for (const folder of folders) {
                try {
                    const sample = await this.ms365Operations.searchEmails({
                        query: '*',
                        folder: folder,
                        maxResults: 50
                    });
                    // Rough estimation: if we get 50, assume there are many more
                    if (sample.messages.length === 50) {
                        totalEstimate += 1000; // Conservative estimate per active folder
                    } else {
                        totalEstimate += sample.messages.length;
                    }
                } catch (error) {
                    // Deliberately best-effort: the folder might not exist, so
                    // it simply contributes nothing to the estimate.
                }
            }
            return Math.max(totalEstimate, 1000);
        } catch (error) {
            logger.error('Error estimating mailbox size:', error);
            return 5000; // Default assumption for large mailbox
        }
    }

    /**
     * Direct search for small mailboxes: fetch up to 1000 messages and rank
     * them with the context-aware search.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults`.
     * @param {object} result - Result object to populate; mutated and returned.
     * @returns {Promise<object>} The same `result` object.
     */
    async performDirectSearch(query, options, result) {
        result.searchStrategy = 'direct-small-mailbox';
        const emails = await this.ms365Operations.searchEmails({
            query: '*',
            maxResults: 1000
        });
        const contextResults = await this.contextAwareSearch.search(query, emails.messages);
        result.finalResults = contextResults.emails.slice(0, options.maxTotalResults);
        result.confidence = contextResults.confidence;
        result.tierResults = [{
                tier: 'direct-search',
                emailsSearched: emails.messages.length,
                resultsFound: result.finalResults.length,
                processingTime: 0
            }];
        return result;
    }

    /**
     * Filtered search for medium mailboxes: native search first (up to 500
     * messages), then ranking by the intelligence engine.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults`.
     * @param {object} result - Result object to populate; mutated and returned.
     * @returns {Promise<object>} The same `result` object.
     */
    async performFilteredSearch(query, options, result) {
        result.searchStrategy = 'filtered-medium-mailbox';
        // Use native search to filter first
        const filtered = await this.ms365Operations.searchEmails({
            query: query,
            maxResults: 500
        });
        const intelligentResults = await this.intelligenceEngine.intelligentSearch(query, filtered.messages, { maxResults: options.maxTotalResults });
        result.finalResults = intelligentResults.results;
        result.confidence = intelligentResults.insights.averageConfidence;
        result.tierResults = [{
                tier: 'filtered-search',
                emailsSearched: filtered.messages.length,
                resultsFound: result.finalResults.length,
                processingTime: intelligentResults.insights.processingTime
            }];
        return result;
    }

    // Helper methods

    /**
     * @param {string} query - Search text.
     * @returns {boolean} True when the query has a "from/by/sent by <word>" phrase
     *   or two consecutive capitalised words.
     */
    queryContainsPerson(query) {
        return /\b(from|by|sent by)\s+\w+/.test(query.toLowerCase()) ||
            /\b[A-Z][a-z]+\s+[A-Z][a-z]+\b/.test(query);
    }

    /**
     * Extract the sender text from a query.
     *
     * Prefers the text after "from" / "by" / "sent by"; when there is none,
     * falls back to the first pair of consecutive capitalised words, i.e. the
     * same pattern `queryContainsPerson` accepts.
     *
     * @param {string} query - Search text.
     * @returns {string} The extracted name, or '' when nothing matches.
     */
    extractPersonFromQuery(query) {
        // `\b` keeps this in line with queryContainsPerson, so "nearby x"
        // is not read as "by x".
        const keywordMatch = query.match(/\b(?:from|by|sent by)\s+([a-zA-Z\s]+)/i);
        const keywordPerson = keywordMatch ? keywordMatch[1].trim() : '';
        if (keywordPerson) {
            return keywordPerson;
        }
        const nameMatch = query.match(/\b[A-Z][a-z]+\s+[A-Z][a-z]+\b/);
        return nameMatch ? nameMatch[0] : '';
    }

    /**
     * @param {string} query - Search text.
     * @returns {boolean} True when the lower-cased query contains one of the listed keywords.
     */
    queryContainsImportantKeywords(query) {
        const important = ['tax', 'legal', 'contract', 'government', 'urgent', 'important'];
        const lowered = query.toLowerCase();
        return important.some(keyword => lowered.includes(keyword));
    }

    /**
     * @param {string} query - Search text.
     * @returns {boolean} True when the lower-cased query contains one of the listed keywords.
     */
    queryContainsDocumentKeywords(query) {
        const docKeywords = ['document', 'file', 'pdf', 'attachment', 'report'];
        const lowered = query.toLowerCase();
        return docKeywords.some(keyword => lowered.includes(keyword));
    }

    /**
     * Combine search coverage (weight 0.4) with a fixed result-quality score
     * (0.8 when there are results, 0.3 otherwise; weight 0.6).
     *
     * @param {object} result - Reads `totalEmailsInMailbox` and `finalResults`.
     * @param {number} totalSearched - Number of emails examined.
     * @returns {number} Confidence score.
     */
    calculateSearchConfidence(result, totalSearched) {
        const mailboxSize = result.totalEmailsInMailbox;
        let searchCoverage;
        if (mailboxSize > 0) {
            searchCoverage = Math.min(totalSearched / mailboxSize, 1.0);
        } else {
            // Avoid 0/0 producing NaN when no size estimate is available.
            searchCoverage = totalSearched > 0 ? 1.0 : 0;
        }
        const resultQuality = result.finalResults.length > 0 ? 0.8 : 0.3;
        return (searchCoverage * 0.4) + (resultQuality * 0.6);
    }

    /**
     * Build user-facing hints for the multi-tier strategy.
     *
     * @param {object} result - Reads `finalResults`, `totalEmailsInMailbox` and `tierResults`.
     * @param {object} options - Not read by this method.
     * @returns {string[]} Recommendation messages (possibly empty).
     */
    generateLargeMailboxRecommendations(result, options) {
        const recommendations = [];
        if (result.finalResults.length === 0) {
            recommendations.push("Try broader search terms or extend time range");
            recommendations.push("Check if emails might be in Archive or Sent folders");
        }
        if (result.totalEmailsInMailbox > 20000) {
            recommendations.push("Consider organizing emails into folders for faster searches");
            recommendations.push("Use specific date ranges when possible (e.g., 'last month', '2023')");
        }
        const totalSearched = result.tierResults.reduce((sum, tier) => sum + tier.emailsSearched, 0);
        if (result.totalEmailsInMailbox > 0) {
            const searchPercentage = (totalSearched / result.totalEmailsInMailbox) * 100;
            if (searchPercentage < 10) {
                recommendations.push(`Only searched ${searchPercentage.toFixed(1)}% of emails. Use more specific terms for better coverage.`);
            }
        }
        return recommendations;
    }

    /**
     * Quick mailbox size estimation for dynamic timeout calculation.
     *
     * Requests the ids of up to 100 messages; a full page triggers a folder
     * listing and the estimate becomes `folderCount * 1000`, capped at 100 000.
     *
     * @returns {Promise<number>} Estimated message count; 5000 or 10000 on fallback paths.
     */
    async quickMailboxSizeEstimate() {
        try {
            // Try to get a quick count from recent emails to estimate total size
            const graphClient = await this.ms365Operations.getGraphClient();
            // Sample recent emails to estimate total mailbox size
            const recentSample = await graphClient
                .api('/me/messages')
                .select('id')
                .top(100)
                .get();
            if (recentSample.value && recentSample.value.length > 0) {
                // If we get a full page of 100, estimate larger mailbox
                if (recentSample.value.length === 100) {
                    // Try a folder count approach for better estimation
                    try {
                        const folders = await graphClient.api('/me/mailFolders').get();
                        const folderCount = folders.value ? folders.value.length : 1;
                        // Rough estimation: assume 1000+ emails per active folder
                        const estimatedSize = folderCount * 1000;
                        logger.log(`📊 Quick mailbox size estimate: ${estimatedSize} (based on ${folderCount} folders)`);
                        return Math.min(estimatedSize, 100000); // Cap at 100k
                    } catch {
                        // Fallback to conservative estimate
                        return 10000;
                    }
                } else {
                    // Small mailbox
                    return recentSample.value.length * 10; // Multiply by 10 for rough total
                }
            }
            // Default fallback
            return 5000;
        } catch (error) {
            logger.error('❌ Error in quick mailbox size estimation:', error);
            return 10000; // Conservative default
        }
    }

    /**
     * Progressive search with early termination.
     *
     * @param {string} query - Search text.
     * @param {object} opts - Fully populated options (see `DEFAULT_OPTIONS`).
     * @param {number} totalTimeout - Overall budget in milliseconds.
     * @param {number} [estimatedSize] - Mailbox size estimate already obtained by the
     *   caller; when omitted, `quickMailboxSizeEstimate()` is called.
     * @returns {Promise<object>} The populated result object.
     * @throws Rethrows the strategy's error when `result.finalResults` is still empty.
     */
    async performProgressiveSearch(query, opts, totalTimeout, estimatedSize) {
        const startTime = Date.now();
        const result = {
            query,
            totalEmailsInMailbox: 0,
            searchStrategy: 'progressive-search',
            tierResults: [],
            finalResults: [],
            confidence: 0,
            searchTime: 0,
            recommendations: []
        };

        // Quick mailbox size for strategy selection
        result.totalEmailsInMailbox = Number.isFinite(estimatedSize)
            ? estimatedSize
            : await this.quickMailboxSizeEstimate();

        try {
            if (result.totalEmailsInMailbox < 1000) {
                // Small mailbox - use direct approach
                const direct = await this.performDirectSearch(query, opts, result);
                direct.searchTime = Date.now() - startTime;
                return direct;
            }
            if (result.totalEmailsInMailbox < 10000) {
                // Medium mailbox - use filtered approach with timeout check
                return await this.performFilteredSearchWithTimeout(query, opts, result, totalTimeout, startTime);
            }
            // Large mailbox - use progressive tiered approach
            return await this.performProgressiveTieredSearch(query, opts, result, totalTimeout, startTime);
        } catch (error) {
            // If search fails, return partial results if any were already
            // written to result.finalResults before the failure.
            if (result.finalResults.length > 0) {
                result.searchTime = Date.now() - startTime;
                result.confidence = 0.3; // Low confidence for partial results
                result.recommendations.push(`Search interrupted but found ${result.finalResults.length} partial results. Consider using more specific search terms.`);
                logger.log(`⚠️ Returning ${result.finalResults.length} partial results due to error: ${error}`);
                return result;
            }
            throw error;
        }
    }

    /**
     * Progressive tiered search with timeout monitoring.
     *
     * Each tier gets an equal share of 80% of `totalTimeout`; a tier that
     * exceeds its share or throws is logged and skipped. No new tier is started
     * once 90% of `totalTimeout` has elapsed.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults`, `batchSize`, `timeWindowDays`.
     * @param {object} result - Result object to populate; mutated and returned.
     * @param {number} totalTimeout - Overall budget in milliseconds.
     * @param {number} startTime - Epoch milliseconds the search started at.
     * @returns {Promise<object>} The same `result` object.
     */
    async performProgressiveTieredSearch(query, options, result, totalTimeout, startTime) {
        result.searchStrategy = 'progressive-tiered-large-mailbox';

        // Build optimized search tiers for large mailboxes
        const tiers = this.buildOptimizedSearchTiers(query, options);
        const allResults = [];
        const seenIds = new Set();
        let totalSearched = 0;
        const timePerTier = Math.floor(totalTimeout / tiers.length * 0.8); // Reserve 20% for processing

        for (let i = 0; i < tiers.length; i++) {
            const tier = tiers[i];
            const tierStartTime = Date.now();

            // Check overall timeout
            if (Date.now() - startTime > totalTimeout * 0.9) {
                logger.log(`⏱️ Approaching timeout, stopping at tier ${i + 1}/${tiers.length}`);
                break;
            }
            // Check if we have enough results
            if (allResults.length >= options.maxTotalResults) {
                logger.log(`🎯 Reached target results (${options.maxTotalResults}), stopping search`);
                break;
            }
            logger.log(`🔍 Executing tier ${i + 1}/${tiers.length}: ${tier.name} (${timePerTier / 1000}s limit)`);

            let tierTimer;
            try {
                // Set timeout for this tier
                const tierPromise = this.executeTierSearch(tier, query, options);
                const tierTimeoutPromise = new Promise((_, reject) => {
                    tierTimer = setTimeout(() => reject(new Error('Tier timeout')), timePerTier);
                });
                const tierResults = await Promise.race([tierPromise, tierTimeoutPromise]);

                if (tierResults && tierResults.length > 0) {
                    // Add to results (avoid duplicates)
                    const newResults = this.collectNewResults(allResults, seenIds, tierResults);
                    totalSearched += tierResults.length;
                    const tierTime = Date.now() - tierStartTime;
                    result.tierResults.push({
                        tier: tier.name,
                        emailsSearched: tierResults.length,
                        resultsFound: newResults.length,
                        processingTime: tierTime
                    });
                    logger.log(`✅ Tier "${tier.name}": ${tierResults.length} searched, ${newResults.length} new results (${tierTime}ms)`);
                } else {
                    logger.log(`⚠️ Tier "${tier.name}": No results found`);
                }
            } catch (error) {
                logger.log(`⚠️ Tier "${tier.name}" timed out or failed: ${error.message}`);
                // Continue with next tier
            } finally {
                // Release the tier timer so it does not outlive the tier.
                clearTimeout(tierTimer);
            }
        }

        // Finalize results
        result.finalResults = allResults.slice(0, options.maxTotalResults);
        result.confidence = this.calculateSearchConfidence(result, totalSearched);
        result.searchTime = Date.now() - startTime;
        result.recommendations = this.generateOptimizedRecommendations(result, options);
        logger.log(`🎯 Progressive search completed: ${result.finalResults.length} results from ${totalSearched} emails searched in ${result.searchTime}ms`);
        return result;
    }

    /**
     * Execute individual tier search with optimizations.
     *
     * Tiers whose name contains 'recent' or 'urgent' return the first 30 native
     * results without ranking; other tiers are ranked by the intelligence engine.
     *
     * @param {object} tier - Tier descriptor with `name` and `searchCriteria`.
     * @param {string} query - Search text.
     * @param {object} options - Reads `batchSize`.
     * @returns {Promise<Array<object>>} Emails selected for this tier (possibly empty).
     */
    async executeTierSearch(tier, query, options) {
        // Use faster native search for initial filtering
        const nativeResults = await this.ms365Operations.searchEmails({
            ...tier.searchCriteria,
            maxResults: Math.min(options.batchSize, 300) // Reduced batch size for faster execution
        });
        const emailsFound = nativeResults.messages || [];
        if (emailsFound.length === 0) {
            return [];
        }

        // Apply lighter AI processing for speed
        if (tier.name.includes('recent') || tier.name.includes('urgent')) {
            // Skip heavy AI processing for time-sensitive tiers
            return emailsFound.slice(0, 30); // Return top 30 for speed
        }
        // Use intelligent search for better relevance
        const fuzzyResults = await this.intelligenceEngine.intelligentSearch(query, emailsFound, { maxResults: 25 } // Reduced for speed
        );
        return fuzzyResults.results || [];
    }

    /**
     * Filtered search for medium mailboxes, recording elapsed time.
     *
     * NOTE: `performFilteredSearch` overwrites `searchStrategy`, so the strategy
     * reported to callers is 'filtered-medium-mailbox'. `totalTimeout` is not
     * read here; the overall deadline is enforced by `search()`.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults`.
     * @param {object} result - Result object to populate; mutated and returned.
     * @param {number} totalTimeout - Overall budget in milliseconds (unused).
     * @param {number} startTime - Epoch milliseconds the search started at.
     * @returns {Promise<object>} The populated result object.
     */
    async performFilteredSearchWithTimeout(query, options, result, totalTimeout, startTime) {
        result.searchStrategy = 'filtered-search-with-timeout';
        try {
            const filteredResult = await this.performFilteredSearch(query, options, result);
            filteredResult.searchTime = Date.now() - startTime;
            return filteredResult;
        } catch (error) {
            // Return partial results if the failure happened after some were stored
            if (result.finalResults.length > 0) {
                result.searchTime = Date.now() - startTime;
                result.confidence = 0.5;
                result.recommendations.push('Search timed out but returned partial results. Try using more specific search terms.');
                return result;
            }
            throw error;
        }
    }

    /**
     * Build optimized search tiers for faster large mailbox searches.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `timeWindowDays` (capped at 90 for the extended tier).
     * @returns {Array<object>} At most four tier descriptors, in execution order.
     */
    buildOptimizedSearchTiers(query, options) {
        const tiers = [];
        const currentDate = new Date();
        const dayMs = 24 * 60 * 60 * 1000;

        // Tier 1: Very recent emails (last 7 days) - highest priority, fastest
        const veryRecentDate = new Date(currentDate.getTime() - 7 * dayMs);
        tiers.push({
            name: 'very-recent-urgent-search',
            searchCriteria: {
                query: query,
                after: veryRecentDate.toISOString().split('T')[0],
                folder: 'inbox'
            },
            priority: 1,
            estimatedResults: 50
        });

        // Tier 2: Recent emails (last 30 days) with subject focus
        const recentDate = new Date(currentDate.getTime() - 30 * dayMs);
        tiers.push({
            name: 'recent-subject-search',
            searchCriteria: {
                subject: query,
                after: recentDate.toISOString().split('T')[0],
                maxResults: 200
            },
            priority: 2,
            estimatedResults: 100
        });

        // Tier 3: Sender-based search if query contains person/email (faster than content search)
        if (this.queryContainsPerson(query)) {
            const personName = this.extractPersonFromQuery(query);
            // Skip the tier rather than issue a search with an empty sender.
            if (personName) {
                tiers.push({
                    name: 'sender-optimized-search',
                    searchCriteria: {
                        from: personName,
                        after: recentDate.toISOString().split('T')[0],
                        maxResults: 150
                    },
                    priority: 3,
                    estimatedResults: 75
                });
            }
        }

        // Tier 4: Extended time range but limited scope (only if still needed)
        const extendedDate = new Date(currentDate.getTime() - Math.min(options.timeWindowDays, 90) * dayMs);
        tiers.push({
            name: 'extended-limited-search',
            searchCriteria: {
                query: query,
                after: extendedDate.toISOString().split('T')[0],
                maxResults: 250
            },
            priority: 4,
            estimatedResults: 125
        });

        return tiers.slice(0, 4); // Limit to 4 tiers for speed
    }

    /**
     * Generate optimized recommendations for performance.
     *
     * @param {object} result - Reads `finalResults`, `searchTime`, `confidence`, `totalEmailsInMailbox`.
     * @param {object} options - Not read by this method.
     * @returns {string[]} Recommendation messages (possibly empty).
     */
    generateOptimizedRecommendations(result, options) {
        const recommendations = [];
        if (result.finalResults.length === 0) {
            recommendations.push('No results found. Try using broader search terms or increasing timeWindowDays.');
            recommendations.push('Consider searching for sender names, subject keywords, or specific date ranges.');
        } else if (result.finalResults.length < 10) {
            recommendations.push('Few results found. Consider expanding search terms or increasing timeWindowDays.');
        }
        if (result.searchTime > 30000) { // > 30 seconds
            recommendations.push('Search took longer than expected. Try using more specific terms, reduce timeWindowDays, or lower maxTotalResults.');
        }
        if (result.confidence < 0.5) {
            recommendations.push('Low confidence results. Use more specific search terms for better accuracy.');
        }
        if (result.totalEmailsInMailbox > 50000) {
            recommendations.push('Very large mailbox detected. For best performance, use specific date ranges (timeWindowDays: 30) and targeted search terms.');
        }
        return recommendations;
    }
}

// Defaults merged underneath caller-supplied options in search().
LargeMailboxSearch.DEFAULT_OPTIONS = {
    maxTotalResults: 50,
    enableTieredSearch: true,
    enableCaching: false,
    prioritizeRecent: true,
    timeWindowDays: 90,
    batchSize: 150,
    maxBatches: 8
};