/*
 * ms365-mcp-server
 * Version: (not specified)
 * Microsoft 365 MCP Server for managing Microsoft 365 email through natural-language interactions, with full OAuth2 authentication support.
 * 600 lines (599 loc) • 27.3 kB
 * JavaScript
 */
import { logger } from './api.js';
/** Milliseconds in one day; used to turn "N days ago" into a timestamp. */
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/**
 * Format the calendar date that lies `days` days before `reference` as
 * `YYYY-MM-DD` (taken from the UTC ISO string).
 *
 * @param {Date} reference - Point in time to count back from.
 * @param {number} days - Number of days to subtract (may be fractional).
 * @returns {string} Date portion of the ISO-8601 timestamp.
 */
function isoDateDaysBefore(reference, days) {
    return new Date(reference.getTime() - days * MS_PER_DAY).toISOString().split('T')[0];
}

/**
 * Create the empty result envelope that every search strategy fills in.
 *
 * @param {string} query - The user's search text.
 * @param {string} searchStrategy - Initial strategy label.
 * @returns {object} Result object with zeroed counters and empty lists.
 */
function createEmptyResult(query, searchStrategy) {
    return {
        query,
        totalEmailsInMailbox: 0,
        searchStrategy,
        tierResults: [],
        finalResults: [],
        confidence: 0,
        searchTime: 0,
        recommendations: []
    };
}

/**
 * Search orchestrator that picks a strategy (direct, filtered, or tiered)
 * from an estimated mailbox size and combines native MS365 search with the
 * injected ranking engines.
 */
export class LargeMailboxSearch {
    /**
     * @param {object} ms365Operations - Used via `searchEmails(criteria)` and `getGraphClient()`.
     * @param {object} contextAwareSearch - Used via `search(query, emails)`.
     * @param {object} intelligenceEngine - Used via `intelligentSearch(query, emails, options)`.
     * @param {object} proactiveIntelligence - Stored on the instance; not called by this class.
     */
    constructor(ms365Operations, contextAwareSearch, intelligenceEngine, proactiveIntelligence) {
        this.ms365Operations = ms365Operations;
        this.contextAwareSearch = contextAwareSearch;
        this.intelligenceEngine = intelligenceEngine;
        this.proactiveIntelligence = proactiveIntelligence;
    }

    /**
     * Perform scalable search for large mailboxes (20k+ emails).
     *
     * Estimates the mailbox size, derives an overall timeout from it
     * (60s, 120s above 20k, 180s above 50k) and races the progressive search
     * against that timeout.
     *
     * @param {string} query - Search text.
     * @param {object} [options] - Overrides merged on top of `LargeMailboxSearch.DEFAULT_OPTIONS`.
     * @returns {Promise<object>} The search result envelope.
     * @throws {Error} When the overall timeout elapses first, or the search itself fails.
     */
    async search(query, options = {}) {
        const opts = { ...LargeMailboxSearch.DEFAULT_OPTIONS, ...options };
        logger.log(`🔍 Large mailbox search initiated for: "${query}"`);
        logger.log(`📊 Search options: ${JSON.stringify(opts)}`);

        // Dynamic timeout based on mailbox size estimation
        const estimatedSize = await this.quickMailboxSizeEstimate();
        const baseTimeout = 60000; // 60 seconds base
        let searchTimeout = baseTimeout;
        if (estimatedSize > 50000) {
            searchTimeout = 180000; // 3 minutes for very large mailboxes (50k+)
        }
        else if (estimatedSize > 20000) {
            searchTimeout = 120000; // 2 minutes for large mailboxes (20k+)
        }
        logger.log(`⏱️ Dynamic timeout set to ${searchTimeout / 1000} seconds based on estimated mailbox size: ${estimatedSize}`);

        let timeoutHandle;
        const timeoutPromise = new Promise((_, reject) => {
            timeoutHandle = setTimeout(() => {
                reject(new Error(`Large mailbox search timed out after ${searchTimeout / 1000} seconds. Try using more specific search terms, reducing the time window (timeWindowDays), or using smaller maxTotalResults.`));
            }, searchTimeout);
        });
        try {
            // The estimate is handed on so the Graph round trips are not repeated.
            return await Promise.race([
                this.performProgressiveSearch(query, opts, searchTimeout, estimatedSize),
                timeoutPromise
            ]);
        }
        finally {
            // Without this the pending timer keeps the event loop alive for
            // up to three minutes after the search has already settled.
            clearTimeout(timeoutHandle);
        }
    }

    /**
     * Non-progressive entry point: estimates the mailbox size with
     * `estimateMailboxSize()` and dispatches to the matching strategy.
     *
     * NOTE(review): `estimateMailboxSize()` as written only yields values in
     * 1000..3000 (or 5000 on failure), so in practice this always takes the
     * filtered branch.
     *
     * @param {string} query - Search text.
     * @param {object} opts - Fully populated search options.
     * @returns {Promise<object>} The search result envelope.
     */
    async performActualSearch(query, opts) {
        const startTime = Date.now();
        const result = createEmptyResult(query, '');
        // Get total mailbox size estimate
        result.totalEmailsInMailbox = await this.estimateMailboxSize();
        let outcome;
        if (result.totalEmailsInMailbox < 1000) {
            // Small mailbox - use direct approach
            outcome = await this.performDirectSearch(query, opts, result);
        }
        else if (result.totalEmailsInMailbox < 10000) {
            // Medium mailbox - use filtered approach
            outcome = await this.performFilteredSearch(query, opts, result);
        }
        else {
            // Large mailbox (10k+ by this threshold) - use multi-tier approach
            return await this.performTieredSearch(query, opts, result, startTime);
        }
        // The direct and filtered strategies do not record elapsed time themselves.
        outcome.searchTime = Date.now() - startTime;
        return outcome;
    }

    /**
     * Multi-tier search strategy for large mailboxes.
     *
     * Runs each tier from `buildSearchTiers()` in priority order, ranks the
     * native hits with the context-aware search (tier names containing
     * "context") or the intelligence engine, and accumulates unique results
     * until `options.maxTotalResults` is reached. A failing tier is logged
     * and skipped.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults` and `batchSize` (plus what the tier builder reads).
     * @param {object} result - Result envelope to fill in; mutated and returned.
     * @param {number} [startTime] - Epoch ms the overall search began; defaults to now.
     * @returns {Promise<object>} The same `result` object.
     */
    async performTieredSearch(query, options, result, startTime = Date.now()) {
        result.searchStrategy = 'multi-tier-large-mailbox';
        // Define search tiers in order of priority
        const tiers = this.buildSearchTiers(query, options);
        const allResults = [];
        const seenIds = new Set();
        let totalSearched = 0;
        for (const tier of tiers) {
            if (allResults.length >= options.maxTotalResults) {
                logger.log(`🎯 Reached target results (${options.maxTotalResults}), stopping search`);
                break;
            }
            const tierStartTime = Date.now();
            logger.log(`🔍 Executing tier: ${tier.name}`);
            try {
                // Use native MS365 search for initial filtering.
                // maxResults is spread last, so it replaces any tier-specific maxResults.
                const nativeResults = await this.ms365Operations.searchEmails({
                    ...tier.searchCriteria,
                    maxResults: Math.min(options.batchSize, 500)
                });
                const emailsSearched = nativeResults.messages.length;
                totalSearched += emailsSearched;
                if (emailsSearched === 0) {
                    logger.log(`⚠️ Tier "${tier.name}": No emails found matching criteria`);
                    continue;
                }
                // Apply AI intelligence to filtered results
                let intelligentResults;
                if (tier.name.includes('context')) {
                    // Use context-aware search
                    const contextResults = await this.contextAwareSearch.search(query, nativeResults.messages);
                    intelligentResults = contextResults.emails;
                }
                else {
                    // Use fuzzy/intelligent search
                    const fuzzyResults = await this.intelligenceEngine.intelligentSearch(query, nativeResults.messages, { maxResults: 50 });
                    intelligentResults = fuzzyResults.results;
                }
                // Add to results, dropping anything an earlier tier already produced
                const newResults = intelligentResults.filter(email => !seenIds.has(email.id));
                for (const email of newResults) {
                    seenIds.add(email.id);
                }
                allResults.push(...newResults);
                const tierTime = Date.now() - tierStartTime;
                result.tierResults.push({
                    tier: tier.name,
                    emailsSearched,
                    resultsFound: newResults.length,
                    processingTime: tierTime
                });
                logger.log(`✅ Tier "${tier.name}": ${emailsSearched} searched, ${newResults.length} found (${tierTime}ms)`);
            }
            catch (error) {
                logger.error(`❌ Error in tier "${tier.name}":`, error);
            }
        }
        // Limit final results and calculate confidence
        result.finalResults = allResults.slice(0, options.maxTotalResults);
        result.confidence = this.calculateSearchConfidence(result, totalSearched);
        // Measured from the start of the search (previously always ~0).
        result.searchTime = Date.now() - startTime;
        result.recommendations = this.generateLargeMailboxRecommendations(result, options);
        logger.log(`🎯 Large mailbox search completed: ${result.finalResults.length} results from ${totalSearched} emails searched`);
        return result;
    }

    /**
     * Build prioritized search tiers based on query analysis.
     *
     * Always emits a recent-inbox tier and a subject tier; adds a sender tier
     * when a person can be extracted from the query, an extended-window tier
     * for "important" keywords, and an attachment tier for document keywords.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `timeWindowDays`.
     * @returns {Array<object>} Tiers sorted by ascending `priority`.
     */
    buildSearchTiers(query, options) {
        const tiers = [];
        const currentDate = new Date();
        // Tier 1: Recent emails with query terms (highest priority)
        const recentDays = Math.min(options.timeWindowDays / 4, 30); // Last 30 days max
        tiers.push({
            name: 'recent-context-search',
            searchCriteria: {
                query: query,
                after: isoDateDaysBefore(currentDate, recentDays),
                folder: 'inbox'
            },
            priority: 1,
            estimatedResults: 100
        });
        // Tier 2: Subject line matches (medium-high priority)
        tiers.push({
            name: 'subject-fuzzy-search',
            searchCriteria: {
                subject: query,
                maxResults: 300
            },
            priority: 2,
            estimatedResults: 150
        });
        // Tier 3: Sender-based search if query contains person/email.
        // Skipped when no name can be extracted, so we never search with from: ''.
        const personName = this.queryContainsPerson(query) ? this.extractPersonFromQuery(query) : '';
        if (personName) {
            tiers.push({
                name: 'sender-intelligent-search',
                searchCriteria: {
                    from: personName,
                    maxResults: 200
                },
                priority: 3,
                estimatedResults: 100
            });
        }
        // Tier 4: Extended time range for important keywords
        if (this.queryContainsImportantKeywords(query)) {
            tiers.push({
                name: 'extended-context-search',
                searchCriteria: {
                    query: query,
                    after: isoDateDaysBefore(currentDate, options.timeWindowDays),
                    maxResults: 400
                },
                priority: 4,
                estimatedResults: 200
            });
        }
        // Tier 5: Attachment-restricted search for document-related queries
        if (this.queryContainsDocumentKeywords(query)) {
            tiers.push({
                name: 'attachment-search',
                searchCriteria: {
                    query: query,
                    hasAttachment: true,
                    maxResults: 200
                },
                priority: 5,
                estimatedResults: 80
            });
        }
        // Sort by priority and return
        return tiers.sort((a, b) => a.priority - b.priority);
    }

    /**
     * Estimate total mailbox size by sampling up to 50 messages from the
     * inbox, sent and archive folders. A full sample counts as 1000, a
     * partial one as its actual length; the total is floored at 1000.
     *
     * @returns {Promise<number>} Estimated message count (>= 1000).
     */
    async estimateMailboxSize() {
        try {
            // Get sample from different folders to estimate total
            const folders = ['inbox', 'sent', 'archive'];
            let totalEstimate = 0;
            for (const folder of folders) {
                try {
                    const sample = await this.ms365Operations.searchEmails({
                        query: '*',
                        folder: folder,
                        maxResults: 50
                    });
                    // Rough estimation: if we get 50, assume there are many more
                    totalEstimate += sample.messages.length === 50
                        ? 1000 // Conservative estimate per active folder
                        : sample.messages.length;
                }
                catch (error) {
                    // Folder might not exist: keep going, but leave a trace instead of failing silently.
                    logger.log(`⚠️ Skipping folder "${folder}" during mailbox size estimation: ${error && error.message ? error.message : error}`);
                }
            }
            return Math.max(totalEstimate, 1000);
        }
        catch (error) {
            logger.error('Error estimating mailbox size:', error);
            return 5000; // Default assumption for large mailbox
        }
    }

    /**
     * Direct search for small mailboxes: fetch up to 1000 messages with a
     * wildcard query and let the context-aware search rank them.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults`.
     * @param {object} result - Result envelope to fill in; mutated and returned.
     * @returns {Promise<object>} The same `result` object.
     */
    async performDirectSearch(query, options, result) {
        const startedAt = Date.now();
        result.searchStrategy = 'direct-small-mailbox';
        const emails = await this.ms365Operations.searchEmails({
            query: '*',
            maxResults: 1000
        });
        const contextResults = await this.contextAwareSearch.search(query, emails.messages);
        result.finalResults = contextResults.emails.slice(0, options.maxTotalResults);
        result.confidence = contextResults.confidence;
        result.tierResults = [{
                tier: 'direct-search',
                emailsSearched: emails.messages.length,
                resultsFound: result.finalResults.length,
                // Measured elapsed time (previously hard-coded to 0).
                processingTime: Date.now() - startedAt
            }];
        return result;
    }

    /**
     * Filtered search for medium mailboxes: native search for up to 500
     * hits, then ranking through the intelligence engine.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults`.
     * @param {object} result - Result envelope to fill in; mutated and returned.
     * @returns {Promise<object>} The same `result` object.
     */
    async performFilteredSearch(query, options, result) {
        result.searchStrategy = 'filtered-medium-mailbox';
        // Use native search to filter first
        const filtered = await this.ms365Operations.searchEmails({
            query: query,
            maxResults: 500
        });
        const ranked = await this.intelligenceEngine.intelligentSearch(query, filtered.messages, { maxResults: options.maxTotalResults });
        result.finalResults = ranked.results;
        result.confidence = ranked.insights.averageConfidence;
        result.tierResults = [{
                tier: 'filtered-search',
                emailsSearched: filtered.messages.length,
                resultsFound: result.finalResults.length,
                processingTime: ranked.insights.processingTime
            }];
        return result;
    }

    // Helper methods

    /**
     * Heuristic: does the query mention a person? True for
     * "from|by|sent by <word>" (case-insensitive) or two consecutive
     * Capitalised words.
     *
     * @param {string} query
     * @returns {boolean}
     */
    queryContainsPerson(query) {
        return /\b(from|by|sent by)\s+\w+/.test(query.toLowerCase()) ||
            /\b[A-Z][a-z]+\s+[A-Z][a-z]+\b/.test(query);
    }

    /**
     * Extract the person a query refers to.
     *
     * Prefers the text after "from", "by" or "sent by" (as whole words);
     * otherwise falls back to the first pair of Capitalised words, matching
     * the second rule of `queryContainsPerson`.
     *
     * @param {string} query
     * @returns {string} Extracted name, or '' when none is found.
     */
    extractPersonFromQuery(query) {
        const keywordMatch = query.match(/\b(?:from|by|sent by)\s+([a-zA-Z\s]+)/i);
        if (keywordMatch) {
            const afterKeyword = keywordMatch[1].trim();
            if (afterKeyword) {
                return afterKeyword;
            }
        }
        const capitalisedPair = query.match(/\b[A-Z][a-z]+\s+[A-Z][a-z]+\b/);
        return capitalisedPair ? capitalisedPair[0] : '';
    }

    /**
     * @param {string} query
     * @returns {boolean} True when the query contains one of the "important" keywords.
     */
    queryContainsImportantKeywords(query) {
        const important = ['tax', 'legal', 'contract', 'government', 'urgent', 'important'];
        const lowered = query.toLowerCase();
        return important.some(keyword => lowered.includes(keyword));
    }

    /**
     * @param {string} query
     * @returns {boolean} True when the query contains a document-related keyword.
     */
    queryContainsDocumentKeywords(query) {
        const docKeywords = ['document', 'file', 'pdf', 'attachment', 'report'];
        const lowered = query.toLowerCase();
        return docKeywords.some(keyword => lowered.includes(keyword));
    }

    /**
     * Blend search coverage (40%) with a fixed quality score (60%:
     * 0.8 when there are results, 0.3 otherwise).
     *
     * @param {object} result - Reads `totalEmailsInMailbox` and `finalResults`.
     * @param {number} totalSearched - Number of emails examined.
     * @returns {number} Confidence score.
     */
    calculateSearchConfidence(result, totalSearched) {
        const mailboxSize = result.totalEmailsInMailbox;
        // Guard the division: an unknown/zero size means coverage is unknown, not NaN.
        const searchCoverage = mailboxSize > 0 ? Math.min(totalSearched / mailboxSize, 1.0) : 0;
        const resultQuality = result.finalResults.length > 0 ? 0.8 : 0.3;
        return (searchCoverage * 0.4) + (resultQuality * 0.6);
    }

    /**
     * Produce user-facing hints for the non-progressive tiered search.
     *
     * @param {object} result - Reads `finalResults`, `totalEmailsInMailbox`, `tierResults`.
     * @param {object} options - Currently unused.
     * @returns {string[]} Recommendation messages.
     */
    generateLargeMailboxRecommendations(result, options) {
        const recommendations = [];
        if (result.finalResults.length === 0) {
            recommendations.push("Try broader search terms or extend time range");
            recommendations.push("Check if emails might be in Archive or Sent folders");
        }
        if (result.totalEmailsInMailbox > 20000) {
            recommendations.push("Consider organizing emails into folders for faster searches");
            recommendations.push("Use specific date ranges when possible (e.g., 'last month', '2023')");
        }
        const mailboxSize = result.totalEmailsInMailbox;
        if (mailboxSize > 0) {
            const totalSearched = result.tierResults.reduce((sum, tier) => sum + tier.emailsSearched, 0);
            const searchPercentage = (totalSearched / mailboxSize) * 100;
            if (searchPercentage < 10) {
                recommendations.push(`Only searched ${searchPercentage.toFixed(1)}% of emails. Use more specific terms for better coverage.`);
            }
        }
        return recommendations;
    }

    /**
     * Quick mailbox size estimation for dynamic timeout calculation.
     *
     * Requests up to 100 message ids. A partial page is scaled by 10; an
     * exactly full page falls through to "number of mail folders x 1000"
     * (capped at 100000). Failures yield fixed fallbacks.
     *
     * @returns {Promise<number>} Rough message-count estimate.
     */
    async quickMailboxSizeEstimate() {
        try {
            const graphClient = await this.ms365Operations.getGraphClient();
            // Sample recent emails to estimate total mailbox size
            const recentSample = await graphClient
                .api('/me/messages')
                .select('id')
                .top(100)
                .get();
            const sampleSize = recentSample.value ? recentSample.value.length : 0;
            if (sampleSize === 0) {
                // Default fallback
                return 5000;
            }
            if (sampleSize !== 100) {
                // Small mailbox: multiply by 10 for rough total
                return sampleSize * 10;
            }
            // A full page of 100: try a folder count approach for better estimation
            try {
                const folders = await graphClient.api('/me/mailFolders').get();
                const folderCount = folders.value ? folders.value.length : 1;
                // Rough estimation: assume 1000+ emails per active folder
                const estimatedSize = folderCount * 1000;
                logger.log(`📊 Quick mailbox size estimate: ${estimatedSize} (based on ${folderCount} folders)`);
                return Math.min(estimatedSize, 100000); // Cap at 100k
            }
            catch {
                // Fallback to conservative estimate
                return 10000;
            }
        }
        catch (error) {
            logger.error('❌ Error in quick mailbox size estimation:', error);
            return 10000; // Conservative default
        }
    }

    /**
     * Progressive search with partial results and early termination.
     *
     * Chooses direct (< 1000), filtered (< 10000) or progressive tiered
     * search from the mailbox size estimate.
     *
     * @param {string} query - Search text.
     * @param {object} opts - Fully populated search options.
     * @param {number} totalTimeout - Overall time budget in ms.
     * @param {number} [knownMailboxSize] - Already computed size estimate; when
     *   omitted (or not a finite number) `quickMailboxSizeEstimate()` is called.
     * @returns {Promise<object>} The search result envelope.
     * @throws Rethrows the strategy's error when no partial results exist.
     */
    async performProgressiveSearch(query, opts, totalTimeout, knownMailboxSize) {
        const startTime = Date.now();
        const result = createEmptyResult(query, 'progressive-search');
        // Quick mailbox size for strategy selection
        result.totalEmailsInMailbox = Number.isFinite(knownMailboxSize)
            ? knownMailboxSize
            : await this.quickMailboxSizeEstimate();
        try {
            let outcome;
            if (result.totalEmailsInMailbox < 1000) {
                // Small mailbox - use direct approach
                outcome = await this.performDirectSearch(query, opts, result);
            }
            else if (result.totalEmailsInMailbox < 10000) {
                // Medium mailbox - use filtered approach with timeout check
                outcome = await this.performFilteredSearchWithTimeout(query, opts, result, totalTimeout, startTime);
            }
            else {
                // Large mailbox - use progressive tiered approach (records its own searchTime)
                return await this.performProgressiveTieredSearch(query, opts, result, totalTimeout, startTime);
            }
            // The direct and filtered strategies leave searchTime at 0; record it here.
            outcome.searchTime = Date.now() - startTime;
            return outcome;
        }
        catch (error) {
            // If search fails, return partial results if any
            if (result.finalResults.length === 0) {
                throw error;
            }
            result.searchTime = Date.now() - startTime;
            result.confidence = 0.3; // Low confidence for partial results
            result.recommendations.push(`Search interrupted but found ${result.finalResults.length} partial results. Consider using more specific search terms.`);
            logger.log(`⚠️ Returning ${result.finalResults.length} partial results due to error: ${error}`);
            return result;
        }
    }

    /**
     * Progressive tiered search with timeout monitoring.
     *
     * Each tier gets an equal share of 80% of `totalTimeout`; the loop stops
     * early once 90% of the budget is spent or enough results were found.
     * A tier that fails or times out is logged and skipped.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `maxTotalResults` (plus what the helpers read).
     * @param {object} result - Result envelope to fill in; mutated and returned.
     * @param {number} totalTimeout - Overall time budget in ms.
     * @param {number} startTime - Epoch ms the overall search began.
     * @returns {Promise<object>} The same `result` object.
     */
    async performProgressiveTieredSearch(query, options, result, totalTimeout, startTime) {
        result.searchStrategy = 'progressive-tiered-large-mailbox';
        // Build optimized search tiers for large mailboxes
        const tiers = this.buildOptimizedSearchTiers(query, options);
        const allResults = [];
        const seenIds = new Set();
        let totalSearched = 0;
        const timePerTier = Math.floor(totalTimeout / tiers.length * 0.8); // Reserve 20% for processing
        for (let i = 0; i < tiers.length; i++) {
            const tier = tiers[i];
            const tierStartTime = Date.now();
            // Check overall timeout
            if (Date.now() - startTime > totalTimeout * 0.9) {
                logger.log(`⏱️ Approaching timeout, stopping at tier ${i + 1}/${tiers.length}`);
                break;
            }
            // Check if we have enough results
            if (allResults.length >= options.maxTotalResults) {
                logger.log(`🎯 Reached target results (${options.maxTotalResults}), stopping search`);
                break;
            }
            logger.log(`🔍 Executing tier ${i + 1}/${tiers.length}: ${tier.name} (${timePerTier / 1000}s limit)`);
            let tierTimer;
            try {
                // Race the tier against its own time slice
                const tierTimeoutPromise = new Promise((_, reject) => {
                    tierTimer = setTimeout(() => reject(new Error('Tier timeout')), timePerTier);
                });
                const tierResults = await Promise.race([
                    this.executeTierSearch(tier, query, options),
                    tierTimeoutPromise
                ]);
                if (!tierResults || tierResults.length === 0) {
                    logger.log(`⚠️ Tier "${tier.name}": No results found`);
                    continue;
                }
                // Add to results, dropping anything an earlier tier already produced
                const newResults = tierResults.filter((email) => !seenIds.has(email.id));
                for (const email of newResults) {
                    seenIds.add(email.id);
                }
                allResults.push(...newResults);
                // Counts what executeTierSearch returned (already ranked/trimmed),
                // not the raw number of native hits.
                totalSearched += tierResults.length;
                const tierTime = Date.now() - tierStartTime;
                result.tierResults.push({
                    tier: tier.name,
                    emailsSearched: tierResults.length,
                    resultsFound: newResults.length,
                    processingTime: tierTime
                });
                logger.log(`✅ Tier "${tier.name}": ${tierResults.length} searched, ${newResults.length} new results (${tierTime}ms)`);
            }
            catch (error) {
                logger.log(`⚠️ Tier "${tier.name}" timed out or failed: ${error.message}`);
                // Continue with next tier
            }
            finally {
                // Release the per-tier timer so finished tiers leave nothing pending.
                clearTimeout(tierTimer);
            }
        }
        // Finalize results
        result.finalResults = allResults.slice(0, options.maxTotalResults);
        result.confidence = this.calculateSearchConfidence(result, totalSearched);
        result.searchTime = Date.now() - startTime;
        result.recommendations = this.generateOptimizedRecommendations(result, options);
        logger.log(`🎯 Progressive search completed: ${result.finalResults.length} results from ${totalSearched} emails searched in ${result.searchTime}ms`);
        return result;
    }

    /**
     * Execute individual tier search with optimizations.
     *
     * Tiers whose name contains "recent" or "urgent" return the first 30
     * native hits unranked; all others are ranked by the intelligence engine
     * (max 25).
     *
     * @param {object} tier - Tier descriptor with `name` and `searchCriteria`.
     * @param {string} query - Search text.
     * @param {object} options - Reads `batchSize`.
     * @returns {Promise<Array<object>>} Emails selected for this tier (possibly empty).
     */
    async executeTierSearch(tier, query, options) {
        // Use faster native search for initial filtering.
        // maxResults is spread last, so it replaces any tier-specific maxResults.
        const nativeResults = await this.ms365Operations.searchEmails({
            ...tier.searchCriteria,
            maxResults: Math.min(options.batchSize, 300) // Reduced batch size for faster execution
        });
        const emailsFound = nativeResults.messages || [];
        if (emailsFound.length === 0) {
            return [];
        }
        const isTimeSensitive = tier.name.includes('recent') || tier.name.includes('urgent');
        if (isTimeSensitive) {
            // Skip heavy AI processing for time-sensitive tiers
            return emailsFound.slice(0, 30); // Return top 30 for speed
        }
        // Use intelligent search for better relevance
        const fuzzyResults = await this.intelligenceEngine.intelligentSearch(query, emailsFound, { maxResults: 25 });
        return fuzzyResults.results || [];
    }

    /**
     * Filtered search wrapper for medium mailboxes.
     *
     * NOTE(review): despite the name no timeout is enforced here; the only
     * time limit is the race in `search()`. `performFilteredSearch` also
     * overwrites `searchStrategy`, and it assigns `finalResults` only on
     * success, so the partial-results branch below looks unreachable.
     *
     * @param {string} query - Search text.
     * @param {object} options - Passed through to `performFilteredSearch`.
     * @param {object} result - Result envelope to fill in; mutated and returned.
     * @param {number} totalTimeout - Overall time budget in ms (currently unused).
     * @param {number} startTime - Epoch ms the overall search began.
     * @returns {Promise<object>} The same `result` object.
     */
    async performFilteredSearchWithTimeout(query, options, result, totalTimeout, startTime) {
        result.searchStrategy = 'filtered-search-with-timeout';
        try {
            return await this.performFilteredSearch(query, options, result);
        }
        catch (error) {
            // Return partial results if available, otherwise propagate
            if (result.finalResults.length === 0) {
                throw error;
            }
            result.searchTime = Date.now() - startTime;
            result.confidence = 0.5;
            result.recommendations.push('Search timed out but returned partial results. Try using more specific search terms.');
            return result;
        }
    }

    /**
     * Build optimized search tiers for faster large mailbox searches:
     * last 7 days in the inbox, last 30 days by subject, an optional sender
     * tier, and an extended window of at most 90 days.
     *
     * @param {string} query - Search text.
     * @param {object} options - Reads `timeWindowDays`.
     * @returns {Array<object>} At most four tiers, in priority order.
     */
    buildOptimizedSearchTiers(query, options) {
        const tiers = [];
        const currentDate = new Date();
        // Tier 1: Very recent emails (last 7 days) - highest priority, fastest
        tiers.push({
            name: 'very-recent-urgent-search',
            searchCriteria: {
                query: query,
                after: isoDateDaysBefore(currentDate, 7),
                folder: 'inbox'
            },
            priority: 1,
            estimatedResults: 50
        });
        // Tier 2: Recent emails (last 30 days) with subject focus
        const recentAfter = isoDateDaysBefore(currentDate, 30);
        tiers.push({
            name: 'recent-subject-search',
            searchCriteria: {
                subject: query,
                after: recentAfter,
                maxResults: 200
            },
            priority: 2,
            estimatedResults: 100
        });
        // Tier 3: Sender-based search if query contains person/email (faster than content search).
        // Skipped when no name can be extracted, so we never search with from: ''.
        const personName = this.queryContainsPerson(query) ? this.extractPersonFromQuery(query) : '';
        if (personName) {
            tiers.push({
                name: 'sender-optimized-search',
                searchCriteria: {
                    from: personName,
                    after: recentAfter,
                    maxResults: 150
                },
                priority: 3,
                estimatedResults: 75
            });
        }
        // Tier 4: Extended time range but limited scope (only if still needed)
        tiers.push({
            name: 'extended-limited-search',
            searchCriteria: {
                query: query,
                after: isoDateDaysBefore(currentDate, Math.min(options.timeWindowDays, 90)),
                maxResults: 250
            },
            priority: 4,
            estimatedResults: 125
        });
        return tiers.slice(0, 4); // Limit to 4 tiers for speed
    }

    /**
     * Generate recommendations for the progressive search, based on result
     * count, elapsed time, confidence and mailbox size.
     *
     * @param {object} result - Reads `finalResults`, `searchTime`, `confidence`, `totalEmailsInMailbox`.
     * @param {object} options - Currently unused.
     * @returns {string[]} Recommendation messages.
     */
    generateOptimizedRecommendations(result, options) {
        const recommendations = [];
        const found = result.finalResults.length;
        if (found === 0) {
            recommendations.push('No results found. Try using broader search terms or increasing timeWindowDays.');
            recommendations.push('Consider searching for sender names, subject keywords, or specific date ranges.');
        }
        else if (found < 10) {
            recommendations.push('Few results found. Consider expanding search terms or increasing timeWindowDays.');
        }
        if (result.searchTime > 30000) { // > 30 seconds
            recommendations.push('Search took longer than expected. Try using more specific terms, reduce timeWindowDays, or lower maxTotalResults.');
        }
        if (result.confidence < 0.5) {
            recommendations.push('Low confidence results. Use more specific search terms for better accuracy.');
        }
        if (result.totalEmailsInMailbox > 50000) {
            recommendations.push('Very large mailbox detected. For best performance, use specific date ranges (timeWindowDays: 30) and targeted search terms.');
        }
        return recommendations;
    }
}
// Defaults that LargeMailboxSearch.search() spreads underneath the
// caller-supplied options. Within this class only maxTotalResults,
// timeWindowDays and batchSize are read; the remaining flags are carried
// along in the merged options but not consulted by the code in this file.
Object.assign(LargeMailboxSearch, {
    DEFAULT_OPTIONS: {
        // Upper bound on the number of emails returned in finalResults.
        maxTotalResults: 50,
        enableTieredSearch: true,
        enableCaching: false,
        prioritizeRecent: true,
        // Look-back window, in days, used when building date-bounded tiers.
        timeWindowDays: 90,
        // Requested page size for native searches (further capped per strategy).
        batchSize: 150,
        maxBatches: 8
    }
});