UNPKG

@snehal96/unimail

Version:

Unified email fetching & document extraction layer for modern web apps

549 lines (548 loc) 24.6 kB
import { Client } from '@microsoft/microsoft-graph-client'; import { ConfidentialClientApplication } from '@azure/msal-node'; import { EmailParserService } from '../services/EmailParserService.js'; import { OAuthService } from '../auth/OAuthService.js'; import { OutlookOAuthProvider } from '../auth/providers/OutlookOAuthProvider.js'; import { EmailStreamService } from '../services/EmailStreamService.js'; export class OutlookAdapter { constructor() { this.initialized = false; this.emailParserService = new EmailParserService(); } /** * Initialize the Outlook adapter with credentials. * This method supports both traditional refresh token authentication * and the new OAuth flow using an auth code. */ async initialize(credentials) { this.credentials_ = credentials; if (credentials.accessToken) { // If access token is provided, use it directly this.accessToken_ = credentials.accessToken; } else { // Create MSAL app this.msalApp_ = new ConfidentialClientApplication({ auth: { clientId: this.credentials_.clientId, clientSecret: this.credentials_.clientSecret, // Use tenant ID if provided, otherwise use common endpoint authority: `https://login.microsoftonline.com/${this.credentials_.tenantId || 'common'}` } }); // Handle OAuth flow if auth code is provided instead of refresh token if (!this.credentials_.refreshToken && this.credentials_.authCode) { if (!this.credentials_.redirectUri) { throw new Error('redirectUri is required when using authCode for authentication'); } try { // Exchange the auth code for tokens const tokenRequest = { code: this.credentials_.authCode, scopes: ['Mail.Read', 'offline_access'], redirectUri: this.credentials_.redirectUri, }; const response = await this.msalApp_.acquireTokenByCode(tokenRequest); // Save the refresh token // MSAL doesn't directly expose refreshToken in its types but it may be in the response if (response.refreshToken) { this.credentials_.refreshToken = response.refreshToken; } else { throw new Error('No refresh token received. Make sure you are requesting offline access.'); } // Save the access token for immediate use this.accessToken_ = response.accessToken; } catch (error) { throw new Error(`Failed to exchange auth code for tokens: ${error.message}`); } } else if (!this.credentials_.refreshToken) { throw new Error('Either refreshToken or authCode must be provided in the credentials'); } } // Initialize the graph client await this.authenticate(); this.initialized = true; } /** * Start the OAuth flow to get authorization from the user * @returns The authorization URL that the user should visit */ static async startOAuthFlow(clientId, clientSecret, redirectUri, tenantId, port = 3000, callbackPath = '/oauth/oauth2callback') { const oauthService = new OAuthService(new OutlookOAuthProvider()); const authUrl = await oauthService.startOAuthFlow({ clientId, clientSecret, redirectUri, scopes: ['Mail.Read', 'offline_access', 'openid', 'profile', 'User.Read'], prompt: 'consent' }, undefined, // No user ID needed for this flow callbackPath, port); return authUrl; } /** * Handle the OAuth callback manually (for server-side applications) * @returns TokenData containing access and refresh tokens */ static async handleOAuthCallback(code, clientId, clientSecret, redirectUri, tenantId) { const oauthService = new OAuthService(new OutlookOAuthProvider()); const options = { clientId, clientSecret, redirectUri, scopes: ['Mail.Read', 'offline_access', 'openid', 'profile', 'User.Read'], tenantId }; const tokenData = await oauthService.handleCallback(code, options); return { accessToken: tokenData.accessToken, refreshToken: tokenData.refreshToken }; } ensureInitialized() { if (!this.initialized || !this.graphClient_ || !this.credentials_) { throw new Error('OutlookAdapter not initialized. Call initialize(credentials) first.'); } } async authenticate() { if (!this.credentials_ && !this.msalApp_) { throw new Error('OutlookAdapter credentials not set. Call initialize(credentials) first.'); } try { // If we already have a valid access token from the auth code flow, use it if (this.accessToken_) { // Create the graph client with the existing token this.graphClient_ = Client.init({ authProvider: (done) => { done(null, this.accessToken_); }, }); return; } // Otherwise, use refresh token to get a new access token if (!this.credentials_?.refreshToken) { throw new Error('No refresh token available for authentication'); } const tokenRequest = { refreshToken: this.credentials_?.refreshToken, scopes: ['https://graph.microsoft.com/Mail.Read', 'offline_access'], }; const response = await this.msalApp_?.acquireTokenByRefreshToken(tokenRequest); if (!response || !response.accessToken) { throw new Error('Failed to acquire access token'); } // Save the access token this.accessToken_ = response.accessToken; // Create the graph client with the new token this.graphClient_ = Client.init({ authProvider: (done) => { done(null, this.accessToken_); }, }); } catch (error) { console.error('Outlook authentication error:', error); // Provide more specific error messages if (error?.message?.includes('interaction_required')) { throw new Error('Outlook authentication failed: Interactive login is required. The refresh token may be expired.'); } throw new Error(`Outlook authentication failed: ${error.message}`); } } async fetchEmails(options) { this.ensureInitialized(); await this.authenticate(); // Ensure token is fresh const { limit = 10, query, since, before, unreadOnly, includeBody = true, includeAttachments = true, format, pageToken, pageSize, getAllPages = false } = options; // Determine the best format strategy based on options let fetchStrategy = 'full'; // If format isn't explicitly specified, infer it based on what the user needs if (!format) { if (!includeBody && !includeAttachments) { fetchStrategy = 'metadata'; // Just need headers } else if (!includeBody || !includeAttachments) { fetchStrategy = 'minimal'; // Need some content but not everything } else { fetchStrategy = 'full'; // Need everything } } else { // Map Gmail-style format options to Outlook strategies switch (format) { case 'metadata': fetchStrategy = 'metadata'; break; case 'full': fetchStrategy = 'minimal'; break; case 'raw': default: fetchStrategy = 'full'; break; } } // Build Outlook-specific filter let filter = ''; if (since) { const sinceDate = typeof since === 'string' ? new Date(since) : since; filter += filter ? ' and ' : ''; filter += `receivedDateTime ge ${sinceDate.toISOString()}`; } if (before) { const beforeDate = typeof before === 'string' ? new Date(before) : before; filter += filter ? ' and ' : ''; filter += `receivedDateTime le ${beforeDate.toISOString()}`; } if (unreadOnly) { filter += filter ? ' and ' : ''; filter += 'isRead eq false'; } // Search term (query) is handled differently in Outlook than filter const searchTerm = query || undefined; try { if (getAllPages) { // Show deprecation warning to match Gmail behavior console.warn('Warning: getAllPages option is deprecated and may cause memory issues with large datasets. Consider using streamEmails() instead.'); return await this.fetchAllEmailPages(filter, searchTerm, limit, fetchStrategy, includeBody, includeAttachments); } else { return await this.fetchEmailPage(filter, searchTerm, pageSize || limit, fetchStrategy, includeBody, includeAttachments, pageToken); } } catch (error) { console.error('Error fetching Outlook emails:', error); // Check for specific Microsoft Graph API errors if (error.code === '401' || error.status === 401) { throw new Error(`Outlook authentication error (401). Check your refresh token and API permissions. Original: ${error.message}`); } if (error.code === 'InvalidAuthenticationToken' || error.message?.includes('InvalidAuthenticationToken')) { throw new Error(`Outlook authentication token is invalid or expired. Please re-authenticate. Original: ${error.message}`); } if (error.code === 'Forbidden' || error.status === 403) { throw new Error(`Outlook API access forbidden. Check your application permissions for Mail.Read. Original: ${error.message}`); } throw new Error(`Failed to fetch Outlook emails: ${error.message}`); } } /** * Fetches a single page of emails with enhanced format support */ async fetchEmailPage(filter, searchTerm, maxResults, fetchStrategy, includeBody, includeAttachments, skipToken) { // Build the initial request let messagesRequest = this.graphClient_.api('/me/messages') .top(maxResults); // Add filter if specified if (filter) { messagesRequest = messagesRequest.filter(filter); } // Add search capability if a query was provided if (searchTerm) { messagesRequest = messagesRequest.search(searchTerm); } // Add skip token for pagination if provided if (skipToken) { messagesRequest = messagesRequest.skipToken(skipToken); } // Select fields based on fetch strategy and requirements let select = this.buildSelectFields(fetchStrategy, includeBody, includeAttachments); messagesRequest = messagesRequest.select(select.join(',')); // Execute the request const response = await messagesRequest.get(); // Check if we have messages if (!response.value || response.value.length === 0) { return { emails: [], nextPageToken: response['@odata.nextLink'] ? this.extractSkipTokenFromNextLink(response['@odata.nextLink']) : undefined, totalCount: undefined // Outlook API doesn't provide a count }; } // Process each message const normalizedEmails = []; for (const message of response.value) { let normalized = this.mapOutlookMessageToNormalized(message); // Apply fetch strategy modifications if (fetchStrategy === 'metadata' && !includeBody) { // Remove body content for metadata-only requests normalized.bodyText = undefined; normalized.bodyHtml = undefined; } // Fetch attachments if message has any and we are requested to include them if (includeAttachments && message.hasAttachments && fetchStrategy !== 'metadata') { const attachments = await this.fetchAttachments(message.id); normalized.attachments = attachments; } else if (!includeAttachments || fetchStrategy === 'metadata') { // Clear attachments but keep count if we have it normalized.attachments = []; } normalizedEmails.push(normalized); } return { emails: normalizedEmails, nextPageToken: response['@odata.nextLink'] ? this.extractSkipTokenFromNextLink(response['@odata.nextLink']) : undefined, totalCount: undefined // Microsoft Graph API doesn't provide total count }; } /** * Fetches all pages of emails up to the specified limit with enhanced format support */ async fetchAllEmailPages(filter, searchTerm, limit, fetchStrategy, includeBody, includeAttachments, requestPageSize) { const allEmails = []; let nextPageToken; // Use a reasonable page size (50 is optimal for Outlook API) const pageSize = requestPageSize || Math.min(limit, 50); do { const response = await this.fetchEmailPage(filter, searchTerm, pageSize, fetchStrategy, includeBody, includeAttachments, nextPageToken); allEmails.push(...response.emails); nextPageToken = response.nextPageToken; // Stop if we've reached the limit or there are no more pages if (!nextPageToken || allEmails.length >= limit) { break; } } while (true); // Enforce the limit (in case we fetched more than needed) const limitedEmails = allEmails.slice(0, limit); return { emails: limitedEmails, // Don't return nextPageToken if we've fetched all pages or reached the limit nextPageToken: allEmails.length >= limit ? nextPageToken : undefined, totalCount: undefined // Outlook API doesn't provide a count }; } /** * Helper method to build field selection based on fetch strategy */ buildSelectFields(fetchStrategy, includeBody, includeAttachments) { // Base fields always needed let select = ['id', 'conversationId', 'subject', 'from', 'toRecipients', 'ccRecipients', 'bccRecipients', 'receivedDateTime', 'internetMessageId', 'importance', 'categories']; // Add attachment info if needed if (includeAttachments && fetchStrategy !== 'metadata') { select.push('hasAttachments'); } // Add body fields based on strategy and requirements switch (fetchStrategy) { case 'full': if (includeBody) { select.push('body'); } else { select.push('bodyPreview'); } break; case 'minimal': if (includeBody) { select.push('body'); } else { select.push('bodyPreview'); } break; case 'metadata': // Only include preview for metadata-only requests select.push('bodyPreview'); break; } return select; } /** * Helper method to determine fetch strategy from options (similar to Gmail's format detection) */ determineOutlookFetchStrategy(options) { if (options.format) { switch (options.format) { case 'metadata': return 'metadata'; case 'full': return 'minimal'; case 'raw': default: return 'full'; } } const includeBody = options.includeBody !== false; const includeAttachments = options.includeAttachments !== false; if (!includeBody && !includeAttachments) { return 'metadata'; } else if (!includeBody || !includeAttachments) { return 'minimal'; } else { return 'full'; } } /** * Fetches attachments for a message */ async fetchAttachments(messageId) { try { const attachmentsResponse = await this.graphClient_.api(`/me/messages/${messageId}/attachments`) .select('id,name,contentType,size,isInline,contentId,contentBytes') .get(); if (!attachmentsResponse.value || attachmentsResponse.value.length === 0) { return []; } return attachmentsResponse.value.map((att) => { const attachment = { filename: att.name, mimeType: att.contentType, size: att.size, contentId: att.contentId, }; // Convert Base64 content to Buffer if available if (att.contentBytes) { attachment.buffer = Buffer.from(att.contentBytes, 'base64'); } return attachment; }); } catch (error) { console.error(`Error fetching attachments for message ${messageId}:`, error); return []; } } /** * Maps an Outlook message to our normalized email format */ mapOutlookMessageToNormalized(message) { return { id: message.id, threadId: message.conversationId, from: message.from ? message.from.emailAddress.address : '', to: (message.toRecipients || []).map(r => r.emailAddress.address), cc: message.ccRecipients ? message.ccRecipients.map(r => r.emailAddress.address) : undefined, bcc: message.bccRecipients ? message.bccRecipients.map(r => r.emailAddress.address) : undefined, subject: message.subject, bodyText: message.bodyPreview, bodyHtml: message.body?.contentType === 'html' ? message.body.content : undefined, attachments: [], // Will be filled separately if needed date: new Date(message.receivedDateTime || message.sentDateTime || Date.now()), // Map categories to labels for consistency with Gmail implementation labels: message.categories || [], provider: 'outlook', raw: message }; } /** * Extracts the skip token from Outlook's nextLink URL */ extractSkipTokenFromNextLink(nextLink) { const match = nextLink.match(/\$skiptoken=([^&]+)/); return match ? match[1] : undefined; } /** * Stream emails using async generator * This method provides memory-efficient streaming of emails */ async *streamEmails(options) { this.ensureInitialized(); await this.authenticate(); // Validate options EmailStreamService.validateStreamOptions(options); // Build Outlook filter and search term from options const { filter, searchTerm } = this.buildOutlookQueryFromStreamOptions(options); // Determine fetch strategy const fetchStrategy = this.determineOutlookFetchStrategy(options); // Create the fetch function for the stream service const fetchPageFn = async (pageToken, pageSize) => { return await this.fetchEmailPage(filter, searchTerm, pageSize || options.batchSize || 50, fetchStrategy, options.includeBody !== false, options.includeAttachments !== false, pageToken); }; // Use the stream service to create the generator yield* EmailStreamService.createEmailStream(fetchPageFn, options); } /** * Stream emails with callback-based progress tracking * Provides detailed progress information and error handling */ async fetchEmailsStream(options, callbacks) { this.ensureInitialized(); await this.authenticate(); // Create enhanced progress tracking let totalCount; const enhancedCallbacks = { ...callbacks, onBatch: async (emails, progress) => { // Enhance progress with Outlook-specific information const enhancedProgress = { ...progress, total: totalCount, estimatedRemaining: EmailStreamService.calculateEstimatedRemaining(totalCount, progress.current) }; if (callbacks.onBatch) { await callbacks.onBatch(emails, enhancedProgress); } }, onProgress: async (progress) => { const enhancedProgress = { ...progress, total: totalCount, estimatedRemaining: EmailStreamService.calculateEstimatedRemaining(totalCount, progress.current) }; if (callbacks.onProgress) { await callbacks.onProgress(enhancedProgress); } } }; // Create stream generator and process it const streamGenerator = this.streamEmails(options); // Get total count from first batch if available const firstBatch = await streamGenerator.next(); if (!firstBatch.done && firstBatch.value.length > 0) { // Try to get total count - this is a best effort for Outlook // Note: Microsoft Graph API doesn't provide exact counts easily // We'll skip this optimization for now // Process the first batch we already retrieved if (enhancedCallbacks.onBatch) { const progress = { current: firstBatch.value.length, total: totalCount, batchCount: 1, estimatedRemaining: EmailStreamService.calculateEstimatedRemaining(totalCount, firstBatch.value.length) }; await enhancedCallbacks.onBatch(firstBatch.value, progress); } // Create a new generator that includes the first batch const remainingGenerator = async function* () { yield firstBatch.value; yield* streamGenerator; }; await EmailStreamService.processEmailStream(remainingGenerator(), enhancedCallbacks); } else { // No emails found await EmailStreamService.processEmailStream(streamGenerator, enhancedCallbacks); } } /** * Helper method to build Outlook query from stream options */ buildOutlookQueryFromStreamOptions(options) { const filters = []; // Date filters if (options.since) { const sinceDate = typeof options.since === 'string' ? new Date(options.since) : options.since; filters.push(`receivedDateTime ge ${sinceDate.toISOString()}`); } if (options.before) { const beforeDate = typeof options.before === 'string' ? new Date(options.before) : options.before; filters.push(`receivedDateTime le ${beforeDate.toISOString()}`); } // Unread filter if (options.unreadOnly) { filters.push('isRead eq false'); } const filter = filters.length > 0 ? filters.join(' and ') : ''; // Search term (for content search) const searchTerm = options.query || undefined; return { filter, searchTerm }; } }