/*
 * @snehal96/unimail
 * Version:
 * Unified email fetching & document extraction layer for modern web apps
 * 693 lines (692 loc) • 30.9 kB
 * JavaScript
 */
import { google } from 'googleapis';
import { EmailParserService } from '../services/EmailParserService.js';
import { EmailStreamService } from '../services/EmailStreamService.js';
import { OAuthService } from '../auth/OAuthService.js';
import { GoogleOAuthProvider } from '../auth/providers/GoogleOAuthProvider.js';
/**
 * Gmail adapter: authenticates against Google with OAuth2 and exposes
 * fetching (paged and streamed), single-message lookup, history-based sync
 * and push-notification management on top of the Gmail REST API client.
 *
 * Instances must be prepared with `initialize(credentials)` before any other
 * instance method is used.
 */
export class GmailAdapter {
  constructor() {
    this.initialized = false;
    this.emailParserService = new EmailParserService();
  }

  /**
   * Initialize the Gmail adapter with credentials.
   * Supports both traditional refresh token authentication and the OAuth
   * flow using an auth code (which is exchanged for tokens here).
   *
   * Note: when an auth code is exchanged, the received refresh token is
   * written back onto the passed `credentials` object.
   *
   * @param credentials Object with `clientId`, `clientSecret`, optional
   *   `redirectUri`, and either `refreshToken` or `authCode`.
   * @throws Error when neither token source is given, when `authCode` is
   *   used without `redirectUri`, or when the code exchange fails.
   */
  async initialize(credentials) {
    // A failed (re-)initialization must not leave the adapter flagged as usable.
    this.initialized = false;
    this.credentials_ = credentials;
    // Create OAuth2Client
    this.oauth2Client_ = new google.auth.OAuth2(this.credentials_.clientId, this.credentials_.clientSecret, this.credentials_.redirectUri);
    // Handle OAuth flow if auth code is provided instead of refresh token
    if (!this.credentials_.refreshToken && this.credentials_.authCode) {
      if (!this.credentials_.redirectUri) {
        throw new Error('redirectUri is required when using authCode for authentication');
      }
      try {
        // Exchange the auth code for tokens
        const { tokens } = await this.oauth2Client_.getToken(this.credentials_.authCode);
        // Save the refresh token
        if (tokens.refresh_token) {
          this.credentials_.refreshToken = tokens.refresh_token;
        }
        else {
          throw new Error('No refresh token received. Make sure you are requesting offline access and forcing consent.');
        }
        // Set the credentials
        this.oauth2Client_.setCredentials(tokens);
      }
      catch (error) {
        throw new Error(`Failed to exchange auth code for tokens: ${error.message}`, { cause: error });
      }
    }
    else if (this.credentials_.refreshToken) {
      // Use existing refresh token
      this.oauth2Client_.setCredentials({ refresh_token: this.credentials_.refreshToken });
    }
    else {
      throw new Error('Either refreshToken or authCode must be provided in the credentials');
    }
    // Create Gmail API client
    this.gmail_ = google.gmail({ version: 'v1', auth: this.oauth2Client_ });
    this.initialized = true;
  }

  /**
   * Start the OAuth flow to get authorization from the user.
   * Delegates to OAuthService with the Google provider, requesting the
   * full-mailbox scope with offline access and forced consent.
   *
   * @param clientId OAuth client id
   * @param clientSecret OAuth client secret
   * @param redirectUri Redirect URI registered for the client
   * @param port Forwarded to OAuthService.startOAuthFlow (default 3000)
   * @param callbackPath Forwarded to OAuthService.startOAuthFlow (default '/oauth/callback')
   * @returns The authorization URL that the user should visit
   */
  static async startOAuthFlow(clientId, clientSecret, redirectUri, port = 3000, callbackPath = '/oauth/callback') {
    const oauthService = new OAuthService(new GoogleOAuthProvider());
    const authUrl = await oauthService.startOAuthFlow({
      clientId,
      clientSecret,
      redirectUri,
      scopes: ['https://mail.google.com/'],
      accessType: 'offline',
      prompt: 'consent'
    }, undefined, // No user ID needed for this flow
    callbackPath, port);
    return authUrl;
  }

  /**
   * Handle the OAuth callback manually (for server-side applications).
   *
   * @param code Authorization code received on the callback
   * @param clientId OAuth client id
   * @param clientSecret OAuth client secret
   * @param redirectUri Redirect URI used when the flow was started
   * @returns Object with `accessToken` and `refreshToken`
   */
  static async handleOAuthCallback(code, clientId, clientSecret, redirectUri) {
    const oauthService = new OAuthService(new GoogleOAuthProvider());
    const tokenData = await oauthService.handleCallback(code, {
      clientId,
      clientSecret,
      redirectUri,
      scopes: ['https://mail.google.com/']
    });
    return {
      accessToken: tokenData.accessToken,
      refreshToken: tokenData.refreshToken
    };
  }

  /**
   * Guard used by every instance operation.
   * @throws Error when `initialize` has not completed successfully.
   */
  ensureInitialized() {
    if (!this.initialized || !this.oauth2Client_ || !this.gmail_ || !this.credentials_) {
      throw new Error('GmailAdapter not initialized. Call initialize(credentials) first.');
    }
  }

  /**
   * Verify that an access token can be obtained from the OAuth2 client.
   * @throws Error with a Gmail-specific message when no token is available;
   *   the underlying error is attached as `cause`.
   */
  async authenticate() {
    this.ensureInitialized();
    try {
      // The getAccessToken method will handle refreshing if necessary
      const tokenResponse = await this.oauth2Client_.getAccessToken();
      if (!tokenResponse.token) {
        throw new Error('Failed to refresh access token.');
      }
    }
    catch (error) {
      console.error('Gmail authentication error:', error);
      // Provide more specific error messages based on the type of error
      if (error.message?.includes('invalid_grant')) {
        throw new Error('Gmail authentication failed: Invalid grant. Refresh token might be expired or revoked.', { cause: error });
      }
      throw new Error(`Gmail authentication failed: ${error.message}`, { cause: error });
    }
  }

  /**
   * Fetch emails matching the given options.
   *
   * @param options Supports `limit` (default 10), `query`, `since`, `before`,
   *   `unreadOnly`, `includeBody` (default true), `includeAttachments`
   *   (default true), `format`, `pageToken`, `pageSize` and the deprecated
   *   `getAllPages`.
   * @returns `{ emails, nextPageToken, totalCount }`
   * @throws Error when a date option is invalid or the Gmail request fails.
   */
  async fetchEmails(options = {}) {
    this.ensureInitialized();
    await this.authenticate(); // Ensure token is fresh
    const { limit = 10, includeBody = true, includeAttachments = true, format, pageToken, pageSize, getAllPages = false } = options;
    // Query and format rules are shared with the streaming API.
    const gmailQuery = this.buildGmailQuery(options);
    const messageFormat = this.determineMessageFormat({
      format,
      // Coerce so that any falsy value disables the feature, as the defaults above imply.
      includeBody: Boolean(includeBody),
      includeAttachments: Boolean(includeAttachments)
    });
    try {
      if (getAllPages) {
        // Show deprecation warning
        console.warn('Warning: getAllPages option is deprecated and may cause memory issues with large datasets. Consider using streamEmails() instead.');
        // Fetch all pages up to limit
        return await this.fetchAllEmailPages(gmailQuery, limit, messageFormat, includeBody, includeAttachments);
      }
      // Fetch a single page
      return await this.fetchEmailPage(gmailQuery, pageSize || limit, messageFormat, includeBody, includeAttachments, pageToken);
    }
    catch (error) {
      console.error('Error fetching Gmail emails:', error);
      // Check for specific Google API errors if possible
      if (error.code === 401) {
        throw new Error(`Gmail authentication error (401). Check your refresh token and API permissions. Original: ${error.message}`, { cause: error });
      }
      throw new Error(`Failed to fetch Gmail emails: ${error.message}`, { cause: error });
    }
  }

  /**
   * Stream emails in batches using an async generator.
   * Paging itself is driven by EmailStreamService.createEmailStream; this
   * method only supplies the page-fetching function.
   *
   * @param options Stream options (`query`, `since`, `before`, `unreadOnly`,
   *   `format`, `includeBody`, `includeAttachments`, `batchSize`, ...).
   */
  async *streamEmails(options) {
    this.ensureInitialized();
    await this.authenticate();
    // Validate options
    EmailStreamService.validateStreamOptions(options);
    const gmailQuery = this.buildGmailQuery(options);
    const messageFormat = this.determineMessageFormat(options);
    // Page fetcher handed to the stream service; batch size falls back to 50.
    const fetchPageFn = async (pageToken, pageSize) => {
      return await this.fetchEmailPage(gmailQuery, pageSize || options.batchSize || 50, messageFormat, options.includeBody !== false, options.includeAttachments !== false, pageToken);
    };
    yield* EmailStreamService.createEmailStream(fetchPageFn, options);
  }

  /**
   * Stream emails with callback-based progress tracking.
   * Progress objects passed to `onBatch` / `onProgress` are enriched with a
   * best-effort `total` (Gmail's result size estimate) and
   * `estimatedRemaining`.
   *
   * @param options Stream options, see `streamEmails`
   * @param callbacks Optional callbacks; `onBatch(emails, progress)` and
   *   `onProgress(progress)` are wrapped, all others are passed through.
   */
  async fetchEmailsStream(options, callbacks) {
    this.ensureInitialized();
    await this.authenticate();
    const userCallbacks = callbacks ?? {};
    let totalCount;
    // Adds the Gmail-specific totals to a progress object from the stream service.
    const withTotals = (progress) => ({
      ...progress,
      total: totalCount,
      estimatedRemaining: EmailStreamService.calculateEstimatedRemaining(totalCount, progress.current)
    });
    const enhancedCallbacks = {
      ...userCallbacks,
      onBatch: async (emails, progress) => {
        if (userCallbacks.onBatch) {
          await userCallbacks.onBatch(emails, withTotals(progress));
        }
      },
      onProgress: async (progress) => {
        if (userCallbacks.onProgress) {
          await userCallbacks.onProgress(withTotals(progress));
        }
      }
    };
    const streamGenerator = this.streamEmails(options);
    // Peek at the first batch so the count query is only issued when there is something to process.
    const firstBatch = await streamGenerator.next();
    if (firstBatch.done || !(firstBatch.value.length > 0)) {
      // No emails found
      await EmailStreamService.processEmailStream(streamGenerator, enhancedCallbacks);
      return;
    }
    // Try to get total count - this is a best effort
    try {
      const countResponse = await this.gmail_.users.messages.list({
        userId: 'me',
        q: this.buildGmailQuery(options) || undefined,
        maxResults: 1
      });
      totalCount = countResponse.data.resultSizeEstimate || undefined;
    }
    catch (error) {
      // Ignore errors getting total count
      console.warn('Could not get total email count:', error);
    }
    // Replay the peeked batch in front of the remaining stream. The batch is
    // delivered to the callbacks by processEmailStream, so it must NOT also be
    // passed to onBatch here (doing both delivered the first batch twice).
    const replayGenerator = async function* () {
      yield firstBatch.value;
      yield* streamGenerator;
    };
    await EmailStreamService.processEmailStream(replayGenerator(), enhancedCallbacks);
  }

  /**
   * Internal helper: format a `since` / `before` option as `YYYY/M/D` using
   * the local calendar fields of the date.
   *
   * @param value Date-like object or a string accepted by `new Date(...)`
   * @param optionName Option name used in the error message
   * @throws Error when the value does not represent a valid date
   */
  formatQueryDate(value, optionName) {
    const date = typeof value === 'string' ? new Date(value) : value;
    if (typeof date?.getTime !== 'function' || Number.isNaN(date.getTime())) {
      throw new Error(`Invalid "${optionName}" date: ${String(value)}`);
    }
    return `${date.getFullYear()}/${date.getMonth() + 1}/${date.getDate()}`;
  }

  /**
   * Helper method to build the Gmail search query from fetch/stream options.
   *
   * @param options Reads `query`, `since`, `before` and `unreadOnly`
   * @returns The trimmed query string (possibly empty)
   * @throws Error when `since` or `before` is not a valid date
   */
  buildGmailQuery(options) {
    const terms = [options.query || ''];
    if (options.since) {
      terms.push(`after:${this.formatQueryDate(options.since, 'since')}`);
    }
    if (options.before) {
      terms.push(`before:${this.formatQueryDate(options.before, 'before')}`);
    }
    if (options.unreadOnly) {
      terms.push('is:unread');
    }
    return terms.join(' ').trim();
  }

  /**
   * Helper method to determine the Gmail message format from options.
   * An explicit `format` wins; otherwise headers only -> 'metadata',
   * body and attachments -> 'raw', anything in between -> 'full'.
   */
  determineMessageFormat(options) {
    if (options.format) {
      return options.format;
    }
    const includeBody = options.includeBody !== false;
    const includeAttachments = options.includeAttachments !== false;
    if (!includeBody && !includeAttachments) {
      return 'metadata';
    }
    if (includeBody && includeAttachments) {
      return 'raw';
    }
    return 'full';
  }

  /**
   * Internal helper: normalize a message that was fetched in 'raw' format.
   * The raw payload is base64-decoded, read as UTF-8 text and handed to the
   * email parser service; thread id and labels come from the API response
   * when present.
   *
   * @param data Gmail message resource containing `raw`
   */
  async parseRawMessage(data) {
    const rawEmail = Buffer.from(data.raw, 'base64').toString('utf-8');
    const normalized = await this.emailParserService.parseEmail(rawEmail, data.id, 'gmail');
    normalized.threadId = data.threadId || normalized.threadId;
    normalized.labels = data.labelIds || normalized.labels;
    return normalized;
  }

  /**
   * Fetches a single page of emails.
   * Lists message ids for the query, then retrieves and normalizes each
   * message one at a time. Attachments whose content id is referenced as
   * `cid:` in the HTML body are dropped as inline images.
   *
   * @returns `{ emails, nextPageToken, totalCount }`
   */
  async fetchEmailPage(query, maxResults, messageFormat, includeBody, includeAttachments, pageToken) {
    const listMessagesResponse = await this.gmail_.users.messages.list({
      userId: 'me',
      q: query || undefined, // q parameter cannot be empty string
      maxResults,
      pageToken
    });
    const { messages, nextPageToken, resultSizeEstimate } = listMessagesResponse.data;
    const page = {
      emails: [],
      nextPageToken: nextPageToken || undefined,
      totalCount: resultSizeEstimate || undefined
    };
    if (!messages || messages.length === 0) {
      return page;
    }
    for (const messageHeader of messages) {
      if (!messageHeader.id) {
        continue;
      }
      const { data } = await this.gmail_.users.messages.get({
        userId: 'me',
        id: messageHeader.id,
        format: messageFormat
      });
      const normalized = messageFormat === 'raw' && data.raw
        ? await this.parseRawMessage(data)
        : await this.parseStructuredMessage(data, includeBody, includeAttachments);
      // Skip inline images: attachments referenced from the HTML body via cid:
      normalized.attachments = (normalized.attachments || []).filter((att) => {
        const isInline = att.contentId && normalized.bodyHtml?.includes(`cid:${att.contentId.replace(/[<>]/g, '')}`);
        return !isInline;
      });
      page.emails.push(normalized);
    }
    return page;
  }

  /**
   * Fetches pages of emails until `limit` emails are collected or no pages
   * remain.
   *
   * Each request asks only for the number of emails still missing, so the
   * returned `nextPageToken` continues exactly after the last returned email
   * (no email is fetched and then discarded).
   *
   * @param size Optional page size; defaults to `min(limit, 100)`
   * @returns `{ emails, nextPageToken, totalCount }`
   */
  async fetchAllEmailPages(query, limit, messageFormat, includeBody, includeAttachments, size) {
    const allEmails = [];
    let nextPageToken;
    let totalCount;
    // Use a reasonable page size (Gmail API default is 100)
    const pageSize = size || Math.min(limit, 100);
    do {
      const remaining = limit - allEmails.length;
      const response = await this.fetchEmailPage(query, Math.min(pageSize, remaining), messageFormat, includeBody, includeAttachments, nextPageToken);
      allEmails.push(...response.emails);
      nextPageToken = response.nextPageToken;
      // Store the total count from the first response that provides one
      if (totalCount === undefined && response.totalCount !== undefined) {
        totalCount = response.totalCount;
      }
    } while (nextPageToken && allEmails.length < limit);
    return {
      // Defensive: never hand back more than requested.
      emails: allEmails.slice(0, limit),
      // Only meaningful when the limit stopped us before the last page
      nextPageToken: allEmails.length >= limit ? nextPageToken : undefined,
      totalCount
    };
  }

  /**
   * Internal helper: split an address-list header value on commas that are
   * outside double quotes, so display names such as `"Doe, John" <j@x.org>`
   * stay intact. Entries are trimmed and empty entries are dropped.
   *
   * @param value Raw header value (may be empty or undefined)
   * @returns Array of address strings
   */
  splitAddressList(value) {
    if (!value) {
      return [];
    }
    const addresses = [];
    let current = '';
    let inQuotes = false;
    for (let i = 0; i < value.length; i++) {
      const ch = value[i];
      if (ch === '\\' && inQuotes && i + 1 < value.length) {
        // Keep escaped characters (e.g. \" ) together inside a quoted name.
        current += ch + value[i + 1];
        i++;
        continue;
      }
      if (ch === '"') {
        inQuotes = !inQuotes;
      }
      if (ch === ',' && !inQuotes) {
        addresses.push(current.trim());
        current = '';
        continue;
      }
      current += ch;
    }
    addresses.push(current.trim());
    return addresses.filter((address) => address.length > 0);
  }

  /**
   * Internal helper: build an attachment record for a message part that has
   * a filename. Uses data embedded in the part when present, otherwise
   * downloads it by attachment id; a failed download is logged and the
   * attachment is returned without a buffer.
   *
   * @param part Message part with `filename` and `body`
   * @param messageId Id of the message owning the part
   */
  async buildAttachment(part, messageId) {
    const attachment = {
      filename: part.filename,
      mimeType: part.mimeType || 'application/octet-stream',
      size: part.body.size || 0,
      contentId: part.headers?.find((h) => h.name?.toLowerCase() === 'content-id')?.value || undefined
    };
    if (part.body.data) {
      attachment.buffer = Buffer.from(part.body.data, 'base64');
    }
    else if (part.body.attachmentId) {
      try {
        const attachmentResponse = await this.gmail_.users.messages.attachments.get({
          userId: 'me',
          messageId,
          id: part.body.attachmentId
        });
        if (attachmentResponse.data.data) {
          attachment.buffer = Buffer.from(attachmentResponse.data.data, 'base64');
        }
      }
      catch (error) {
        console.error(`Failed to fetch attachment ${part.filename}:`, error);
      }
    }
    return attachment;
  }

  /**
   * Parses a Gmail message from structured data (when using 'full' or 'metadata' format).
   *
   * The part tree is walked recursively, so bodies nested inside multipart
   * containers are found. A part with a filename is treated as an attachment
   * (and not descended into); the first text/plain and first text/html part
   * found become the bodies. Attachments are collected whenever
   * `includeAttachments` is set, independent of `includeBody`.
   *
   * The date comes from the Date header when it parses; otherwise from the
   * message's `internalDate` when present, and finally the current time.
   *
   * @param message The Gmail message object from the API
   * @param includeBody Whether to include message body content
   * @param includeAttachments Whether to include attachment data
   * @returns The normalized email
   */
  async parseStructuredMessage(message, includeBody = true, includeAttachments = true) {
    const internalMs = message.internalDate ? Number(message.internalDate) : NaN;
    const normalized = {
      id: message.id,
      threadId: message.threadId || undefined,
      from: '',
      to: [],
      attachments: [],
      date: Number.isFinite(internalMs) ? new Date(internalMs) : new Date(),
      provider: 'gmail',
      labels: message.labelIds || []
    };
    // Extract headers
    for (const header of message.payload?.headers || []) {
      switch (header.name?.toLowerCase()) {
        case 'from':
          normalized.from = header.value || '';
          break;
        case 'to':
          normalized.to = this.splitAddressList(header.value);
          break;
        case 'cc':
          normalized.cc = this.splitAddressList(header.value);
          break;
        case 'bcc':
          normalized.bcc = this.splitAddressList(header.value);
          break;
        case 'subject':
          normalized.subject = header.value || undefined;
          break;
        case 'date': {
          const headerDate = header.value ? new Date(header.value) : undefined;
          // Keep the fallback date when the header is absent or unparsable.
          if (headerDate && !Number.isNaN(headerDate.getTime())) {
            normalized.date = headerDate;
          }
          break;
        }
      }
    }
    const decodeText = (data) => Buffer.from(data, 'base64').toString('utf-8');
    // Depth-first walk over the MIME part tree.
    const walk = async (part, isRoot) => {
      if (part.filename) {
        if (includeAttachments && part.body) {
          normalized.attachments.push(await this.buildAttachment(part, message.id));
        }
        return;
      }
      if (part.parts && part.parts.length > 0) {
        for (const child of part.parts) {
          await walk(child, false);
        }
        return;
      }
      const data = part.body?.data;
      if (!includeBody || !data) {
        return;
      }
      if (part.mimeType === 'text/html') {
        if (normalized.bodyHtml === undefined) {
          normalized.bodyHtml = decodeText(data);
        }
      }
      // A single-part message of any other type is kept as text, as before.
      else if (part.mimeType === 'text/plain' || isRoot) {
        if (normalized.bodyText === undefined) {
          normalized.bodyText = decodeText(data);
        }
      }
    };
    if (message.payload && (includeBody || includeAttachments)) {
      await walk(message.payload, true);
    }
    return normalized;
  }

  // =====================================================
  // SYNC CAPABILITIES - Gmail History API & Push Notifications
  // =====================================================

  /**
   * Get the current history ID for this Gmail account (read from the user
   * profile). This serves as a starting point for tracking changes.
   *
   * @throws Error when the profile request fails
   */
  async getCurrentHistoryId() {
    this.ensureInitialized();
    await this.authenticate();
    try {
      const profileResponse = await this.gmail_.users.getProfile({
        userId: 'me'
      });
      return profileResponse.data.historyId;
    }
    catch (error) {
      throw new Error(`Failed to get current history ID: ${error.message}`, { cause: error });
    }
  }

  /**
   * Get history records since the specified history ID.
   *
   * @param startHistoryId History id to start from
   * @param options `maxResults` (default 100), `labelIds` (only the first
   *   entry is used), `includeDeleted` (default true) and `pageToken` to
   *   continue from a previously returned `nextPageToken`.
   * @returns `{ history, nextPageToken, historyId }`
   * @throws Error when the history id is rejected or the request fails
   */
  async getHistory(startHistoryId, options = {}) {
    this.ensureInitialized();
    await this.authenticate();
    const { maxResults = 100, labelIds, includeDeleted = true, pageToken } = options;
    try {
      // Gmail API expects labelId as a single string, not an array;
      // only the first requested label is applied.
      const labelId = labelIds && labelIds.length > 0 ? labelIds[0] : undefined;
      const historyTypes = includeDeleted
        ? ['messageAdded', 'messageDeleted', 'labelAdded', 'labelRemoved']
        : ['messageAdded', 'labelAdded', 'labelRemoved'];
      const historyResponse = await this.gmail_.users.history.list({
        userId: 'me',
        startHistoryId,
        maxResults,
        labelId,
        pageToken,
        historyTypes
      });
      const history = (historyResponse.data.history || []).map((record) => ({
        id: record.id,
        messages: record.messages,
        messagesAdded: record.messagesAdded,
        messagesDeleted: record.messagesDeleted,
        labelsAdded: record.labelsAdded,
        labelsRemoved: record.labelsRemoved
      }));
      return {
        history,
        nextPageToken: historyResponse.data.nextPageToken || undefined,
        historyId: historyResponse.data.historyId
      };
    }
    catch (error) {
      // Handle case where start history ID is too old
      if (error.code === 404 || error.message?.includes('historyId')) {
        throw new Error(`History ID ${startHistoryId} is too old or invalid. Use getCurrentHistoryId() to get a fresh starting point.`, { cause: error });
      }
      throw new Error(`Failed to get history: ${error.message}`, { cause: error });
    }
  }

  /**
   * Get a specific email by its ID (fetched in 'raw' format).
   *
   * @param id Gmail message id
   * @returns The normalized email, or null when the API answers 404 or
   *   returns no data
   * @throws Error for any other failure
   */
  async getEmailById(id) {
    this.ensureInitialized();
    await this.authenticate();
    try {
      const messageResponse = await this.gmail_.users.messages.get({
        userId: 'me',
        id,
        format: 'raw' // Use raw format for complete parsing
      });
      const data = messageResponse.data;
      if (!data) {
        return null;
      }
      if (data.raw) {
        return await this.parseRawMessage(data);
      }
      // Fallback to structured parsing
      return await this.parseStructuredMessage(data, true, true);
    }
    catch (error) {
      if (error.code === 404) {
        return null; // Email not found or no access
      }
      throw new Error(`Failed to get email by ID ${id}: ${error.message}`, { cause: error });
    }
  }

  /**
   * Set up Gmail push notifications via `users.watch`.
   *
   * @param config `topicName`, optional `labelIds` and `labelFilterAction`
   *   (defaults to 'include')
   * @returns `{ historyId, expiration, topicName }` with `expiration` parsed
   *   as a base-10 integer
   * @throws Error when the watch request fails
   */
  async setupPushNotifications(config) {
    this.ensureInitialized();
    await this.authenticate();
    try {
      const watchResponse = await this.gmail_.users.watch({
        userId: 'me',
        requestBody: {
          topicName: config.topicName,
          labelIds: config.labelIds,
          labelFilterAction: config.labelFilterAction || 'include'
        }
      });
      return {
        historyId: watchResponse.data.historyId,
        expiration: Number.parseInt(watchResponse.data.expiration, 10),
        topicName: config.topicName
      };
    }
    catch (error) {
      throw new Error(`Failed to setup push notifications: ${error.message}`, { cause: error });
    }
  }

  /**
   * Stop Gmail push notifications via `users.stop`.
   * @throws Error when the stop request fails
   */
  async stopPushNotifications() {
    this.ensureInitialized();
    await this.authenticate();
    try {
      await this.gmail_.users.stop({
        userId: 'me'
      });
    }
    catch (error) {
      throw new Error(`Failed to stop push notifications: ${error.message}`, { cause: error });
    }
  }

  /**
   * Process sync changes from a given history ID.
   * Reads one page of history and classifies each message id once: newly
   * added messages and label changes are fetched in full, deletions are
   * reported by id.
   *
   * @param options Requires `startHistoryId`; all options are forwarded to
   *   `getHistory` (including `pageToken` to continue when a previous call
   *   reported `hasMoreChanges`).
   * @returns `{ processedHistoryRecords, addedEmails, deletedEmailIds,
   *   updatedEmails, newHistoryId, hasMoreChanges, nextPageToken }`
   * @throws Error when `startHistoryId` is missing or processing fails
   */
  async processSync(options = {}) {
    this.ensureInitialized();
    await this.authenticate();
    const { startHistoryId } = options;
    if (!startHistoryId) {
      throw new Error('startHistoryId is required for processSync');
    }
    try {
      const historyResponse = await this.getHistory(startHistoryId, options);
      const addedEmails = [];
      const deletedEmailIds = [];
      const updatedEmails = [];
      const processedIds = new Set();
      // Ids whose fetch returned nothing; remembered so they are not requested again.
      const unavailableIds = new Set();
      const fetchOnceInto = async (id, bucket) => {
        if (!id || processedIds.has(id) || unavailableIds.has(id)) {
          return;
        }
        const email = await this.getEmailById(id);
        if (email) {
          bucket.push(email);
          processedIds.add(id);
        }
        else {
          unavailableIds.add(id);
        }
      };
      for (const record of historyResponse.history) {
        // Handle new messages
        for (const added of record.messagesAdded || []) {
          await fetchOnceInto(added.message?.id, addedEmails);
        }
        // Handle deleted messages
        for (const deleted of record.messagesDeleted || []) {
          const id = deleted.message?.id;
          if (id && !processedIds.has(id)) {
            deletedEmailIds.push(id);
            processedIds.add(id);
          }
        }
        // Handle label changes (treat as updates)
        const labelChanges = [
          ...(record.labelsAdded || []),
          ...(record.labelsRemoved || [])
        ];
        for (const change of labelChanges) {
          await fetchOnceInto(change.message?.id, updatedEmails);
        }
      }
      return {
        processedHistoryRecords: historyResponse.history.length,
        addedEmails,
        deletedEmailIds,
        updatedEmails,
        newHistoryId: historyResponse.historyId,
        hasMoreChanges: !!historyResponse.nextPageToken,
        nextPageToken: historyResponse.nextPageToken
      };
    }
    catch (error) {
      throw new Error(`Failed to process sync: ${error.message}`, { cause: error });
    }
  }
}