UNPKG

ms365-mcp-server

Version:

Microsoft 365 MCP Server for managing Microsoft 365 email through natural language interactions with full OAuth2 authentication support

434 lines (433 loc) 16 kB
import { logger } from './api.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import * as crypto from 'crypto';

/** Maximum number of characters of the original attachment name kept in a temp file name. */
const MAX_TEMP_NAME_LENGTH = 100;

/**
 * Return `value` when it is a string, otherwise an empty string.
 *
 * @param {unknown} value
 * @returns {string}
 */
function stringOrEmpty(value) {
    return typeof value === 'string' ? value : '';
}

/**
 * Reduce an attachment name to a single safe path component.
 *
 * Attachment names come from email and are untrusted. Directory parts are
 * dropped for both `/` and `\` separators (on POSIX `path.basename()` would
 * keep `..\\`), and control / reserved characters are replaced by `_`.
 *
 * @param {unknown} name - Attachment name as delivered with the email.
 * @returns {string} A non-empty file name without any path separator.
 */
function sanitizeAttachmentName(name) {
    const leaf = stringOrEmpty(name).split(/[\\/]/).pop();
    const cleaned = leaf.replace(/[\u0000-\u001f\u007f<>:"|?*]/g, '_').trim();
    if (cleaned === '' || cleaned === '.' || cleaned === '..') {
        return 'attachment';
    }
    // Keep the tail so the file extension survives truncation.
    return cleaned.slice(-MAX_TEMP_NAME_LENGTH);
}

/**
 * Build the `metadata` record attached to every processing result.
 *
 * @param {object} attachment - Attachment being processed (may be nullish).
 * @param {object} [overrides] - Fields that differ from the "nothing happened" defaults.
 * @returns {object} Metadata with originalName, size, mimeType, isPasswordProtected,
 *   requiresPassword, extractedText and processingTime.
 */
function buildAttachmentMetadata(attachment, overrides = {}) {
    return {
        originalName: attachment?.name,
        size: attachment?.size || 0,
        mimeType: attachment?.contentType || 'unknown',
        isPasswordProtected: false,
        requiresPassword: false,
        extractedText: false,
        processingTime: 0,
        ...overrides
    };
}

/**
 * Report whether a file contains any of the given markers in any of the given encodings.
 *
 * Searches the raw buffer instead of converting the whole file to a string.
 *
 * @param {string} filePath - File to scan.
 * @param {string[]} markers - Marker strings to look for.
 * @param {string[]} encodings - Buffer encodings in which each marker is searched.
 * @returns {Promise<boolean>}
 */
async function fileHasAnyMarker(filePath, markers, encodings) {
    const buffer = await fs.readFile(filePath);
    return markers.some((marker) => encodings.some((encoding) => buffer.includes(marker, 0, encoding)));
}

/**
 * Saves email attachments to a temp directory, detects password protection,
 * tries a list of guessed passwords and extracts text where supported.
 */
export class DocumentWorkflow {
    /**
     * @param {object} [options] - Overrides merged over `DocumentWorkflow.DEFAULT_OPTIONS`.
     */
    constructor(options = {}) {
        this.options = { ...DocumentWorkflow.DEFAULT_OPTIONS, ...options };
    }

    /**
     * Process all attachments from an email, one after another.
     *
     * A failure on one attachment is recorded as a `failed` result and does
     * not stop the remaining attachments.
     *
     * @param {object} email - Email the attachments belong to.
     * @param {object[]} attachments - Attachments to process; nullish is treated as empty.
     * @returns {Promise<object[]>} One result per attachment, in input order.
     */
    async processEmailAttachments(email, attachments) {
        const pending = attachments ?? [];
        const results = [];
        logger.log(`📎 Processing ${pending.length} attachments from email: ${email?.subject}`);
        await this.ensureTempDirectory();
        for (const attachment of pending) {
            try {
                results.push(await this.processAttachment(email, attachment));
            }
            catch (error) {
                logger.error(`Error processing attachment ${attachment?.name}:`, error);
                results.push({
                    attachment,
                    status: 'failed',
                    metadata: buildAttachmentMetadata(attachment),
                    errors: [error instanceof Error ? error.message : String(error)]
                });
            }
        }
        const successCount = results.filter((r) => r.status === 'success').length;
        logger.log(`📎 Processed ${results.length} attachments, ${successCount} successful`);
        return results;
    }

    /**
     * Process a single attachment.
     *
     * Returns early with status `too_large` or `unsupported` without touching
     * the disk. Otherwise the attachment is written to a temp file, which is
     * kept (and returned as `filePath`) for `success` and `password_protected`
     * results and removed if processing throws.
     *
     * @param {object} email - Email context used for password guessing.
     * @param {object} attachment - Attachment with name, size, contentType and contentBytes.
     * @returns {Promise<object>} Processing result.
     * @throws Rethrows any error raised while saving or analysing the file.
     */
    async processAttachment(email, attachment) {
        const startTime = Date.now();
        const elapsed = () => Date.now() - startTime;

        if (attachment.size && attachment.size > this.options.maxFileSize) {
            return {
                attachment,
                status: 'too_large',
                metadata: buildAttachmentMetadata(attachment, { processingTime: elapsed() }),
                errors: [`File size ${attachment.size} exceeds maximum ${this.options.maxFileSize}`]
            };
        }

        // A missing name yields an empty extension and therefore "unsupported".
        const fileExtension = path.extname(stringOrEmpty(attachment.name)).toLowerCase().substring(1);
        if (!this.options.allowedTypes.includes(fileExtension)) {
            return {
                attachment,
                status: 'unsupported',
                metadata: buildAttachmentMetadata(attachment, { processingTime: elapsed() }),
                errors: [`File type ${fileExtension} not supported`]
            };
        }

        const tempFilePath = await this.saveAttachmentToTemp(attachment);
        try {
            const isPasswordProtected = await this.isPasswordProtected(tempFilePath, fileExtension);
            let textContent;
            let requiresPassword = false;
            let extractedText = false;
            if (isPasswordProtected) {
                const decryptResult = await this.attemptDecryption(email, tempFilePath, fileExtension);
                if (decryptResult.success) {
                    textContent = decryptResult.textContent;
                    extractedText = true;
                }
                else {
                    requiresPassword = true;
                }
            }
            else if (this.options.extractTextContent) {
                const extracted = await this.extractTextContent(tempFilePath, fileExtension);
                textContent = extracted || undefined;
                extractedText = !!extracted;
            }
            return {
                attachment,
                status: requiresPassword ? 'password_protected' : 'success',
                filePath: tempFilePath,
                textContent,
                metadata: buildAttachmentMetadata(attachment, {
                    isPasswordProtected,
                    requiresPassword,
                    extractedText,
                    processingTime: elapsed()
                })
            };
        }
        catch (error) {
            // Do not leave the temp file behind when processing fails.
            await this.cleanupTempFile(tempFilePath);
            throw error;
        }
    }

    /**
     * Decode an attachment's base64 content and write it to a uniquely named temp file.
     *
     * The attachment name is sanitized before it becomes part of the path so a
     * name such as `../../etc/x` cannot escape the temp directory.
     *
     * @param {object} attachment - Attachment with `name` and base64 `contentBytes`.
     * @returns {Promise<string>} Path of the written file.
     * @throws {TypeError} When the attachment carries no content.
     */
    async saveAttachmentToTemp(attachment) {
        if (attachment.contentBytes == null) {
            throw new TypeError(`Attachment ${attachment.name} has no contentBytes to save`);
        }
        const tempFileName = `${crypto.randomUUID()}_${sanitizeAttachmentName(attachment.name)}`;
        const tempFilePath = path.join(this.options.tempDir, tempFileName);
        const buffer = Buffer.from(attachment.contentBytes, 'base64');
        // Idempotent; lets processAttachment() be called without processEmailAttachments().
        await fs.mkdir(this.options.tempDir, { recursive: true });
        // Owner-only permissions: the temp directory may live in a shared location.
        await fs.writeFile(tempFilePath, buffer, { mode: 0o600 });
        return tempFilePath;
    }

    /**
     * Check whether a saved file looks password protected.
     *
     * @param {string} filePath - Path of the saved file.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<boolean>} `false` for unknown extensions or when the check fails.
     */
    async isPasswordProtected(filePath, fileExtension) {
        try {
            switch (fileExtension) {
                case 'pdf':
                    return await this.isPdfPasswordProtected(filePath);
                case 'docx':
                case 'xlsx':
                case 'pptx':
                    return await this.isOfficeDocPasswordProtected(filePath);
                default:
                    return false;
            }
        }
        catch (error) {
            logger.error(`Error checking password protection for ${filePath}:`, error);
            return false;
        }
    }

    /**
     * Heuristically detect an encrypted PDF by scanning for `/Encrypt` or `/P -`.
     *
     * @param {string} filePath - Path of the PDF file.
     * @returns {Promise<boolean>} `false` when no marker is found or the file cannot be read.
     */
    async isPdfPasswordProtected(filePath) {
        try {
            return await fileHasAnyMarker(filePath, ['/Encrypt', '/P -'], ['latin1']);
        }
        catch (error) {
            logger.error(`Error checking password protection for ${filePath}:`, error);
            return false;
        }
    }

    /**
     * Heuristically detect an encrypted Office document.
     *
     * An encrypted OOXML file is an OLE compound file (not a ZIP) whose stream
     * names `EncryptedPackage` / `EncryptionInfo` are stored as UTF-16LE, so the
     * markers are searched in UTF-16LE as well as single-byte form.
     *
     * @param {string} filePath - Path of the Office file.
     * @returns {Promise<boolean>} `false` when no marker is found or the file cannot be read.
     */
    async isOfficeDocPasswordProtected(filePath) {
        try {
            return await fileHasAnyMarker(filePath, ['EncryptedPackage', 'EncryptionInfo'], ['latin1', 'utf16le']);
        }
        catch (error) {
            logger.error(`Error checking password protection for ${filePath}:`, error);
            return false;
        }
    }

    /**
     * Try each generated password until one yields text.
     *
     * @param {object} email - Email context used to generate candidates.
     * @param {string} filePath - Path of the protected file.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<object>} `{ success: true, textContent, usedPassword }` or `{ success: false }`.
     */
    async attemptDecryption(email, filePath, fileExtension) {
        const passwords = this.generatePasswordList(email);
        for (const password of passwords) {
            try {
                const textContent = await this.decryptAndExtractText(filePath, fileExtension, password);
                if (textContent) {
                    logger.log(`✅ Successfully decrypted ${path.basename(filePath)} with password strategy`);
                    return { success: true, textContent, usedPassword: password };
                }
            }
            catch (error) {
                // A failing candidate is expected; move on to the next password.
            }
        }
        return { success: false };
    }

    /**
     * Collect candidate passwords from `DocumentWorkflow.PASSWORD_STRATEGIES`.
     *
     * Candidates keep strategy order, are de-duplicated, stripped of empty or
     * non-string entries and truncated to `options.passwordAttempts`. Earlier
     * strategies therefore take precedence when the limit is reached.
     *
     * @param {object} email - Email context handed to each strategy generator.
     * @returns {string[]} Ordered, unique candidate passwords.
     */
    generatePasswordList(email) {
        const candidates = [];
        for (const strategy of DocumentWorkflow.PASSWORD_STRATEGIES) {
            if (!strategy.generator) {
                candidates.push(...strategy.passwords);
                continue;
            }
            try {
                candidates.push(...strategy.generator(email));
            }
            catch (error) {
                // One broken strategy must not prevent the others from contributing.
                logger.error(`Error generating passwords with strategy ${strategy.name}:`, error);
            }
        }
        const usable = candidates.filter((candidate) => typeof candidate === 'string' && candidate !== '');
        return [...new Set(usable)].slice(0, this.options.passwordAttempts);
    }

    /**
     * Dispatch decryption by file type.
     *
     * @param {string} filePath - Path of the protected file.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @param {string} password - Candidate password.
     * @returns {Promise<string|null>} Extracted text, or `null` when decryption is not possible.
     */
    async decryptAndExtractText(filePath, fileExtension, password) {
        switch (fileExtension) {
            case 'pdf':
                return await this.decryptPdf(filePath, password);
            case 'docx':
            case 'xlsx':
            case 'pptx':
                return await this.decryptOfficeDoc(filePath, password);
            default:
                return null;
        }
    }

    /**
     * Decrypt a PDF. Placeholder: no PDF library is wired in, so this always returns `null`.
     *
     * @param {string} filePath - Path of the PDF file.
     * @param {string} password - Candidate password.
     * @returns {Promise<null>}
     */
    async decryptPdf(filePath, password) {
        return null;
    }

    /**
     * Decrypt an Office document. Placeholder: always returns `null`.
     *
     * @param {string} filePath - Path of the Office file.
     * @param {string} password - Candidate password.
     * @returns {Promise<null>}
     */
    async decryptOfficeDoc(filePath, password) {
        return null;
    }

    /**
     * Extract text from an unprotected file.
     *
     * Only `txt`, `pdf` and `docx` have an extractor; other types yield `null`.
     *
     * @param {string} filePath - Path of the saved file.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<string|null>} Extracted text, or `null` when unsupported or on error.
     */
    async extractTextContent(filePath, fileExtension) {
        try {
            switch (fileExtension) {
                case 'txt':
                    return await this.extractTextFromTxt(filePath);
                case 'pdf':
                    return await this.extractTextFromPdf(filePath);
                case 'docx':
                    return await this.extractTextFromDocx(filePath);
                default:
                    return null;
            }
        }
        catch (error) {
            logger.error(`Error extracting text from ${filePath}:`, error);
            return null;
        }
    }

    /**
     * Read a text file as UTF-8.
     *
     * @param {string} filePath - Path of the text file.
     * @returns {Promise<string>}
     */
    async extractTextFromTxt(filePath) {
        const buffer = await fs.readFile(filePath);
        return buffer.toString('utf8');
    }

    /**
     * Extract text from a PDF. Placeholder: returns a marker string naming the file.
     *
     * @param {string} filePath - Path of the PDF file.
     * @returns {Promise<string>}
     */
    async extractTextFromPdf(filePath) {
        return `[PDF text content from ${path.basename(filePath)}]`;
    }

    /**
     * Extract text from a DOCX. Placeholder: returns a marker string naming the file.
     *
     * @param {string} filePath - Path of the DOCX file.
     * @returns {Promise<string>}
     */
    async extractTextFromDocx(filePath) {
        return `[DOCX text content from ${path.basename(filePath)}]`;
    }

    /**
     * Create the temp directory if needed. Failures are logged, not thrown.
     *
     * @returns {Promise<void>}
     */
    async ensureTempDirectory() {
        try {
            await fs.mkdir(this.options.tempDir, { recursive: true });
        }
        catch (error) {
            logger.error(`Error creating temp directory ${this.options.tempDir}:`, error);
        }
    }

    /**
     * Delete one temp file. Failures are logged, not thrown.
     *
     * @param {string} filePath - File to remove.
     * @returns {Promise<void>}
     */
    async cleanupTempFile(filePath) {
        try {
            await fs.unlink(filePath);
        }
        catch (error) {
            logger.error(`Error cleaning up temp file ${filePath}:`, error);
        }
    }

    /**
     * Delete every entry in the temp directory. Failures are logged, not thrown.
     *
     * @returns {Promise<void>}
     */
    async cleanupAllTempFiles() {
        try {
            const files = await fs.readdir(this.options.tempDir);
            for (const file of files) {
                await this.cleanupTempFile(path.join(this.options.tempDir, file));
            }
        }
        catch (error) {
            logger.error(`Error cleaning up temp directory:`, error);
        }
    }

    /**
     * Summarize a list of processing results.
     *
     * @param {object[]} results - Results as returned by `processEmailAttachments`.
     * @returns {object} Counts per status, number of results with extracted text and
     *   the mean `metadata.processingTime` (0 for an empty list).
     */
    getProcessingStats(results) {
        const countByStatus = (status) => results.filter((r) => r.status === status).length;
        const stats = {
            total: results.length,
            successful: countByStatus('success'),
            passwordProtected: countByStatus('password_protected'),
            failed: countByStatus('failed'),
            textExtracted: results.filter((r) => r.metadata?.extractedText).length,
            averageProcessingTime: 0,
            // Without these two the per-status counts do not add up to `total`.
            tooLarge: countByStatus('too_large'),
            unsupported: countByStatus('unsupported')
        };
        if (results.length > 0) {
            const totalTime = results.reduce((sum, r) => sum + (r.metadata?.processingTime ?? 0), 0);
            stats.averageProcessingTime = totalTime / results.length;
        }
        return stats;
    }
}

/**
 * Default options merged under the constructor argument.
 *
 * NOTE(review): `autoDecryptPDFs` and `scanForMalware` are not read by any
 * code in this class; confirm whether they are consumed elsewhere.
 */
DocumentWorkflow.DEFAULT_OPTIONS = {
    tempDir: '/tmp/ms365-documents',
    maxFileSize: 50 * 1024 * 1024, // 50MB
    allowedTypes: ['pdf', 'docx', 'xlsx', 'pptx', 'txt', 'rtf'],
    autoDecryptPDFs: true,
    extractTextContent: true,
    scanForMalware: false,
    passwordAttempts: 10
};

/**
 * Password guessing strategies, applied in order by `generatePasswordList`.
 * A strategy contributes `generator(email)` when it has a generator, else `passwords`.
 */
DocumentWorkflow.PASSWORD_STRATEGIES = [
    {
        name: 'common_passwords',
        passwords: ['password', '123456', 'admin', 'user', 'default', 'temp', 'document']
    },
    {
        name: 'email_based',
        passwords: [],
        // Candidates derived from the sender's display name and address.
        generator: (email) => {
            const fullName = stringOrEmpty(email?.from?.name);
            const address = stringOrEmpty(email?.from?.address).toLowerCase();
            const senderName = fullName.toLowerCase().replace(/\s+/g, '');
            const senderFirstName = fullName.split(' ')[0].toLowerCase();
            const [localPart = '', domain = ''] = address.split('@');
            const currentYear = new Date().getFullYear();
            const year = currentYear.toString();
            const lastYear = (currentYear - 1).toString();
            return [
                senderName,
                senderFirstName,
                domain,
                `${senderName}${year}`,
                `${senderFirstName}${year}`,
                `${senderName}${lastYear}`,
                `${domain}${year}`,
                address,
                localPart
            ];
        }
    },
    {
        name: 'date_based',
        passwords: [],
        // Candidates derived from the email's received date.
        generator: (email) => {
            if (email?.receivedDateTime == null) {
                return [];
            }
            const emailDate = new Date(email.receivedDateTime);
            // toISOString() throws a RangeError on an invalid date.
            if (Number.isNaN(emailDate.getTime())) {
                return [];
            }
            // The ISO forms are UTC while getMonth()/getDate() use local time.
            const isoDay = emailDate.toISOString().split('T')[0];
            const month = emailDate.getMonth() + 1;
            const day = emailDate.getDate();
            const fullYear = emailDate.getFullYear();
            return [
                isoDay, // YYYY-MM-DD
                isoDay.replace(/-/g, ''), // YYYYMMDD
                `${month}/${day}/${fullYear}`,
                `${day}/${month}/${fullYear}`,
                fullYear.toString(),
                (fullYear - 1).toString()
            ];
        }
    },
    {
        name: 'subject_based',
        passwords: [],
        // Candidates derived from the subject: long words, adjacent word pairs, digit runs.
        generator: (email) => {
            const subject = stringOrEmpty(email?.subject).toLowerCase();
            const words = subject.split(/\s+/).filter((word) => word.length > 3);
            const pairs = words.slice(0, -1).map((word, index) => word + words[index + 1]);
            const numbers = subject.match(/\d+/g) || [];
            return [...words, ...pairs, ...numbers];
        }
    }
];