ms365-mcp-server
Version:
Microsoft 365 MCP Server for managing Microsoft 365 email through natural language interactions with full OAuth2 authentication support
434 lines (433 loc) • 16 kB
JavaScript
import { logger } from './api.js';
import * as fs from 'fs/promises';
import * as path from 'path';
import * as crypto from 'crypto';
/**
 * Build the `metadata` record attached to every processing result.
 *
 * All boolean flags default to `false` and `processingTime` to `0`; callers
 * pass only the fields that differ.
 *
 * @param {object} attachment - Attachment descriptor (`name`, `size`, `contentType`).
 * @param {object} [overrides] - Metadata fields to override.
 * @returns {object} Metadata object with a stable key order.
 */
function buildAttachmentMetadata(attachment, overrides = {}) {
    return {
        originalName: attachment.name,
        size: attachment.size || 0,
        mimeType: attachment.contentType || 'unknown',
        isPasswordProtected: false,
        requiresPassword: false,
        extractedText: false,
        processingTime: 0,
        ...overrides
    };
}

/**
 * Workflow that stores email attachments in a temp directory, detects
 * password protection, tries context-derived passwords and extracts text.
 *
 * Options are `DocumentWorkflow.DEFAULT_OPTIONS` overlaid with the
 * constructor argument. Candidate passwords come from
 * `DocumentWorkflow.PASSWORD_STRATEGIES`.
 */
export class DocumentWorkflow {
    /**
     * @param {object} [options] - Overrides for `DocumentWorkflow.DEFAULT_OPTIONS`.
     */
    constructor(options = {}) {
        this.options = { ...DocumentWorkflow.DEFAULT_OPTIONS, ...options };
    }

    /**
     * Process all attachments from an email, one after another.
     *
     * A failure on one attachment is recorded as a `'failed'` result and does
     * not stop the remaining attachments from being processed.
     *
     * @param {object} email - Email the attachments belong to (`subject` is logged).
     * @param {Array<object>|null|undefined} attachments - Attachment descriptors;
     *   `null`/`undefined` is treated as an empty list.
     * @returns {Promise<Array<object>>} One result per attachment, in input order.
     */
    async processEmailAttachments(email, attachments) {
        const pending = attachments ?? [];
        const results = [];
        logger.log(`📎 Processing ${pending.length} attachments from email: ${email.subject}`);
        // Ensure temp directory exists
        await this.ensureTempDirectory();
        for (const attachment of pending) {
            try {
                results.push(await this.processAttachment(email, attachment));
            }
            catch (error) {
                logger.error(`Error processing attachment ${attachment.name}:`, error);
                results.push({
                    attachment,
                    status: 'failed',
                    metadata: buildAttachmentMetadata(attachment),
                    errors: [error instanceof Error ? error.message : String(error)]
                });
            }
        }
        logger.log(`📎 Processed ${results.length} attachments, ${results.filter(r => r.status === 'success').length} successful`);
        return results;
    }

    /**
     * Process a single attachment.
     *
     * Returns early with status `'too_large'` or `'unsupported'` before
     * anything is written to disk. Otherwise the attachment is saved to the
     * temp directory and the result status is `'success'` or
     * `'password_protected'`. On those two statuses the temp file is kept and
     * its path returned in `filePath`; if processing throws, the temp file is
     * removed and the error is rethrown.
     *
     * @param {object} email - Email context used to derive candidate passwords.
     * @param {object} attachment - Descriptor with `name`, `contentBytes` (base64)
     *   and optional `size` / `contentType`.
     * @returns {Promise<object>} Processing result.
     */
    async processAttachment(email, attachment) {
        const startTime = Date.now();
        const elapsed = () => Date.now() - startTime;
        // Check file size (only when the descriptor declares a non-zero size)
        if (attachment.size && attachment.size > this.options.maxFileSize) {
            return {
                attachment,
                status: 'too_large',
                metadata: buildAttachmentMetadata(attachment, { processingTime: elapsed() }),
                errors: [`File size ${attachment.size} exceeds maximum ${this.options.maxFileSize}`]
            };
        }
        // Check file type: extension without the leading dot, lower-cased
        const fileExtension = path.extname(attachment.name).toLowerCase().substring(1);
        if (!this.options.allowedTypes.includes(fileExtension)) {
            return {
                attachment,
                status: 'unsupported',
                metadata: buildAttachmentMetadata(attachment, { processingTime: elapsed() }),
                errors: [`File type ${fileExtension} not supported`]
            };
        }
        // Save attachment to temp file
        const tempFilePath = await this.saveAttachmentToTemp(attachment);
        try {
            const isPasswordProtected = await this.isPasswordProtected(tempFilePath, fileExtension);
            let textContent;
            let requiresPassword = false;
            let extractedText = false;
            if (isPasswordProtected) {
                // Try to decrypt with the generated candidate passwords
                const decryptResult = await this.attemptDecryption(email, tempFilePath, fileExtension);
                if (decryptResult.success) {
                    textContent = decryptResult.textContent;
                    extractedText = true;
                }
                else {
                    requiresPassword = true;
                }
            }
            else if (this.options.extractTextContent) {
                const extracted = await this.extractTextContent(tempFilePath, fileExtension);
                textContent = extracted || undefined;
                extractedText = !!extracted;
            }
            return {
                attachment,
                status: requiresPassword ? 'password_protected' : 'success',
                filePath: tempFilePath,
                textContent,
                metadata: buildAttachmentMetadata(attachment, {
                    isPasswordProtected,
                    requiresPassword,
                    extractedText,
                    processingTime: elapsed()
                })
            };
        }
        catch (error) {
            // Clean up temp file on error
            await this.cleanupTempFile(tempFilePath);
            throw error;
        }
    }

    /**
     * Save attachment to a uniquely named file inside `options.tempDir`.
     *
     * The attachment name is sender-controlled, so only its last path segment
     * is used (both `/` and `\` are treated as separators). This keeps a name
     * such as `x/../../evil.txt` from resolving outside the temp directory.
     *
     * @param {object} attachment - Descriptor with `name` and base64 `contentBytes`.
     * @returns {Promise<string>} Path of the written file.
     */
    async saveAttachmentToTemp(attachment) {
        const lastSegment = String(attachment.name).split(/[\\/]/).pop();
        // NUL bytes are rejected by the fs API; an empty segment gets a fallback.
        const safeName = lastSegment.replace(/\0/g, '') || 'attachment';
        const tempFileName = `${crypto.randomUUID()}_${safeName}`;
        const tempFilePath = path.join(this.options.tempDir, tempFileName);
        const buffer = Buffer.from(attachment.contentBytes, 'base64');
        await fs.writeFile(tempFilePath, buffer);
        return tempFilePath;
    }

    /**
     * Check if file is password protected.
     *
     * Only `pdf`, `docx`, `xlsx` and `pptx` are inspected; every other
     * extension, and any error during inspection, yields `false`.
     *
     * @param {string} filePath - File to inspect.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<boolean>}
     */
    async isPasswordProtected(filePath, fileExtension) {
        try {
            switch (fileExtension) {
                case 'pdf':
                    return await this.isPdfPasswordProtected(filePath);
                case 'docx':
                case 'xlsx':
                case 'pptx':
                    return await this.isOfficeDocPasswordProtected(filePath);
                default:
                    return false;
            }
        }
        catch (error) {
            logger.error(`Error checking password protection for ${filePath}:`, error);
            return false;
        }
    }

    /**
     * Check if PDF is password protected.
     *
     * Heuristic: scans the raw bytes for the `/Encrypt` or `/P -` markers.
     * Read errors are reported as "not protected".
     *
     * @param {string} filePath - PDF file to scan.
     * @returns {Promise<boolean>}
     */
    async isPdfPasswordProtected(filePath) {
        try {
            const buffer = await fs.readFile(filePath);
            // Byte search on the buffer avoids copying the whole file into a string.
            return buffer.includes('/Encrypt') || buffer.includes('/P -');
        }
        catch (error) {
            return false;
        }
    }

    /**
     * Check if Office document is password protected.
     *
     * Looks for the `EncryptedPackage` / `EncryptionInfo` stream names. An
     * encrypted Office file is an OLE compound file whose stream names are
     * stored as UTF-16LE, so each marker is searched in that encoding as well
     * as in its single-byte form. Read errors are reported as "not protected".
     *
     * @param {string} filePath - Office file to scan.
     * @returns {Promise<boolean>}
     */
    async isOfficeDocPasswordProtected(filePath) {
        try {
            const buffer = await fs.readFile(filePath);
            const markers = ['EncryptedPackage', 'EncryptionInfo'];
            return markers.some((marker) => buffer.includes(marker) ||
                // Buffer needle => plain byte search, independent of alignment.
                buffer.includes(Buffer.from(marker, 'utf16le')));
        }
        catch (error) {
            return false;
        }
    }

    /**
     * Attempt to decrypt password-protected file.
     *
     * Tries each candidate from `generatePasswordList(email)` in order and
     * stops at the first one that yields text.
     *
     * @param {object} email - Email context for password generation.
     * @param {string} filePath - Encrypted file.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<{success: boolean, textContent?: string, usedPassword?: string}>}
     */
    async attemptDecryption(email, filePath, fileExtension) {
        const passwords = this.generatePasswordList(email);
        for (const password of passwords) {
            try {
                const textContent = await this.decryptAndExtractText(filePath, fileExtension, password);
                if (textContent) {
                    logger.log(`✅ Successfully decrypted ${path.basename(filePath)} with password strategy`);
                    return { success: true, textContent, usedPassword: password };
                }
            }
            catch (error) {
                // Deliberately best-effort: a failed candidate is expected, and
                // logging it would put the attempted password in the logs.
            }
        }
        return { success: false };
    }

    /**
     * Generate password list based on email context.
     *
     * Concatenates the output of every entry in
     * `DocumentWorkflow.PASSWORD_STRATEGIES` (a strategy's `generator`, when
     * present, takes precedence over its static `passwords`), removes
     * duplicates keeping first occurrences, and truncates to
     * `options.passwordAttempts`. A generator that throws is logged and
     * skipped so the remaining strategies still contribute.
     *
     * @param {object} email - Email context handed to each generator.
     * @returns {string[]} Ordered, de-duplicated candidate passwords.
     */
    generatePasswordList(email) {
        const candidates = [];
        for (const strategy of DocumentWorkflow.PASSWORD_STRATEGIES) {
            if (strategy.generator) {
                try {
                    candidates.push(...strategy.generator(email));
                }
                catch (error) {
                    logger.error(`Error generating passwords with strategy ${strategy.name}:`, error);
                }
            }
            else {
                candidates.push(...(strategy.passwords ?? []));
            }
        }
        // Remove duplicates and limit attempts
        return [...new Set(candidates)].slice(0, this.options.passwordAttempts);
    }

    /**
     * Decrypt and extract text from password-protected file.
     *
     * Dispatches on extension; unsupported extensions yield `null`.
     *
     * @param {string} filePath - Encrypted file.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @param {string} password - Candidate password.
     * @returns {Promise<string|null>} Extracted text, or `null` when not decrypted.
     */
    async decryptAndExtractText(filePath, fileExtension, password) {
        switch (fileExtension) {
            case 'pdf':
                return await this.decryptPdf(filePath, password);
            case 'docx':
            case 'xlsx':
            case 'pptx':
                return await this.decryptOfficeDoc(filePath, password);
            default:
                return null;
        }
    }

    /**
     * Decrypt PDF (placeholder implementation).
     *
     * Always resolves to `null`, i.e. "could not decrypt"; no PDF library is
     * wired in yet.
     *
     * @param {string} filePath - Encrypted PDF.
     * @param {string} password - Candidate password.
     * @returns {Promise<null>}
     */
    async decryptPdf(filePath, password) {
        return null;
    }

    /**
     * Decrypt Office document (placeholder implementation).
     *
     * Always resolves to `null`, i.e. "could not decrypt"; no Office
     * decryption library is wired in yet.
     *
     * @param {string} filePath - Encrypted Office document.
     * @param {string} password - Candidate password.
     * @returns {Promise<null>}
     */
    async decryptOfficeDoc(filePath, password) {
        return null;
    }

    /**
     * Extract text content from unprotected file.
     *
     * Supports `txt`, `pdf` and `docx`; other extensions, and any extraction
     * error (which is logged), yield `null`.
     *
     * @param {string} filePath - File to read.
     * @param {string} fileExtension - Lower-case extension without the dot.
     * @returns {Promise<string|null>}
     */
    async extractTextContent(filePath, fileExtension) {
        try {
            switch (fileExtension) {
                case 'txt':
                    return await this.extractTextFromTxt(filePath);
                case 'pdf':
                    return await this.extractTextFromPdf(filePath);
                case 'docx':
                    return await this.extractTextFromDocx(filePath);
                default:
                    return null;
            }
        }
        catch (error) {
            logger.error(`Error extracting text from ${filePath}:`, error);
            return null;
        }
    }

    /**
     * Extract text from TXT file, decoding it as UTF-8.
     *
     * @param {string} filePath - Text file to read.
     * @returns {Promise<string>}
     */
    async extractTextFromTxt(filePath) {
        const buffer = await fs.readFile(filePath);
        return buffer.toString('utf8');
    }

    /**
     * Extract text from PDF (placeholder implementation).
     *
     * Returns a fixed marker string naming the file, not the real content.
     *
     * @param {string} filePath - PDF file.
     * @returns {Promise<string>}
     */
    async extractTextFromPdf(filePath) {
        return `[PDF text content from ${path.basename(filePath)}]`;
    }

    /**
     * Extract text from DOCX (placeholder implementation).
     *
     * Returns a fixed marker string naming the file, not the real content.
     *
     * @param {string} filePath - DOCX file.
     * @returns {Promise<string>}
     */
    async extractTextFromDocx(filePath) {
        return `[DOCX text content from ${path.basename(filePath)}]`;
    }

    /**
     * Ensure temp directory exists.
     *
     * Creation errors are logged, not thrown; a later write into the
     * directory will surface the problem.
     *
     * @returns {Promise<void>}
     */
    async ensureTempDirectory() {
        try {
            await fs.mkdir(this.options.tempDir, { recursive: true });
        }
        catch (error) {
            logger.error(`Error creating temp directory ${this.options.tempDir}:`, error);
        }
    }

    /**
     * Clean up temporary file. Deletion errors are logged, not thrown.
     *
     * @param {string} filePath - File to delete.
     * @returns {Promise<void>}
     */
    async cleanupTempFile(filePath) {
        try {
            await fs.unlink(filePath);
        }
        catch (error) {
            logger.error(`Error cleaning up temp file ${filePath}:`, error);
        }
    }

    /**
     * Clean up all temporary files.
     *
     * Attempts to unlink every entry listed in `options.tempDir`, one at a
     * time. Errors are logged, not thrown.
     *
     * @returns {Promise<void>}
     */
    async cleanupAllTempFiles() {
        try {
            const files = await fs.readdir(this.options.tempDir);
            for (const file of files) {
                await this.cleanupTempFile(path.join(this.options.tempDir, file));
            }
        }
        catch (error) {
            logger.error(`Error cleaning up temp directory:`, error);
        }
    }

    /**
     * Get processing statistics.
     *
     * @param {Array<object>} results - Results from `processEmailAttachments`.
     * @returns {{total: number, successful: number, passwordProtected: number,
     *   failed: number, textExtracted: number, averageProcessingTime: number}}
     *   Counts per status plus the mean `metadata.processingTime`
     *   (`0` for an empty list).
     */
    getProcessingStats(results) {
        const countByStatus = (status) => results.filter(r => r.status === status).length;
        const stats = {
            total: results.length,
            successful: countByStatus('success'),
            passwordProtected: countByStatus('password_protected'),
            failed: countByStatus('failed'),
            textExtracted: results.filter(r => r.metadata.extractedText).length,
            averageProcessingTime: 0
        };
        if (results.length > 0) {
            const totalTime = results.reduce((sum, r) => sum + r.metadata.processingTime, 0);
            stats.averageProcessingTime = totalTime / results.length;
        }
        return stats;
    }
}
/**
 * Baseline options; the DocumentWorkflow constructor spreads caller-supplied
 * options over a copy of this object.
 *
 * - tempDir: directory where attachments are written.
 * - maxFileSize: largest declared attachment size, in bytes, that is processed.
 * - allowedTypes: accepted file extensions (lower-case, no leading dot).
 * - extractTextContent: whether text is extracted from unprotected files.
 * - passwordAttempts: cap on candidate passwords tried per file.
 * - autoDecryptPDFs, scanForMalware: declared here; NOTE(review): no read of
 *   either flag is visible in the DocumentWorkflow class — confirm whether
 *   they are consumed elsewhere.
 */
DocumentWorkflow.DEFAULT_OPTIONS = (() => {
    const BYTES_PER_MEGABYTE = 1024 * 1024;
    const supportedExtensions = ['pdf', 'docx', 'xlsx', 'pptx', 'txt', 'rtf'];
    return {
        tempDir: '/tmp/ms365-documents',
        maxFileSize: 50 * BYTES_PER_MEGABYTE, // 50MB
        allowedTypes: supportedExtensions,
        autoDecryptPDFs: true,
        extractTextContent: true,
        scanForMalware: false,
        passwordAttempts: 10
    };
})();
/**
 * Common password strategies.
 *
 * Each entry has a `name`, a static `passwords` list and, optionally, a
 * `generator(email)` returning candidates derived from the email. Generators
 * tolerate missing fields: an absent sender name, sender address or subject
 * is treated as an empty string, and an unparseable received date yields no
 * date candidates, so a sparse email never makes a generator throw.
 */
DocumentWorkflow.PASSWORD_STRATEGIES = [
    {
        name: 'common_passwords',
        passwords: ['password', '123456', 'admin', 'user', 'default', 'temp', 'document']
    },
    {
        name: 'email_based',
        passwords: [],
        // Candidates built from the sender's display name, address and domain.
        generator: (email) => {
            const displayName = email?.from?.name ?? '';
            const address = (email?.from?.address ?? '').toLowerCase();
            const [localPart = '', domain = ''] = address.split('@');
            const senderName = displayName.toLowerCase().replace(/\s+/g, '');
            const senderFirstName = displayName.split(' ')[0].toLowerCase();
            const currentYear = new Date().getFullYear();
            const year = currentYear.toString();
            const lastYear = (currentYear - 1).toString();
            return [
                senderName,
                senderFirstName,
                domain,
                `${senderName}${year}`,
                `${senderFirstName}${year}`,
                `${senderName}${lastYear}`,
                `${domain}${year}`,
                address,
                localPart
            ];
        }
    },
    {
        name: 'date_based',
        passwords: [],
        // Candidates built from the email's received date.
        generator: (email) => {
            const emailDate = new Date(email?.receivedDateTime);
            // toISOString() throws RangeError on an invalid date; bail out first.
            if (Number.isNaN(emailDate.getTime())) {
                return [];
            }
            // The ISO forms are UTC; the slash forms and years use local time.
            const isoDay = emailDate.toISOString().split('T')[0];
            const month = emailDate.getMonth() + 1;
            const day = emailDate.getDate();
            const fullYear = emailDate.getFullYear();
            return [
                isoDay, // YYYY-MM-DD
                isoDay.replace(/-/g, ''), // YYYYMMDD
                `${month}/${day}/${fullYear}`,
                `${day}/${month}/${fullYear}`,
                fullYear.toString(),
                (fullYear - 1).toString()
            ];
        }
    },
    {
        name: 'subject_based',
        passwords: [],
        // Candidates built from words and digit runs in the subject line.
        generator: (email) => {
            const subject = (email?.subject ?? '').toLowerCase();
            const words = subject.split(/\s+/).filter(word => word.length > 3);
            // Individual words, then each pair of adjacent words joined together.
            const combinations = [...words];
            for (let i = 0; i < words.length - 1; i++) {
                combinations.push(words[i] + words[i + 1]);
            }
            // Add numbers found in subject
            const numbers = subject.match(/\d+/g) || [];
            combinations.push(...numbers);
            return combinations;
        }
    }
];