UNPKG

partial-emlx-converter

Version:

Convert .emlx and .partial.emlx files created by Apple’s Mail.app to .eml

387 lines (386 loc) 16.7 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.processCli = exports.SkipEmlxTransform = exports.EmlxFlagNames = exports.processEmlx = exports.imapImport = exports.processEmlxs = void 0; const fs = require("fs"); const glob = require("glob"); const stream = require("stream"); const path = require("path"); const ProgressBar = require("progress"); const util = require("util"); const commander = require("commander"); const plist = require("plist"); const imap = require("imapflow"); // @ts-ignore const mailsplit_1 = require("mailsplit"); const stream_1 = require("stream"); const Debug = require("debug"); const debug = Debug('converter'); class DeletedMessageError extends Error { constructor(...args) { super(...args); this.name = 'DeletedMessageError'; } } async function setupEnv(inputDir) { const files = await util.promisify(glob)('**/*.emlx', { cwd: inputDir }); const bar = new ProgressBar('Converting [:bar] :percent :etas :file', { total: files.length, width: 40 }); return { files, bar }; } async function processEmlxs(inputDir, outputDir, ignoreErrors, skipDeleted) { const { files, bar } = await setupEnv(inputDir); for (const file of files) { bar.tick({ file }); const resultPath = path.join(outputDir, `${stripExtension(path.basename(file))}.eml`); try { const writeStream = fs.createWriteStream(resultPath); const res = await processEmlx(path.join(inputDir, file), writeStream, ignoreErrors, skipDeleted); res.messages.forEach(message => bar.interrupt(`${file}: ${message}`)); } catch (e) { if (e instanceof DeletedMessageError && e.message == 'DELETED') { bar.interrupt(`${file}: Message is marked as deleted (skipped)`); fs.unlinkSync(resultPath); continue; } bar.interrupt(`Encountered error when processing ${file} -- run with '--ignoreErrors' argument to avoid aborting the conversion.`); bar.terminate(); throw e; } } } exports.processEmlxs = processEmlxs; async function imapImport(inputDir, options) { const conn = new imap.ImapFlow({ host: options.host, port: options.port, auth: { user: options.user, pass: options.pass }, secure: options.tls == 'yes', logger: false }); await conn.connect(); const { files, bar } = await setupEnv(inputDir); try { for (const file of files) { bar.tick({ file }); try { let writeStream; const writeStreamCollector = new Promise(resolve => { writeStream = new (class extends stream.Writable { constructor() { super(...arguments); // _write(chunk: any, encoding: string, callback: (error?: Error | null) => void): void; this._buf = []; } _write(chunk, _encoding, callback) { this._buf.push(chunk); callback(); } _final(callback) { resolve(Buffer.concat(this._buf)); callback(); } })(); }); const res = await processEmlx(path.join(inputDir, file), // eslint-disable-next-line @typescript-eslint/no-non-null-assertion writeStream, options.ignoreErrors, options.skipDeleted); res.messages.forEach(message => bar.interrupt(`${file}: ${message}`)); const msgData = await writeStreamCollector; const dateRecvTS = res.plData['date-received']; let dateRecv; if (!dateRecvTS) { throw new Error('no date-received in plist!'); // we could get 'Date' from message headers als fallback when this happens. } else { // Hint: this is the fix the timestamp to UTC dateRecv = new Date(dateRecvTS * 1000); const tzo = dateRecv.getTimezoneOffset(); dateRecv = new Date((dateRecvTS - tzo) * 1000); } // map emlx flags to imap flags const imapFlags = []; for (const flag of res.flags) { if (flag == 'read') imapFlags.push('\\Seen'); else if (flag == 'answered') imapFlags.push('\\Answered'); else if (flag == 'deleted') imapFlags.push('\\Deleted'); else if (flag == 'draft') imapFlags.push('\\Draft'); else if (flag == 'flagged') imapFlags.push('\\Flagged'); } await conn.append(options.mailbox, msgData, imapFlags, dateRecv); } catch (e) { if (e instanceof DeletedMessageError && e.message == 'DELETED') { bar.interrupt(`${file}: Message is marked as deleted (skipped)`); continue; } if (e instanceof Error) { bar.interrupt(`Caught Error: ${e.message}`); if (e.message.startsWith('Could not get attachment')) { bar.interrupt(`Encountered error when processing ${file} -- run with '--ignoreErrors' argument to avoid aborting the conversion.`); } } bar.terminate(); throw e; } } } finally { await conn.logout(); } } exports.imapImport = imapImport; // 'X-Apple-Content-Length' denotes an external attachment in case of .partial.emlx const appleContentLengthHeader = 'X-Apple-Content-Length'; /** * Process a single .emlx or .partial.emlx file. * * @param emlxFile Path to the file. * @param resultStream The stream to which to write the result. * @param ignoreErrors `true` to suppress throwing errors * (e.g. when attachment is missing). In this case, the * result array will contain a list of errors. * @returns List of error messages (when `ignoreErrors` was enabled) */ async function processEmlx(emlxFile, resultStream, ignoreErrors = false, skipDeleted = false) { const messages = []; // see here for a an example how to implement the Rewriter: // https://github.com/andris9/mailsplit/blob/master/examples/rewrite-html.js // eslint-disable-next-line @typescript-eslint/no-explicit-any const rewriter = new mailsplit_1.Rewriter((node) => node.headers.hasHeader(appleContentLengthHeader)); // eslint-disable-next-line @typescript-eslint/no-explicit-any rewriter.on('node', (data) => { data.node.headers.remove(appleContentLengthHeader); data.decoder.on('data', () => { // no op (callback needs to be here though!) }); data.decoder.on('end', () => { // console.log(`\n\n${emlxFile} ${JSON.stringify(data.node.parentNode.headers.lines, null, 2)}\n\n`); integrateAttachment(emlxFile, data).catch(err => { // propagate error event if (ignoreErrors) { // just store in `messages` messages.push(err.message); } else { // emit (and then throw) rewriter.emit('error', err); } }); }); }); const emlxTransform = new SkipEmlxTransform(skipDeleted); await util.promisify(stream_1.pipeline)(fs.createReadStream(emlxFile), emlxTransform, new mailsplit_1.Splitter(), rewriter, new mailsplit_1.Joiner(), resultStream); return { messages: messages, flags: emlxTransform.flags, plData: emlxTransform.plData }; } exports.processEmlx = processEmlx; // eslint-disable-next-line @typescript-eslint/no-explicit-any async function integrateAttachment(emlxFile, data) { const attachmentDirectoryPath = path.join(path.dirname(emlxFile), '..', 'Attachments', stripExtension(path.basename(emlxFile)), data.node.partNr.join('.') // e.g. array [1, 1, 2] ); // first try to get the name as explicitly specified in the email text // (this seems like the most reliable way), but if that does not work, // check the `Attachments` directory structure. See: // https://github.com/qqilihq/partial-emlx-converter/issues/3 const fileNames = [data.node.filename, await getFilenameFromFileSystem(attachmentDirectoryPath)].filter((f) => !!f); let processedAttachment = false; for (const fileName of fileNames) { const filePath = path.join(attachmentDirectoryPath, fileName); try { await new Promise((resolve, reject) => { const stream = fs.createReadStream(filePath); stream.on('error', error => reject(error)); stream.on('close', () => resolve()); stream.pipe(data.encoder); }); processedAttachment = true; break; } catch (e) { // ignore here, keep trying } } if (!processedAttachment) { data.encoder.end(); let message = 'Could not get attachment file'; if (fileNames.length > 0) { message += ` (tried ${fileNames.join(', ')})`; } throw new Error(message); } } /** * In case we cannot extract the attachment filename from the * email, we detrmine it by looking into the file system. We * expect, that the corresponding attachment directory * (e.g. `1.2`) contains exactly *one* file (ignoring `.DS_Store`). * * This is necessary, because Mail.app uses a language-specific * default name for attachments without explicitly given * file name (e.g. 'Mail-Anhang.jpeg' on a German system). * * @param pathToDirectory Path to the attachment directory (e.g. `.../1.2`) * @returns The filname, or `null` in case it could not be determined. */ async function getFilenameFromFileSystem(pathToDirectory) { try { // ignore `.DS_Store` const files = (await fs.promises.readdir(pathToDirectory)).filter(file => !file.startsWith('.DS_Store')); if (files.length !== 1) { const filenames = files.length > 0 ? `(${files.join(', ')})` : ''; debug(`Couldn’t determine attachment; expected '${pathToDirectory}' ` + `to contain one file, but there were: ${files.length} ${filenames}`); return null; } else { return files[0]; } } catch (e) { debug(`Couldn’t read attachments in '${pathToDirectory}'`); return null; } } function stripExtension(fileName) { return fileName.replace(/\..*/, ''); } exports.EmlxFlagNames = [ 'read', 'deleted', 'answered', 'encrypted', 'flagged', 'recent', 'draft', 'initial', 'forwarded', 'redirected', 'signed', 'junk', 'notJunk' ]; // emlx file contain the length of the 'payload' in the first line; // this allows to strip away the plist epilogue at the end of the // files easily class SkipEmlxTransform extends stream_1.Transform { constructor(skipDeleted = false) { super(); this.bytesToRead = undefined; this.bytesRead = 0; this.plistChunks = []; this.flags = []; this.plData = {}; this.skipDeleted = skipDeleted; } _transform(chunk, _encoding, callback) { let offset; let length; if (!this.bytesToRead) { const payloadLengthMatch = /^(\d+)\s+/.exec(chunk.toString('utf8')); if (!payloadLengthMatch) { // XXX first chunk could theoretically be smaller, // then we’d need to buffer the chunks until the // first linebreak -- seems unlikely though. return callback(new Error('Invalid structure; content did not start with payload length')); } this.bytesToRead = parseInt(payloadLengthMatch[1], 10); offset = payloadLengthMatch[0].length; length = Math.min(this.bytesToRead + offset, chunk.length); } else { offset = 0; length = Math.min(this.bytesToRead - this.bytesRead, chunk.length); } let slicedChunk = chunk.slice(offset, length); this.bytesRead += slicedChunk.length; if (this.bytesRead === this.bytesToRead) { // fix for #5 -- an end boundary string which is only terminated // with a single '-' is corrected to double '--' here const temp = slicedChunk.toString('utf8'); if (temp.endsWith('-') && !temp.endsWith('--')) { const nextChars = chunk.slice(length, length + 5).toString('utf8'); if (nextChars === '<?xml') { slicedChunk = Buffer.concat([slicedChunk, Buffer.from('-')]); } } this.plistChunks.push(chunk.slice(length, chunk.length)); } callback(undefined, slicedChunk); } _flush(callback) { // we parse & process the trailing plist data from the emlx file const plistDict = Buffer.concat(this.plistChunks).toString('utf8'); try { this.plData = plist.parse(plistDict); // the flags are documented here: https://docs.fileformat.com/email/emlx/ const flagsVal = this.plData['flags']; let flagBit = 0; for (const flagName of exports.EmlxFlagNames) { const mask = 1 << flagBit; if (flagsVal & mask) { this.flags.push(flagName); } if (flagBit == 9) { // flags jump from bit 9 to bit 23 (10-15: attachment count; 16-22: prio) flagBit = 23; } else { flagBit++; } } } catch (_a) { // ignore plist parsing errors } // skip deleted messages if (this.skipDeleted && this.flags.includes('deleted')) { callback(new DeletedMessageError('DELETED')); } else { callback(); } } } exports.SkipEmlxTransform = SkipEmlxTransform; function processCli() { const program = new commander.Command(); program.name('partial-emlx-converter').description('Read .emlx files and convert them to .eml files'); program .command('convert', { isDefault: true }) .description('convert .emlx-files from input folder to .eml files in output folder') .option('--ignoreErrors', "Don't abort the conversion on error (see the log output for details in this case)") .option('--skipDeleted', 'Skip messages marked as deleted') .argument('<input_directory>', 'input folder to read .emlx-files from') .argument('<output_directory>', 'output folder for .eml-files') .action((inputDir, outputDir, options) => { processEmlxs(inputDir, outputDir, options.ignoreErrors, options.skipDeleted).catch(err => console.error(err)); }); program .command('imapImport') .description('Import mails from emlx to IMAP server') .option('-p,--port <port_number>', 'IMAP port', parseInt, 993) .requiredOption('-u,--user <username>', 'User for IMAP authentication') .addOption(new commander.Option('--pass <password>', 'Password for IMAP authentication') .makeOptionMandatory(true) .env('IMAP_PASS')) .requiredOption('-h,--host <hostname>', 'IMAP server hostname') .requiredOption('-m,--mailbox <mailbox>', 'IMAP mailbox to import mails into', 'import') .addOption(new commander.Option('--tls <mode>', 'Use `no` to disable TLS') .choices(['yes', 'no']) .default('yes', 'tls enabled')) .option('--skipDeleted', 'Skip messages marked as deleted') .option('--ignoreErrors', "Don't abort conversion on error (see the log output for details in this case)") .argument('<input_directory>', 'input folder to read .emlx-files from') .action((input, options) => { imapImport(input, options).catch(err => console.error(err)); }); program.parse(process.argv); } exports.processCli = processCli;