postal-mime
Version:
Email parser for browser environments
536 lines (438 loc) • 19 kB
JavaScript
import MimeNode from './mime-node.js';
import { textToHtml, htmlToText, formatTextHeader, formatHtmlHeader } from './text-format.js';
import addressParser from './address-parser.js';
import { decodeWords, textEncoder, blobToArrayBuffer } from './decode-strings.js';
import { base64ArrayBuffer } from './base64-encoder.js';
export { addressParser, decodeWords };
export default class PostalMime {
static parse(buf, options) {
const parser = new PostalMime(options);
return parser.parse(buf);
}
constructor(options) {
this.options = options || {};
this.root = this.currentNode = new MimeNode({
postalMime: this
});
this.boundaries = [];
this.textContent = {};
this.attachments = [];
this.attachmentEncoding =
(this.options.attachmentEncoding || '')
.toString()
.replace(/[-_\s]/g, '')
.trim()
.toLowerCase() || 'arraybuffer';
this.started = false;
}
async finalize() {
// close all pending nodes
await this.root.finalize();
}
async processLine(line, isFinal) {
let boundaries = this.boundaries;
// check if this is a mime boundary
if (boundaries.length && line.length > 2 && line[0] === 0x2d && line[1] === 0x2d) {
// could be a boundary marker
for (let i = boundaries.length - 1; i >= 0; i--) {
let boundary = boundaries[i];
if (line.length !== boundary.value.length + 2 && line.length !== boundary.value.length + 4) {
continue;
}
let isTerminator = line.length === boundary.value.length + 4;
if (isTerminator && (line[line.length - 2] !== 0x2d || line[line.length - 1] !== 0x2d)) {
continue;
}
let boudaryMatches = true;
for (let i = 0; i < boundary.value.length; i++) {
if (line[i + 2] !== boundary.value[i]) {
boudaryMatches = false;
break;
}
}
if (!boudaryMatches) {
continue;
}
if (isTerminator) {
await boundary.node.finalize();
this.currentNode = boundary.node.parentNode || this.root;
} else {
// finalize any open child nodes (should be just one though)
await boundary.node.finalizeChildNodes();
this.currentNode = new MimeNode({
postalMime: this,
parentNode: boundary.node
});
}
if (isFinal) {
return this.finalize();
}
return;
}
}
this.currentNode.feed(line);
if (isFinal) {
return this.finalize();
}
}
readLine() {
let startPos = this.readPos;
let endPos = this.readPos;
let res = () => {
return {
bytes: new Uint8Array(this.buf, startPos, endPos - startPos),
done: this.readPos >= this.av.length
};
};
while (this.readPos < this.av.length) {
const c = this.av[this.readPos++];
if (c !== 0x0d && c !== 0x0a) {
endPos = this.readPos;
}
if (c === 0x0a) {
return res();
}
}
return res();
}
async processNodeTree() {
// get text nodes
let textContent = {};
let textTypes = new Set();
let textMap = (this.textMap = new Map());
let forceRfc822Attachments = this.forceRfc822Attachments();
let walk = async (node, alternative, related) => {
alternative = alternative || false;
related = related || false;
if (!node.contentType.multipart) {
// is it inline message/rfc822
if (this.isInlineMessageRfc822(node) && !forceRfc822Attachments) {
const subParser = new PostalMime();
node.subMessage = await subParser.parse(node.content);
if (!textMap.has(node)) {
textMap.set(node, {});
}
let textEntry = textMap.get(node);
// default to text if there is no content
if (node.subMessage.text || !node.subMessage.html) {
textEntry.plain = textEntry.plain || [];
textEntry.plain.push({ type: 'subMessage', value: node.subMessage });
textTypes.add('plain');
}
if (node.subMessage.html) {
textEntry.html = textEntry.html || [];
textEntry.html.push({ type: 'subMessage', value: node.subMessage });
textTypes.add('html');
}
if (subParser.textMap) {
subParser.textMap.forEach((subTextEntry, subTextNode) => {
textMap.set(subTextNode, subTextEntry);
});
}
for (let attachment of node.subMessage.attachments || []) {
this.attachments.push(attachment);
}
}
// is it text?
else if (this.isInlineTextNode(node)) {
let textType = node.contentType.parsed.value.substr(node.contentType.parsed.value.indexOf('/') + 1);
let selectorNode = alternative || node;
if (!textMap.has(selectorNode)) {
textMap.set(selectorNode, {});
}
let textEntry = textMap.get(selectorNode);
textEntry[textType] = textEntry[textType] || [];
textEntry[textType].push({ type: 'text', value: node.getTextContent() });
textTypes.add(textType);
}
// is it an attachment
else if (node.content) {
const filename = node.contentDisposition.parsed.params.filename || node.contentType.parsed.params.name || null;
const attachment = {
filename: filename ? decodeWords(filename) : null,
mimeType: node.contentType.parsed.value,
disposition: node.contentDisposition.parsed.value || null
};
if (related && node.contentId) {
attachment.related = true;
}
if (node.contentDescription) {
attachment.description = node.contentDescription;
}
if (node.contentId) {
attachment.contentId = node.contentId;
}
switch (node.contentType.parsed.value) {
// Special handling for calendar events
case 'text/calendar':
case 'application/ics': {
if (node.contentType.parsed.params.method) {
attachment.method = node.contentType.parsed.params.method.toString().toUpperCase().trim();
}
// Enforce into unicode
const decodedText = node.getTextContent().replace(/\r?\n/g, '\n').replace(/\n*$/, '\n');
attachment.content = textEncoder.encode(decodedText);
break;
}
// Regular attachments
default:
attachment.content = node.content;
}
this.attachments.push(attachment);
}
} else if (node.contentType.multipart === 'alternative') {
alternative = node;
} else if (node.contentType.multipart === 'related') {
related = node;
}
for (let childNode of node.childNodes) {
await walk(childNode, alternative, related);
}
};
await walk(this.root, false, []);
textMap.forEach(mapEntry => {
textTypes.forEach(textType => {
if (!textContent[textType]) {
textContent[textType] = [];
}
if (mapEntry[textType]) {
mapEntry[textType].forEach(textEntry => {
switch (textEntry.type) {
case 'text':
textContent[textType].push(textEntry.value);
break;
case 'subMessage':
{
switch (textType) {
case 'html':
textContent[textType].push(formatHtmlHeader(textEntry.value));
break;
case 'plain':
textContent[textType].push(formatTextHeader(textEntry.value));
break;
}
}
break;
}
});
} else {
let alternativeType;
switch (textType) {
case 'html':
alternativeType = 'plain';
break;
case 'plain':
alternativeType = 'html';
break;
}
(mapEntry[alternativeType] || []).forEach(textEntry => {
switch (textEntry.type) {
case 'text':
switch (textType) {
case 'html':
textContent[textType].push(textToHtml(textEntry.value));
break;
case 'plain':
textContent[textType].push(htmlToText(textEntry.value));
break;
}
break;
case 'subMessage':
{
switch (textType) {
case 'html':
textContent[textType].push(formatHtmlHeader(textEntry.value));
break;
case 'plain':
textContent[textType].push(formatTextHeader(textEntry.value));
break;
}
}
break;
}
});
}
});
});
Object.keys(textContent).forEach(textType => {
textContent[textType] = textContent[textType].join('\n');
});
this.textContent = textContent;
}
isInlineTextNode(node) {
if (node.contentDisposition.parsed.value === 'attachment') {
// no matter the type, this is an attachment
return false;
}
switch (node.contentType.parsed.value) {
case 'text/html':
case 'text/plain':
return true;
case 'text/calendar':
case 'text/csv':
default:
return false;
}
}
isInlineMessageRfc822(node) {
if (node.contentType.parsed.value !== 'message/rfc822') {
return false;
}
let disposition = node.contentDisposition.parsed.value || (this.options.rfc822Attachments ? 'attachment' : 'inline');
return disposition === 'inline';
}
// Check if this is a specially crafted report email where message/rfc822 content should not be inlined
forceRfc822Attachments() {
if (this.options.forceRfc822Attachments) {
return true;
}
let forceRfc822Attachments = false;
let walk = node => {
if (!node.contentType.multipart) {
if (['message/delivery-status', 'message/feedback-report'].includes(node.contentType.parsed.value)) {
forceRfc822Attachments = true;
}
}
for (let childNode of node.childNodes) {
walk(childNode);
}
};
walk(this.root);
return forceRfc822Attachments;
}
async resolveStream(stream) {
let chunkLen = 0;
let chunks = [];
const reader = stream.getReader();
while (true) {
const { done, value } = await reader.read();
if (done) {
break;
}
chunks.push(value);
chunkLen += value.length;
}
const result = new Uint8Array(chunkLen);
let chunkPointer = 0;
for (let chunk of chunks) {
result.set(chunk, chunkPointer);
chunkPointer += chunk.length;
}
return result;
}
async parse(buf) {
if (this.started) {
throw new Error('Can not reuse parser, create a new PostalMime object');
}
this.started = true;
// Check if the input is a readable stream and resolve it into an ArrayBuffer
if (buf && typeof buf.getReader === 'function') {
buf = await this.resolveStream(buf);
}
// Should it throw for an empty value instead of defaulting to an empty ArrayBuffer?
buf = buf || new ArrayBuffer(0);
// Cast string input to Uint8Array
if (typeof buf === 'string') {
buf = textEncoder.encode(buf);
}
// Cast Blob to ArrayBuffer
if (buf instanceof Blob || Object.prototype.toString.call(buf) === '[object Blob]') {
buf = await blobToArrayBuffer(buf);
}
// Cast Node.js Buffer object or Uint8Array into ArrayBuffer
if (buf.buffer instanceof ArrayBuffer) {
buf = new Uint8Array(buf).buffer;
}
this.buf = buf;
this.av = new Uint8Array(buf);
this.readPos = 0;
while (this.readPos < this.av.length) {
const line = this.readLine();
await this.processLine(line.bytes, line.done);
}
await this.processNodeTree();
const message = {
headers: this.root.headers.map(entry => ({ key: entry.key, value: entry.value })).reverse()
};
for (const key of ['from', 'sender']) {
const addressHeader = this.root.headers.find(line => line.key === key);
if (addressHeader && addressHeader.value) {
const addresses = addressParser(addressHeader.value);
if (addresses && addresses.length) {
message[key] = addresses[0];
}
}
}
for (const key of ['delivered-to', 'return-path']) {
const addressHeader = this.root.headers.find(line => line.key === key);
if (addressHeader && addressHeader.value) {
const addresses = addressParser(addressHeader.value);
if (addresses && addresses.length && addresses[0].address) {
const camelKey = key.replace(/\-(.)/g, (o, c) => c.toUpperCase());
message[camelKey] = addresses[0].address;
}
}
}
for (const key of ['to', 'cc', 'bcc', 'reply-to']) {
const addressHeaders = this.root.headers.filter(line => line.key === key);
let addresses = [];
addressHeaders
.filter(entry => entry && entry.value)
.map(entry => addressParser(entry.value))
.forEach(parsed => (addresses = addresses.concat(parsed || [])));
if (addresses && addresses.length) {
const camelKey = key.replace(/\-(.)/g, (o, c) => c.toUpperCase());
message[camelKey] = addresses;
}
}
for (const key of ['subject', 'message-id', 'in-reply-to', 'references']) {
const header = this.root.headers.find(line => line.key === key);
if (header && header.value) {
const camelKey = key.replace(/\-(.)/g, (o, c) => c.toUpperCase());
message[camelKey] = decodeWords(header.value);
}
}
let dateHeader = this.root.headers.find(line => line.key === 'date');
if (dateHeader) {
let date = new Date(dateHeader.value);
if (!date || date.toString() === 'Invalid Date') {
date = dateHeader.value;
} else {
// enforce ISO format if seems to be a valid date
date = date.toISOString();
}
message.date = date;
}
if (this.textContent?.html) {
message.html = this.textContent.html;
}
if (this.textContent?.plain) {
message.text = this.textContent.plain;
}
message.attachments = this.attachments;
switch (this.attachmentEncoding) {
case 'arraybuffer':
break;
case 'base64':
for (let attachment of message.attachments || []) {
if (attachment?.content) {
attachment.content = base64ArrayBuffer(attachment.content);
attachment.encoding = 'base64';
}
}
break;
case 'utf8':
let attachmentDecoder = new TextDecoder('utf8');
for (let attachment of message.attachments || []) {
if (attachment?.content) {
attachment.content = attachmentDecoder.decode(attachment.content);
attachment.encoding = 'utf8';
}
}
break;
default:
throw new Error('Unknwon attachment encoding');
}
return message;
}
}