postal-mime
Version:
Email parser for Node.js and browser environments
326 lines (323 loc) • 9.66 kB
JavaScript
"use strict";
// Short aliases for the Object built-ins used by the interop helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines every enumerable key of `all` on `target` as an enumerable getter.
 *
 * @param {Object} target Object that receives the getters
 * @param {Object} all Map of export name -> zero-argument getter function
 */
var __export = function (target, all) {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters. Keys that
 * `to` already owns, and the key equal to `except`, are left alone.
 *
 * @param {Object} to Destination object
 * @param {*} from Source; anything that is not an object or function is ignored
 * @param {String} [except] Single key that must not be copied
 * @param {Object} [desc] Scratch slot for the current property descriptor
 * @return {Object} The destination object `to`
 */
var __copyProps = function (to, from, except, desc) {
  var copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  __getOwnPropNames(from).forEach(function (key) {
    if (key === except || __hasOwnProp.call(to, key)) {
      return;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      // Read through to the source on every access so later changes are visible
      get: function () {
        return from[key];
      },
      // Keep the source's enumerability; a missing descriptor counts as enumerable
      enumerable: !desc || desc.enumerable
    });
  });
  return to;
};

/**
 * Builds a fresh object flagged with a non-enumerable `__esModule: true` and
 * copies the properties of `mod` onto it.
 *
 * @param {Object} mod Object holding the module's export getters
 * @return {Object} New object carrying the same exports plus the `__esModule` flag
 */
var __toCommonJS = function (mod) {
  var flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
// Holder object for this module's exports; it is populated with getters below.
var address_parser_exports = {};
// Register `default` as a getter so that `address_parser_default`, which is
// only assigned further down the file, is read when the export is accessed
// rather than at registration time.
__export(address_parser_exports, {
default: () => address_parser_default
});
// Publish a copy of the export holder that is flagged with `__esModule`.
module.exports = __toCommonJS(address_parser_exports);
var import_decode_strings = require("./decode-strings.cjs");
/**
 * Converts the tokens of a single address entry (one comma/semicolon separated
 * chunk of the header) into structured address objects.
 *
 * Tokens are sorted into buckets depending on the most recent operator:
 * "<" starts the address bucket, "(" the comment bucket, ":" the group bucket
 * (and marks the entry as a group), '"' toggles quoted text, and any other
 * operator switches back to plain text.
 *
 * @param {Array} tokens Tokens of one entry, as produced by Tokenizer#tokenize
 * @param {Number} depth Current nesting depth; forwarded (plus one) to every
 *   recursive addressParser call so the parser's depth guard applies
 * @return {Array} Address objects: `{address, name}` for a plain entry, or
 *   `{name, group}` for a group entry
 */
function _handleAddress(tokens, depth) {
  let isGroup = false;
  let state = "text";
  let address;
  let addresses = [];
  let data = {
    address: [],
    comment: [],
    group: [],
    text: [],
    // Parallel to `text`: true when the matching text entry came from inside quotes
    textWasQuoted: []
  };
  let i;
  let len;
  let insideQuotes = false;

  // Pass 1: distribute token values into the buckets of `data`
  for (i = 0, len = tokens.length; i < len; i++) {
    let token = tokens[i];
    let prevToken = i ? tokens[i - 1] : null;
    if (token.type === "operator") {
      switch (token.value) {
        case "<":
          state = "address";
          insideQuotes = false;
          break;
        case "(":
          state = "comment";
          insideQuotes = false;
          break;
        case ":":
          state = "group";
          isGroup = true;
          insideQuotes = false;
          break;
        case '"':
          insideQuotes = !insideQuotes;
          state = "text";
          break;
        default:
          state = "text";
          insideQuotes = false;
          break;
      }
    } else if (token.value) {
      if (state === "address") {
        // Drop everything up to and including a stray leading "<"
        token.value = token.value.replace(/^[^<]*<\s*/, "");
      }
      if (prevToken && prevToken.noBreak && data[state].length) {
        // The previous operator was glued to this text, so continue the last entry
        data[state][data[state].length - 1] += token.value;
        if (state === "text" && insideQuotes) {
          data.textWasQuoted[data.textWasQuoted.length - 1] = true;
        }
      } else {
        data[state].push(token.value);
        if (state === "text") {
          data.textWasQuoted.push(insideQuotes);
        }
      }
    }
  }

  // With no text at all, fall back to the comment as the display text
  if (!data.text.length && data.comment.length) {
    data.text = data.comment;
    data.comment = [];
  }

  if (isGroup) {
    data.text = data.text.join(" ");
    let groupMembers = [];
    if (data.group.length) {
      let parsedGroup = addressParser(data.group.join(","), { _depth: depth + 1 });
      // Members that are themselves groups are flattened into this group
      parsedGroup.forEach((member) => {
        if (member.group) {
          groupMembers = groupMembers.concat(member.group);
        } else {
          groupMembers.push(member);
        }
      });
    }
    addresses.push({
      name: (0, import_decode_strings.decodeWords)(data.text || address && address.name),
      group: groupMembers
    });
  } else {
    if (!data.address.length && data.text.length) {
      // No "<...>" address: look (from the end) for an unquoted text entry
      // that is exactly one "local@domain" word
      for (i = data.text.length - 1; i >= 0; i--) {
        if (!data.textWasQuoted[i] && data.text[i].match(/^[^@\s]+@[^@\s]+$/)) {
          data.address = data.text.splice(i, 1);
          data.textWasQuoted.splice(i, 1);
          break;
        }
      }

      // Replace callback: takes the first match as the address and replaces
      // it with a single space in the text; later matches are left as they are
      let _regexHandler = function(address2) {
        if (!data.address.length) {
          data.address = [address2.trim()];
          return " ";
        } else {
          return address2;
        }
      };

      if (!data.address.length) {
        // Still nothing: extract an address-like substring from unquoted text
        for (i = data.text.length - 1; i >= 0; i--) {
          if (!data.textWasQuoted[i]) {
            data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^\s]+\b\s*/, _regexHandler).trim();
            if (data.address.length) {
              break;
            }
          }
        }
      }
    }

    // The text may have been emptied by the extraction above
    if (!data.text.length && data.comment.length) {
      data.text = data.comment;
      data.comment = [];
    }

    // Keep only the first address; any extra ones become part of the text
    if (data.address.length > 1) {
      data.text = data.text.concat(data.address.splice(1));
    }

    data.text = data.text.join(" ");
    data.address = data.address.join(" ");

    // No address, and the text matches the "=?...=" pattern: decode it and,
    // if the decoded value contains "<...@...>", parse that value instead
    if (!data.address && /^=\?[^=]+?=$/.test(data.text.trim())) {
      const decodedText = (0, import_decode_strings.decodeWords)(data.text);
      if (/<[^<>]+@[^<>]+>/.test(decodedText)) {
        // Count this re-parse as one more nesting level, exactly like the group
        // re-parse above, so the parser's depth guard also bounds this recursion
        const parsedSubAddresses = addressParser(decodedText, { _depth: depth + 1 });
        if (parsedSubAddresses && parsedSubAddresses.length) {
          return parsedSubAddresses;
        }
      }
      return [{ address: "", name: decodedText }];
    }

    address = {
      address: data.address || data.text || "",
      name: (0, import_decode_strings.decodeWords)(data.text || data.address || "")
    };

    // Only one value was available: decide whether it is an address or a name
    if (address.address === address.name) {
      if ((address.address || "").match(/@/)) {
        address.name = "";
      } else {
        address.address = "";
      }
    }
    addresses.push(address);
  }
  return addresses;
}
/**
 * Splits a raw address header value into a flat list of operator and text
 * tokens. Operators are the characters listed in `this.operators`; everything
 * else is collected into text tokens.
 */
class Tokenizer {
  /**
   * @param {*} str Raw address field; falsy values are treated as an empty string
   */
  constructor(str) {
    this.str = (str || "").toString();
    this.operatorCurrent = "";
    // Closing character the tokenizer is currently waiting for ("" when none)
    this.operatorExpecting = "";
    // Text node currently being filled, or null
    this.node = null;
    // True when the previous character was a backslash inside a quoted string
    this.escaped = false;
    this.list = [];
    // Maps each opening operator to the closing character it expects
    this.operators = {
      '"': '"',
      "(": ")",
      "<": ">",
      ",": "",
      ":": ";",
      // Semicolons are not a legal delimiter per the RFC2822 grammar other
      // than for terminating a group, but they are also not valid for any
      // other use in this context. Given that some mail clients have
      // historically allowed the semicolon as a delimiter equivalent to the
      // comma in their UI, it makes sense to treat them the same as a comma
      // when used outside of a group.
      ";": ""
    };
  }

  /**
   * Tokenizes the original input string
   *
   * @return {Array} An array of operator|text tokens with trimmed, non-empty values
   */
  tokenize() {
    let list = [];
    for (let i = 0, len = this.str.length; i < len; i++) {
      let chr = this.str.charAt(i);
      let nextChr = i < len - 1 ? this.str.charAt(i + 1) : null;
      this.checkChar(chr, nextChr);
    }
    // Trim every token and drop the ones that end up empty
    this.list.forEach((node) => {
      node.value = (node.value || "").toString().trim();
      if (node.value) {
        list.push(node);
      }
    });
    return list;
  }

  /**
   * Checks if a character is an operator or text and acts accordingly
   *
   * @param {String} chr Character from the address field
   * @param {String|null} nextChr The following character, or null at the end of input
   */
  checkChar(chr, nextChr) {
    // An escaped character is always literal text, so skip all operator checks
    if (!this.escaped) {
      if (chr === this.operatorExpecting) {
        // Closing operator of the currently open construct
        this.node = {
          type: "operator",
          value: chr
        };
        // Flag closers that are directly followed by more content (no
        // whitespace or separator) so the next text can be glued on
        if (nextChr && ![" ", "\t", "\r", "\n", ",", ";"].includes(nextChr)) {
          this.node.noBreak = true;
        }
        this.list.push(this.node);
        this.node = null;
        this.operatorExpecting = "";
        this.escaped = false;
        return;
      }
      if (!this.operatorExpecting && chr in this.operators) {
        // Opening operator; remember which closing character to wait for
        this.node = {
          type: "operator",
          value: chr
        };
        this.list.push(this.node);
        this.node = null;
        this.operatorExpecting = this.operators[chr];
        this.escaped = false;
        return;
      }
      if (this.operatorExpecting === '"' && chr === "\\") {
        // Backslash inside a quoted string escapes the next character
        this.escaped = true;
        return;
      }
    }

    if (!this.node) {
      this.node = {
        type: "text",
        value: ""
      };
      this.list.push(this.node);
    }
    if (chr === "\n") {
      // Line breaks are kept as a plain space
      chr = " ";
    }
    // Keep printable characters plus space and tab; drop other control characters
    if (chr.charCodeAt(0) >= 33 || [" ", "\t"].includes(chr)) {
      this.node.value += chr;
    }
    this.escaped = false;
  }
}
// Recursion limit: calls made with a depth above this value return an empty list.
const MAX_NESTED_GROUP_DEPTH = 50;

/**
 * Parses an address header value into a list of address objects.
 *
 * @param {String} str Raw address field
 * @param {Object} [options] Optional settings
 * @param {Boolean} [options.flatten] When truthy, group entries are replaced by
 *   their members so the result contains no `group` objects
 * @param {Number} [options._depth] Internal recursion depth counter
 * @return {Array} Parsed address objects
 */
function addressParser(str, options) {
  const settings = options || {};
  const depth = settings._depth || 0;
  if (depth > MAX_NESTED_GROUP_DEPTH) {
    return [];
  }

  // Split the token stream into one token list per comma/semicolon separated entry
  const entries = [];
  let current = [];
  for (const token of new Tokenizer(str).tokenize()) {
    const isSeparator = token.type === "operator" && (token.value === "," || token.value === ";");
    if (!isSeparator) {
      current.push(token);
      continue;
    }
    if (current.length) {
      entries.push(current);
    }
    current = [];
  }
  if (current.length) {
    entries.push(current);
  }

  // Turn every entry into address objects and collect them in order
  let parsedAddresses = [];
  for (const entry of entries) {
    const handled = _handleAddress(entry, depth);
    if (handled.length) {
      parsedAddresses = parsedAddresses.concat(handled);
    }
  }

  if (!settings.flatten) {
    return parsedAddresses;
  }

  // Flatten: recursively replace each group by its members
  const flat = [];
  const collect = (list) => {
    for (const item of list) {
      if (item.group) {
        collect(item.group);
      } else {
        flat.push(item);
      }
    }
  };
  collect(parsedAddresses);
  return flat;
}
var address_parser_default = addressParser;
// Let plain require() callers receive the default export directly, while
// named exports, `__esModule` and `.default` remain reachable on it.
if (module.exports.default) {
  const esmShapedExports = module.exports;
  const primaryExport = esmShapedExports.default;
  // Gather every export other than the default and the interop flag
  const extraExports = {};
  for (const exportName in esmShapedExports) {
    if (exportName === "default" || exportName === "__esModule") {
      continue;
    }
    extraExports[exportName] = esmShapedExports[exportName];
  }
  module.exports = primaryExport;
  Object.assign(module.exports, extraExports);
  // Preserve __esModule and .default for bundler/transpiler interop
  Object.defineProperty(module.exports, "__esModule", { value: true });
  module.exports.default = primaryExport;
}