UNPKG

postal-mime

Version:

Email parser for Node.js and browser environments

326 lines (323 loc) 9.66 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var address_parser_exports = {}; __export(address_parser_exports, { default: () => address_parser_default }); module.exports = __toCommonJS(address_parser_exports); var import_decode_strings = require("./decode-strings.cjs"); function _handleAddress(tokens, depth) { let isGroup = false; let state = "text"; let address; let addresses = []; let data = { address: [], comment: [], group: [], text: [], textWasQuoted: [] // Track which text tokens came from inside quotes }; let i; let len; let insideQuotes = false; for (i = 0, len = tokens.length; i < len; i++) { let token = tokens[i]; let prevToken = i ? tokens[i - 1] : null; if (token.type === "operator") { switch (token.value) { case "<": state = "address"; insideQuotes = false; break; case "(": state = "comment"; insideQuotes = false; break; case ":": state = "group"; isGroup = true; insideQuotes = false; break; case '"': insideQuotes = !insideQuotes; state = "text"; break; default: state = "text"; insideQuotes = false; break; } } else if (token.value) { if (state === "address") { token.value = token.value.replace(/^[^<]*<\s*/, ""); } if (prevToken && prevToken.noBreak && data[state].length) { data[state][data[state].length - 1] += token.value; if (state === "text" && insideQuotes) { data.textWasQuoted[data.textWasQuoted.length - 1] = true; } } else { data[state].push(token.value); if (state === "text") { data.textWasQuoted.push(insideQuotes); } } } } if (!data.text.length && data.comment.length) { data.text = data.comment; data.comment = []; } if (isGroup) { data.text = data.text.join(" "); let groupMembers = []; if (data.group.length) { let parsedGroup = addressParser(data.group.join(","), { _depth: depth + 1 }); parsedGroup.forEach((member) => { if (member.group) { groupMembers = groupMembers.concat(member.group); } else { groupMembers.push(member); } }); } addresses.push({ name: (0, import_decode_strings.decodeWords)(data.text || address && address.name), group: groupMembers }); } else { if (!data.address.length && data.text.length) { for (i = data.text.length - 1; i >= 0; i--) { if (!data.textWasQuoted[i] && data.text[i].match(/^[^@\s]+@[^@\s]+$/)) { data.address = data.text.splice(i, 1); data.textWasQuoted.splice(i, 1); break; } } let _regexHandler = function(address2) { if (!data.address.length) { data.address = [address2.trim()]; return " "; } else { return address2; } }; if (!data.address.length) { for (i = data.text.length - 1; i >= 0; i--) { if (!data.textWasQuoted[i]) { data.text[i] = data.text[i].replace(/\s*\b[^@\s]+@[^\s]+\b\s*/, _regexHandler).trim(); if (data.address.length) { break; } } } } } if (!data.text.length && data.comment.length) { data.text = data.comment; data.comment = []; } if (data.address.length > 1) { data.text = data.text.concat(data.address.splice(1)); } data.text = data.text.join(" "); data.address = data.address.join(" "); if (!data.address && /^=\?[^=]+?=$/.test(data.text.trim())) { const decodedText = (0, import_decode_strings.decodeWords)(data.text); if (/<[^<>]+@[^<>]+>/.test(decodedText)) { const parsedSubAddresses = addressParser(decodedText); if (parsedSubAddresses && parsedSubAddresses.length) { return parsedSubAddresses; } } return [{ address: "", name: decodedText }]; } address = { address: data.address || data.text || "", name: (0, import_decode_strings.decodeWords)(data.text || data.address || "") }; if (address.address === address.name) { if ((address.address || "").match(/@/)) { address.name = ""; } else { address.address = ""; } } addresses.push(address); } return addresses; } class Tokenizer { constructor(str) { this.str = (str || "").toString(); this.operatorCurrent = ""; this.operatorExpecting = ""; this.node = null; this.escaped = false; this.list = []; this.operators = { '"': '"', "(": ")", "<": ">", ",": "", ":": ";", // Semicolons are not a legal delimiter per the RFC2822 grammar other // than for terminating a group, but they are also not valid for any // other use in this context. Given that some mail clients have // historically allowed the semicolon as a delimiter equivalent to the // comma in their UI, it makes sense to treat them the same as a comma // when used outside of a group. ";": "" }; } /** * Tokenizes the original input string * * @return {Array} An array of operator|text tokens */ tokenize() { let list = []; for (let i = 0, len = this.str.length; i < len; i++) { let chr = this.str.charAt(i); let nextChr = i < len - 1 ? this.str.charAt(i + 1) : null; this.checkChar(chr, nextChr); } this.list.forEach((node) => { node.value = (node.value || "").toString().trim(); if (node.value) { list.push(node); } }); return list; } /** * Checks if a character is an operator or text and acts accordingly * * @param {String} chr Character from the address field */ checkChar(chr, nextChr) { if (this.escaped) { } else if (chr === this.operatorExpecting) { this.node = { type: "operator", value: chr }; if (nextChr && ![" ", " ", "\r", "\n", ",", ";"].includes(nextChr)) { this.node.noBreak = true; } this.list.push(this.node); this.node = null; this.operatorExpecting = ""; this.escaped = false; return; } else if (!this.operatorExpecting && chr in this.operators) { this.node = { type: "operator", value: chr }; this.list.push(this.node); this.node = null; this.operatorExpecting = this.operators[chr]; this.escaped = false; return; } else if (this.operatorExpecting === '"' && chr === "\\") { this.escaped = true; return; } if (!this.node) { this.node = { type: "text", value: "" }; this.list.push(this.node); } if (chr === "\n") { chr = " "; } if (chr.charCodeAt(0) >= 33 || [" ", " "].includes(chr)) { this.node.value += chr; } this.escaped = false; } } const MAX_NESTED_GROUP_DEPTH = 50; function addressParser(str, options) { options = options || {}; let depth = options._depth || 0; if (depth > MAX_NESTED_GROUP_DEPTH) { return []; } let tokenizer = new Tokenizer(str); let tokens = tokenizer.tokenize(); let addresses = []; let address = []; let parsedAddresses = []; tokens.forEach((token) => { if (token.type === "operator" && (token.value === "," || token.value === ";")) { if (address.length) { addresses.push(address); } address = []; } else { address.push(token); } }); if (address.length) { addresses.push(address); } addresses.forEach((address2) => { address2 = _handleAddress(address2, depth); if (address2.length) { parsedAddresses = parsedAddresses.concat(address2); } }); if (options.flatten) { let addresses2 = []; let walkAddressList = (list) => { list.forEach((address2) => { if (address2.group) { return walkAddressList(address2.group); } else { addresses2.push(address2); } }); }; walkAddressList(parsedAddresses); return addresses2; } return parsedAddresses; } var address_parser_default = addressParser; // Make default export work naturally with require() if (module.exports.default) { var defaultExport = module.exports.default; var namedExports = {}; for (var key in module.exports) { if (key !== 'default' && key !== '__esModule') { namedExports[key] = module.exports[key]; } } module.exports = defaultExport; Object.assign(module.exports, namedExports); // Preserve __esModule and .default for bundler/transpiler interop Object.defineProperty(module.exports, '__esModule', { value: true }); module.exports.default = defaultExport; }