UNPKG

imapflow

Version:

IMAP Client for Node

712 lines (617 loc) 35.6 kB
/* eslint new-cap: 0 */

'use strict';

const imapFormalSyntax = require('./imap-formal-syntax');

// Tokenizer states. Exactly one is active while processString() walks the input.
const STATE_ATOM = 0x001;
const STATE_LITERAL = 0x002;
const STATE_NORMAL = 0x003;
const STATE_PARTIAL = 0x004;
const STATE_SEQUENCE = 0x005;
const STATE_STRING = 0x006;
const STATE_TEXT = 0x007;

const RE_DIGITS = /^\d+$/;
const RE_SINGLE_DIGIT = /^\d$/;

// Prevents stack overflow from maliciously crafted deeply-nested IMAP input (e.g., (((((...))))))
const MAX_NODE_DEPTH = 25;

// Status responses whose leading "[...]" is a response code rather than a BODY-style section
const STATUS_RESPONSE_COMMANDS = ['OK', 'NO', 'BAD', 'BYE', 'PREAUTH'];

// Status responses for which the strict ATOM-CHAR check is relaxed while reading an atom
const RELAXED_ATOM_COMMANDS = ['NO', 'BAD', 'OK'];

/**
 * Returns the command of the owning parser as an upper-case string.
 * A missing parent or missing command yields an empty string instead of throwing,
 * so both call sites in processString() treat "no command" the same way.
 *
 * @param {Object} [parent] - The owning parser instance, if any.
 * @returns {string} Upper-cased command name or an empty string.
 */
function getParentCommand(parent) {
    return ((parent && parent.command) || '').toString().toUpperCase();
}

/**
 * Tokenizes an IMAP attribute string into a tree of typed nodes.
 * Handles all IMAP data types: atoms, quoted strings, literals (including literal8),
 * sequences, lists (parenthesized groups), sections (bracketed groups), and partial ranges.
 * Enforces a maximum nesting depth of {@link MAX_NODE_DEPTH} to prevent stack overflow
 * from malicious input.
 */
class TokenParser {
    /**
     * Creates a new TokenParser.
     *
     * @param {ParserInstance} parent - The parent ParserInstance that owns this token parser. Used to access the parsed command for context-sensitive parsing.
     * @param {number} startPos - The starting position offset in the original input, used for error reporting.
     * @param {string} str - The attribute string to tokenize.
     * @param {Object} [options] - Parser options.
     * @param {boolean} [options.literalPlus] - Whether the LITERAL+ extension is in use.
     * @param {Array<Buffer>} [options.literals] - Pre-parsed literal values from the input stream.
     */
    constructor(parent, startPos, str, options) {
        this.str = (str || '').toString();
        this.options = options || {};
        this.parent = parent;

        // The root node doubles as the initial "current" node
        this.tree = this.currentNode = this.createNode();
        this.pos = startPos || 0;

        this.currentNode.type = 'TREE';

        this.state = STATE_NORMAL;
    }

    /**
     * Processes the input string and returns the parsed attributes as a flat array of typed objects.
     * Each attribute is an object with a `type` (e.g., "ATOM", "STRING", "LITERAL", "SEQUENCE")
     * and a `value` property. Lists are represented as nested arrays. Sections and partials are
     * attached as properties on the preceding attribute object. The atom NIL is emitted as `null`.
     *
     * @returns {Promise<Array>} A promise that resolves to an array of parsed attribute objects and nested arrays.
     * @throws {Error} If the input contains syntax errors or unclosed nodes.
     */
    async getAttributes() {
        await this.processString();

        const attributes = [];
        let branch = attributes;

        // The tree walk does no asynchronous work, so it is a plain synchronous recursion.
        // Recursion depth is bounded by MAX_NODE_DEPTH (enforced in createNode).
        const walk = node => {
            const curBranch = branch;
            let elm;
            let partial;

            // A lone "*" at the end of input is a valid value, even though the
            // sequence state never saw a terminator for it
            if (!node.isClosed && node.type === 'SEQUENCE' && node.value === '*') {
                node.isClosed = true;
                node.type = 'ATOM';
            }

            // If the node was never closed, throw it
            if (!node.isClosed) {
                let error = new Error(`Unexpected end of input at position ${this.pos + this.str.length - 1} [E9]`);
                error.code = 'ParserError9';
                error.parserContext = { input: this.str, pos: this.pos + this.str.length - 1 };
                throw error;
            }

            const type = (node.type || '').toString().toUpperCase();

            switch (type) {
                case 'LITERAL':
                case 'STRING':
                case 'SEQUENCE':
                    elm = {
                        type: node.type.toUpperCase(),
                        value: node.value
                    };
                    branch.push(elm);
                    break;

                case 'ATOM':
                    if (node.value.toUpperCase() === 'NIL') {
                        branch.push(null);
                        break;
                    }
                    elm = {
                        type: node.type.toUpperCase(),
                        value: node.value
                    };
                    branch.push(elm);
                    break;

                case 'SECTION':
                    // Children of the section are collected on the preceding attribute
                    branch = branch[branch.length - 1].section = [];
                    break;

                case 'LIST':
                    elm = [];
                    branch.push(elm);
                    branch = elm;
                    break;

                case 'PARTIAL':
                    // "origin.length" -> [origin, length]
                    partial = node.value.split('.').map(Number);
                    branch[branch.length - 1].partial = partial;
                    break;
            }

            for (const childNode of node.childNodes) {
                walk(childNode);
            }

            // Restore the branch that was active before this node redirected it
            branch = curBranch;
        };

        walk(this.tree);

        return attributes;
    }

    /**
     * Creates a new node in the parse tree. Each node represents a token or structural
     * element (e.g., atom, string, literal, list, section, partial). The node is automatically
     * appended to the parent's childNodes array if a parent is provided.
     *
     * @param {Object} [parentNode] - The parent node to attach this node to. If omitted, creates a root node.
     * @param {number} [startPos] - The starting position of this node in the original input string.
     * @returns {Object} The newly created node with childNodes, type, value, and isClosed properties.
     * @throws {Error} If the nesting depth exceeds MAX_NODE_DEPTH.
     */
    createNode(parentNode, startPos) {
        let node = {
            childNodes: [],
            type: false,
            value: '',
            isClosed: true
        };

        if (parentNode) {
            node.parentNode = parentNode;
            node.depth = parentNode.depth + 1;
        } else {
            node.depth = 0;
        }

        // Checked before the node is attached, so a rejected node never ends up in the tree
        if (node.depth > MAX_NODE_DEPTH) {
            let error = new Error('Too much nesting in IMAP string');
            error.code = 'MAX_IMAP_NESTING_REACHED';
            error._imapStr = this.str;
            throw error;
        }

        if (typeof startPos === 'number') {
            node.startPos = startPos;
        }

        if (parentNode) {
            parentNode.childNodes.push(node);
        }

        return node;
    }

    /**
     * Processes the entire input string character by character using a state machine.
     * Transitions between states (NORMAL, ATOM, STRING, LITERAL, SEQUENCE, PARTIAL, TEXT)
     * based on the current character and builds the parse tree. This is the main parsing
     * loop that drives the tokenization.
     *
     * @returns {Promise<void>}
     * @throws {Error} If the input contains unexpected characters, unclosed structures, or other syntax errors.
     */
    async processString() {
        let chr, i, len;

        const checkSP = () => {
            // jump to the next non whitespace pos
            while (this.str.charAt(i + 1) === ' ') {
                i++;
            }
        };

        for (i = 0, len = this.str.length; i < len; i++) {
            chr = this.str.charAt(i);

            switch (this.state) {
                case STATE_NORMAL:
                    switch (chr) {
                        // DQUOTE starts a new string
                        case '"':
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'string';
                            this.state = STATE_STRING;
                            this.currentNode.isClosed = false;
                            break;

                        // ( starts a new list
                        case '(':
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'LIST';
                            this.currentNode.isClosed = false;
                            break;

                        // ) closes a list
                        case ')':
                            if (this.currentNode.type !== 'LIST') {
                                let error = new Error(`Unexpected list terminator ) at position ${this.pos + i} [E10]`);
                                error.code = 'ParserError10';
                                error.parserContext = { input: this.str, pos: this.pos + i, chr };
                                throw error;
                            }

                            this.currentNode.isClosed = true;
                            this.currentNode.endPos = this.pos + i;
                            this.currentNode = this.currentNode.parentNode;

                            checkSP();
                            break;

                        // ] closes section group
                        case ']':
                            if (this.currentNode.type !== 'SECTION') {
                                let error = new Error(`Unexpected section terminator ] at position ${this.pos + i} [E11]`);
                                error.code = 'ParserError11';
                                error.parserContext = { input: this.str, pos: this.pos + i, chr };
                                throw error;
                            }

                            this.currentNode.isClosed = true;
                            this.currentNode.endPos = this.pos + i;
                            this.currentNode = this.currentNode.parentNode;

                            checkSP();
                            break;

                        // < starts a new partial byte range (e.g., BODY[]<0.1024>)
                        case '<':
                            // '<' is only a partial range marker when it immediately follows ']',
                            // which occurs in BODY[section]<origin.length> responses.
                            // In all other contexts, '<' is treated as the start of an ATOM.
                            if (this.str.charAt(i - 1) !== ']') {
                                this.currentNode = this.createNode(this.currentNode, this.pos + i);
                                this.currentNode.type = 'ATOM';
                                this.currentNode.value = chr;
                                this.state = STATE_ATOM;
                            } else {
                                this.currentNode = this.createNode(this.currentNode, this.pos + i);
                                this.currentNode.type = 'PARTIAL';
                                this.state = STATE_PARTIAL;
                                this.currentNode.isClosed = false;
                            }
                            break;

                        // literal8 (RFC 3516): uses ~{size} prefix instead of {size}
                        // literal8 allows binary data containing NUL bytes, unlike regular literals
                        case '~': {
                            let nextChr = this.str.charAt(i + 1);
                            if (nextChr !== '{') {
                                if (imapFormalSyntax['ATOM-CHAR']().includes(nextChr)) {
                                    // '~' not followed by '{' but followed by an ATOM char: treat as ATOM
                                    this.currentNode = this.createNode(this.currentNode, this.pos + i);
                                    this.currentNode.type = 'ATOM';
                                    this.currentNode.value = chr;
                                    this.state = STATE_ATOM;
                                    break;
                                }

                                let error = new Error(`Unexpected literal8 marker at position ${this.pos + i} [E12]`);
                                error.code = 'ParserError12';
                                error.parserContext = { input: this.str, pos: this.pos + i, chr };
                                throw error;
                            }

                            // Mark the next literal as literal8 type; consumed when '{' is encountered
                            this.expectedLiteralType = 'literal8';
                            break;
                        }

                        // { starts a new literal (regular {size}\r\n or literal8 ~{size}\r\n)
                        case '{':
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'LITERAL';
                            // Use literal8 type if '~' was seen immediately before, otherwise standard literal
                            this.currentNode.literalType = this.expectedLiteralType || 'literal';
                            this.expectedLiteralType = false;
                            this.state = STATE_LITERAL;
                            this.currentNode.isClosed = false;
                            break;

                        // * starts a new sequence
                        case '*':
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'SEQUENCE';
                            this.currentNode.value = chr;
                            this.currentNode.isClosed = false;
                            this.state = STATE_SEQUENCE;
                            break;

                        // normally a space should never occur
                        case ' ':
                            // just ignore
                            break;

                        // [ starts section
                        case '[':
                            // If it is the *first* element after response command, then process as a response argument list
                            // Status responses (OK/NO/BAD/BYE/PREAUTH) use [code] for response codes
                            if (STATUS_RESPONSE_COMMANDS.includes(getParentCommand(this.parent)) && this.currentNode === this.tree) {
                                this.currentNode.endPos = this.pos + i;

                                // Empty ATOM acts as the carrier the section gets attached to
                                this.currentNode = this.createNode(this.currentNode, this.pos + i);
                                this.currentNode.type = 'ATOM';

                                this.currentNode = this.createNode(this.currentNode, this.pos + i);
                                this.currentNode.type = 'SECTION';
                                this.currentNode.isClosed = false;
                                this.state = STATE_NORMAL;

                                // RFC 2221 REFERRAL special case: the payload is an RFC 2192/RFC 5092
                                // IMAP URL (e.g., imap://user@host/mailbox) which contains characters
                                // that would break normal ATOM parsing (colons, slashes, etc.).
                                // We handle this by consuming everything up to ']' as a single ATOM value.
                                if (this.str.slice(i + 1, i + 10).toUpperCase() === 'REFERRAL ') {
                                    // Without a closing ']' there is nothing to jump to. Previously the
                                    // failed lookup (-1) was assigned to the loop index, which restarted
                                    // the scan from the beginning and silently produced a bogus tree.
                                    // Report it the same way as any other unclosed section.
                                    const closePos = this.str.indexOf(']', i + 10);
                                    if (closePos < 0) {
                                        let error = new Error(`Unexpected end of input at position ${this.pos + this.str.length - 1} [E9]`);
                                        error.code = 'ParserError9';
                                        error.parserContext = { input: this.str, pos: this.pos + this.str.length - 1 };
                                        throw error;
                                    }

                                    // create the REFERRAL atom
                                    this.currentNode = this.createNode(this.currentNode, this.pos + i + 1);
                                    this.currentNode.type = 'ATOM';
                                    this.currentNode.endPos = this.pos + i + 8;
                                    this.currentNode.value = 'REFERRAL';
                                    this.currentNode = this.currentNode.parentNode;

                                    // eat all the way through the ] to be the IMAPURL token.
                                    this.currentNode = this.createNode(this.currentNode, this.pos + i + 10);
                                    // just call this an ATOM, even though IMAPURL might be more correct
                                    this.currentNode.type = 'ATOM';
                                    // jump i to the ']'
                                    i = closePos;
                                    this.currentNode.endPos = this.pos + i - 1;
                                    this.currentNode.value = this.str.substring(this.currentNode.startPos - this.pos, this.currentNode.endPos - this.pos + 1);
                                    this.currentNode = this.currentNode.parentNode;

                                    // close out the SECTION
                                    this.currentNode.isClosed = true;
                                    this.currentNode = this.currentNode.parentNode;

                                    checkSP();
                                }

                                break;
                            }

                        /* falls through */
                        default:
                            // Any ATOM supported char starts a new Atom sequence, otherwise throw an error
                            // Allow \ as the first char for atom to support system flags
                            // Allow % to support LIST '' %
                            // Allow 8bit characters (presumably unicode)
                            if (!imapFormalSyntax['ATOM-CHAR']().includes(chr) && chr !== '\\' && chr !== '%' && chr.charCodeAt(0) < 0x80) {
                                let error = new Error(`Unexpected char at position ${this.pos + i} [E13: ${JSON.stringify(chr)}]`);
                                error.code = 'ParserError13';
                                error.parserContext = { input: this.str, pos: this.pos + i, chr };
                                throw error;
                            }

                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'ATOM';
                            this.currentNode.value = chr;
                            this.state = STATE_ATOM;
                            break;
                    }
                    break;

                case STATE_ATOM:
                    // An atom is terminated by: space, closing delimiter of parent node,
                    // or encountering a '[' that starts a section for BODY/BINARY commands.

                    // space finishes an atom
                    if (chr === ' ') {
                        this.currentNode.endPos = this.pos + i - 1;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;
                        break;
                    }

                    // ')' or ']' terminates the atom AND closes the enclosing LIST or SECTION
                    if (
                        this.currentNode.parentNode &&
                        ((chr === ')' && this.currentNode.parentNode.type === 'LIST') || (chr === ']' && this.currentNode.parentNode.type === 'SECTION'))
                    ) {
                        this.currentNode.endPos = this.pos + i - 1;
                        this.currentNode = this.currentNode.parentNode;

                        this.currentNode.isClosed = true;
                        this.currentNode.endPos = this.pos + i;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;

                        checkSP();
                        break;
                    }

                    // If the atom so far is all digits and we see ',' or ':', it is actually
                    // a sequence set (e.g., "1:5" or "1,3,5"), so reclassify and switch state.
                    // No break: the separator itself is appended to the value further below.
                    if ((chr === ',' || chr === ':') && RE_DIGITS.test(this.currentNode.value)) {
                        this.currentNode.type = 'SEQUENCE';
                        this.currentNode.isClosed = true;
                        this.state = STATE_SEQUENCE;
                    }

                    // [ starts a section group for this element
                    // Allowed only for selected elements, otherwise falls through to regular ATOM processing
                    if (chr === '[' && ['BODY', 'BODY.PEEK', 'BINARY', 'BINARY.PEEK'].includes(this.currentNode.value.toUpperCase())) {
                        this.currentNode.endPos = this.pos + i;
                        // The section is a sibling of the atom, not its child
                        this.currentNode = this.createNode(this.currentNode.parentNode, this.pos + i);
                        this.currentNode.type = 'SECTION';
                        this.currentNode.isClosed = false;
                        this.state = STATE_NORMAL;
                        break;
                    }

                    // if the char is not ATOM compatible, throw. Allow \* as an exception
                    if (
                        !imapFormalSyntax['ATOM-CHAR']().includes(chr) &&
                        chr.charCodeAt(0) < 0x80 && // allow 8bit (presumably unicode) bytes
                        chr !== ']' &&
                        !(chr === '*' && this.currentNode.value === '\\') &&
                        // status responses are matched case-insensitively, same as the '[' handling above
                        !RELAXED_ATOM_COMMANDS.includes(getParentCommand(this.parent))
                    ) {
                        let error = new Error(`Unexpected char at position ${this.pos + i} [E16: ${JSON.stringify(chr)}]`);
                        error.code = 'ParserError16';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    } else if (this.currentNode.value === '\\*') {
                        // Nothing may follow the \* flag within the same atom
                        let error = new Error(`Unexpected char at position ${this.pos + i} [E17: ${JSON.stringify(chr)}]`);
                        error.code = 'ParserError17';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    }

                    this.currentNode.value += chr;
                    break;

                case STATE_STRING:
                    // DQUOTE ends the string sequence
                    if (chr === '"') {
                        this.currentNode.endPos = this.pos + i;
                        this.currentNode.isClosed = true;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;

                        checkSP();
                        break;
                    }

                    // \ Escapes the following char
                    if (chr === '\\') {
                        i++;
                        if (i >= len) {
                            let error = new Error(`Unexpected end of input at position ${this.pos + i} [E18]`);
                            error.code = 'ParserError18';
                            error.parserContext = { input: this.str, pos: this.pos + i };
                            throw error;
                        }
                        chr = this.str.charAt(i);
                    }

                    this.currentNode.value += chr;
                    break;

                case STATE_PARTIAL:
                    if (chr === '>') {
                        // "<5.>" is not a valid partial
                        if (this.currentNode.value.at(-1) === '.') {
                            let error = new Error(`Unexpected end of partial at position ${this.pos + i} [E19]`);
                            error.code = 'ParserError19';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }
                        this.currentNode.endPos = this.pos + i;
                        this.currentNode.isClosed = true;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;

                        checkSP();
                        break;
                    }

                    // at most one separator, and never as the first char
                    if (chr === '.' && (this.currentNode.value === '' || this.currentNode.value.includes('.'))) {
                        let error = new Error(`Unexpected partial separator . at position ${this.pos + i} [E20]`);
                        error.code = 'ParserError20';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    }

                    if (!imapFormalSyntax.DIGIT().includes(chr) && chr !== '.') {
                        let error = new Error(`Unexpected char at position ${this.pos + i} [E21: ${JSON.stringify(chr)}]`);
                        error.code = 'ParserError21';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    }

                    // no leading zeroes: "0" or ".0" may only be followed by the separator
                    if ((this.currentNode.value === '0' || this.currentNode.value.endsWith('.0')) && chr !== '.') {
                        let error = new Error(`Invalid partial at position ${this.pos + i} [E22: ${JSON.stringify(chr)}]`);
                        error.code = 'ParserError22';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    }

                    this.currentNode.value += chr;
                    break;

                case STATE_LITERAL:
                    if (this.currentNode.started) {
                        // only relevant if literals are not already parsed out from input

                        // Disabled NULL byte check
                        // See https://github.com/emailjs/emailjs-imap-handler/commit/f11b2822bedabe492236e8263afc630134a3c41c
                        /*
                        if (chr === '\u0000') {
                            throw new Error('Unexpected \\x00 at position ' + (this.pos + i));
                        }
                        */

                        this.currentNode.chBuffer[this.currentNode.chPos++] = chr.charCodeAt(0);

                        if (this.currentNode.chPos >= this.currentNode.literalLength) {
                            this.currentNode.endPos = this.pos + i;
                            this.currentNode.isClosed = true;
                            this.currentNode.value = this.currentNode.chBuffer.toString('binary');
                            // release the collected bytes, the value now lives in node.value
                            this.currentNode.chBuffer = Buffer.alloc(0);
                            this.currentNode = this.currentNode.parentNode;
                            this.state = STATE_NORMAL;

                            checkSP();
                        }
                        break;
                    }

                    if (chr === '+' && this.options.literalPlus) {
                        this.currentNode.literalPlus = true;
                        break;
                    }

                    if (chr === '}') {
                        // "{}" without any size digits
                        if (!('literalLength' in this.currentNode)) {
                            let error = new Error(`Unexpected literal prefix end char } at position ${this.pos + i} [E23]`);
                            error.code = 'ParserError23';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }

                        // the size prefix must be followed by a line break (LF or CRLF)
                        if (this.str.charAt(i + 1) === '\n') {
                            i++;
                        } else if (this.str.charAt(i + 1) === '\r' && this.str.charAt(i + 2) === '\n') {
                            i += 2;
                        } else {
                            let error = new Error(`Unexpected char at position ${this.pos + i} [E24: ${JSON.stringify(chr)}]`);
                            error.code = 'ParserError24';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }

                        this.currentNode.literalLength = Number(this.currentNode.literalLength);

                        if (!this.currentNode.literalLength) {
                            // special case where literal content length is 0
                            // close the node right away, do not wait for additional input
                            this.currentNode.endPos = this.pos + i;
                            this.currentNode.isClosed = true;
                            this.currentNode = this.currentNode.parentNode;
                            this.state = STATE_NORMAL;

                            checkSP();
                        } else if (this.options.literals) {
                            // use the next precached literal values
                            const literalValue = this.options.literals.shift();

                            // Fewer pre-parsed literals than literal markers: the payload for this
                            // marker never arrived. Raise a parser error instead of failing with a
                            // TypeError when reading the length of an undefined value.
                            if (literalValue === undefined || literalValue === null) {
                                let error = new Error(`Unexpected end of input at position ${this.pos + i} [E9]`);
                                error.code = 'ParserError9';
                                error.parserContext = { input: this.str, pos: this.pos + i };
                                throw error;
                            }

                            this.currentNode.value = literalValue;

                            // only APPEND arguments are kept as Buffers
                            /*
                            if ((this.parent.command || '').toString().toUpperCase() !== 'APPEND') {
                                this.currentNode.value = this.currentNode.value.toString('binary');
                            }
                            */

                            this.currentNode.endPos = this.pos + i + this.currentNode.value.length;

                            this.currentNode.started = false;
                            this.currentNode.isClosed = true;
                            this.currentNode = this.currentNode.parentNode;
                            this.state = STATE_NORMAL;

                            checkSP();
                        } else {
                            this.currentNode.started = true;
                            // Allocate expected size buffer. Max size check is already performed
                            // Maybe should use allocUnsafe instead?
                            this.currentNode.chBuffer = Buffer.alloc(this.currentNode.literalLength);
                            this.currentNode.chPos = 0;
                        }
                        break;
                    }

                    if (!imapFormalSyntax.DIGIT().includes(chr)) {
                        let error = new Error(`Unexpected char at position ${this.pos + i} [E25: ${JSON.stringify(chr)}]`);
                        error.code = 'ParserError25';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    }

                    // no leading zeroes in the size prefix
                    if (this.currentNode.literalLength === '0') {
                        let error = new Error(`Invalid literal at position ${this.pos + i} [E26]`);
                        error.code = 'ParserError26';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    }

                    // size digits are collected as a string and converted once '}' is seen
                    this.currentNode.literalLength = (this.currentNode.literalLength || '') + chr;
                    break;

                case STATE_SEQUENCE:
                    // space finishes the sequence set
                    if (chr === ' ') {
                        if (!RE_SINGLE_DIGIT.test(this.currentNode.value.at(-1)) && this.currentNode.value.at(-1) !== '*') {
                            let error = new Error(`Unexpected whitespace at position ${this.pos + i} [E27]`);
                            error.code = 'ParserError27';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }

                        if (this.currentNode.value !== '*' && this.currentNode.value.at(-1) === '*' && this.currentNode.value.at(-2) !== ':') {
                            let error = new Error(`Unexpected whitespace at position ${this.pos + i} [E28]`);
                            error.code = 'ParserError28';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }

                        this.currentNode.isClosed = true;
                        this.currentNode.endPos = this.pos + i - 1;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;
                        break;
                    } else if (this.currentNode.parentNode && chr === ']' && this.currentNode.parentNode.type === 'SECTION') {
                        // ']' ends the sequence AND closes the enclosing section
                        this.currentNode.endPos = this.pos + i - 1;
                        this.currentNode = this.currentNode.parentNode;

                        this.currentNode.isClosed = true;
                        this.currentNode.endPos = this.pos + i;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;

                        checkSP();
                        break;
                    }

                    if (chr === ':') {
                        if (!RE_SINGLE_DIGIT.test(this.currentNode.value.at(-1)) && this.currentNode.value.at(-1) !== '*') {
                            let error = new Error(`Unexpected range separator : at position ${this.pos + i} [E29]`);
                            error.code = 'ParserError29';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }
                    } else if (chr === '*') {
                        if (![',', ':'].includes(this.currentNode.value.at(-1))) {
                            let error = new Error(`Unexpected range wildcard at position ${this.pos + i} [E30]`);
                            error.code = 'ParserError30';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }
                    } else if (chr === ',') {
                        if (!RE_SINGLE_DIGIT.test(this.currentNode.value.at(-1)) && this.currentNode.value.at(-1) !== '*') {
                            let error = new Error(`Unexpected sequence separator , at position ${this.pos + i} [E31]`);
                            error.code = 'ParserError31';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }

                        if (this.currentNode.value.at(-1) === '*' && this.currentNode.value.at(-2) !== ':') {
                            let error = new Error(`Unexpected sequence separator , at position ${this.pos + i} [E32]`);
                            error.code = 'ParserError32';
                            error.parserContext = { input: this.str, pos: this.pos + i, chr };
                            throw error;
                        }
                    } else if (!RE_SINGLE_DIGIT.test(chr)) {
                        let error = new Error(`Unexpected char at position ${this.pos + i} [E33: ${JSON.stringify(chr)}]`);
                        error.code = 'ParserError33';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    }

                    // a wildcard can not be followed by a number
                    if (RE_SINGLE_DIGIT.test(chr) && this.currentNode.value.at(-1) === '*') {
                        let error = new Error(`Unexpected number at position ${this.pos + i} [E34: ${JSON.stringify(chr)}]`);
                        error.code = 'ParserError34';
                        error.parserContext = { input: this.str, pos: this.pos + i, chr };
                        throw error;
                    }

                    this.currentNode.value += chr;
                    break;

                case STATE_TEXT:
                    this.currentNode.value += chr;
                    break;
            }
        }
    }
}

module.exports.TokenParser = TokenParser;