/*
 * imapflow
 * IMAP Client for Node
 * Version: (not captured)
 * File-viewer header: 712 lines (617 loc) • 35.6 kB, JavaScript
 */
/* eslint new-cap: 0 */
'use strict';
const imapFormalSyntax = require('./imap-formal-syntax');
// Tokenizer states. TokenParser#processString switches on `this.state` using these values.
const STATE_ATOM = 0x001; // inside an atom
const STATE_LITERAL = 0x002; // inside a {size} literal prefix or its payload
const STATE_NORMAL = 0x003; // between tokens; the state the constructor starts in
const STATE_PARTIAL = 0x004; // inside a <...> partial byte range
const STATE_SEQUENCE = 0x005; // inside a sequence set (e.g., 1:5 or *)
const STATE_STRING = 0x006; // inside a double-quoted string
const STATE_TEXT = 0x007; // appends every char verbatim; no code visible in this file switches into this state
// Matches a value made up only of decimal digits (used to spot an atom that is really a sequence set)
const RE_DIGITS = /^\d+$/;
// Matches exactly one decimal digit (used when validating sequence set characters)
const RE_SINGLE_DIGIT = /^\d$/;
// Prevents stack overflow from maliciously crafted deeply-nested IMAP input (e.g., (((((...))))))
// createNode() throws once a node's depth would exceed this value.
const MAX_NODE_DEPTH = 25;
/**
* Tokenizes an IMAP attribute string into a tree of typed nodes.
* Handles all IMAP data types: atoms, quoted strings, literals (including literal8),
* sequences, lists (parenthesized groups), sections (bracketed groups), and partial ranges.
* Enforces a maximum nesting depth of {@link MAX_NODE_DEPTH} to prevent stack overflow
* from malicious input.
*/
class TokenParser {
    /**
     * Creates a new TokenParser.
     *
     * @param {ParserInstance} parent - The parent ParserInstance that owns this token parser. Its `command` is read for context-sensitive parsing; a missing parent or command is tolerated.
     * @param {number} startPos - The starting position offset in the original input, used for error reporting.
     * @param {string} str - The attribute string to tokenize.
     * @param {Object} [options] - Parser options.
     * @param {boolean} [options.literalPlus] - Whether the LITERAL+ extension is in use.
     * @param {Array<Buffer>} [options.literals] - Pre-parsed literal values from the input stream.
     */
    constructor(parent, startPos, str, options) {
        this.str = (str || '').toString();
        this.options = options || {};
        this.parent = parent;

        // The root node doubles as the initial insertion point
        this.tree = this.currentNode = this.createNode();
        this.pos = startPos || 0;

        this.currentNode.type = 'TREE';

        this.state = STATE_NORMAL;
    }

    /**
     * Processes the input string and returns the parsed attributes as a flat array of typed objects.
     * Each attribute is an object with a `type` (e.g., "ATOM", "STRING", "LITERAL", "SEQUENCE")
     * and a `value` property. The atom NIL (any case) becomes `null`. Lists are represented as
     * nested arrays. Sections and partials are attached as `section` / `partial` properties on
     * the preceding attribute object.
     *
     * @returns {Promise<Array>} A promise that resolves to an array of parsed attribute objects and nested arrays.
     * @throws {Error} If the input contains syntax errors or unclosed nodes (the promise rejects).
     */
    async getAttributes() {
        await this.processString();

        const attributes = [];
        // Array that currently receives converted nodes; swapped while inside a LIST or SECTION
        let branch = attributes;

        // The tree depth is bounded by MAX_NODE_DEPTH, so plain synchronous recursion is safe
        const walk = node => {
            const curBranch = branch;

            // A lone "*" left open at the end of input is a plain atom, not a sequence set
            if (!node.isClosed && node.type === 'SEQUENCE' && node.value === '*') {
                node.isClosed = true;
                node.type = 'ATOM';
            }

            // If the node was never closed, throw it
            if (!node.isClosed) {
                const error = new Error(`Unexpected end of input at position ${this.pos + this.str.length - 1} [E9]`);
                error.code = 'ParserError9';
                error.parserContext = { input: this.str, pos: this.pos + this.str.length - 1 };
                throw error;
            }

            const type = (node.type || '').toString().toUpperCase();

            switch (type) {
                case 'LITERAL':
                case 'STRING':
                case 'SEQUENCE':
                    branch.push({
                        type: node.type.toUpperCase(),
                        value: node.value
                    });
                    break;

                case 'ATOM':
                    if (node.value.toUpperCase() === 'NIL') {
                        branch.push(null);
                        break;
                    }
                    branch.push({
                        type: node.type.toUpperCase(),
                        value: node.value
                    });
                    break;

                case 'SECTION':
                    // Children of the section go into the `section` array of the preceding element
                    branch = branch[branch.length - 1].section = [];
                    break;

                case 'LIST': {
                    const list = [];
                    branch.push(list);
                    branch = list;
                    break;
                }

                case 'PARTIAL':
                    // "origin.length" -> [origin, length]
                    branch[branch.length - 1].partial = node.value.split('.').map(Number);
                    break;
            }

            for (const childNode of node.childNodes) {
                walk(childNode);
            }

            // Leave the LIST/SECTION entered by this node, if any
            branch = curBranch;
        };

        walk(this.tree);

        return attributes;
    }

    /**
     * Creates a new node in the parse tree. Each node represents a token or structural
     * element (e.g., atom, string, literal, list, section, partial). The node is automatically
     * appended to the parent's childNodes array if a parent is provided.
     *
     * @param {Object} [parentNode] - The parent node to attach this node to. If omitted, creates a root node.
     * @param {number} [startPos] - The starting position of this node in the original input string.
     * @returns {Object} The newly created node with childNodes, type, value, and isClosed properties.
     * @throws {Error} If the nesting depth exceeds MAX_NODE_DEPTH.
     */
    createNode(parentNode, startPos) {
        const node = {
            childNodes: [],
            type: false,
            value: '',
            isClosed: true
        };

        if (parentNode) {
            node.parentNode = parentNode;
            node.depth = parentNode.depth + 1;
        } else {
            node.depth = 0;
        }

        // Checked before the node is attached, so a rejected node never ends up in the tree
        if (node.depth > MAX_NODE_DEPTH) {
            const error = new Error('Too much nesting in IMAP string');
            error.code = 'MAX_IMAP_NESTING_REACHED';
            error._imapStr = this.str;
            throw error;
        }

        if (typeof startPos === 'number') {
            node.startPos = startPos;
        }

        if (parentNode) {
            parentNode.childNodes.push(node);
        }

        return node;
    }

    /**
     * Processes the entire input string character by character using a state machine.
     * Transitions between states (NORMAL, ATOM, STRING, LITERAL, SEQUENCE, PARTIAL, TEXT)
     * based on the current character and builds the parse tree. This is the main parsing
     * loop that drives the tokenization.
     *
     * @returns {Promise<void>}
     * @throws {Error} If the input contains unexpected characters, unclosed structures, or other syntax errors (the promise rejects).
     */
    async processString() {
        let chr;
        let i;
        let len;

        // Builds a parser error. `index` is relative to this.str; `errChr` is only
        // recorded in parserContext when it is provided.
        const parserError = (num, message, index, errChr) => {
            const error = new Error(message);
            error.code = `ParserError${num}`;
            error.parserContext = { input: this.str, pos: this.pos + index };
            if (errChr !== undefined) {
                error.parserContext.chr = errChr;
            }
            return error;
        };

        const checkSP = () => {
            // jump to the next non whitespace pos
            while (this.str.charAt(i + 1) === ' ') {
                i++;
            }
        };

        // Status responses (OK/NO/BAD/BYE/PREAUTH) use [code] for response codes.
        // A missing parent or command simply means "not a status response".
        const parentCommand = ((this.parent && this.parent.command) || '').toString().toUpperCase();
        const isStatusResponse = ['OK', 'NO', 'BAD', 'BYE', 'PREAUTH'].includes(parentCommand);

        for (i = 0, len = this.str.length; i < len; i++) {
            chr = this.str.charAt(i);

            switch (this.state) {
                case STATE_NORMAL:
                    switch (chr) {
                        // DQUOTE starts a new string
                        case '"':
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'string';
                            this.state = STATE_STRING;
                            this.currentNode.isClosed = false;
                            break;

                        // ( starts a new list
                        case '(':
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'LIST';
                            this.currentNode.isClosed = false;
                            break;

                        // ) closes a list
                        case ')':
                            if (this.currentNode.type !== 'LIST') {
                                throw parserError(10, `Unexpected list terminator ) at position ${this.pos + i} [E10]`, i, chr);
                            }
                            this.currentNode.isClosed = true;
                            this.currentNode.endPos = this.pos + i;
                            this.currentNode = this.currentNode.parentNode;
                            checkSP();
                            break;

                        // ] closes section group
                        case ']':
                            if (this.currentNode.type !== 'SECTION') {
                                throw parserError(11, `Unexpected section terminator ] at position ${this.pos + i} [E11]`, i, chr);
                            }
                            this.currentNode.isClosed = true;
                            this.currentNode.endPos = this.pos + i;
                            this.currentNode = this.currentNode.parentNode;
                            checkSP();
                            break;

                        // < starts a new partial byte range (e.g., BODY[]<0.1024>)
                        case '<':
                            // '<' is only a partial range marker when it immediately follows ']',
                            // which occurs in BODY[section]<origin.length> responses.
                            // In all other contexts, '<' is treated as the start of an ATOM.
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            if (this.str.charAt(i - 1) !== ']') {
                                this.currentNode.type = 'ATOM';
                                this.currentNode.value = chr;
                                this.state = STATE_ATOM;
                            } else {
                                this.currentNode.type = 'PARTIAL';
                                this.state = STATE_PARTIAL;
                                this.currentNode.isClosed = false;
                            }
                            break;

                        // literal8 (RFC 3516): uses ~{size} prefix instead of {size}
                        // literal8 allows binary data containing NUL bytes, unlike regular literals
                        case '~': {
                            const nextChr = this.str.charAt(i + 1);
                            if (nextChr !== '{') {
                                if (imapFormalSyntax['ATOM-CHAR']().includes(nextChr)) {
                                    // '~' not followed by '{' but followed by an ATOM char: treat as ATOM
                                    this.currentNode = this.createNode(this.currentNode, this.pos + i);
                                    this.currentNode.type = 'ATOM';
                                    this.currentNode.value = chr;
                                    this.state = STATE_ATOM;
                                    break;
                                }
                                throw parserError(12, `Unexpected literal8 marker at position ${this.pos + i} [E12]`, i, chr);
                            }
                            // Mark the next literal as literal8 type; consumed when '{' is encountered
                            this.expectedLiteralType = 'literal8';
                            break;
                        }

                        // { starts a new literal (regular {size}\r\n or literal8 ~{size}\r\n)
                        case '{':
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'LITERAL';
                            // Use literal8 type if '~' was seen immediately before, otherwise standard literal
                            this.currentNode.literalType = this.expectedLiteralType || 'literal';
                            this.expectedLiteralType = false;
                            this.state = STATE_LITERAL;
                            this.currentNode.isClosed = false;
                            break;

                        // * starts a new sequence
                        case '*':
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'SEQUENCE';
                            this.currentNode.value = chr;
                            this.currentNode.isClosed = false;
                            this.state = STATE_SEQUENCE;
                            break;

                        // normally a space should never occur
                        case ' ':
                            // just ignore
                            break;

                        // [ starts section
                        case '[':
                            // If it is the *first* element after response command, then process as a response argument list
                            if (isStatusResponse && this.currentNode === this.tree) {
                                this.currentNode.endPos = this.pos + i;

                                // Empty-valued ATOM that carries the response code section
                                this.currentNode = this.createNode(this.currentNode, this.pos + i);
                                this.currentNode.type = 'ATOM';

                                this.currentNode = this.createNode(this.currentNode, this.pos + i);
                                this.currentNode.type = 'SECTION';
                                this.currentNode.isClosed = false;
                                this.state = STATE_NORMAL;

                                // RFC 2221 REFERRAL special case: the payload is an RFC 2192/RFC 5092
                                // IMAP URL (e.g., imap://user@host/mailbox) which contains characters
                                // that would break normal ATOM parsing (colons, slashes, etc.).
                                // We handle this by consuming everything up to ']' as a single ATOM value.
                                if (this.str.slice(i + 1, i + 10).toUpperCase() === 'REFERRAL ') {
                                    const urlEnd = this.str.indexOf(']', i + 10);
                                    if (urlEnd < 0) {
                                        // Without a closing ']' the section can never be closed. Bail out here;
                                        // assigning -1 to the cursor would restart parsing from the beginning.
                                        throw parserError(
                                            9,
                                            `Unexpected end of input at position ${this.pos + this.str.length - 1} [E9]`,
                                            this.str.length - 1
                                        );
                                    }

                                    // create the REFERRAL atom
                                    this.currentNode = this.createNode(this.currentNode, this.pos + i + 1);
                                    this.currentNode.type = 'ATOM';
                                    this.currentNode.endPos = this.pos + i + 8;
                                    this.currentNode.value = 'REFERRAL';
                                    this.currentNode = this.currentNode.parentNode;

                                    // eat all the way through the ] to be the IMAPURL token.
                                    this.currentNode = this.createNode(this.currentNode, this.pos + i + 10);
                                    // just call this an ATOM, even though IMAPURL might be more correct
                                    this.currentNode.type = 'ATOM';
                                    // jump i to the ']'
                                    i = urlEnd;
                                    this.currentNode.endPos = this.pos + i - 1;
                                    this.currentNode.value = this.str.substring(
                                        this.currentNode.startPos - this.pos,
                                        this.currentNode.endPos - this.pos + 1
                                    );
                                    this.currentNode = this.currentNode.parentNode;

                                    // close out the SECTION
                                    this.currentNode.isClosed = true;
                                    this.currentNode = this.currentNode.parentNode;
                                    checkSP();
                                }
                                break;
                            }
                        /* falls through */
                        default:
                            // Any ATOM supported char starts a new Atom sequence, otherwise throw an error
                            // Allow \ as the first char for atom to support system flags
                            // Allow % to support LIST '' %
                            // Allow 8bit characters (presumably unicode)
                            if (!imapFormalSyntax['ATOM-CHAR']().includes(chr) && chr !== '\\' && chr !== '%' && chr.charCodeAt(0) < 0x80) {
                                throw parserError(13, `Unexpected char at position ${this.pos + i} [E13: ${JSON.stringify(chr)}]`, i, chr);
                            }
                            this.currentNode = this.createNode(this.currentNode, this.pos + i);
                            this.currentNode.type = 'ATOM';
                            this.currentNode.value = chr;
                            this.state = STATE_ATOM;
                            break;
                    }
                    break;

                case STATE_ATOM:
                    // An atom is terminated by: space, closing delimiter of parent node,
                    // or encountering a '[' that starts a section for BODY/BINARY commands.

                    // space finishes an atom
                    if (chr === ' ') {
                        this.currentNode.endPos = this.pos + i - 1;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;
                        break;
                    }

                    // ')' or ']' terminates the atom AND closes the enclosing LIST or SECTION
                    if (
                        this.currentNode.parentNode &&
                        ((chr === ')' && this.currentNode.parentNode.type === 'LIST') ||
                            (chr === ']' && this.currentNode.parentNode.type === 'SECTION'))
                    ) {
                        this.currentNode.endPos = this.pos + i - 1;
                        this.currentNode = this.currentNode.parentNode;

                        this.currentNode.isClosed = true;
                        this.currentNode.endPos = this.pos + i;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;

                        checkSP();
                        break;
                    }

                    // If the atom so far is all digits and we see ',' or ':', it is actually
                    // a sequence set (e.g., "1:5" or "1,3,5"), so reclassify and switch state.
                    // The separator itself is still appended by the code below.
                    if ((chr === ',' || chr === ':') && RE_DIGITS.test(this.currentNode.value)) {
                        this.currentNode.type = 'SEQUENCE';
                        this.currentNode.isClosed = true;
                        this.state = STATE_SEQUENCE;
                    }

                    // [ starts a section group for this element
                    // Allowed only for selected elements, otherwise falls through to regular ATOM processing
                    if (chr === '[' && ['BODY', 'BODY.PEEK', 'BINARY', 'BINARY.PEEK'].includes(this.currentNode.value.toUpperCase())) {
                        this.currentNode.endPos = this.pos + i;
                        // The section is a sibling of the atom; getAttributes attaches it to the atom
                        this.currentNode = this.createNode(this.currentNode.parentNode, this.pos + i);
                        this.currentNode.type = 'SECTION';
                        this.currentNode.isClosed = false;
                        this.state = STATE_NORMAL;
                        break;
                    }

                    // if the char is not ATOM compatible, throw. Allow \* as an exception
                    // NOTE: the NO/BAD/OK exemption is an exact, case-sensitive match on parent.command
                    if (
                        !imapFormalSyntax['ATOM-CHAR']().includes(chr) &&
                        chr.charCodeAt(0) < 0x80 && // allow 8bit (presumably unicode) bytes
                        chr !== ']' &&
                        !(chr === '*' && this.currentNode.value === '\\') &&
                        (!this.parent || !this.parent.command || !['NO', 'BAD', 'OK'].includes(this.parent.command))
                    ) {
                        throw parserError(16, `Unexpected char at position ${this.pos + i} [E16: ${JSON.stringify(chr)}]`, i, chr);
                    } else if (this.currentNode.value === '\\*') {
                        // nothing may follow the \* flag
                        throw parserError(17, `Unexpected char at position ${this.pos + i} [E17: ${JSON.stringify(chr)}]`, i, chr);
                    }

                    this.currentNode.value += chr;
                    break;

                case STATE_STRING:
                    // DQUOTE ends the string sequence
                    if (chr === '"') {
                        this.currentNode.endPos = this.pos + i;
                        this.currentNode.isClosed = true;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;

                        checkSP();
                        break;
                    }

                    // \ Escapes the following char
                    if (chr === '\\') {
                        i++;
                        if (i >= len) {
                            throw parserError(18, `Unexpected end of input at position ${this.pos + i} [E18]`, i);
                        }
                        chr = this.str.charAt(i);
                    }

                    this.currentNode.value += chr;
                    break;

                case STATE_PARTIAL:
                    if (chr === '>') {
                        if (this.currentNode.value.at(-1) === '.') {
                            throw parserError(19, `Unexpected end of partial at position ${this.pos + i} [E19]`, i, chr);
                        }
                        this.currentNode.endPos = this.pos + i;
                        this.currentNode.isClosed = true;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;
                        checkSP();
                        break;
                    }

                    // at most one '.', and never as the first char
                    if (chr === '.' && (this.currentNode.value === '' || this.currentNode.value.includes('.'))) {
                        throw parserError(20, `Unexpected partial separator . at position ${this.pos + i} [E20]`, i, chr);
                    }

                    if (!imapFormalSyntax.DIGIT().includes(chr) && chr !== '.') {
                        throw parserError(21, `Unexpected char at position ${this.pos + i} [E21: ${JSON.stringify(chr)}]`, i, chr);
                    }

                    // no digits may follow a leading zero in either number
                    if ((this.currentNode.value === '0' || this.currentNode.value.endsWith('.0')) && chr !== '.') {
                        throw parserError(22, `Invalid partial at position ${this.pos + i} [E22: ${JSON.stringify(chr)}]`, i, chr);
                    }

                    this.currentNode.value += chr;
                    break;

                case STATE_LITERAL:
                    if (this.currentNode.started) {
                        // only relevant if literals are not already parsed out from input

                        // The NULL byte check is intentionally disabled here, see
                        // https://github.com/emailjs/emailjs-imap-handler/commit/f11b2822bedabe492236e8263afc630134a3c41c

                        this.currentNode.chBuffer[this.currentNode.chPos++] = chr.charCodeAt(0);

                        if (this.currentNode.chPos >= this.currentNode.literalLength) {
                            this.currentNode.endPos = this.pos + i;
                            this.currentNode.isClosed = true;
                            this.currentNode.value = this.currentNode.chBuffer.toString('binary');
                            this.currentNode.chBuffer = Buffer.alloc(0);
                            this.currentNode = this.currentNode.parentNode;
                            this.state = STATE_NORMAL;
                            checkSP();
                        }
                        break;
                    }

                    if (chr === '+' && this.options.literalPlus) {
                        this.currentNode.literalPlus = true;
                        break;
                    }

                    if (chr === '}') {
                        if (!('literalLength' in this.currentNode)) {
                            throw parserError(23, `Unexpected literal prefix end char } at position ${this.pos + i} [E23]`, i, chr);
                        }

                        // the prefix must be followed by LF or CRLF
                        if (this.str.charAt(i + 1) === '\n') {
                            i++;
                        } else if (this.str.charAt(i + 1) === '\r' && this.str.charAt(i + 2) === '\n') {
                            i += 2;
                        } else {
                            throw parserError(24, `Unexpected char at position ${this.pos + i} [E24: ${JSON.stringify(chr)}]`, i, chr);
                        }

                        // digits were collected as a string, convert to number now
                        this.currentNode.literalLength = Number(this.currentNode.literalLength);

                        if (!this.currentNode.literalLength) {
                            // special case where literal content length is 0
                            // close the node right away, do not wait for additional input
                            this.currentNode.endPos = this.pos + i;
                            this.currentNode.isClosed = true;
                            this.currentNode = this.currentNode.parentNode;
                            this.state = STATE_NORMAL;
                            checkSP();
                        } else if (this.options.literals) {
                            // use the next precached literal values
                            // (the value is kept exactly as supplied, it is not converted to a string)
                            this.currentNode.value = this.options.literals.shift();

                            this.currentNode.endPos = this.pos + i + this.currentNode.value.length;

                            this.currentNode.started = false;
                            this.currentNode.isClosed = true;
                            this.currentNode = this.currentNode.parentNode;
                            this.state = STATE_NORMAL;
                            checkSP();
                        } else {
                            this.currentNode.started = true;
                            // Allocate expected size buffer. Max size check is already performed
                            // Maybe should use allocUnsafe instead?
                            this.currentNode.chBuffer = Buffer.alloc(this.currentNode.literalLength);
                            this.currentNode.chPos = 0;
                        }
                        break;
                    }

                    if (!imapFormalSyntax.DIGIT().includes(chr)) {
                        throw parserError(25, `Unexpected char at position ${this.pos + i} [E25: ${JSON.stringify(chr)}]`, i, chr);
                    }

                    // no digits may follow a leading zero
                    if (this.currentNode.literalLength === '0') {
                        throw parserError(26, `Invalid literal at position ${this.pos + i} [E26]`, i, chr);
                    }

                    this.currentNode.literalLength = (this.currentNode.literalLength || '') + chr;
                    break;

                case STATE_SEQUENCE:
                    // space finishes the sequence set
                    if (chr === ' ') {
                        if (!RE_SINGLE_DIGIT.test(this.currentNode.value.at(-1)) && this.currentNode.value.at(-1) !== '*') {
                            throw parserError(27, `Unexpected whitespace at position ${this.pos + i} [E27]`, i, chr);
                        }

                        if (this.currentNode.value !== '*' && this.currentNode.value.at(-1) === '*' && this.currentNode.value.at(-2) !== ':') {
                            throw parserError(28, `Unexpected whitespace at position ${this.pos + i} [E28]`, i, chr);
                        }

                        this.currentNode.isClosed = true;
                        this.currentNode.endPos = this.pos + i - 1;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;
                        break;
                    } else if (this.currentNode.parentNode && chr === ']' && this.currentNode.parentNode.type === 'SECTION') {
                        // ']' ends the sequence AND closes the enclosing section
                        this.currentNode.endPos = this.pos + i - 1;
                        this.currentNode = this.currentNode.parentNode;

                        this.currentNode.isClosed = true;
                        this.currentNode.endPos = this.pos + i;
                        this.currentNode = this.currentNode.parentNode;
                        this.state = STATE_NORMAL;

                        checkSP();
                        break;
                    }

                    if (chr === ':') {
                        if (!RE_SINGLE_DIGIT.test(this.currentNode.value.at(-1)) && this.currentNode.value.at(-1) !== '*') {
                            throw parserError(29, `Unexpected range separator : at position ${this.pos + i} [E29]`, i, chr);
                        }
                    } else if (chr === '*') {
                        if (![',', ':'].includes(this.currentNode.value.at(-1))) {
                            throw parserError(30, `Unexpected range wildcard at position ${this.pos + i} [E30]`, i, chr);
                        }
                    } else if (chr === ',') {
                        if (!RE_SINGLE_DIGIT.test(this.currentNode.value.at(-1)) && this.currentNode.value.at(-1) !== '*') {
                            throw parserError(31, `Unexpected sequence separator , at position ${this.pos + i} [E31]`, i, chr);
                        }
                        if (this.currentNode.value.at(-1) === '*' && this.currentNode.value.at(-2) !== ':') {
                            throw parserError(32, `Unexpected sequence separator , at position ${this.pos + i} [E32]`, i, chr);
                        }
                    } else if (!RE_SINGLE_DIGIT.test(chr)) {
                        throw parserError(33, `Unexpected char at position ${this.pos + i} [E33: ${JSON.stringify(chr)}]`, i, chr);
                    }

                    // a digit may not directly follow the wildcard
                    if (RE_SINGLE_DIGIT.test(chr) && this.currentNode.value.at(-1) === '*') {
                        throw parserError(34, `Unexpected number at position ${this.pos + i} [E34: ${JSON.stringify(chr)}]`, i, chr);
                    }

                    this.currentNode.value += chr;
                    break;

                case STATE_TEXT:
                    this.currentNode.value += chr;
                    break;
            }
        }
    }
}
// Expose the TokenParser class as a named CommonJS export
module.exports.TokenParser = TokenParser;