@aws-lambda-powertools/jmespath
Version:
A type-safe, modern JMESPath module for parsing and extracting data from JSON documents using JMESPath expressions
335 lines (334 loc) • 11.1 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Lexer = void 0;
const constants_js_1 = require("./constants.js");
const errors_js_1 = require("./errors.js");
/**
* A lexer for JMESPath expressions.
*
* This lexer tokenizes a JMESPath expression into a sequence of tokens.
*/
class Lexer {
#position;
#expression;
#chars;
#current;
#length;
/**
* Tokenize a JMESPath expression.
*
* This method is a generator that yields tokens for the given expression.
*
* @param expression The JMESPath expression to tokenize.
*/
*tokenize(expression) {
this.#initializeForExpression(expression);
while (this.#current !== '' && this.#current !== undefined) {
if (constants_js_1.SIMPLE_TOKENS.has(this.#current)) {
yield {
// biome-ignore lint/style/noNonNullAssertion: We know that SIMPLE_TOKENS has this.#current as a key because we checked for that above.
type: constants_js_1.SIMPLE_TOKENS.get(this.#current),
value: this.#current,
start: this.#position,
end: this.#position + 1,
};
this.#next();
}
else if (constants_js_1.START_IDENTIFIER.has(this.#current)) {
yield this.#consumeIdentifier();
}
else if (constants_js_1.WHITESPACE.has(this.#current)) {
this.#next();
}
else if (this.#current === '[') {
yield this.#consumeSquareBracket();
}
else if (this.#current === `'`) {
yield this.#consumeRawStringLiteral();
}
else if (this.#current === '`') {
yield this.#consumeLiteral();
}
else if (constants_js_1.VALID_NUMBER.has(this.#current)) {
const start = this.#position;
const buff = this.#consumeNumber();
yield {
type: 'number',
value: Number.parseInt(buff),
start: start,
end: start + buff.length,
};
}
else if (this.#current === '-') {
yield this.#consumeNegativeNumber();
}
else if (this.#current === '"') {
yield this.#consumeQuotedIdentifier();
}
else if (['<', '>', '!', '=', '|', '&'].includes(this.#current)) {
yield this.#consumeComparatorSigns(this.#current);
}
else {
throw new errors_js_1.LexerError(this.#position, this.#current);
}
}
yield { type: 'eof', value: '', start: this.#length, end: this.#length };
}
/**
* Consume a comparator sign.
*
* This method is called when the lexer encounters a comparator sign.
*
* @param current The current character
*/
#consumeComparatorSigns = (current) => {
switch (current) {
case '<':
return this.#matchOrElse('=', 'lte', 'lt');
case '>':
return this.#matchOrElse('=', 'gte', 'gt');
case '!':
return this.#matchOrElse('=', 'ne', 'not');
case '|':
return this.#matchOrElse('|', 'or', 'pipe');
case '&':
return this.#matchOrElse('&', 'and', 'expref');
default:
return this.#consumeEqualSign();
}
};
/**
* Consume an equal sign.
*
* This method is called when the lexer encounters an equal sign.
* It checks if the next character is also an equal sign and returns
* the corresponding token.
*/
#consumeEqualSign() {
if (this.#next() === '=') {
this.#next();
return {
type: 'eq',
value: '==',
start: this.#position - 1,
end: this.#position,
};
}
throw new errors_js_1.LexerError(this.#position - 1, '=');
}
/**
* Consume an unquoted identifier.
*
* This method is called when the lexer encounters a character that is a valid
* identifier. It advances the lexer until it finds a character that is not a
* valid identifier and returns the corresponding token.
*/
#consumeIdentifier() {
const start = this.#position;
let buff = this.#current;
while (constants_js_1.VALID_IDENTIFIER.has(this.#next())) {
buff += this.#current;
}
return {
type: 'unquoted_identifier',
value: buff,
start,
end: start + buff.length,
};
}
/**
* Consume a negative number.
*
* This method is called when the lexer encounters a negative sign.
* It checks if the next character is a number and returns the corresponding token.
*/
#consumeNegativeNumber() {
const start = this.#position;
const buff = this.#consumeNumber();
if (buff.length > 1) {
return {
type: 'number',
value: Number.parseInt(buff),
start: start,
end: start + buff.length,
};
}
// If the negative sign is not followed by a number, it is an error.
throw new errors_js_1.LexerError(start, 'Unknown token after "-"');
}
/**
* Consume a raw string that is a number.
*
* It takes the current position and advances
* the lexer until it finds a character that
* is not a number.
*/
#consumeNumber() {
let buff = this.#current;
while (constants_js_1.VALID_NUMBER.has(this.#next())) {
buff += this.#current;
}
return buff;
}
/**
* Consume a square bracket.
*
* This method is called when the lexer encounters a square bracket.
* It checks if the next character is a question mark or a closing
* square bracket and returns the corresponding token.
*/
#consumeSquareBracket() {
const start = this.#position;
const nextChar = this.#next();
if (nextChar === ']') {
this.#next();
return { type: 'flatten', value: '[]', start: start, end: start + 2 };
}
if (nextChar === '?') {
this.#next();
return { type: 'filter', value: '[?', start: start, end: start + 2 };
}
return { type: 'lbracket', value: '[', start: start, end: start + 1 };
}
/**
* Initializes the lexer for the given expression.
*
* We use a separate method for this instead of the constructor
* because we want to be able to reuse the same lexer instance
* and also because we want to be able to expose a public API
* for tokenizing expressions like `new Lexer().tokenize(expression)`.
*
* @param expression The JMESPath expression to tokenize.
*/
#initializeForExpression(expression) {
if (typeof expression !== 'string') {
throw new errors_js_1.EmptyExpressionError();
}
this.#position = 0;
this.#expression = expression;
this.#chars = Array.from(expression);
this.#current = this.#chars[0];
this.#length = this.#expression.length;
}
/**
* Advance the lexer to the next character in the expression.
*/
#next() {
if (this.#position === this.#length - 1) {
this.#current = '';
}
else {
this.#position += 1;
this.#current = this.#chars[this.#position];
}
return this.#current;
}
/**
* Consume until the given delimiter is reached allowing
* for escaping of the delimiter with a backslash (`\`).
*
* @param delimiter The delimiter to consume until.
*/
#consumeUntil(delimiter) {
const start = this.#position;
let buff = '';
this.#next();
while (this.#current !== delimiter) {
if (this.#current === '\\') {
buff += '\\';
this.#next();
}
if (this.#current === '') {
// We've reached the end of the expression (EOF) before
// we found the delimiter. This is an error.
throw new errors_js_1.LexerError(start, this.#expression.substring(start));
}
buff += this.#current;
this.#next();
}
// Skip the closing delimiter
this.#next();
return buff;
}
/**
* Process a literal.
*
* A literal is a JSON string that is enclosed in backticks.
*/
#consumeLiteral() {
const start = this.#position;
const lexeme = this.#consumeUntil('`').replace('\\`', '`');
try {
const parsedJson = JSON.parse(lexeme);
return {
type: 'literal',
value: parsedJson,
start,
end: this.#position - start,
};
}
catch (error) {
throw new errors_js_1.LexerError(start, lexeme);
}
}
/**
* Process a quoted identifier.
*
* A quoted identifier is a string that is enclosed in double quotes.
*/
#consumeQuotedIdentifier() {
const start = this.#position;
const lexeme = `"${this.#consumeUntil('"')}"`;
const tokenLen = this.#position - start;
return {
type: 'quoted_identifier',
value: JSON.parse(lexeme),
start,
end: tokenLen,
};
}
/**
* Process a raw string literal.
*
* A raw string literal is a string that is enclosed in single quotes.
*/
#consumeRawStringLiteral() {
const start = this.#position;
const lexeme = this.#consumeUntil(`'`).replace(`\\'`, `'`);
const tokenLen = this.#position - start;
return {
type: 'literal',
value: lexeme,
start,
end: tokenLen,
};
}
/**
* Match the expected character and return the corresponding token type.
*
* @param expected The expected character
* @param matchType The token type to return if the expected character is found
* @param elseType The token type to return if the expected character is not found
*/
#matchOrElse(expected, matchType, elseType) {
const start = this.#position;
const current = this.#current;
const nextChar = this.#next();
if (nextChar === expected) {
this.#next();
return {
type: matchType,
value: current + nextChar,
start,
end: start + 2,
};
}
return {
type: elseType,
value: current,
start,
end: start,
};
}
}
exports.Lexer = Lexer;