UNPKG

test

Version:

Node.js 18's node:test, as an npm package

530 lines (453 loc) 12.2 kB
// https://github.com/nodejs/node/blob/2483da743cbb48f31c6b3f8cb186d89f31d73611/lib/internal/test_runner/tap_lexer.js 'use strict' const { ArrayPrototypePop, ArrayPrototypePush, MathMax, SafeSet, StringPrototypeIncludes, StringPrototypeTrim } = require('#internal/per_context/primordials') const { codes: { ERR_TAP_LEXER_ERROR } } = require('#internal/errors') const kEOL = '' const kEOF = '' const TokenKind = { EOF: 'EOF', EOL: 'EOL', NEWLINE: 'NewLine', NUMERIC: 'Numeric', LITERAL: 'Literal', KEYWORD: 'Keyword', WHITESPACE: 'Whitespace', COMMENT: 'Comment', DASH: 'Dash', PLUS: 'Plus', HASH: 'Hash', ESCAPE: 'Escape', UNKNOWN: 'Unknown', // TAP tokens TAP: 'TAPKeyword', TAP_VERSION: 'VersionKeyword', TAP_PLAN: 'PlanKeyword', TAP_TEST_POINT: 'TestPointKeyword', TAP_SUBTEST_POINT: 'SubTestPointKeyword', TAP_TEST_OK: 'TestOkKeyword', TAP_TEST_NOTOK: 'TestNotOkKeyword', TAP_YAML_START: 'YamlStartKeyword', TAP_YAML_END: 'YamlEndKeyword', TAP_YAML_BLOCK: 'YamlKeyword', TAP_PRAGMA: 'PragmaKeyword', TAP_BAIL_OUT: 'BailOutKeyword' } class Token { constructor ({ kind, value, stream }) { const valueLength = ('' + value).length this.kind = kind this.value = value this.location = { line: stream.line, column: MathMax(stream.column - valueLength + 1, 1), // 1 based start: MathMax(stream.pos - valueLength, 0), // zero based end: stream.pos - (value === '' ? 0 : 1) // zero based } // EOF is a special case if (value === TokenKind.EOF) { const eofPosition = stream.input.length + 1 // We consider EOF to be outside the stream this.location.start = eofPosition this.location.end = eofPosition this.location.column = stream.column + 1 // 1 based } } } class InputStream { constructor (input) { this.input = input this.pos = 0 this.column = 0 this.line = 1 } eof () { return this.peek() === undefined } peek (offset = 0) { return this.input[this.pos + offset] } next () { const char = this.peek() if (char === undefined) { return undefined } this.pos++ this.column++ if (char === '\n') { this.line++ this.column = 0 } return char } } class TapLexer { static Keywords = new SafeSet([ 'TAP', 'version', 'ok', 'not', '...', '---', '..', 'pragma', '-', '+' // NOTE: "Skip", "Todo" and "Bail out!" literals are deferred to the parser ]) #isComment = false #source = null #line = 1 #column = 0 #escapeStack = [] #lastScannedToken = null constructor (source) { this.#source = new InputStream(source) this.#lastScannedToken = new Token({ kind: TokenKind.EOL, value: kEOL, stream: this.#source }) } scan () { const tokens = [] let chunk = [] while (!this.eof()) { const token = this.#scanToken() // Remember the last scanned token (except for whitespace) if (token.kind !== TokenKind.WHITESPACE) { this.#lastScannedToken = token } ArrayPrototypePush(chunk, token) if (token.kind === TokenKind.NEWLINE) { // Store the current chunk + NEWLINE token ArrayPrototypePush(tokens, chunk) chunk = [] } } if (chunk.length > 0) { ArrayPrototypePush(chunk, this.#scanEOL()) ArrayPrototypePush(tokens, chunk) } // send EOF as a separate chunk ArrayPrototypePush(tokens, [this.#scanEOF()]) return tokens } next () { return this.#source.next() } eof () { return this.#source.eof() } error (message, token, expected = '') { this.#source.error(message, token, expected) } #scanToken () { const char = this.next() if (this.#isEOFSymbol(char)) { return this.#scanEOF() } else if (this.#isNewLineSymbol(char)) { return this.#scanNewLine(char) } else if (this.#isNumericSymbol(char)) { return this.#scanNumeric(char) } else if (this.#isDashSymbol(char)) { return this.#scanDash(char) } else if (this.#isPlusSymbol(char)) { return this.#scanPlus(char) } else if (this.#isHashSymbol(char)) { return this.#scanHash(char) } else if (this.#isEscapeSymbol(char)) { return this.#scanEscapeSymbol(char) } else if (this.#isWhitespaceSymbol(char)) { return this.#scanWhitespace(char) } else if (this.#isLiteralSymbol(char)) { return this.#scanLiteral(char) } throw new ERR_TAP_LEXER_ERROR( `Unexpected character: ${char} at line ${this.#line}, column ${ this.#column }` ) } #scanNewLine (char) { // In case of odd number of ESCAPE symbols, we need to clear the remaining // escape chars from the stack and start fresh for the next line. this.#escapeStack = [] // We also need to reset the comment flag this.#isComment = false return new Token({ kind: TokenKind.NEWLINE, value: char, stream: this.#source }) } #scanEOL () { return new Token({ kind: TokenKind.EOL, value: kEOL, stream: this.#source }) } #scanEOF () { this.#isComment = false return new Token({ kind: TokenKind.EOF, value: kEOF, stream: this.#source }) } #scanEscapeSymbol (char) { // If the escape symbol has been escaped (by previous symbol), // or if the next symbol is a whitespace symbol, // then consume it as a literal. if ( this.#hasTheCurrentCharacterBeenEscaped() || this.#source.peek(1) === TokenKind.WHITESPACE ) { ArrayPrototypePop(this.#escapeStack) return new Token({ kind: TokenKind.LITERAL, value: char, stream: this.#source }) } // Otherwise, consume the escape symbol as an escape symbol that should be ignored by the parser // we also need to push the escape symbol to the escape stack // and consume the next character as a literal (done in the next turn) ArrayPrototypePush(this.#escapeStack, char) return new Token({ kind: TokenKind.ESCAPE, value: char, stream: this.#source }) } #scanWhitespace (char) { return new Token({ kind: TokenKind.WHITESPACE, value: char, stream: this.#source }) } #scanDash (char) { // Peek next 3 characters and check if it's a YAML start marker const marker = char + this.#source.peek() + this.#source.peek(1) if (this.#isYamlStartSymbol(marker)) { this.next() // consume second - this.next() // consume third - return new Token({ kind: TokenKind.TAP_YAML_START, value: marker, stream: this.#source }) } return new Token({ kind: TokenKind.DASH, value: char, stream: this.#source }) } #scanPlus (char) { return new Token({ kind: TokenKind.PLUS, value: char, stream: this.#source }) } #scanHash (char) { const lastCharacter = this.#source.peek(-2) const nextToken = this.#source.peek() // If we encounter a hash symbol at the beginning of a line, // we consider it as a comment if (!lastCharacter || this.#isNewLineSymbol(lastCharacter)) { this.#isComment = true return new Token({ kind: TokenKind.COMMENT, value: char, stream: this.#source }) } // The only valid case where a hash symbol is considered as a hash token // is when it's preceded by a whitespace symbol and followed by a non-hash symbol if ( this.#isWhitespaceSymbol(lastCharacter) && !this.#isHashSymbol(nextToken) ) { return new Token({ kind: TokenKind.HASH, value: char, stream: this.#source }) } const charHasBeenEscaped = this.#hasTheCurrentCharacterBeenEscaped() if (this.#isComment || charHasBeenEscaped) { if (charHasBeenEscaped) { ArrayPrototypePop(this.#escapeStack) } return new Token({ kind: TokenKind.LITERAL, value: char, stream: this.#source }) } // As a fallback, we consume the hash symbol as a literal return new Token({ kind: TokenKind.LITERAL, value: char, stream: this.#source }) } #scanLiteral (char) { let word = char while (!this.#source.eof()) { const nextChar = this.#source.peek() if (this.#isLiteralSymbol(nextChar)) { word += this.#source.next() } else { break } } word = StringPrototypeTrim(word) if (TapLexer.Keywords.has(word)) { const token = this.#scanTAPKeyword(word) if (token) { return token } } if (this.#isYamlEndSymbol(word)) { return new Token({ kind: TokenKind.TAP_YAML_END, value: word, stream: this.#source }) } return new Token({ kind: TokenKind.LITERAL, value: word, stream: this.#source }) } #scanTAPKeyword (word) { const isLastScannedTokenEOLorNewLine = TokenKind.EOL === this.#lastScannedToken.kind || TokenKind.NEWLINE === this.#lastScannedToken.kind if (word === 'TAP' && isLastScannedTokenEOLorNewLine) { return new Token({ kind: TokenKind.TAP, value: word, stream: this.#source }) } if (word === 'version' && this.#lastScannedToken.kind === TokenKind.TAP) { return new Token({ kind: TokenKind.TAP_VERSION, value: word, stream: this.#source }) } if (word === '..' && this.#lastScannedToken.kind === TokenKind.NUMERIC) { return new Token({ kind: TokenKind.TAP_PLAN, value: word, stream: this.#source }) } if (word === 'not' && isLastScannedTokenEOLorNewLine) { return new Token({ kind: TokenKind.TAP_TEST_NOTOK, value: word, stream: this.#source }) } if ( word === 'ok' && (this.#lastScannedToken.kind === TokenKind.TAP_TEST_NOTOK || isLastScannedTokenEOLorNewLine) ) { return new Token({ kind: TokenKind.TAP_TEST_OK, value: word, stream: this.#source }) } if (word === 'pragma' && isLastScannedTokenEOLorNewLine) { return new Token({ kind: TokenKind.TAP_PRAGMA, value: word, stream: this.#source }) } return null } #scanNumeric (char) { let number = char while (!this.#source.eof()) { const nextChar = this.#source.peek() if (this.#isNumericSymbol(nextChar)) { number += nextChar this.#source.next() } else { break } } return new Token({ kind: TokenKind.NUMERIC, value: number, stream: this.#source }) } #hasTheCurrentCharacterBeenEscaped () { // Use the escapeStack to keep track of the escape characters return this.#escapeStack.length > 0 } #isNumericSymbol (char) { return char >= '0' && char <= '9' } #isLiteralSymbol (char) { return ( (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') || this.#isSpecialCharacterSymbol(char) ) } #isSpecialCharacterSymbol (char) { // We deliberately do not include "# \ + -"" in this list // these are used for comments/reasons explanations, pragma and escape characters // whitespace is not included because it is handled separately return StringPrototypeIncludes('!"$%&\'()*,./:;<=>?@[]^_`{|}~', char) } #isWhitespaceSymbol (char) { return char === ' ' || char === '\t' } #isEOFSymbol (char) { return char === undefined } #isNewLineSymbol (char) { return char === '\n' || char === '\r' } #isHashSymbol (char) { return char === '#' } #isDashSymbol (char) { return char === '-' } #isPlusSymbol (char) { return char === '+' } #isEscapeSymbol (char) { return char === '\\' } #isYamlStartSymbol (char) { return char === '---' } #isYamlEndSymbol (char) { return char === '...' } } module.exports = { TapLexer, TokenKind }