@cucumber/gherkin
Version:
Gherkin parser
365 lines (327 loc) • 10.8 kB
text/typescript
import * as messages from '@cucumber/messages'
import { compareStepKeywords } from './compareStepKeywords'
import type Dialect from './Dialect'
import { NoSuchLanguageException } from './Errors'
import DIALECTS from './gherkin-languages.json'
import type { Item } from './IToken'
import type ITokenMatcher from './ITokenMatcher'
import { type Token, TokenType } from './Parser'
// All dialects from gherkin-languages.json, looked up by dialect name (e.g. 'en').
const DIALECT_DICT: Record<string, Dialect> = DIALECTS
// Opening DocString fence: three or more backticks (group 1) followed by the
// remainder of the line (group 2), which is used as the media type.
const DEFAULT_DOC_STRING_SEPARATOR = /^(```[`]*)(.*)/
/**
 * Appends `keywordType` to the list of keyword types registered for each of
 * the given keywords, creating the list on first use.
 *
 * A keyword may end up with several types when it is passed in more than one
 * call (or more than once in `keywords`).
 *
 * @param h - map from keyword to its keyword types; mutated in place
 * @param keywords - keywords to register
 * @param keywordType - type to append for every keyword
 */
function addKeywordTypeMappings(
  h: { [key: string]: messages.StepKeywordType[] },
  keywords: readonly string[],
  keywordType: messages.StepKeywordType
) {
  for (const k of keywords) {
    // Own-property check: `k in h` also sees inherited members such as
    // `constructor` or `toString`, which would skip the initialisation and
    // make the `push` below fail on a non-array value.
    if (!Object.prototype.hasOwnProperty.call(h, k)) {
      h[k] = []
    }
    h[k].push(keywordType)
  }
}
/**
 * Token matcher for Gherkin written inside Markdown documents.
 *
 * Markdown constructs are mapped onto Gherkin tokens as follows:
 * - header lines (`#` to `######`) followed by a dialect keyword and `:` are
 *   Feature / Background / Rule / Scenario / Examples lines,
 * - bullet list items (`*`, `+`, `-`) starting with a step keyword are steps,
 * - backtick-quoted `@tags` form tag lines,
 * - fenced code blocks (three or more backticks) are DocStrings,
 * - pipe tables indented by 2-5 whitespace characters are data tables, with
 *   the GFM delimiter row treated as a comment.
 *
 * Any line that is none of the above is reported as an Empty token.
 */
export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<TokenType> {
  private dialect: Dialect
  private dialectName: string
  private readonly nonStarStepKeywords: string[]
  private readonly stepRegexp: RegExp
  private readonly headerRegexp: RegExp
  // DEFAULT_DOC_STRING_SEPARATOR while outside a DocString; inside one, a
  // pattern that only matches the exact fence that opened it.
  private activeDocStringSeparator: RegExp
  // Indent of the opening DocString fence, stripped from DocString content lines.
  private indentToRemove: number
  // Set once match_FeatureLine has matched a line; cleared by reset().
  private matchedFeatureLine: boolean
  private keywordTypesMap: { [key: string]: messages.StepKeywordType[] }

  /**
   * @param defaultDialectName - name of the dialect to use, e.g. `'en'`
   * @throws NoSuchLanguageException when `defaultDialectName` is not a known dialect
   */
  constructor(private readonly defaultDialectName: string = 'en') {
    // Go through changeDialect so that an unknown dialect name is reported as a
    // NoSuchLanguageException instead of a TypeError on the first property
    // access below. It also sets dialectName and the keyword type map.
    this.changeDialect(defaultDialectName)

    const dialect = this.dialect
    const stepKeywords = ([] as string[]).concat(
      dialect.given,
      dialect.when,
      dialect.then,
      dialect.and,
      dialect.but
    )
    // Drop the '* ' keyword and duplicates (keeping first occurrences), then sort.
    this.nonStarStepKeywords = stepKeywords
      .filter((keyword, index) => keyword !== '* ' && stepKeywords.indexOf(keyword) === index)
      .sort(compareStepKeywords)

    this.stepRegexp = new RegExp(
      `${KeywordPrefix.BULLET}(${this.nonStarStepKeywords.map(escapeRegExp).join('|')})`
    )

    const headerKeywords = ([] as string[])
      .concat(
        dialect.feature,
        dialect.background,
        dialect.rule,
        dialect.scenarioOutline,
        dialect.scenario,
        dialect.examples
      )
      .filter((keyword, index, all) => all.indexOf(keyword) === index)
    this.headerRegexp = new RegExp(
      `${KeywordPrefix.HEADER}(${headerKeywords.map(escapeRegExp).join('|')})`
    )

    this.reset()
  }

  /**
   * Switches to another dialect and rebuilds the keyword type map.
   *
   * @param newDialectName - name of the dialect to switch to
   * @param location - location passed on to the exception when the dialect is unknown
   * @throws NoSuchLanguageException when `newDialectName` is not a known dialect
   */
  changeDialect(newDialectName: string, location?: messages.Location): void {
    const newDialect = DIALECT_DICT[newDialectName]
    if (!newDialect) {
      throw NoSuchLanguageException.create(newDialectName, location)
    }

    this.dialectName = newDialectName
    this.dialect = newDialect
    this.initializeKeywordTypes()
  }

  /**
   * Rebuilds the keyword -> step keyword types map from the current dialect.
   * A keyword shared by several groups ends up with more than one type.
   */
  initializeKeywordTypes(): void {
    this.keywordTypesMap = {}
    const dialect = this.dialect
    addKeywordTypeMappings(this.keywordTypesMap, dialect.given, messages.StepKeywordType.CONTEXT)
    addKeywordTypeMappings(this.keywordTypesMap, dialect.when, messages.StepKeywordType.ACTION)
    addKeywordTypeMappings(this.keywordTypesMap, dialect.then, messages.StepKeywordType.OUTCOME)
    addKeywordTypeMappings(
      this.keywordTypesMap,
      ([] as string[]).concat(dialect.and, dialect.but),
      messages.StepKeywordType.CONJUNCTION
    )
  }

  // We've made a deliberate choice not to support `# language: [ISO 639-1]` headers or similar
  // in Markdown. Users should specify a language globally. This can be done in
  // cucumber-js using the --language [ISO 639-1] option.
  /**
   * Never matches; only verifies that a token was supplied.
   *
   * @throws Error when `token` is falsy
   */
  match_Language(token: Token): boolean {
    if (!token) throw new Error('no token')
    return false
  }

  /**
   * Matches blank lines and every line that no other matcher recognises.
   *
   * The probing matchers below mutate the token and this matcher's state, so
   * they are always run, in this exact order, even for a blank line.
   */
  match_Empty(token: Token): boolean {
    const isBlank = Boolean(token.line.isEmpty)
    const matchesNothingElse =
      !this.match_TagLine(token) &&
      !this.match_FeatureLine(token) &&
      !this.match_ScenarioLine(token) &&
      !this.match_BackgroundLine(token) &&
      !this.match_ExamplesLine(token) &&
      !this.match_RuleLine(token) &&
      !this.match_TableRow(token) &&
      !this.match_Comment(token) &&
      !this.match_Language(token) &&
      !this.match_DocStringSeparator(token) &&
      !this.match_EOF(token) &&
      !this.match_StepLine(token)

    // A line that is not recognised as Gherkin is neutered into an Empty token.
    const result = isBlank || matchesNothingElse
    if (result) {
      token.matchedType = TokenType.Empty
    }
    return this.setTokenMatched(token, null, result)
  }

  /** Always matches, taking the line as free text. */
  match_Other(token: Token): boolean {
    // Take the entire line, except removing DocString indents.
    const text = token.line.getLineText(this.indentToRemove)
    token.matchedType = TokenType.Other
    token.matchedText = text
    // Note: setTokenMatched below assigns matchedIndent again.
    token.matchedIndent = 0
    return this.setTokenMatched(token, null, true)
  }

  /** Matches the delimiter row of a GFM table (e.g. `| --- | :-: |`). */
  match_Comment(token: Token): boolean {
    const result = token.line.startsWith('|') && this.isGfmTableSeparator(token.line.getTableCells())
    return this.setTokenMatched(token, null, result)
  }

  /**
   * Matches the opening or closing fence of a DocString.
   *
   * An opening fence narrows the active separator to that exact fence and
   * records its indent; the matching closing fence restores the default
   * separator.
   */
  match_DocStringSeparator(token: Token): boolean {
    const match = token.line.trimmedLineText.match(this.activeDocStringSeparator)
    const [, newSeparator, mediaType] = match || []
    if (!newSeparator) {
      return this.setTokenMatched(token, null, false)
    }

    if (this.activeDocStringSeparator === DEFAULT_DOC_STRING_SEPARATOR) {
      // Opening fence: from now on only the very same fence closes the DocString.
      this.activeDocStringSeparator = new RegExp(`^(${newSeparator})$`)
      this.indentToRemove = token.line.indent
    } else {
      // Closing fence.
      this.activeDocStringSeparator = DEFAULT_DOC_STRING_SEPARATOR
    }

    token.matchedKeyword = newSeparator
    token.matchedType = TokenType.DocStringSeparator
    token.matchedText = mediaType || ''
    return this.setTokenMatched(token, null, true)
  }

  /** Matches the end-of-file token. */
  match_EOF(token: Token): boolean {
    const result = Boolean(token.isEof)
    if (result) {
      token.matchedType = TokenType.EOF
    }
    return this.setTokenMatched(token, null, result)
  }

  /**
   * Matches the feature line. Once a line has matched, later calls return
   * false until reset() is called.
   */
  match_FeatureLine(token: Token): boolean {
    if (this.matchedFeatureLine) {
      return this.setTokenMatched(token, null, false)
    }
    // We first try to match "# Feature: blah"
    let result = this.matchTitleLine(
      KeywordPrefix.HEADER,
      this.dialect.feature,
      ':',
      token,
      TokenType.FeatureLine
    )
    // If we didn't match "# Feature: blah", we still match this line
    // as a FeatureLine.
    // The reason for this is that users may not want to be constrained by having this as their first line.
    if (!result) {
      token.matchedType = TokenType.FeatureLine
      token.matchedText = token.line.trimmedLineText
      result = this.setTokenMatched(token, null, true)
    }
    this.matchedFeatureLine = result
    return result
  }

  /** Matches a header line starting with a background keyword and `:`. */
  match_BackgroundLine(token: Token): boolean {
    return this.matchTitleLine(
      KeywordPrefix.HEADER,
      this.dialect.background,
      ':',
      token,
      TokenType.BackgroundLine
    )
  }

  /** Matches a header line starting with a rule keyword and `:`. */
  match_RuleLine(token: Token): boolean {
    return this.matchTitleLine(
      KeywordPrefix.HEADER,
      this.dialect.rule,
      ':',
      token,
      TokenType.RuleLine
    )
  }

  /**
   * Matches a header line starting with a scenario or scenario outline
   * keyword and `:`; both are reported as ScenarioLine.
   */
  match_ScenarioLine(token: Token): boolean {
    return (
      this.matchTitleLine(
        KeywordPrefix.HEADER,
        this.dialect.scenario,
        ':',
        token,
        TokenType.ScenarioLine
      ) ||
      this.matchTitleLine(
        KeywordPrefix.HEADER,
        this.dialect.scenarioOutline,
        ':',
        token,
        TokenType.ScenarioLine
      )
    )
  }

  /** Matches a header line starting with an examples keyword and `:`. */
  match_ExamplesLine(token: Token): boolean {
    return this.matchTitleLine(
      KeywordPrefix.HEADER,
      this.dialect.examples,
      ':',
      token,
      TokenType.ExamplesLine
    )
  }

  /** Matches a bullet list item starting with a (non-`*`) step keyword. */
  match_StepLine(token: Token): boolean {
    return this.matchTitleLine(
      KeywordPrefix.BULLET,
      this.nonStarStepKeywords,
      '',
      token,
      TokenType.StepLine
    )
  }

  /**
   * Matches `<prefix><keyword><keywordSuffix><title>` against the token's line.
   *
   * On a match the token receives the matched type, keyword, trimmed title
   * and - for step keywords - the keyword type (UNKNOWN when the keyword
   * belongs to more than one type). The reported indent is the line indent
   * plus the length of the matched prefix.
   *
   * @param prefix - regular-expression source of the Markdown prefix (one capture group)
   * @param keywords - candidate keywords, matched literally
   * @param keywordSuffix - literal text expected right after the keyword
   * @param token - token to match and update
   * @param matchedType - token type to assign on a match
   * @returns whether the line matched
   */
  matchTitleLine(
    prefix: KeywordPrefix,
    keywords: readonly string[],
    keywordSuffix: ':' | '',
    token: Token,
    matchedType: TokenType
  ): boolean {
    const regexp = new RegExp(
      `${prefix}(${keywords.map(escapeRegExp).join('|')})${keywordSuffix}(.*)`
    )
    const match = token.line.match(regexp)
    const lineIndent = token.line.indent
    if (!match) {
      return this.setTokenMatched(token, lineIndent, false)
    }

    const [, matchedPrefix, keyword, title] = match
    token.matchedType = matchedType
    token.matchedKeyword = keyword
    // Only step keywords are present in the map, so header keywords get no keyword type.
    if (Object.prototype.hasOwnProperty.call(this.keywordTypesMap, keyword)) {
      const keywordTypes = this.keywordTypesMap[keyword]
      token.matchedKeywordType =
        keywordTypes.length > 1 ? messages.StepKeywordType.UNKNOWN : keywordTypes[0]
    }
    token.matchedText = title.trim()
    return this.setTokenMatched(token, lineIndent + matchedPrefix.length, true)
  }

  /**
   * Records the dialect, indent and column on the token.
   *
   * @param indent - indent to record; `null` falls back to the line's own
   *   indent (or 0 when the token has no line)
   * @returns `matched`, unchanged, so callers can return the call directly
   */
  setTokenMatched(token: Token, indent: number | null, matched: boolean): boolean {
    token.matchedGherkinDialect = this.dialectName
    if (indent !== null) {
      token.matchedIndent = indent
    } else {
      token.matchedIndent = token.line == null ? 0 : token.line.indent
    }
    token.location.column = token.matchedIndent + 1
    return matched
  }

  /** Matches an indented pipe-table row that is not a GFM delimiter row. */
  match_TableRow(token: Token): boolean {
    // Gherkin tables must be indented 2-5 spaces in order to be distinguished from non-Gherkin tables
    if (!/^\s\s\s?\s?\s?\|/.test(token.line.lineText)) {
      return false
    }
    const tableCells = token.line.getTableCells()
    if (this.isGfmTableSeparator(tableCells)) return false

    token.matchedKeyword = '|'
    token.matchedType = TokenType.TableRow
    token.matchedItems = tableCells
    return true
  }

  /**
   * Tells whether the cells form a GFM delimiter row, i.e. every cell consists
   * of dashes with optional leading/trailing colons. Requiring all cells to
   * match keeps data rows that merely contain a `-` cell from being discarded.
   */
  private isGfmTableSeparator(tableCells: readonly Item[]): boolean {
    return tableCells.length > 0 && tableCells.every((item) => /^:?-+:?$/.test(item.text))
  }

  /**
   * Matches a line containing one or more backtick-quoted tags, e.g.
   * `` `@foo` `@bar` ``. Each tag's column points at its `@` (1-based).
   */
  match_TagLine(token: Token): boolean {
    const tags: Item[] = []
    const tagPattern = /`(@[^`]+)`/g
    const text = token.line.trimmedLineText
    let m: RegExpExecArray | null
    while ((m = tagPattern.exec(text)) !== null) {
      tags.push({
        // +1 to skip the opening backtick, +1 because columns are 1-based.
        column: token.line.indent + m.index + 2,
        text: m[1],
      })
    }

    if (tags.length === 0) return false
    token.matchedType = TokenType.TagLine
    token.matchedItems = tags
    return true
  }

  /**
   * Restores the matcher to its initial state so it can be used for another
   * document: default dialect, no open DocString, no feature line seen yet.
   */
  reset(): void {
    if (this.dialectName !== this.defaultDialectName) {
      this.changeDialect(this.defaultDialectName)
    }
    this.activeDocStringSeparator = DEFAULT_DOC_STRING_SEPARATOR
    this.indentToRemove = 0
    // Without this a reused matcher would never match a FeatureLine again.
    this.matchedFeatureLine = false
  }
}
/**
 * Regular-expression source fragments for the Markdown constructs that may
 * precede a Gherkin keyword. Each fragment is anchored at the start of the
 * text and consists of a single capture group.
 */
const KeywordPrefix = {
  // https://spec.commonmark.org/0.29/#bullet-list-marker
  // Optional whitespace, one of `*`, `+`, `-`, then optional whitespace.
  BULLET: '^(\\s*[*+-]\\s*)',
  // One to six `#` characters followed by one whitespace character.
  HEADER: '^(#{1,6}\\s)',
} as const
type KeywordPrefix = (typeof KeywordPrefix)[keyof typeof KeywordPrefix]
// https://stackoverflow.com/questions/3115150/how-to-escape-regular-expression-special-characters-using-javascript
/**
 * Returns `text` with a backslash inserted before every regular-expression
 * special character, `,`, `#`, `-` and whitespace character, so the result
 * can be embedded in a RegExp source and match `text` literally.
 */
function escapeRegExp(text: string) {
  const needsEscaping = /[-[\]{}()*+?.,\\^$|#\s]/g
  return text.replace(needsEscaping, (character) => `\\${character}`)
}