@masala/parser
Version:
Masala Parser
222 lines (199 loc) • 10.2 kB
JavaScript
import { describe, it, expect } from 'vitest'
import { F, C, N } from '../../lib/parsec'
import { GenLex, getMathGenLex } from '../../lib/genlex/genlex'
import stream from '../../lib/stream'
// Helper function from the original test file
function dateParser() {
// Renamed from date() to avoid conflict if imported elsewhere
return N.digits()
.then(C.charIn(['-', '/']).returns('-')) // Standardize separator for join
.then(N.digits())
.then(C.charIn(['-', '/']).returns('-'))
.then(N.digits())
.map((dateValues) =>
dateValues[4] > 2000 ? dateValues.reverse() : dateValues,
)
.map((dateArray) => dateArray.join(''))
}
describe('GenLex Tests', () => {
it('genlex find offsets when success', () => {
const genlex = new GenLex()
const plus = genlex.tokenize('+')
const minus = genlex.tokenize('-')
let grammar = plus.or(minus).rep().thenEos()
const parser = genlex.use(grammar)
const text = '+ + - --'
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isEos()).toBe(true)
expect(parsing.offset).toBe(5) // Nodeunit: test.equal(5, parsing.offset, 'there are 5 keywords');
expect(parsing.input.location(parsing.offset)).toBe(8) // Nodeunit: test.equal(8, parsing.input.location(parsing.offset), 'there are 8 chars')
})
it('genlex find offsets when fail', () => {
const genlex = new GenLex()
const plus = genlex.tokenize('+')
const minus = genlex.tokenize('-')
let grammar = plus.or(minus).rep().thenEos()
const parser = genlex.use(grammar)
const text = '+ + +* --'
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isEos()).toBe(false) // Nodeunit: test.ok(! parsing.isEos(), 'an error should have occurred');
expect(parsing.getOffset()).toBe(3) // Nodeunit: test.equal(3, parsing.getOffset(), 'Failed at the third token');
expect(parsing.location()).toBe(7) // Nodeunit: test.equal(7, parsing.location(), 'fail is not 3: it must be the char offset before the error');
})
it('expect Genlex to be constructed with spaces ', () => {
const genlex = new GenLex()
expect(genlex.spaces).toBeDefined() // Nodeunit: test.ok(genlex.spaces !== undefined);
expect(genlex.definitions.length).toBe(0) // Nodeunit: test.ok(genlex.definitions.length === 0);
})
it('expect tokenize() to add on definition', () => {
const genlex = new GenLex()
genlex.tokenize(N.number(), 'number', 500)
expect(genlex.definitions.length).toBe(1) // Nodeunit: test.ok(genlex.definitions.length === 1);
})
it('expect use() to sort definitions by revert precedence', () => {
const genlex = new GenLex()
const tkNumber = genlex.tokenize(N.number(), 'number')
const tkDate = genlex.tokenize(dateParser(), 'date', 800)
const tkChar = genlex.tokenize(C.charLiteral(), 'char', 1200)
let grammar = tkDate.then(tkNumber.rep().or(tkChar))
// Nodeunit: test.notEqual(genlex.definitions[0].name, 'date'); (This assert is before genlex.use)
// We can't directly replicate this without knowing the initial unsorted state behavior precisely.
// The important part is the state *after* .use()
genlex.use(grammar)
expect(genlex.definitions[0].name).toBe('char') // Nodeunit: test.equal(genlex.definitions[0].name, 'char'); (after sort)
expect(genlex.definitions[2].name).toBe('date') // Nodeunit: test.equal(genlex.definitions[2].name, 'date'); -> Indexing might differ, check sorted order.
expect(genlex.definitions[1].name).toBe('number') // Assuming number has lowest precedence here.
})
it('expect use() to create an easy tokenizer', () => {
const genlex = new GenLex()
const tkNumber = genlex.tokenize(N.number(), 'number')
let grammar = tkNumber.rep()
const parser = genlex.use(grammar)
const parsing = parser.parse(stream.ofString('34 23'))
expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted());
})
it('a Genlex can update its precedence', () => {
const genlex = new GenLex()
const tkNumber = genlex.tokenize(N.number(), 'number')
const tkDate = genlex.tokenize(dateParser(), 'date', 800)
let content = '10/05/2014 34 23'
genlex.setSeparators(' /')
genlex.updatePrecedence('number', 10) // Make number lower precedence than default date tokenizer
let grammar = tkDate.or(tkNumber).rep() // No thenEos in original test here
const parser = genlex.use(grammar)
const parsing = parser.parse(stream.ofString(content))
const tokens = parsing.value.array()
expect(tokens).toStrictEqual([10, 5, 2014, 34, 23]) // Assuming dateParser returns a string like this
})
it('GenLex can tokenize keywords', () => {
const genlex = new GenLex()
const plus = genlex.tokenize('+')
let grammar = plus.rep().then(F.eos().drop())
const parser = genlex.use(grammar)
const text = '+++++'
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted(), 'GenLex can tokenize keywords');
})
it('tokenize mixes with keywords', () => {
const genlex = new GenLex()
const number = genlex.tokenize(N.number(), 'number')
const plus = genlex.tokenize('+')
let grammar = plus.or(number).rep().then(F.eos().drop())
const parser = genlex.use(grammar)
const text = '++77++4+'
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isEos()).toBe(true) // Nodeunit: test.ok(parsing.isEos(), 'tokenize mixes with keywords');
})
it('getMathGenLex() gives a simple genlex', () => {
const genlex = getMathGenLex()
const number = genlex.get('number')
let grammar = number.rep()
const text = '15 14'
const parser = genlex.use(grammar)
const parsing = parser.parse(stream.ofString(text))
// Original test: test.deepEqual(parsing.value.array(), ['15', '14'], ...);
// N.number() typically returns numbers, not strings. Tokens would wrap these numbers.
const tokenValues = parsing.value.array()
expect(tokenValues).toEqual([15, 14])
})
it('getMathGenLex can be enhanced with a parser', () => {
const genlex = getMathGenLex()
genlex.remove('-')
const number = genlex.get('number')
const tkDate = genlex.tokenize(dateParser(), 'date', 800)
let grammar = tkDate.rep().then(number).then(F.eos())
const text = '15-12-2018 12-02-2020 12 '
const parser = genlex.use(grammar)
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted());
})
it('getMathGenLex can be enhanced with a string', () => {
const genlex = getMathGenLex()
const number = genlex.get('number')
const dol = genlex.tokenize('$', 'dol')
let grammar = number.then(dol).rep().then(F.eos())
const text = '15 $ '
const parser = genlex.use(grammar)
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isEos()).toBe(true) // Nodeunit: test.ok(parsing.isEos());
})
it('getMathGenLex can be enhanced with a string and no name', () => {
const genlex = getMathGenLex()
const number = genlex.get('number')
genlex.tokenize('$')
const dol = genlex.get('$')
let grammar = number.then(dol).rep().then(F.eos())
const text = '15 $ '
const parsing = genlex.use(grammar).parse(stream.ofString(text))
expect(parsing.isEos()).toBe(true) // Nodeunit: test.ok(parsing.isEos());
})
it('genlex can change separators with a given string', () => {
const genlex = getMathGenLex()
const number = genlex.get('number')
let grammar = number.rep().then(F.eos().drop())
genlex.setSeparators('-')
const text = '15-12-35--'
const parser = genlex.use(grammar)
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted());
const tokenValues = parsing.value.array()
expect(tokenValues).toEqual([15, 12, 35]) // Nodeunit: test.deepEqual(parsing.value.array(), [15, 12, 35]);
})
it('genlex separators must be a string', () => {
const genlex = getMathGenLex()
expect(() => {
// Nodeunit: test.ok(found); after try-catch
genlex.setSeparators(C.char('-'))
}).toThrow()
})
it('genlex can change separators with a full Parser', () => {
const genlex = getMathGenLex()
const number = genlex.get('number')
let grammar = number.rep().then(F.eos().drop())
const separatorParser = C.char('-').then(C.char('/').opt())
genlex.setSeparatorsParser(separatorParser)
const text = '15-12-/35--10'
const parser = genlex.use(grammar)
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted());
const tokenValues = parsing.value.array()
expect(tokenValues).toEqual([15, 12, 35, 10]) // Nodeunit: test.deepEqual(parsing.value.array(), [15, 12, 35,10]);
})
it('genlex provide all named tokens', () => {
const genlex = getMathGenLex()
const { number, plus, mult, open, close } = genlex.tokens()
let grammar = number
.or(plus)
.or(open)
.or(close)
.or(mult)
.rep()
.then(F.eos().drop())
const text = '12+ 35'
const parser = genlex.use(grammar)
const parsing = parser.parse(stream.ofString(text))
expect(parsing.isAccepted()).toBe(true)
const tokenValues = parsing.value.array()
expect(tokenValues).toEqual([12, '+', 35])
})
})