UNPKG

@masala/parser

Version:
222 lines (199 loc) 10.2 kB
import { describe, it, expect } from 'vitest' import { F, C, N } from '../../lib/parsec' import { GenLex, getMathGenLex } from '../../lib/genlex/genlex' import stream from '../../lib/stream' // Helper function from the original test file function dateParser() { // Renamed from date() to avoid conflict if imported elsewhere return N.digits() .then(C.charIn(['-', '/']).returns('-')) // Standardize separator for join .then(N.digits()) .then(C.charIn(['-', '/']).returns('-')) .then(N.digits()) .map((dateValues) => dateValues[4] > 2000 ? dateValues.reverse() : dateValues, ) .map((dateArray) => dateArray.join('')) } describe('GenLex Tests', () => { it('genlex find offsets when success', () => { const genlex = new GenLex() const plus = genlex.tokenize('+') const minus = genlex.tokenize('-') let grammar = plus.or(minus).rep().thenEos() const parser = genlex.use(grammar) const text = '+ + - --' const parsing = parser.parse(stream.ofString(text)) expect(parsing.isEos()).toBe(true) expect(parsing.offset).toBe(5) // Nodeunit: test.equal(5, parsing.offset, 'there are 5 keywords'); expect(parsing.input.location(parsing.offset)).toBe(8) // Nodeunit: test.equal(8, parsing.input.location(parsing.offset), 'there are 8 chars') }) it('genlex find offsets when fail', () => { const genlex = new GenLex() const plus = genlex.tokenize('+') const minus = genlex.tokenize('-') let grammar = plus.or(minus).rep().thenEos() const parser = genlex.use(grammar) const text = '+ + +* --' const parsing = parser.parse(stream.ofString(text)) expect(parsing.isEos()).toBe(false) // Nodeunit: test.ok(! parsing.isEos(), 'an error should have occurred'); expect(parsing.getOffset()).toBe(3) // Nodeunit: test.equal(3, parsing.getOffset(), 'Failed at the third token'); expect(parsing.location()).toBe(7) // Nodeunit: test.equal(7, parsing.location(), 'fail is not 3: it must be the char offset before the error'); }) it('expect Genlex to be constructed with spaces ', () => { const genlex = new GenLex() expect(genlex.spaces).toBeDefined() // Nodeunit: test.ok(genlex.spaces !== undefined); expect(genlex.definitions.length).toBe(0) // Nodeunit: test.ok(genlex.definitions.length === 0); }) it('expect tokenize() to add on definition', () => { const genlex = new GenLex() genlex.tokenize(N.number(), 'number', 500) expect(genlex.definitions.length).toBe(1) // Nodeunit: test.ok(genlex.definitions.length === 1); }) it('expect use() to sort definitions by revert precedence', () => { const genlex = new GenLex() const tkNumber = genlex.tokenize(N.number(), 'number') const tkDate = genlex.tokenize(dateParser(), 'date', 800) const tkChar = genlex.tokenize(C.charLiteral(), 'char', 1200) let grammar = tkDate.then(tkNumber.rep().or(tkChar)) // Nodeunit: test.notEqual(genlex.definitions[0].name, 'date'); (This assert is before genlex.use) // We can't directly replicate this without knowing the initial unsorted state behavior precisely. // The important part is the state *after* .use() genlex.use(grammar) expect(genlex.definitions[0].name).toBe('char') // Nodeunit: test.equal(genlex.definitions[0].name, 'char'); (after sort) expect(genlex.definitions[2].name).toBe('date') // Nodeunit: test.equal(genlex.definitions[2].name, 'date'); -> Indexing might differ, check sorted order. expect(genlex.definitions[1].name).toBe('number') // Assuming number has lowest precedence here. }) it('expect use() to create an easy tokenizer', () => { const genlex = new GenLex() const tkNumber = genlex.tokenize(N.number(), 'number') let grammar = tkNumber.rep() const parser = genlex.use(grammar) const parsing = parser.parse(stream.ofString('34 23')) expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted()); }) it('a Genlex can update its precedence', () => { const genlex = new GenLex() const tkNumber = genlex.tokenize(N.number(), 'number') const tkDate = genlex.tokenize(dateParser(), 'date', 800) let content = '10/05/2014 34 23' genlex.setSeparators(' /') genlex.updatePrecedence('number', 10) // Make number lower precedence than default date tokenizer let grammar = tkDate.or(tkNumber).rep() // No thenEos in original test here const parser = genlex.use(grammar) const parsing = parser.parse(stream.ofString(content)) const tokens = parsing.value.array() expect(tokens).toStrictEqual([10, 5, 2014, 34, 23]) // Assuming dateParser returns a string like this }) it('GenLex can tokenize keywords', () => { const genlex = new GenLex() const plus = genlex.tokenize('+') let grammar = plus.rep().then(F.eos().drop()) const parser = genlex.use(grammar) const text = '+++++' const parsing = parser.parse(stream.ofString(text)) expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted(), 'GenLex can tokenize keywords'); }) it('tokenize mixes with keywords', () => { const genlex = new GenLex() const number = genlex.tokenize(N.number(), 'number') const plus = genlex.tokenize('+') let grammar = plus.or(number).rep().then(F.eos().drop()) const parser = genlex.use(grammar) const text = '++77++4+' const parsing = parser.parse(stream.ofString(text)) expect(parsing.isEos()).toBe(true) // Nodeunit: test.ok(parsing.isEos(), 'tokenize mixes with keywords'); }) it('getMathGenLex() gives a simple genlex', () => { const genlex = getMathGenLex() const number = genlex.get('number') let grammar = number.rep() const text = '15 14' const parser = genlex.use(grammar) const parsing = parser.parse(stream.ofString(text)) // Original test: test.deepEqual(parsing.value.array(), ['15', '14'], ...); // N.number() typically returns numbers, not strings. Tokens would wrap these numbers. const tokenValues = parsing.value.array() expect(tokenValues).toEqual([15, 14]) }) it('getMathGenLex can be enhanced with a parser', () => { const genlex = getMathGenLex() genlex.remove('-') const number = genlex.get('number') const tkDate = genlex.tokenize(dateParser(), 'date', 800) let grammar = tkDate.rep().then(number).then(F.eos()) const text = '15-12-2018 12-02-2020 12 ' const parser = genlex.use(grammar) const parsing = parser.parse(stream.ofString(text)) expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted()); }) it('getMathGenLex can be enhanced with a string', () => { const genlex = getMathGenLex() const number = genlex.get('number') const dol = genlex.tokenize('$', 'dol') let grammar = number.then(dol).rep().then(F.eos()) const text = '15 $ ' const parser = genlex.use(grammar) const parsing = parser.parse(stream.ofString(text)) expect(parsing.isEos()).toBe(true) // Nodeunit: test.ok(parsing.isEos()); }) it('getMathGenLex can be enhanced with a string and no name', () => { const genlex = getMathGenLex() const number = genlex.get('number') genlex.tokenize('$') const dol = genlex.get('$') let grammar = number.then(dol).rep().then(F.eos()) const text = '15 $ ' const parsing = genlex.use(grammar).parse(stream.ofString(text)) expect(parsing.isEos()).toBe(true) // Nodeunit: test.ok(parsing.isEos()); }) it('genlex can change separators with a given string', () => { const genlex = getMathGenLex() const number = genlex.get('number') let grammar = number.rep().then(F.eos().drop()) genlex.setSeparators('-') const text = '15-12-35--' const parser = genlex.use(grammar) const parsing = parser.parse(stream.ofString(text)) expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted()); const tokenValues = parsing.value.array() expect(tokenValues).toEqual([15, 12, 35]) // Nodeunit: test.deepEqual(parsing.value.array(), [15, 12, 35]); }) it('genlex separators must be a string', () => { const genlex = getMathGenLex() expect(() => { // Nodeunit: test.ok(found); after try-catch genlex.setSeparators(C.char('-')) }).toThrow() }) it('genlex can change separators with a full Parser', () => { const genlex = getMathGenLex() const number = genlex.get('number') let grammar = number.rep().then(F.eos().drop()) const separatorParser = C.char('-').then(C.char('/').opt()) genlex.setSeparatorsParser(separatorParser) const text = '15-12-/35--10' const parser = genlex.use(grammar) const parsing = parser.parse(stream.ofString(text)) expect(parsing.isAccepted()).toBe(true) // Nodeunit: test.ok(parsing.isAccepted()); const tokenValues = parsing.value.array() expect(tokenValues).toEqual([15, 12, 35, 10]) // Nodeunit: test.deepEqual(parsing.value.array(), [15, 12, 35,10]); }) it('genlex provide all named tokens', () => { const genlex = getMathGenLex() const { number, plus, mult, open, close } = genlex.tokens() let grammar = number .or(plus) .or(open) .or(close) .or(mult) .rep() .then(F.eos().drop()) const text = '12+ 35' const parser = genlex.use(grammar) const parsing = parser.parse(stream.ofString(text)) expect(parsing.isAccepted()).toBe(true) const tokenValues = parsing.value.array() expect(tokenValues).toEqual([12, '+', 35]) }) })