@masala/parser
Version:
217 lines (165 loc) • 6.32 kB
JavaScript
import response from "../parsec/response";
import {F, C, N} from "../parsec";
import unit from "../data/unit";
import option from "../data/option";
/**
 * Static description of one kind of token: the parser that recognizes it,
 * the name it is registered under, and its precedence.
 *
 * A definition carries no value; the value is only known at runtime,
 * while parsing.
 */
export class TokenDefinition {
    /**
     * @param parser parser recognizing the raw text of this token
     * @param name name the token is registered under
     * @param precedence number used to order this definition among the others
     */
    constructor(parser, name, precedence) {
        Object.assign(this, {parser, name, precedence});
    }
}
/**
 * A Token is created at runtime, pairing a token name with the value
 * obtained from the parsed text.
 */
export class Token {
    /**
     * @param name name of the token kind
     * @param value value produced by the parser for the matched text
     */
    constructor(name, value) {
        Object.assign(this, {name, value});
    }

    /**
     * Checks whether this token is of the requested kind.
     *
     * @param name token name to compare against (strict equality)
     * @returns `option.some(value)` when the names match, `option.none()` otherwise
     */
    accept(name) {
        if (this.name !== name) {
            return option.none();
        }
        return option.some(this.value);
    }
}
/**
 * Generic lexer builder.
 *
 * Token kinds are registered with `tokenize` / `keywords`; each registration
 * stores a TokenDefinition (parser, name, precedence) and hands back a
 * token-level parser built by `literal`. `use(grammar)` then chains the
 * character-level tokenizer with a grammar written over those token parsers.
 */
export class GenLex {
    constructor() {
        // separators skipped around every token
        this.spaces = defaultSpaces();
        // definitions keep trace of all: parser, precedence and name
        this.definitions = [];
        // name -> token parser; gives a token, but not directly its precedence
        this.tokensMap = {};
    }

    /**
     * Registers a token kind.
     *
     * @param parser a parser, or a plain string (wrapped with `C.string`)
     * @param name token name; when `parser` is a string and `name` is
     *        undefined, the string itself is used as the name
     * @param precedence ordering key used by `findTokenByPrecedence`
     * @returns the token-level parser to use inside a grammar
     */
    tokenize(parser, name, precedence = 1000) {
        if (typeof parser === 'string') {
            const tokenName = name === undefined ? parser : name;
            return this.tokenize(C.string(parser), tokenName, precedence);
        }

        this.definitions.push(new TokenDefinition(parser, name, precedence));

        const token = literal(candidate => candidate.accept(name), name);
        this.tokensMap[name] = token;
        return token;
    }

    /**
     * Registers every string of `keys` as a token named after itself.
     *
     * @param keys array of keyword strings
     * @param precedence precedence shared by all the keywords
     * @returns array of token-level parsers, in the order of `keys`
     */
    keywords(keys, precedence = 1000) {
        return keys.map(key => this.tokenize(key, key, precedence));
    }

    /**
     * Replaces the separators by the set of characters in `spacesCharacters`.
     *
     * @param spacesCharacters string whose characters are all separators
     * @throws {Error} when `spacesCharacters` is not a string
     */
    setSeparators(spacesCharacters) {
        if (typeof spacesCharacters !== 'string') {
            // Backslashes are escaped so the message shows the escape
            // sequences instead of emitting raw control characters.
            throw new Error(
                "setSeparators needs a string as separators, such as ' \\r\\n\\f\\t' ;" +
                " use setSeparatorsParser to declare a parser"
            );
        }
        this.spaces = C.charIn(spacesCharacters).map(() => unit);
    }

    /**
     * Set separator Parser. It's up to the parser to accept or not
     * optional repetition
     * @param spacesParser
     */
    setSeparatorsParser(spacesParser) {
        this.spaces = spacesParser.map(() => unit);
    }

    /**
     * Changes the precedence of an already registered token.
     *
     * @param tokenName name given at registration
     * @param precedence new precedence
     * @throws {Error} when no definition is registered under `tokenName`
     */
    updatePrecedence(tokenName, precedence) {
        const definition = this.definitions.find(def => def.name === tokenName);
        if (definition === undefined) {
            throw new Error(
                `updatePrecedence: no token named '${tokenName}' has been defined`
            );
        }
        definition.precedence = precedence;
    }

    /**
     * Builds the parser for a single token: optional separators (dropped),
     * one token chosen among the definitions, optional separators (dropped).
     * NOTE(review): `.single()` presumably unwraps the one remaining value;
     * confirm against the parser API.
     */
    buildTokenizer() {
        const token = this.findTokenByPrecedence();
        return this.spaces.optrep().drop()
            .then(token)
            .then(this.spaces.optrep().drop())
            .single();
    }

    /**
     * Chains the tokenizer with `grammar`, a parser written with the token
     * parsers returned by `tokenize`.
     */
    use(grammar) {
        return this.buildTokenizer().chain(grammar);
    }

    /**
     * Combines all definitions into one alternative parser.
     *
     * Definitions are ordered by descending precedence and folded so that
     * each definition wraps the combination of those before it; the
     * definition with the smallest precedence value is therefore outermost.
     * NOTE(review): assuming `F.try(a).or(b)` attempts `a` before `b`, the
     * smallest precedence value is attempted first; confirm.
     *
     * Sorting is done on a copy: looking up tokens must not reorder
     * `this.definitions`.
     */
    findTokenByPrecedence() {
        const sortedDefinitions = [...this.definitions]
            .sort((d1, d2) => d2.precedence - d1.precedence);

        return sortedDefinitions.reduce(
            (combinator, definition) =>
                F.try(getTokenParser(definition)).or(combinator),
            F.error()
        );
    }

    /**
     * Unregisters every definition named `tokenName` and forgets its
     * token parser.
     */
    remove(tokenName) {
        this.definitions = this.definitions
            .filter(d => d.name !== tokenName);
        delete this.tokensMap[tokenName];
    }

    /**
     * @returns the live name -> token parser map
     *          (type: { [key: string]: Parser })
     */
    tokens() {
        return this.tokensMap;
    }

    /**
     * @returns the token parser registered under `tokenName`, if any
     */
    get(tokenName) {
        return this.tokensMap[tokenName];
    }
}
/**
 * Lifts the parser of a definition so that it yields Token objects.
 *
 * @param def token definition providing `parser` and `name`
 * @returns `def.parser` mapped so each parsed value is wrapped in a Token
 *          carrying the definition's name
 */
function getTokenParser(def) {
    // def.name is read when a value is produced, not when the parser is built
    const wrapInToken = value => new Token(def.name, value);
    return def.parser.map(wrapInToken);
}
/**
 * Builds a token-level parser with `F.parse`.
 *
 * At a given index, the parser reads `input.get(index)` and hands the token
 * found there to `tokenize`:
 *  - when `tokenize` yields a value, the parser accepts that value and moves
 *    to `index + 1`;
 *  - when `tokenize` yields nothing, or when reading the input fails, the
 *    parser rejects at `index`.
 *
 * NOTE(review): the trailing `true` / `false` passed to `response.accept` /
 * `response.reject` is presumably the "consumed" flag; confirm against the
 * response module.
 *
 * @param tokenize function from a token to an optional value
 * @param name token name, unused here; kept for easier debugging
 */
// eslint-disable-next-line
function literal(tokenize, name) {
    return F.parse((input, index) => {
        const rejectHere = () => response.reject(input, index, false);
        const acceptNext = value => response.accept(value, input, index + 1, true);
        const matchToken = token =>
            tokenize(token)
                .map(acceptNext)
                .orLazyElse(rejectHere);

        return input
            .get(index)
            .map(matchToken)
            .lazyRecoverWith(rejectHere);
    });
}
/**
 * Default separator parser: one character among space, carriage return,
 * line feed, form feed and tab, with its result replaced by `unit`.
 */
function defaultSpaces() {
    const separatorCharacters = ' \r\n\f\t';
    const toUnit = () => unit;
    return C.charIn(separatorCharacters).map(toUnit);
}
/**
 * Creates a GenLex preloaded with the tokens of basic arithmetic:
 * `number`, `plus`, `minus`, `mult`, `div`, `open` and `close`.
 *
 * NOTE(review): `number` gets the largest precedence value (1100). Given how
 * findTokenByPrecedence folds the definitions, that presumably makes it the
 * last alternative attempted rather than the first; confirm the intent.
 *
 * @returns a new GenLex instance with the tokens above registered
 */
export function getMathGenLex() {
    const mathGenlex = new GenLex();

    mathGenlex.tokenize(N.number(), 'number', 1100);

    // [character, token name, precedence], registered in this order
    const symbols = [
        ['+', 'plus', 1000],
        ['-', 'minus', 1000],
        ['*', 'mult', 800],
        ['/', 'div', 800],
        ['(', 'open', 1000],
        [')', 'close', 1000],
    ];
    for (const [character, tokenName, precedence] of symbols) {
        mathGenlex.tokenize(C.char(character), tokenName, precedence);
    }

    return mathGenlex;
}