// bitmark-grammar
// Version:
// 563 lines (495 loc) • 16.5 kB
// JavaScript
/*
* index.js
*
*/
import { process } from 'process';
import R_clone from 'ramda/es/clone.js';
import R_slice from 'ramda/es/slice.js';
import { CharStreams } from './typescript-es6/antlr4es6/CharStreams.js';
import { CommonTokenStream } from './typescript-es6/antlr4es6/CommonTokenStream.js';
import { PredictionMode } from './typescript-es6/antlr4es6/atn/PredictionMode.js';
import { BailErrorStrategy } from './typescript-es6/antlr4es6/BailErrorStrategy.js';
import { DefaultErrorStrategy } from './typescript-es6/antlr4es6/DefaultErrorStrategy.js';
import { ParseTreeWalker } from './typescript-es6/antlr4es6/tree/ParseTreeWalker.js';
//
const bitmarkLexer = require('./antlr/bitmarkLexer.js');
const bitmarkParser = require('./antlr/bitmarkParser.js');
// cloze
const clozeLexer = require('./antlr/clozeLexer.js');
const clozeParser = require('./antlr/clozeParser.js');
// match
const matchLexer = require('./antlr/matchLexer.js');
const matchParser = require('./antlr/matchParser.js');
// multiple-choice-*
const choiceLexer = require('./antlr/choiceLexer.js');
const choiceParser = require('./antlr/choiceParser.js');
const interviewLexer = require('./antlr/interviewLexer.js');
const interviewParser = require('./antlr/interviewParser.js');
const truefalseLexer = require('./antlr/truefalseLexer.js');
const truefalseParser = require('./antlr/truefalseParser.js');
const flashcardLexer = require('./antlr/flashcardLexer.js');
const flashcardParser = require('./antlr/flashcardParser.js');
const chatLexer = require('./antlr/chatLexer.js');
const chatParser = require('./antlr/chatParser.js');
const sequenceLexer = require('./antlr/sequenceLexer.js');
const sequenceParser = require('./antlr/sequenceParser.js');
import { BitmarkListener } from './bitmark-listener.mjs';
import { JSON_BIT_TEMPLATES } from './bit-template.mjs';
import { BitUtil } from './bit-utils.mjs';
// Bit names (leading dot included, format spec removed) whose body is JSON data;
// Preprocessor.is_a_json_bit() looks bit heads up in this list.
const JSON_BITS = [".vendor-amcharts-5-chart"];
/*
*/
/**
 * Text helpers applied to bitmark source before and after the ANTLR parsers
 * run: splitting into bits, stripping comments, and temporarily hiding square
 * brackets / stray bit heads that would otherwise be read as bitmark tags.
 *
 * Brackets are hidden as HTML numeric character references (code 91 for '[',
 * code 93 for ']'). The reference text is derived from the character code
 * instead of being spelled out, so an HTML-decoding pass over this file cannot
 * collapse the escape back into the bracket it stands for.
 */
class Preprocessor {
  constructor() { }

  /** Returns the number of lines in `text` (newline count plus one). */
  countlines(text) {
    return (text.match(/\n/g) || []).length + 1;
  }

  /**
   * Returns the numeric character reference for a single character.
   * @param {string} ch - one character, e.g. '['.
   * @returns {string} '&#' + char code + ';'
   */
  bracket_entity(ch) {
    return '&#' + ch.charCodeAt(0) + ';';
  }

  /** Replaces every '[' and ']' in `text` with its numeric character reference. */
  escape_brackets(text) {
    return text.replace(/[\[\]]/g, (ch) => this.bracket_entity(ch));
  }

  /** Inverse of escape_brackets(): turns references 91 / 93 back into '[' / ']'. */
  unescape_brackets(text) {
    return text.replace(/&#(91|93);/g, (whole, code) => String.fromCharCode(Number(code)));
  }

  /*
   * Normalizes the text (blanks after a line-ending ']' are dropped, a final
   * newline is guaranteed) and hands it to BitUtil for the actual splitting.
   * Returns whatever BitUtil.split_bits() returns — per the original note an
   * array of {offset, bit text} records.
   */
  split_bits(text) {
    text = text.replace(/\] +$/mg, ']');
    if (!text.endsWith('\n')) {
      text += '\n';
    }
    return new BitUtil(text).split_bits();
  }

  /** Removes every `|| ... ||` span (may cross lines, shortest match). */
  remove_comments(text) {
    return text.replace(/\|\|[\w\W]*?\|\|/mg, '');
  }

  /**
   * Returns `text` with the `orgtext.length` characters starting at `index`
   * replaced by `replacement`. Only the length of `orgtext` is used.
   */
  replace_text_at(text, index, replacement, orgtext) {
    return text.substring(0, index) + replacement + text.slice(index + orgtext.length);
  }

  /**
   * Checks if the bit expects JSON data: the first `[.name]` head, with any
   * `:format` suffix removed, must be listed in JSON_BITS.
   * Returns false (instead of throwing) when `text` has no bit head.
   */
  is_a_json_bit(text) {
    if (typeof text !== 'string') {
      return false;
    }
    const head = text.match(/\S*\[(\.[^\]\[]+)\]/);
    if (head === null) {
      return false;
    }
    const name = head[1].replace(/\:.*$/, ''); // remove format spec.
    return JSON_BITS.includes(name);
  }

  /** True when the text mentions the `.app-bitmark-from-javascript` bit. */
  is_a_js_bit(text) {
    return /\.app-bitmark-from-javascript/.test(text);
  }

  /**
   * True when the text holds at least one resource tag whose value is an
   * http/https/file URL and whose closing ']' is followed by a newline or '[@'.
   */
  has_a_url(text) {
    const re = /\[(&audio|&image|&video|&article|&document|&app|&website|&still-image|@src[0-9]x)[A-Za-z\-]*:(http|https|file):\/\/.*?\](?=\n|\[@)/g; // look for one
    const found = text.match(re);
    return found !== null && 0 < found.length;
  }

  /**
   * Escapes the brackets that occur INSIDE URL resource tags; the tag's own
   * outer brackets are kept.
   *
   * Done in one replace() pass with a callback: the earlier exec() loop kept
   * scanning a string it was growing, and passed the URL as a replacement
   * pattern, so `$&`-style sequences in a URL were expanded.
   */
  escape_bracket_in_url_if_any(text) {
    const re = /\[((&audio|&image|&video|&article|&document|&app|&website|&still-image|@src[0-9]x)[A-Za-z\-]*:(http|https|file):\/\/.*?)\](?=\n|\[@)/g; // look for all
    return text.replace(re, (whole, inner) => '[' + this.escape_brackets(inner) + ']');
  }

  /*
    Escape the [] inside between ** and **. This is a special case and I am not
    sure if this is a good solution. 7/25/2023

    Each match is rewritten in a single replace() pass, so text between two
    separate emphasis spans is not re-scanned after an earlier span has grown.
  */
  escape_brackets_in_emphasis(text) {
    const re = /(\*\*+[^\[\*\n]*\[[^\]\*]*\][^\]\*\n]*\*+\*)/gms;
    return text.replace(re, (whole) => this.escape_brackets(whole));
  }

  /*
    Escape [] in JSON data, which would otherwise be confused with bitmark bits.
    The JSON payload is taken to run from the first '{' to the last '}'; text
    outside that span is left alone, and nothing is escaped when no such span
    exists.

    Returns [text, replacements]; the replacements carry offset -1, meaning
    "position does not matter, just replace them" when undoing.
  */
  escape_json_for_json_bits(text) {
    const start = text.indexOf('{');
    const last = text.lastIndexOf('}');
    if (0 <= start && start < last) {
      const end = last + 1;
      // Splice by position: the payload may contain '$' sequences that
      // String.replace() would treat as replacement patterns.
      text = text.slice(0, start) + this.escape_brackets(text.slice(start, end)) + text.slice(end);
    }
    return [text, [
      { before: '[', after: this.bracket_entity('['), offset: -1 },
      { before: ']', after: this.bracket_entity(']'), offset: -1 },
    ]];
  }

  /*
   * Expecting single bit arg.
   *
   * Replaces "stray" bit heads — a `[.name` that does not start a line — with
   * numbered markers `${{n}}` so the grammar does not read them as bits.
   * The first 5 characters are skipped so the bit's own head is not touched.
   * At most MAXSEQ heads are examined; n counts examined heads, so marker
   * numbers may have gaps.
   *
   * Returns [text, x_array] where every x_array item is
   * {before: original head, after: marker, offset: index of the marker in the
   * text as it was at the moment of replacement}.
   */
  replace_stray_bitheads(text) {
    const MAXSEQ = 20; // cant have too many
    const head_re = /\[\.[^\]\[]+/g; // no need closing ] 10/6/2023
    const x_array = [];
    let from = 5;
    for (let seq = 0; seq < MAXSEQ; seq++) {
      head_re.lastIndex = from;
      const m = head_re.exec(text);
      if (m === null) {
        break;
      }
      const head = m[0];
      const at = m.index; // absolute index; used for every check below
      if (text.charAt(at - 1) === '\n') {
        // A head in the 0th column is left as is.
        from = at + head.length;
        continue;
      }
      const marker = `$\{\{${seq}\}\}`;
      text = this.replace_text_at(text, at, marker, head);
      x_array.push({ before: head, after: marker, offset: at });
      // Continue right after the marker; the text may have shrunk or grown.
      from = at + marker.length;
    }
    return [text, x_array]; // replaced.
  }

  /** Escapes regular-expression metacharacters (and whitespace, ',', '#') in `text`. */
  escapeRegExp(text) {
    return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
  }

  /**
   * simple version. no offset
   *
   * Puts every `after` string in `x_array` back to its `before` text, then
   * restores all escaped brackets. Note that bracket references the author
   * typed literally are restored as well.
   * A non-string `text` is returned untouched.
   */
  unreplace_stray_bitheads(text, x_array) {
    if (typeof text !== 'string') {
      return text;
    }
    for (const x of x_array || []) {
      const re = new RegExp(this.escapeRegExp(x.after), 'g');
      // Callback form: `before` is data, not a replacement pattern.
      text = text.replace(re, () => x.before);
    }
    return this.unescape_brackets(text); // get the brackets back.
  }
}
/*
*/
/**
 * Collects lexer/parser errors as records cloned from
 * JSON_BIT_TEMPLATES.Error_Info. The fields written here are `message`,
 * `line`, `column` and `errorLine`.
 *
 * Although it extends DefaultErrorStrategy, instances are registered through
 * addErrorListener() and are called via syntaxError().
 */
class BitmarkErrorListener extends DefaultErrorStrategy {
  /**
   * @param {string} source - text being parsed; used to look up the offending line.
   * @param {object} options - stored as-is; not read inside this class.
   */
  constructor(source, options) {
    super();
    this.options = options;
    this.errors = [];
    this.bail_mode = false; // when true, syntaxError() throws instead of collecting
    this.source = source;
  }

  /** Forgets every error collected so far. */
  clearErrors() {
    this.errors = [];
  }

  /**
   * Rewrites a parser message into friendlier wording. The first rule whose
   * regex matches and that has a replacement wins; otherwise `msg` is
   * returned unchanged.
   * @param {string} msg - original message (presumably ANTLR's standard texts).
   * @returns {string}
   */
  modifyErrorMessage(msg) {
    const STD_MSGS = [
      { regex: /extraneous input ('[^\']*')/, repl: 'Unexpected input ${0}' },
      { regex: /mismatched input ('[^\']*')/, repl: 'Rule violation around ${0}' },
      { regex: /no viable alternative at input ('[^\']*')/, repl: 'Unexpected input ${0}' },
      { regex: /missing/, repl: null }, // no mod
    ];
    for (const { regex, repl } of STD_MSGS) {
      const m = msg.match(regex);
      if (m && repl) {
        // Callback form: the quoted token may contain '$' sequences (e.g. '$')
        // that a replacement STRING would expand as patterns.
        return repl.replace('${0}', () => m[1]);
      }
    }
    return msg;
  }

  /**
   * Listener callback for a syntax error.
   * Stores `line - 1` in the record, while the line lookup is done with the
   * line exactly as received.
   * NOTE(review): manualError() stores the line un-decremented — confirm the
   * difference is intended.
   *
   * @throws {string} in bail mode, the record as a JSON string. Throwing a
   *   string is kept because callers may rely on it.
   */
  syntaxError(recognizer, offendingSymbol, line, column, msg, e) {
    const tmpl = R_clone(JSON_BIT_TEMPLATES.Error_Info);
    tmpl.message = this.modifyErrorMessage(msg);
    tmpl.line = parseInt(line, 10) - 1;
    tmpl.column = column;
    const errline = new BitUtil(this.source).get_error_line(this.source, line, column);
    // Only overwrite the template's errorLine when a line was actually found.
    if (errline && 0 < errline.length) {
      tmpl.errorLine = errline;
    }
    if (this.bail_mode) {
      throw JSON.stringify(tmpl, null, 4); // bail out
    }
    // not aborting
    this.errors.push(tmpl);
  }

  /**
   * Records an error raised by our own code instead of by the parser.
   * @param {object} ctx - parse context; its `_start._line` is used when `line` is negative.
   * @param {number} line - line to report ("1 origin" per the original note), or < 0.
   * @param {number} column
   * @param {string} message
   */
  manualError(ctx, line, column, message) {
    const tmpl = R_clone(JSON_BIT_TEMPLATES.Error_Info);
    tmpl.line = line < 0 ? ctx._start._line : line; // 1 origin
    tmpl.column = column; //ctx._start._charPositionInLine;
    tmpl.errorLine = new BitUtil(this.source).get_error_line(this.source, tmpl.line, column);
    tmpl.message = message;
    this.errors.push(tmpl);
  }
}
// Option defaults. Nothing in the visible part of this file reads this object;
// `trace` and `debug` are the two keys BitmarkParser looks up on the options
// it is given (options.trace in run_parser, options.debug in parse).
let Option_Template = {
trace: false,
debug: false,
};
/**
 * Parses a bitmark document: the text is split into bits, each bit is routed
 * to the lexer/parser pair selected through ParserTable (or to the default
 * pair), and the collected results are returned as a JSON string.
 */
class BitmarkParser {
  /**
   * @param {string} text1 - whole bitmark text.
   * @param {object} options - `trace` and `debug` are read; a missing value is
   *   treated as an empty object.
   * @param {?string} bit - stored in parser_vars.bit; overwritten by init().
   */
  constructor(text1, options, bit = null) {
    // Order matters: the first entry whose regex matches is used. Because
    // 'flashcard' also matches "vocabulary", the 'vocabulary' entry is never
    // selected by parse().
    this.ParserTable = {
      'cloze': {
        regex: /\n\[\.[ \t]*cloze/,
        name: 'cloze',
        lexer: clozeLexer,
        parser: clozeParser
      },
      'match': {
        regex: /\n\[\.[ \t]*match/,
        name: 'match',
        lexer: matchLexer,
        parser: matchParser
      },
      'multiple': {
        regex: /\n\[\.[ \t]*(multiple|highlight)/,
        name: 'multiple',
        lexer: choiceLexer,
        parser: choiceParser
      },
      'interview': {
        regex: /\n\[\.[ \t]*interview/,
        name: 'interview',
        lexer: interviewLexer,
        parser: interviewParser
      },
      'true-false': {
        regex: /\n\[\.[ \t]*true-false/,
        name: 'true-false',
        lexer: truefalseLexer,
        parser: truefalseParser
      },
      'flashcard': {
        regex: /\n\[\.[ \t]*(flashcard|vocabulary)/,
        name: 'flashcard',
        lexer: flashcardLexer,
        parser: flashcardParser
      },
      'vocabulary': {
        regex: /\n\[\.[ \t]*vocabulary/,
        name: 'vocabulary',
        lexer: flashcardLexer,
        parser: flashcardParser
      },
      'chat': {
        regex: /\n\[\.[ \t]*chat/,
        name: 'chat',
        lexer: chatLexer,
        parser: chatParser
      },
      'conversation': {
        regex: /\n\[\.[ \t]*conversation/,
        name: 'chat',
        lexer: chatLexer,
        parser: chatParser
      },
      'sequence': {
        regex: /\n\[\.[ \t]*sequence/,
        name: 'sequence',
        lexer: sequenceLexer,
        parser: sequenceParser
      },
      'menu': {
        regex: /\n\[\.[ \t]*menu-3/,
        name: 'sequence',
        lexer: sequenceLexer,
        parser: sequenceParser
      },
      'default': {
        regex: null,
        name: null,
        lexer: bitmarkLexer,
        parser: bitmarkParser
      },
    };
    this.options = options || {};
    this.input_text = '\n' + text1; // whole text. added NL 12/17/2020
    this.x_array = [];
    this.parser_vars = {
      chars: null,
      lexer: null,
      parser: null,
      tokens: null,
      printer: null,
      errorlisten: null,
      bit: bit
    };
  }

  /**
   * Initializes the parser environment for one bit.
   *
   * The text is first made safe for the grammar (brackets in URLs, JSON
   * payloads and emphasis are escaped, stray bit heads become markers); the
   * applied replacements are kept in this.x_array so they can be undone.
   * NOTE: this overwrites this.input_text with the preprocessed bit text.
   *
   * @param {string} splitted_text - text of a single bit.
   * @param {?string} bit - ParserTable key; falsy selects 'default'.
   * @returns {null|undefined} null when no table entry exists for `bit`.
   */
  init(splitted_text, bit) {
    const prep = new Preprocessor();
    let replaced = splitted_text;
    let x_array = [];
    let y_array = [];
    if (prep.has_a_url(splitted_text)) {
      // Brackets contained in a URL is problem. Need to escape. No need x_array
      replaced = prep.escape_bracket_in_url_if_any(replaced);
    }
    if (prep.is_a_json_bit(splitted_text)) {
      [replaced, x_array] = prep.escape_json_for_json_bits(replaced);
    }
    if (0 < replaced.indexOf('[', 6)) { // skip initial [] for bit heading
      replaced = prep.escape_brackets_in_emphasis(replaced);
    }
    // Tweak the stray bitheads
    [replaced, y_array] = prep.replace_stray_bitheads(replaced);
    this.x_array = y_array.concat(x_array);
    this.original_text = splitted_text;
    this.input_text = replaced;
    this.parser_vars.bit = bit;

    const key = !bit ? 'default' : bit;
    this.parser_vars.chars = CharStreams.fromString(replaced);
    const lp = this.ParserTable[key];
    if (!lp) {
      console.error(`Error: no parser available for bit ${key}`);
      return null;
    }
    this.parser_vars.lexer = new lp.lexer(this.parser_vars.chars);
    this.parser_vars.tokens = new CommonTokenStream(this.parser_vars.lexer);
    this.parser_vars.parser = new lp.parser(this.parser_vars.tokens);
    this.parser_vars.printer = null;
    // Remove default
    this.parser_vars.lexer.removeErrorListeners();
    this.parser_vars.parser.removeErrorListeners();
    // And add our own
    const errlisten = new BitmarkErrorListener(this.input_text, {});
    this.parser_vars.errorlisten = errlisten;
    this.parser_vars.lexer.addErrorListener(errlisten); // for the unrecognizable tokens
    this.parser_vars.parser.addErrorListener(errlisten); // for the syntax errors
  }

  /**
   * Runs the parser prepared by init() from its `bitmark` entry rule, with a
   * BitmarkListener attached as parse listener.
   * @returns the listener's get_result() value (not JSON; parse() treats it
   *   as an array).
   */
  run_parser() {
    const parser = this.parser_vars.parser;
    parser.buildParseTrees = true;
    parser.isTrace = this.options.trace;
    parser._interp.predictionMode = PredictionMode.SLL; // works!!
    this.parser_vars.printer = new BitmarkListener(this.parser_vars.errorlisten,
      this.input_text,
      parser);
    parser.addParseListener(this.parser_vars.printer);
    parser.bitmark(); // the listener gathers the result while the tree is built
    return this.parser_vars.printer.get_result(); // not json
  }

  // Returns the first ParserTable entry whose regex matches `text`, or null.
  // Entries without a regex ('default') are skipped: String.match(null) would
  // be coerced to /null/ and match any bit containing the word "null".
  find_parser_entry(text) {
    for (const entry of Object.values(this.ParserTable)) {
      if (entry.regex && entry.regex.test(text)) {
        return entry;
      }
    }
    return null;
  }

  // Parses a bit with a specialised parser and decorates the first result.
  parse_known_bit(pp, bit, entry, text_with_comments) {
    // Initialize with new bitmark
    this.init(bit.bit, entry.name);
    let obj = this.run_parser(); // obj is an array
    if (!obj || !obj.length) {
      obj = [{ bitmark: bit }];
    }
    obj[0].bitmark = text_with_comments.trim();
    // The body still holds the bithead-replaced text at this point. The
    // fallback record above has no `bit`, so there is nothing to restore.
    if (obj[0].bit) {
      obj[0].bit.body = pp.unreplace_stray_bitheads(obj[0].bit.body, this.x_array);
    }
    const listener = this.parser_vars.errorlisten;
    if (0 < listener.errors.length) {
      obj[0]['errors'] = listener.errors;
      listener.errors = [];
    }
    return obj;
  }

  // Parses a bit with the default parser. An empty result is taken to mean an
  // unknown bit name; it is reported only when the listener also saw errors,
  // otherwise the bit yields no output (behaviour kept from the original).
  parse_default_bit(pp, bit, text_with_comments) {
    this.init(bit.bit, this.ParserTable['default'].name);
    let obj = this.run_parser();
    const is_unknown = !obj || obj.length < 1;
    let unknown = null;
    if (is_unknown) {
      // Most probably wrong bit name
      const m = bit.bit.match(/\s*\[(.*)\]/);
      unknown = m ? m[1] : bit.bit.trim();
    }
    else {
      obj[0].bitmark = text_with_comments.trim();
      // The body still holds the bithead-replaced text at this point.
      if (obj[0].bit) {
        obj[0].bit.body = pp.unreplace_stray_bitheads(obj[0].bit.body, this.x_array);
      }
    }
    const listener = this.parser_vars.errorlisten;
    if (0 < listener.errors.length) {
      if (is_unknown) {
        // Here `bitmark` is the split record itself, not the trimmed text.
        obj = [{ bitmark: bit }];
        obj[0]['errors'] = ["unknown bit name: " + unknown];
        obj[0].bitmark.offset = 0;
      }
      else {
        obj[0]['errors'] = listener.errors;
      }
      listener.errors = [];
    }
    return obj || [];
  }

  /**
   * Parses the whole input.
   * @returns {string} JSON (4-space indent) of the array of per-bit results;
   *   also logged to the console when options.debug is set.
   */
  parse() {
    const pp = new Preprocessor();
    const bits = pp.split_bits(this.input_text);
    let allobjs = [];
    for (const bit of bits) {
      const text_with_comments = bit.bit; // reported back untouched
      bit.bit = pp.remove_comments(bit.bit); // Oct 4,2021
      const entry = this.find_parser_entry(bit.bit);
      const objs = entry
        ? this.parse_known_bit(pp, bit, entry, text_with_comments)
        : this.parse_default_bit(pp, bit, text_with_comments);
      allobjs = allobjs.concat(objs);
    }
    const json = JSON.stringify(allobjs, null, 4);
    allobjs.length = 0; // drop the references once serialized
    if (this.options.debug) {
      console.log(json);
    }
    return json;
  }
}
export {BitmarkParser};
export {Preprocessor};