UNPKG

bloodyroots

Version:

Recursive descent parser

527 lines (459 loc) 15.3 kB
// Generated by CoffeeScript 1.6.2 (function() { var Parser, inspect, inspect_orig, re_quote, typeIsArray, __slice = [].slice; re_quote = require('regexp-quote'); inspect_orig = require('util').inspect; inspect = function(x) { return inspect_orig(x, false, null); }; require('sprintf.js'); typeIsArray = Array.isArray || function(value) { return {}.toString.call(value) === '[object Array]'; }; Parser = (function() { function Parser() {} Parser.define_production = function(alpha_s, beta) { return this.prototype[alpha_s] = function(vdata, idx) { return beta.op.call(this, vdata, idx); }; }; Parser.define_grammar_operation = function(name, op_f) { return this[name] = function() { var args; args = 1 <= arguments.length ? __slice.call(arguments, 0) : []; return { name: name, op: op_f != null ? op_f.apply(this, args) : this['match_' + name].apply(this, args) }; }; }; Parser.define_grammar_operation('at_least_one', function(beta, suffix) { return this.match_range(beta, 1, void 0, true, suffix); }); Parser.define_grammar_operation('alternation'); Parser.define_grammar_operation('range'); Parser.define_grammar_operation('re', function(re_str, match_name) { return this.match_re(RegExp('^(?:' + re_str + ')'), match_name); }); Parser.define_grammar_operation('seq'); Parser.define_grammar_operation('transform', function(f, beta) { return this.op_transform(f, beta); }); Parser.define_grammar_operation('v'); Parser.define_grammar_operation('var_re'); Parser.define_grammar_operation('zero_or_more', function(beta, suffix) { return this.match_range(beta, 0, void 0, true, suffix); }); Parser.define_grammar_operation('zero_or_one', function(beta, suffix) { return this.match_range(beta, 0, 1, true, suffix); }); Parser.backref = function(ref) { return function(vdata) { var m; m = /^([^\[]*)\[([0-9]*)\]/.exec(ref); return [(vdata[m[1]] || [])[m[2]]]; }; }; Parser.prototype.debug_log = function(f) { var data, idx, name, outcome, _ref; if (this.constructor.debug) { _ref = f.call(this), name = _ref[0], idx = _ref[1], outcome = _ref[2], data = _ref[3]; return '%-15s %3s %-25s %-8s %s\n'.printf(name, idx, this.string_abbrev(idx, 25), outcome || '', data || ''); } }; Parser.match_alternation = function() { var args, beta_seq, suffix; args = 1 <= arguments.length ? __slice.call(arguments, 0) : []; if (typeIsArray(args[0])) { beta_seq = args[0], suffix = args[1]; } else { beta_seq = args; } return function(vdata, idx) { var beta, i, m, m2, _i, _len; this.debug_log(function() { var beta; return [ 'alternation', idx, 'begin', 'alternation=%s%s'.sprintf((function() { var _i, _len, _results; _results = []; for (_i = 0, _len = beta_seq.length; _i < _len; _i++) { beta = beta_seq[_i]; _results.push(beta.name); } return _results; })(), (suffix != null ? ' suffix=' + suffix.name : ' no-suffix')) ]; }); i = 0; for (_i = 0, _len = beta_seq.length; _i < _len; _i++) { beta = beta_seq[_i]; this.debug_log(function() { return ['alternation', idx, 'i=' + i, beta.name]; }); m = beta.op.call(this, vdata, idx); if (m != null) { if (suffix != null) { m2 = suffix.op.call(this, vdata, idx + m[0]); if (m2 != null) { this.debug_log(function() { return ['alternation', idx + m[0] + m2[0], 'success', 'count=' + (i + 1)]; }); return [ m[0] + m2[0], { pos: idx, length: m[0] + m2[0], type: 'seq', seq: [m[1], m2[1]] } ]; } } else { this.debug_log(function() { return ['alternation', idx + m[0], 'success', 'count=%d'.sprintf(i + 1)]; }); return m; } } i++; } this.debug_log(function() { return ['alternation', idx, 'fail']; }); }; }; Parser.match_range = function(beta, min, max, greedy, suffix) { if (min == null) { min = 0; } if (greedy == null) { greedy = true; } if (greedy) { return this._match_greedy_range(beta, min, max, suffix); } else { return this._match_nongreedy_range(beta, min, max, suffix); } }; Parser._match_greedy_range = function(beta, min, max, suffix) { return function(vdata, idx) { var match_indices, result, state, _this = this; if (!(state = this._match_range_to_min(beta, min, max, true, vdata, idx))) { return; } match_indices = [state.progress]; this._match_range_from_min(beta, max, vdata, idx, state, function() { match_indices.push(state.progress); return false; }); while (match_indices.length) { state.progress = match_indices.pop(); this.debug_log(function() { return ['range', idx + state.progress, 'i=' + state.count, 'greedy backtracking']; }); if (result = this._match_range_suffix(suffix, vdata, idx, state)) { return result; } state.work.pop(); state.count--; } this.debug_log(function() { return ['range', idx + state.progress, 'fail', 'greedy backtracking']; }); }; }; Parser._match_nongreedy_range = function(beta, min, max, suffix) { return function(vdata, idx) { var state, _this = this; if (!(state = this._match_range_to_min(beta, min, max, false, vdata, idx))) { return; } return this._match_range_suffix(suffix, vdata, idx, state) || this._match_range_from_min(beta, max, vdata, idx, state, function() { return _this._match_range_suffix(suffix, vdata, idx, state); }) || (this.debug_log(function() { return ['range', idx + state.progress, 'fail', '>=min non-greedy']; }), void 0); }; }; Parser.prototype._match_range_to_min = function(beta, min, max, greedy, vdata, idx) { var m, state; this.debug_log(function() { return ['range', idx, 'begin', '%s min=%s max=%s %s %s'.sprintf(beta.name, min, (max != null ? max : ''), (greedy ? 'greedy' : 'non-greedy'), (typeof suffix !== "undefined" && suffix !== null ? 'suffix=' + suffix.name : 'no-suffix'))]; }); if ((max != null) && min > max) { this.debug_log(function() { return ['re', idx, 'fail', 'min > max']; }); return; } state = { count: 0, progress: 0, work: [], greedy: greedy }; while (state.count < min) { this.debug_log(function() { return ['range', idx + state.progress, 'i=' + state.count, '<min']; }); m = beta.op.call(this, vdata, idx + state.progress); if (m == null) { this.debug_log(function() { return ['range', idx + state.progress, 'fail', '<min matches']; }); return; } state.progress += m[0]; state.work.push(m[1]); state.count++; } return state; }; Parser.prototype._match_range_from_min = function(beta, max, vdata, idx, state, func) { var m, output; while ((max == null) || state.count < max) { this.debug_log(function() { return ['range', idx + state.progress, 'i=' + state.count, '>=min %s'.sprintf(state.greedy ? 'greedy' : 'non-greedy')]; }); m = beta.op.call(this, vdata, idx + state.progress); if (m == null) { break; } state.progress += m[0]; state.work.push(m[1]); state.count++; if (output = func()) { return output; } } }; Parser.prototype._match_range_suffix = function(suffix, vdata, idx, state) { var m; if (suffix != null) { if ((m = suffix.op.call(this, vdata, idx + state.progress)) != null) { state.progress += m[0]; state.work.push(m[1]); this.debug_log(function() { return ['range', idx + state.progress, 'success', 'count=%d %s'.sprintf(state.count, (state.greedy ? 'greedy' : 'non-greedy'))]; }); return [ state.progress, { pos: idx, length: state.progress, type: 'seq', seq: state.work } ]; } else { } } else { this.debug_log(function() { return ['range', idx + state.progress, 'success', 'count=%d %s'.sprintf(state.count, (state.greedy ? 'greedy' : 'non-greedy trivial'))]; }); return [ state.progress, { pos: idx, length: state.progress, type: 'seq', seq: state.work } ]; } }; Parser.match_re = function(rre, match_name) { return function(vdata, idx) { var m; m = rre.exec(this.str.substr(idx)); if (m) { this.debug_log(function() { return ['re', idx, 'success', this.strip_quotes(inspect(rre.source))]; }); if (match_name != null) { vdata[match_name] = m.slice(0); } return [ m[0].length, { pos: idx, length: m[0].length, type: 're', match: m[0], groups: m.slice(0) } ]; } else { this.debug_log(function() { return ['re', idx, 'fail', this.strip_quotes(inspect(rre.source))]; }); } }; }; Parser.match_seq = function() { var beta_seq; beta_seq = 1 <= arguments.length ? __slice.call(arguments, 0) : []; return function(vdata, idx) { var beta, i, m, progress, work, _i, _len; this.debug_log(function() { var beta; return [ 'seq', idx, 'begin', (function() { var _i, _len, _results; _results = []; for (_i = 0, _len = beta_seq.length; _i < _len; _i++) { beta = beta_seq[_i]; _results.push(beta.name); } return _results; })() ]; }); progress = 0; work = []; i = 0; for (_i = 0, _len = beta_seq.length; _i < _len; _i++) { beta = beta_seq[_i]; this.debug_log(function() { return ['seq', idx + progress, 'i=' + i, beta.name]; }); m = beta.op.call(this, vdata, idx + progress); if (m == null) { this.debug_log(function() { return ['seq', idx + progress, 'fail']; }); return; } progress += m[0]; work.push(m[1]); i++; } this.debug_log(function() { return ['seq', idx + progress, 'success']; }); return [ progress, { pos: idx, length: progress, type: 'seq', seq: work } ]; }; }; Parser.match_v = function(alpha_s, argf) { return function(vdata, idx) { var m, new_vdata; this.debug_log(function() { return ['v', idx, 'begin', alpha_s]; }); new_vdata = {}; if (argf != null) { new_vdata.arg = argf.call(this, vdata); } m = this.vcache(alpha_s, idx, new_vdata); this.debug_log(function() { return ['v', idx + (m != null ? m[0] : 0), (m != null ? 'success' : 'fail'), alpha_s]; }); return m; }; }; Parser.match_var_re = function(re_str, match_name) { var self; self = this; return function(vdata, idx) { return self.match_re(RegExp('^(?:' + this.replace_backreferences(re_str, vdata) + ')'), match_name).call(this, vdata, idx); }; }; Parser.op_transform = function(f, beta) { return function(vdata, idx) { var m, tm; this.debug_log(function() { return ['transform', idx, 'begin', beta.name]; }); m = beta.op.call(this, vdata, idx); if (m == null) { this.debug_log(function() { return ['transform', idx, 'fail', beta.name]; }); return; } tm = f.call(this, m[1], vdata, idx); if (tm == null) { this.debug_log(function() { return ['transform', idx + m[0], 'fail', 'transform']; }); return; } this.debug_log(function() { return ['transform', idx + m[0], 'success']; }); return [m[0], tm]; }; }; Parser.prototype.parse = function(str) { var doc; this.str = str; this.v_cache = {}; this.debug_log(function() { return ['parse', 0, 'begin']; }); doc = this.Document({}, 0); if (doc == null) { this.debug_log(function() { return ['parse', 0, 'fail']; }); return; } this.debug_log(function() { return ['parse', doc[0], 'success']; }); return doc[1]; }; Parser.prototype.replace_backreferences = function(re_str, vdata) { var m, mstr, work; work = re_str; while (m = /\\=([^\[]+)\[([0-9]+)\]/.exec(work)) { mstr = (vdata[m[1]] || [])[m[2]]; if (mstr == null) { mstr = ''; } work = work.substr(0, m.index) + re_quote(mstr) + work.substr(m.index + m[0].length); } return work; }; Parser.prototype.string_abbrev = function(start, n) { var istr; istr = this.str.substr(start); istr = this.strip_quotes(inspect(istr)); if (istr.length > n) { return istr.substr(0, n - 3) + '...'; } else { return istr; } }; Parser.prototype.strip_quotes = function(str) { var m; m = /^'(.*)'$/.exec(str); if (m) { return m[1]; } else { return str; } }; Parser.prototype.vcache = function(alpha_s, idx, vdata) { var cache_key; cache_key = [alpha_s, idx, JSON.stringify(vdata)].join('#'); if (this.v_cache.hasOwnProperty(cache_key)) { this.debug_log(function() { return ['vcache', idx, 'cached']; }); return this.v_cache[cache_key]; } else { return this.v_cache[cache_key] = this[alpha_s](vdata, idx); } }; return Parser; })(); exports.Parser = Parser; }).call(this);