UNPKG

ethercalc

Version:

Multi-User Spreadsheet Server

audreyt/ethercalc

176 lines (154 loc) • 6.04 kB

JavaScript

Class('Document.Parser', function() { var proto = this.prototype; proto.className = 'Document.Parser'; proto.init = function() {} proto.parse = function(input, receiver) { this.input = (input.search(/\n$/) == -1) ? input+"\n" : input; if (receiver) this.receiver = receiver; this.receiver.init(); this.grammar = this.create_grammar(); this.parse_blocks('top'); return this.receiver.content(); } proto.create_grammar = function() { throw "Please define create_grammar in a derived class of Document.Parser."; }; //------------------------------------------------------------------------------ // Parse input into a series of blocks. With each iteration the parser must // match a block at position 0 of the text, and remove that block from the // input reparse it further. This continues until there is no input left. //------------------------------------------------------------------------------ proto.parse_blocks = function(container_type) { var types = this.grammar[container_type].blocks; // Document.contains[container_type]; if (!types) return; while (this.input.length) { var length = this.input.length; for (var i = 0; i < types.length; i++) { var type = types[i]; var matched = this.find_match('matched_block', type); if (matched) { this.input = this.input.substr(matched.end); this.handle_match(type, matched); break; } } if (this.input.length >= length) throw this.classname + ': Reduction error for:\n' + this.input + '\n' + JSON.stringify(this); } return; } proto.handle_match = function(type, match) { var grammar = this.grammar[type]; var parse = grammar.blocks ? 'parse_blocks' : 'parse_phrases'; // console.log("Subparsing " + parse + '(' + type + '): '); // console.log(match); this.subparse(parse, match, type, grammar.filter); } proto.find_match = function(matched_func, type) { var re = this.grammar[type].match; if (!re) throw 'no regexp for type: ' + type; var capture = this.input.match(re); if (capture) { // console.log("Found match " + type + " - " + matched_func); var match = this[matched_func].call(this, capture, this.grammar[type].lookbehind); match.type = this.grammar[type].type || type; // console.log(match); return match; } return; }; //------------------------------------------------------------------------------ // This code parses a chunk into interleaved pieces of plain text and // phrases. It repeatedly tries to match every possible phrase and // then takes the match closest to the start. Everything before a // match is written as text. Matched phrases are subparsed according // to their rules. This continues until the input is all eaten. //------------------------------------------------------------------------------ proto.parse_phrases = function(container_type) { var types = this.grammar[container_type].phrases; if (!types) { this.receiver.text_node(this.input || ''); return } // console.log("INPUT: " + this.input); while (this.input.length) { var match = null; for (var i = 0; i < types.length; i++) { var type = types[i]; var matched = this.find_match('matched_phrase', type); if (! matched) continue; if (!match || (matched.begin < match.begin)) { match = matched; if (match.begin == 0) break; } } if (!match) { // console.log("NO MATCH: " + this.input); this.receiver.text_node(this.input || ''); break; } if (match.begin != 0) { // console.log("MATCH OFFSET:" + this.input + " (" + match.type + ")" + match.begin); this.receiver.text_node(this.input.substr(0, match.begin) || ''); } this.input = this.input.substr(match.end); this.handle_match(match.type, match); } return; } proto.subparse = function(func, match, type, filter) { /* The call could cause side effects to the match object. */ match.type = this.grammar[type].type; if (match.type == null) match.type = type; var filtered_text = filter ? filter(match) : null; if (match.type) this.receiver.begin_node(match); var parser = eval('new ' + this.className + '()'); parser.input = (filtered_text == null) ? match.text : filtered_text; parser.grammar = this.grammar; parser.receiver = this.receiver.instantiate(); // console.log("SEEDED: (" + type + ")" + parser.input); parser[func].call(parser, type); this.receiver.insert(parser.receiver); if (match.type) this.receiver.end_node(match); } //------------------------------------------------------------------------------ // Helper functions // // These are the odds and ends called by the code above. //------------------------------------------------------------------------------ /* Blocks has no lookbehinds, so: * All match begins at 0. The first capture is text; the next ones are various parts. */ proto.matched_block = function(capture) { return { begin: capture.index, text: capture[1], end: capture[0].length, 1: capture[2], 2: capture[3], 3: capture[4] }; } /* The first capture in a Phrases is the lookbehind. So: */ proto.matched_phrase = function(capture, lookbehind) { if (lookbehind) { var text = capture[2]; var begin = this.input.indexOf(capture[1]); return { text: text, begin: begin, end: (begin + capture[1].length), 1: RegExp.$2, 2: RegExp.$3, 3: RegExp.$4 }; } return { begin: capture.index, text: capture[1], end: capture.index + capture[0].length, 1: capture[2], 2: capture[3], 3: capture[4] }; } });