mmir-lib

MMIR (Mobile Multimodal Interaction and Rendering) library

define(['mmirf/codeGenUtils', 'mmirf/util/extend'], function(codeGenUtils, extend){

/**
 * BaseGenerator for common methods, resources etc. for parsing JSON grammars
 * and generating (engine-specific) grammar definitions, that are then compiled
 * by a grammar-engine into an executable grammar.
 *
 * The BaseGenerator is used/extended in the specific
 * <code>*Generator.compileGrammar()</code> implementations.
 *
 * @class
 * @constant
 * @public
 * @name BaseGenerator
 * @memberOf mmir.env.grammar
 * @hideconstructor
 *
 * @param {mmir.tools.Logger} logger the logger instance of the specific engine/generator
 * @param {String} engineId the engine's ID, e.g. "jscc"
 *
 * @see mmir.env.grammar.JsccGenerator
 * @see mmir.env.grammar.JisonGenerator
 * @see mmir.env.grammar.PegJsGenerator
 */
return function BaseGenerator(logger, engineId) {

	var gen = {

		engineId: engineId,

		//to be implemented by sub-classes:

		/**
		 * engine's syntax: separator for phrases (= rule)
		 *
		 * @memberOf BaseGenerator
		 * @example
		 * entryRule: rule1 | rule2 | rule3 ...
		 * // -> "|"
		 */
		phrase_separator: "|",
		/**
		 * engine's syntax: (internal) variable for accessing the matched phrase (= rule)
		 *
		 * @memberOf BaseGenerator
		 * @example
		 * rule: TOKEN1 rule2 TOKEN3 [* %% = ... *]
		 * // -> "%%"
		 */
		phrase_match_var: "%%",
		/**
		 * engine's syntax:
		 * generate the utterance (= rule) declaration/heading according to the engine's syntax
		 *
		 * @param {String} utteranceName
		 *          the name of the utterance that will be declared
		 * @returns {String} the utterance (= rule) declaration heading
		 *
		 * @memberOf BaseGenerator
		 * @example
		 * rule1: TOKEN1 rule2 TOKEN3
		 * // for utteranceName "rule1" -> "rule1: "
		 */
		toUtteranceDeclarationHead: function(utteranceName){ throw Error('not implemented');},//impl. abstract
		/**
		 * engine's syntax:
		 * generate the utterance's phrase-definition incl. the processing for the phrases' semantic interpretation
		 *
		 * NOTE:
		 * some implementations may include the phrase definition in semanticInterpretation,
		 * e.g. in case the phrase's tokens/utterances need to be referenced by local variables for processing the
		 * semantic interpretation.<br/>
		 * In these cases, <code>toUtteranceDeclarationPhrase(..)</code> may only return
		 * <code>semanticInterpretation</code>, since it already includes <code>phrase</code>.
		 *
		 * @param {String} phrase
		 *          the phrase definition
		 * @param {String} semanticInterpretation
		 *          the code for processing the phrase's semantic interpretation
		 * @returns {String} the (concatenated) phrase-definition and processing for the phrase's semantic interpretation
		 *
		 * @memberOf BaseGenerator
		 * @example
		 * return phrase + semanticInterpretation;
		 */
		toUtteranceDeclarationPhrase: function(phrase, semanticInterpretation){ throw Error('not implemented');},//impl. abstract
		/**
		 * engine's syntax:
		 * for generating custom phrase-strings, e.g. adding local variables for referencing tokens/utterances in the phrases
		 * (and then processing them in the semantic interpretation code).
		 *
		 * Implementation may do nothing, if no custom phrase string is required for generating the engine's grammar definition.
		 *
		 * @param {Number} i
		 *          current index of the phrase
		 * @param {Array<String>} phraseList
		 *          the phrase list
		 * @param {Array<String>} phraseBuffer
		 *          the buffer for generated phrases (will be concatenated after processing all phrases)
		 *
		 * @memberOf BaseGenerator
		 * @see #toUtteranceDeclarationPhrase
		 * @example
		 * //simple example (jison):
		 * phraseBuffer.push(" " + phraseList[i]);
		 *
		 * //more intricate example (pegjs):
		 * if(i > 0){
		 * 	phraseBuffer.push(" " + this._WHITESPACE_TOKEN_NAME + " ");
		 * }
		 * phraseBuffer.push(this._PARTIAL_MATCH_PREFIX + (i+1) + ":" + phraseList[i]);
		 */
		addPhraseMatchForInterpretion: function(i, phraseList, phraseBuffer){ throw Error('not implemented');},//impl. abstract
		/**
		 * engine's syntax:
		 * for generating the semantic-interpretation processing code for a phrase:
		 * 1. store current phrase match into (local/temporary) variable {@link #temp_phrase_match_var}
		 * 2. add the matched phrase (var) to array of field <code>.phrases</code> in {@link #tempPhrasesVarName}
		 *
		 * @param {Number} i
		 *          current index of the phrase
		 * @param {String} tempPhrasesVarName
		 *          name of the (temporary) variable for the current utterance (to which the current phrase belongs)
		 * @param {Array<String>} phraseList
		 *          the phrase list
		 * @param {Array<String>} semanticProcBuffer
		 *          the buffer for generated semantic-interpretation processing code (will be concatenated after processing all phrases)
		 *
		 * @memberOf BaseGenerator
		 * @see #toUtteranceDeclarationPhrase
		 * @example
		 * var code = this.temp_phrase_match_var + " = " + this._PARTIAL_MATCH_PREFIX + (i+1) + ";"
		 * 	+ tempPhrasesVarName + "['phrases'].push(" + this.temp_phrase_match_var + ");\n\t\t";
		 * semanticProcBuffer.push(code);
		 */
		addPartialPhraseInterpretion: function(i, tempPhrasesVarName, phraseList, semanticProcBuffer){ throw Error('not implemented');},//impl. abstract
		/**
		 * engine's syntax:
		 * for generating the complete phrase string incl. its semantic interpretation processing code
		 *
		 * @param {String} phraseMatchStr
		 *          the (custom) phrase string as generated by <code>addPhraseMatchForInterpretion(..)</code>
		 *          (may be empty if no custom phrase string was generated)
		 * @param {String} pharseMatchResult
		 *          the phrase matching definition (internally generated)
		 * @param {String} semanticProcResult
		 *          the semantic-interpretation processing code as generated by <code>addPartialPhraseInterpretion(..)</code>
		 * @returns {String} the complete phrase string incl. its semantic interpretation processing code
		 *
		 * @see #temp_phrase_match_var
		 * @see #addPhraseMatchForInterpretion
		 * @see #addPartialPhraseInterpretion
		 * @see #toUtteranceDeclarationPhrase
		 *
		 * @example
		 * return phraseMatchStr + " %{\n\t " + pharseMatchResult + "; " + semanticProcResult + "; \n\t%} ";
		 */
		toPhraseInterpretion: function(phraseMatchStr, pharseMatchResult, semanticProcResult){ throw Error('not implemented');},//impl. abstract
		/**
		 * NOTE: default implementation (may be overridden if needed)
		 *
		 * engine's syntax:
		 * the code for specifying the return-value of a phrase (= rule) match
		 *
		 * @param {String} pharseMatchResultDef
		 *          the phrase match code
		 * @return {String} the code for specifying the return-value of the phrase (= rule) match
		 *
		 * @see #phrase_match_var
		 * @example
		 * //example 1 (default implementation):
		 * return this.phrase_match_var + " = {" + pharseMatchResultDef + "}";
		 * //example 2 (pegjs):
		 * return "var " + this.phrase_match_var + " = {" + pharseMatchResultDef + "}";
		 */
		toPhraseMatchResultForInterpretion: function(pharseMatchResultDef){
			return this.phrase_match_var + " = {" + pharseMatchResultDef + "}";
		},
		/**
		 * NOTE: default implementation (may be overridden if needed)
		 *
		 * engine's syntax:
		 * code for retrieving the index/location/offset of the match within the input string
		 *
		 * @return {String} code for getting the index of the match within the input string
		 *
		 * @see #helper_func_index
		 * @example
		 * // default implementation:
		 * return "_index("+ this._PARTIAL_MATCH_PREFIX +"1)";
		 */
		getPhraseMatchIndex: function(){
			return "_index("+ this._PARTIAL_MATCH_PREFIX +"1)";
		},

		//common grammar parsing & generation functions

		/**
		 * entry point for parsing the JSON grammar's utterances:
		 * generates (engine-specific) rules for the utterances, their phrases,
		 * and the corresponding semantic interpretation processing code.
		 */
		parseUtterances: function(){
			var self = this;
			var utt_index = 0;
			var json_utterances = this.json_grammar_definition.utterances;
			for(var utterance_name in json_utterances){
				var utterance_def = json_utterances[utterance_name];
				if(utt_index > 0){
					self.grammar_phrases += '\n\t' + self.phrase_separator;
				}
				utt_index++;
				self.doParseUtterance(utterance_name, utterance_def);
			}
		},
		/**
		 * generates the (engine-specific) rule-body for one utterance, its phrases,
		 * and the corresponding semantic interpretation processing code.
		 *
		 * SIDE EFFECTS
		 *  * appends a "token variable" declaration code for the utterance to {@link #token_variables}
		 *  * appends the generated rule-body for the utterance to {@link #grammar_utterances}
		 *
		 * @param {String} utterance_name the utterance name
		 * @param {UtteranceJson} utterance_def the JSON definition for the utterance
		 */
		doParseUtterance: function(utterance_name, utterance_def){

			var self = this;

			self.token_variables += " var " + self.variable_prefix + utterance_name.toLowerCase() + " = [];\n";

			var grammar_utterance = self.toUtteranceDeclarationHead(utterance_name);//impl. abstract

			self.grammar_phrases += utterance_name + " " ;

			var phrases = utterance_def.phrases;
			var vars = {};
			var semantic = self.doCreateSemanticInterpretationForUtterance(utterance_name, utterance_def, vars);

			for(var index=0, size=phrases.length; index < size; ++index){
				if(index > 0){
					grammar_utterance += '\n ' + self.phrase_separator;
				}
				var phrase = phrases[index];
				var semantic_interpretation = self.doCreateSemanticInterpretationForPhrase(
						utterance_name.toLowerCase(), phrase, semantic, vars
				);
				grammar_utterance += self.toUtteranceDeclarationPhrase(phrase, semantic_interpretation);//impl. abstract
			}

			self.grammar_utterances += grammar_utterance + ";\n\n";
		},
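		/* Sketch of the JSON grammar structure that parseUtterances()/doParseUtterance() iterate over
		 * (field access follows the code above: json_grammar_definition.utterances, utterance_def.phrases,
		 * utterance_def.semantic; the utterance/token names and semantic values are illustrative only,
		 * not taken from this file):
		 *
		 *   {
		 *     "utterances": {
		 *       "cmd": {
		 *         "phrases": ["ACTION OBJECT", "OBJECT ACTION"],
		 *         "semantic": { "action": "_$ACTION[0]", "target": "_$OBJECT[0]" }
		 *       }
		 *     }
		 *   }
		 *
		 * The "_$NAME[INDEX]" string values are the variable references that
		 * doCreateSemanticInterpretationForUtterance() (below) detects via {@link #variable_regexp}.
		 */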
		/**
		 * generates the (engine-specific) semantic interpretation processing code for an utterance.
		 *
		 * @param {String} utterance_name the utterance name
		 * @param {UtteranceJson} utterance_def the JSON definition for the utterance
		 * @param {Object} var_map INOUT variable that will contain the variable names that were referenced/used in the semantic definition:
		 *          if the variable reference has an index, the index is included as a string, otherwise the entry is set to <code>true</code>, i.e.
		 *          <pre>{[varName: string]: true | string}</pre>
		 *
		 * @returns {String} the code for the utterance's semantic interpretation processing
		 */
		doCreateSemanticInterpretationForUtterance: function(utterance_name, utterance_def, var_map){

			var semantic = utterance_def.semantic,
				variable_index, variable_name;

			if(logger.isDebug()) logger.debug('doCreateSemanticInterpretationForUtterance: '+semantic);//debug

			var semantic_as_string = JSON.stringify(semantic);
			if(semantic_as_string){

				this.variable_regexp.lastIndex = 0;
				var variables = this.variable_regexp.exec(semantic_as_string);
				while (variables != null) {

					var variable = variables[1],
						remapped_variable_name = "";

					if(logger.isDebug()) logger.debug("variables " + variable, semantic_as_string);//debug

					variable_index = /\[(\d+)\]/.exec(variable);
					variable_name = new RegExp('_\\$([a-zA-Z_][a-zA-Z0-9_\\-]*)').exec(variable)[1];
//					variableObj = /_\$([a-zA-Z_][a-zA-Z0-9_\-]*)(\[(\d+)\])?(\["semantic"\]|\['semantic'\]|\.semantic)?/.exec(variable);
//					variableObj = /_\$([a-zA-Z_][a-zA-Z0-9_\-]*)(\[(\d+)\])?((\[(("(.*?[^\\])")|('(.*?[^\\])'))\])|(\.(\w+)))?/.exec(variable);
					//"_$NAME[INDEX]['FIELD']":   _$NAME  [ INDEX ]  [" FIELD "] | [' FIELD '] | .FIELD
					if (variable_index == null) {
						remapped_variable_name = "return " + variable;
					} else {
						//TODO replace try/catch with safe_acc function
						//     PROBLEM: currently, the format for variable-access is not well defined
						//       -> in case of accessing the "semantic" field for a variable reference of another Utterance
						//          we would need another safe_acc call
						//          ... i.e. we would need to parse the expression for this, but since the format is not well defined
						//          we cannot say, for what exactly we should parse...
						//          NORMAL VAR EXPR:     _$a_normal_token[0]
						//          ACCESS TO SEMANTICS: _$other_utterance[0]['semantic']
						//            but this could also be expressed e.g. as _$other_utterance[0].semantic
						//          ...
//						remapped_variable_name = variable.replace(
//								  '[' + variable_index[1] + ']'
//								, "[safe_acc("
//									+ utterance_name.toLowerCase() + "_temp, 'phrases', '"
//									+ variable_name.toLowerCase() + "', "
//									+ variable_index[1]
//								+ ")]"
//						);
						remapped_variable_name = "var res = _getTok("+utterance_name.toLowerCase() + "_temp['phrases'],'"
									+ variable_name.toLowerCase() + "', " + variable_index[1]+");"
									+ " return typeof res === 'string'? res : (typeof res === 'object' && res? "+variable+" : void(0))";
					}
					semantic_as_string = semantic_as_string.replace(
							variables[0],
							//TODO replace try/catch with safe_acc function
							" function(){try{ " + remapped_variable_name + ";} catch(e){return void(0);}}() "
					);

					var_map[variable_name.toLowerCase()] = variable_index? variable_index[1] : true;

					variables = this.variable_regexp.exec(semantic_as_string);
				}
			}

			return semantic_as_string;
		},
		/**
		 * generates the (engine-specific) semantic interpretation processing code for a phrase (of an utterance).
		 *
		 * @param {String} utterance_name the utterance name
		 * @param {String} phrase the phrase definition
		 * @param {String} semantic_as_string the result of {@link #doCreateSemanticInterpretationForUtterance} for the utterance
		 * @param {Object} var_map the map of variable names that are used in the semantic definition, as processed by {@link #doCreateSemanticInterpretationForUtterance}
		 *
		 * @returns {String} the code for the phrase's semantic interpretation processing
		 */
		doCreateSemanticInterpretationForPhrase: function(utterance_name, phrase, semantic_as_string, var_map){

			var phraseList = phrase.split(/\s+/),
				length = phraseList.length;

			var phraseBuffer = [];
			var semanticProcBuffer = [];
			var tempPhrasesVarName = utterance_name + "_temp";

			for (var i = 0; i < length; ++i) {

//				//create STRING for phrase-matching
//				if(i > 0){
//					phraseStr += " " + this._WHITESPACE_TOKEN_NAME + " ";
//				}
//				phraseStr += this._PARTIAL_MATCH_PREFIX + num + ":" + phraseList[i];
				this.addPhraseMatchForInterpretion(i, phraseList, phraseBuffer);//impl. abstract

				//create STR for semantic processing of phrase
//				semanticProcResult += this.temp_phrase_match_var + " = " + this._PARTIAL_MATCH_PREFIX + num + ";"
//							+ utterance_name + "_temp['phrases'].push("+this.temp_phrase_match_var+");\n\t\t";
				this.addPartialPhraseInterpretion(i, tempPhrasesVarName, phraseList, semanticProcBuffer);//impl. abstract
			}

			var pharseMatchResultDef = this.entry_index_field + ": " + this.getPhraseMatchIndex() + ","
					+ this.entry_type_field + ": '" + utterance_name + "',"
					+ this.entry_token_field + ": null";

//			var pharseMatchResult = "var _m = {" + pharseMatchResultDef + "}";
			var pharseMatchResult = this.toPhraseMatchResultForInterpretion(pharseMatchResultDef);//impl. abstract

			var semanticProcResult = "var "+utterance_name+"_temp = {}, "+this.temp_phrase_match_var+"; "+utterance_name+"_temp['phrases'] = [];"
					+ (semanticProcBuffer.length > 0? semanticProcBuffer.join('') : '')
					+ this.phrase_match_var + "." + this.entry_token_field + " = " + utterance_name + "_temp['phrases'];"
					+ utterance_name + "_temp['phrase']=_tokenList("+utterance_name + "_temp['phrases']).join(' ');"
					+ (var_map.phrase === true? "var " + this.variable_prefix + "phrase=" + utterance_name + "_temp['phrase'];" : "")//include phrase-string in _$phrase, if it was "requested" in the semantic definition
					+ utterance_name + "_temp['utterance']='" + utterance_name + "'; "
					+ utterance_name + "_temp['engine']='" + this.engineId + "'; "
					+ utterance_name + "_temp['semantic'] = " + semantic_as_string + "; "
					+ this.variable_prefix + utterance_name + ".push(" + utterance_name + "_temp); "
					+ this.variable_prefix + "result = " + utterance_name + "_temp";

//			return phraseStr + " {\n\t " + pharseMatchResult + "; " + semanticProcResult + "; return _m; \n\t} ";
			return this.toPhraseInterpretion(phraseBuffer.length > 0? phraseBuffer.join('') : '', pharseMatchResult, semanticProcResult);//impl. abstract
		},

		//NOTE: moved from GrammarConverter

		variable_prefix: "_$",
		variable_regexp: /"(_\$[^\"]*)"/igm,

		temp_phrase_match_var: "tempMatch",
		/** NOTE: must consist of ASCII "word chars", i.e. not whitespaces, numbers etc.*/
		entry_token_field: "tok",
		/** NOTE: must consist of ASCII "word chars", i.e. not whitespaces, numbers etc.*/
		entry_index_field: "i",
		/** NOTE: must consist of ASCII "word chars", i.e. not whitespaces, numbers etc.*/
		entry_type_field: "type",
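		/* With the default field names above, the phrase-match object that is generated via
		 * toPhraseMatchResultForInterpretion() inside doCreateSemanticInterpretationForPhrase()
		 * has roughly this shape at parse time (a sketch; the concrete values are illustrative):
		 *
		 *   {
		 *     i: 0,          // entry_index_field: offset of the match within the input string
		 *     type: "cmd",   // entry_type_field: name of the matched utterance
		 *     tok: [ ... ]   // entry_token_field: initialized with null, later set to <utterance>_temp['phrases']
		 *   }
		 */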
		/**
		 * create the compile options for the grammar engine by combining the instance ID, the default options,
		 * the configuration options, and the file-format/compile options.
		 *
		 * @param {String} instanceId the grammar's instance ID
		 * @param {GrammarEngineCompileOptions} defaultOptions the default options for the grammar engine,
		 *          fields depend on the specific grammar engine with these standard fields:
		 *            execMode: 'sync' | 'async' | default: sync
		 *            genSourceUrl: true | STRING: the sourceURL for the eval'ed parser-module | default: FALSY
		 * @param {GrammarEngineCompileOptions} configOptions the configuration options for the grammar engine, e.g. retrieve using
		 *          <pre>var configOptions = configManager.get(pluginName, {})</pre>
		 * @param {Number|GrammarCompileOption} fileFormatVersionOrOptions
		 *          the version of the file format (this is a constant within {@link mmir.SemanticInterpreter#getFileVersion}),<br>
		 *          or an options object with<br>
		 *            options.fileVersion: {Number} the file format version<br>
		 *            options.strict: {Boolean} OPTIONAL for enabling/disabling JavaScript strict mode for generated grammar code (DEFAULT: true)<br>
		 *
		 * @return options object with the grammar engine's compile options (specific to the engine) and general compile options:
		 *            options.fileVersion: {Number} the file format version
		 *            options.strict: {Boolean} OPTIONAL for enabling/disabling JavaScript strict mode for generated grammar code (DEFAULT: true)
		 *            options.execMode: {'sync' | 'async'} OPTIONAL the default execution mode for the generated grammar (DEFAULT: 'sync')
		 *            options.genSourceUrl: {FALSY | true | String} the sourceURL for the eval'ed parser-module; if FALSY, the sourceURL will be omitted in the generated code (DEFAULT: FALSY)
		 */
		toGrammarCompileOptions: function(instanceId, defaultOptions, configOptions, fileFormatVersionOrOptions){

			var compileOptions = fileFormatVersionOrOptions;
			if(typeof fileFormatVersionOrOptions === 'number'){
				compileOptions = {fileVersion: fileFormatVersionOrOptions};
			} else if(!fileFormatVersionOrOptions){
				compileOptions = {fileVersion: 0};
			}

			//combine with default options:
			return extend({id: instanceId}, defaultOptions, configOptions, compileOptions);
		},
		/**
		 * Get code-prefix for wrapping generated, executable grammars.
		 *
		 * @param {Number} fileFormatVersion
		 *          the file format version (see {@link mmir.SemanticInterpreter#getFileVersion})
		 * @param {String} execMode
		 *          the execution mode for the generated grammar: 'sync' | 'async'
		 * @param {Boolean} [disableStrictMode] OPTIONAL
		 *          disable JavaScript strict mode in the generated grammar code
		 *
		 * @returns {String} the prefix code for generated grammars (i.e. prepend to generated grammar code)
		 *
		 * @see mmir.tools.CodeGenUtils#getCodeWrapPrefix
		 */
		getCodeWrapPrefix: function(fileFormatVersion, execMode, disableStrictMode){
			return codeGenUtils.getCodeWrapPrefix(disableStrictMode) +
				'var semanticInterpreter = require("mmirf/semanticInterpreter");\n'+
				'var options = {fileFormat:'+fileFormatVersion+',execMode:'+JSON.stringify(execMode)+'};\n';
		},
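		/* Sketch of how the wrap functions are meant to be combined (an assumption based on their docs,
		 * not code from this file; variable names are illustrative): the prefix (above) and the suffix
		 * (below) are concatenated around the engine-generated parser code, e.g.
		 *
		 *   var moduleCode = gen.getCodeWrapPrefix(fileFormatVersion, 'sync')
		 *                  + generatedParserCode
		 *                  + gen.getCodeWrapSuffix(encodedStopwords, grammarFuncName, grammarId);
		 *
		 * The resulting code requires "mmirf/semanticInterpreter", registers the grammar function via
		 * semanticInterpreter.addGrammar(grammarId, <grammar function>, options), and returns the grammar function.
		 */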
		/**
		 * Get code-suffix for wrapping generated, executable grammars.
		 *
		 * @param {Array<string>} encodedStopwords
		 *          the list of encoded stopwords (see {@link #getEncodedStopwords})
		 * @param {String} grammarFuncName
		 *          the (variable's) name of the grammar function that was generated
		 *          (and will be used in {@link #executeGrammar})
		 * @param {String} grammarId
		 *          the ID for the grammar (e.g. language code) with which the grammar
		 *          will be registered with the SemanticInterpreter (see {@link mmir.SemanticInterpreter#addGrammar})
		 *
		 * @returns {String} the suffix code for generated grammars (i.e. append to generated grammar code)
		 *
		 * @see mmir.tools.CodeGenUtils#getCodeWrapSuffix
		 */
		getCodeWrapSuffix: function(encodedStopwords, grammarFuncName, grammarId){
			return '\noptions.stopwords=' +
				//store stopwords with their Unicode representation (only for non-ASCII chars)
				JSON.stringify(encodedStopwords).replace(/\\\\u/gm,'\\u') +//<- revert JSON.stringify encoding for the Unicodes
				';\n' +
				//add "self registering" for the grammar-function,
				// i.e. register the grammar-function for the ID with the SemanticInterpreter
				'semanticInterpreter.addGrammar("' + grammarId + '", ' + grammarFuncName + ', options);\n\n' +
				'return ' + grammarFuncName + ';' +
				codeGenUtils.getCodeWrapSuffix();
		},

		//NOTE need access to var-defs of instance -> set these later (see below)

		/** code for HELPER (match) -> string: flatten arrays and return as concatenated string */
		helper_func_flatten: null,
		/** code for HELPER (match) -> number: get the index/offset (~ location) of a match within the input string */
		helper_func_index: null,
		/** code for HELPER (obj) -> boolean: check if obj is an array (NOTE: required by helper_func_tok) */
		helper_func_isarray: null,
		/** code for HELPER (field, match) -> string: get the token/string-representation for a match, and add it to the field variable */
		helper_func_tok: null,
		/** code for HELPER (match, list) -> string[]: add match (recursively) to token-list entry in list */
		helper_func_tokenList: null,
		/** code for HELPER (phrases, type, index) -> string: get token for type at index from phrases-list */
		helper_func_getTok: null,
	};

	gen.helper_func_flatten = " var _flatten = function(match){ if(!match.join){ return match;} for(var i=0, size = match.length; i < size; ++i){if(!match[i]){continue;}if(match[i].join){match[i] = _flatten(match[i])}} return match.join('') };\n";
	gen.helper_func_index = " var _index = function(match){return match."+gen.entry_index_field+"};\n";
	gen.helper_func_isarray = " var _isarray = function(obj){return Object.prototype.toString.call(obj)==='[object Array]';};\n";
	gen.helper_func_tok = " var _tok = function(field, match){match = _flatten(match); _isarray(field)? field.push(match) : field[match] = match; return match;};\n";
	gen.helper_func_tokenList = " var _tokenList = function(match, list) {list = list || [];var size = match.length, t;for (var i = 0; i < size; ++i) {t = match[i];if (!t) {continue;}if (t."+gen.entry_token_field+".join) {_tokenList(t."+gen.entry_token_field+", list);} else {list.push(t."+gen.entry_token_field+");}}return list;};\n";
	gen.helper_func_getTok = " var _getTok = function(phrases, type, index) {var count = 0, p;for(var i=0, size = phrases.length; i < size; ++i){p = phrases[i];if(p."+gen.entry_type_field+" === type){if(index === count++){return typeof p."+gen.entry_token_field+" === 'string'? p."+gen.entry_token_field+" : p;}}}};\n";

	return gen;
}

});
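// Usage sketch: per the class docs above, engine-specific generators (e.g. JsccGenerator) extend a
// BaseGenerator instance and override the abstract methods. This is only an illustration with assumed
// module IDs and property values, not the actual implementation of any concrete generator:
//
//   define(['mmirf/baseGen', 'mmirf/util/extend', 'mmirf/logger'], function(BaseGenerator, extend, Logger){
//
//     var logger = Logger.create('grammar.myEngineGen');
//     var gen = extend(BaseGenerator(logger, 'myEngine'), {
//       toUtteranceDeclarationHead: function(utteranceName){ return utteranceName + ': '; },
//       toUtteranceDeclarationPhrase: function(phrase, semanticInterpretation){ return phrase + semanticInterpretation; },
//       addPhraseMatchForInterpretion: function(i, phraseList, phraseBuffer){ phraseBuffer.push(' ' + phraseList[i]); },
//       // ... addPartialPhraseInterpretion, toPhraseInterpretion, etc.
//     });
//
//     // then, during compileGrammar(): set gen.json_grammar_definition and call gen.parseUtterances(),
//     // and wrap the engine-generated parser code with gen.getCodeWrapPrefix()/gen.getCodeWrapSuffix().
//   });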