mmir-lib
Version:
MMIR (Mobile Multimodal Interaction and Relay) library
697 lines (605 loc) • 23.2 kB
JavaScript
define(['mmirf/jscc','mmirf/resources','mmirf/configurationManager','mmirf/grammarConverter', 'mmirf/baseGen','mmirf/util/deferred','mmirf/util/extend','mmirf/util/toArray','mmirf/util/loadFile','mmirf/logger', 'module'],
/**
* Generator for executable language-grammars (i.e. converted JSON grammars).
*
* <p>
* This generator uses JS/CC for compiling the JSON grammar.
*
* <p>
* The generator for compiling the JSON grammar definitions in <code>www/config/languages/<language code>/grammar.json</code>
* can be configured in <code>www/config/configuration.json</code>:<br>
* <pre>
* {
* ...
* "grammarCompiler": "jscc",
* ...
* }</pre>
*
* <em>NOTE: this is the default grammar engine (if there is no setting in
* <code>configuration.json</code>, then this engine will be used)
* </em>
*
* <p>
* JS/CC supports grammar generation for:
* LALR(1)
*
* (in difference to jison, JS/CC supports backtracking to certain extend)
*
* @see <a href="http://jscc.phorward-software.com/">http://jscc.phorward-software.com/</a>
*
* @class
* @constant
* @public
* @name JsccGenerator
* @memberOf mmir.env.grammar
* @hideconstructor
*
* @requires JS/CC
*/
function(jscc, resources, configManager, GrammarConverter, BaseGenerator, deferred, extend, toArray, loadFile, Logger, module){
////////////////////////////////////// template loading / setup for JS/CC generator ////////////////////////////////
/**
* Deferred object that will be returned - for async-initialization:
* the deferred object will be resolved, when this module has been initialized.
*
* @private
* @type Deferred
* @memberOf JsccGenerator#
*
* @see #_loadTemplate
*/
var deferred = deferred();
/**
* The Logger for the JS/CC generator.
*
* @private
* @type mmir.tools.Logger
* @memberOf JsccGenerator#
*
* @see mmir.Logging
*/
var logger = Logger.create(module);
//setup logger for compile-messages
/**
* HELPER for creating default logging / error-print functions
*
* @function
* @private
* @memberOf JsccGenerator#
*
* @see mmir.Logging
*/
function _createCompileLogFunc(log /*Logger*/, level /*String: log-function-name*/, toArray/*helper-lib: provides function makeArray(obj); e.g. jquery*/){
return function(){
var args = toArray(arguments);
//prepend "location-information" to logger-call:
args.unshift('JS/CC', 'compile');
//output log-message:
log[level].apply(log, args);
};
}
/**
* The URL to the JS/CC template file (generated code-text will be "embedded" in this template)
*
* @private
* @type String
* @memberOf JsccGenerator#
*/
var templatePath = resources.getGrammarPluginPath() + 'grammarTemplate_reduced.tpl';
/**
* The default options for the JS/CC compiler.
*
* To overwrite the default options, configure the following property in <code>www/config/configuration.json</code>:<br>
* <pre>
* {
* ...
* "grammar": {
* ...
* "jscc": {
* "execMode": "your configuration setting!"
* }
* ...
* },
* ...
* }</pre>
*
* Valid settings are:
* <code>execMode = 'sync' | 'async'</code>
* <code>genSourceUrl = true | STRING | FALSY'</code>
*
*
* genSourceUrl: if TRUTHY, the sourceUrl for eval'ed parser-module is set
* (i.e. eval'ed code will appear at the URL in debugger, if browser supports sourceURL setting)
* if true: the sourceUrl will be generated using the grammar's ID
* if STRING: the string will be used as sourceUrl; if "<id>" is contained, it will be replaced by the grammar's ID
*
* @constant
* @private
* @default targetType := jscc.MODE_GEN_JS, execMode := sync, genSourceUrl := FALSY
* @memberOf JsccGenerator#
*/
var DEFAULT_OPTIONS = {
targetType: jscc.MODE_GEN_JS,
execMode: 'sync',//'sync' | 'async' | default: sync
genSourceUrl: '',// true | STRING: the sourceURL for eval'ed parser-module | default: FALSY
};
/**
* the ID for the grammar engine
* @constant
* @private
* @memberOf JsccGenerator#
*/
var engineId = 'jscc';
/**
* Name for this plugin/grammar-generator (e.g. used for looking up configuration values in configuration.json).
* @constant
* @private
* @memberOf JsccGenerator#
*/
var pluginName = 'grammar.'+engineId;
/**
* instance of a BaseGenerator (provides common resources for generating grammar definitions).
* @constant
* @private
* @type BaseGenerator
* @memberOf JsccGenerator#
*/
var baseGenerator = new BaseGenerator(logger, engineId);
/**
* Exported (public) functions for the JS/CC grammar-engine.
* @public
* @type GrammarGenerator
* @memberOf JsccGenerator#
*/
var jsccGen = {
/** @scope JsccGenerator.prototype */
/**
* The name/ID for the compile engine for the JS/CC compiler
*
* @memberOf mmir.env.grammar.JsccGenerator.prototype
*/
engineId: engineId,
/**
* @param {Function} [callback] OPTIONAL
* the callback that is triggered, when the engine is initialized
* @returns {Deferred}
* a promise that is resolved, when the engine is initialized
* (NOTE: if you use the same function for the <code>callback</code> AND the promise,
* then the function will be invoked twice!)
*
* @memberOf mmir.env.grammar.JsccGenerator.prototype
*/
init: function(callback){
if(callback){
deferred.then(callback, callback);
}
return deferred;
},
/** @returns {Boolean} if this engine compilation works asynchronously. The current implementation works synchronously (returns FALSE) */
isAsyncCompilation: function(){ return false; },
/**
* The function for compiling a JSON grammar:
*
*
* @param {mmir.grammar.GrammarConverter} theConverterInstance
* @param {String} instanceId
* the ID for the compiled grammar (usually this is a language code)
* @param {Number|GrammarCompileOption} fileFormatVersionOrOptions
* the version of the file format (this is a constant within {@link mmir.SemanticInterpreter},
* or an compile options object, see {@link mmir.env.grammar.BaseGenerator#toGrammarCompileOptions}
* @param callback
* @returns {mmir.grammar.GrammarConverter}
* the grammar instance with attached with the compiled function for executing the
* grammar to the instance's {@link GrammarConvert#executeGrammar} property/function.
*/
compileGrammar: function(theConverterInstance, instanceId, fileFormatVersionOrOptions, callback){
//attach functions for JS/CC conversion/generation to the converter-instance:
extend(theConverterInstance, baseGenerator, JsccGrammarConverterExt);
//attach the JS/CC template to the converter instance
theConverterInstance.TEMPLATE = this.template;
//start conversion: create grammar in JS/CC syntax (from the JSON definition):
theConverterInstance.init();
this._preparePrintError();
theConverterInstance.convertJSONGrammar();
var grammarDefinition = theConverterInstance.getGrammarDef();
//load options from configuration:
var config = configManager.get(pluginName, {});
//combine with default default options:
var options = baseGenerator.toGrammarCompileOptions(instanceId, DEFAULT_OPTIONS, config, fileFormatVersionOrOptions);
var compileParserModule = function(grammarParserStr, hasError){
var addGrammarParserExec = theConverterInstance.getCodeWrapPrefix(options.fileVersion, JSON.stringify(options.execMode), !options.strict)
+ 'var grammarFunc = function(nl_input_text, options){\n\n'
+ 'function _printLog(){console.log.apply(console, arguments);};\n'
+ 'function _noopFunc(){};\n'
+ grammarParserStr
+ '\n};\n'
+ theConverterInstance.getCodeWrapSuffix(theConverterInstance.getEncodedStopwords(), 'grammarFunc', instanceId);
if(options.genSourceUrl){
var sourceUrlStr;
if(options.genSourceUrl === true){
sourceUrlStr = 'gen/grammar/_compiled_grammar_'+instanceId;
} else {
sourceUrlStr = options.genSourceUrl.toString().replace(/<id>/g,instanceId);
}
//for Chrome / FireFox debugging: provide an URL for eval'ed code
addGrammarParserExec += '//@ sourceURL='+sourceUrlStr+'\n'
+'//# sourceURL='+sourceUrlStr+'\n';
}
theConverterInstance.setGrammarSource(addGrammarParserExec);
try{
eval(addGrammarParserExec);
} catch (err) {
//TODO russa: generate meaningful error message with details about error location
// eg. use esprima (http://esprima.org) ...?
// ... as optional dependency (see deferred initialization above?)
var evalMsg = 'Error during eval() for "'+ instanceId +'": ' + err + ', source code:\n'+addGrammarParserExec+'\n\ntemplate code:\n'+theConverterInstance.TEMPLATE;
if(jscc.get_printError()){
jscc.get_printError()(evalMsg);
}
else {
logger.error('JS/CC', 'evalCompiled', evalMsg, err);
}
if(! hasError){
evalMsg = '[INVALID GRAMMAR JavaScript CODE] ' + evalMsg;
var parseDummyFunc = (function(msg, error){
return function(){ console.error(msg); console.error(error); throw msg;};
})(evalMsg, err);
parseDummyFunc.hasErrors = true;
theConverterInstance.setGrammarFunction(parseDummyFunc);
}
}
//invoke callback if present:
if(callback){
callback(theConverterInstance);
}
};
var isPreventDefault = this._afterCompileParser(compileParserModule, callback);
var result = this._compileParser(grammarDefinition, options, isPreventDefault);
if(!isPreventDefault){
var hasError = result.hasError;
compileParserModule(result.def, hasError);
}
return theConverterInstance;
},
/**
* @protected
*/
_compileParser: function(grammarDefinition, options, afterCompileParserResult){
//set up the JS/CC compiler:
var dfa_table = '';
jscc.reset_all( jscc.EXEC_WEB );
jscc.set_debug(/*show errors: */true, /*show warnings: */true, /*hide trace: */false);
var hasError = false;
var grammarParser;
try {
jscc.parse_grammar(grammarDefinition, 'grammar ' + options.id);
} catch (err){
var msg = 'Error while compiling grammar: ' + (err.stack?err.stack:err);
hasError = msg;
msg = '[INVALID GRAMMAR] ' + msg
+ '\n-----------------------------\n Grammar Definition:\n-----------------------------\n'
+ grammarDefinition;
grammarParser = 'var msg = '+JSON.stringify(msg)+'; console.error(msg); return {error: msg, phrase: nl_input_text, engine: "jscc"};';
}
var errorCount = jscc.getErrors();
var warnCount = jscc.getWarnings();
if (errorCount == 0) {
jscc.undef();
jscc.unreachable();
errorCount = jscc.getErrors();
if (errorCount == 0) {
jscc.first();
jscc.print_symbols();
dfa_table = jscc.create_subset(jscc.get_nfa_states());
dfa_table = jscc.minimize_dfa(dfa_table);
jscc.set_dfa_table(dfa_table);//FIXME: check, if this is really necessary
jscc.lalr1_parse_table(false);
}
}
if (errorCount > 0 || warnCount > 0){
logger.error(
'JSCC', 'compile', 'there occured'
+ (warnCount > 0? warnCount + ' warning(s)' : '')
+ (errorCount > 0? errorCount + ' error(s)' : '')
+ ' during compilation.'
);
hasError = errorCount > 0;
}
// console.debug("before replace " + theConverterInstance.PARSER_TEMPLATE);//debug
if( ! hasError){
var genData = this._getGenerated(options.targetType, dfa_table);
grammarParser = this._applyGenerated(genData, this.template);
} else {
var msg = 'Error'+(errorCount === 1? '': 's')+' parsing grammar ('+errorCount+'):\n ' + jscc.getErrorMessages().join('\n ');
if(warnCount > 0){
msg += '\nWarning'+(warnCount === 1? '': 's')+' parsing grammar('+warnCount+'):\n ' + jscc.getWarningMessages().join('\n ');
}
hasError = msg;
msg = '[INVALID GRAMMAR] ' + msg
+ '\n-----------------------------\n Grammar Definition:\n-----------------------------\n'
+ grammarDefinition;
grammarParser = 'var msg = '+JSON.stringify(msg)+'; console.error(msg); return {error: msg, phrase: nl_input_text, engine: "jscc"};';
}
jscc.resetErrors();
jscc.resetWarnings();
return {def: grammarParser, hasError: hasError};
},
/**
* @protected
*/
_preparePrintError: function(){
//only set print-message function, if it is not already set
// (i.e. if JS/CC still has its default print function set)
if(jscc.is_printError_default()){
/**
* The default logging function for printing compilation errors.
* @private
* @name set_printError
* @function
* @memberOf JsccGenerator.jscc#
*/
jscc.set_printError( _createCompileLogFunc(logger, 'error', toArray));
}
if(jscc.is_printWarning_default()){
/**
* The default logging function for printing compilation warnings.
* @private
* @name set_printWarning
* @function
* @memberOf JsccGenerator.jscc#
*/
jscc.set_printWarning( _createCompileLogFunc(logger, 'warn', toArray));
}
if(jscc.is_printInfo_default()){
/**
* The default logging function for printing compilation information.
* @private
* @name set_printInfo
* @function
* @memberOf JsccGenerator.jscc#
*/
jscc.set_printInfo( _createCompileLogFunc(logger, 'info', toArray));
}
},
/**
* The default logging / error-print function for JS/CC.
*
* @protected
*
* @see mmir.Logging
*/
printError: function(){
var errorFunc = jscc.get_printError();
if(errorFunc){
errorFunc.apply(jscc, arguments);
} else {
var args = toArray(arguments);
//prepend "location-information" to logger-call:
args.unshift('JS/CC', 'compile');
//output log-message:
logger.error.apply(logger, args);
}
},
/**
* Optional hook for pre-processing the generated parser, after the parser is generated.
*
* By default, this function returns VOID, in which case the parser-module is created by default.
*
* If a function is returned instead, then it must invoke <code>compileParserModuleFunc</code>:
* <code>compileParserModuleFunc(compiledParser : STRING, hasErrors : BOOLEAN)</code>
*
*
* @param {Function} compileParserModuleFunc
* the function that generates the parser-module:
* <code>compileParserModuleFunc(compiledParser : STRING, hasErrors : BOOLEAN)</code>
*
* @param {Function} compileCallbackFunc
* the callback function which will be invoked by compileParserModuleFunc, after it has finished.
* If compileParserModuleFunc() is prevented from exectution then the callback MUST be invoked manually
* <code>compileCallbackFunc(theConverterInstance: GrammarConverter)</code>
*
* @returns {TRUTHY|VOID}
* FALSY for the default behavior.
* IF a TRUTHY value is returned, then the default action after compiling the parser
* is not executed:
* i.e. compileParserModuleFunc is not automatically called and in consequence the callback is not invoked
*
*
* NOTE: if not FALSY, then either compileParserModuleFunc() must be invoked, or the callback() must be invoked!
*
* @protected
*/
_afterCompileParser: function(compileParserModuleFunc, compileCallbackFunc){
//default: return VOID
return;
},
/**
* @protected
*/
_getGenerated: function(genMode, dfaTable){
return {
head: jscc.get_code_head(),
tables: jscc.print_parse_tables(genMode),//jscc.MODE_GEN_JS
dfa: jscc.print_dfa_table(dfaTable),//dfa_table
terminals: jscc.print_term_actions(),
labels: jscc.print_symbol_labels(),
actions: jscc.print_actions(),
error: jscc.get_error_symbol_id(),
eof: jscc.get_eof_symbol_id(),
whitespace: jscc.get_whitespace_symbol_id()
};
},
/**
* @protected
*/
_applyGenerated: function(genData, template){
var grammarParserStr = template;
grammarParserStr = grammarParserStr.replace(/##PREFIX##/g, "");
grammarParserStr = grammarParserStr.replace(/##HEADER##/g, genData.head);
grammarParserStr = grammarParserStr.replace(/##TABLES##/g, genData.tables);
grammarParserStr = grammarParserStr.replace(/##DFA##/g, genData.dfa);
grammarParserStr = grammarParserStr.replace(/##TERMINAL_ACTIONS##/g, genData.terminals);
grammarParserStr = grammarParserStr.replace(/##LABELS##/g, genData.labels);
grammarParserStr = grammarParserStr.replace(/##ACTIONS##/g, genData.actions);
grammarParserStr = grammarParserStr.replace(/##FOOTER##/g,
"\n_dbg_withtrace = options && !!options.trace;\n"
// +"options = options || {debug: true, trace: function(msg){window.alert(msg)}};\n" //TEST
+ "var _logDebug = options && !!options.debug? _printLog : _noopFunc;\n"
+ "var alert = options && options.trace? typeof options.trace === 'function'? options.trace : _printLog : _noopFunc;\n"
+ "var _semanticAnnotationResult = {result: {phrase: nl_input_text, engine: 'jscc'}};\n"
+ "__parse(nl_input_text, [], [], _semanticAnnotationResult);\n"
+ "return _semanticAnnotationResult.result;"
);
grammarParserStr = grammarParserStr.replace(/##ERROR##/g, genData.error);
grammarParserStr = grammarParserStr.replace(/##EOF##/g, genData.eof);
grammarParserStr = grammarParserStr.replace(/##WHITESPACE##/g, genData.whitespace);
return grammarParserStr;
}
};
if(typeof WEBPACK_BUILD !== 'undefined' && WEBPACK_BUILD){
jsccGen.template = require('./grammarTemplate_reduced.tpl');
deferred.resolve();
} else {
/**
* Initializes the grammar-engine:
*
* loads the template and resolves the engine as initialzed.
*
* @param {String} url
* URL to the template file
* @param {GrammarGenerator} jsccGenerator
* the JS/CC grammar generator instance
* @param {Deferred} promise
* the {@link #deferred} for resolving the generator
* as initialized.
*
*
* @private
* @function
* @memberOf JsccGenerator#
*/
function _loadTemplate(url, jsccGenerator, promise){
loadFile({//$.ajax({
url: url,
dataType: 'text',
async: true,
success: function(data){
jsccGenerator.template = data;
promise.resolve();
},
error: function(_xhr, status, err){
var msg = 'Failed to load grammar template file from "'+templatePath+'": '+status+', ERROR '+err;
logger.error(msg);
promise.reject(msg);
}
});
}
//load the JS/CC template and resolve this module as "initialzed":
_loadTemplate(templatePath, jsccGen, deferred);
}
////////////////////////////////////// JS/CC specific extensions to GrammarConverter ////////////////////////////////
/**
* JS/CC specific extension / implementation for {@link mmir.grammar.GrammarConverter} instances
*
* @type mmir.grammar.GrammarConverter
* @memberOf JsccGenerator#
*/
var JsccGrammarConverterExt = {
/** @memberOf JsccGrammarConverterExt */
init: function(){
this.THE_INTERNAL_GRAMMAR_CONVERTER_INSTANCE_NAME = "theGrammarConverterInstance";
this._PARTIAL_MATCH_PREFIX = "%";
this._PARTIAL_LOCATION_PREFIX = '@';
this.grammar_tokens = "/~ --- Token definitions --- ~/\n\n/~ Characters to be ignored ~/\n! ' |\\t' ;\n\n/~ Non-associative tokens ~/\n";
this.grammar_utterances = "";
this.grammar_phrases = "phrases:";
this.token_variables = "[*\n var " + this.variable_prefix + "result = '';\n";
this.tokens_array = [];
},
convertJSONGrammar: function(){
this.json_grammar_definition = this.maskJSON(this.json_grammar_definition);
//include some helper functions
this.token_variables +=
this.helper_func_tokenList
+ this.helper_func_getTok
+ this.helper_func_index;
this.parseTokens();
this.parseUtterances();
this.parseStopWords();
this.grammar_definition = this.token_variables
+ "*]\n\n"
+ this.grammar_tokens
+ "\n\n ##\n\n/~ --- Grammar specification --- ~/\n\nutterance: phrases [* "
+ "_logDebug(" + this.variable_prefix + "result); "
+ "semanticAnnotationResult.result = "
+ this.variable_prefix + "result; *] ;\n\n" + this.grammar_utterances
+ "\n" + this.grammar_phrases + ";";
this.json_grammar_definition = this.unmaskJSON(this.json_grammar_definition);
},
parseTokens: function(){
var self = this;
var json_tokens = this.json_grammar_definition.tokens;
var pref = self.variable_prefix;
for(var token_name in json_tokens){
var words = json_tokens[token_name];
self.token_variables += " var " + pref
+ token_name.toLowerCase() + " = [];\n";
var grammar_token =" '";
for(var i=0, size = words.length; i < size ; ++i){
if(i > 0){
grammar_token += "|";
}
grammar_token += this._prepareToken(words[i]);
}
grammar_token += "' " + token_name + " [* " +
pref + token_name.toLowerCase() + ".push(%match);"
+ "%match = {"
+ this.entry_index_field + ": %offset,"
+ this.entry_type_field + ": '" + token_name.toLowerCase() + "',"
+ this.entry_token_field + ": %match"
+ "}; *];\n";
self.grammar_tokens += grammar_token;
}
},
//////////////// implementing/overriding BaseGenerator fields & functions: ////////////////////////
//impl. abstract
phrase_separator: "|",
//impl. abstract
phrase_match_var: "%%",
//impl. abstract
toUtteranceDeclarationHead: function(utteranceName){
return utteranceName + ': ';
},
//impl. abstract
toUtteranceDeclarationPhrase: function(phrase, semanticInterpretation){
// phrase + semanticInterpretation
return phrase + semanticInterpretation;
},
//impl. abstract
addPhraseMatchForInterpretion: function(_i, _phraseList, _phraseBuffer){
//do nothing!
},
//impl. abstract
addPartialPhraseInterpretion: function(i, tempPhrasesVar, _phraseList, semanticProcBuffer){
// //create STR for semantic processing of phrase
// semanticProcResult += this.temp_phrase_match_var + " = " + this._PARTIAL_MATCH_PREFIX + num + ";"
// + utterance_name + "_temp['phrases'].push(" + this.temp_phrase_match_var + ");\n\t\t";
semanticProcBuffer.push(
this.temp_phrase_match_var + " = " + this._PARTIAL_MATCH_PREFIX + (i+1) + ";"
+ tempPhrasesVar + "['phrases'].push(" + this.temp_phrase_match_var + ");\n\t\t"
);
},
// toPhraseMatchResultForInterpretion: -> use default impl.
//impl. abstract
toPhraseInterpretion: function(_phraseMatchStr, pharseMatchResult, semanticProcResult){
// return " [* " + pharseMatchResult + "; " + semanticProcResult + " *]";
return " [* " + pharseMatchResult + "; " + semanticProcResult + " *]"
},
//////////////// internal helpers: ////////////////////////
_prepareToken: function(token){
//need to mask delimiting quotes, i.e. '
return token.replace(/'/g, "\\'");
}
};
return jsccGen;
});