fdevsta_monmove
Version:
864 lines (860 loc) • 33.4 kB
JavaScript
/**
* Functionality managing the match models
*
* @file
*/
;
Object.defineProperty(exports, "__esModule", { value: true });
//import * as intf from 'constants';
var debug = require("debug");
var debuglog = debug('model');
//const loadlog = logger.logger('modelload', '');
var IMatch = require("../match/ifmatch");
var InputFilterRules = require("../match/rule");
//import * as Tools from '../match/tools';
var fs = require("fs");
var Meta = require("./meta");
var Utils = require("abot_utils");
var CircularSer = require("abot_utils");
var Distance = require("abot_stringdist");
var process = require("process");
var _ = require("lodash");
/**
* the model path, may be controlled via environment variable
*/
var envModelPath = process.env["ABOT_MODELPATH"] || "node_modules/abot_testmodel/testmodel";
/**
 * Comparator ordering two tool objects by their `name` property.
 *
 * @param a first tool, must carry a string `name`
 * @param b second tool, must carry a string `name`
 * @returns the result of `a.name.localeCompare(b.name)`
 */
function cmpTools(a, b) {
    var leftName = a.name;
    var rightName = b.name;
    return leftName.localeCompare(rightName);
}
exports.cmpTools = cmpTools;
var ARR_MODEL_PROPERTIES = ["domain", "bitindex", "defaultkeycolumn", "defaulturi", "categoryDescribed", "columns", "description", "tool", "toolhidden", "synonyms", "category", "wordindex", "exactmatch", "hidden"];
/**
 * Register one WORD rule per synonym, each resolving to `synonymFor`.
 *
 * @param synonyms       array of synonym strings
 * @param category       category recorded on every generated rule
 * @param synonymFor     value stored as `matchedString` of every generated rule
 * @param bitindex       domain bit stored on the rule
 * @param bitSentenceAnd bit mask stored as `bitSentenceAnd` on the rule
 * @param wordType       word type marker stored on the rule
 * @param mRules         rule list receiving the new rules
 * @param seen           duplicate-detection map passed on to insertRuleIfNotPresent
 */
function addSynonyms(synonyms, category, synonymFor, bitindex, bitSentenceAnd, wordType, mRules, seen) {
    // builds the rule for a single synonym; key order is kept stable because
    // the rule is serialized in debug output
    function makeSynonymRule(synonym) {
        return {
            category: category,
            matchedString: synonymFor,
            type: IMatch.EnumRuleType.WORD,
            word: synonym,
            bitindex: bitindex,
            bitSentenceAnd: bitSentenceAnd,
            wordType: wordType,
            _ranking: 0.95
        };
    }
    synonyms.forEach(function (synonym) {
        var synonymRule = makeSynonymRule(synonym);
        // only pay for JSON.stringify when debug logging is switched on
        var message = debuglog.enabled ? ("inserting synonym" + JSON.stringify(synonymRule)) : '-';
        debuglog(message);
        insertRuleIfNotPresent(mRules, synonymRule, seen);
    });
}
/**
 * Build the string key under which a rule is tracked for duplicate detection.
 *
 * The key is composed of matchedString, category, type, word, bitindex and
 * wordType; for rules carrying a `range`, the range bounds and the key of the
 * nested `range.rule` are appended (recursively).
 *
 * @param rule the rule to derive the key for
 * @returns the key string
 */
function getRuleKey(rule) {
    var key = rule.matchedString + "-|-" + rule.category;
    key += " -|- " + rule.type;
    key += " -|- " + rule.word + " " + rule.bitindex + " " + rule.wordType;
    var range = rule.range;
    if (!range) {
        return key;
    }
    var nestedKey = getRuleKey(range.rule);
    return key + " -|- " + range.low + "/" + range.high + " -|- " + nestedKey;
}
var Breakdown = require("../match/breakdown");
/**
 * Given a WORD rule whose word is split into several tokens during
 * tokenization, add a companion rule keyed on the longest token.
 *
 * The pattern comes from Breakdown.makeMatchPattern(rule.lowercaseword);
 * when it yields nothing, no rule is added. The companion rule carries the
 * pattern's `span` as `range`, and that range refers back to the original rule.
 * Rules that are not of type WORD are ignored.
 *
 * @param mRules    rule list to extend
 * @param rule      the original rule (its `lowercaseword` must already be set)
 * @param seenRules duplicate-detection map passed on to insertRuleIfNotPresent
 */
function addBestSplit(mRules, rule, seenRules) {
    var isWordRule = rule.type === IMatch.EnumRuleType.WORD;
    if (!isWordRule) {
        return;
    }
    var pattern = Breakdown.makeMatchPattern(rule.lowercaseword);
    if (!pattern) {
        return;
    }
    var token = pattern.longestToken;
    var span = pattern.span;
    var splitRule = {
        category: rule.category,
        matchedString: rule.matchedString,
        bitindex: rule.bitindex,
        // NOTE(review): bitSentenceAnd is taken from rule.bitindex, not from
        // rule.bitSentenceAnd — confirm this is intended
        bitSentenceAnd: rule.bitindex,
        wordType: rule.wordType,
        word: token,
        type: 0,
        lowercaseword: token,
        _ranking: 0.95,
        range: span
    };
    if (rule.exactOnly) {
        splitRule.exactOnly = rule.exactOnly;
    }
    // link the range back to the rule it was derived from
    span.rule = rule;
    insertRuleIfNotPresent(mRules, splitRule, seenRules);
}
exports.addBestSplit = addBestSplit;
/**
 * Append a rule to `mRules` unless an equivalent WORD rule was already seen.
 *
 * Rules that are not of type WORD are appended unconditionally.
 * For WORD rules:
 *  - `word` and `matchedString` must be defined, otherwise an Error is thrown;
 *  - `rule.lowercaseword` is (re)computed from `rule.word`;
 *  - the rule is dropped when `seenRules` already holds a rule under the same
 *    key for which InputFilterRules.compareMRuleFull yields 0;
 *  - otherwise it is recorded in `seenRules`; rules with an empty word are
 *    recorded but not appended;
 *  - after appending, addBestSplit is invoked for the rule.
 *
 * @param mRules    rule list to extend
 * @param rule      the candidate rule (mutated: `lowercaseword` is set)
 * @param seenRules map from rule key to the list of rules seen under that key
 * @throws Error if a WORD rule lacks `word` or `matchedString`
 */
function insertRuleIfNotPresent(mRules, rule, seenRules) {
    if (rule.type !== IMatch.EnumRuleType.WORD) {
        mRules.push(rule);
        return;
    }
    var incomplete = rule.word === undefined || rule.matchedString === undefined;
    if (incomplete) {
        throw new Error('illegal rule' + JSON.stringify(rule, undefined, 2));
    }
    // the key is derived before lowercaseword is assigned; getRuleKey does not read it
    var key = getRuleKey(rule);
    rule.lowercaseword = rule.word.toLowerCase();
    var bucket = seenRules[key];
    if (bucket) {
        debuglog(debuglog.enabled ? ("Attempting to insert duplicate" + JSON.stringify(rule, undefined, 2)) : "-");
        var alreadyPresent = bucket.some(function (known) {
            return InputFilterRules.compareMRuleFull(known, rule) === 0;
        });
        if (alreadyPresent) {
            return;
        }
    }
    else {
        bucket = [];
        seenRules[key] = bucket;
    }
    bucket.push(rule);
    if (rule.word === "") {
        debuglog(debuglog.enabled ? ('Skipping rule with emtpy word ' + JSON.stringify(rule, undefined, 2)) : '-');
        return;
    }
    mRules.push(rule);
    addBestSplit(mRules, rule, seenRules);
}
/**
 * Synchronously read a UTF-8 file and parse its content as JSON.
 *
 * When the content is not valid JSON a message is printed and the process is
 * terminated with exit code -1. Errors raised while reading the file itself
 * are not caught here.
 *
 * @param filename path of the file to read
 * @returns the parsed JSON value
 */
function readFileAsJSON(filename) {
    var text = fs.readFileSync(filename, 'utf-8');
    var parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch (err) {
        console.log("Content of file " + filename + " is no json" + err);
        process.exit(-1);
        // only reached when process.exit does not terminate (e.g. when stubbed)
        return undefined;
    }
    return parsed;
}
/**
 * Load the records of one model ("<modelPath>/<sModelName>.data.json") and
 * turn them directly into match rules.
 *
 * For every record:
 *  - `_domain` is set to the model's domain when the record has no `domain`,
 *    and `tool` defaults to the model's tool name;
 *  - the record is appended to `oModel.records`;
 *  - category values equal to the string 'undefined' are replaced by "n/a"
 *    (and reported via debuglog);
 *  - for every wordindex category whose value is present and not "*", a FACT
 *    rule is inserted, plus rules for the record's own synonyms of that category.
 *
 * @param modelPath  directory (relative to the cwd) holding the model files
 * @param oMdl       the model description, already processed by mergeModelJson
 * @param sModelName base name of the model files
 * @param oModel     the aggregate model being built (records/mRules/seenRules are extended)
 */
function loadModelData(modelPath, oMdl, sModelName, oModel) {
    var bitindex = oMdl.bitindex;
    var sFileName = ('./' + modelPath + '/' + sModelName + ".data.json");
    var oMdlData = readFileAsJSON(sFileName);
    // replaces the literal string 'undefined' in category values by "n/a"
    function normalizeCategoryValues(oEntry) {
        oMdl.category.forEach(function (cat) {
            if (oEntry[cat] !== 'undefined') {
                return;
            }
            oEntry[cat] = "n/a";
            debuglog("INCONSISTENT*> ModelData " + sFileName + " does not contain category " + cat + " with value 'undefined', undefined is illegal value, use n/a " + JSON.stringify(oEntry) + "");
        });
    }
    // inserts the FACT rule (and synonym rules) for one wordindex category of one record
    function addFactRules(oEntry, category) {
        var sString = oEntry[category];
        if (sString === undefined) {
            debuglog("INCONSISTENT*> ModelData " + sFileName + " does not contain category " + category + " of wordindex" + JSON.stringify(oEntry) + "");
            return;
        }
        if (sString === "*") {
            return;
        }
        debuglog("pushing rule with " + category + " -> " + sString);
        var oRule = {
            category: category,
            matchedString: sString,
            type: IMatch.EnumRuleType.WORD,
            word: sString,
            bitindex: bitindex,
            bitSentenceAnd: bitindex,
            wordType: IMatch.WORDTYPE.FACT,
            _ranking: 0.95
        };
        if (oMdl.exactmatch && oMdl.exactmatch.indexOf(category) >= 0) {
            oRule.exactOnly = true;
        }
        insertRuleIfNotPresent(oModel.mRules, oRule, oModel.seenRules);
        // synonyms are expected on the individual records, not on the data file as a whole
        if (oMdlData.synonyms && oMdlData.synonyms[category]) {
            throw new Error("how can this happen?");
        }
        // a synonym for a FACT
        var factSynonyms = oEntry.synonyms && oEntry.synonyms[category];
        if (factSynonyms) {
            addSynonyms(factSynonyms, category, sString, bitindex, bitindex, IMatch.WORDTYPE.FACT, oModel.mRules, oModel.seenRules);
        }
    }
    oMdlData.forEach(function (oEntry) {
        if (!oEntry.domain) {
            oEntry._domain = oMdl.domain;
        }
        if (!oEntry.tool && oMdl.tool.name) {
            oEntry.tool = oMdl.tool.name;
        }
        oModel.records.push(oEntry);
        normalizeCategoryValues(oEntry);
        oMdl.wordindex.forEach(function (category) {
            addFactRules(oEntry, category);
        });
    });
}
/**
 * Load a single model: read "<modelPath>/<sModelName>.model.json", merge the
 * description into `oModel`, then load the model's data records.
 *
 * @param modelPath  directory (relative to the cwd) holding the model files
 * @param sModelName base name of the model files
 * @param oModel     the aggregate model being built
 */
function loadModel(modelPath, sModelName, oModel) {
    debuglog(" loading " + sModelName + " ....");
    var modelFile = './' + modelPath + '/' + sModelName + ".model.json";
    var modelDescription = readFileAsJSON(modelFile);
    mergeModelJson(sModelName, modelDescription, oModel);
    loadModelData(modelPath, modelDescription, sModelName, oModel);
}
/**
 * Compute a bit mask with one set bit per domain currently in the model,
 * i.e. the lowest `oModel.domains.length` bits set.
 *
 * @param oModel the model, must carry a `domains` array
 * @returns the mask (32-bit integer arithmetic)
 */
function getAllDomainsBitIndex(oModel) {
    var mask = 0;
    var remaining = oModel.domains.length;
    while (remaining-- > 0) {
        // shift in one more low bit per domain
        mask = (mask << 1) | 0x0001;
    }
    return mask;
}
exports.getAllDomainsBitIndex = getAllDomainsBitIndex;
/**
 * Return the single-bit mask assigned to a domain.
 *
 * The bit position is the index of the domain in `oModel.domains`; a domain
 * not (yet) contained gets the next free position, `oModel.domains.length`.
 *
 * @param domain the domain name
 * @param oModel the model, must carry a `domains` array
 * @returns `1 << position`
 * @throws Error if the position does not fit into a 32 bit index
 */
function getDomainBitIndex(domain, oModel) {
    var domains = oModel.domains;
    var found = domains.indexOf(domain);
    var position = found < 0 ? domains.length : found;
    if (position >= 32) {
        throw new Error("too many domain for single 32 bit index");
    }
    return 0x0001 << position;
}
exports.getDomainBitIndex = getDomainBitIndex;
/**
 * Given a bitfield, return the domains of the model whose bit is present
 * in the bitfield (in the order of `oModel.domains`).
 *
 * @param oModel   the model, must carry a `domains` array
 * @param bitfield bit mask to test against
 * @returns array of matching domain names
 */
function getDomainsForBitField(oModel, bitfield) {
    function isBitSet(domain) {
        var domainBit = getDomainBitIndex(domain, oModel);
        return (domainBit & bitfield);
    }
    return oModel.domains.filter(isBitSet);
}
exports.getDomainsForBitField = getDomainsForBitField;
/**
 * Merge one model description (the parsed "<name>.model.json") into the
 * aggregate model `oModel`.
 *
 * Steps, in order:
 *  1. assign the domain's bit index and normalize `oMdl.category` to an array
 *     of names (object-typed categories are kept in `oMdl.categoryDescribed`);
 *  2. insert a CATEGORY rule per category;
 *  3. validate: the domain must not be loaded yet, all properties of `oMdl`
 *     must be known, and wordindex / exactmatch / columns must be categories;
 *  4. register the model in `oModel.rawModels`, `oModel.full.domain` and the
 *     domain <-> category relations in `oModel.meta.t3`;
 *  5. insert the domain rule, validate the tool and insert tool / synonym rules;
 *  6. append domain, tool and categories to the aggregate lists.
 *
 * A model without a `tool` (or without `tool.requires`) is treated as having a
 * hidden tool with an unsatisfiable requirement.
 *
 * Several consistency violations terminate the process via process.exit(-1).
 *
 * @param sModelName base name of the model (used in messages only)
 * @param oMdl       the parsed model description (mutated)
 * @param oModel     the aggregate model being built (mutated)
 * @throws Error on duplicate domain, unknown model property, or wordindex /
 *         exactmatch / columns entries which are not categories
 */
function mergeModelJson(sModelName, oMdl, oModel) {
    var categoryDescribedMap = {};
    oMdl.bitindex = getDomainBitIndex(oMdl.domain, oModel);
    oMdl.categoryDescribed = [];
    // rectify category: entries may be plain names or objects with a `name`
    oMdl.category = oMdl.category.map(function (cat) {
        if (typeof cat === "string") {
            return cat;
        }
        if (typeof cat.name !== "string") {
            console.log("Missing name in object typed category in " + JSON.stringify(cat) + " in model " + sModelName);
            process.exit(-1);
        }
        categoryDescribedMap[cat.name] = cat;
        oMdl.categoryDescribed.push(cat);
        return cat.name;
    });
    // add the categories to the model:
    oMdl.category.forEach(function (category) {
        insertRuleIfNotPresent(oModel.mRules, {
            category: "category",
            matchedString: category,
            type: IMatch.EnumRuleType.WORD,
            word: category,
            lowercaseword: category.toLowerCase(),
            bitindex: oMdl.bitindex,
            wordType: IMatch.WORDTYPE.CATEGORY,
            bitSentenceAnd: oMdl.bitindex,
            _ranking: 0.95
        }, oModel.seenRules);
    });
    if (oModel.domains.indexOf(oMdl.domain) >= 0) {
        debuglog("***********here mdl" + JSON.stringify(oMdl, undefined, 2));
        throw new Error('Domain ' + oMdl.domain + ' already loaded while loading ' + sModelName + '?');
    }
    // check properties of model
    Object.keys(oMdl).sort().forEach(function (sProperty) {
        if (ARR_MODEL_PROPERTIES.indexOf(sProperty) < 0) {
            throw new Error('Model property "' + sProperty + '" not a known model property in model of domain ' + oMdl.domain + ' ');
        }
    });
    // consider streamlining the categories
    oModel.rawModels[oMdl.domain] = oMdl;
    oModel.full.domain[oMdl.domain] = {
        description: oMdl.description,
        categories: categoryDescribedMap,
        bitindex: oMdl.bitindex
    };
    // check that members of wordindex are in categories,
    oMdl.wordindex = oMdl.wordindex || [];
    oMdl.wordindex.forEach(function (sWordIndex) {
        if (oMdl.category.indexOf(sWordIndex) < 0) {
            throw new Error('Model wordindex "' + sWordIndex + '" not a category of domain ' + oMdl.domain + ' ');
        }
    });
    oMdl.exactmatch = oMdl.exactmatch || [];
    oMdl.exactmatch.forEach(function (sExactMatch) {
        if (oMdl.category.indexOf(sExactMatch) < 0) {
            throw new Error('Model exactmatch "' + sExactMatch + '" not a category of domain ' + oMdl.domain + ' ');
        }
    });
    oMdl.columns = oMdl.columns || [];
    oMdl.columns.forEach(function (sColumn) {
        if (oMdl.category.indexOf(sColumn) < 0) {
            throw new Error('Model column "' + sColumn + '" not a category of domain ' + oMdl.domain + ' ');
        }
    });
    // add relation domain -> category (and the reverse relation)
    var domainStr = MetaF.Domain(oMdl.domain).toFullString();
    var relationStr = MetaF.Relation(Meta.RELATION_hasCategory).toFullString();
    var reverseRelationStr = MetaF.Relation(Meta.RELATION_isCategoryOf).toFullString();
    oMdl.category.forEach(function (sCategory) {
        var CategoryString = MetaF.Category(sCategory).toFullString();
        oModel.meta.t3[domainStr] = oModel.meta.t3[domainStr] || {};
        oModel.meta.t3[domainStr][relationStr] = oModel.meta.t3[domainStr][relationStr] || {};
        oModel.meta.t3[domainStr][relationStr][CategoryString] = {};
        oModel.meta.t3[CategoryString] = oModel.meta.t3[CategoryString] || {};
        oModel.meta.t3[CategoryString][reverseRelationStr] = oModel.meta.t3[CategoryString][reverseRelationStr] || {};
        oModel.meta.t3[CategoryString][reverseRelationStr][domainStr] = {};
    });
    // add a precise domain matchrule
    insertRuleIfNotPresent(oModel.mRules, {
        category: "domain",
        matchedString: oMdl.domain,
        type: IMatch.EnumRuleType.WORD,
        word: oMdl.domain,
        bitindex: oMdl.bitindex,
        bitSentenceAnd: oMdl.bitindex,
        wordType: "D",
        _ranking: 0.95
    }, oModel.seenRules);
    // check the tool
    if (oMdl.tool && oMdl.tool.requires) {
        var requires = Object.keys(oMdl.tool.requires || {});
        var diff = _.difference(requires, oMdl.category);
        if (diff.length > 0) {
            console.log(" " + oMdl.domain + " : Unkown category in requires of tool: \"" + diff.join('"') + '"');
            process.exit(-1);
        }
        // `optional` may be absent, guard it like `requires` and `sets`
        var optional = Object.keys(oMdl.tool.optional || {});
        diff = _.difference(optional, oMdl.category);
        if (diff.length > 0) {
            console.log(" " + oMdl.domain + " : Unkown category optional of tool: \"" + diff.join('"') + '"');
            process.exit(-1);
        }
        Object.keys(oMdl.tool.sets || {}).forEach(function (setID) {
            var setDiff = _.difference(oMdl.tool.sets[setID].set, oMdl.category);
            if (setDiff.length > 0) {
                console.log(" " + oMdl.domain + " : Unkown category in setId " + setID + " of tool: \"" + setDiff.join('"') + '"');
                process.exit(-1);
            }
        });
        // a tool name may only be contributed by a single model
        oModel.tools.forEach(function (oEntry) {
            if (oEntry.name === oMdl.tool.name) {
                console.log("Tool " + oMdl.tool.name + " already present when loading " + sModelName);
                process.exit(-1);
            }
        });
    }
    else {
        // no usable tool: hide it and give it a requirement which can never be met;
        // the tool object itself may be missing altogether
        oMdl.toolhidden = true;
        oMdl.tool = oMdl.tool || {};
        oMdl.tool.requires = { "impossible": {} };
    }
    // add the tool name as rule unless hidden
    if (!oMdl.toolhidden && oMdl.tool && oMdl.tool.name) {
        insertRuleIfNotPresent(oModel.mRules, {
            category: "tool",
            matchedString: oMdl.tool.name,
            type: IMatch.EnumRuleType.WORD,
            word: oMdl.tool.name,
            bitindex: oMdl.bitindex,
            bitSentenceAnd: oMdl.bitindex,
            wordType: IMatch.WORDTYPE.TOOL,
            _ranking: 0.95
        }, oModel.seenRules);
    }
    if (oMdl.synonyms && oMdl.synonyms["tool"]) {
        addSynonyms(oMdl.synonyms["tool"], "tool", oMdl.tool.name, oMdl.bitindex, oMdl.bitindex, IMatch.WORDTYPE.TOOL, oModel.mRules, oModel.seenRules);
    }
    if (oMdl.synonyms) {
        Object.keys(oMdl.synonyms).forEach(function (ssynkey) {
            if (oMdl.category.indexOf(ssynkey) >= 0 && ssynkey !== "tool") {
                if (oModel.full.domain[oMdl.domain].categories[ssynkey]) {
                    oModel.full.domain[oMdl.domain].categories[ssynkey].synonyms = oMdl.synonyms[ssynkey];
                }
                addSynonyms(oMdl.synonyms[ssynkey], "category", ssynkey, oMdl.bitindex, oMdl.bitindex, IMatch.WORDTYPE.CATEGORY, oModel.mRules, oModel.seenRules);
            }
        });
    }
    oModel.domains.push(oMdl.domain);
    if (oMdl.tool.name) {
        oModel.tools.push(oMdl.tool);
    }
    // keep the aggregate category list sorted and free of duplicates
    oModel.category = oModel.category.concat(oMdl.category);
    oModel.category.sort();
    oModel.category = oModel.category.filter(function (string, index) {
        return oModel.category[index] !== oModel.category[index + 1];
    });
} // mergeModelJson
/**
 * Partition a rule list into a lookup structure.
 *
 * WORD rules are grouped by their `lowercaseword`; each group also carries the
 * bitwise OR of the `bitindex` of its rules. All other rules are collected in
 * `nonWordRules`.
 *
 * @param rules the complete rule list
 * @returns `{ wordMap, nonWordRules, allRules, wordCache }` where `allRules`
 *          is the array passed in and `wordCache` is a fresh empty object
 * @throws Error if a WORD rule has no `lowercaseword`
 */
function splitRules(rules) {
    var wordMap = {};
    var nonWordRules = [];
    rules.forEach(function (rule) {
        if (rule.type !== IMatch.EnumRuleType.WORD) {
            nonWordRules.push(rule);
            return;
        }
        var lcWord = rule.lowercaseword;
        if (!lcWord) {
            throw new Error("Rule has no member lowercaseword" + JSON.stringify(rule));
        }
        var group = wordMap[lcWord];
        if (!group) {
            group = { bitindex: 0, rules: [] };
            wordMap[lcWord] = group;
        }
        group.bitindex = group.bitindex | rule.bitindex;
        group.rules.push(rule);
    });
    return {
        wordMap: wordMap,
        nonWordRules: nonWordRules,
        allRules: rules,
        wordCache: {}
    };
}
exports.splitRules = splitRules;
/**
 * Comparator ordering strings by ascending length; strings of equal length
 * are ordered via localeCompare.
 *
 * @param a first string
 * @param b second string
 * @returns negative, zero or positive number
 */
function cmpLengthSort(a, b) {
    var lengthDelta = a.length - b.length;
    return lengthDelta || a.localeCompare(b);
}
var Algol = require("../match/algol");
/**
 * Advance a sliding window of offsets into a length-sorted string array.
 *
 * Drops the oldest offset and appends the index of the first element of `arr`
 * (searching from what was the newest offset) whose length exceeds `targetLen`.
 *
 * As used by addCloseExactRangeRules, the six slots correspond to the
 * lengths len-2, len-1, len, len+1, len+2, len+3.
 *
 * @param targetLen length threshold; elements up to this length are skipped
 * @param arr       array of strings sorted by ascending length
 * @param offsets   window of six offsets (mutated in place)
 */
function findNextLen(targetLen, arr, offsets) {
    offsets.shift();
    // after the shift, slot 4 holds the most recently appended offset
    var position = offsets[4];
    while ((position < arr.length) && (arr[position].length <= targetLen)) {
        ++position;
    }
    offsets.push(position);
}
exports.findNextLen = findNextLen;
/**
 * For every range rule, insert a shallow copy re-keyed to `lcword`
 * (both `word` and `lowercaseword` are set to it) unless an equivalent rule
 * is already present.
 *
 * @param rules              rule list to extend
 * @param lcword             the lowercase word the copies are registered under
 * @param rangeRules         range rules to copy
 * @param presentRulesForKey rules already known for `lcword` (currently not evaluated)
 * @param seenRules          duplicate-detection map passed on to insertRuleIfNotPresent
 */
function addRangeRulesUnlessPresent(rules, lcword, rangeRules, presentRulesForKey, seenRules) {
    //todo: check whether an equivalent rule is already present?
    function rekey(rangeRule) {
        // shallow copy: the nested `range` object stays shared with the source rule
        var copy = Object.assign({}, rangeRule);
        copy.lowercaseword = lcword;
        copy.word = lcword;
        return copy;
    }
    rangeRules.forEach(function (rangeRule) {
        insertRuleIfNotPresent(rules, rekey(rangeRule), seenRules);
    });
}
exports.addRangeRulesUnlessPresent = addRangeRulesUnlessPresent;
/**
 * For every word which carries range rules, find the other rule words which
 * are close (but not identical) to it and register the range rules under
 * these words as well.
 *
 * Candidate words are restricted via a window over the length-sorted key
 * list (maintained with findNextLen), so that only words of similar length
 * are compared. Two words count as close when
 * Distance.calcDistanceAdjusted yields a value which is not 1.0 and is at
 * least Algol.Cutoff_rangeCloseMatch.
 *
 * @param rules     the rule list (extended in place)
 * @param seenRules duplicate-detection map passed on to the insertion helpers
 */
function addCloseExactRangeRules(rules, seenRules) {
    var rulesByWord = {};
    var rangeRulesByWord = {};
    rules.forEach(function (rule) {
        if (rule.type !== IMatch.EnumRuleType.WORD) {
            return;
        }
        var lcWord = rule.lowercaseword;
        rulesByWord[lcWord] = rulesByWord[lcWord] || [];
        rulesByWord[lcWord].push(rule);
        // exact-only rules never spread to neighbouring words
        if (!rule.exactOnly && rule.range) {
            rangeRulesByWord[lcWord] = rangeRulesByWord[lcWord] || [];
            rangeRulesByWord[lcWord].push(rule);
        }
    });
    var keys = Object.keys(rulesByWord);
    keys.sort(cmpLengthSort);
    var rangeKeys = Object.keys(rangeRulesByWord);
    rangeKeys.sort(cmpLengthSort);
    // prime the offset window for the lengths 0, 1 and 2
    var offsets = [0, 0, 0, 0, 0, 0];
    [0, 1, 2].forEach(function (targetLen) {
        findNextLen(targetLen, keys, offsets);
    });
    var lastlen = 0;
    rangeKeys.forEach(function (rangeKey) {
        // range keys arrive sorted by length: advance the window once per length step
        while (lastlen < rangeKey.length) {
            lastlen += 1;
            findNextLen(lastlen + 2, keys, offsets);
        }
        var rangeRules = rangeRulesByWord[rangeKey];
        for (var idx = offsets[0]; idx < offsets[5]; ++idx) {
            var candidate = keys[idx];
            var d = Distance.calcDistanceAdjusted(rangeKey, candidate);
            if ((d !== 1.0) && (d >= Algol.Cutoff_rangeCloseMatch)) {
                // we only have to add if there is not yet a match rule here which points to the same
                addRangeRulesUnlessPresent(rules, candidate, rangeRules, rulesByWord[candidate], seenRules);
            }
        }
    });
}
exports.addCloseExactRangeRules = addCloseExactRangeRules;
var n = 0;
/**
 * Load all models below `modelPath` and return the aggregate model.
 *
 * A previously serialized model ("<modelPath>/_cachefalse.js") is returned
 * directly when CircularSer.load yields one. Otherwise the models listed in
 * "models.json" are loaded, the meta, filler and operator rules are added,
 * rules are sorted and extended by close range rules, the word lookup
 * structure is built, and the result is serialized to the cache file.
 *
 * @param modelPath directory (relative to the cwd) holding the model files;
 *                  defaults to `envModelPath`
 * @returns the aggregate model
 * @throws Error if operators.json names an operator not in IMatch.aOperatorNames
 */
function loadModels(modelPath) {
    var oModel = {
        full: { domain: {} },
        rawModels: {},
        domains: [],
        tools: [],
        rules: undefined,
        category: [],
        operators: {},
        mRules: [],
        seenRules: {},
        records: [],
        meta: { t3: {} }
    };
    var startTime = Date.now();
    modelPath = modelPath || envModelPath;
    var basePath = './' + modelPath + '/';
    var cacheFile = basePath + '_cachefalse.js';
    try {
        var cachedModel = CircularSer.load(cacheFile);
        if (cachedModel) {
            debuglog(" return preparese model ");
            if (process.env.ABOT_EMAIL_USER) {
                console.log("loaded models from cache in " + (Date.now() - startTime) + " ");
            }
            return cachedModel;
        }
    }
    catch (e) {
        // no cache file, fall through to a full load
    }
    var modelNames = readFileAsJSON(basePath + 'models.json');
    modelNames.forEach(function (sModelName) {
        loadModel(modelPath, sModelName, oModel);
    });
    // 'meta' is not a loaded domain, so it receives the next free bit
    var metaBitIndex = getDomainBitIndex('meta', oModel);
    var bitIndexAllDomains = getAllDomainsBitIndex(oModel);
    // add the domain meta rule
    insertRuleIfNotPresent(oModel.mRules, {
        category: "meta",
        matchedString: "domain",
        type: IMatch.EnumRuleType.WORD,
        word: "domain",
        bitindex: metaBitIndex,
        wordType: IMatch.WORDTYPE.META,
        bitSentenceAnd: bitIndexAllDomains,
        _ranking: 0.95
    }, oModel.seenRules);
    // fillers use the same bit as the meta rule
    var fillerBitIndex = getDomainBitIndex('meta', oModel);
    // add the filler rules
    var fillers = readFileAsJSON(basePath + 'filler.json');
    fillers.forEach(function (filler) {
        insertRuleIfNotPresent(oModel.mRules, {
            category: "filler",
            type: IMatch.EnumRuleType.WORD,
            word: filler,
            lowercaseword: filler.toLowerCase(),
            matchedString: filler,
            exactOnly: true,
            bitindex: fillerBitIndex,
            bitSentenceAnd: bitIndexAllDomains,
            wordType: IMatch.WORDTYPE.FILLER,
            _ranking: 0.9
        }, oModel.seenRules);
    });
    // add operators
    var operators = readFileAsJSON(basePath + 'operators.json');
    var operatorBitIndex = getDomainBitIndex('operators', oModel);
    // inserts the rule mapping `text` (lowercased) onto the operator `operator`
    function insertOperatorRule(text, operator) {
        var lowered = text.toLowerCase();
        insertRuleIfNotPresent(oModel.mRules, {
            category: "operator",
            word: lowered,
            lowercaseword: lowered,
            type: IMatch.EnumRuleType.WORD,
            matchedString: operator,
            bitindex: operatorBitIndex,
            bitSentenceAnd: bitIndexAllDomains,
            wordType: IMatch.WORDTYPE.OPERATOR,
            _ranking: 0.9
        }, oModel.seenRules);
    }
    Object.keys(operators.operators).forEach(function (operator) {
        if (IMatch.aOperatorNames.indexOf(operator) < 0) {
            debuglog("unknown operator " + operator);
            throw new Error("unknown operator " + operator);
        }
        var definition = operators.operators[operator];
        oModel.operators[operator] = definition;
        definition.operator = operator;
        Object.freeze(definition);
        insertOperatorRule(operator, operator);
        // add all synonyms
        var synonyms = operators.synonyms[operator];
        if (synonyms) {
            Object.keys(synonyms).forEach(function (synonym) {
                insertOperatorRule(synonym, operator);
            });
        }
    });
    // sort, extend by close range rules, and sort again as rules were appended
    oModel.mRules.sort(InputFilterRules.cmpMRule);
    addCloseExactRangeRules(oModel.mRules, oModel.seenRules);
    oModel.mRules.sort(InputFilterRules.cmpMRule);
    forceGC();
    oModel.rules = splitRules(oModel.mRules);
    forceGC();
    oModel.tools.sort(cmpTools);
    // the duplicate-detection map is not part of the serialized model
    delete oModel.seenRules;
    debuglog('saving');
    forceGC();
    CircularSer.save(cacheFile, oModel);
    forceGC();
    if (process.env.ABOT_EMAIL_USER) {
        console.log("loaded models by calculation in " + (Date.now() - startTime) + " ");
    }
    return oModel;
}
exports.loadModels = loadModels;
/**
 * Return a sorted copy of `cats`, ordered via rankCategoryByImportance.
 * The input array is left untouched.
 *
 * @param map  map from category name to its description (may carry `importance`)
 * @param cats array of category names
 * @returns new sorted array
 */
function sortCategoriesByImportance(map, cats) {
    return cats.slice(0).sort(function (cata, catb) {
        return rankCategoryByImportance(map, cata, catb);
    });
}
exports.sortCategoriesByImportance = sortCategoriesByImportance;
/**
 * Comparator ranking two category names.
 *
 * Order of criteria:
 *  1. identical names compare equal;
 *  2. a category with an entry in `map` goes before one without;
 *  3. lower `importance` goes first (a missing or falsy importance counts as 99);
 *  4. ties are broken via localeCompare of the names.
 *
 * @param map  map from category name to its description
 * @param cata first category name
 * @param catb second category name
 * @returns negative if `cata` goes first, positive if `catb` goes first, 0 if equal
 */
function rankCategoryByImportance(map, cata, catb) {
    var descA = map[cata];
    var descB = map[catb];
    if (cata === catb) {
        return 0;
    }
    var knownA = !!descA;
    var knownB = !!descB;
    if (knownA !== knownB) {
        // described categories go to the front
        return knownA ? -1 : +1;
    }
    var prioA = (descA && descA.importance) || 99;
    var prioB = (descB && descB.importance) || 99;
    // lower prio goes to front
    var delta = prioA - prioB;
    return delta || cata.localeCompare(catb);
}
exports.rankCategoryByImportance = rankCategoryByImportance;
var MetaF = Meta.getMetaFactory();
/**
 * Look up the definition of an operator in the model.
 *
 * @param mdl      the model, must carry an `operators` map
 * @param operator the operator name
 * @returns the entry stored under `operator`, or undefined if there is none
 */
function getOperator(mdl, operator) {
    var knownOperators = mdl.operators;
    return knownOperators[operator];
}
exports.getOperator = getOperator;
/**
 * Return the objects related to `a` via relation `rel`, as recorded in
 * `mdl.meta.t3`.
 *
 * @param mdl the model
 * @param a   meta object whose relations are looked up
 * @param rel meta object which must be of type 'relation'
 * @returns the sorted related keys, each mapped through MetaF.parseIMeta;
 *          an empty array when nothing is recorded
 * @throws Error if `rel` is not a relation
 */
function getResultAsArray(mdl, a, rel) {
    if (rel.toType() !== 'relation') {
        throw new Error("expect relation as 2nd arg");
    }
    var relationsOfA = mdl.meta.t3[a.toFullString()];
    var targets = relationsOfA && relationsOfA[rel.toFullString()];
    if (!targets) {
        return [];
    }
    var targetKeys = Object.getOwnPropertyNames(targets);
    targetKeys.sort();
    return targetKeys.map(MetaF.parseIMeta);
}
exports.getResultAsArray = getResultAsArray;
/**
 * Return the categories of a domain, resolved via the hasCategory relation.
 *
 * @param theModel the model
 * @param domain   the domain name
 * @returns result of Meta.getStringArray applied to the related category objects
 * @throws Error if the domain is not part of the model
 */
function getCategoriesForDomain(theModel, domain) {
    var isKnownDomain = theModel.domains.indexOf(domain) >= 0;
    if (!isKnownDomain) {
        throw new Error("Domain \"" + domain + "\" not part of model");
    }
    var domainMeta = MetaF.Domain(domain);
    var hasCategory = MetaF.Relation(Meta.RELATION_hasCategory);
    return Meta.getStringArray(getResultAsArray(theModel, domainMeta, hasCategory));
}
exports.getCategoriesForDomain = getCategoriesForDomain;
/**
 * Return a copy of the `columns` list of a domain's raw model.
 *
 * @param theModel the model, must carry `domains` and `rawModels`
 * @param domain   the domain name
 * @returns a new array with the column names
 * @throws Error if the domain is not part of the model
 */
function getTableColumns(theModel, domain) {
    var isKnownDomain = theModel.domains.indexOf(domain) >= 0;
    if (!isKnownDomain) {
        throw new Error("Domain \"" + domain + "\" not part of model");
    }
    var columns = theModel.rawModels[domain].columns;
    // hand out a copy so callers cannot modify the model
    return columns.slice(0);
}
exports.getTableColumns = getTableColumns;
/**
 * Trigger a garbage collection run when `global.gc` is available;
 * otherwise do nothing.
 */
function forceGC() {
    if (!global || !global.gc) {
        return;
    }
    global.gc();
}
/**
 * Return all categories of a domain which can appear on a word;
 * these are typically the wordindex categories plus entries generated by
 * generic rules.
 *
 * The current implementation is a simplification: it returns all categories
 * of the domain.
 *
 * @param theModel the model
 * @param domain   the domain name
 * @returns the category names as delivered by getCategoriesForDomain
 */
function getPotentialWordCategoriesForDomain(theModel, domain) {
    // simplified: no distinction between word categories and other categories yet
    var allCategories = getCategoriesForDomain(theModel, domain);
    return allCategories;
}
exports.getPotentialWordCategoriesForDomain = getPotentialWordCategoriesForDomain;
/**
 * Return the domains a category belongs to, resolved via the isCategoryOf
 * relation.
 *
 * @param theModel the model
 * @param category the category name
 * @returns result of Meta.getStringArray applied to the related domain objects
 * @throws Error if the category is not part of the model
 */
function getDomainsForCategory(theModel, category) {
    var isKnownCategory = theModel.category.indexOf(category) >= 0;
    if (!isKnownCategory) {
        throw new Error("Category \"" + category + "\" not part of model");
    }
    var categoryMeta = MetaF.Category(category);
    var isCategoryOf = MetaF.Relation(Meta.RELATION_isCategoryOf);
    return Meta.getStringArray(getResultAsArray(theModel, categoryMeta, isCategoryOf));
}
exports.getDomainsForCategory = getDomainsForCategory;
/*
export function getAllRecordCategoriesForTargetCategory(model: IMatch.IModels, category: string, wordsonly: boolean): { [key: string]: boolean } {
var res = {};
//
var fn = wordsonly ? getPotentialWordCategoriesForDomain : getCategoriesForDomain;
var domains = getDomainsForCategory(model, category);
domains.forEach(function (domain) {
fn(model, domain).forEach(function (wordcat) {
res[wordcat] = true;
});
});
Object.freeze(res);
return res;
}
export function getAllRecordCategoriesForTargetCategories(model: IMatch.IModels, categories: string[], wordsonly: boolean): { [key: string]: boolean } {
var res = {};
//
var fn = wordsonly ? getPotentialWordCategoriesForDomain : getCategoriesForDomain;
var domains = undefined;
categories.forEach(function (category) {
var catdomains = getDomainsForCategory(model, category)
if (!domains) {
domains = catdomains;
} else {
domains = _.intersection(domains, catdomains);
}
});
if (domains.length === 0) {
throw new Error('categories ' + Utils.listToQuotedCommaAnd(categories) + ' have no common domain.')
}
domains.forEach(function (domain) {
fn(model, domain).forEach(function (wordcat) {
res[wordcat] = true;
});
});
Object.freeze(res);
return res;
}
*/
/**
 * Given a set of categories, return a structure
 *
 *
 * { domains : ["DOMAIN1", "DOMAIN2"],
 *   categorySet : { cat1 : true, cat2 : true, ...}
 * }
 *
 * where `domains` are the domains common to all given categories and
 * `categorySet` (frozen) holds every category of these domains.
 *
 * @param model      the model
 * @param categories non-empty array of category names
 * @param wordsonly  if truthy, categories are collected via
 *                   getPotentialWordCategoriesForDomain instead of getCategoriesForDomain
 * @returns `{ domains, categorySet }`
 * @throws Error if `categories` is empty, if a category is not part of the
 *         model, or if the categories have no common domain
 */
function getDomainCategoryFilterForTargetCategories(model, categories, wordsonly) {
    // without any category there is nothing to intersect; fail with a clear
    // message instead of a TypeError on `undefined.length` further down
    if (categories.length === 0) {
        throw new Error('categories must not be empty, cannot determine a common domain.');
    }
    var res = {};
    var fn = wordsonly ? getPotentialWordCategoriesForDomain : getCategoriesForDomain;
    var domains = undefined;
    categories.forEach(function (category) {
        var catdomains = getDomainsForCategory(model, category);
        if (domains === undefined) {
            domains = catdomains;
        }
        else {
            domains = _.intersection(domains, catdomains);
        }
    });
    if (domains.length === 0) {
        throw new Error('categories ' + Utils.listToQuotedCommaAnd(categories) + ' have no common domain.');
    }
    domains.forEach(function (domain) {
        fn(model, domain).forEach(function (wordcat) {
            res[wordcat] = true;
        });
    });
    Object.freeze(res);
    return { domains: domains,
        categorySet: res };
}
exports.getDomainCategoryFilterForTargetCategories = getDomainCategoryFilterForTargetCategories;
/**
 * Single-category convenience variant of
 * getDomainCategoryFilterForTargetCategories.
 *
 * @param model     the model
 * @param category  the category name
 * @param wordsonly passed through unchanged
 * @returns `{ domains, categorySet }` for the given category
 */
function getDomainCategoryFilterForTargetCategory(model, category, wordsonly) {
    var singleCategory = [category];
    return getDomainCategoryFilterForTargetCategories(model, singleCategory, wordsonly);
}
exports.getDomainCategoryFilterForTargetCategory = getDomainCategoryFilterForTargetCategory;
//# sourceMappingURL=model.js.map