// compromise
// Version:
// natural language processing in the browser
// 65 lines (61 loc) • 1.54 kB
// JavaScript
;
// Project paths module; the built-in lexicon and the logger are read off it below.
const p = require('../paths');
// Built-in lookup table, indexed by word string in check_lexicon.
const lexicon = p.lexicon;
const log = p.log;
// Label handed to log.here() each time lexicon_pass runs.
const path = 'tagger/lexicon';
/**
 * Look up the tag for a word, preferring the sentence's own lexicon over the
 * built-in one.
 *
 * Only own properties count as matches. A plain bracket lookup would also find
 * members inherited from Object.prototype, so words such as "constructor" or
 * "toString" would come back as functions instead of tags.
 *
 * @param {string} str - the word to look up
 * @param {Object} sentence - object that may carry a custom `lexicon` table
 * @returns {*} the stored (truthy) lexicon value, or null when nothing matches
 */
const check_lexicon = (str, sentence) => {
  const hasOwn = Object.prototype.hasOwnProperty;
  //check a user's custom lexicon
  const custom = sentence.lexicon || {};
  // a falsy custom entry falls through to the built-in lexicon
  if (hasOwn.call(custom, str) && custom[str]) {
    return custom[str];
  }
  //then the built-in lexicon
  if (hasOwn.call(lexicon, str) && lexicon[str]) {
    return lexicon[str];
  }
  return null;
};
/**
 * Run the lexicon lookups for a single term, in priority order, and tag the
 * term with the first one that hits. A term with no hit is left untouched.
 *
 * @param {Object} term - the term to look up; tagged in place via term.tagAs()
 * @param {Object} ts - the owning terms object, forwarded to check_lexicon
 */
const tag_from_lexicon = function (term, ts) {
  //1. the normalized form
  const byNormal = check_lexicon(term.normal, ts);
  if (byNormal) {
    term.tagAs(byNormal, 'lexicon-match');
    return;
  }
  //2. the text form
  const byText = check_lexicon(term.text, ts);
  if (byText) {
    term.tagAs(byText, 'lexicon-match-text');
    return;
  }
  //3. the lower-cased start of a contraction
  const pieces = term.term.contraction();
  if (pieces && pieces.start) {
    const byStart = check_lexicon(pieces.start.toLowerCase(), ts);
    if (byStart) {
      term.tagAs(byStart, 'contraction-lexicon');
      return;
    }
  }
  //4. the silent term; the lookup result only counts when one is set
  const bySilent = check_lexicon(term.silent_term, ts);
  if (term.silent_term && bySilent) {
    term.tagAs(bySilent, 'silent_term-lexicon');
    return;
  }
  //5. last word of a spaced or hyphenated term
  const words = term.normal.split(/[ -]/);
  if (words.length <= 1) {
    return;
  }
  const byLastWord = check_lexicon(words[words.length - 1], ts);
  if (byLastWord) {
    term.tagAs(byLastWord, 'multiword-lexicon');
  }
};

/**
 * Tagger pass that tags each term of `ts` from the lexicon.
 *
 * @param {Object} ts - object exposing a `terms` list
 * @returns {Object} the same `ts`, with matched terms tagged
 */
const lexicon_pass = function (ts) {
  log.here(path);
  let idx = 0;
  while (idx < ts.terms.length) {
    tag_from_lexicon(ts.terms[idx], ts);
    idx += 1;
  }
  return ts;
};
module.exports = lexicon_pass;