// compromise
// Version:
// natural language processing in the browser
// 58 lines (54 loc) • 1.88 kB
// JavaScript
;
// lookup tables loaded from ./data/neighbours; the tagging step below uses them
// to pick a tag for a term that has none
const markov = require('./data/neighbours');
// indexed by the `normal` text of the term just before the unknown one
const afterThisWord = markov.afterThisWord;
// indexed by the `normal` text of the term just after the unknown one
const beforeThisWord = markov.beforeThisWord;
// indexed by a tag name found on the term just after the unknown one
const beforeThisPos = markov.beforeThisPos;
// indexed by a tag name found on the term just before the unknown one
const afterThisPos = markov.afterThisPos;
const log = require('../paths').log;
// label handed to log.here() each time this step runs
const path = 'tagger/neighbours';
// Returns `table[key]` only when `key` is an own property of `table`, so that
// words like "constructor" or "valueOf" never resolve to Object.prototype members.
const ownEntry = function (table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
};

/**
 * Basically a last-ditch effort before everything falls back to a noun:
 * for every term that still has no tag, look left + right and consult the
 * neighbour tables for clues.
 *
 * Clues are tried in this order, and the first hit wins:
 *   1. the previous term's word   (afterThisWord)
 *   2. the next term's word       (beforeThisWord)
 *   3. any tag on the previous term (afterThisPos)
 *   4. any tag on the next term     (beforeThisPos)
 *
 * Terms are tagged in place through `t.tagAs(tag, reason)`; a term tagged here
 * can therefore serve as a part-of-speech clue for the term that follows it.
 *
 * @param {Object} ts - term list; reads `ts.terms`, and on each term `tag`,
 *   `normal` and `tagAs`.
 * @returns {Object} the same `ts` that was passed in.
 */
const neighbour_step = function (ts) {
  log.here(path);
  ts.terms.forEach((t, n) => {
    //is it still unknown?
    if (Object.keys(t.tag).length !== 0) {
      return;
    }
    const lastTerm = ts.terms[n - 1];
    const nextTerm = ts.terms[n + 1];

    //look at last word for clues
    const tagAfterWord = lastTerm && ownEntry(afterThisWord, lastTerm.normal);
    if (tagAfterWord) {
      t.tagAs(tagAfterWord, `neighbour-after-"${lastTerm.normal}"`);
      return;
    }

    //look at next word for clues
    const tagBeforeWord = nextTerm && ownEntry(beforeThisWord, nextTerm.normal);
    if (tagBeforeWord) {
      t.tagAs(tagBeforeWord, `neighbour-before-"${nextTerm.normal}"`);
      return;
    }

    //look at the last POS for clues
    if (lastTerm) {
      for (const pos of Object.keys(lastTerm.tag)) {
        const tagAfterPos = ownEntry(afterThisPos, pos);
        if (tagAfterPos) {
          t.tagAs(tagAfterPos, `neighbour-after-[${pos}]`);
          return;
        }
      }
    }

    //look at the next POS for clues
    if (nextTerm) {
      for (const pos of Object.keys(nextTerm.tag)) {
        const tagBeforePos = ownEntry(beforeThisPos, pos);
        if (tagBeforePos) {
          t.tagAs(tagBeforePos, `neighbour-before-[${pos}]`);
          return;
        }
      }
    }
  });
  return ts;
};
// the tagging step is this module's only export
module.exports = neighbour_step;