compromise
Version:
natural language processing in the browser
126 lines (118 loc) • 2.93 kB
JavaScript
;
//-types of comma-use-
// PlaceComma - Hollywood, California
// List - cool, fun, and great.
// ClauseEnd - if so, we do.
//a 'place comma' joins a smaller place to its country - like 'Toronto, Canada'
//returns true when the term at i is a non-country Place and the term after it is a Country
const isPlaceComma = (ts, i) => {
  const current = ts.terms[i];
  const following = ts.terms[i + 1];
  //a comma on the very last term cannot join two places
  if (!following) {
    return false;
  }
  //two countries in a row ('australia, canada') read as a list, not a place
  const isSmallerPlace = current.tag.Place && !current.tag.Country;
  return Boolean(isSmallerPlace && following.tag.Country);
};
//pick the broad part-of-speech of a term: 'Adjective', 'Noun' or 'Verb'
//checked in that order of priority - null when the term carries none of them
const mainTag = (t) => {
  const candidates = ['Adjective', 'Noun', 'Verb'];
  const found = candidates.find((name) => t.tag[name]);
  return found || null;
};
//mark every term from index start to index end (both inclusive) with the List tag
const tagAsList = (ts, start, end) => {
  let index = start;
  while (index <= end) {
    ts.terms[index].tag.List = true;
    index += 1;
  }
};
//starting from a term that ends in a comma, scan rightwards for a list like 'cool, fun, and great'.
//every list member must share the starting term's main part-of-speech.
//a list needs at least one more comma'd member, then a conjunction, then a closing member.
//on success the whole span (start to closing member) is tagged as List, and true is returned.
const isList = (ts, i) => {
  const pos = mainTag(ts.terms[i]);
  let commasSeen = 0; //matching comma'd terms found after the starting one
  let gap = 0; //terms walked since the last matching comma
  let sawConjunction = false;
  for (let cursor = i + 1; cursor < ts.terms.length; cursor += 1) {
    const term = ts.terms[cursor];
    if (commasSeen > 0 && term.tag.Conjunction) {
      //the 'and'/'or' that announces the final member
      sawConjunction = true;
    } else if (term.tag[pos] && term.tag.Comma) {
      //another member of the list - keep going
      commasSeen += 1;
      gap = 0;
    } else if (term.tag[pos] && commasSeen > 0 && sawConjunction) {
      //a same-pos term after the conjunction closes the list
      tagAsList(ts, i, cursor);
      return true;
    } else {
      gap += 1;
      //wandered too far from the last comma - give up
      if (gap > 5) {
        return false;
      }
    }
  }
  return false;
};
//tag each term by the punctuation around it, then work out what each comma is doing.
//pass 1 adds the Comma and ClauseEnd tags, pass 2 sorts commas into place, list, or clause-ending.
//returns the same ts it was given.
const commaStep = function(ts) {
  //pass 1 - label terms by their trailing punctuation/whitespace
  for (let index = 0; index < ts.terms.length; index += 1) {
    const term = ts.terms[index];
    const trailing = term.endPunctuation();
    if (trailing === ',') {
      term.tagAs('Comma', 'comma-step');
    } else if (trailing === ';' || trailing === ':') {
      term.tagAs('ClauseEnd', 'clause-punt');
    } else if (term.whitespace.after.match(/^\.\./)) {
      //an ellipsis ('..') right after the term
      term.tagAs('ClauseEnd', 'clause-elipses');
    } else if (ts.terms[index + 1] && ts.terms[index + 1].whitespace.before.match(/ - /)) {
      //a spaced hyphen ' - ' in front of the next term
      term.tagAs('ClauseEnd', 'hypen-clause');
    }
  }
  //pass 2 - decide what kind of comma each one is
  for (let index = 0; index < ts.terms.length; index += 1) {
    const term = ts.terms[index];
    //only commas that an earlier list-match has not already claimed
    if (!term.tag.Comma || term.tag.List) {
      continue;
    }
    //'Hollywood, California' is tried first, then 'cool, fun, and great'
    if (isPlaceComma(ts, index) || isList(ts, index)) {
      continue;
    }
    //anything left is a phrasal comma, like 'you must, if you think so'
    term.tag.ClauseEnd = true;
  }
  return ts;
};
//the comma-tagging step is this module's only export
module.exports = commaStep;