compromise
Version:
modest natural language processing
2,079 lines (1,546 loc) • 334 kB
JavaScript
/**
 * Babel helper: a `typeof` that also reports "symbol" for Symbol values.
 * On its first call it picks the implementation that fits the runtime,
 * replaces itself with it, and then delegates to the chosen version.
 */
function _typeof(obj) {
  var nativeSymbolType = typeof Symbol === "function" && typeof Symbol.iterator === "symbol";
  _typeof = nativeSymbolType ? function (obj) {
    // the runtime's own typeof already understands symbols
    return typeof obj;
  } : function (obj) {
    // detect (polyfilled) Symbol instances by their constructor
    return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj;
  };
  return _typeof(obj);
}
/**
 * Babel helper: guard against calling a transpiled class without `new`.
 * @throws {TypeError} when `instance` is not an instance of `Constructor`
 */
function _classCallCheck(instance, Constructor) {
  var builtWithNew = instance instanceof Constructor;
  if (builtWithNew) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
/**
 * Babel helper: define each descriptor in `props` on `target`.
 * Every descriptor carries its property name in `.key`; it is made
 * configurable, non-enumerable unless it says otherwise, and writable
 * when it is a data descriptor (has `value`).
 */
function _defineProperties(target, props) {
  var index = 0;
  while (index < props.length) {
    var descriptor = props[index];
    descriptor.enumerable = descriptor.enumerable || false;
    descriptor.configurable = true;
    if ("value" in descriptor) {
      descriptor.writable = true;
    }
    Object.defineProperty(target, descriptor.key, descriptor);
    index += 1;
  }
}
/**
 * Babel helper: attach prototype members and static members to a
 * constructor function, then hand the constructor back.
 * Either descriptor list may be omitted.
 */
function _createClass(Constructor, protoProps, staticProps) {
  if (protoProps) {
    _defineProperties(Constructor.prototype, protoProps);
  }
  if (staticProps) {
    _defineProperties(Constructor, staticProps);
  }
  return Constructor;
}
/**
 * Babel helper: wire up `subClass extends superClass`.
 * The subclass prototype is created from the parent's prototype (or from
 * null when extending null) and, when there is a parent, the constructor
 * itself is linked to it so static members are inherited.
 * @throws {TypeError} when `superClass` is neither a function nor null
 */
function _inherits(subClass, superClass) {
  var isLegalParent = typeof superClass === "function" || superClass === null;
  if (!isLegalParent) {
    throw new TypeError("Super expression must either be null or a function");
  }
  var parentPrototype = superClass && superClass.prototype;
  subClass.prototype = Object.create(parentPrototype, {
    constructor: {
      value: subClass,
      writable: true,
      configurable: true
    }
  });
  if (superClass) {
    _setPrototypeOf(subClass, superClass);
  }
}
/**
 * Babel helper: read an object's prototype.
 * Replaces itself on first use with either the native function or a
 * `__proto__`-based fallback, then delegates to that choice.
 */
function _getPrototypeOf(o) {
  if (Object.setPrototypeOf) {
    _getPrototypeOf = Object.getPrototypeOf;
  } else {
    _getPrototypeOf = function _getPrototypeOf(o) {
      return o.__proto__ || Object.getPrototypeOf(o);
    };
  }
  return _getPrototypeOf(o);
}
/**
 * Babel helper: set an object's prototype and return the object.
 * Replaces itself on first use with the native function when available,
 * otherwise with a `__proto__` assignment fallback.
 */
function _setPrototypeOf(o, p) {
  if (Object.setPrototypeOf) {
    _setPrototypeOf = Object.setPrototypeOf;
  } else {
    _setPrototypeOf = function _setPrototypeOf(o, p) {
      o.__proto__ = p;
      return o;
    };
  }
  return _setPrototypeOf(o, p);
}
/**
 * Babel helper: return `self`, or throw if it is still undefined
 * (i.e. a derived constructor used `this` before calling super()).
 * @throws {ReferenceError} when `self` is undefined
 */
function _assertThisInitialized(self) {
  if (self !== void 0) {
    return self;
  }
  throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
}
/**
 * Babel helper: decide what a derived constructor evaluates to.
 * If the parent constructor returned an object or function, that value
 * wins; otherwise the (initialised) `self` is used.
 */
function _possibleConstructorReturn(self, call) {
  var kind = typeof call;
  var parentReturnedObject = call && (kind === "object" || kind === "function");
  if (parentReturnedObject) {
    return call;
  }
  return _assertThisInitialized(self);
}
//this is a not-well-thought-out way to reduce our dependence on `object===object` stuff
var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'.split('');
/**
 * Generate an id for a term: the given text (or '_' when empty),
 * a dash, then 7 random alphanumeric characters.
 * Randomness comes from Math.random, so ids are not guaranteed unique.
 */
function makeId(str) {
  var prefix = (str || '_') + '-';
  var suffix = '';
  for (var remaining = 7; remaining > 0; remaining--) {
    suffix += chars[Math.floor(Math.random() * chars.length)];
  }
  return prefix + suffix;
}
var _id = makeId;
//a hugely-ignorant, and widely subjective, transliteration of latin, cyrillic and greek unicode characters to english ascii.
//the mapping follows an approximate visual (not semantic or phonetic) relationship between unicode and ascii characters
//http://en.wikipedia.org/wiki/List_of_Unicode_characters
//https://docs.google.com/spreadsheet/ccc?key=0Ah46z755j7cVdFRDM1A2YVpwa1ZYWlpJM2pQZ003M0E
var compact = {
'!': '¡',
'?': '¿Ɂ',
'"': '“”"❝❞',
"'": '‘‛❛❜',
'-': '—–',
a: 'ªÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆΑΔΛάαλАадѦѧӐӑӒӓƛɅæ',
b: 'ßþƀƁƂƃƄƅɃΒβϐϦБВЪЬвъьѢѣҌҍ',
c: '¢©ÇçĆćĈĉĊċČčƆƇƈȻȼͻͼͽϲϹϽϾСсєҀҁҪҫ',
d: 'ÐĎďĐđƉƊȡƋƌǷ',
e: 'ÈÉÊËèéêëĒēĔĕĖėĘęĚěƎƏƐǝȄȅȆȇȨȩɆɇΈΕΞΣέεξϱϵ϶ЀЁЕЭеѐёҼҽҾҿӖӗӘәӚӛӬӭ',
f: 'ƑƒϜϝӺӻҒғſ',
g: 'ĜĝĞğĠġĢģƓǤǥǦǧǴǵ',
h: 'ĤĥĦħƕǶȞȟΉΗЂЊЋНнђћҢңҤҥҺһӉӊ',
I: 'ÌÍÎÏ',
i: 'ìíîïĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇії',
j: 'ĴĵǰȷɈɉϳЈј',
k: 'ĶķĸƘƙǨǩΚκЌЖКжкќҚқҜҝҞҟҠҡ',
l: 'ĹĺĻļĽľĿŀŁłƚƪǀǏǐȴȽΙӀӏ',
m: 'ΜϺϻМмӍӎ',
n: 'ÑñŃńŅņŇňʼnŊŋƝƞǸǹȠȵΝΠήηϞЍИЙЛПийлпѝҊҋӅӆӢӣӤӥπ',
o: 'ÒÓÔÕÖØðòóôõöøŌōŎŏŐőƟƠơǑǒǪǫǬǭǾǿȌȍȎȏȪȫȬȭȮȯȰȱΌΘΟθοσόϕϘϙϬϭϴОФоѲѳӦӧӨөӪӫ',
p: 'ƤƿΡρϷϸϼРрҎҏÞ',
q: 'Ɋɋ',
r: 'ŔŕŖŗŘřƦȐȑȒȓɌɍЃГЯгяѓҐґ',
s: 'ŚśŜŝŞşŠšƧƨȘșȿЅѕ',
t: 'ŢţŤťŦŧƫƬƭƮȚțȶȾΓΤτϮТт',
u: 'µÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưƱƲǓǔǕǖǗǘǙǚǛǜȔȕȖȗɄΰμυϋύ',
v: 'νѴѵѶѷ',
w: 'ŴŵƜωώϖϢϣШЩшщѡѿ',
x: '×ΧχϗϰХхҲҳӼӽӾӿ',
y: 'ÝýÿŶŷŸƳƴȲȳɎɏΎΥΫγψϒϓϔЎУучўѰѱҮүҰұӮӯӰӱӲӳ',
z: 'ŹźŻżŽžƩƵƶȤȥɀΖζ'
}; //decompress data into two hashes
// Expand the compact table into a per-character lookup:
// each special character points at the ascii key it was listed under.
var unicode = {};
Object.keys(compact).forEach(function (ascii) {
  var variants = compact[ascii].split('');
  variants.forEach(function (variant) {
    unicode[variant] = ascii;
  });
});
/**
 * Replace every character that has an entry in the lookup table with its
 * ascii stand-in; all other characters pass through untouched.
 * Works one UTF-16 unit at a time (str.split('')).
 */
var killUnicode = function killUnicode(str) {
  return str.split('').map(function (ch) {
    return unicode[ch] ? unicode[ch] : ch;
  }).join('');
};
var unicode_1 = killUnicode; // console.log(killUnicode('bjŏȒk—Ɏó'));
var periodAcronym = /([A-Z]\.)+[A-Z]?,?$/;
var oneLetterAcronym = /^[A-Z]\.,?$/;
var noPeriodAcronym = /[A-Z]{2,}('s|,)?$/;
var lowerCaseAcronym = /([a-z]\.){2,}[a-z]\.?$/;
/**
 * Does this string look like an acronym?
 * Tried in order: 'N.D.A', 'c.e.o', 'F.', 'NDA'.
 * @param {string} str
 * @returns {boolean}
 */
var isAcronym = function isAcronym(str) {
  var shapes = [periodAcronym, lowerCaseAcronym, oneLetterAcronym, noPeriodAcronym];
  return shapes.some(function (shape) {
    return shape.test(str) === true;
  });
};
var isAcronym_1 = isAcronym;
// a letter, an optional space, a slash, an optional space, a letter - 'see/saw'
var hasSlash = /[a-z\u00C0-\u00FF] ?\/ ?[a-z\u00C0-\u00FF]/;
/**
 * Produce the normalized ('clean') form of a single word.
 * The steps below run in a fixed order and each one sees the output of the
 * previous one: lowercase + trim, ascii transliteration, slash handling,
 * tag/mention prefix removal, punctuation and quote normalization,
 * a few spelling tweaks, acronym compaction and digit-group commas.
 * If everything gets stripped away, the lowercased+trimmed input is used instead.
 * @param {string} str - one word (falsy input is treated as '')
 * @returns {string}
 */
var clean = function clean(str) {
str = str || '';
str = str.toLowerCase();
str = str.trim();
// remembered as the fallback if the steps below strip everything
var original = str;
// (very) rough ASCII transliteration - bjŏrk -> bjork
str = unicode_1(str);
// rough handling of slashes - 'see/saw' keeps only the part before the first slash
if (hasSlash.test(str) === true) {
str = str.replace(/\/.*/, '');
}
// drop one leading '#' or '@' (#tags, @mentions)
str = str.replace(/^[#@]/, '');
// trailing punctuation
str = str.replace(/[,;.!?]+$/, '');
// coerce single curly quotes (and runs of them) to one straight apostrophe
str = str.replace(/[\u0027\u0060\u00B4\u2018\u2019\u201A\u201B\u2032\u2035\u2039\u203A]+/g, "'");
// coerce double curly quotes (and runs of them) to one straight double-quote
str = str.replace(/[\u0022\u00AB\u00BB\u201C\u201D\u201E\u201F\u2033\u2034\u2036\u2037\u2E42\u301D\u301E\u301F\uFF02]+/g, '"');
// coerce Unicode ellipses to three periods
str = str.replace(/\u2026/g, '...');
// en-dash to hyphen
str = str.replace(/\u2013/g, '-');
// lookin'->looking (make it easier for conjugation)
// NOTE(review): the pattern requires the word to END in 'in' (no apostrophe),
// so it also rewrites words such as 'satin' and does not match "lookin'" - confirm intent
str = str.replace(/([aeiou][ktrp])in$/, '$1ing');
// turn re-enactment to reenactment
// NOTE(review): replace('-', '') removes the first hyphen anywhere in the word,
// not necessarily the one right after re/un - confirm intent
if (/^(re|un)-?[^aeiou]./.test(str) === true) {
str = str.replace('-', '');
}
// strip leading & trailing grammatical punctuation, unless the word starts with ':' or ';'
if (/^[:;]/.test(str) === false) {
str = str.replace(/\.{3,}$/g, '');
str = str.replace(/[",\.!:;\?\)]+$/g, '');
str = str.replace(/^['"\(]+/g, '');
}
// do this again..
str = str.trim();
// oh shucks, nothing left - fall back to the lowercased, trimmed input
if (str === '') {
str = original;
}
// compact acronyms - remove every period
if (isAcronym_1(str)) {
str = str.replace(/\./g, '');
}
// nice-numbers - remove a comma that sits between two digits
str = str.replace(/([0-9]),([0-9])/g, '$1$2');
return str;
};
var clean_1 = clean; // console.log(normalize('Dr. V Cooper'));
/**
 * One step further than clean: drop possessive apostrophes.
 * A trailing 's (straight or curly apostrophe) is removed entirely,
 * and a trailing s' keeps only the 's'.
 * @param {string} str
 * @returns {string}
 */
var reduced = function reduced(str) {
  return str.replace(/['’]s$/, '').replace(/s['’]$/, 's');
};
var reduce = reduced;
//all punctuation marks, from https://en.wikipedia.org/wiki/Punctuation
//we have slightly different rules for start/end - like #hashtags.
var startings = /^[ \n\t\.’'\[\](){}⟨⟩:,،、‒–—―…!.‹›«»‐\-?‘’;\/⁄·\&*\•^†‡°¡¿※№÷׺ª%‰+−=‱¶′″‴§~|‖¦©℗®℠™¤₳฿\u0022|\uFF02|\u0027|\u201C|\u2018|\u201F|\u201B|\u201E|\u2E42|\u201A|\u00AB|\u2039|\u2035|\u2036|\u2037|\u301D|\u0060|\u301F]+/;
var endings = /[ \n\t\.’'\[\](){}⟨⟩:,،、‒–—―…!.‹›«»‐\-?‘’;\/⁄·\&*@\•^†‡°¡¿※#№÷׺ª‰+−=‱¶′″‴§~|‖¦©℗®℠™¤₳฿\u0022|\uFF02|\u0027|\u201D|\u2019|\u201D|\u2019|\u201D|\u201D|\u2019|\u00BB|\u203A|\u2032|\u2033|\u2034|\u301E|\u00B4|\u301E]+$/; //money = ₵¢₡₢$₫₯֏₠€ƒ₣₲₴₭₺₾ℳ₥₦₧₱₰£៛₽₹₨₪৳₸₮₩¥
var hasSlash$1 = /\//;
var hasApostrophe = /['’]/;
var hasAcronym = /^[a-z]\.([a-z]\.)+/i;
var minusNumber = /^[-+\.][0-9]/;
/**
 * Turn the given text into a parsed-up object:
 * separate the 'meat' of the word from the whitespace+punctuation around it.
 * @param {string} str - raw text for one term
 * @returns {{text: string, clean: string, reduced: string, pre: string, post: string, alias?: Object}}
 *   text: the word itself; clean/reduced: normalized forms of it;
 *   pre/post: the characters stripped from the start/end;
 *   alias: only present when the word contains a slash - one key per side.
 */
var parseTerm = function parseTerm(str) {
var original = str;
var pre = '';
var post = '';
// peel leading punctuation/whitespace off into `pre`
str = str.replace(startings, function (found) {
pre = found;
// support '-40': a lone sign or decimal point followed by a digit stays on the word
// (`str` here is still the untouched input - it is only reassigned once replace() returns)
if ((pre === '-' || pre === '+' || pre === '.') && minusNumber.test(str)) {
pre = '';
return found;
}
return '';
});
// peel trailing punctuation/whitespace off into `post`
str = str.replace(endings, function (found) {
post = found;
// keep s-apostrophe - "flanders'" or "chillin'" - unless an apostrophe also opened the word.
// the word always gets a straight apostrophe back, even if the input used a curly one
if (hasApostrophe.test(found) && /[sn]['’]$/.test(original) && hasApostrophe.test(pre) === false) {
post = post.replace(hasApostrophe, '');
return "'";
}
// keep end-period in acronym
if (hasAcronym.test(str) === true) {
post = post.replace(/\./, '');
return '.';
}
return '';
});
// we went too far.. nothing but punctuation was given
if (str === '') {
// do a very mild parse, and hope for the best: only trailing spaces become `post`
original = original.replace(/ *$/, function (after) {
post = after || '';
return '';
});
str = original;
pre = '';
// (self-assignment - has no effect)
post = post;
}
// create the various forms of our text,
var clean = clean_1(str);
var parsed = {
text: str,
clean: clean,
reduced: reduce(clean),
pre: pre,
post: post
};
// support aliases for slashes: each trimmed side of the slash becomes an alias key
if (hasSlash$1.test(str)) {
str.split(hasSlash$1).forEach(function (word) {
parsed.alias = parsed.alias || {};
parsed.alias[word.trim()] = true;
});
}
return parsed;
};
var parse = parseTerm;
/**
 * Bundler helper: run a CommonJS-style module body against a fresh
 * `module`/`exports` pair and return whatever ends up in module.exports.
 * Any value passed as the second argument is discarded.
 */
function createCommonjsModule(fn, module) {
  module = {
    exports: {}
  };
  fn(module, module.exports);
  return module.exports;
}
var _01Case = createCommonjsModule(function (module, exports) {
var titleCase = /^[A-Z][a-z'\u00C0-\u00FF]/;
var upperCase = /^[A-Z]+s?$/;
/** convert all text to uppercase */
exports.toUpperCase = function () {
this.text = this.text.toUpperCase();
return this;
};
/** convert all text to lowercase */
exports.toLowerCase = function () {
this.text = this.text.toLowerCase();
return this;
};
/** only set the first letter to uppercase
* leave any existing uppercase alone
*/
exports.toTitleCase = function () {
this.text = this.text.replace(/^ *[a-z\u00C0-\u00FF]/, function (x) {
return x.toUpperCase();
}); //support unicode?
return this;
};
/** if all letters are uppercase */
exports.isUpperCase = function () {
return upperCase.test(this.text);
};
/** if the first letter is uppercase, and the rest are lowercase */
exports.isTitleCase = function () {
return titleCase.test(this.text);
};
exports.titleCase = exports.isTitleCase;
});
var _01Case_1 = _01Case.toUpperCase;
var _01Case_2 = _01Case.toLowerCase;
var _01Case_3 = _01Case.toTitleCase;
var _01Case_4 = _01Case.isUpperCase;
var _01Case_5 = _01Case.isTitleCase;
var _01Case_6 = _01Case.titleCase;
var _02Punctuation = createCommonjsModule(function (module, exports) {
// these methods are called with '@hasComma' in the match syntax
// various unicode quotation-mark formats
var startQuote = /(\u0022|\uFF02|\u0027|\u201C|\u2018|\u201F|\u201B|\u201E|\u2E42|\u201A|\u00AB|\u2039|\u2035|\u2036|\u2037|\u301D|\u0060|\u301F)/;
var endQuote = /(\u0022|\uFF02|\u0027|\u201D|\u2019|\u201D|\u2019|\u201D|\u201D|\u2019|\u00BB|\u203A|\u2032|\u2033|\u2034|\u301E|\u00B4|\u301E)/;
/** search the term's 'post' punctuation */
exports.hasPost = function (punct) {
return this.post.indexOf(punct) !== -1;
};
/** search the term's 'pre' punctuation */
exports.hasPre = function (punct) {
return this.pre.indexOf(punct) !== -1;
};
/** does it have a quotation symbol? */
exports.hasQuote = function () {
return startQuote.test(this.pre) || endQuote.test(this.post);
};
exports.hasQuotation = exports.hasQuote;
/** does it have a comma? */
exports.hasComma = function () {
return this.hasPost(',');
};
/** does it end in a period? */
exports.hasPeriod = function () {
return this.hasPost('.') === true && this.hasPost('...') === false;
};
/** does it end in an exclamation */
exports.hasExclamation = function () {
return this.hasPost('!');
};
/** does it end with a question mark? */
exports.hasQuestionMark = function () {
return this.hasPost('?') || this.hasPost('¿');
};
/** is there a ... at the end? */
exports.hasEllipses = function () {
return this.hasPost('..') || this.hasPost('…') || this.hasPre('..') || this.hasPre('…');
};
/** is there a semicolon after this word? */
exports.hasSemicolon = function () {
return this.hasPost(';');
};
/** is there a slash '/' in this word? */
exports.hasSlash = function () {
return /\//.test(this.text);
};
/** a hyphen connects two words like-this */
exports.hasHyphen = function () {
var hyphen = /(-|–|—)/;
return hyphen.test(this.post) || hyphen.test(this.pre);
};
/** a dash separates words - like that */
exports.hasDash = function () {
var hyphen = / (-|–|—) /;
return hyphen.test(this.post) || hyphen.test(this.pre);
};
/** is it multiple words combinded */
exports.hasContraction = function () {
return Boolean(this.implicit);
};
/** try to sensibly put this punctuation mark into the term */
exports.addPunctuation = function (punct) {
// dont add doubles
if (punct === ',' || punct === ';') {
this.post = this.post.replace(punct, '');
}
this.post = punct + this.post;
return this;
};
});
var _02Punctuation_1 = _02Punctuation.hasPost;
var _02Punctuation_2 = _02Punctuation.hasPre;
var _02Punctuation_3 = _02Punctuation.hasQuote;
var _02Punctuation_4 = _02Punctuation.hasQuotation;
var _02Punctuation_5 = _02Punctuation.hasComma;
var _02Punctuation_6 = _02Punctuation.hasPeriod;
var _02Punctuation_7 = _02Punctuation.hasExclamation;
var _02Punctuation_8 = _02Punctuation.hasQuestionMark;
var _02Punctuation_9 = _02Punctuation.hasEllipses;
var _02Punctuation_10 = _02Punctuation.hasSemicolon;
var _02Punctuation_11 = _02Punctuation.hasSlash;
var _02Punctuation_12 = _02Punctuation.hasHyphen;
var _02Punctuation_13 = _02Punctuation.hasDash;
var _02Punctuation_14 = _02Punctuation.hasContraction;
var _02Punctuation_15 = _02Punctuation.addPunctuation;
//declare it up here (doesMatch and wrapMatch call each other)
var wrapMatch = function wrapMatch() {};
/**
 * Ignore optional/greedy logic, straight-up term match.
 * The first applicable rule on `reg` decides the outcome, in this order:
 * id, anything, start/end anchors, word, tag, method, regex, choices.
 * @param {Object} t - the term
 * @param {Object} reg - one parsed match token
 * @param {number} index - position of the term
 * @param {number} length - number of terms being matched against
 * @returns {boolean}
 */
var doesMatch = function doesMatch(t, reg, index, length) {
  // support id matches, and '.'
  if (reg.id === t.id || reg.anything === true) {
    return true;
  }
  // support '^' and '$' (in parentheses)
  var failsStart = reg.start === true && index !== 0;
  var failsEnd = reg.end === true && index !== length - 1;
  if (failsStart || failsEnd) {
    return false;
  }
  //support a text match
  if (reg.word !== undefined) {
    var word = reg.word;
    return (
      //match contractions
      t.implicit !== null && t.implicit === word ||
      // term aliases for slashes and things
      t.alias !== undefined && t.alias.hasOwnProperty(word) ||
      // support ~ match
      reg.soft === true && word === t.root ||
      //match either .clean or .text or .reduced
      word === t.clean || word === t.text || word === t.reduced
    );
  }
  //support #Tag
  if (reg.tag !== undefined) {
    return t.tags[reg.tag] === true;
  }
  //support @method
  if (reg.method !== undefined) {
    return typeof t[reg.method] === 'function' && t[reg.method]() === true;
  }
  //support /reg/
  if (reg.regex !== undefined) {
    return reg.regex.test(t.clean);
  }
  //support (one|two)
  if (reg.choices !== undefined) {
    var matchesChoice = function (choice) {
      return wrapMatch(t, choice, index, length);
    };
    // '&&' operator must match them all, otherwise one is enough
    return reg.operator === 'and' ? reg.choices.every(matchesChoice) : reg.choices.some(matchesChoice);
  }
  return false;
};
// wrap result for !negative match logic
wrapMatch = function wrapMatch(t, reg, index, length) {
  var matched = doesMatch(t, reg, index, length);
  return reg.negative === true ? !matched : matched;
};
var _doesMatch = wrapMatch;
// tags that do not count as 'known' (currently none)
var boring = {};
/** check a match object against this term */
var doesMatch_1 = function doesMatch_1(reg, index, length) {
  var term = this;
  return _doesMatch(term, reg, index, length);
};
/** does this term look like an acronym? */
var isAcronym_1$1 = function isAcronym_1$1() {
  var word = this.text;
  return isAcronym_1(word);
};
/** is this term implied by a contraction? (no text of its own, but an implicit form) */
var isImplicit = function isImplicit() {
  if (this.text !== '') {
    return false;
  }
  return Boolean(this.implicit);
};
/** does the term have at least one good tag? */
var isKnown = function isKnown() {
  var names = Object.keys(this.tags);
  return names.some(function (name) {
    return boring[name] !== true;
  });
};
// verb-form tags, in the order they are checked when choosing a tense for the root
var rootTenses = ['PastTense', 'Gerund', 'PresentTense', 'Participle', 'Actor'];
/**
 * Cache the root property of the term.
 * Starts from the implicit form (or the clean form), singularizes plurals,
 * and turns verbs that are neither negative nor already infinitive into
 * their infinitive, using the transforms supplied by `world`.
 */
var setRoot = function setRoot(world) {
  var transform = world.transforms;
  var tags = this.tags;
  var str = this.implicit || this.clean;
  if (tags.Plural) {
    str = transform.toSingular(str, world);
  }
  var needsInfinitive = tags.Verb && !tags.Negative && !tags.Infinitive;
  if (needsInfinitive) {
    var tense = null;
    for (var i = 0; i < rootTenses.length; i++) {
      if (tags[rootTenses[i]]) {
        tense = rootTenses[i];
        break;
      }
    }
    str = transform.toInfinitive(str, world, tense);
  }
  this.root = str;
};
var _03Misc = {
  doesMatch: doesMatch_1,
  isAcronym: isAcronym_1$1,
  isImplicit: isImplicit,
  isKnown: isKnown,
  setRoot: setRoot
};
var hasSpace = /[\s-]/;
var isUpperCase = /^[A-Z-]+$/;
/**
 * Return various text formats of this term.
 * @param {Object} [options] - output switches. Word form: reduced, implicit,
 *   normal, root (root is applied last and wins over the others), unicode.
 *   Casing: titlecase, lowercase. Cleanup: acronyms, abbreviations.
 *   Surroundings: whitespace, punctuation, last.
 * @param {boolean} [showPre] - include the text before the word only when strictly true
 * @param {boolean} [showPost] - include the text after the word only when strictly true
 * @returns {string}
 */
var textOut = function textOut(options, showPre, showPost) {
  options = options || {};
  var word = this.text;
  var before = this.pre;
  var after = this.post;
  // -word-
  if (options.reduced === true) {
    word = this.reduced || '';
  }
  if (options.implicit === true && this.implicit) {
    word = this.implicit || '';
  }
  if (options.normal === true) {
    word = this.clean || this.text || '';
  }
  // root overrides every other word form, falling back to the reduced form
  if (options.root === true) {
    word = this.root || this.reduced || '';
  }
  if (options.unicode === true) {
    word = unicode_1(word);
  }
  // cleanup case
  if (options.titlecase === true) {
    if (this.tags.ProperNoun && !this.titleCase()) {
      // a proper noun with unusual casing is left exactly as written
    } else if (this.tags.Acronym) {
      word = word.toUpperCase(); //uppercase acronyms
    } else if (isUpperCase.test(word)) {
      // lowercase everything else that is all-caps
      word = word.toLowerCase();
    }
  }
  if (options.lowercase === true) {
    word = word.toLowerCase();
  }
  // remove the '.'s from 'F.B.I.' (safely)
  if (options.acronyms === true && this.tags.Acronym) {
    word = word.replace(/\./g, '');
  }
  // -before/after-
  if (options.whitespace === true || options.root === true) {
    before = '';
    after = ' ';
    // no trailing space when the original had none (or this is the last term),
    // except for implicit terms
    if ((hasSpace.test(this.post) === false || options.last) && !this.implicit) {
      after = '';
    }
  }
  if (options.punctuation === true && !options.root) {
    //normalized end punctuation - only the first mark found, in this priority order
    if (this.hasPost('.') === true) {
      after = '.' + after;
    } else if (this.hasPost('?') === true) {
      after = '?' + after;
    } else if (this.hasPost('!') === true) {
      after = '!' + after;
    } else if (this.hasPost(',') === true) {
      after = ',' + after;
    } else if (this.hasEllipses() === true) {
      after = '...' + after;
    }
  }
  if (showPre !== true) {
    before = '';
  }
  if (showPost !== true) {
    after = '';
  }
  // remove the '.' from 'Mrs.' (safely)
  if (options.abbreviations === true && this.tags.Abbreviation) {
    after = after.replace(/^\./, '');
  }
  return before + word + after;
};
var _04Text = {
  textOut: textOut
};
var boringTags = {
  Auxiliary: 1,
  Possessive: 1
};
/**
 * A subjective ranking of a term's tags, kinda tfidf-based.
 * @param {Object} term - anything with a `tags` object
 * @param {Object} world - `world.tags` holds the tag definitions (`lineage`, `isA`)
 * @returns {string[]} the term's tag names, reordered
 *
 * NOTE(review): the comparator is not symmetric (it mostly inspects `b`), so the
 * exact order for larger tag lists depends on the engine's sort algorithm.
 * The comparator's results are kept exactly as before; only a branch that
 * could never run (`!tagSet[b]` tested a second time) was removed.
 */
var rankTags = function rankTags(term, world) {
  var tagSet = world.tags;
  return Object.keys(term.tags).sort(function (a, b) {
    //bury the tags we dont want (boring ones, and ones missing from the tagset)
    if (boringTags[b] || !tagSet[b]) {
      return -1;
    }
    if (!tagSet[a]) {
      return 0;
    }
    // then sort by #of parent tags (most-specific tags first)
    if (tagSet[a].lineage.length > tagSet[b].lineage.length) {
      return 1;
    }
    if (tagSet[a].isA.length > tagSet[b].isA.length) {
      return -1;
    }
    return 0;
  });
};
var _bestTag = rankTags;
var jsonDefault = {
  text: true,
  tags: true,
  implicit: true,
  clean: false,
  id: false,
  index: false,
  offset: false,
  whitespace: false,
  bestTag: false
};
/**
 * Return various metadata for this term.
 * @param {Object} [options] - which fields to include; merged over jsonDefault
 * @param {Object} [world] - only read when `options.bestTag` is set
 * @returns {Object} plain object holding the requested fields
 */
var json = function json(options, world) {
  options = options || {};
  options = Object.assign({}, jsonDefault, options);
  var result = {};
  // default on
  if (options.text) {
    result.text = this.text;
  }
  if (options.normal) {
    // terms built in this file carry no `normal` property - their normalized
    // text lives in `clean` - so fall back to it instead of emitting undefined
    result.normal = this.normal !== undefined ? this.normal : this.clean;
  }
  if (options.tags) {
    result.tags = Object.keys(this.tags);
  }
  // default off
  if (options.clean) {
    result.clean = this.clean;
  }
  // the id is also included whenever offsets are requested
  if (options.id || options.offset) {
    result.id = this.id;
  }
  if (options.implicit && this.implicit !== null) {
    result.implicit = this.implicit;
  }
  if (options.whitespace) {
    result.pre = this.pre;
    result.post = this.post;
  }
  if (options.bestTag) {
    result.bestTag = _bestTag(this, world)[0];
  }
  return result;
};
var _05Json = {
  json: json
};
// Merge every group of term methods (case, punctuation, misc, text, json) into one table.
// Object.assign copies left to right, so on a name clash the later group wins.
var methods = Object.assign({}, _01Case, _02Punctuation, _03Misc, _04Text, _05Json);
/**
 * Are we running where a global `window` with a `document` exists?
 * Returns false when there is no `window`, otherwise `window.document`
 * itself (used for its truthiness).
 */
function isClientSide() {
  if (typeof window === 'undefined') {
    return false;
  }
  return window.document;
}
/**
 * Add spaces at the end until the text is at least `width` characters long.
 * The input is converted with toString() first; longer text is returned unchanged.
 */
var padEnd = function padEnd(str, width) {
  var padded = str.toString();
  for (var missing = width - padded.length; missing > 0; missing--) {
    padded += ' ';
  }
  return padded;
};
/**
 * Output for verbose-mode: print that `tag` was added to term `t`.
 * Client-side the line is styled through a '%c' console directive;
 * otherwise ANSI colour codes are used and the optional reason is appended.
 */
var logTag = function logTag(t, tag, reason) {
  if (isClientSide()) {
    var styled = '%c' + padEnd(t.clean, 3) + ' + ' + tag + ' ';
    console.log(styled, 'color: #6accb2;');
    return;
  }
  //server-side
  var line = '\x1b[33m' + padEnd(t.clean, 15) + '\x1b[0m + \x1b[32m' + tag + '\x1b[0m ';
  console.log(reason ? padEnd(line, 35) + ' ' + reason + '' : line);
};
/**
 * Output for verbose mode: print that `tag` was removed from term `t`.
 * Client-side the line is styled through a '%c' console directive;
 * otherwise ANSI colour codes are used and the optional reason is appended.
 */
var logUntag = function logUntag(t, tag, reason) {
  if (isClientSide()) {
    var styled = '%c' + padEnd(t.clean, 3) + ' - ' + tag + ' ';
    console.log(styled, 'color: #AB5850;');
    return;
  }
  //server-side
  var line = '\x1b[33m' + padEnd(t.clean, 3) + ' \x1b[31m - #' + tag + '\x1b[0m ';
  console.log(reason ? padEnd(line, 35) + ' ' + reason : line);
};
/** is this value an array? (decided by its Object.prototype.toString label) */
var isArray = function isArray(arr) {
  var label = Object.prototype.toString.call(arr);
  return label === '[object Array]';
};
/** uppercase the first character and leave the rest of the string as it is */
var titleCase = function titleCase(str) {
  var head = str.charAt(0);
  var tail = str.slice(1);
  return head.toUpperCase() + tail;
};
// Small helpers bundled under one object: verbose-mode loggers, an array check
// and a first-letter capitalizer. The tagging code reaches them as `fns.<name>`.
var fns = {
logTag: logTag,
logUntag: logUntag,
isArray: isArray,
titleCase: titleCase
};
/**
 * Add a tag to a term, along with the parent tags the tagset lists for it,
 * and remove the tags the tagset declares contradictory.
 * @param {Object} t - the term
 * @param {string} tag - tag name, with or without a leading '#'; '', '.' and '-' mean 'skip'
 * @param {string} [reason] - a clue for debugging, shown in verbose mode
 * @param {Object} world - supplies `tags` (the tagset) and `isVerbose()`
 */
var addTag = function addTag(t, tag, reason, world) {
  var tagset = world.tags;
  //support '.' or '-' notation for skipping the tag
  var isSkipped = tag === '' || tag === '.' || tag === '-';
  if (isSkipped) {
    return;
  }
  if (tag[0] === '#') {
    tag = tag.replace(/^#/, '');
  }
  tag = fns.titleCase(tag);
  //if we already got this one
  if (t.tags[tag] === true) {
    return;
  }
  // log it?
  var isVerbose = world.isVerbose();
  if (isVerbose === true) {
    fns.logTag(t, tag, reason);
  }
  //add tag
  t.tags[tag] = true; //whee!
  //check tagset for any additional things to do...
  if (tagset.hasOwnProperty(tag) !== true) {
    return;
  }
  //add parent Tags
  tagset[tag].isA.forEach(function (parent) {
    t.tags[parent] = true;
    if (isVerbose === true) {
      fns.logTag(t, '→ ' + parent);
    }
  });
  //remove any contrary tags
  t.unTag(tagset[tag].notA, '←', world);
};
/** support an array of tags, or a single tag given as a string */
var addTags = function addTags(term, tags, reason, world) {
  if (typeof tags === 'string') {
    addTag(term, tags, reason, world);
    return;
  }
  var index = 0;
  while (index < tags.length) {
    addTag(term, tags[index], reason, world);
    index += 1;
  }
};
var add = addTags;
/**
 * Remove this tag from the term, plus every tag listed in its tagset `lineage`.
 * @param {Object} t - the term
 * @param {string} tag - tag name, or '*' to wipe all tags
 * @param {string} [reason] - a clue for debugging, shown in verbose mode
 * @param {Object} world - supplies `tags` (the tagset) and `isVerbose()`
 * @returns {Object} the term
 */
var unTag = function unTag(t, tag, reason, world) {
  var isVerbose = world.isVerbose();
  //support '*' for removing all tags
  if (tag === '*') {
    t.tags = {};
    return t;
  }
  // drop one tag if present, logging it in verbose-mode
  var drop = function (name, label, why) {
    if (t.tags[name] !== true) {
      return;
    }
    delete t.tags[name];
    if (isVerbose === true) {
      fns.logUntag(t, label, why);
    }
  };
  // remove the tag
  drop(tag, tag, reason);
  //delete downstream tags too
  var entry = world.tags[tag];
  if (entry) {
    var lineage = entry.lineage;
    var index = 0;
    while (index < lineage.length) {
      drop(lineage[index], ' - ' + lineage[index]);
      index += 1;
    }
  }
  return t;
};
//handle an array of tags
var untagAll = function untagAll(term, tags, reason, world) {
  var isList = typeof tags !== 'string' && tags;
  if (!isList) {
    unTag(term, tags, reason, world);
    return;
  }
  var index = 0;
  while (index < tags.length) {
    unTag(term, tags[index], reason, world);
    index += 1;
  }
};
var unTag_1 = untagAll;
/**
 * Is this tag consistent with the term's current tags?
 * A tag is rejected when the term already carries one of the tags its tagset
 * entry lists under `notA`, or when any of its parent (`isA`) tags is rejected.
 * Tags missing from the tagset are always accepted.
 *
 * A list of tags (the callers document `string | string[]`, and the tagging code
 * iterates `isA` as a list) is checked member by member and accepted only if
 * every member is. Previously a list was coerced into one joined string key, so
 * any list with more than one entry was silently accepted.
 * @param {Object} term - anything with a `tags` object
 * @param {string | string[]} tag - tag name(s), with or without a leading '#'
 * @param {Object} world - `world.tags` holds the tag definitions
 * @returns {boolean}
 */
var canBe = function canBe(term, tag, world) {
  // a list is only ok when each of its tags is ok
  if (Array.isArray(tag)) {
    for (var t = 0; t < tag.length; t++) {
      if (canBe(term, tag[t], world) === false) {
        return false;
      }
    }
    return true;
  }
  var tagset = world.tags;
  // cleanup tag
  if (tag[0] === '#') {
    tag = tag.replace(/^#/, '');
  }
  //fail-fast
  if (tagset[tag] === undefined) {
    return true;
  }
  //loop through tag's contradictory tags
  var enemies = tagset[tag].notA || [];
  for (var i = 0; i < enemies.length; i++) {
    if (term.tags[enemies[i]] === true) {
      return false;
    }
  }
  if (tagset[tag].isA !== undefined) {
    return canBe(term, tagset[tag].isA, world); //recursive
  }
  return true;
};
var canBe_1 = canBe;
/** add a tag or tags, and their descendents to this term
 * @param {string | string[]} tags - a tag or tags
 * @param {string?} [reason] a clue for debugging
 * @param {Object} world - passed through to the tagging helper
 * @returns the term, for chaining
 */
var tag_1 = function tag_1(tags, reason, world) {
  var term = this;
  add(term, tags, reason, world);
  return term;
};
/** only tag this term if it's consistent with it's current tags
 * @param {string | string[]} tags - a tag or tags
 * @param {string?} [reason] a clue for debugging
 * @param {Object} world - passed through to the tagging helpers
 * @returns the term, for chaining (whether or not anything was tagged)
 */
var tagSafe = function tagSafe(tags, reason, world) {
  var term = this;
  if (!canBe_1(term, tags, world)) {
    return term;
  }
  add(term, tags, reason, world);
  return term;
};
/** remove a tag or tags, and their descendents from this term
 * @param {string | string[]} tags - a tag or tags
 * @param {string?} [reason] a clue for debugging
 * @param {Object} world - passed through to the untagging helper
 * @returns the term, for chaining
 */
var unTag_1$1 = function unTag_1$1(tags, reason, world) {
  var term = this;
  unTag_1(term, tags, reason, world);
  return term;
};
/** is this tag consistent with the word's current tags?
 * @param {string | string[]} tags - a tag or tags
 * @param {Object} world - passed through to the consistency check
 * @returns {boolean}
 */
var canBe_1$1 = function canBe_1$1(tags, world) {
  var term = this;
  return canBe_1(term, tags, world);
};
var tag = {
  tag: tag_1,
  tagSafe: tagSafe,
  unTag: unTag_1$1,
  canBe: canBe_1$1
};
/**
 * A single word, with its surrounding punctuation/whitespace, tags, and
 * the ids of its neighbours (`prev` / `next`).
 */
var Term =
/*#__PURE__*/
function () {
  /** @param {string} [text] - raw text for this term; coerced with String() */
  function Term() {
    var text = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : '';
    _classCallCheck(this, Term);
    text = String(text);
    var obj = parse(text); // the various forms of our text
    this.text = obj.text || '';
    this.clean = obj.clean;
    this.reduced = obj.reduced;
    this.root = null;
    this.implicit = null;
    this.pre = obj.pre || '';
    this.post = obj.post || '';
    this.tags = {};
    this.prev = null;
    this.next = null;
    this.id = _id(obj.clean);
    this.isA = 'Term'; // easier than .constructor...
    // support alternative matches
    if (obj.alias) {
      this.alias = obj.alias;
    }
  }
  /** set the text of the Term to something else*/
  _createClass(Term, [{
    key: "set",
    value: function set(str) {
      var obj = parse(str);
      this.text = obj.text;
      this.clean = obj.clean;
      // keep every text-derived form in step with the new text, so matching
      // does not keep answering for the old word
      this.reduced = obj.reduced;
      this.root = null; // a cached root belongs to the old text
      if (obj.alias) {
        this.alias = obj.alias;
      } else {
        delete this.alias;
      }
      // NOTE(review): pre/post, tags and implicit are deliberately left as they were
      return this;
    }
  }]);
  return Term;
}();
/** create a deep-copy of this term (it gets a fresh id and no prev/next links) */
Term.prototype.clone = function () {
  var term = new Term(this.text);
  term.pre = this.pre;
  term.post = this.post;
  // carry over the forms that cannot be re-derived from `text` alone
  // (e.g. an implicit term has empty text)
  term.clean = this.clean;
  term.reduced = this.reduced;
  term.root = this.root;
  term.implicit = this.implicit;
  term.tags = Object.assign({}, this.tags); //use the old id, so it can be matched with .match(doc)
  // term.id = this.id
  return term;
};
Object.assign(Term.prototype, methods);
Object.assign(Term.prototype, tag);
var Term_1 = Term;
/**
 * Return a flat array of Term objects, found by walking the linked list
 * from `this.start` through the pool for `this.length` terms.
 * @param {number} [n] - when given, return only the term at that index
 * @returns {Array|Object|undefined} all terms, or the n-th term
 *
 * A broken list (a `next` of null, or an id the pool does not know) is
 * reported with console.error and the walk stops there, so the result never
 * contains holes from a failed lookup.
 */
var terms = function terms(n) {
  if (this.length === 0) {
    return [];
  }
  // use cache, if it exists
  if (this.cache.terms) {
    if (n !== undefined) {
      return this.cache.terms[n];
    }
    return this.cache.terms;
  }
  var terms = [this.pool.get(this.start)];
  for (var i = 0; i < this.length - 1; i += 1) {
    var previous = terms[terms.length - 1];
    var id = previous ? previous.next : null;
    if (id === null) {
      // throw new Error('linked-list broken')
      console.error("Compromise error: Linked list broken in phrase '" + this.start + "'");
      break;
    }
    var term = this.pool.get(id);
    if (term === undefined) {
      console.error("Compromise error: Linked list broken. Missing term '" + id + "' in phrase '" + this.start + "'");
      break;
    }
    terms.push(term);
    //return this one?
    if (n !== undefined && n === i) {
      return terms[n];
    }
  }
  // this.cache.terms = terms
  if (n !== undefined) {
    return terms[n];
  }
  return terms;
};
/**
 * Return a shallow or deep copy of this phrase.
 * Shallow: a new phrase over the very same terms.
 * Deep: every term is cloned, added to the pool, linked to its new
 * neighbours, and a phrase is built over the copies.
 * @param {boolean} [isShallow]
 */
var clone = function clone(isShallow) {
  if (isShallow) {
    return this.buildFrom(this.start, this.length);
  }
  //how do we clone part of the pool?
  var copies = this.terms().map(function (original) {
    return original.clone();
  });
  //connect these new ids up
  for (var i = 0; i < copies.length; i++) {
    var copy = copies[i];
    //add it to the pool..
    this.pool.add(copy);
    if (copies[i + 1]) {
      copy.next = copies[i + 1].id;
    }
    if (copies[i - 1]) {
      copy.prev = copies[i - 1].id;
    }
  }
  return this.buildFrom(copies[0].id, copies.length);
};
/** return last term object (undefined when there are no terms) */
var lastTerm = function lastTerm() {
  var all = this.terms();
  var finalIndex = all.length - 1;
  return all[finalIndex];
};
/**
 * Quick lookup for a term id: does this phrase contain the term `wantId`?
 * Uses the cached term list when there is one, otherwise walks the
 * linked list through the pool. A term missing from the pool is reported
 * with console.error and counts as 'not found'.
 * @returns {boolean}
 */
var hasId = function hasId(wantId) {
  if (this.length === 0 || !wantId) {
    return false;
  }
  if (this.start === wantId) {
    return true;
  }
  // use cache, if available
  if (this.cache.terms) {
    return this.cache.terms.some(function (cached) {
      return cached.id === wantId;
    });
  }
  // otherwise, go through each term
  var currentId = this.start;
  var hopsLeft = this.length - 1;
  while (hopsLeft > 0) {
    var term = this.pool.get(currentId);
    if (term === undefined) {
      console.error("Compromise error: Linked list broken. Missing term '".concat(currentId, "' in phrase '").concat(this.start, "'\n")); // throw new Error('linked List error')
      return false;
    }
    if (term.next === wantId) {
      return true;
    }
    currentId = term.next;
    hopsLeft -= 1;
  }
  return false;
};
/** how many separate, non-empty words is it? (terms with empty text are not counted) */
var wordCount = function wordCount() {
  return this.terms().reduce(function (count, t) {
    return t.text !== '' ? count + 1 : count;
  }, 0);
};
// Phrase utility methods gathered under their public names:
// term access, copying, and lookups over the phrase's linked list of terms.
var _01Utils = {
terms: terms,
clone: clone,
lastTerm: lastTerm,
hasId: hasId,
wordCount: wordCount
};
/** remove trailing spaces (only the space character) */
var trimEnd = function trimEnd(str) {
  return str.replace(/ +$/, '');
};
/**
 * Produce output in the given format.
 * @param {Object|string} [options] - textOut options, or one of the preset
 *   names 'normal', 'clean', 'reduced', 'root' (any other string means no options)
 * @param {boolean} [isFirst] - this phrase opens the output: hide its leading text
 * @param {boolean} [isLast] - this phrase closes the output: hide its trailing text
 * @returns {string}
 *
 * The caller's options object is never modified: the per-term `last` flag is
 * written to a private copy.
 */
var text = function text() {
  var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
  var isFirst = arguments.length > 1 ? arguments[1] : undefined;
  var isLast = arguments.length > 2 ? arguments[2] : undefined;
  if (typeof options === 'string') {
    if (options === 'normal') {
      options = {
        whitespace: true,
        unicode: true,
        lowercase: true,
        punctuation: true,
        acronyms: true,
        abbreviations: true,
        implicit: true,
        normal: true
      };
    } else if (options === 'clean') {
      options = {
        titlecase: false,
        lowercase: true,
        punctuation: true,
        whitespace: true,
        unicode: true,
        implicit: true
      };
    } else if (options === 'reduced') {
      options = {
        titlecase: false,
        lowercase: true,
        punctuation: false,
        //FIXME: reversed?
        whitespace: true,
        unicode: true,
        implicit: true,
        reduced: true
      };
    } else if (options === 'root') {
      options = {
        titlecase: false,
        lowercase: true,
        punctuation: true,
        whitespace: true,
        unicode: true,
        implicit: true,
        root: true
      };
    } else {
      options = {};
    }
  } else {
    // `options.last` is set below for every term - do that on our own copy
    options = Object.assign({}, options);
  }
  var terms = this.terms();
  //is this phrase a complete sentence? (nothing before its first term, nothing after its last)
  var isFull = false;
  if (terms[0] && terms[0].prev === null && terms[terms.length - 1].next === null) {
    isFull = true;
  }
  var text = terms.reduce(function (str, t, i) {
    options.last = isLast && i === terms.length - 1;
    var showPre = true;
    var showPost = true;
    if (isFull === false) {
      // dont show beginning whitespace
      if (i === 0 && isFirst) {
        showPre = false;
      }
      // dont show end-whitespace
      if (i === terms.length - 1 && isLast) {
        showPost = false;
      }
    }
    return str + t.textOut(options, showPre, showPost);
  }, '');
  //full-phrases show punctuation, but not whitespace
  if (isFull === true && isLast) {
    text = trimEnd(text);
  }
  if (options.trim === true) {
    text = text.trim();
  }
  return text;
};
var _02Text = {
  text: text
};
/**
 * Remove start and end whitespace: whitespace at the very beginning of the
 * first term's `pre` and at the very end of the last term's `post`.
 * @returns the phrase, for chaining
 */
var trim = function trim() {
  var list = this.terms();
  if (list.length === 0) {
    return this;
  }
  var first = list[0];
  var last = list[list.length - 1];
  //trim starting
  first.pre = first.pre.replace(/^\s+/, '');
  //trim ending
  last.post = last.post.replace(/\s+$/, '');
  return this;
};
var _03Change = {
  trim: trim
};
var endOfSentence = /[.?!]\s*$/;
/**
 * Decide the trailing text for the last appended term - replacing a 'word.' with a 'word!'.
 * When the new text already ends a sentence, keep it and only transfer the
 * trailing whitespace of the old text; otherwise reuse the old text as it is.
 * @param {string} before - `post` of the term being appended to
 * @param {string} after - `post` of the last appended term
 * @returns {string}
 */
var combinePost = function combinePost(before, after) {
  if (endOfSentence.test(after)) {
    //only transfer the whitespace (the pattern can match nothing, so there is always a result)
    var whitespace = before.match(/\s*$/)[0];
    return after + whitespace;
  }
  return before;
};
/**
 * Fix up pre/post text when `newTerms` are appended after `beforeTerms`.
 * The first new term takes over the `pre` of the first existing term, the
 * last new term receives the combined trailing text, and the term being
 * appended to ends up followed by exactly one space.
 */
var addWhitespace = function addWhitespace(beforeTerms, newTerms) {
  // add any existing pre-whitespace to beginning
  newTerms[0].pre = beforeTerms[0].pre;
  var lastTerm = beforeTerms[beforeTerms.length - 1];
  //add any existing punctuation to end of our new terms
  var newTerm = newTerms[newTerms.length - 1];
  newTerm.post = combinePost(lastTerm.post, newTerm.post);
  // remove existing punctuation, leaving one separating space: before ←[space] - after
  lastTerm.post = ' ';
}; //insert this segment into the linked-list
/**
 * Splice `newTerms` into the doubly-linked term list, directly after the
 * last term of `beforeTerms`. Only `prev`/`next` links are rewritten.
 * @param {Array} beforeTerms - terms of the phrase being appended to
 * @param {Array} newTerms - terms being inserted
 * @param {Object} pool - term lookup; `pool.get(id)` must return the term
 *   that currently follows `beforeTerms`, if there is one
 */
var stitchIn = function stitchIn(beforeTerms, newTerms, pool) {
  var lastBefore = beforeTerms[beforeTerms.length - 1];
  var firstNew = newTerms[0];
  var lastNew = newTerms[newTerms.length - 1];
  var afterId = lastBefore.next;
  //connect ours in (main → newPhrase)
  lastBefore.next = firstNew.id;
  //stitch the end in (newPhrase → after)
  lastNew.next = afterId;
  //do it backwards, too
  if (afterId) {
    // newPhrase ← after
    var afterTerm = pool.get(afterId);
    afterTerm.prev = lastNew.id;
  }
  // before ← newPhrase
  // the back-link must point at the term that now precedes the new text,
  // i.e. the LAST term of `beforeTerms` (not its first term), so that
  // walking `prev` mirrors walking `next`.
  if (lastBefore.id) {
    firstNew.prev = lastBefore.id;
  }
}; // avoid stretching a phrase twice.
/**
 * Return a new array holding only the first occurrence of each item
 * (items are compared the way `indexOf` compares them).
 * @param {Array} list
 * @returns {Array}
 */
var unique = function unique(list) {
  var firsts = [];
  for (var i = 0; i < list.length; i += 1) {
    // keep an item only at the position where it first appears
    if (list.indexOf(list[i]) === i) {
      firsts.push(list[i]);
    }
  }
  return firsts;
}; //append one phrase onto another.
/**
 * Append `newPhrase` directly after `before`.
 * Adjusts whitespace at the seam, links the new terms into the term list,
 * then lengthens every phrase that is affected by the insertion.
 * @param {Object} before - phrase being appended to
 * @param {Object} newPhrase - phrase to add
 * @param {Object} doc - document whose phrases (and its parents' phrases) may need stretching
 * @returns {Object} `before`
 */
var appendPhrase = function appendPhrase(before, newPhrase, doc) {
  var existing = before.cache.terms || before.terms();
  var incoming = newPhrase.cache.terms || newPhrase.terms();
  //spruce-up the whitespace issues
  addWhitespace(existing, incoming);
  //insert this segment into the linked-list
  stitchIn(existing, incoming, before.pool);
  // stretch! collect `before` plus every phrase, in this doc and its
  // parents, that contains the start term of `before`
  var startId = before.start;
  var affected = [doc].concat(doc.parents()).reduce(function (found, d) {
    var hits = d.list.filter(function (p) {
      return p.hasId(startId);
    });
    return found.concat(hits);
  }, [before]);
  // don't double-count a phrase
  unique(affected).forEach(function (p) {
    p.length += newPhrase.length;
  });
  return before;
};
var append = appendPhrase;
var hasSpace$1 = / /; //a new space needs to be added, either on the new phrase, or the old one
// '[new] [◻old]' -or- '[old] [◻new] [old]'
/**
 * Ensure the last of the new terms is followed by a space, so the text
 * placed after it does not run into it. Mutates that term's `post`.
 * @param {Array} newTerms - terms being inserted in front of existing text
 */
var addWhitespace$1 = function addWhitespace(newTerms) {
  var tail = newTerms[newTerms.length - 1];
  var alreadySpaced = hasSpace$1.test(tail.post);
  if (!alreadySpaced) {
    tail.post += ' ';
  }
}; //insert this segment into the linked-list
/**
 * Link `newTerms` into the term list directly in front of `main`.
 * @param {Object} main - phrase receiving the new text at its front
 * @param {Object} newPhrase - phrase being inserted (its `start` id is used)
 * @param {Array} newTerms - the terms of `newPhrase`
 */
var stitchIn$1 = function stitchIn(main, newPhrase, newTerms) {
  var tailNew = newTerms[newTerms.length - 1];
  var lookup = main.pool;
  // forward: [newPhrase] → [main]
  tailNew.next = main.start;
  // forward: [before] → [newPhrase], when something precedes main
  var previousId = lookup.get(main.start).prev;
  if (previousId) {
    lookup.get(previousId).next = newPhrase.start;
  }
  //do it backwards, too
  var mainHead = main.terms(0);
  // before ← newPhrase
  newTerms[0].prev = mainHead.prev;
  // newPhrase ← main
  mainHead.prev = tailNew.id;
};
/**
 * Return a new array holding only the first occurrence of each item
 * (items are compared the way `indexOf` compares them).
 * @param {Array} list
 * @returns {Array}
 */
var unique$1 = function unique(list) {
  var firsts = [];
  for (var i = 0; i < list.length; i += 1) {
    // an item survives only at its first position
    if (list.indexOf(list[i]) === i) {
      firsts.push(list[i]);
    }
  }
  return firsts;
}; //join a new phrase onto the front of another
/**
 * Insert `newPhrase` directly in front of `original`.
 * Adds a separating space, links the new terms into the term list, then
 * lengthens (and, where needed, re-starts) every affected phrase.
 * @param {Object} original - phrase receiving the new text at its front
 * @param {Object} newPhrase - phrase to insert
 * @param {Object} doc - document whose phrases (and its parents' phrases) may need stretching
 * @returns {Object} `original`
 */
var joinPhrase = function joinPhrase(original, newPhrase, doc) {
  var oldStart = original.start;
  var incoming = newPhrase.terms();
  //spruce-up the whitespace issues
  addWhitespace$1(incoming);
  //insert this segment into the linked-list
  stitchIn$1(original, newPhrase, incoming);
  //increase the length of our phrases: gather `original` plus every phrase,
  //in this doc and its parents, holding the old start or the new start term
  var affected = [original];
  [doc].concat(doc.parents()).forEach(function (d) {
    d.list.forEach(function (p) {
      if (p.hasId(oldStart) || p.hasId(newPhrase.start)) {
        affected.push(p);
      }
    });
  });
  // don't double-count, then stretch these phrases
  unique$1(affected).forEach(function (p) {
    p.length += newPhrase.length;
    // a phrase that began at the old start now begins with the new text
    if (p.start === oldStart) {
      p.start = newPhrase.start;
    }
  });
  return original;
};
var prepend = joinPhrase;
/**
 * Shorten the phrases that contain a soon-to-be-removed run of terms.
 * For each of `doc.parents()` and then `doc` itself, the first phrase in its
 * `list` containing `id` loses `deleteLength` terms; if that phrase started
 * at `id`, it now starts at `after.id`. Finally, phrases in `doc.list` left
 * without a start or a length are dropped.
 * @param {Object} doc - document the deletion happens in
 * @param {string} id - id of the first term being removed
 * @param {number} deleteLength - how many terms are being removed
 * @param {Object} after - the term following the removed run (its `id` is read)
 */
var shrinkAll = function shrinkAll(doc, id, deleteLength, after) {
  var chain = doc.parents();
  chain.push(doc);
  for (var i = 0; i < chain.length; i += 1) {
    //find our phrase to shrink
    var candidates = chain[i].list;
    var hit = null;
    for (var j = 0; j < candidates.length; j += 1) {
      if (candidates[j].hasId(id)) {
        hit = candidates[j];
        break;
      }
    }
    if (!hit) {
      continue;
    }
    hit.length -= deleteLength;
    // does it start with this soon-removed word?
    if (hit.start === id) {
      hit.start = after.id;
    }
  }
  // cleanup empty phrase objects
  doc.list = doc.list.filter(function (p) {
    return Boolean(p.start && p.length);
  });
};
/**
 * Wrap the linked-list around these terms so they don't appear any more.
 * The terms themselves stay in the pool; only phrase lengths/starts and the
 * neighbouring `prev`/`next` links are changed.
 * @param {Object} phrase - the phrase whose terms are removed
 * @param {Object} doc - document the phrase belongs to
 */
var deletePhrase = function deletePhrase(phrase, doc) {
  var pool = doc.pool();
  var terms = phrase.cache.terms || phrase.terms();
  var head = terms[0];
  var tail = terms[terms.length - 1];
  //grab both sides of the chain; an empty object stands in for a missing neighbour
  var prev = pool.get(head.prev) || {};
  var after = pool.get(tail.next) || {};
  // when both the removed start and its predecessor are marked implicit,
  // reset the predecessor from its `implicit` value and give it a trailing space
  if (head.implicit && prev.implicit) {
    prev.set(prev.implicit);
    prev.post += ' ';
  }
  //first, change phrase lengths
  shrinkAll(doc, phrase.start, phrase.length, after);
  // connect [prev]->[after] and [prev]<-[after]
  // (both are always objects here, real term or stand-in)
  // NOTE(review): with a stand-in, the surviving neighbour's link is set to
  // `undefined`, since the stand-in has no `id` - confirm callers accept that.
  prev.next = after.id;
  after.prev = prev.id;
};
var _delete = deletePhrase;
/**
 * Put this text at the end of the phrase.
 * @param {Object} newPhrase - phrase to add after this one
 * @param {Object} doc - document handed through to the append helper
 * @returns {Object} this phrase
 */
var append_1 = function append_1(newPhrase, doc) {
  var self = this;
  append(self, newPhrase, doc);
  return self;
};
/**
 * Add this text to the beginning of the phrase.
 * @param {Object} newPhrase - phrase to put in front of this one
 * @param {Object} doc - document handed through to the prepend helper
 * @returns {Object} this phrase
 */
var prepend_1 = function prepend_1(newPhrase, doc) {
  var self = this;
  prepend(self, newPhrase, doc);
  return self;
};
/**
 * Remove this phrase's terms from the document.
 * @param {Object} doc - document handed through to the delete helper
 * @returns {Object} this phrase
 */
var delete_1 = function delete_1(doc) {
  var self = this;
  _delete(self, doc);
  return self;
}; // stitch-in newPhrase, stretch 'doc' + parents
/**
 * Swap this phrase's terms for those of `newPhrase`: the new terms are
 * appended first, then the original terms are deleted.
 * @param {Object} newPhrase - replacement phrase
 * @param {Object} doc - document handed through to the append/delete helpers
 */
var replace = function replace(newPhrase, doc) {
  // remember our size before the new terms arrive
  var originalLength = this.length;
  //add it to the end
  append(this, newPhrase, doc);
  //delete original terms, via a temporary phrase cut back to the old size
  var outdated = this.buildFrom(this.start, this.length);
  outdated.length = originalLength;
  _delete(outdated, doc);
};
/**
 * Turn this phrase object into 3 phrase objects, split around phrase `p`.
 * @param {Object} p - phrase to split on; its `start` id and `length` are used
 * @returns {{before: ?Object, match: ?Object, after: ?Object}} each part is
 *   built with `this.buildFrom`, or `null` when that part is empty; all
 *   three are `null` when `p.start` is not among this phrase's terms
 */
var splitOn = function splitOn(p) {
  var all = this.terms();
  var parts = {
    before: null,
    match: null,
    after: null
  };
  // where does `p` begin within our terms?
  var at = -1;
  for (var i = 0; i < all.length; i += 1) {
    if (all[i].id === p.start) {
      at = i;
      break;
    }
  }
  if (at === -1) {
    return parts;
  }
  //make all three sections into phrase-objects
  var cut = at + p.length;
  var head = all.slice(0, at);
  var middle = all.slice(at, cut);
  var tail = all.slice(cut, all.length);
  if (head.length > 0) {
    parts.before = this.buildFrom(head[0].id, head.length);
  }
  if (middle.length > 0) {
    parts.match = this.buildFrom(middle[0].id, middle.length);
  }
  if (tail.length > 0) {
    parts.after = this.buildFrom(tail[0].id, tail.length, this.pool);
  }
  return parts;
};
// insert/remove/split method group for a phrase; merged into `methods$1` via Object.assign further down
var _04Insert = {
append: append_1,
prepend: prepend_1,
"delete": delete_1,
replace: replace,
splitOn: splitOn
};
/**
 * Return json metadata for this phrase.
 * @param {Object} [options={}] - which fields to produce:
 *   `text`, `normal`, `clean`, `reduced`, `root` each add the matching text form;
 *   `trim` trims the `text`, `normal` and `reduced` results;
 *   `terms` adds per-term json (`true` for default term options, or an
 *   options object handed to each term's `json`).
 *   The object passed in is not modified.
 * @param {*} [world] - handed through to each term's `json`
 * @returns {Object} the requested fields
 */
var json$1 = function json() {
  var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
  var world = arguments.length > 1 ? arguments[1] : undefined;
  var res = {}; // text data
  if (options.text) {
    res.text = this.text();
  }
  if (options.normal) {
    res.normal = this.text('normal');
  }
  if (options.clean) {
    res.clean = this.text('clean');
  }
  if (options.reduced) {
    res.reduced = this.text('reduced');
  }
  if (options.root) {
    res.root = this.text('root');
  }
  // NOTE(review): `clean` and `root` are not trimmed here - confirm whether that is intended
  if (options.trim) {
    if (res.text) {
      res.text = res.text.trim();
    }
    if (res.normal) {
      res.normal = res.normal.trim();
    }
    if (res.reduced) {
      res.reduced = res.reduced.trim();
    }
  } // terms data
  if (options.terms) {
    // `terms: true` means "no specific term options"; resolve that into a
    // local value instead of overwriting the caller's `options.terms`
    var termOptions = options.terms === true ? {} : options.terms;
    res.terms = this.terms().map(function (t) {
      return t.json(termOptions, world);
    });
  }
  return res;
};
var _05Json$1 = {
  json: json$1
};
/**
 * Match any terms after this phrase.
 * @param {*} [regs] - match input handed to `match`; when empty, '.*' is
 *   used so that everything after the phrase is returned
 * @returns {*} the result of `match` on a phrase built from all following
 *   terms, or an empty array when this phrase has no terms or nothing follows it
 */
var lookAhead = function lookAhead(regs) {
  // if empty match string, return everything after
  if (!regs) {
    regs = '.*';
  }
  var pool = this.pool;
  var all = this.terms();
  // an empty phrase has nothing to look ahead from
  if (all.length === 0) {
    return [];
  }
  // get a list of all terms following our end, by walking the `next` links
  var terms = [];
  var id = all[all.length - 1].next;
  while (id) {
    var term = pool.get(id);
    if (!term) {
      break;
    }
    terms.push(term);
    // keep going for as long as there is a following term
    id = term.next;
  }
  if (terms.length === 0) {
    return [];
  } // got the terms, make a phrase from them
  var p = this.buildFrom(terms[0].id, terms.length);
  return p.match(regs);
};
/**
 * Match any terms before this phrase.
 * @param {*} [regs] - match input handed to `match`; when empty, '.*' is
 *   used so that everything before the phrase is returned
 * @returns {*} the result of `match` on a phrase built from all preceding
 *   terms, or an empty array when nothing precedes this phrase
 */
var lookBehind = function lookBehind(regs) {
  // if empty match string, return everything before
  var pattern = regs ? regs : '.*';
  var pool = this.pool;
  // walk the `prev` links from our start; terms are collected nearest-first
  var found = [];
  var cursor = pool.get(this.start).prev;
  for (;;) {
    var term = pool.get(cursor);
    if (!term) {
      break;
    }
    found.push(term);
    if (!term.prev) {
      break;
    }
    cursor = term.prev;
  }
  if (found.length === 0) {
    return [];
  }
  // the farthest term collected is where the preceding text begins
  var farthest = found[found.length - 1];
  return this.buildFrom(farthest.id, found.length).match(pattern);
};
// look-around method group for a phrase
var _06Lookahead = {
lookAhead: lookAhead,
lookBehind: lookBehind
};
// all phrase method groups copied onto one fresh object; on a name clash the group listed later wins
var methods$1 = Object.assign({}, _01Utils, _02Text, _03Change, _04Insert, _05Json$1, _06Lookahead);
/**
 * Cheap pre-check, to try to avoid doing the match at all.
 * @param {Object} p - phrase; `p.cache.words`, when present, is consulted as a set of known words
 * @param {Array} regs - parsed match tokens
 * @returns {boolean} true when the match can be skipped: no tokens, a required
 *   start-anchored token that is not first, a required word missing from
 *   the cached words, or a token that is both `anything` and `negative`
 */
var failFast = function failFast(p, regs) {
  var total = regs.length;
  if (total === 0) {
    return true;
  }
  for (var i = 0; i < total; i += 1) {
    var reg = regs[i];
    var isRequired = reg.optional !== true && reg.negative !== true;
    if (isRequired) {
      //start/end impossibilities
      if (i > 0 && reg.start === true) {
        return true;
      }
      // has almost no effect
      var known = p.cache.words;
      if (known !== undefined && reg.word !== undefined && known.hasOwnProperty(reg.word) !== true) {
        return true;
      }
    }
    //this is not possible
    if (reg.negative === true && reg.anything === true) {
      return true;
    }
  }
  return false;
};
var _02FailFast = failFast;
// i formally apologize for how complicated this is.
/**
 * Found a match and it's greedy? Keep going, and report where to stop.
 * @param {Array} terms - terms being matched
 * @param {number} t - position to continue from
 * @param {Object} reg - the greedy token
 * @param {Object} [until] - the following token; reaching a term that matches it stops the run
 * @param {number} index - offset added to the position when calling `doesMatch`
 * @param {number} length - handed through to `doesMatch`
 * @returns {?number} the position where the run stops (`terms.length` when
 *   it runs off the end), or `null` when the run ends before `reg.min` terms
 */
var getGreedy = function getGreedy(terms, t, reg, until, index, length) {
  var first = t;
  var cursor = t;
  while (cursor < terms.length) {
    var term = terms[cursor];
    //stop for next-reg match
    if (until && term.doesMatch(until, index + cursor, length)) {
      return cursor;
    }
    // is it max-length now?
    var consumed = cursor - first + 1;
    if (reg.max !== undefined && consumed === reg.max) {
      return cursor;
    }
    //stop here
    if (term.doesMatch(reg, index + cursor, length) === false) {
      // is it too short?
      if (reg.min !== undefined && consumed < reg.min) {
        return null;
      }
      return cursor;
    }
    cursor += 1;
  }
  return cursor;
}; //'unspecific greedy' is a weird situation.
/**
 * For an 'unspecific greedy' token: find how far it can run before the next token takes over.
 * @param {Array} terms - terms being matched
 * @param {number} t - position to search from
 * @param {Object} [nextReg] - the token that follows the greedy one
 * @param {number} index - offset added to the position when calling `doesMatch`
 * @param {number} length - handed through to `doesMatch`
 * @returns {?number} `terms.length` when there is no next token, the position
 *   of the first term matching `nextReg`, or `null` when no term matches it
 */
var greedyTo = function greedyTo(terms, t, nextReg, index, length) {
  //if there's no next one, just go off the end!
  if (!nextReg) {
    return terms.length;
  }
  //otherwise, we're looking for the next one
  var pos = t;
  while (pos < terms.length) {
    if (terms[pos].doesMatch(nextReg, index + pos, length) === true) {
      return pos;
    }
    pos += 1;
  }
  //guess it doesn't exist, then.
  return null;
};
/** tries to match a sequence of terms, starting from here */
var tryHere = function tryHere(terms, regs, index, length) {
var captures = [];
var t = 0; // we must satisfy each rule in 'regs'
for (var r = 0; r < regs.length; r += 1) {
var reg = regs[r]; //should we fail here?
if (!terms[t]) {
//are all remaining regs optional?
var hasNeeds = regs.slice(r).some(function (remain) {
return !remain.optional;
});
if (hasNeeds === false) {
break;
} // have unmet needs
return false;
} //support 'unspecific greedy' .* properly
if (reg.anything === true && reg.greedy === true) {
var skipto = greedyTo(terms, t, regs[r + 1], reg, index); // ensure it's long enough
if (reg.min !== undefined && skipto - t < reg.min) {
return false;
} // reduce it back, if it's too long
if (reg.max !== undefined && skipto - t > reg.max) {
t = t + reg.max;
continue;
} //TODO: support [*] properly
if (skipto === null) {
return false; //couldn't find it
}
t = skipto;
continue;
} //if it looks like a match, continue
//we have a special case where an end-anchored greedy match may need to
//start matching before the actual end; we do this by (temporarily!)
//removing the "end" property from the matching token... since this is
//very situation-specific, we *only* do this when we really need to.
if (reg.anything === true || reg.end === true && reg.greedy === true && index + t < length - 1 && terms[t].doesMatch(Object.assign({}, reg, {
end: false
}), index + t, length) === true || terms[t].doesMatch(reg, index + t, length) === true) {
var startAt = t; // okay, it was a match, but if it optional too,
// we should check the next reg too, to skip it?
if (reg.optional && regs[r + 1]) {
// does the next reg match it too?
if (terms[t].doesMatch(regs[r + 1], index + t, length) === true) {
// but does the next reg match the next term??
// only skip if it doesn't
if (!terms[t + 1] || terms[t + 1].doesMatch(regs[r + 1], index + t, length) === false) {
r += 1;
}
}
} //advance to the next term!
t += 1; //check any ending '$' flags
if (reg.end === true) {
//if this isn't the last term, refuse the match
if (t !== terms.length && reg.greedy !== true) {
return false;
}
} //try keep it going!
if (reg.greedy === true) {
// for greedy checking, we no longer care about the reg.start
// value, and leaving it can cause failures for anchored greedy
// matches. ditto for end-greedy matches: we need an earlier non-
// ending match to succceed until we get to the actual end.
t = getGreedy(terms, t, Object.assign({}, reg, {
start: false,
end: false
}), regs[r + 1], index, length);
if (t === null) {
return false; //greedy was too short