compromise
Version:
natural language processing in the browser
179 lines (171 loc) • 2.27 kB
JavaScript
//these are common word shortenings used in the lexicon and sentence segmentation methods
//these are all nouns, or at the least, belong beside one.
;
//common abbreviations, grouped under the tag each one is filed under.
//every entry is lowercase and written without a trailing period.
//the order of categories and of entries within each list is kept stable.
const compact = {
  Noun: [
    'arc', 'al', 'exp', 'fy', 'pd', 'pl', 'plz', 'tce', 'bl', 'ma', 'ba',
    'lit', 'ex', 'eg', 'ie', 'ca', 'cca', 'vs', 'etc', 'esp', 'ft',
    //these are too ambiguous (noted as such, but still part of the list)
    'bc', 'ad', 'md', 'corp', 'col'
  ],

  Organization: [
    'dept', 'univ', 'assn', 'bros', 'inc', 'ltd', 'co',
    //proper nouns with exclamation marks
    'yahoo', 'joomla', 'jeopardy'
  ],

  Place: [
    'rd', 'st', 'dist', 'mt', 'ave', 'blvd', 'cl', 'ct', 'cres', 'hwy',
    //states
    'ariz', 'cal', 'calif', 'colo', 'conn', 'fla', 'fl', 'ga', 'ida', 'ia',
    'kan', 'kans', 'minn', 'neb', 'nebr', 'okla', 'penna', 'penn', 'pa',
    'dak', 'tenn', 'tex', 'ut', 'vt', 'va', 'wis', 'wisc', 'wy', 'wyo',
    'usafa', 'alta', 'ont', 'que', 'sask'
  ],

  Date: [
    'jan', 'feb', 'mar', 'apr', 'jun', 'jul', 'aug', 'sep', 'sept', 'oct',
    'nov', 'dec', 'circa'
  ],

  //Honorifics
  Honorific: [
    'adj', 'adm', 'adv', 'asst', 'atty', 'bldg', 'brig', 'capt', 'cmdr',
    'comdr', 'cpl', 'det', 'dr', 'esq', 'gen', 'gov', 'hon', 'jr', 'llb',
    'lt', 'maj', 'messrs', 'mister', 'mlle', 'mme', 'mr', 'mrs', 'ms',
    'mstr', 'op', 'ord', 'phd', 'prof', 'pvt', 'rep', 'reps', 'res', 'rev',
    'sen', 'sens', 'sfc', 'sgt', 'sir', 'sr', 'supt', 'surg'
    //'miss' and 'misses' are not included in this list
  ]
};
//invert the compact table: each abbreviation becomes a key whose value is
//the name of the category it was listed under (e.g. 'dr' -> 'Honorific').
const abbreviations = {};
for (const tag of Object.keys(compact)) {
  for (const term of compact[tag]) {
    //if a term were listed under two categories, the later category would win
    abbreviations[term] = tag;
  }
}
//the flat term -> category lookup is this module's only export
module.exports = abbreviations;