expand-contractions
Version:
Convert English word contractions such as "what's" into "what is".
72 lines (54 loc) • 2.16 kB
JavaScript
var wordLookup = require('./word-lookup');
/**
 * Two-way converter between contractions and their expanded forms,
 * driven by a caller-supplied word list.
 *
 * Matching is case-insensitive and is a plain alternation of the list's
 * entries; it is NOT anchored to word boundaries, so an entry can also match
 * inside a longer word.
 */
class Contractions {
  /**
   * @param {Object<string, string>} contractions - Map of contraction to
   *   expanded text, e.g. `{ "what's": "what is" }`.
   */
  constructor(contractions) {
    // _convert() looks each match up by its lowercased text, so both tables
    // are keyed in lowercase. Null-prototype objects keep inherited names
    // such as "constructor" from ever being returned as a replacement.
    const expandLookup = Object.create(null);
    const contractLookup = Object.create(null);
    Object.keys(contractions).forEach((key) => {
      const expansion = String(contractions[key]);
      expandLookup[key.toLowerCase()] = expansion;
      // Reverse lookup: if two contractions share an expansion, the later one wins.
      contractLookup[expansion.toLowerCase()] = key;
    });

    this._expandLookup = expandLookup;
    this._expandRegexp = Contractions._buildRegexp(expandLookup);
    this._contractLookup = contractLookup;
    this._contractRegexp = Contractions._buildRegexp(contractLookup);

    // Expose class for custom word lists
    this.Contractions = Contractions;
  }

  /**
   * Build a global, case-insensitive regexp that matches any key of `lookup`.
   *
   * @param {Object<string, string>} lookup - Table whose keys should be matched.
   * @returns {RegExp} Alternation of the keys, longest key first.
   */
  static _buildRegexp(lookup) {
    const keys = Object.keys(lookup)
      // An empty alternative would match at every position in the text.
      .filter((key) => key.length > 0)
      // Put the longest keys first so they match before shorter, partial
      // matches; fall back to the replacement length only on a tie.
      .sort((a, b) => (b.length - a.length) || (lookup[b].length - lookup[a].length));

    if (keys.length === 0) {
      // Nothing to match: use a pattern that can never succeed.
      return /(?!)/gi;
    }

    // Keys are literal text, so neutralise any regexp metacharacters in them.
    const escaped = keys.map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    return new RegExp(escaped.join('|'), 'gi');
  }

  /**
   * Replace every contraction in `text` with its expanded form.
   *
   * @param {string} text - Text to convert.
   * @returns {string} Converted text.
   */
  expand(text) {
    return this._convert(text, this._expandLookup, this._expandRegexp);
  }

  /**
   * Replace every expanded form in `text` with its contraction.
   *
   * @param {string} text - Text to convert.
   * @returns {string} Converted text.
   */
  contract(text) {
    return this._convert(text, this._contractLookup, this._contractRegexp);
  }

  /**
   * Replace each match of `regexp` in `text` with its entry from `lookup`.
   * When the matched text starts with an ASCII uppercase letter (ignoring one
   * leading apostrophe), the replacement's first letter is uppercased too.
   *
   * @param {string} text - Text to convert.
   * @param {Object<string, string>} lookup - Lowercase-keyed replacement table.
   * @param {RegExp} regexp - Global regexp matching the keys of `lookup`.
   * @returns {string} Converted text.
   */
  _convert(text, lookup, regexp) {
    return text.replace(regexp, (matched) => {
      let replacement = lookup[matched.toLowerCase()];
      if (replacement === undefined) {
        // No entry for this match: leave the text as it was.
        return matched;
      }

      // Skip a leading apostrophe (as in "'Tis") when checking the case.
      const firstCharCode = matched.charAt(0) === '\'' ? matched.charCodeAt(1) : matched.charCodeAt(0);

      // Check if first character of matched string is uppercase (ASCII A-Z)
      if (firstCharCode >= 65 && firstCharCode <= 90) {
        // Uppercase the first letter of the replacement text
        if (replacement.charAt(0) === '\'') {
          replacement = '\'' + replacement.charAt(1).toUpperCase() + replacement.slice(2);
        } else {
          replacement = replacement.charAt(0).toUpperCase() + replacement.slice(1);
        }
      }
      return replacement;
    });
  }
}
// Export a ready-to-use instance built from the word list loaded from
// './word-lookup'. The class itself is reachable through the instance's
// `Contractions` property, for building converters with a custom word list.
module.exports = new Contractions(wordLookup);
;