lemmatizer
Version:
English language lemmatizer
53 lines (52 loc) • 1.74 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const prefixes_1 = require("./prefixes");
const en_lexicon_1 = require("en-lexicon");
const en_stemmer_1 = require("en-stemmer");
const en_inflectors_1 = require("en-inflectors");
/**
 * Reduces a word to its lemma by running it through three stages in order:
 * `unprefixer`, then `verbsAndNouns`, then `safeStemmer`.
 *
 * @param {string} string - The word to lemmatize.
 * @returns {string} The output of the final `safeStemmer` stage.
 */
function lemmatizer(string) {
    const withoutPrefix = unprefixer(string);
    const baseForm = verbsAndNouns(withoutPrefix);
    return safeStemmer(baseForm);
}
exports.lemmatizer = lemmatizer;
/**
 * Strips known prefixes from the front of a word.
 *
 * Each entry of `prefixes_1.prefixes` is tried once, in list order. A prefix
 * is removed only when the word starts with it AND the remainder has a truthy
 * entry in the lexicon. Because the running result is carried from one prefix
 * to the next, more than one prefix may be removed in a single call.
 *
 * @param {string} string - The word to process.
 * @returns {string} The word with every applicable prefix removed.
 */
function unprefixer(string) {
    return prefixes_1.prefixes.reduce((word, prefix) => {
        if (!word.startsWith(prefix)) {
            return word;
        }
        const remainder = word.slice(prefix.length);
        // Only drop the prefix when what is left is itself a lexicon entry.
        return en_lexicon_1.lexicon[remainder] ? remainder : word;
    }, string);
}
exports.unprefixer = unprefixer;
/**
 * Normalizes inflected verbs and plural nouns.
 *
 * The verb check runs first: a token that `isVerb` accepts is converted with
 * `Inflectors#toPresent`. Otherwise a token that `isPluralNoun` accepts is
 * converted with `Inflectors#toSingular`. Any other token is returned as is.
 *
 * @param {string} token - The word to normalize.
 * @returns {string} The converted word, or `token` itself when neither check matches.
 */
function verbsAndNouns(token) {
    if (isVerb(token)) {
        const inflector = new en_inflectors_1.Inflectors(token);
        return inflector.toPresent();
    }
    if (!isPluralNoun(token)) {
        return token;
    }
    const inflector = new en_inflectors_1.Inflectors(token);
    return inflector.toSingular();
}
/**
 * Stems a word, but only accepts a result that the lexicon recognizes.
 *
 * The stem from `en_stemmer_1.stemmer` is tried first, followed by the stem
 * with "e", "y" and "l" appended, in that order. The first candidate with a
 * truthy lexicon entry is returned. When none matches, the original input is
 * returned unchanged.
 *
 * @param {string} string - The word to stem.
 * @returns {string} The first lexicon-backed candidate, or `string` itself.
 */
function safeStemmer(string) {
    const root = en_stemmer_1.stemmer(string);
    // Order matters: the bare stem wins over any repaired spelling.
    const candidates = [root, root + "e", root + "y", root + "l"];
    for (const candidate of candidates) {
        if (en_lexicon_1.lexicon[candidate]) {
            return candidate;
        }
    }
    return string;
}
/**
 * Reports whether the lexicon entry for a word carries a verb tag.
 *
 * The entry is read as a "|"-separated list of tags, and any tag that starts
 * with "V" counts as a verb tag.
 *
 * @param {string} string - The word to look up in the lexicon.
 * @returns {boolean} `true` when at least one tag starts with "V", else `false`.
 */
function isVerb(string) {
    const entry = en_lexicon_1.lexicon[string];
    // A bracket lookup also walks the prototype chain, so on an ordinary
    // object a word like "valueOf", "constructor" or "__proto__" can resolve
    // to an inherited function/object. Calling `.split` on that would throw,
    // so only string entries are treated as tag lists.
    if (typeof entry !== "string") {
        return false;
    }
    return entry.split("|").some((tag) => tag.startsWith("V"));
}
/**
 * Reports whether the lexicon entry for a word carries a plural-noun tag.
 *
 * The entry is read as a "|"-separated list of tags, and a tag counts as a
 * plural-noun tag when it starts with "N" and ends with "S".
 *
 * @param {string} string - The word to look up in the lexicon.
 * @returns {boolean} `true` when at least one tag matches, else `false`.
 */
function isPluralNoun(string) {
    const entry = en_lexicon_1.lexicon[string];
    // A bracket lookup also walks the prototype chain, so on an ordinary
    // object a word like "valueOf", "constructor" or "__proto__" can resolve
    // to an inherited function/object. Calling `.split` on that would throw,
    // so only string entries are treated as tag lists.
    if (typeof entry !== "string") {
        return false;
    }
    return entry
        .split("|")
        .some((tag) => tag.startsWith("N") && tag.endsWith("S"));
}
exports.default = lemmatizer;