UNPKG

lemmatizer

Version:
53 lines (52 loc) 1.74 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const prefixes_1 = require("./prefixes");
const en_lexicon_1 = require("en-lexicon");
const en_stemmer_1 = require("en-stemmer");
const en_inflectors_1 = require("en-inflectors");

/**
 * Looks a word up in the lexicon, considering own entries only.
 *
 * A bare `lexicon[word]` also resolves names inherited from
 * `Object.prototype` ("toString", "valueOf", "__proto__", ...), which would
 * be mistaken for dictionary entries and crash callers that expect a string.
 *
 * @param {string} word - The word to look up.
 * @returns {string|undefined} The "|"-separated tag string stored for the
 *   word, or `undefined` when the lexicon has no own entry for it.
 */
function lexiconEntry(word) {
    return Object.prototype.hasOwnProperty.call(en_lexicon_1.lexicon, word)
        ? en_lexicon_1.lexicon[word]
        : undefined;
}

/**
 * Reduces a word to its lemma by chaining three steps: prefix removal,
 * verb/plural-noun inflection, and a lexicon-checked stemming pass.
 *
 * @param {string} string - The word to lemmatize.
 * @returns {string} The lemmatized word.
 */
function lemmatizer(string) {
    return safeStemmer(verbsAndNouns(unprefixer(string)));
}
exports.lemmatizer = lemmatizer;

/**
 * Strips known prefixes from a word, but only when the remainder is itself
 * a lexicon entry.
 *
 * Prefixes are tried in list order against the progressively shortened
 * word, so more than one prefix may be removed in a single call.
 *
 * @param {string} string - The word to strip.
 * @returns {string} The word with any qualifying prefixes removed.
 */
function unprefixer(string) {
    let word = string;
    for (const prefix of prefixes_1.prefixes) {
        if (!word.startsWith(prefix)) {
            continue;
        }
        const remainder = word.slice(prefix.length);
        if (lexiconEntry(remainder)) {
            word = remainder;
        }
    }
    return word;
}
exports.unprefixer = unprefixer;

/**
 * Normalizes inflected forms: tokens tagged as verbs are converted with
 * `Inflectors#toPresent`, tokens tagged as plural nouns with
 * `Inflectors#toSingular`. Any other token is returned unchanged.
 *
 * The verb check takes precedence when a token carries both kinds of tag.
 *
 * @param {string} token - The word to normalize.
 * @returns {string} The normalized word.
 */
function verbsAndNouns(token) {
    if (isVerb(token)) {
        return new en_inflectors_1.Inflectors(token).toPresent();
    }
    if (isPluralNoun(token)) {
        return new en_inflectors_1.Inflectors(token).toSingular();
    }
    return token;
}

/**
 * Stems a word, accepting the result only if it is a lexicon entry.
 *
 * The raw stem is tried first, then the stem with "e", "y" and "l"
 * appended (in that order). If none of the candidates is in the lexicon
 * the input is returned untouched.
 *
 * @param {string} string - The word to stem.
 * @returns {string} The first candidate found in the lexicon, or the input.
 */
function safeStemmer(string) {
    const stem = en_stemmer_1.stemmer(string);
    const candidates = [stem, stem + "e", stem + "y", stem + "l"];
    for (const candidate of candidates) {
        if (lexiconEntry(candidate)) {
            return candidate;
        }
    }
    return string;
}

/**
 * Tells whether the lexicon lists any tag starting with "V" for the word.
 *
 * @param {string} string - The word to test.
 * @returns {boolean} `true` when such a tag exists, `false` otherwise.
 */
function isVerb(string) {
    const entry = lexiconEntry(string);
    if (!entry) {
        return false;
    }
    return entry.split("|").some((tag) => tag.startsWith("V"));
}

/**
 * Tells whether the lexicon lists any tag that starts with "N" and ends
 * with "S" for the word.
 *
 * @param {string} string - The word to test.
 * @returns {boolean} `true` when such a tag exists, `false` otherwise.
 */
function isPluralNoun(string) {
    const entry = lexiconEntry(string);
    if (!entry) {
        return false;
    }
    return entry
        .split("|")
        .some((tag) => tag.startsWith("N") && tag.endsWith("S"));
}

exports.default = lemmatizer;