@botonic/plugin-contentful
Version:
## What Does This Plugin Do?
173 lines • 5.89 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.StemmerPl = void 0;
/**
 * Tells whether `word` ends with at least one of the given suffixes.
 * Unlike comparing a fixed-width tail, this works for suffix lists that mix
 * entries of different lengths.
 *
 * @param {string} word - Word to inspect.
 * @param {string[]} suffixes - Candidate endings.
 * @returns {boolean} True when any suffix matches the end of `word`.
 */
function endsWithAny(word, suffixes) {
    return suffixes.some((suffix) => word.endsWith(suffix));
}

/**
 * Rule-based suffix stripper for Polish tokens.
 *
 * Every rule is guarded by a minimum word length so that short words are left
 * untouched. Many endings are listed both with diacritics and in their
 * ASCII-folded form (e.g. 'eść' / 'esc').
 */
class StemmerPl {
    /**
     * Stems every token of the list.
     *
     * Each token is lowercased, then passed through the noun and diminutive
     * rules. If the adjective rule changes the word, that result is final;
     * otherwise the verb, adverb, plural and general rules run in that order.
     *
     * @param {string[]} tokens - Tokens to stem.
     * @returns {string[]} One stem per input token, in the same order.
     */
    stem(tokens) {
        return tokens.map((token) => {
            let stem = token.toLowerCase();
            stem = this.removeNouns(stem);
            stem = this.removeDiminutive(stem);
            const adjective = this.removeAdjectiveEnds(stem);
            if (adjective !== stem) {
                return adjective;
            }
            stem = this.removeVerbsEnds(stem);
            stem = this.removeAdverbsEnds(stem);
            stem = this.removePluralForms(stem);
            return this.removeGeneralEnds(stem);
        });
    }

    /**
     * Last-resort rule: drops a trailing 'ia'/'ie' or a single trailing
     * vowel-like ending from words longer than 4 characters.
     *
     * @param {string} word - Lowercased word.
     * @returns {string} The word without the matched ending, or unchanged.
     */
    removeGeneralEnds(word) {
        if (word.length <= 4) {
            return word;
        }
        if (endsWithAny(word, ['ia', 'ie'])) {
            return word.slice(0, -2);
        }
        // 'u' replaces an empty-string entry that could never match a
        // one-character tail.
        if (endsWithAny(word, ['u', 'ą', 'i', 'a', 'ę', 'y', 'e', 'ł'])) {
            return word.slice(0, -1);
        }
        return word;
    }

    /**
     * Strips diminutive endings.
     *
     * @param {string} word - Lowercased word.
     * @returns {string} The word without the matched ending, or unchanged.
     */
    removeDiminutive(word) {
        if (word.length > 6) {
            if (endsWithAny(word, ['eczek', 'iczek', 'iszek', 'aszek', 'uszek'])) {
                return word.slice(0, -5);
            }
            // Only the final 'ek' is dropped for these 4-letter endings.
            if (endsWithAny(word, ['enek', 'ejek', 'erek'])) {
                return word.slice(0, -2);
            }
        }
        if (word.length > 4 && endsWithAny(word, ['ek', 'ak'])) {
            return word.slice(0, -2);
        }
        return word;
    }

    /**
     * Strips verb endings (conjugated forms and infinitives).
     *
     * @param {string} word - Lowercased word.
     * @returns {string} The word without the matched ending, or unchanged.
     */
    removeVerbsEnds(word) {
        // https://github.com/Tutanchamon/pl_stemmer/issues/2
        if (word.length > 5 && word.endsWith('bym')) {
            return word.slice(0, -3);
        }
        const longVerbEnds = [
            'esz', 'asz', 'cie', 'eść', 'esc', 'aść', 'asc',
            'łem', 'lem', 'amy', 'emy',
        ];
        if (word.length > 5 && endsWithAny(word, longVerbEnds)) {
            return word.slice(0, -3);
        }
        // Shorter words keep the first letter of the ending (only 2 characters
        // are removed). The 2-letter infinitive endings are matched with
        // endsWith so that 'eć'/'ec' are reachable, like 'ać' and 'ić'.
        const shortVerbEnds = [
            'esz', 'asz', 'eść', 'esc', 'aść', 'asc',
            'eć', 'ec', 'ać', 'ac',
        ];
        if (word.length > 3 && endsWithAny(word, shortVerbEnds)) {
            return word.slice(0, -2);
        }
        // https://github.com/Tutanchamon/pl_stemmer/issues/3
        if (word.length > 3 && word.endsWith('aj')) {
            return word.slice(0, -1);
        }
        // 'ać'/'ac' are already handled by the rule above with the same result.
        const twoLetterVerbEnds = [
            'em', 'am', 'ał', 'al', 'ił', 'il', 'ić', 'ic', 'ąc',
        ];
        if (word.length > 3 && endsWithAny(word, twoLetterVerbEnds)) {
            return word.slice(0, -2);
        }
        return word;
    }

    /**
     * Strips noun endings.
     *
     * @param {string} word - Lowercased word.
     * @returns {string} The word without the matched ending, or unchanged.
     */
    removeNouns(word) {
        // The leading 'z' of '-zacja' is kept: only 4 characters are removed.
        if (word.length > 7 && endsWithAny(word, ['zacja', 'zacją', 'zacji'])) {
            return word.slice(0, -4);
        }
        // 'eniu'/'aniu' replace 3-letter entries ('eni'/'ani') that could never
        // match a 4-character tail.
        const fourLetterEnds = [
            'acja', 'acji', 'acją', 'tach', 'anie', 'enie', 'eniu', 'aniu',
        ];
        if (word.length > 6 && endsWithAny(word, fourLetterEnds)) {
            return word.slice(0, -4);
        }
        if (word.length > 6 && word.endsWith('tyka')) {
            return word.slice(0, -2);
        }
        // 'niu'/'ciu' replace 2-letter entries ('ni'/'ci') that could never
        // match a 3-character tail.
        if (word.length > 5 &&
            endsWithAny(word, ['ach', 'ami', 'nia', 'niu', 'cia', 'ciu'])) {
            return word.slice(0, -3);
        }
        if (word.length > 5 && endsWithAny(word, ['cji', 'cja', 'cją'])) {
            return word.slice(0, -2);
        }
        if (word.length > 5 && endsWithAny(word, ['ce', 'ta'])) {
            return word.slice(0, -2);
        }
        return word;
    }

    /**
     * Strips adjective endings, including the 'naj…sze/szy/szych' wrapper,
     * for which both the 'naj' prefix and the ending are removed.
     *
     * @param {string} word - Lowercased word.
     * @returns {string} The word without the matched affixes, or unchanged.
     */
    removeAdjectiveEnds(word) {
        const isNajForm = word.length > 7 && word.startsWith('naj');
        if (isNajForm && endsWithAny(word, ['sze', 'szy'])) {
            return word.slice(3, -3);
        }
        if (isNajForm && word.endsWith('szych')) {
            return word.slice(3, -5);
        }
        if (word.length > 6 && word.endsWith('czny')) {
            return word.slice(0, -4);
        }
        if (word.length > 5 &&
            endsWithAny(word, ['owy', 'owa', 'owe', 'ych', 'ego'])) {
            return word.slice(0, -3);
        }
        if (word.length > 5 && word.endsWith('ej')) {
            return word.slice(0, -2);
        }
        return word;
    }

    /**
     * Strips adverb endings; only the last 2 characters of the 3-letter
     * ending are removed.
     *
     * @param {string} word - Lowercased word.
     * @returns {string} The word without the matched ending, or unchanged.
     */
    removeAdverbsEnds(word) {
        if (word.length > 4 && endsWithAny(word, ['nie', 'wie', 'rze'])) {
            return word.slice(0, -2);
        }
        return word;
    }

    /**
     * Strips plural case endings.
     *
     * @param {string} word - Lowercased word.
     * @returns {string} The word without the matched ending, or unchanged.
     */
    removePluralForms(word) {
        if (word.length <= 4) {
            return word;
        }
        if (endsWithAny(word, ['ów', 'ow', 'om'])) {
            return word.slice(0, -2);
        }
        if (word.endsWith('ami')) {
            return word.slice(0, -3);
        }
        return word;
    }
}
exports.StemmerPl = StemmerPl;
//# sourceMappingURL=polish-stemmer.js.map