UNPKG

@botonic/plugin-contentful

Version:

## What Does This Plugin Do?

173 lines 5.89 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.StemmerPl = void 0; class StemmerPl { stem(tokens) { return tokens.map((stem) => { stem = stem.toLowerCase(); stem = this.removeNouns(stem); stem = this.removeDiminutive(stem); const adj = this.removeAdjectiveEnds(stem); if (adj != stem) { return adj; } stem = this.removeVerbsEnds(stem); stem = this.removeAdverbsEnds(stem); stem = this.removePluralForms(stem); return this.removeGeneralEnds(stem); }); } removeGeneralEnds(word) { if (word.length > 4 && ['ia', 'ie'].includes(word.substr(-2))) { return word.substr(0, word.length - 2); } if (word.length > 4 && ['', 'ą', 'i', 'a', 'ę', 'y', 'e', 'ł'].includes(word.substr(-1))) { return word.substr(0, word.length - 1); } return word; } removeDiminutive(word) { if (word.length > 6) { if (['eczek', 'iczek', 'iszek', 'aszek', 'uszek'].includes(word.substr(-5))) { return word.substr(0, word.length - 5); } if (['enek', 'ejek', 'erek'].includes(word.substr(-4))) { return word.substr(0, word.length - 2); } } if (word.length > 4) { if (['ek', 'ak'].includes(word.substr(-2))) { return word.substr(0, word.length - 2); } } return word; } removeVerbsEnds(word) { // https://github.com/Tutanchamon/pl_stemmer/issues/2 if (word.length > 5 && word.endsWith('bym')) { return word.substr(0, word.length - 3); } if (word.length > 5 && [ // eslint-disable-next-line prettier/prettier 'esz', 'asz', 'cie', 'eść', 'esc', 'aść', 'asc', 'łem', 'lem', 'amy', 'emy', ].includes(word.substr(-3))) { return word.substr(0, word.length - 3); } if (word.length > 3 && [ 'esz', 'asz', 'eść', 'esc', 'aść', 'asc', 'eć', 'ec', 'ać', 'ac', ].includes(word.substr(-3))) { return word.substr(0, word.length - 2); } // https://github.com/Tutanchamon/pl_stemmer/issues/3 if (word.length > 3 && ['aj'].includes(word.substr(-2))) { return word.substr(0, word.length - 1); } if (word.length > 3 && [ // eslint-disable-next-line prettier/prettier 'ać', 'ac', 'em', 'am', 'ał', 'al', 'ił', 'il', 'ić', 'ic', 'ąc', 'ac', ].includes(word.substr(-2))) { return word.substr(0, word.length - 2); } return word; } removeNouns(word) { if (word.length > 7 && ['zacja', 'zacją', 'zacji'].includes(word.substr(-5))) { return word.substr(0, word.length - 4); } if (word.length > 6 && ['acja', 'acji', 'acją', 'tach', 'anie', 'enie', 'eni', 'ani'].includes(word.substr(-4))) { return word.substr(0, word.length - 4); } if (word.length > 6 && word.endsWith('tyka')) { return word.substr(0, word.length - 2); } if (word.length > 5 && ['ach', 'ami', 'nia', 'ni', 'cia', 'ci'].includes(word.substr(-3))) { return word.substr(0, word.length - 3); } if (word.length > 5 && ['cji', 'cja', 'cją'].includes(word.substr(-3))) { return word.substr(0, word.length - 2); } if (word.length > 5 && ['ce', 'ta'].includes(word.substr(-2))) { return word.substr(0, word.length - 2); } return word; } removeAdjectiveEnds(word) { if (word.length > 7 && word.startsWith('naj') && (word.endsWith('sze') || word.endsWith('szy'))) { return word.substring(3, word.length - 3); } if (word.length > 7 && word.startsWith('naj') && word.endsWith('szych')) { return word.substring(3, word.length - 5); } if (word.length > 6 && word.endsWith('czny')) { return word.substr(0, word.length - 4); } if (word.length > 5 && ['owy', 'owa', 'owe', 'ych', 'ego'].includes(word.substr(-3))) { return word.substr(0, word.length - 3); } if (word.length > 5 && ['ej'].includes(word.substr(-2))) { return word.substr(0, word.length - 2); } return word; } removeAdverbsEnds(word) { if (word.length > 4 && ['nie', 'wie'].includes(word.substr(-3))) { return word.substr(0, word.length - 2); } if (word.length > 4 && word.endsWith('rze')) { return word.substr(0, word.length - 2); } return word; } removePluralForms(word) { if (word.length > 4 && (word.endsWith('ów') || word.endsWith('ow') || word.endsWith('om'))) { return word.substr(0, word.length - 2); } if (word.length > 4 && word.endsWith('ami')) { return word.substr(0, word.length - 3); } return word; } } exports.StemmerPl = StemmerPl; //# sourceMappingURL=polish-stemmer.js.map