UNPKG

@botonic/plugin-contentful

Version:

Botonic Plugin Contentful is one of the **[available](https://github.com/hubtype/botonic/tree/master/packages)** plugins for Botonic. **[Contentful](http://www.contentful.com)** is a CMS (Content Management System) which manages contents of a great variety…

263 lines 8.82 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.StemmerSk = void 0;

// from https://github.com/mrshu/stemm-sk/blob/master/stemmsk/__init__.py

/**
 * Returns `token` without its last `count` characters.
 *
 * @param {string} token
 * @param {number} count
 * @returns {string}
 */
const chop = (token, count) => token.slice(0, token.length - count);

/**
 * Builds one suffix-stripping rule.
 *
 * A rule fires when the token is strictly longer than `longerThan` and ends
 * with one of `suffixes`; it then drops the last `drop` characters and,
 * when `palatalise` is true, passes the remainder through the stemmer's
 * `palatalise` step.
 *
 * @param {number} longerThan
 * @param {string[]} suffixes
 * @param {number} drop
 * @param {boolean} [palatalise=false]
 */
const rule = (longerThan, suffixes, drop, palatalise = false) => ({
    longerThan,
    suffixes,
    drop,
    palatalise,
});

/**
 * Applies the first rule of `rules` (in array order) that matches `token`.
 * Returns the token unchanged when no rule matches.
 *
 * @param {string} token
 * @param {Array<ReturnType<typeof rule>>} rules ordered, first match wins
 * @param {(stem: string) => string} palatalise callback used by rules that request it
 * @returns {string}
 */
function applyFirstMatchingRule(token, rules, palatalise) {
    const hit = rules.find((candidate) => token.length > candidate.longerThan &&
        candidate.suffixes.some((suffix) => token.endsWith(suffix)));
    if (hit === undefined) {
        return token;
    }
    const stem = chop(token, hit.drop);
    return hit.palatalise ? palatalise(stem) : stem;
}

// NOTE(review): every suffix below is plain lower-case ASCII, so tokens are
// presumably expected to be lower-cased with diacritics stripped before they
// reach this class — confirm against the callers.

// Ending rewrites tried by `palatalise`, in order.
const PALATAL_REWRITES = [
    { endings: ['ce', 'ci'], drop: 2, append: 'k' },
    { endings: ['ze', 'zi'], drop: 2, append: 'h' },
    { endings: ['cte', 'cti'], drop: 3, append: 'ck' },
    { endings: ['ste', 'sti'], drop: 3, append: 'sk' },
];

const CASE_RULES = [
    rule(7, ['atoch'], 5),
    rule(6, ['atom'], 3, true),
    rule(5, ['ami', 'ata', 'eho', 'emi', 'emu', 'ete', 'eti', 'ich', 'iho', 'imi', 'imu', 'och'], 2, true),
    // 'ami' and 'ata' are already consumed by the rule above, so they never fire here.
    rule(5, ['ach', 'ami', 'ata', 'aty', 'ove', 'ovi', 'ych', 'ymi'], 3),
    rule(4, ['om'], 1, true),
    rule(4, ['em', 'es', 'im'], 2, true),
    rule(4, ['am', 'at', 'ej', 'mi', 'os', 'ou', 'um', 'us', 'ym'], 2),
    // Nothing is dropped up front: the whole token is handed to palatalise.
    rule(3, ['e', 'i'], 0, true),
    // 'e' is already consumed by the rule above, so it never fires here.
    rule(3, ['a', 'e', 'o', 'u', 'y'], 1),
];

const POSSESSIVE_RULES = [
    rule(5, ['ov'], 2),
    rule(5, ['in'], 1, true),
];

const COMPARATIVE_RULES = [
    rule(5, ['ejs'], 2, true),
];

const DIMINUTIVE_RULES = [
    rule(7, ['ousok'], 5),
    rule(6, ['ecok', 'enok', 'icok', 'inok'], 3, true),
    rule(6, ['acok', 'anok', 'ocok', 'onok', 'ucok', 'unok'], 4, true),
    rule(5, ['eck', 'enk', 'ick', 'ink'], 3, true),
    rule(5, ['ack', 'ank', 'atk', 'ock', 'onk', 'uck', 'unk', 'usk'], 3),
    rule(4, ['ek', 'ik'], 1, true),
    rule(4, ['ak', 'ok', 'uk'], 1),
    rule(3, ['k'], 1),
];

const AUGMENTATIVE_RULES = [
    rule(6, ['ajzn'], 4),
    rule(5, ['izn', 'isk'], 2, true),
    rule(4, ['ak'], 2),
];

const DERIVATIONAL_RULES = [
    rule(8, ['obinec'], 6),
    rule(7, ['ionar'], 4, true),
    rule(7, ['ovisk', 'ovist', 'ovnik', 'ovstv'], 5),
    rule(6, ['asok', 'nost', 'ovec', 'ovik', 'ovin', 'ovtv', 'stin', 'teln'], 4),
    rule(6, ['enic', 'inec', 'itel'], 3, true),
    rule(5, ['arn'], 3),
    rule(5, ['enk', 'ian', 'irn', 'isk', 'ist', 'itb'], 2, true),
    rule(5, ['can', 'ctv', 'kar', 'kyn', 'ner', 'nik', 'och', 'ost', 'oun', 'ous', 'out', 'ovn', 'stv', 'usk'], 3),
    rule(4, ['ac', 'an', 'ar', 'as'], 2),
    rule(4, ['ec', 'en', 'er', 'ic', 'in', 'ir', 'it', 'iv'], 1, true),
    rule(4, ['ck', 'cn', 'dl', 'nk', 'ob', 'on', 'ot', 'ov', 'tk', 'tv', 'ul', 'vk', 'yn'], 2),
    rule(3, ['c', 'k', 'l', 'n', 't'], 1),
];

/**
 * Rule-based suffix stripper ported from the stemm-sk project linked above.
 *
 * Each `remove*` method applies at most one rule from its table: the first
 * one, in table order, whose length guard and suffix match.
 */
class StemmerSk {
    /**
     * @param {boolean} [aggressive=false] when true, `stemToken` additionally
     *   runs the comparative, diminutive, augmentative and derivational steps.
     */
    constructor(aggressive = false) {
        this.aggressive = aggressive;
    }

    /**
     * Stems every token of the list.
     *
     * @param {string[]} tokens
     * @returns {string[]} a new array with one stem per input token
     */
    stem(tokens) {
        return tokens.map((token) => this.stemToken(token));
    }

    /**
     * Stems a single token: case endings first, then possessives, then (only
     * in aggressive mode) the four extra steps, each fed the previous result.
     *
     * @param {string} token
     * @returns {string}
     */
    stemToken(token) {
        const light = this.removePossessives(this.removeCase(token));
        if (!this.aggressive) {
            return light;
        }
        const withoutComparative = this.removeComparative(light);
        const withoutDiminutive = this.removeDiminutive(withoutComparative);
        const withoutAugmentative = this.removeAugmentative(withoutDiminutive);
        return this.removeDerivational(withoutAugmentative);
    }

    /**
     * Strips a case ending, if any rule of the case table matches.
     *
     * @param {string} token
     * @returns {string}
     */
    removeCase(token) {
        return applyFirstMatchingRule(token, CASE_RULES, (stem) => this.palatalise(stem));
    }

    /**
     * Rewrites a palatalised ending (`ce`/`ci` -> `k`, `ze`/`zi` -> `h`,
     * `cte`/`cti` -> `ck`, `ste`/`sti` -> `sk`); when none applies, simply
     * drops the last character.
     *
     * @param {string} token
     * @returns {string}
     */
    palatalise(token) {
        const rewrite = PALATAL_REWRITES.find(({ endings }) => endings.some((ending) => token.endsWith(ending)));
        if (rewrite === undefined) {
            return chop(token, 1);
        }
        return `${chop(token, rewrite.drop)}${rewrite.append}`;
    }

    /**
     * Strips the `ov` / `in` endings from tokens longer than five characters.
     *
     * @param {string} token
     * @returns {string}
     */
    removePossessives(token) {
        return applyFirstMatchingRule(token, POSSESSIVE_RULES, (stem) => this.palatalise(stem));
    }

    /**
     * Strips the `ejs` ending from tokens longer than five characters.
     *
     * @param {string} token
     * @returns {string}
     */
    removeComparative(token) {
        return applyFirstMatchingRule(token, COMPARATIVE_RULES, (stem) => this.palatalise(stem));
    }

    /**
     * Strips a `k`-based ending, if any rule of the diminutive table matches.
     *
     * @param {string} token
     * @returns {string}
     */
    removeDiminutive(token) {
        return applyFirstMatchingRule(token, DIMINUTIVE_RULES, (stem) => this.palatalise(stem));
    }

    /**
     * Strips the `ajzn`, `izn`, `isk` or `ak` endings, subject to length guards.
     *
     * @param {string} token
     * @returns {string}
     */
    removeAugmentative(token) {
        return applyFirstMatchingRule(token, AUGMENTATIVE_RULES, (stem) => this.palatalise(stem));
    }

    /**
     * Strips a derivational ending, if any rule of the derivational table matches.
     *
     * @param {string} token
     * @returns {string}
     */
    removeDerivational(token) {
        return applyFirstMatchingRule(token, DERIVATIONAL_RULES, (stem) => this.palatalise(stem));
    }
}
exports.StemmerSk = StemmerSk;
//# sourceMappingURL=stemmer-sk.js.map