@botonic/plugin-contentful
Version:
Botonic Plugin Contentful is one of the **[available](https://github.com/hubtype/botonic/tree/master/packages)** plugins for Botonic. **[Contentful](http://www.contentful.com)** is a CMS (Content Management System) which manages contents of a great variet
263 lines • 8.82 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.StemmerSk = void 0;
// from https://github.com/mrshu/stemm-sk/blob/master/stemmsk/__init__.py
class StemmerSk {
constructor(aggressive = false) {
this.aggressive = aggressive;
}
stem(tokens) {
return tokens.map(token => this.stemToken(token));
}
stemToken(token) {
let stem = this.removeCase(token);
stem = this.removePossessives(stem);
if (this.aggressive) {
stem = this.removeComparative(stem);
stem = this.removeDiminutive(stem);
stem = this.removeAugmentative(stem);
stem = this.removeDerivational(stem);
}
return stem;
}
removeCase(token) {
const length = token.length;
if (length > 7 && token.endsWith('atoch')) {
return token.slice(0, length - 5);
}
if (length > 6) {
if (token.endsWith('atom')) {
return this.palatalise(token.slice(0, length - 3));
}
}
if (length > 5) {
const tokenEnding = token.slice(-3);
if ([
'ami',
'ata',
'eho',
'emi',
'emu',
'ete',
'eti',
'ich',
'ich',
'iho',
'iho',
'imi',
'imu',
'och',
].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 2));
}
if (['ach', 'ami', 'ata', 'aty', 'ove', 'ovi', 'ych', 'ymi'].includes(tokenEnding)) {
return token.slice(0, length - 3);
}
}
if (length > 4) {
if (token.endsWith('om')) {
return this.palatalise(token.slice(0, length - 1));
}
const tokenEnding = token.slice(-2);
if (['em', 'es', 'im'].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 2));
}
if (['am', 'at', 'ej', 'mi', 'os', 'ou', 'um', 'us', 'ym'].includes(tokenEnding)) {
return token.slice(0, length - 2);
}
}
if (length > 3) {
const tokenLastCharacter = token.slice(-1);
if (['e', 'i'].includes(tokenLastCharacter)) {
return this.palatalise(token);
}
if (['a', 'e', 'o', 'u', 'y'].includes(tokenLastCharacter)) {
return token.slice(0, length - 1);
}
}
return token;
}
palatalise(token) {
const length = token.length;
let lastCharacters = token.slice(-2);
let substring = token.slice(0, length - 2);
if (['ce', 'ci'].includes(lastCharacters)) {
return substring + 'k';
}
if (['ze', 'zi'].includes(lastCharacters)) {
return substring + 'h';
}
lastCharacters = token.slice(-3);
substring = token.slice(0, length - 3);
if (['cte', 'cti'].includes(lastCharacters)) {
return substring + 'ck';
}
if (['ste', 'sti'].includes(lastCharacters)) {
return substring + 'sk';
}
return token.slice(0, length - 1);
}
removePossessives(token) {
const length = token.length;
if (length > 5) {
if (token.endsWith('ov')) {
return token.slice(0, length - 2);
}
if (token.endsWith('in')) {
return this.palatalise(token.slice(0, length - 1));
}
}
return token;
}
removeComparative(token) {
const length = token.length;
if (length > 5) {
if (token.endsWith('ejs')) {
return this.palatalise(token.slice(0, length - 2));
}
}
return token;
}
removeDiminutive(token) {
const length = token.length;
if (length > 7 && token.endsWith('ousok')) {
return token.slice(0, length - 5);
}
if (length > 6) {
const tokenEnding = token.slice(-4);
if (['ecok', 'enok', 'icok', 'inok'].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 3));
}
if (['acok', 'anok', 'ocok', 'onok', 'ucok', 'unok'].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 4));
}
}
if (length > 5) {
const tokenEnding = token.slice(-3);
if (['eck', 'enk', 'ick', 'ink'].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 3));
}
if (['ack', 'ank', 'atk', 'ock', 'onk', 'uck', 'unk', 'usk'].includes(tokenEnding)) {
return token.slice(0, length - 3);
}
}
if (length > 4) {
const tokenEnding = token.slice(-2);
if (['ek', 'ik'].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 1));
}
if (['ak', 'ok', 'uk'].includes(tokenEnding)) {
return token.slice(0, length - 1);
}
}
if (length > 3 && token.endsWith('k')) {
return token.slice(0, length - 1);
}
return token;
}
removeAugmentative(token) {
const length = token.length;
if (length > 6 && token.endsWith('ajzn')) {
return token.slice(0, length - 4);
}
if (length > 5 && ['izn', 'isk'].includes(token.slice(-3))) {
return this.palatalise(token.slice(0, length - 2));
}
if (length > 4 && token.endsWith('ak')) {
return token.slice(0, length - 2);
}
return token;
}
// eslint-disable-next-line complexity
removeDerivational(token) {
const length = token.length;
if (length > 8 && token.endsWith('obinec')) {
return token.slice(0, length - 6);
}
if (length > 7) {
if (token.endsWith('ionar')) {
return this.palatalise(token.slice(0, length - 4));
}
if (['ovisk', 'ovist', 'ovnik', 'ovstv'].includes(token.slice(-5))) {
return token.slice(0, length - 5);
}
}
if (length > 6) {
const tokenEnding = token.slice(-4);
if ([
'asok',
'nost',
'ovec',
'ovik',
'ovin',
'ovtv',
'stin',
'teln',
].includes(tokenEnding)) {
return token.slice(0, length - 4);
}
if (['enic', 'inec', 'itel'].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 3));
}
}
if (length > 5) {
if (token.endsWith('arn')) {
return token.slice(0, length - 3);
}
const tokenEnding = token.slice(-3);
if (['enk', 'ian', 'irn', 'isk', 'ist', 'itb'].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 2));
}
if ([
'can',
'ctv',
'kar',
'kyn',
'ner',
'nik',
'och',
'ost',
'oun',
'ous',
'out',
'ovn',
'stv',
'usk',
].includes(tokenEnding)) {
return token.slice(0, length - 3);
}
}
if (length > 4) {
const tokenEnding = token.slice(-2);
if (['ac', 'an', 'ar', 'as'].includes(tokenEnding)) {
return token.slice(0, length - 2);
}
if (['ec', 'en', 'er', 'ic', 'in', 'ir', 'it', 'iv'].includes(tokenEnding)) {
return this.palatalise(token.slice(0, length - 1));
}
if ([
'ck',
'cn',
'dl',
'nk',
'ob',
'on',
'ot',
'ov',
'tk',
'tv',
'ul',
'vk',
'yn',
].includes(tokenEnding)) {
return token.slice(0, length - 2);
}
}
if (length > 3 && ['c', 'k', 'l', 'n', 't'].includes(token.slice(-1))) {
return token.slice(0, length - 1);
}
return token;
}
}
exports.StemmerSk = StemmerSk;
//# sourceMappingURL=stemmer-sk.js.map