@botonic/plugin-contentful
Version:
## What Does This Plugin Do?
57 lines • 1.76 kB
JavaScript
// From http://nlp.ffzg.hr/resources/tools/stemmer-for-croatian/
import { hrDefaultStopWords } from '../stopwords/stopwords-hr';
import { hrRules } from './rules/rules-hr';
import { hrTransformations } from './transformations/transformations-hr';
/**
 * Rule-based stemmer for Croatian tokens.
 *
 * Each token is processed in three steps: stop words are returned untouched,
 * then a suffix transformation from `hrTransformations` is applied, and
 * finally the first matching pattern in `hrRules` whose captured root is
 * acceptable is used to strip the ending.
 */
export class StemmerHr {
  /**
   * Stems every token of a list.
   *
   * @param {string[]} tokens - Tokens to stem.
   * @returns {string[]} A new array with one stemmed token per input token.
   */
  stem(tokens) {
    return tokens.map((token) => this.stemToken(token));
  }

  /**
   * Stems a single token. Stop words are returned unchanged.
   *
   * @param {string} token - Token to stem.
   * @returns {string} The stemmed token.
   */
  stemToken(token) {
    if (this.isStopWord(token)) {
      return token;
    }
    return this.getRoot(this.transform(token));
  }

  /**
   * Tells whether the token is listed in `hrDefaultStopWords`.
   *
   * @param {string} token - Token to look up.
   * @returns {boolean} `true` when the token is a stop word.
   */
  isStopWord(token) {
    return hrDefaultStopWords.includes(token);
  }

  /**
   * Rewrites the ending of the token using the first transformation whose
   * target suffix matches. `hrTransformations` is read as an object that maps
   * each replacement to the list of suffixes it replaces.
   *
   * @param {string} token - Token to transform.
   * @returns {string} The token with its suffix replaced, or the token
   *   itself when no target suffix matches.
   */
  transform(token) {
    for (const [replacement, targets] of Object.entries(hrTransformations)) {
      for (const target of targets) {
        if (token.endsWith(target)) {
          // Swap the trailing suffix itself. String.prototype.replace with a
          // string pattern would rewrite the FIRST occurrence of `target`,
          // which is wrong when the suffix also appears earlier in the token.
          return token.slice(0, token.length - target.length) + replacement;
        }
      }
    }
    return token;
  }

  /**
   * Extracts the root of the token using the patterns in `hrRules`.
   * A rule is accepted only when its first capture group is longer than one
   * character and contains a vowel or a syllabic "r"; otherwise the next
   * rule is tried.
   *
   * @param {string} token - Token (already transformed) to reduce.
   * @returns {string} The captured root, or the token itself when no rule
   *   yields an acceptable root.
   */
  getRoot(token) {
    for (const rule of hrRules) {
      const match = new RegExp(rule).exec(token);
      if (!match) {
        continue;
      }
      const root = match[1];
      if (this.containsVocal(root) && root.length > 1) {
        return root;
      }
    }
    return token;
  }

  /**
   * Tells whether the token contains a vowel or a syllabic "r".
   *
   * @param {string} token - Text to inspect.
   * @returns {boolean} `true` when at least one of `a e i o u` or a syllabic
   *   "r" is present.
   */
  containsVocal(token) {
    // RegExp.prototype.test yields a real boolean. The previous comparison of
    // String.prototype.search() against `undefined` could never be true
    // (search returns -1 on no match), so every token passed the check.
    return /[aeiouR]/.test(this.highlightRSyllable(token));
  }

  /**
   * Marks every "r" that has no vowel directly before or after it (string or
   * line boundaries count as non-vowels) by upper-casing it to "R".
   *
   * @param {string} token - Text to mark.
   * @returns {string} The text with such "r" characters replaced by "R".
   */
  highlightRSyllable(token) {
    return token.replace(/(^|[^aeiou])r($|[^aeiou])/gm, '$1R$2');
  }
}
//# sourceMappingURL=stemmer-hr.js.map