// yoastseo-dep
// Version:
// Yoast clientside page analysis
// 649 lines (642 loc) • 19.7 kB
// JavaScript
import stem from "../../../../../../src/languageProcessing/languages/cs/helpers/internal/stem";
import getMorphologyData from "../../../../../specHelpers/getMorphologyData";
// Czech morphology data, handed to the stemmer in every test below.
const { cs: morphologyDataCS } = getMorphologyData( "cs" );
// Fixtures for the single-word stemming tests.
// Every entry is a pair: the first element is the input word, the second one is the stem the test expects for it.
// The comment above each pair names the suffix that the pair is meant to cover.
// NOTE(review): pairs made of two empty strings look like placeholders for suffixes without a sample word yet.
// They are iterated like any other pair, so each one still produces a test — confirm whether to fill them in or remove them.
const wordsToStem = [
// Input a word ending in case suffix -atech.
[ "tématech", "tém" ],
// Input a word ending in case suffix -ětem.
[ "markrabětem", "markrab" ],
// Input a word ending in case suffix -atům.
[ "diplomatům", "diplom" ],
// Input a word ending in case suffix -ech.
[ "významech", "význam" ],
// Input a word ending in case suffix -ich.
[ "jejich", "jej" ],
// Input a word ending in case suffix -ích.
[ "nejlepších", "nejlepš" ],
// Input a word ending in case suffix -ého.
[ "filmového", "film" ],
// Input a word ending in case suffix -ěmi.
[ "zeměmi", "zem" ],
// Input a word ending in case suffix -emi.
// NOTE(review): "hranic" does not end in -emi; the sample word may need replacing — confirm.
[ "hranic", "hran" ],
// Input a word ending in case suffix -ému.
[ "výraznému", "výraz" ],
// Input a word ending in case suffix -ete.
[ "zašlete", "zaš" ],
// Input a word ending in case suffix -eti.
[ "čtyřiceti", "čtyř" ],
// Input a word ending in case suffix -iho.
[ "roliho", "rol" ],
// Input a word ending in case suffix -ího.
[ "mezinárodního", "mezinárod" ],
// Input a word ending in case suffix -ími.
[ "vyznamenáními", "vyznamen" ],
// Input a word ending in case suffix -imu.
// NOTE(review): "mluvčímu" ends in -ímu (long í), not -imu — confirm which suffix this pair is meant to cover.
[ "mluvčímu", "mluvčím" ],
// Input a word ending in case suffix -ách.
[ "Čechách", "čech" ],
// Input a word ending in case suffix -ata.
[ "přijata", "přij" ],
// Input a word ending in case suffix -aty.
[ "diplomaty", "diplom" ],
// Input a word ending in case suffix -ých.
[ "obecných", "obec" ],
// Input a word ending in case suffix -ama.
[ "očičkama", "oči" ],
// Input a word ending in case suffix -ami.
[ "přehradami", "přehrad" ],
// Input a word ending in case suffix -ové.
[ "autobusové", "autobus" ],
// Input a word ending in case suffix -ovi.
// NOTE(review): "klíčoví" ends in -oví (long í), not -ovi — confirm which suffix this pair is meant to cover.
[ "klíčoví", "klí" ],
// Input a word ending in case suffix -ými.
[ "dochovanými", "dochov" ],
// Input a word ending in case suffix -em.
[ "architektem", "architek" ],
// Input a word ending in case suffix -es.
[ "čerkes", "čer" ],
// Input a word ending in case suffix -ém.
[ "novelizovaném", "novelizova" ],
// Input a word ending in case suffix -ím.
[ "dnešním", "dneš" ],
// Input a word ending in case suffix -ům.
[ "záznamům", "záznam" ],
// Input a word ending in case suffix -at.
[ "jednat", "jed" ],
// Input a word ending in case suffix -ám.
[ "snahám", "snah" ],
// Input a word ending in case suffix -os.
[ "výnos", "výn" ],
// Input a word ending in case suffix -us.
[ "bilingvismus", "bilingvism" ],
// Input a word ending in case suffix -mi.
[ "některými", "někter" ],
// Input a word ending in case suffix -ou.
[ "hospodářskou", "hospodářs" ],
// Input a word ending in case suffix -e.
[ "rozvoje", "rozvoj" ],
// Input a word ending in case suffix -i.
[ "koni", "kon" ],
// Input a word ending in case suffix -í.
[ "mající", "mají" ],
// Input a word ending in case suffix -ě.
[ "relativně", "relativ" ],
// Input a word ending in case suffix -u.
[ "severu", "sever" ],
// Input a word ending in case suffix -y.
[ "ochrany", "ochr" ],
// Input a word ending in case suffix -ů.
[ "států", "stá" ],
// Input a word ending in case suffix -a.
[ "jedna", "jed" ],
// Input a word ending in case suffix -o.
[ "jedno", "jed" ],
// Input a word ending in case suffix -á.
[ "odmítá", "odmí" ],
// Input a word ending in case suffix -é.
[ "každé", "každ" ],
// Input a word ending in case suffix -ý.
// NOTE(review): "přirozeno" ends in -o, not -ý; the sample word may need replacing — confirm.
[ "přirozeno", "přiroh" ],
// Input a word ending in possessive suffix -ov.
[ "učitelova", "učite" ],
// Input a word ending in possessive suffix -ův.
[ "manželův", "manže" ],
// Input a word ending in possessive suffix -in.
[ "dceřin", "dceř" ],
// Input a word ending in palatalised suffix -ci.
[ "", "" ],
// Input a word ending in palatalised suffix -ce.
[ "", "" ],
// Input a word ending in palatalised suffix -či.
[ "", "" ],
// Input a word ending in palatalised suffix -če.
[ "", "" ],
// Input a word ending in palatalised suffix -k.
[ "", "" ],
// Input a word ending in palatalised suffix -zi.
[ "", "" ],
// Input a word ending in palatalised suffix -ze.
[ "", "" ],
// Input a word ending in palatalised suffix -ži.
[ "", "" ],
// Input a word ending in palatalised suffix -že.
[ "", "" ],
// Input a word ending in palatalised suffix -h.
[ "", "" ],
// Input a word ending in palatalised suffix -čtě.
[ "", "" ],
// Input a word ending in palatalised suffix -čti.
// NOTE(review): "angličtina" does not end in -čti; the sample word may need replacing — confirm.
[ "angličtina", "angl" ],
// Input a word ending in palatalised suffix -čtí.
[ "", "" ],
// Input a word ending in palatalised suffix -ck.
[ "automatické", "automat" ],
// Input a word ending in palatalised suffix -ště.
[ "", "" ],
// Input a word ending in palatalised suffix -šti.
[ "", "" ],
// Input a word ending in palatalised suffix -ští.
[ "", "" ],
// Input a word ending in palatalised suffix -sk.
[ "", "" ],
// Input a word ending in comparative suffix -ejš.
[ "dolejš", "dol" ],
// Input a word ending in comparative suffix -ějš.
[ "nejbezpečnějš", "nejbezpe" ],
// Input a word ending in diminutive suffix -oušek.
[ "modroušek", "modr" ],
// Input a word ending in diminutive suffix -eček.
[ "domeček", "dom" ],
// Input a word ending in diminutive suffix -éček.
[ "cédéček", "céd" ],
// Input a word ending in diminutive suffix -iček.
[ "kočiček", "koč" ],
// Input a word ending in diminutive suffix -íček.
[ "pešíček", "peš" ],
// Input a word ending in diminutive suffix -enek.
[ "pálenek", "pál" ],
// Input a word ending in diminutive suffix -ének.
[ "kamének", "kam" ],
// Input a word ending in diminutive suffix -inek.
[ "palačinek", "pala" ],
// Input a word ending in diminutive suffix -ínek.
[ "šulínek", "šul" ],
// Input a word ending in diminutive suffix -áček.
[ "obláček", "obl" ],
// Input a word ending in diminutive suffix -aček.
[ "stříkaček", "stří" ],
// Input a word ending in diminutive suffix -oček.
[ "baboček", "bab" ],
// Input a word ending in diminutive suffix -uček.
[ "oblouček", "oblo" ],
// Input a word ending in diminutive suffix -anek.
[ "pražanek", "praž" ],
// Input a word ending in diminutive suffix -onek.
[ "salonek", "sal" ],
// Input a word ending in diminutive suffix -unek.
[ "okounek", "oko" ],
// Input a word ending in diminutive suffix -ánek.
[ "kulhánek", "kulh" ],
// Input a word ending in diminutive suffix -éčk.
[ "", "" ],
// Input a word ending in diminutive suffix -ičk.
[ "skleničk", "skle" ],
// Input a word ending in diminutive suffix -enk.
[ "příklenk", "pří" ],
// Input a word ending in diminutive suffix -énk.
[ "okénk", "oké" ],
// Input a word ending in diminutive suffix -ink.
[ "trénink", "tré" ],
// Input a word ending in diminutive suffix -ínk.
[ "podmínk", "pod" ],
// Input a word ending in diminutive suffix -áčk.
[ "přemáčk", "přem" ],
// Input a word ending in diminutive suffix -ačk.
[ "nekuřačk", "nekuř" ],
// Input a word ending in diminutive suffix -očk.
[ "pobočk", "pob" ],
// Input a word ending in diminutive suffix -učk.
[ "měkkoučk", "měkko" ],
// Input a word ending in diminutive suffix -ank.
[ "fašank", "faš" ],
// Input a word ending in diminutive suffix -onk.
[ "šešonk", "šeš" ],
// Input a word ending in diminutive suffix -unk.
[ "šalunk", "šal" ],
// Input a word ending in diminutive suffix -átk.
[ "zvířátk", "zvíř" ],
// Input a word ending in diminutive suffix -ánk.
[ "pozvánk", "pozv" ],
// Input a word ending in diminutive suffix -ušk.
[ "zkoušk", "zko" ],
// Input a word ending in diminutive suffix -ek.
[ "domek", "dom" ],
// Input a word ending in diminutive suffix -ék.
[ "obrazék", "obraz" ],
// Input a word ending in diminutive suffix -ík.
[ "petřík", "petř" ],
// Input a word ending in diminutive/derivational suffix -ik.
[ "historik", "histor" ],
// Input a word ending in diminutive suffix -ák.
[ "kočičák", "kočičá" ],
// Input a word ending in diminutive suffix -ak.
[ "", "" ],
// Input a word ending in diminutive suffix -ok.
[ "", "" ],
// Input a word ending in diminutive suffix -uk.
[ "", "" ],
// Input a word ending in diminutive suffix -k.
[ "naviják", "navijá" ],
// Input a word ending in augmentative suffix -ajzn.
[ "chřipajzna", "chřip" ],
// Input a word ending in augmentative suffix -izn.
[ "babizna", "bab" ],
// Input a word ending in augmentative suffix -isk.
[ "", "" ],
// Input a word ending in augmentative suffix -ák.
[ "", "" ],
// Input a word ending in derivational suffix -obinec.
[ "chudobinec", "chud" ],
// Input a word ending in derivational suffix -ionář.
[ "milionář", "mil" ],
// Input a word ending in derivational suffix -ovisk.
[ "", "" ],
// Input a word ending in derivational suffix -ovstv.
[ "mistrovstv", "mistr" ],
// Input a word ending in derivational suffix -ovišt.
[ "pracovišt", "prac" ],
// Input a word ending in derivational suffix -ovník.
[ "pracovník", "prac" ],
// Input a word ending in derivational suffix -ásek.
[ "", "" ],
// Input a word ending in derivational suffix -loun.
[ "vztekloun", "vztek" ],
// Input a word ending in derivational suffix -nost.
[ "společnost", "společ" ],
// Input a word ending in derivational suffix -teln.
[ "neuvěřiteln", "neuvěři" ],
// Input a word ending in derivational suffix -ovec.
[ "sportovec", "sport" ],
// Input a word ending in derivational suffix -ovík.
[ "šalgovík", "šalg" ],
// Input a word ending in derivational suffix -ovtv.
[ "", "" ],
// Input a word ending in derivational suffix -ovin.
[ "těstovin", "těst" ],
// Input a word ending in derivational suffix -štin.
[ "slovenštin", "slovens" ],
// Input a word ending in derivational suffix -enic.
[ "židenic", "žid" ],
// Input a word ending in derivational suffix -inec.
[ "zvěřinec", "zvěř" ],
// Input a word ending in derivational suffix -itel.
[ "ředitel", "řed" ],
// Input a word ending in derivational suffix -árn.
[ "legendárn", "legend" ],
// Input a word ending in derivational suffix -ěnk.
[ "doplněnk", "dopln" ],
// Input a word ending in derivational suffix -ián.
[ "indián", "ind" ],
// Input a word ending in derivational suffix -ist.
[ "kořist", "koř" ],
// Input a word ending in derivational suffix -isk.
[ "", "" ],
// Input a word ending in derivational suffix -išt.
// NOTE(review): same sample pair as the -ovišt entry earlier in this list, so two tests get the same title — confirm whether a distinct word was intended.
[ "pracovišt", "prac" ],
// Input a word ending in derivational suffix -itb.
[ "", "" ],
// Input a word ending in derivational suffix -írn.
[ "", "" ],
// Input a word ending in derivational suffix -och.
[ "běloch", "běl" ],
// Input a word ending in derivational suffix -ost.
[ "možnost", "mož" ],
// Input a word ending in derivational suffix -ovn.
[ "sportovn", "sport" ],
// Input a word ending in derivational suffix -oun.
[ "bručoun", "bruč" ],
// Input a word ending in derivational suffix -out.
[ "přesunout", "přesun" ],
// Input a word ending in derivational suffix -ouš.
[ "chocholouš", "chochol" ],
// Input a word ending in derivational suffix -ušk.
[ "", "" ],
// Input a word ending in derivational suffix -kyn.
[ "přítelkyn", "přítel" ],
// Input a word ending in derivational suffix -čan.
[ "vesničan", "vesni" ],
// Input a word ending in derivational suffix -kář.
[ "zahrádkář", "zahrád" ],
// Input a word ending in derivational suffix -néř.
[ "platnéř", "plat" ],
// Input a word ending in derivational suffix -ník.
[ "ročník", "roč" ],
// Input a word ending in derivational suffix -ctv.
[ "účetnictv", "účetni" ],
// Input a word ending in derivational suffix -stv.
[ "zemědělstv", "zeměděl" ],
// Input a word ending in derivational suffix -áč.
[ "květináč", "květin" ],
// Input a word ending in derivational suffix -ač.
[ "přepínač", "přepín" ],
// Input a word ending in derivational suffix -án.
[ "připsán", "přips" ],
// Input a word ending in derivational suffix -an.
[ "dušan", "duš" ],
// Input a word ending in derivational suffix -ář.
[ "komentář", "koment" ],
// Input a word ending in derivational suffix -as.
[ "nečas", "neč" ],
// Input a word ending in derivational suffix -ec.
[ "jazykovědec", "jazykověd" ],
// Input a word ending in derivational suffix -en.
[ "květen", "květ" ],
// Input a word ending in derivational suffix -ěn.
[ "propuštěn", "propusk" ],
// Input a word ending in derivational suffix -éř.
[ "bankéř", "bank" ],
// Input a word ending in derivational suffix -íř.
[ "krejčíř", "krejč" ],
// Input a word ending in derivational suffix -ic.
[ "čarodějnic", "čarodějn" ],
// Input a word ending in derivational suffix -in.
[ "dřevin", "dřev" ],
// Input a word ending in derivational suffix -ín.
[ "slavičín", "slavič" ],
// Input a word ending in derivational suffix -it.
[ "vytvořit", "vytvoř" ],
// Input a word ending in derivational suffix -iv.
[ "", "" ],
// Input a word ending in derivational suffix -ob.
[ "způsob", "způs" ],
// Input a word ending in derivational suffix -ot.
[ "nečistot", "nečist" ],
// Input a word ending in derivational suffix -ov.
[ "havířov", "hav" ],
// Input a word ending in derivational suffix -oň.
[ "broskvoň", "broskv" ],
// Input a word ending in derivational suffix -ul.
[ "přesunul", "přesun" ],
// Input a word ending in derivational suffix -yn.
[ "všechyn", "všech" ],
// Input a word ending in derivational suffix -čk.
[ "zmáčk", "zmá" ],
// Input a word ending in derivational suffix -čn.
[ "společn", "spole" ],
// Input a word ending in derivational suffix -dl.
[ "předvedl", "předve" ],
// Input a word ending in derivational suffix -nk.
[ "", "" ],
// Input a word ending in derivational suffix -tv.
[ "", "" ],
// Input a word ending in derivational suffix -tk.
[ "", "" ],
// Input a word ending in derivational suffix -vk.
[ "", "" ],
// Input a word ending in derivational suffix -c.
[ "majíc", "mají" ],
// Input a word ending in derivational suffix -č.
[ "sazeč", "saze" ],
// Input a word ending in derivational suffix -k.
[ "černotisk", "černotis" ],
// Input a word ending in derivational suffix -l.
[ "začal", "zača" ],
// Input a word ending in derivational suffix -n.
[ "možn", "mož" ],
// Input a word ending in derivational suffix -t.
[ "vyhrát", "vyhrá" ],
// Input a word from stems that belong to one word.
[ "děti", "dít" ],
// Input a word from stems that belong to one word.
[ "psové", "pes" ],
];
// Inflection paradigms: every entry lists word forms that are all expected to be reduced to the entry's `stem`.
// NOTE(review): the commented-out forms are presumably ones that do not currently reduce to the listed stem —
// TODO confirm before re-enabling or deleting them.
const paradigms = [
// A paradigm of a masculine animate noun (hard declension).
{ stem: "manže", forms: [
"manžel",
"manžela",
"manželovi",
"manželu",
"manžele",
"manželé",
"manželem",
"manželové",
"manželi",
"manželů",
"manželům",
"manžely",
"manželech",
"manželích",
] },
// A paradigm of a masculine animate noun (soft declension).
{ stem: "muž", forms: [
"muž",
// "muže",
"mužovi",
// "muži",
// "mužem",
"mužové",
"mužů",
"mužům",
"mužích",
] },
// A paradigm of a masculine animate noun (A-stem declension).
{ stem: "předsed", forms: [
"předseda",
"předsedy",
"předsedovi",
"předsedu",
"předsedo",
"předsedou",
"předsedové",
"předsedů",
"předsedům",
"předsedech",
] },
// A paradigm of a masculine animate noun (adjectival declension).
{ stem: "mluv", forms: [
"mluvčí",
"mluvčího",
// "mluvčímu",
"mluvčím",
"mluvčích",
"mluvčími",
] },
// A paradigm of a masculine inanimate noun (hard declension).
{ stem: "hrad", forms: [
"hrad",
"hradu",
"hrade",
"hradě",
"hradem",
"hrady",
"hradů",
"hradům",
"hradech",
] },
// A paradigm of a masculine inanimate noun (soft declension).
{ stem: "stroj", forms: [
"stroj",
"stroje",
"stroji",
"strojem",
"strojů",
"strojích",
] },
// A paradigm of a feminine noun (hard declension).
{ stem: "žen", forms: [
"žena",
"ženy",
"ženě",
"ženu",
"ženo",
"ženou",
"žen",
"ženám",
"ženách",
"ženami",
] },
// A paradigm of a feminine noun (soft declension).
{ stem: "růž", forms: [
// "růže",
// "růži",
"růží",
// "růžím",
"růžích",
// "růžemi",
] },
// A paradigm of a feminine noun (soft declension).
{ stem: "pís", forms: [
// "píseň",
"písně",
"písni",
"písní",
"písním",
"písních",
"písněmi",
] },
// A paradigm of a feminine noun (soft declension).
{ stem: "kos", forms: [
"kost",
"kosti",
"kostí",
"kostem",
"kostech",
"kostmi",
] },
// A paradigm of a neuter noun (hard declension).
{ stem: "měs", forms: [
"město",
"města",
"městu",
"městě",
"městem",
"měst",
"městům",
"městech",
"městy",
] },
// A paradigm of a neuter noun (soft declension).
{ stem: "moř", forms: [
"moře",
"moři",
// "mořím",
"mořem",
"mořích",
] },
// A paradigm of a neuter noun (mixed declension).
{ stem: "kuř", forms: [
"kuře",
// "kuřete",
"kuřeti",
// "kuřetem",
"kuřata",
"kuřat",
"kuřaty",
"kuřatům",
"kuřatech",
] },
// A paradigm of a neuter noun (long-i stem).
{ stem: "stav", forms: [
"stavení",
// "stavením",
"staveních",
"staveními",
] },
// A paradigm of an adjective (hard declension).
{ stem: "mlad", forms: [
"mladý",
"mladá",
"mladé",
"mladého",
// NOTE(review): "mladé" appears a second time here, which yields two identically titled tests — confirm it is intended.
"mladé",
"mladému",
"mladou",
// "mladém",
"mladým",
"mladí",
"mladých",
"mladými",
] },
// A paradigm of an adjective (soft declension).
{ stem: "jar", forms: [
"jarní",
"jarního",
// "jarnímu",
"jarním",
"jarních",
"jarními",
] },
// A paradigm of a masculine possessive adjective.
{ stem: "otc", forms: [
// "otcův",
"otcova",
"otcovo",
"otcovy",
"otcovu",
"otcově",
"otcovým",
"otcovou",
"otcovi",
"otcových",
] },
// A paradigm of a feminine possessive adjective.
{ stem: "mat", forms: [
"matčin",
"matčina",
"matčino",
"matčiny",
"matčinu",
"matčině",
"matčiným",
"matčinou",
"matčini",
"matčiných",
"matčinými",
] },
// Paradigms for the list of words with stems that belong to one word.
{ stem: "dít", forms: [
"děti",
"dítě",
"dítěte",
"dětem",
"dítěti",
"dítětem",
"dětech",
] },
// NOTE(review): "kočky" and "kočce" are repeated below (presumably once per case/number slot), so their tests share a title — TODO confirm.
{ stem: "koč", forms: [
"kočka",
"kočky",
"kočky",
"koček",
"kočce",
"kočkám",
"kočku",
"kočky",
"kočko",
"kočky",
"kočce",
"kočkách",
"kočkou",
"kočkami",
] },
];
// Registers one test per [ word, expectedStem ] pair in wordsToStem.
describe( "Test for stemming Czech words", () => {
	wordsToStem.forEach( ( [ word, expectedStem ] ) => {
		it( `stems the word ${ word }`, () => {
			expect( stem( word, morphologyDataCS ) ).toBe( expectedStem );
		} );
	} );
} );
// Registers one test per form of every paradigm; each form must stem to the paradigm's stem.
describe( "Test to make sure all forms of a paradigm get stemmed to the same stem", () => {
	// The `stem` property is renamed locally so it does not shadow the imported stem() function.
	paradigms.forEach( ( { stem: expectedStem, forms } ) => {
		forms.forEach( ( form ) => {
			it( `correctly stems the word: ${ form } to ${ expectedStem }`, () => {
				expect( stem( form, morphologyDataCS ) ).toBe( expectedStem );
			} );
		} );
	} );
} );