UNPKG

yoastseo-dep

Version:

Yoast clientside page analysis

301 lines (297 loc) 12.6 kB
import stem from "../../../../../../src/languageProcessing/languages/id/helpers/internal/stem";
import getMorphologyData from "../../../../../specHelpers/getMorphologyData";

const morphologyData = getMorphologyData( "id" ).id;

/*
 * Table of test cases for the Indonesian stemmer.
 * Each row is a pair: [ word passed to stem(), stem that is expected back ].
 * Rows that are commented out are kept for reference but do not run.
 */
const wordsToStem = [
	// Words with prefix men- or pen- and are in the exception list.
	[ "menalar", "nalar" ],
	[ "penikmat", "nikmat" ],
	// Words with prefix men- or pen- and suffix -kan/-an/-i and are in the exception list.
	[ "menasihati", "nasihat" ],
	[ "menaikkan", "naik" ],
	[ "penalaran", "nalar" ],
	// Words with prefix men- or pen- which start with vowel and not in the exception list.
	[ "menukar", "tukar" ],
	[ "penukar", "tukar" ],
	// Words with prefix men- or pen- and suffix -kan/-an/-i which start with vowel and not in the exception list.
	[ "menukarkan", "tukar" ],
	[ "menangisi", "tangis" ],
	[ "penukaran", "tukar" ],
	// Words with prefix men- or pen- which start with consonant and not in the exception list.
	[ "mencuci", "cuci" ],
	[ "pencuci", "cuci" ],
	// Words with prefix men- or pen- and suffix -kan/-an/-i which start with consonant and not in the exception list.
	[ "mencucikan", "cuci" ],
	[ "mencakari", "cakar" ],
	[ "pencakupan", "cakup" ],
	// Words with prefix meng- or peng- and are in the exception list.
	[ "mengeduk", "keduk" ],
	[ "pengecam", "kecam" ],
	// Words with prefix meng- or peng- and suffix -kan/-an/-i and are in the exception list.
	[ "mengenalkan", "kenal" ],
	// [ "mengenali", "kenal" ],
	[ "pengenalan", "kenal" ],
	// Words with prefix meng- or peng- which are not in the exception list.
	[ "mengambil", "ambil" ],
	[ "pengambil", "ambil" ],
	// Words that receive derivational affixes and are in the list of doNotStem exception will not be correctly stemmed.
	// [ "mengolah", "olah" ],
	[ "penggiat", "giat" ],
	// Words with prefix meng- or peng- and suffix -kan/-an/-i which are not in the exception list.
	[ "mengambilkan", "ambil" ],
	[ "mengambili", "ambil" ],
	[ "pengambilan", "ambil" ],
	// Words with prefix mem- or pem- and is in the exception list of words beginning in -p.
	[ "memajang", "pajang" ],
	[ "pemaksa", "paksa" ],
	// Words with prefix mem- or pem- and is in the exception list of words beginning in -m.
	[ "memundam", "mundam" ],
	[ "pemasnawi", "masnawi" ],
	// Words with prefix mem- or pem- and suffix -kan/-an/-i and are in the exception list of words beginning in -p.
	[ "memaksakan", "paksa" ],
	[ "memancari", "pancar" ],
	[ "pemaksaan", "paksa" ],
	// Words with prefix mem- or pem- and suffix -kan/-an/-i and are in the exception list of words beginning in -m.
	[ "memundamkan", "mundam" ],
	[ "memundami", "mundam" ],
	[ "pemasnawian", "masnawi" ],
	// Words with prefix mem- or pem- which are not in the exception list.
	[ "membaca", "baca" ],
	[ "pembaca", "baca" ],
	// Words with prefix mem- or pem- and suffix -kan/-an/-i which are not in the exception list.
	[ "membacakan", "baca" ],
	[ "membasahi", "basah" ],
	[ "pembacaan", "baca" ],
	// Words with first prefix mem- or pem- and second prefix ber-/per- with suffix -kan/-an/-i
	[ "memberdayakan", "daya" ],
	[ "memberdayai", "daya" ],
	[ "pemberdayaan", "daya" ],
	// Words with ter- prefix and are in the exception list.
	[ "terambah", "rambah" ],
	[ "terintang", "rintang" ],
	// Words with prefix ter- and are in the doNotStemTer exception list.
	[ "terang", "terang" ],
	[ "teritorial", "teritorial" ],
	// Words with prefixes ke- and ter- and are in the doNotStemTer exception list.
	[ "ketermolabil", "termolabil" ],
	[ "keterindil", "terindil" ],
	// Words with prefixes ke- and ter- and not in the exception lists.
	[ "keterarahan", "arah" ],
	[ "keterbatasan", "batas" ],
	// Words with ter- prefix and are not in the exception list.
	[ "terpaksa", "paksa" ],
	[ "terdiam", "diam" ],
	[ "terarah", "arah" ],
	// Words with ter- prefix and suffixes -kan and are not in the exception list.
	[ "terbuatkan", "buat" ],
	// Words with prefixes me-/di-/ke-
	[ "melintas", "lintas" ],
	[ "dibuat", "buat" ],
	[ "ketua", "tua" ],
	// Words with prefixes me-/di-/ke- and suffixes -kan/-an/-i
	[ "melintasi", "lintas" ],
	[ "merusakkan", "rusak" ],
	// [ "dirusaki", "rusak" ],
	[ "ketuaan", "tua" ],
	// Word with prefix ke- and ber- and suffix -an which is in exception list
	[ "keberagaman", "ragam" ],
	// Words with prefix ber-/be-/per-/pe-
	[ "bersantap", "santap" ],
	[ "bekerja", "kerja" ],
	[ "perdaya", "daya" ],
	[ "pemain", "main" ],
	// Words with prefix ber-/per- and in exception list
	[ "peramal", "ramal" ],
	[ "beragam", "ragam" ],
	// Words with prefix per- and suffixes -an and in exception list
	[ "peramalan", "ramal" ],
	// Words with prefix ber-/per- and suffixes -kan/-an and not in exception list
	[ "bersamaan", "sama" ],
	[ "beralaskan", "alas" ],
	[ "persamaan", "sama" ],
	// Words with suffixes -kan/-i/-an
	[ "biarkan", "biar" ],
	[ "sirami", "siram" ],
	[ "makanan", "makan" ],
	// Words which do not have derivational affixes with particles -kah, -lah, -pun
	[ "bukalah", "buka" ],
	[ "satupun", "satu" ],
	[ "bukankah", "bukan" ],
	// Words which do not have derivational affixes with possessive pronoun suffixes -ku/-mu/-nya
	[ "cintaku", "cinta" ],
	[ "buatmu", "buat" ],
	[ "buatnya", "buat" ],
	// Words which do not have derivational affixes with both particles and possessive pronoun suffixes -ku/-mu/-nya
	[ "cintakulah", "cinta" ],
	[ "cintamukah", "cinta" ],
	[ "cintanyapun", "cinta" ],
	// Words which have derivational affixes with particles -kah, -lah, -pun
	[ "kesenanganpun", "senang" ],
	[ "biarkanlah", "biar" ],
	[ "terdiamkah", "diam" ],
	// Words which have derivational affixes with possessive pronoun suffixes -ku/-mu/-nya
	[ "pendakiannya", "daki" ],
	[ "pengorbananku", "korban" ],
	[ "pengorbananmu", "korban" ],
	// Words which have derivational affixes with both particles and possessive pronoun suffixes -ku/-mu/-nya
	[ "pengorbanankulah", "korban" ],
	[ "pengorbananmukah", "korban" ],
	[ "kebahagiaannyapun", "bahagia" ],
	// Words that should not be stemmed
	[ "abadi", "abadi" ],
	[ "berkah", "berkah" ],
	[ "celah", "celah" ],
	[ "rumpun", "rumpun" ],
	[ "liku", "liku" ],
	[ "temu", "temu" ],
	[ "tanya", "tanya" ],
	// Words with bel-/pel- prefixes
	[ "belajar", "ajar" ],
	[ "pelajar", "ajar" ],
	[ "belunjur", "unjur" ],
	// Words ending in k that get suffix -an
	[ "anakan", "anak" ],
	[ "peranakan", "anak" ],
	[ "rembukan", "rembuk" ],
	[ "pengedukan", "keduk" ],
	[ "berserakan", "serak" ],
	[ "penyuntikan", "suntik" ],
	// Words from kBeginning exception list with peng- preceded by first order prefix.
	[ "dipengambangkan", "kambang" ],
	[ "kepengukuhan", "kukuh" ],
	// Words which start with peng- preceded by first order prefix that are not in an exception list.
	[ "kepengacaraan", "acara" ],
	[ "mempengaruhi", "aruh" ],
	[ "dipengaruhi", "aruh" ],
	[ "kepenghunian", "huni" ],
	// Single syllable words that get prefix di-
	[ "dipel", "pel" ],
	// Single syllable words that get prefix di- and suffix -kan/-i
	[ "dipelkan", "pel" ],
	[ "disahi", "sah" ],
	// Single syllable words that get prefix di- and suffix -kan/-i and a particle suffix
	[ "dipelkankah", "pel" ],
	[ "disahkanlah", "sah" ],
	// Single syllable words that get prefix di- and particle suffixes
	[ "dicaspun", "cas" ],
	[ "dipelkah", "pel" ],
	// Single syllable words that get a particle suffix
	[ "bomkah", "bom" ],
	// Single syllable words that get a possessive pronoun suffix
	[ "vasmu", "vas" ],
	// Single syllable words that get a possessive pronoun suffix and a particle suffix
	[ "vasmupun", "vas" ],
	// Single syllable words that get the -kan suffix
	[ "pelkan", "pel" ],
	// Single syllable words that get prefix penge-
	[ "pengecek", "cek" ],
	[ "pengelap", "lap" ],
	// Single syllable words that get prefix menge-
	[ "mengekir", "kir" ],
	[ "mengeklik", "klik" ],
	// Single syllable words that get prefix penge- and suffix -an
	[ "pengeboman", "bom" ],
	[ "pengesahan", "sah" ],
	// Single syllable words that get prefix menge- and suffix -kan/-i
	[ "mengebomi", "bom" ],
	[ "mengesahkan", "sah" ],
	/*
	 * Two syllable words that look like it starts with one of the single syllable words and ends in one of these suffixes -kan/-an/-i.
	 * Currently some of these words are incorrectly stemmed. They will be solved when we implement the root dictionary
	 */
	// [ "topan", "topan" ],
	// [ "lokan", "lokan" ],
	[ "turban", "turban" ],
	[ "pingsan", "pingsan" ],
	[ "tinggi", "tinggi" ],
	/*
	 * Two syllable words whose stem starts in one of the single syllable words and gets prefix menge-/penge-
	 * in which it accidentally overlaps with words starting with e- in kBeginning list and have prefix meng-/peng-
	 */
	// [ "mengebiri", "kebiri" ],
	// [ "pengerat", "kerat" ],
	// Words with derivational affixes that need to be stemmed before comparing with the list of doNotStem exception.
	[ "bersekolah", "sekolah" ],
	[ "terhimpun", "himpun" ],
	[ "merumpun", "rumpun" ],
	[ "mengolah", "olah" ],
	[ "melangkah", "langkah" ],
	[ "bertanya", "tanya" ],
	[ "meramu", "ramu" ],
	[ "memangku", "pangku" ],
	// Plurals formed by reduplication
	[ "buku-buku", "buku" ],
	// Plurals formed by reduplication + a suffix
	[ "buku-bukunya", "buku" ],
	// Plurals formed by reduplication + multiple suffixes
	[ "rumah-rumahnyalah", "rumah" ],
	// Plurals formed by reduplication + a prefix
	[ "menjadi-jadi", "jadi" ],
	// Plurals formed by reduplication + a prefix that changes the beginning of the word (1 letter change)
	[ "memijit-mijit", "pijit" ],
	// Plurals formed by reduplication + a circumfix
	[ "melambai-lambaikan", "lambai" ],
	// Plurals formed by reduplication + a circumfix that changes the beginning of the word (1 letter change)
	[ "meniru-nirukan", "tiru" ],
	// Plurals formed by reduplication + a prefix that changes the beginning of the word (2 letter change)
	[ "mengayuh-ngayuh", "kayuh" ],
	[ "menyeduh-nyeduh", "seduh" ],
	// Reduplicated words that aren't plurals shouldn't be stemmed (i.e. words on nonPluralReduplications exception list).
	[ "kupu-kupu", "kupu-kupu" ],
	[ "hati-hati", "hati-hati" ],
	// Reduplicated words with stem changes that aren't plurals shouldn't be stemmed (i.e. words on nonPluralReduplications exception list).
	[ "mengira-ngira", "kira-kira" ],
	[ "mengongko-ngongko", "kongko-kongko" ],
	[ "memilih-milih", "pilih-pilih" ],
	[ "menyama-nyama", "sama-sama" ],
	[ "menatah-natah", "tatah-tatah" ],
	// Reduplicated words that aren't plurals shouldn't be stemmed (i.e. words on nonPluralReduplicationsFullForms exception list).
	[ "antah-berantah", "antah-berantah" ],
	// Non-plurals that aren't full reduplications shouldn't be stemmed.
	[ "muda-mudi", "muda-mudi" ],
	[ "corat-coret", "corat-coret" ],
	[ "gelap-gulita", "gelap-gulita" ],
	[ "pontang-panting", "pontang-panting" ],
	// More specs for plurals/re-duplications.
	[ "membunuh-bunuh", "bunuh" ],
	[ "menarik-narik", "tarik" ],
	[ "tidur-tiduran", "tidur" ],
	[ "menunda-nunda", "tunda" ],
	[ "bertingkat-tingkat", "tingkat" ],
	// Words that are separated by dash but are not plural (made-up word)
	[ "bertingkat-tingkat-tangga", "tingkat-tingkat-tangga" ],
	// Words with a beginning that looks like a valid prefix
	[ "pelita", "pelita" ],
	[ "medali", "medali" ],
	[ "belanja", "belanja" ],
	[ "keliling", "keliling" ],
	[ "diskusi", "diskusi" ],
	[ "disinfektan", "disinfektan" ],
	// Words with a beginning that looks like a valid prefix and get non-derivational suffix i.e. -kah/-lah/-pun/-ku/-mu/-nya
	[ "belanjalah", "belanja" ],
	[ "berpedomankah", "pedoman" ],
	[ "pedomannyapun", "pedoman" ],
	[ "kelilingnya", "keliling" ],
	[ "keringatku", "keringat" ],
	[ "kelincimu", "kelinci" ],
	// Words with a beginning that looks like a valid prefix with a second order prefix
	[ "bermedali", "medali" ],
	[ "berdiskusi", "diskusi" ],
	[ "pendiskusian", "diskusi" ],
	// Words with a beginning that looks like a valid prefix with a derivational suffix -kan/-an/-i
	[ "belanjaan", "belanja" ],
	[ "kecewakan", "kecewa" ],
	// Words with a beginning that looks like a valid prefix and also gets a first order prefix and a suffix
	[ "membelanjakan", "belanja" ],
	[ "memberangkatkan", "berangkat" ],
	[ "mendiskusikan", "diskusi" ],
];

// Registers one test per row: the word is stemmed with the Indonesian morphology data and compared to the expected stem.
describe( "Test for stemming Indonesian words", () => {
	for ( const [ word, expectedStem ] of wordsToStem ) {
		it( "stems the word " + word, () => {
			expect( stem( word, morphologyData ) ).toBe( expectedStem );
		} );
	}
} );