UNPKG

yoastseo-dep

Version:

Yoast clientside page analysis

492 lines (484 loc) 17.2 kB
import stem from "../../../../../../src/languageProcessing/languages/es/helpers/internal/stem";
import getMorphologyData from "../../../../../specHelpers/getMorphologyData";

// Spanish morphology data handed to the stemmer in every test below.
const morphologyDataES = getMorphologyData( "es" ).es;

/*
 * Table of [ input word, expected stem ] pairs.
 * Each pair becomes one test case in the first suite below. The comments group the pairs
 * by the stemming rule or exception list they are meant to exercise.
 */
const wordsToStem = [
	// Input a word that ends in -s but is not a plural.
	[ "caos", "caos" ],
	[ "gas", "gas" ],
	[ "martes", "martes" ],
	[ "microondas", "microondas" ],
	[ "jesús", "jesus" ],
	// Input noun with: singular: -z, plural: -ces
	[ "actriz", "actriz" ],
	[ "actrices", "actriz" ],
	[ "luz", "luz" ],
	[ "luces", "luz" ],
	[ "voz", "voz" ],
	[ "voces", "voz" ],
	// Input an adjective on -ano
	[ "americano", "american" ],
	[ "republicana", "republican" ],
	// Input a word that ends with a clitic pronoun and is on the list of words that end like pronoun suffixes but are not verbs.
	[ "anime", "anim" ],
	[ "abuela", "abuel" ],
	// Input a word that ends with a clitic pronoun and is a verb.
	[ "abofarse", "abof" ],
	// [ "mírame", "mir" ],
	// Input a word that does not end with a clitic pronoun and is on the exceptions full forms list.
	[ "sacratísimo", "sagrad" ],
	[ "veamos", "ver" ],
	[ "libanesa", "libanes" ],
	[ "libanés", "libanes" ],
	[ "crudelísimas", "cruel" ],
	// Input a word that looks like a diminutive but is not.
	[ "acólito", "acolit" ],
	[ "amalecitas", "amalecit" ],
	// Input a word that is on the diminutive exceptions list for diminutives ending in -it-.
	[ "reicito", "rey" ],
	[ "realitito", "reality" ],
	[ "lucecita", "luz" ],
	[ "actricita", "actriz" ],
	[ "ciudadcita", "ciudad" ],
	[ "ciudadita", "ciudad" ],
	[ "raicitos", "raiz" ],
	[ "raicitas", "raiz" ],
	// Input a diminutive that is on the stem canonicalization exception list for nouns
	[ "ciudaduela", "ciudad" ],
	[ "ciudadela", "ciudad" ],
	[ "abejuela", "abej" ],
	[ "locuelo", "loc" ],
	// Input a word that is a typical diminutive and should be stemmed by the rules.
	[ "puertecita", "puert" ],
	[ "ventitas", "vent" ],
	[ "suavito", "suav" ],
	[ "vueltito", "vuelt" ],
	[ "vueltecito", "vuelt" ],
	[ "jovencitos", "joven" ],
	[ "amorcitos", "amor" ],
	[ "valsecito", "vals" ],
	[ "reyecito", "rey" ],
	[ "pianito", "pian" ],
	[ "gobiernito", "gobiern" ],
	[ "huesitos", "hues" ],
	[ "aparatito", "aparat" ],
	[ "paseítos", "pase" ],
	[ "jadeíta", "jade" ],
	// Input a word that ends in a suffix preceded by uy.
	[ "excluyendo", "exclu" ],
	[ "atribuyes", "atribu" ],
	// Input a word that undergoes stem modification changes.
	[ "recuerdan", "record" ],
	[ "comienzo", "comenz" ],
	// Input a word that ends in a common verb suffix.
	[ "saltaron", "salt" ],
	// [ "revocares", "revoc" ],
	// Input a word that ends in -os, -s, -a, -o, -á, -í,-ó, -é, -e.
	[ "agostinas", "agostin" ],
	[ "boboré", "bobor" ],
	// Input a word that is on the stems that belong together list.
	// [ "dollar", "dolar" ],
	// [ "chalets", "chale" ],
	// [ "sé", "sab" ],
	[ "quepa", "cab" ],
	// Input a word that ends in -en, -es, -éis, -emos and is not preceded by gu.
	[ "valéis", "val" ],
	[ "dirigen", "dirig" ],
	// Input a word that ends in -en, -es, -éis, -emos and is preceded by gu.
	[ "distinguen", "distingu" ],
	[ "alarguemos", "alarg" ],
	// Input a word that looks like a verb form but it's not.
	// [ "cabalgada", "cabalgad" ],
	[ "abacería", "abaceri" ],
	// Input a word that looks like a verb form and is on the list of stems that belong together.
	[ "san", "san" ],
	[ "virgen", "virgen" ],
	// Input a word that ends in -í, either a verb or a noun.
	[ "entendí", "entend" ],
	[ "marroquí", "marroqu" ],
	// Input an adverb that ends in -mente preceded by a consonant.
	[ "actualmente", "actual" ],
	[ "hábilmente", "habil" ],
	// Input an adverb that ends in -mente preceded by a vowel.
	[ "rápidamente", "rapid" ],
	[ "aparentemente", "aparent" ],
	// Input a word that ends in -mente but is not an adverb.
	[ "mentes", "ment" ],
	[ "fundamente", "fundament" ],
	// Input a superlative that ends in -ísimo, -ísima, -ísimos, -ísimas and is preceded by bil.
	[ "notabilísimo", "notabl" ],
	[ "respetabilísimas", "respetabl" ],
	// Input a superlative that ends in -ísimo, -ísima, -ísimos, -ísimas and is preceded by qu, gu.
	[ "riquísimo", "ric" ],
	[ "amiguísimas", "amig" ],
	// Input a superlative that ends in -ísimo, -ísima, -ísimos, -ísimas and is preceded by c.
	[ "felicísimo", "feliz" ],
	[ "velocísimas", "veloz" ],
	// Input a superlative that ends in -ísimo, -ísima, -ísimos, -ísimas and is preceded by i.
	[ "impiísima", "impi" ],
	/*
	 * Input a superlative that ends in -ísimo, -ísima, -ísimos, -ísimas and is preceded by
	 * -b, -d, -f, -g, -h, -i, -l, -m, -n, -p, -q, -r, -s, -t, -v, -z, -x, -y, -w, -k, -j, -u.
	 */
	[ "rapidísimo", "rapid" ],
	[ "generalísimas", "general" ],
	// Input a superlative that ends in -érrimo, -érrima, -érrimos, -érrimas.
	[ "genialérrima", "genial" ],
	[ "tristérrimo", "trist" ],
	// Exceptions in superlatives.
	[ "habilísima", "habil" ],
	[ "majérrimo", "majerrim" ],
	[ "cérrimo", "cérrim" ],
	[ "gérrimo", "gerrim" ],
	[ "torísimo", "torisim" ],
	[ "físima", "fisim" ],
	[ "dísima", "disim" ],
	// Input a word whose stem ends in ij ∧ suffix = {o, a, as, amos, áis, an}. [verbs in -igir]
	[ "dirijo", "dirig" ],
	[ "exijamos", "exig" ],
	[ "inflijo", "inflig" ],
	// Input a word whose stem ends in ij ∧ suffix = {o, a, as, amos, áis, an}. [verbs in -egir]
	[ "elija", "eleg" ],
	[ "corrijáis", "correg" ],
	[ "rijamos", "reg" ],
	[ "colijan", "coleg" ],
	// Input a word whose stem ends in ig ∧ suffix = {es, e, en, ió, ieron, iendo, [imp. & fut. subj suffixes]}. [verbs in -igir]
	[ "infligieras", "inflig" ],
	[ "transigió", "transig" ],
	[ "transijáis", "transig" ],
	// Input a word whose stem ends in ig ∧ suffix = {es, e, en, ió, ieron, iendo, [imp. & fut. subj suffixes]}. [verbs in -egir]
	[ "colegíamos", "coleg" ],
	[ "colige", "coleg" ],
	[ "registeis", "reg" ],
	[ "rigiera", "reg" ],
	// Input a word whose stem ends in zc ∧ suffix = {o, [pres. subj suffixes], a, as, amos, áis, an}.
	[ "conozco", "conoc" ],
	[ "conozcamos", "conoc" ],
	[ "traduzcamos", "traduc" ],
	[ "compadezco", "compadec" ],
	// Input a word whose stem ends in -c ∧ suffix = {é}.
	[ "lancé", "lanz" ],
	[ "visualicé", "visualiz" ],
	[ "empecé", "empez" ],
	/*
	 * Input a word whose stem ends in x: X = CVC(C) ∧ V = {i} ∧ suffix =
	 * {í, iste, ió, imos, isteis, ieron, amos, áis, iendo, [imp. & fut. subj suffixes], [pres. subj suffixes], e, o}.
	 */
	[ "sintió", "sent" ],
	[ "sugiriese", "suger" ],
	/*
	 * Input a word whose stem ends in x: X = CVC(C) ∧ V = {u} ∧ suffix =
	 * {í, iste, ió, imos, isteis, ieron, amos, áis, iendo, [imp. & fut. subj suffixes], [pres. subj suffixes], e, o}.
	 */
	[ "murieron", "mor" ],
	[ "durmió", "dorm" ],
	// Input a word whose stem contains ie (but not in the infinitive) ∧ suffix = {o, es, as, e, a, en, an}.
	[ "cierno", "cern" ],
	[ "aciertas", "acert" ],
	// Input a word whose stem contains ue (but not in the infinitive) ∧ suffix = {o, es, as, e, a, en, an}.
	[ "recuerdan", "record" ],
	[ "resuelves", "resolv" ],
	// Input a word whose stem contains ue in the infinitive.
	[ "quejan", "quej" ],
	[ "quemas", "quem" ],
	// Input a verb where stem ends on -í-, -ú- and precedes -o, -as, -a, -an, -e, -es, -en.
	[ "espían", "espi" ],
	[ "envías", "envi" ],
	[ "consensúas", "consensu" ],
	[ "licúa", "licu" ],
	// Input a verb where stem ends on -qu-, -gu- and precedes -é, -e, -es, -emos, -éis, -en
	[ "apliques", "aplic" ],
	[ "ataquemos", "atac" ],
	[ "rebusques", "rebusc" ],
	[ "conjuguen", "conjug" ],
	[ "juzguéis", "juzg" ],
	// Exceptions for rules on stem-modifying verbs.
	[ "aguaste", "agu" ],
	[ "engreíais", "engre" ],
	[ "interdijese", "interdec" ],
	// Input a verb that has multiple stems.
	[ "compuesta", "compon" ],
	[ "compongo", "compon" ],
	[ "componer", "compon" ],
	// Input a verb that ends in quir.
	// NOTE(review): the sample forms below belong to "desaguar" (-guar) — confirm the intended category.
	[ "desagua", "desagu" ],
	[ "desagüé", "desagu" ],
	// Input a verb that ends in guir.
	[ "autoextingo", "autoextingu" ],
	[ "autoextinguimos", "autoextingu" ],
	// Input a verb that ends in guar.
	[ "menguamos", "mengu" ],
	[ "mengüé", "mengu" ],
	// Input a verb that ends in ducir.
	[ "abducir", "abduc" ],
	[ "abduzco", "abduc" ],
	[ "abdujo", "abduc" ],
	[ "abdujerás", "abduc" ],
	[ "abdujeses", "abduc" ],
	// Input a verb that ends in seguir.
	[ "autoseguir", "autosegu" ],
	[ "autosiga", "autosegu" ],
	[ "autosiguemos", "autosegu" ],
	[ "autoseguid", "autosegu" ],
	// Input a verb that ends in sentir.
	[ "desconsentir", "desconsent" ],
	[ "desconsiento", "desconsent" ],
	[ "desconsintió", "desconsent" ],
	// Words that look like verb forms but aren't verbs.
	// Non-verb ending in -ió
	[ "chevió", "chevi" ],
	[ "cheviós", "chevi" ],
	// Non-verb ending in -irán
	[ "caguairán", "caguairan" ],
	[ "caguairanes", "caguairan" ],
	// Non-verb ending in -ái
	[ "samurái", "samurai" ],
	[ "samuráis", "samurai" ],
	// Non-verb ending in -ei
	[ "chatolei", "chatolei" ],
	// Non-verb ending in -éi
	[ "upéi", "upei" ],
	// Non-verb ending in -ir
	[ "mártir", "martir" ],
	[ "mártires", "martir" ],
	// Non-verb ending in -ír
	[ "hazmerreír", "hazmerreir" ],
	// Non-verb ending in -ada
	[ "abada", "abad" ],
	[ "abadas", "abad" ],
	// Non-verb ending in -ado
	[ "mercado", "mercad" ],
	[ "mercados", "mercad" ],
	// Non-verb ending in -imo
	[ "mínimo", "minim" ],
	[ "mínimos", "minim" ],
	// Non-verb ending in -emo
	[ "extremo", "extrem" ],
	[ "extremos", "extrem" ],
	// Non-verb ending in -ad
	[ "ciudad", "ciudad" ],
	[ "ciudades", "ciudad" ],
	// Non-verb ending in -ed
	[ "pared", "pared" ],
	[ "paredes", "pared" ],
	// Non-verb ending in -ie
	[ "serie", "seri" ],
	[ "series", "seri" ],
	// Non-verb ending in -ié
	[ "hincapié", "hincapi" ],
	[ "hincapiés", "hincapi" ],
	// Non-verb ending in -ando
	[ "contrabando", "contraband" ],
	[ "contrabandos", "contraband" ],
	// Non-verb ending in -ándo
	[ "cuándo", "cuand" ],
	// Non-verb ending in -aré
	[ "pagaré", "pagar" ],
	[ "pagarés", "pagar" ],
	// Non-verb ending in -eré
	[ "tereré", "terer" ],
	[ "tererés", "terer" ],
	// Non-verb ending in -ará
	[ "yarará", "yarar" ],
	[ "yararás", "yarar" ],
	// Non-verb ending in -erá
	[ "camerá", "camer" ],
	[ "camerás", "camer" ],
	// Non-verb ending in -irá
	[ "aragüirá", "aragüir" ],
	[ "aragüirás", "aragüir" ],
	// Non-verb ending in -ia
	[ "historia", "histori" ],
	[ "historias", "histori" ],
	// Non-verb ending in -id
	[ "apartheid", "apartheid" ],
	// Non-verb ending in -aba
	[ "guayaba", "guayab" ],
	[ "guayabas", "guayab" ],
	// Non-verb ending in -asta
	[ "canasta", "canast" ],
	[ "canastas", "canast" ],
	// Non-verb ending in -iste
	[ "quiste", "quist" ],
	[ "quistes", "quist" ],
	// Non-verb ending in -aste
	[ "contraste", "contrast" ],
	[ "contrastes", "contrast" ],
	// Non-verb ending in -ía.
	[ "policía", "polici" ],
	[ "policías", "polici" ],
	// Non-verb ending in -an
	[ "eslogan", "eslogan" ],
	[ "eslóganes", "eslogan" ],
	// Non-verb ending in -en
	[ "imagen", "imagen" ],
	[ "imágenes", "imagen" ],
	// Non-verb ending in -er
	[ "mujer", "mujer" ],
	[ "mujeres", "mujer" ],
	// Non-verb ending in -iendo
	[ "arriendo", "arriend" ],
	[ "arriendos", "arriend" ],
	// Non-verb ending in -ieron
	[ "gobieron", "gobieron" ],
	// Non-verb ending in -iera
	[ "ingeniera", "ingenier" ],
	[ "ingenieras", "ingenier" ],
	// Non-verb ending in -aron
	[ "gatillaron", "gatillaron" ],
	// Non-verb ending in -ida
	[ "vida", "vid" ],
	[ "vidas", "vid" ],
	// Non-verb ending in -ido
	[ "partido", "part" ],
	[ "partidos", "part" ],
	// Non-verb ending in -amo
	[ "reclamo", "reclam" ],
	[ "reclamos", "reclam" ],
	// Non-verb ending in -ara
	[ "máscara", "mascar" ],
	[ "máscaras", "mascar" ],
	// Non-verb ending in -ere
	[ "títere", "titer" ],
	[ "títeres", "titer" ],
	// Non-verb ending in -ase
	[ "base", "bas" ],
	[ "bases", "bas" ],
	// Non-verb ending in -ar
	[ "hogar", "hogar" ],
	[ "hogares", "hogar" ],
	// Non-verb ending in -ya
	[ "playa", "play" ],
	[ "playas", "play" ],
	// Non-verb ending in -ye
	[ "rallye", "rally" ],
	// Non-verb ending in -yo
	[ "apoyo", "apoy" ],
	[ "apoyos", "apoy" ],
	// Non-verb ending in -yera
	[ "playera", "player" ],
	[ "playeras", "player" ],
	// Non-verb ending in -arán
	[ "catamarán", "catamaran" ],
	[ "catamaranes", "catamaran" ],
	// Non-verb ending in -erán
	[ "bumerán", "bumeran" ],
	// Non-verb ending in -aste
	[ "empaste", "empast" ],
	[ "empastes", "empast" ],
	// Non-verb ending in -iste
	[ "quiste", "quist" ],
	[ "quistes", "quist" ],
	// Non-verb ending in -ida
	[ "sólida", "solid" ],
	[ "sólidas", "solid" ],
	// Non-verb ending in -ido
	[ "antióxido", "antioxid" ],
	[ "antióxidos", "antioxid" ],
	// Input a word that looks like it ends on a personal pronoun but is not.
	// Word with no personal pronoun ending on -me
	[ "uniforme", "uniform" ],
	// Word with no personal pronoun ending on -se
	[ "concordiense", "concordiens" ],
	// Word with no personal pronoun ending on -le
	[ "doble", "dobl" ],
	// Word with no personal pronoun ending on -la
	[ "acerolas", "acerol" ],
	// Word with no personal pronoun ending on -lo
	[ "estrello", "estrell" ],
	// Word with no personal pronoun ending on -no
	[ "infernos", "infern" ],
	// Word with -ibles suffix
	[ "posibles", "posibl" ],
	// Word with -icación suffix
	[ "diversificación", "diversif" ],
	// Word with -logías/logía suffix
	[ "biotecnología", "biotecnolog" ],
	[ "biotecnologías", "biotecnolog" ],
	// Word with -ución suffix
	[ "sustitución", "sustitu" ],
	// Word with -encia suffix
	[ "advertencia", "advertent" ],
	// Word with -icidad suffix
	[ "complicidad", "complic" ],
	// Word with -ativos suffix
	[ "decorativos", "decor" ],
	// Word with -imiento suffix
	[ "enriquecimiento", "enriquec" ],
	// Word which contains only vowels
	[ "aaaa", "aaaa" ],
	// Word which contains only consonants
	[ "bbbb", "bbbb" ],
	// Word with suffix -ita that is preceded by vowel (the word below is made-up)
	[ "laita", "lait" ],
	// Word which contains only one letter
	[ "o", "o" ],
	// Word with pronoun suffix -se preceded by -ándo
	[ "lizándose", "liz" ],
	// Word ending in -e preceded by -gu (the word below is made-up)
	[ "persigue", "persegu" ],
	// Words that look like they end in a valid -mente suffix but do not
	[ "demente", "dement" ],
	[ "clemente", "clement" ],
	// Verbs that undergo stem modification.
	// Qu -> c
	[ "apliques", "aplic" ],
	[ "ataquemos", "atac" ],
	// Qu -> c + ue -> o
	[ "trueque", "trueque" ],
	// Ij -> g
	[ "dirijo", "dirig" ],
	[ "exijamos", "exig" ],
	// Zc -> c
	[ "conozco", "conoc" ],
	[ "traduzcamos", "traduc" ],
	// I -> e
	[ "sintió", "sent" ],
	[ "sugiriese", "suger" ],
	// U -> o
	[ "murieron", "mor" ],
	[ "durmió", "dorm" ],
	// Ue -> o
	[ "recuerdan", "record" ],
	[ "resuelves", "resolv" ],
	// Ie -> e
	[ "cierno", "cern" ],
	[ "aciertas", "acert" ],
];

/*
 * Groups of word forms that are all expected to be reduced to one shared stem.
 * Each entry lists the expected stem and the forms that must map onto it.
 */
const paradigms = [
	// A paradigm with various types of diminutive
	{ stem: "nariz", forms: [ "nariz", "naricitas", "narizotas" ] },
	{ stem: "murall", forms: [ "murallas", "murallitas", "murallotas" ] },
	{ stem: "azucar", forms: [ "azúcar", "azucarita", "azuquítar" ] },
	{ stem: "pared", forms: [ "pared", "paredcita", "parecita", "paredita", "paredilla" ] },
	{ stem: "ciudad", forms: [ "ciudadcita", "ciudadita", "ciudaduela", "ciudadela" ] },
	{ stem: "alegr", forms: [ "alegre", "alegrete", "alegreta" ] },
	{ stem: "mam", forms: [ "mama", "mamá", "mamaíta", "mamita", "mamacita", "mami" ] },
	{ stem: "pap", forms: [ "papa", "papá", "papaíto", "papito", "papacito", "papi" ] },
	{ stem: "bibliotec", forms: [ "biblioteca", "bibliotecita" ] },
	{ stem: "muñec", forms: [ "muñeca", "muñecas", "muñeco", "muñecos", "muñecito" ] },
	{ stem: "chalec", forms: [ "chaleco", "chalecos", "chalecito", "chalecitos" ] },
	{ stem: "chec", forms: [ "checo", "checos", "checito", "checitos" ] },
	{ stem: "jaquec", forms: [ "jaqueca", "jaquecas", "jaquecita", "jaquecitas" ] },
	{ stem: "videotec", forms: [ "videoteca", "videotecas", "videotecita", "videotecitas" ] },
	{ stem: "rey", forms: [ "reyecito", "reyecitos", "reicito", "reicitos", "reyito", "reyitos" ] },
	{ stem: "puert", forms: [ "puertecito", "puertecitos", "puertito", "puertitos" ] },
	{ stem: "beb", forms: [ "bebita", "bebitas", "bebecita", "bebecitas", "bebecito" ] },
	{ stem: "lunch", forms: [ "lonchecito", "lonchito", "lunchito" ] },
];

// Registers one test per [ word, expected stem ] pair from wordsToStem.
describe( "Test for stemming Spanish words", () => {
	for ( const [ word, expectedStem ] of wordsToStem ) {
		it( `stems the word ${ word }`, () => {
			expect( stem( word, morphologyDataES ) ).toBe( expectedStem );
		} );
	}
} );

// Registers one test per form of every paradigm, checking it against the paradigm's stem.
describe( "Test to make sure all forms of a paradigm get stemmed to the same stem", () => {
	// `stem` on the left of the colon is the property key; the imported stem() function is not shadowed.
	paradigms.forEach( ( { stem: expectedStem, forms } ) => {
		forms.forEach( ( form ) => {
			it( `correctly stems the word: ${ form } to ${ expectedStem }`, () => {
				expect( stem( form, morphologyDataES ) ).toBe( expectedStem );
			} );
		} );
	} );
} );