@orama/stemmers
Version:
Stemmers for Orama
1 lines • 9.28 kB
Source Map (JSON)
{"version":3,"sources":["<anon>"],"sourcesContent":["\"use strict\";\nObject.defineProperty(exports, \"__esModule\", {\n value: true\n});\nfunction _export(target, all) {\n for(var name in all)Object.defineProperty(target, name, {\n enumerable: true,\n get: Object.getOwnPropertyDescriptor(all, name).get\n });\n}\n_export(exports, {\n get language () {\n return language;\n },\n get stemmer () {\n return stemmer;\n }\n});\nconst step2List = {\n ational: 'ate',\n tional: 'tion',\n enci: 'ence',\n anci: 'ance',\n izer: 'ize',\n bli: 'ble',\n alli: 'al',\n entli: 'ent',\n eli: 'e',\n ousli: 'ous',\n ization: 'ize',\n ation: 'ate',\n ator: 'ate',\n alism: 'al',\n iveness: 'ive',\n fulness: 'ful',\n ousness: 'ous',\n aliti: 'al',\n iviti: 'ive',\n biliti: 'ble',\n logi: 'log'\n};\nconst step3List = {\n icate: 'ic',\n ative: '',\n alize: 'al',\n iciti: 'ic',\n ical: 'ic',\n ful: '',\n ness: ''\n};\n// Consonant\nconst c = '[^aeiou]';\n// Vowel\nconst v = '[aeiouy]';\n// Consonant sequence\nconst C = c + '[^aeiouy]*';\n// Vowel sequence\nconst V = v + '[aeiou]*';\n// [C]VC... is m>0\nconst mgr0 = '^(' + C + ')?' + V + C;\n// [C]VC[V] is m=1\nconst meq1 = '^(' + C + ')?' + V + C + '(' + V + ')?$';\n// [C]VCVC... is m>1\nconst mgr1 = '^(' + C + ')?' + V + C + V + C;\n// vowel in stem\nconst s_v = '^(' + C + ')?' + v;\nfunction stemmer(w) {\n let stem;\n let suffix;\n let re;\n let re2;\n let re3;\n let re4;\n if (w.length < 3) {\n return w;\n }\n const firstch = w.substring(0, 1);\n if (firstch == 'y') {\n w = firstch.toUpperCase() + w.substring(1);\n }\n re = /^(.+?)(ss|i)es$/;\n re2 = /^(.+?)([^s])s$/;\n if (re.test(w)) {\n w = w.replace(re, '$1$2');\n } else if (re2.test(w)) {\n w = w.replace(re2, '$1$2');\n }\n re = /^(.+?)eed$/;\n re2 = /^(.+?)(ed|ing)$/;\n if (re.test(w)) {\n const fp = re.exec(w);\n re = new RegExp(mgr0);\n if (re.test(fp[1])) {\n re = /.$/;\n w = w.replace(re, '');\n }\n } else if (re2.test(w)) {\n const fp = re2.exec(w);\n stem = fp[1];\n re2 = new RegExp(s_v);\n if (re2.test(stem)) {\n w = stem;\n re2 = /(at|bl|iz)$/;\n re3 = new RegExp('([^aeiouylsz])\\\\1$');\n re4 = new RegExp('^' + C + v + '[^aeiouwxy]$');\n if (re2.test(w)) {\n w = w + 'e';\n } else if (re3.test(w)) {\n re = /.$/;\n w = w.replace(re, '');\n } else if (re4.test(w)) {\n w = w + 'e';\n }\n }\n }\n re = /^(.+?)y$/;\n if (re.test(w)) {\n const fp = re.exec(w);\n stem = fp?.[1];\n re = new RegExp(s_v);\n if (stem && re.test(stem)) {\n w = stem + 'i';\n }\n }\n re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;\n if (re.test(w)) {\n const fp = re.exec(w);\n stem = fp?.[1];\n suffix = fp?.[2];\n re = new RegExp(mgr0);\n if (stem && re.test(stem)) {\n // eslint-disable-next-line @typescript-eslint/ban-ts-comment\n // @ts-ignore\n w = stem + step2List[suffix];\n }\n }\n re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;\n if (re.test(w)) {\n const fp = re.exec(w);\n stem = fp?.[1];\n suffix = fp?.[2];\n re = new RegExp(mgr0);\n // eslint-disable-next-line @typescript-eslint/ban-ts-comment\n // @ts-ignore\n if (stem && re.test(stem)) {\n // eslint-disable-next-line @typescript-eslint/ban-ts-comment\n // @ts-ignore\n w = stem + step3List[suffix];\n }\n }\n re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;\n re2 = /^(.+?)(s|t)(ion)$/;\n if (re.test(w)) {\n const fp = re.exec(w);\n stem = fp?.[1];\n re = new RegExp(mgr1);\n if (stem && re.test(stem)) {\n w = stem;\n }\n } else if (re2.test(w)) {\n const fp = re2.exec(w);\n stem = fp?.[1] ?? '' + fp?.[2] ?? '';\n re2 = new RegExp(mgr1);\n if (re2.test(stem)) {\n w = stem;\n }\n }\n re = /^(.+?)e$/;\n if (re.test(w)) {\n const fp = re.exec(w);\n stem = fp?.[1];\n re = new RegExp(mgr1);\n re2 = new RegExp(meq1);\n re3 = new RegExp('^' + C + v + '[^aeiouwxy]$');\n if (stem && (re.test(stem) || re2.test(stem) && !re3.test(stem))) {\n w = stem;\n }\n }\n re = /ll$/;\n re2 = new RegExp(mgr1);\n if (re.test(w) && re2.test(w)) {\n re = /.$/;\n w = w.replace(re, '');\n }\n if (firstch == 'y') {\n w = firstch.toLowerCase() + w.substring(1);\n }\n return w;\n}\nconst language = 'english';\n"],"names":["_export","target","all","name","Object","defineProperty","enumerable","get","getOwnPropertyDescriptor","exports","value","language","stemmer","step2List","ational","tional","enci","anci","izer","bli","alli","entli","eli","ousli","ization","ation","ator","alism","iveness","fulness","ousness","aliti","iviti","biliti","logi","step3List","icate","ative","alize","iciti","ical","ful","ness","c","v","C","V","mgr0","meq1","mgr1","s_v","w","stem","suffix","re","re2","re3","re4","length","firstch","substring","toUpperCase","test","replace","fp","exec","RegExp","toLowerCase"],"mappings":"AAAA,aAIA,SAASA,QAAQC,CAAM,CAAEC,CAAG,EACxB,IAAI,IAAIC,KAAQD,EAAIE,OAAOC,cAAc,CAACJ,EAAQE,EAAM,CACpDG,WAAY,CAAA,EACZC,IAAKH,OAAOI,wBAAwB,CAACN,EAAKC,GAAMI,GAAG,AACvD,EACJ,CARAH,OAAOC,cAAc,CAACI,QAAS,aAAc,CACzCC,MAAO,CAAA,CACX,GAOAV,QAAQS,QAAS,CACb,IAAIE,UAAY,CACZ,OAAOA,QACX,EACA,IAAIC,SAAW,CACX,OAAOA,OACX,CACJ,GACA,IAAMC,UAAY,CACdC,QAAS,MACTC,OAAQ,OACRC,KAAM,OACNC,KAAM,OACNC,KAAM,MACNC,IAAK,MACLC,KAAM,KACNC,MAAO,MACPC,IAAK,IACLC,MAAO,MACPC,QAAS,MACTC,MAAO,MACPC,KAAM,MACNC,MAAO,KACPC,QAAS,MACTC,QAAS,MACTC,QAAS,MACTC,MAAO,KACPC,MAAO,MACPC,OAAQ,MACRC,KAAM,KACV,EACMC,UAAY,CACdC,MAAO,KACPC,MAAO,GACPC,MAAO,KACPC,MAAO,KACPC,KAAM,KACNC,IAAK,GACLC,KAAM,EACV,EAEMC,EAAI,WAEJC,EAAI,WAEJC,EAAIF,qBAEJG,EAAIF,EAAI,WAERG,KAAO,KAAOF,EAAI,KAAOC,EAAID,EAE7BG,KAAO,KAAOH,EAAI,KAAOC,EAAID,EAAI,IAAMC,EAAI,MAE3CG,KAAO,KAAOJ,EAAI,KAAOC,EAAID,EAAIC,EAAID,EAErCK,IAAM,KAAOL,EAAI,KAAOD,EAC9B,SAAShC,QAAQuC,CAAC,MACVC,EACAC,EACAC,EACAC,EACAC,EACAC,EACJ,GAAIN,EAAEO,MAAM,CAAG,EACX,OAAOP,EAEX,IAAMQ,EAAUR,EAAES,SAAS,CAAC,EAAG,GAa/B,GAZID,AAAW,KAAXA,GACAR,CAAAA,EAAIQ,EAAQE,WAAW,GAAKV,EAAES,SAAS,CAAC,EAAC,EAG7CL,EAAM,iBACFD,AAFJA,CAAAA,EAAK,iBAAgB,EAEdQ,IAAI,CAACX,GACRA,EAAIA,EAAEY,OAAO,CAACT,EAAI,QACXC,EAAIO,IAAI,CAACX,IAChBA,CAAAA,EAAIA,EAAEY,OAAO,CAACR,EAAK,OAAM,EAG7BA,EAAM,kBACFD,AAFJA,CAAAA,EAAK,YAAW,EAETQ,IAAI,CAACX,GAAI,CACZ,IAAMa,EAAKV,EAAGW,IAAI,CAACd,GAEfG,AADJA,CAAAA,EAAK,IAAIY,OAAOnB,KAAI,EACbe,IAAI,CAACE,CAAE,CAAC,EAAE,IACbV,EAAK,KACLH,EAAIA,EAAEY,OAAO,CAACT,EAAI,IAE1B,MAAWC,EAAIO,IAAI,CAACX,KAEhBC,EAAOY,AADIT,EAAIU,IAAI,CAACd,EACX,CAAC,EAAE,CAERI,AADJA,CAAAA,EAAM,IAAIW,OAAOhB,IAAG,EACZY,IAAI,CAACV,KACTD,EAAIC,EACJG,EAAM,cACNC,EAAM,AAAIU,OAAO,sBACjBT,EAAM,AAAIS,OAAO,IAAMrB,EAAID,EAAI,gBAC3BW,EAAIO,IAAI,CAACX,GACTA,GAAQ,IACDK,EAAIM,IAAI,CAACX,IAChBG,EAAK,KACLH,EAAIA,EAAEY,OAAO,CAACT,EAAI,KACXG,EAAIK,IAAI,CAACX,IAChBA,CAAAA,GAAQ,GAAE,IAKtB,GAAIG,AADJA,CAAAA,EAAK,UAAS,EACPQ,IAAI,CAACX,GAAI,CACZ,IAAMa,EAAKV,EAAGW,IAAI,CAACd,GACnBC,EAAOY,GAAI,CAAC,EAAE,CACdV,EAAK,IAAIY,OAAOhB,KACZE,GAAQE,EAAGQ,IAAI,CAACV,IAChBD,CAAAA,EAAIC,EAAO,GAAE,CAErB,CAEA,GAAIE,AADJA,CAAAA,EAAK,0IAAyI,EACvIQ,IAAI,CAACX,GAAI,CACZ,IAAMa,EAAKV,EAAGW,IAAI,CAACd,GACnBC,EAAOY,GAAI,CAAC,EAAE,CACdX,EAASW,GAAI,CAAC,EAAE,CAChBV,EAAK,IAAIY,OAAOnB,MACZK,GAAQE,EAAGQ,IAAI,CAACV,IAGhBD,CAAAA,EAAIC,EAAOvC,SAAS,CAACwC,EAAO,AAAD,CAEnC,CAEA,GAAIC,AADJA,CAAAA,EAAK,gDAA+C,EAC7CQ,IAAI,CAACX,GAAI,CACZ,IAAMa,EAAKV,EAAGW,IAAI,CAACd,GACnBC,EAAOY,GAAI,CAAC,EAAE,CACdX,EAASW,GAAI,CAAC,EAAE,CAChBV,EAAK,IAAIY,OAAOnB,MAGZK,GAAQE,EAAGQ,IAAI,CAACV,IAGhBD,CAAAA,EAAIC,EAAOjB,SAAS,CAACkB,EAAO,AAAD,CAEnC,CAGA,GADAE,EAAM,oBACFD,AAFJA,CAAAA,EAAK,qFAAoF,EAElFQ,IAAI,CAACX,GAAI,CACZ,IAAMa,EAAKV,EAAGW,IAAI,CAACd,GACnBC,EAAOY,GAAI,CAAC,EAAE,CACdV,EAAK,IAAIY,OAAOjB,MACZG,GAAQE,EAAGQ,IAAI,CAACV,IAChBD,CAAAA,EAAIC,CAAG,CAEf,MAAO,GAAIG,EAAIO,IAAI,CAACX,GAAI,CACpB,IAAMa,EAAKT,EAAIU,IAAI,CAACd,GACpBC,EAAOY,GAAI,CAAC,EAAE,EAAI,GAAKA,GAAI,CAAC,EAAE,EAAI,GAE9BT,AADJA,CAAAA,EAAM,IAAIW,OAAOjB,KAAI,EACba,IAAI,CAACV,IACTD,CAAAA,EAAIC,CAAG,CAEf,CAEA,GAAIE,AADJA,CAAAA,EAAK,UAAS,EACPQ,IAAI,CAACX,GAAI,CACZ,IAAMa,EAAKV,EAAGW,IAAI,CAACd,GACnBC,EAAOY,GAAI,CAAC,EAAE,CACdV,EAAK,IAAIY,OAAOjB,MAChBM,EAAM,IAAIW,OAAOlB,MACjBQ,EAAM,AAAIU,OAAO,IAAMrB,EAAID,EAAI,gBAC3BQ,GAASE,CAAAA,EAAGQ,IAAI,CAACV,IAASG,EAAIO,IAAI,CAACV,IAAS,CAACI,EAAIM,IAAI,CAACV,EAAI,GAC1DD,CAAAA,EAAIC,CAAG,CAEf,CAUA,OATAE,EAAK,MACLC,EAAM,IAAIW,OAAOjB,MACbK,EAAGQ,IAAI,CAACX,IAAMI,EAAIO,IAAI,CAACX,KACvBG,EAAK,KACLH,EAAIA,EAAEY,OAAO,CAACT,EAAI,KAElBK,AAAW,KAAXA,GACAR,CAAAA,EAAIQ,EAAQQ,WAAW,GAAKhB,EAAES,SAAS,CAAC,EAAC,EAEtCT,CACX,CACA,IAAMxC,SAAW"}