UNPKG

@isdk/detect-text-language

Version:
612 lines (608 loc) 15.3 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // src/index.ts var src_exports = {}; __export(src_exports, { CountryCodes: () => CountryCodes, CountryNames: () => CountryNames, detectTextLangEx: () => detectTextLangEx, detectTextLanguage: () => detectTextLanguage, getCountryCodeFromLang: () => getCountryCodeFromLang, getLanguageFromIso6391: () => getLanguageFromIso6391 }); module.exports = __toCommonJS(src_exports); // src/country-codes.ts var CountryCodes = { "US": "en-US,es-US,haw,fr", "GB": "en-GB,cy-GB,gd", "FR": "fr-FR,frp,br,co,ca,eu,oc", "DE": "de", "AD": "ca", "AE": "ar-AE,fa,en,hi,ur", "AF": "fa-AF,ps,uz-AF,tk", "AG": "en-AG", "AI": "en-AI", "AL": "sq,el", "AM": "hy", "AN": "nl-AN,en,es", "AO": "pt-AO", "AQ": "", "AR": "es-AR,en,it,de,fr,gn", "AS": "en-AS,sm,to", "AT": "de-AT,hr,hu,sl", "AU": "en-AU", "AW": "nl-AW,pap,es,en", "AX": "sv-AX", "AZ": "az,ru,hy", "BA": "bs,hr-BA,sr-BA", "BB": "en-BB", "BD": "bn-BD,en", "BE": "nl-BE,fr-BE,de-BE", "BF": "fr-BF,mos", "BG": "bg,tr-BG,rom", "BH": "ar-BH,en,fa,ur", "BI": "fr-BI,rn", "BJ": "fr-BJ", "BL": "fr", "BM": "en-BM,pt", "BN": "ms-BN,en-BN", "BO": "es-BO,qu,ay", "BQ": "nl,pap,en", "BR": "pt-BR,es,en,fr", "BS": "en-BS", "BT": "dz", "BV": "", "BW": "en-BW,tn-BW", "BY": "be,ru", "BZ": "en-BZ,es", "CA": "en-CA,fr-CA,iu", "CC": "ms-CC,en", "CD": "fr-CD,ln,ktu,kg,sw,lua", "CF": "fr-CF,sg,ln,kg", "CG": "fr-CG,kg,ln-CG", "CH": "de-CH,fr-CH,it-CH,rm", "CI": "fr-CI", "CK": "en-CK,mi", "CL": "es-CL", "CM": "en-CM,fr-CM", "CN": "zh-CN,zh,yue,wuu,dta,ug,za", "CO": "es-CO", "CR": "es-CR,en", "CU": "es-CU,pap", "CV": "pt-CV", "CW": "nl,pap", "CX": "en,zh,ms-CX", "CY": "el-CY,tr-CY,en", "CZ": "cs,sk", "DJ": "fr-DJ,ar,so-DJ,aa", "DK": "da-DK,en,fo,de-DK", "DM": "en-DM", "DO": "es-DO", "DZ": "ar-DZ", "EC": "es-EC", "EE": "et,ru", "EG": "ar-EG,en,fr", "EH": "ar,mey", "ER": "aa-ER,ar,tig,kun,ti-ER", "ES": "es-ES,ca,gl,eu,oc", "ET": "am,en-ET,om-ET,ti-ET,so-ET,sid", "FI": "fi-FI,sv-FI,smn", "FJ": "en-FJ,fj", "FK": "en-FK", "FM": "en-FM,chk,pon,yap,kos,uli,woe,nkr,kpg", "FO": "fo,da-FO", "GA": "fr-GA", "GD": "en-GD", "GE": "ka,ru,hy,az", "GF": "fr-GF", "GG": "en,nrf", "GH": "en-GH,ak,ee,tw", "GI": "en-GI,es,it,pt", "GL": "kl,da-GL,en", "GM": "en-GM,mnk,wof,wo,ff", "GN": "fr-GN", "GP": "fr-GP", "GQ": "es-GQ,fr,pt", "GR": "el-GR,en,fr", "GS": "en", "GT": "es-GT", "GU": "en-GU,ch-GU", "GW": "pt-GW,pov", "GY": "en-GY", "HK": "zh-HK,yue,zh,en", "HM": "", "HN": "es-HN,cab,miq", "HR": "hr-HR,sr", "HT": "ht,fr-HT", "HU": "hu-HU", "ID": "id,en,nl,jv", "IE": "en-IE,ga-IE", "IL": "he,ar-IL,en-IL", "IM": "en,gv", "IN": "en-IN,hi,bn,te,mr,ta,ur,gu,kn,ml,or,pa,as,bh,sat,ks,ne,sd,kok,doi,mni,sit,sa,fr,lus,inc", "IO": "en-IO", "IQ": "ar-IQ,ku,hy", "IR": "fa-IR,ku", "IS": "is,en,de,da,sv,no", "IT": "it-IT,de-IT,fr-IT,sc,ca,co,sl", "JE": "en,fr,nrf", "JM": "en-JM", "JO": "ar-JO,en", "JP": "ja", "KE": "en-KE,sw-KE", "KG": "ky,uz,ru", "KH": "km,fr,en", "KI": "en-KI,gil", "KM": "ar,fr-KM", "KN": "en-KN", "KP": "ko-KP", "KR": "ko-KR,en", "KW": "ar-KW,en", "KY": "en-KY", "KZ": "kk,ru", "LA": "lo,fr,en", "LB": "ar-LB,fr-LB,en,hy", "LC": "en-LC", "LI": "de-LI", "LK": "si,ta,en", "LR": "en-LR", "LS": "en-LS,st,zu,xh", "LT": "lt,ru,pl", "LU": "lb,de-LU,fr-LU", "LV": "lv,ru,lt", "LY": "ar-LY,it,en", "MA": "ar-MA,ber,fr", "MC": "fr-MC,en,it", "MD": "ro,ru,gag,tr", "ME": "sr,hu,bs,sq,hr,rom", "MF": "fr", "MG": "fr-MG,mg", "MH": "mh,en-MH", "MK": "mk,sq,tr,rmm,sr", "ML": "fr-ML,bm", "MM": "my", "MN": "mn,ru", "MO": "zh,zh-MO,pt", "MP": "fil,tl,zh,ch-MP,en-MP", "MQ": "fr-MQ", "MR": "ar-MR,fuc,snk,fr,mey,wo", "MS": "en-MS", "MT": "mt,en-MT", "MU": "en-MU,bho,fr", "MV": "dv,en", "MW": "ny,yao,tum,swk", "MX": "es-MX", "MY": "ms-MY,en,zh,ta,te,ml,pa,th", "MZ": "pt-MZ,vmw", "NA": "en-NA,af,de,hz,naq", "NC": "fr-NC", "NE": "fr-NE,ha,kr,dje", "NF": "en-NF", "NG": "en-NG,ha,yo,ig,ff", "NI": "es-NI,en", "NL": "nl-NL,fy-NL", "NO": "no,nb,nn,se,fi", "NP": "ne,en", "NR": "na,en-NR", "NU": "niu,en-NU", "NZ": "en-NZ,mi", "OM": "ar-OM,en,bal,ur", "PA": "es-PA,en", "PE": "es-PE,qu,ay", "PF": "fr-PF,ty", "PG": "en-PG,ho,meu,tpi", "PH": "tl,en-PH,fil,ceb,ilo,hil,war,pam,bik,bcl,pag,mrw,tsg,mdh,cbk,krj,sgd,msb,akl,ibg,yka,mta,abx", "PK": "ur-PK,en-PK,pa,sd,ps,brh", "PL": "pl", "PM": "fr-PM", "PN": "en-PN", "PR": "en-PR,es-PR", "PS": "ar-PS", "PT": "pt-PT,mwl", "PW": "pau,sov,en-PW,tox,ja,fil,zh", "PY": "es-PY,gn", "QA": "ar-QA,es", "RE": "fr-RE", "RO": "ro,hu,rom", "RS": "sr,hu,bs,rom", "RU": "ru,tt,xal,cau,ady,kv,ce,tyv,cv,udm,tut,mns,bua,myv,mdf,chm,ba,inh,kbd,krc,av,sah,nog", "RW": "rw,en-RW,fr-RW,sw", "SA": "ar-SA", "SB": "en-SB,tpi", "SC": "en-SC,fr-SC", "SD": "ar-SD,en,fia", "SE": "sv-SE,se,sma,fi-SE", "SG": "cmn,en-SG,ms-SG,ta-SG,zh-SG", "SH": "en-SH", "SI": "sl,sh", "SJ": "no,ru", "SK": "sk,hu", "SL": "en-SL,men,tem", "SM": "it-SM", "SN": "fr-SN,wo,fuc,mnk", "SO": "so-SO,ar-SO,it,en-SO", "SR": "nl-SR,en,srn,hns,jv", "SS": "en,ar-SS", "ST": "pt-ST", "SV": "es-SV", "SX": "nl,en", "SY": "ar-SY,ku,hy,arc,fr,en", "SZ": "en-SZ,ss-SZ", "TC": "en-TC", "TD": "fr-TD,ar-TD,sre", "TF": "fr", "TG": "fr-TG,ee,hna,kbp,dag,ha", "TH": "th,en", "TJ": "tg,ru", "TK": "tkl,en-TK", "TL": "tet,pt-TL,id,en", "TM": "tk,ru,uz", "TN": "ar-TN,fr", "TO": "to,en-TO", "TR": "tr-TR,ku,diq,az,av", "TT": "en-TT,hns,fr,es,zh", "TV": "tvl,en,sm,gil", "TW": "zh-TW,zh,nan,hak", "TZ": "sw-TZ,en,ar", "UA": "uk,ru-UA,rom,pl,hu", "UG": "en-UG,lg,sw,ar", "UM": "en-UM", "UY": "es-UY", "UZ": "uz,ru,tg", "VA": "la,it,fr", "VC": "en-VC,fr", "VE": "es-VE", "VG": "en-VG", "VI": "en-VI", "VN": "vi,en,fr,zh,km", "VU": "bi,en-VU,fr-VU", "WF": "wls,fud,fr-WF", "WS": "sm,en-WS", "XK": "sq,sr", "YE": "ar-YE", "YT": "fr-YT", "ZA": "zu,xh,af,nso,en-ZA,tn,st,ts,ss,ve,nr", "ZM": "en-ZM,bem,loz,lun,lue,ny,toi", "ZW": "en-ZW,sn,nr,nd" }; var CountryNames = { "AD": "Andorra", "AE": "United Arab Emirates", "AF": "Afghanistan", "AG": "Antigua and Barbuda", "AI": "Anguilla", "AL": "Albania", "AM": "Armenia", "AN": "Netherlands Antilles", "AO": "Angola", "AQ": "Antarctica", "AR": "Argentina", "AS": "American Samoa", "AT": "Austria", "AU": "Australia", "AW": "Aruba", "AX": "\xC5land Islands", "AZ": "Azerbaijan", "BA": "Bosnia and Herzegovina", "BB": "Barbados", "BD": "Bangladesh", "BE": "Belgium", "BF": "Burkina Faso", "BG": "Bulgaria", "BH": "Bahrain", "BI": "Burundi", "BJ": "Benin", "BL": "Saint Barth\xE9lemy", "BM": "Bermuda", "BN": "Brunei Darussalam", "BO": "Bolivia, Plurinational State of", "BQ": "Caribbean Netherlands", "BR": "Brazil", "BS": "Bahamas", "BT": "Bhutan", "BV": "Bouvet Island", "BW": "Botswana", "BY": "Belarus", "BZ": "Belize", "CA": "Canada", "CC": "Cocos (Keeling) Islands", "CD": "Democratic Republic of the Congo", "CF": "Central African Republic", "CH": "Switzerland", "CI": "C\xF4te d'Ivoire", "CK": "Cook Islands", "CL": "Chile", "CM": "Cameroon", "CN": "China", "CO": "Colombia", "CR": "Costa Rica", "CU": "Cuba", "CV": "Cape Verde", "CW": "Curacao", "CX": "Christmas Island", "CY": "Cyprus", "CZ": "Czech Republic", "DE": "Germany", "DJ": "Djibouti", "DK": "Denmark", "DM": "Dominica", "DO": "Dominican Republic", "DZ": "Algeria", "EC": "Ecuador", "EE": "Estonia", "EG": "Egypt", "EH": "Western Sahara", "ER": "Eritrea", "ES": "Spain", "ET": "Ethiopia", "FI": "Finland", "FJ": "Fiji", "FK": "Falkland Islands (Malvinas)", "FM": "Micronesia, Federated States of", "FO": "Faroe Islands", "FR": "France", "GA": "Gabon", "GB": "United Kingdom", "GD": "Grenada", "GE": "Georgia", "GF": "French Guiana", "GG": "Guernsey", "GH": "Ghana", "GI": "Gibraltar", "GL": "Greenland", "GM": "The Gambia", "GN": "Guinea", "GP": "Guadeloupe", "GQ": "Equatorial Guinea", "GR": "Greece", "GS": "South Georgia and the South Sandwich Islands", "GT": "Guatemala", "GU": "Guam", "GW": "Guinea-Bissau", "GY": "Guyana", "HK": "Hong Kong, China (SAR)", "HM": "Heard Island and McDonald Islands", "HN": "Honduras", "HR": "Croatia", "HT": "Haiti", "HU": "Hungary", "ID": "Indonesia", "IE": "Ireland", "IL": "Israel", "IM": "Isle of Man", "IN": "India", "IO": "British Indian Ocean Territory (administered by India)", "IQ": "Iraq", "IR": "Iran, Islamic Republic of", "IS": "Iceland", "IT": "Italy", "JE": "Jersey", "JM": "Jamaica", "JO": "Jordan", "JP": "Japan", "KE": "Kenya", "KG": "Kyrgyzstan", "KH": "Cambodia", "KI": "Kiribati", "KM": "Comoros", "KN": "Saint Kitts and Nevis", "KP": "Korea, Democratic People's Republic of (North Korea)", "KR": "Republic of Korea (South Korea)", "KW": "Kuwait", "KY": "Cayman Islands", "KZ": "Kazakhstan", "LA": "Lao People's Democratic Republic", "LB": "Lebanon", "LC": "Saint Lucia", "LI": "Liechtenstein", "LK": "Sri Lanka", "LR": "Liberia", "LS": "Lesotho", "LT": "Lithuania", "LU": "Luxembourg", "LV": "Latvia", "LY": "Libya", "MA": "Morocco", "MC": "Monaco", "MD": "Moldova, Republic of", "ME": "Montenegro", "MF": "Saint Martin (French part)", "MG": "Madagascar", "MH": "Marshall Islands", "MK": "North Macedonia", "ML": "Mali", "MM": "Myanmar", "MN": "Mongolia", "MO": "Macau, China (SAR)", "MP": "Northern Mariana Islands", "MQ": "Martinique", "MR": "Mauritania", "MS": "Montserrat", "MT": "Malta", "MU": "Mauritius", "MV": "Maldives", "MW": "Malawi", "MX": "Mexico", "MY": "Malaysia", "MZ": "Mozambique", "NA": "Namibia", "NC": "New Caledonia", "NE": "Niger", "NF": "Norfolk Island", "NG": "Nigeria", "NI": "Nicaragua", "NL": "Netherlands", "NO": "Norway", "NP": "Nepal", "NR": "Nauru", "NU": "Niue", "NZ": "New Zealand", "OM": "Oman", "PA": "Panama", "PE": "Peru", "PF": "French Polynesia", "PG": "Papua New Guinea", "PH": "Philippines", "PK": "Pakistan", "PL": "Poland", "PM": "Saint Pierre and Miquelon", "PN": "Pitcairn Islands", "PR": "Puerto Rico", "PS": "Palestine, State of (Gaza Strip)", "PT": "Portugal", "PW": "Palau", "PY": "Paraguay", "QA": "Qatar", "RE": "R\xE9union", "RO": "Romania", "RS": "Serbia", "RU": "Russian Federation (Russia)", "RW": "Rwanda", "SA": "Saudi Arabia", "SB": "Solomon Islands", "SC": "Seychelles", "SD": "Sudan", "SE": "Sweden", "SG": "Singapore", "SH": "Saint Helena, Ascension and Tristan da Cunha", "SI": "Slovenia", "SJ": "Svalbard and Jan Mayen Islands", "SK": "Slovakia", "SL": "Sierra Leone", "SM": "San Marino", "SN": "Senegal", "SO": "Somalia", "SR": "Suriname", "SS": "South Sudan", "ST": "S\xE3o Tom\xE9 and Pr\xEDncipe", "SV": "El Salvador", "SX": "Sint Maarten (Dutch part)", "SY": "Syrian Arab Republic", "SZ": "Eswatini", "TC": "Turks and Caicos Islands", "TD": "Chad", "TF": "French Southern Territories", "TG": "Togo", "TH": "Thailand", "TJ": "Tajikistan", "TK": "Tokelau", "TL": "Timor-Leste (East Timor)", "TM": "Turkmenistan", "TN": "Tunisia", "TO": "Tonga", "TR": "Turkey", "TT": "Trinidad and Tobago", "TV": "Tuvalu", "TW": "Taiwan, Province of China", "TZ": "Tanzania, United Republic of", "UA": "Ukraine", "UG": "Uganda", "UM": "United States Minor Outlying Islands", "US": "United States", "UY": "Uruguay", "UZ": "Uzbekistan", "VA": "Holy See (Vatican City State)", "VC": "Saint Vincent and the Grenadines", "VE": "Venezuela, Bolivarian Republic of", "VG": "British Virgin Islands", "VI": "United States Virgin Islands", "VN": "Viet Nam", "VU": "Vanuatu", "WF": "Wallis and Futuna", "WS": "Samoa", "XK": "Kosovo", "YE": "Yemen", "YT": "Mayotte", "ZA": "South Africa", "ZM": "Zambia", "ZW": "Zimbabwe" }; // src/detect-text-lang-eld.ts var import_eld = require("eld"); var import_iso_639_3 = require("iso-639-3"); var _CountryCodes = Object.fromEntries(Object.entries(CountryCodes).map(([key, value]) => [key, value.split(",")[0]])); var isSubset = false; function dynamicLangSubset(langSubset) { if (langSubset?.length) { isSubset = true; import_eld.eld.dynamicLangSubset(langSubset); } else if (isSubset) { isSubset = false; import_eld.eld.dynamicLangSubset(false); } } function detectTextLanguage(text, options = {}) { dynamicLangSubset(options.langSubset); const result = import_eld.eld.detect(text); if (result.isReliable()) { const threshold = options.threshold ?? 0.1; const scores = result.getScores(); const lang = result.language; if (scores[lang] >= threshold) { if (!options.isoCode) { return getLanguageFromIso6391(lang); } return lang; } } } function detectTextLangEx(text, options) { dynamicLangSubset(options?.langSubset); let result; const langInfo = import_eld.eld.detect(text); if (langInfo.isReliable()) { const threshold = options?.threshold ?? 0.1; const scores = langInfo.getScores(); const iso6391 = langInfo.language; if (scores[iso6391] >= threshold) { result = { iso6391, scores: langInfo.getScores() }; const countryCode = getCountryCodeFromLang(iso6391); if (countryCode) { result.iso3166 = countryCode; const countryName = CountryNames[countryCode]; result.country = countryName; } const info = import_iso_639_3.iso6393.find((i) => i.iso6391 === iso6391); if (info?.name) { result.name = info.name; } } } return result; } function getCountryCodeFromLang(iso6391) { for (const [key, value] of Object.entries(_CountryCodes)) { if (value === iso6391 || value.startsWith(iso6391 + "-")) { return key; } } } function getLanguageFromIso6391(iso6391) { const info = import_iso_639_3.iso6393.find((i) => i.iso6391 === iso6391); return info?.name; } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { CountryCodes, CountryNames, detectTextLangEx, detectTextLanguage, getCountryCodeFromLang, getLanguageFromIso6391 });