which-lang
Version:
the best option for predicting a language
38 lines (35 loc) • 2.33 kB
JavaScript
import path from "path";
import fast_text from "fast-text";
let { Classifier } = fast_text;
// Language codes that `LanguageDetection.predict` marks with
// `isReliableLanguage: true`; any predicted code not listed here is reported
// with `isReliableLanguage: false`.
const reliabilityList = [
  "th", "ml", "my", "ta", "te", "pa", "am", "kn", "gu", "si",
  "bo", "dv", "ja", "el", "he", "ko", "hy", "bn", "mr", "en",
  "zh", "tr", "ru", "de", "ug", "vi", "eo", "ka", "hi", "it",
  "ar", "fr", "hu", "lo", "pl", "km", "es", "fi", "pt", "mk",
  "uk", "ur", "nl", "lt", "cs",
];

// Language codes exposed on every `LanguageDetection` instance as
// `languageIsoCodes`.
// NOTE(review): presumably the full label set of the bundled model — confirm
// against the model file before relying on it as exhaustive.
const languageIsoCodes = [
  "af", "als", "am", "an", "ar", "arz", "as", "ast", "av", "az",
  "azb", "ba", "bar", "bcl", "be", "bg", "bh", "bn", "bo", "bpy",
  "br", "bs", "bxr", "ca", "cbk", "ce", "ceb", "ckb", "co", "cs",
  "cv", "cy", "da", "de", "diq", "dsb", "dty", "dv", "el", "eml",
  "en", "eo", "es", "et", "eu", "fa", "fi", "fr", "frr", "fy",
  "ga", "gd", "gl", "gn", "gom", "gu", "gv", "he", "hi", "hif",
  "hr", "hsb", "ht", "hu", "hy", "ia", "id", "ie", "ilo", "io",
  "is", "it", "ja", "jbo", "jv", "ka", "kk", "km", "kn", "ko",
  "krc", "ku", "kv", "kw", "ky", "la", "lb", "lez", "li", "lmo",
  "lo", "lrc", "lt", "lv", "mai", "mg", "mhr", "min", "mk", "ml",
  "mn", "mr", "mrj", "ms", "mt", "mwl", "my", "myv", "mzn", "nah",
  "nap", "nds", "ne", "new", "nl", "nn", "no", "oc", "or", "os",
  "pa", "pam", "pfl", "pl", "pms", "pnb", "ps", "pt", "qu", "rm",
  "ro", "ru", "rue", "sa", "sah", "sc", "scn", "sco", "sd", "sh",
  "si", "sk", "sl", "so", "sq", "sr", "su", "sv", "sw", "ta",
  "te", "tg", "th", "tk", "tl", "tr", "tt", "tyv", "ug", "uk",
  "ur", "uz", "vec", "vep", "vi", "vls", "vo", "wa", "war", "wuu",
  "xal", "xmf", "yi", "yo", "yue", "zh",
];
/**
 * Promise-based language predictor built on a fast-text `Classifier`.
 *
 * Each instance owns one classifier (`this.classifier`) and exposes the
 * module's list of language codes as `this.languageIsoCodes`.
 */
export default class LanguageDetection {
  /**
   * @param {object} [options]
   * @param {string} [options.model] Path to the model file handed to
   *   `Classifier`. When omitted, "./model/fast-text-lid-model.bin" is
   *   resolved with `path.resolve`, i.e. against the current working
   *   directory of the process.
   *   NOTE(review): the default therefore depends on where the process was
   *   started, not on where this module lives — confirm that is intended.
   */
  constructor(options = {}) {
    const {
      model = path.resolve("./model/fast-text-lid-model.bin"),
    } = options;
    this.classifier = new Classifier(model);
    this.languageIsoCodes = languageIsoCodes;
  }

  /**
   * Flattens `text` into a single line before it is passed to the classifier.
   *
   * Runs of line breaks (`\r` and/or `\n`) become one space so that words on
   * adjacent lines are not fused together; colons are removed.
   *
   * @param {string} text Raw input text.
   * @returns {string} The single-line, colon-free text.
   * @throws {TypeError} If `text` is not a string.
   */
  formatInput(text) {
    if (typeof text !== "string") {
      throw new TypeError("text must be a string");
    }
    return text.replace(/[\r\n]+/g, " ").replace(/:/g, "");
  }

  /**
   * Predicts the language(s) of `text`.
   *
   * @param {string} text Text to classify; it is passed through `formatInput`.
   * @param {number} [k=1] Forwarded unchanged as the second argument of
   *   `classifier.predict`.
   * @returns {Promise<Array<{lang: string, prob: number, isReliableLanguage: boolean}>>}
   *   One entry per item reported by the classifier: `lang` is the item's
   *   label without its leading "__label__", `prob` is the item's `value`, and
   *   `isReliableLanguage` tells whether `lang` appears in `reliabilityList`.
   *   The promise rejects if the input is not a string, if the classifier
   *   reports an error, or if its result cannot be converted.
   */
  predict(text, k = 1) {
    return new Promise((resolve, reject) => {
      this.classifier.predict(this.formatInput(text), k, (err, res) => {
        if (err) {
          reject(err);
          return;
        }
        // This callback may run outside the promise executor, where a thrown
        // error would not reject the promise; convert failures explicitly.
        try {
          const predictions = res.map((item) => {
            const lang = item.label.replace(/^__label__/, "");
            return {
              lang,
              prob: item.value,
              isReliableLanguage: reliabilityList.includes(lang),
            };
          });
          resolve(predictions);
        } catch (conversionError) {
          reject(conversionError);
        }
      });
    });
  }
}