// pwabuilder-lib
// Version:
// PWA Builder Core Library
// 79 lines (63 loc) • 1.99 kB
// JavaScript
var cheerio = require('cheerio');
var lang = require('./lang');
/*
 * Two-letter language codes that `direction` reports as right-to-left when
 * the page language is the only signal available.
 *
 *   ar Arabic       az Azerbaijani   dv Divehi / Dhivehi / Maldivian
 *   fa Persian      he Hebrew        jv Javanese
 *   kk Kazakh       ks Kashmiri      ku Kurdish
 *   ml Malayalam    ms Malay         pa Panjabi / Punjabi
 *   ps Pushto / Pashto               sd Sindhi
 *   so Somali       tk Turkmen       ug Uighur / Uyghur
 *   ur Urdu         yi Yiddish
 *
 * NOTE(review): several entries (e.g. 'ml', 'jv', 'kk', 'so', 'tk') look like
 * languages that are usually written left-to-right today — confirm the
 * origin of this list before relying on it.
 */
var rtlLangs = [
  'ar', 'az', 'dv', 'fa', 'he',
  'jv', 'kk', 'ks', 'ku', 'ml',
  'ms', 'pa', 'ps', 'sd', 'so',
  'tk', 'ug', 'ur', 'yi'
];
/**
 * Works out the text direction of an HTML page.
 *
 * The checks run from cheapest to most expensive:
 *   1. a `dir` attribute on the <html> element is returned verbatim;
 *   2. if more than 40% of the non-whitespace body text (with <script>
 *      contents removed) falls in the RTL Unicode ranges matched below,
 *      the page is 'rtl';
 *   3. otherwise the language reported by `./lang` is looked up in `rtlLangs`.
 *
 * The callback is always invoked exactly once. When `./lang` reports neither
 * a language nor an error, the direction falls back to 'ltr'.
 *
 * @param {Object} obj - page descriptor; `obj.html` is the markup given to
 *   cheerio. The same object is forwarded unchanged to `./lang`.
 * @param {Function} callback - node-style `callback(err, dir)`.
 */
var direction = function(obj, callback) {
  var html = obj.html;
  var $ = cheerio.load(html);

  // do the cheap check up front. If they declare it, we use it
  var declaredDir = $('html[dir]').attr('dir');
  if (declaredDir) {
    callback(null, declaredDir);
    return;
  }

  // No declared dir: check whether most characters in the page's corpus are
  // known RTL unicode characters. The body text includes the text of
  // <script> elements, which would skew the ratio because JS is latin
  // characters, so those are stripped from a clone first.
  var $body = $('body').clone();
  $body.find('script').remove();

  // strip out whitespace too
  var text = $body.text().replace(/\s/g, '');
  var rtlRegex = /[\u0590-\u083F]|[\u08A0-\u08FF]|[\uFB1D-\uFDFF]|[\uFE70-\uFEFF]/mg;
  var rtlChars = text.match(rtlRegex);

  // `rtlChars` is null when nothing matched (including empty text), which
  // also keeps the division below away from a zero-length corpus.
  if (rtlChars && rtlChars.length / text.length > 0.4) {
    callback(null, 'rtl');
    return;
  }

  // Not confidently RTL from the characters alone: fall back to the page
  // language and decide from its language code.
  lang(obj, function(err, result) {
    if (err && !result) {
      // Propagate the failure instead of leaving the caller waiting forever.
      callback(err);
      return;
    }

    var dir = 'ltr';
    if (result) {
      // Accept tags such as 'ar-SA', 'ar_SA' or 'AR' by comparing only the
      // lower-cased primary subtag. NOTE(review): the exact format returned
      // by ./lang is not visible here — confirm.
      var primary = String(result).toLowerCase().split(/[-_]/)[0];
      if (rtlLangs.indexOf(primary) > -1) {
        dir = 'rtl';
      }
    }
    callback(err, dir);
  });
};
// The module's export is the `direction(obj, callback)` function itself.
module.exports = direction;
;