@fluent/langneg
Version:
Language Negotiation API for Fluent
153 lines (152 loc) • 4.66 kB
JavaScript
/* eslint no-magic-numbers: 0 */
// Language subtag: two or three letters, or the `*` range character.
const languageCodeRe = "([a-z]{2,3}|\\*)";
// Script subtag: exactly four letters, or `*`.
const scriptCodeRe = "(?:-([a-z]{4}|\\*))";
// Region subtag: two letters, or three digits (UN M.49 area codes such as
// `419` in `es-419`), or `*`. The three-digit form cannot be confused with a
// variant, because variants are at least four characters long.
const regionCodeRe = "(?:-([a-z]{2}|[0-9]{3}|\\*))";
// Variant subtag: a digit followed by three alphanumerics, or five to eight
// alphanumerics, or `*`. The outer capture group holds the variant (or `*`).
const variantCodeRe = "(?:-(([0-9][a-z0-9]{3}|[a-z0-9]{5,8})|\\*))";
/**
 * Regular expression splitting a locale id into four pieces, captured in
 * groups 1 to 4:
 *
 * Example: `en-Latn-US-macos`
 *
 * language: en
 * script: Latn
 * region: US
 * variant: macos
 *
 * Only the language is mandatory; script, region and variant are each
 * optional, so `en-US` and `es-419` match with an undefined script group.
 * Any position can also hold the range character `*`.
 * Matching is case-insensitive and expects `-` as the separator.
 */
const localeRe = new RegExp(
  `^${languageCodeRe}${scriptCodeRe}?${regionCodeRe}?${variantCodeRe}?$`,
  "i"
);
/**
 * A locale identifier parsed into `language`, `script`, `region` and
 * `variant` fields.
 *
 * Subtags missing from the identifier are left `undefined`. A literal `*`
 * subtag is stored as the string "*". `isWellFormed` records whether the
 * identifier matched `localeRe`.
 */
export class Locale {
  /**
   * Parses a locale id using `localeRe` into its four subtags.
   *
   * Underscores are accepted as separators (`en_US` is read as `en-US`).
   * Every section after the language may be omitted, so `en-US` parses with
   * an undefined script and variant.
   *
   * Subtags are normalised to their conventional casing so that ids which
   * differ only in case compare equal: language and variant lower case,
   * script title case, region upper case.
   *
   * When the id does not match, only `isWellFormed = false` is set and the
   * subtag fields are left unset.
   *
   * @param {string} locale - Locale identifier, e.g. `en-Latn-US-macos`.
   */
  constructor(locale) {
    const result = localeRe.exec(locale.replace(/_/g, "-"));
    if (!result) {
      this.isWellFormed = false;
      return;
    }
    const [, language, script, region, variant] = result;
    if (language) {
      this.language = language.toLowerCase();
    }
    if (script) {
      // Title-case the whole subtag, not just its first letter, so that
      // `LATN`, `latn` and `Latn` all normalise to `Latn`.
      this.script = script[0].toUpperCase() + script.slice(1).toLowerCase();
    }
    if (region) {
      this.region = region.toUpperCase();
    }
    // Always assigned (possibly to undefined), lower-cased when present.
    this.variant = variant === undefined ? undefined : variant.toLowerCase();
    this.isWellFormed = true;
  }

  /**
   * Strict comparison of all four subtags.
   *
   * @param {Locale} other - Locale to compare with.
   * @returns {boolean} True when every subtag is identical.
   */
  isEqual(other) {
    return (
      this.language === other.language &&
      this.script === other.script &&
      this.region === other.region &&
      this.variant === other.variant
    );
  }

  /**
   * Compares the four subtags, optionally treating missing subtags as
   * wildcards.
   *
   * Only `undefined` subtags act as wildcards here; a literal "*" subtag is
   * compared like any other string.
   *
   * @param {Locale} other - Locale to compare with.
   * @param {boolean} [thisRange=false] - Treat this locale's undefined
   *   subtags as matching anything.
   * @param {boolean} [otherRange=false] - Treat `other`'s undefined subtags
   *   as matching anything.
   * @returns {boolean} True when every subtag matches.
   */
  matches(other, thisRange = false, otherRange = false) {
    return (
      (this.language === other.language ||
        (thisRange && this.language === undefined) ||
        (otherRange && other.language === undefined)) &&
      (this.script === other.script ||
        (thisRange && this.script === undefined) ||
        (otherRange && other.script === undefined)) &&
      (this.region === other.region ||
        (thisRange && this.region === undefined) ||
        (otherRange && other.region === undefined)) &&
      (this.variant === other.variant ||
        (thisRange && this.variant === undefined) ||
        (otherRange && other.variant === undefined))
    );
  }

  /**
   * Serialises the defined subtags, joined with `-`.
   *
   * @returns {string} The locale id; an empty string when no subtag is set.
   */
  toString() {
    return [this.language, this.script, this.region, this.variant]
      .filter(part => part !== undefined)
      .join("-");
  }

  /** Drops the variant subtag. */
  clearVariants() {
    this.variant = undefined;
  }

  /** Drops the region subtag. */
  clearRegion() {
    this.region = undefined;
  }

  /**
   * Replaces this locale's subtags with the result of
   * `getLikelySubtagsMin` for its lower-cased string form.
   *
   * All four fields are overwritten with the values from the returned
   * locale; `isWellFormed` is left untouched.
   *
   * @returns {boolean} True when a replacement was found and applied,
   *   false when the locale was left unchanged.
   */
  addLikelySubtags() {
    const newLocale = getLikelySubtagsMin(this.toString().toLowerCase());
    if (newLocale) {
      this.language = newLocale.language;
      this.script = newLocale.script;
      this.region = newLocale.region;
      this.variant = newLocale.variant;
      return true;
    }
    return false;
  }
}
/**
 * A manually maintained list of likely subtags, corresponding to the Unicode
 * CLDR likelySubtags list.
 *
 * The list is curated by the maintainers of Project Fluent and is intended
 * for use cases where the full likelySubtags list cannot be used (for
 * example, because of its size).
 *
 * Keys and values are lower-cased locale ids; each key maps to its expanded
 * `language-script-region` form.
 *
 * This version of the list is based on CLDR 30.0.3.
 */
const likelySubtagsMin = {
  "ar": "ar-arab-eg",
  "az-arab": "az-arab-ir",
  "az-ir": "az-arab-ir",
  "be": "be-cyrl-by",
  "da": "da-latn-dk",
  "el": "el-grek-gr",
  "en": "en-latn-us",
  "fa": "fa-arab-ir",
  "ja": "ja-jpan-jp",
  "ko": "ko-kore-kr",
  "pt": "pt-latn-br",
  "sr": "sr-cyrl-rs",
  "sr-ru": "sr-latn-ru",
  "sv": "sv-latn-se",
  "ta": "ta-taml-in",
  "uk": "uk-cyrl-ua",
  "zh": "zh-hans-cn",
  "zh-hant": "zh-hant-tw",
  "zh-hk": "zh-hant-hk",
  "zh-mo": "zh-hant-mo",
  "zh-tw": "zh-hant-tw",
  "zh-gb": "zh-hant-gb",
  "zh-us": "zh-hant-us",
};
/**
 * Language codes for which `getLikelySubtagsMin` uses the upper-cased
 * language code as the region (for example `de` gets region `DE`).
 */
const regionMatchingLangs = [
  "az", "bg", "cs", "de", "es",
  "fi", "fr", "hu", "it", "lt",
  "lv", "nl", "pl", "ro", "ru",
];
/**
 * Looks up the likely-subtags expansion for a locale id.
 *
 * The id is first looked up verbatim in `likelySubtagsMin`. Failing that, it
 * is parsed, and if its language is listed in `regionMatchingLangs` the
 * region is set to the upper-cased language code. Any region already present
 * in the id is overwritten in that case (`de-at` ends up with region `DE`).
 *
 * @param {string} loc - Locale id; the only visible caller passes it
 *   lower-cased, matching the casing of the table keys.
 * @returns {Locale|null} The expanded locale, or null when neither rule
 *   applies.
 */
function getLikelySubtagsMin(loc) {
  const isCurated = Object.prototype.hasOwnProperty.call(likelySubtagsMin, loc);
  if (isCurated) {
    const expandedId = likelySubtagsMin[loc];
    return new Locale(expandedId);
  }

  const parsed = new Locale(loc);
  const lang = parsed.language;
  if (!lang || !regionMatchingLangs.includes(lang)) {
    return null;
  }

  parsed.region = lang.toUpperCase();
  return parsed;
}