ilib-locale
Version:
A BCP-47/IETF locale specifier parser/validator
474 lines (435 loc) • 17.4 kB
JavaScript
/*
* Locale.js - Locale specifier definition
*
* Copyright © 2012-2015, 2018, 2021-2022, 2025-2026 JEDLSoft
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// from http://en.wikipedia.org/wiki/ISO_3166-1
import { a2toa3regmap } from "./a2toa3regmap.js";
import { a1toa3langmap } from "./a1toa3langmap.js";
// the list below is originally from https://unicode.org/iso15924/iso15924-codes.html
import { iso15924 } from "./scripts.js";
import * as ilibEnv from "ilib-env";
/**
 * Find the position of a value within an array. When the array (or array-like
 * object) provides its own indexOf method, that method is used directly.
 * Otherwise, a linear scan using strict equality is performed instead.
 *
 * Searching for null or undefined always yields -1. Other falsy values such
 * as 0, the empty string, or false are legitimate array members and are
 * searched for normally.
 *
 * @private
 * @param {Array.<Object|string|number>} array array to search
 * @param {Object|string|number} obj object being sought. This should be of the same type as the
 * members of the array being searched. If not, this function will not return
 * any results.
 * @return {number} index of the object in the array, or -1 if it is not in the array
 * or if either argument is missing.
 */
function indexOf(array, obj) {
    // Only reject a missing needle; a plain truthiness test would wrongly
    // refuse to look for 0, "" or false.
    if (!array || obj === undefined || obj === null) {
        return -1;
    }
    if (typeof(array.indexOf) === 'function') {
        return array.indexOf(obj);
    }
    // fallback for array-likes that do not implement indexOf
    for (let i = 0; i < array.length; i++) {
        if (array[i] === obj) {
            return i;
        }
    }
    return -1;
}
/**
* @class Represent a locale specifier instance.
* Locales are specified either with a specifier string
* that follows the BCP-47 convention (roughly: "language-region-script-variant") or
* with 4 parameters that specify the language, region, variant, and script individually.
*/
class Locale {
    /**
     * Create a new locale instance. Locales are specified either with a specifier string
     * that follows the BCP-47 convention (roughly: "language-script-region-variant") or
     * with 4 parameters that specify the language, region, variant, and script individually.<p>
     *
     * The language is given as an ISO 639-1 two-letter, lower-case language code. You
     * can find a full list of these codes at
     * <a href="http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes">http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes</a><p>
     *
     * The region is given as an ISO 3166-1 two-letter, upper-case region code. You can
     * find a full list of these codes at
     * <a href="http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2">http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2</a>.<p>
     *
     * The variant is any string that does not contain a dash which further differentiates
     * locales from each other.<p>
     *
     * The script is given as the ISO 15924 four-letter script code. In some locales,
     * text may be validly written in more than one script. For example, Serbian is often
     * written in both Latin and Cyrillic, though not usually mixed together. You can find a
     * full list of these codes at
     * <a href="http://en.wikipedia.org/wiki/ISO_15924#List_of_codes">http://en.wikipedia.org/wiki/ISO_15924#List_of_codes</a>.<p>
     *
     * As an example in ilib, the script can be used in the date formatter. Dates formatted
     * in Serbian could have day-of-week names or month names written in the Latin
     * or Cyrillic script. Often one script is default such that sr-RS-Latn is the same
     * as sr-RS so the script code "Latn" can be left off of the locale spec.<p>
     *
     * Each part is optional, and an empty string in the specifier before or after a
     * dash or as a parameter to the constructor denotes an unspecified value. In this
     * case, many of the ilib functions will treat the locale as generic. For example
     * the locale "en-" is equivalent to "en" and to "en--" and denotes a locale
     * of "English" with an unspecified region and variant, which typically matches
     * any region or variant.<p>
     *
     * When a specifier string is parsed, its parts are separated by dashes or
     * underscores and each part is classified by its shape only (no case
     * normalization is applied). When the 4 parameter form is used, each
     * parameter is trimmed, the language is lower-cased and the region is
     * upper-cased.<p>
     *
     * Without any arguments to the constructor, the locale spec is taken from
     * ilibEnv.getLocale(), i.e. the locale of the host Javascript engine.<p>
     *
     *
     * @constructor
     * @param {?string|Locale=} language the ISO 639 2-letter code for the language, or a full
     * locale spec in BCP-47 format, or another Locale instance to copy from
     * @param {string=} region the ISO 3166 2-letter code for the region
     * @param {string=} variant the name of the variant of this locale, if any
     * @param {string=} script the ISO 15924 code of the script for this locale, if any
     */
    constructor(language, region, variant, script) {
        if (typeof(region) === 'undefined' && typeof(variant) === 'undefined' && typeof(script) === 'undefined') {
            // Single-argument form: the first argument is a full spec string,
            // a locale-like object to copy, or missing (use the platform locale).
            const spec = language || ilibEnv.getLocale();
            if (typeof(spec) === 'string') {
                const parts = spec.split(/[-_]/g);
                for (let i = 0; i < parts.length; i++) {
                    const part = parts[i];
                    // Check for a BCP-47 extension or private use singleton
                    // (a single lower-case letter a-z, including "x").
                    // Everything from the singleton onwards becomes part of
                    // the variant. A singleton in the last position has
                    // nothing following it, so it is classified normally.
                    if (Locale._isExtensionSingleton(part) && i < parts.length - 1) {
                        const extensionPart = parts.slice(i).join('-');
                        /**
                         * @private
                         * @type {string|undefined}
                         */
                        this.variant = this.variant ? `${this.variant}-${extensionPart}` : extensionPart;
                        break;
                    } else if (Locale._isLanguageCode(part)) {
                        /**
                         * @private
                         * @type {string|undefined}
                         */
                        this.language = part;
                    } else if (Locale._isRegionCode(part)) {
                        /**
                         * @private
                         * @type {string|undefined}
                         */
                        this.region = part;
                    } else if (Locale._isScriptCode(part)) {
                        /**
                         * @private
                         * @type {string|undefined}
                         */
                        this.script = part;
                    } else {
                        // Anything unrecognized is appended to the existing
                        // variant instead of overwriting it
                        this.variant = this.variant ? `${this.variant}-${part}` : part;
                    }
                }
                // normalize missing or empty components to undefined
                this.language = this.language || undefined;
                this.region = this.region || undefined;
                this.script = this.script || undefined;
                this.variant = this.variant || undefined;
            } else if (typeof(spec) === 'object') {
                // copy the components from another locale instance
                this.language = spec.language || undefined;
                this.region = spec.region || undefined;
                this.script = spec.script || undefined;
                this.variant = spec.variant || undefined;
            }
        } else {
            // Multi-argument form: each component is given separately. Anything
            // that is not a non-blank string counts as unspecified.
            const clean = (value) => ((value && typeof(value) === "string") ? value.trim() : "");
            this.language = clean(language).toLowerCase() || undefined;
            this.region = clean(region).toUpperCase() || undefined;
            this.variant = clean(variant) || undefined;
            this.script = clean(script) || undefined;
        }
        this._genSpec();
    }

    /**
     * Build the cached specifier string from the components that are
     * present, in the order language, script, region, variant.
     * @private
     */
    _genSpec() {
        this.spec = [this.language, this.script, this.region, this.variant].filter(part => part).join("-");
    }

    /**
     * Return the ISO 639 language code for this locale.
     * @return {string|undefined} the language code for this locale
     */
    getLanguage() {
        return this.language;
    }

    /**
     * Return the language of this locale as an ISO-639-alpha3 language code
     * @return {string|undefined} the alpha3 language code of this locale
     */
    getLanguageAlpha3() {
        return Locale.languageAlpha1ToAlpha3(this.language);
    }

    /**
     * Return the ISO 3166 region code for this locale.
     * @return {string|undefined} the region code of this locale
     */
    getRegion() {
        return this.region;
    }

    /**
     * Return the region of this locale as an ISO-3166-alpha3 region code
     * @return {string|undefined} the alpha3 region code of this locale
     */
    getRegionAlpha3() {
        return Locale.regionAlpha2ToAlpha3(this.region);
    }

    /**
     * Return the ISO 15924 script code for this locale
     * @return {string|undefined} the script code of this locale
     */
    getScript() {
        return this.script;
    }

    /**
     * Return the variant code for this locale
     * @return {string|undefined} the variant code of this locale, if any
     */
    getVariant() {
        return this.variant;
    }

    /**
     * Return the whole locale specifier as a string. The specifier is
     * regenerated if the cached value is missing or empty.
     * @return {string} the locale specifier
     */
    getSpec() {
        if (!this.spec) {
            this._genSpec();
        }
        return this.spec;
    }

    /**
     * Return the language locale specifier. This includes the
     * language and the script if it is available. This can be
     * used to see whether the written language of two locales
     * match each other regardless of the region or variant.
     *
     * @return {string} the language locale specifier, or the empty string
     * if this locale has no language
     */
    getLangSpec() {
        if (!this.language) {
            return "";
        }
        return this.script ? `${this.language}-${this.script}` : this.language;
    }

    /**
     * Express this locale object as a string. Currently, this simply calls the getSpec
     * function to represent the locale as its specifier.
     *
     * @return {string} the locale specifier
     */
    toString() {
        return this.getSpec();
    }

    /**
     * Return true if the other locale is exactly equal to the current one,
     * meaning that the language, region, script, and variant are all identical.
     * @param {Locale|null|undefined} other the locale to compare against
     * @return {boolean} whether or not the other locale is equal to the current one.
     * A null or undefined argument is never equal to this locale.
     */
    equals(other) {
        // guard so that comparing against a missing locale does not throw
        if (other === null || other === undefined) {
            return false;
        }
        return this.language === other.language &&
            this.region === other.region &&
            this.script === other.script &&
            this.variant === other.variant;
    }

    /**
     * Return true if the current locale uses a valid ISO codes for each component
     * of the locale that exists. A locale that has none of language, script, or
     * region is not valid. The variant is not checked.
     * @return {boolean} true if the current locale has all valid components, and
     * false otherwise.
     */
    isValid() {
        if (!this.language && !this.script && !this.region) return false;
        // each component that is present must have the right shape and must
        // also appear in the corresponding lookup table
        return !!((!this.language || (Locale._isLanguageCode(this.language) && Locale.a1toa3langmap[this.language])) &&
            (!this.script || (Locale._isScriptCode(this.script) && Locale.iso15924.indexOf(this.script) > -1)) &&
            (!this.region || (Locale._isRegionCode(this.region) && Locale.a2toa3regmap[this.region])));
    }
}
// Lookup tables attached to the class as static properties. They are consulted
// by isValid() and by the alpha code conversion functions.
// region code map, from http://en.wikipedia.org/wiki/ISO_3166-1
Locale.a2toa3regmap = a2toa3regmap;
// language code map, imported from ./a1toa3langmap.js
Locale.a1toa3langmap = a1toa3langmap;
// the list below is originally from https://unicode.org/iso15924/iso15924-codes.html
Locale.iso15924 = iso15924.scripts;
/**
 * Tell whether the first character of the given string falls outside
 * of the lower case ASCII range a-z.
 *
 * @private
 * @param {string} str the string whose first char is checked
 * @return {boolean} true if the first char is not a lower case ASCII letter
 */
Locale._notLower = function(str) {
    // plain char code comparison so that there is no dependency on the CType functions
    const LOWER_A = 97;
    const LOWER_Z = 122;
    const code = str.charCodeAt(0);
    return code > LOWER_Z || code < LOWER_A;
};
/**
 * Tell whether the first character of the given string falls outside
 * of the upper case ASCII range A-Z.
 *
 * @private
 * @param {string} str the string whose first char is checked
 * @return {boolean} true if the first char is not an upper case ASCII letter
 */
Locale._notUpper = function(str) {
    // plain char code comparison so that there is no dependency on the CType functions
    const UPPER_A = 65;
    const UPPER_Z = 90;
    const code = str.charCodeAt(0);
    return code > UPPER_Z || code < UPPER_A;
};
/**
 * Tell whether the first character of the given string falls outside
 * of the ASCII digit range 0-9.
 *
 * @private
 * @param {string} str the string whose first char is checked
 * @return {boolean} true if the first char is not an ASCII digit
 */
Locale._notDigit = function(str) {
    // plain char code comparison so that there is no dependency on the CType functions
    const DIGIT_0 = 48;
    const DIGIT_9 = 57;
    const code = str.charCodeAt(0);
    return code > DIGIT_9 || code < DIGIT_0;
};
/**
 * Tell whether or not the given string has the right shape to be an
 * ISO 639 language code: 2 or 3 characters, all of them lower case
 * ASCII letters. Only the syntax is checked, not whether the code is
 * actually assigned.
 *
 * @private
 * @param {string} str the string to check
 * @return {boolean} true if the string could syntactically be a language code.
 */
Locale._isLanguageCode = function(str) {
    if (typeof(str) === 'undefined') {
        return false;
    }
    const len = str.length;
    if (len < 2 || len > 3) {
        return false;
    }
    let pos = 0;
    while (pos < len) {
        if (Locale._notLower(str.charAt(pos))) {
            return false;
        }
        pos++;
    }
    return true;
};
/**
 * Tell whether or not the given string has the right shape to be an
 * ISO 3166 2-letter region code (two upper case ASCII letters) or an
 * M.49 3-digit region code (three ASCII digits). Only the syntax is
 * checked, not whether the code is actually assigned.
 *
 * @private
 * @param {string} str the string to check
 * @return {boolean} true if the string could syntactically be a region code.
 */
Locale._isRegionCode = function (str) {
    if (typeof(str) === 'undefined') {
        return false;
    }
    const len = str.length;
    if (len < 2 || len > 3) {
        return false;
    }
    // two chars must all be upper case letters, three chars must all be digits
    const isWrongChar = (len === 2) ? Locale._notUpper : Locale._notDigit;
    for (let pos = 0; pos < len; pos++) {
        if (isWrongChar(str.charAt(pos))) {
            return false;
        }
    }
    return true;
};
/**
 * Tell whether or not the given string has the right shape to be an
 * ISO 15924 script code: exactly 4 characters, where the first is an
 * upper case ASCII letter and the remaining three are lower case ASCII
 * letters. Only the syntax is checked, not whether the code is actually
 * assigned.
 *
 * @private
 * @param {string} str the string to check
 * @return {boolean} true if the string could syntactically be a script code.
 */
Locale._isScriptCode = function(str) {
    if (typeof(str) === 'undefined') {
        return false;
    }
    if (str.length !== 4) {
        return false;
    }
    // title case: one upper case letter ...
    if (Locale._notUpper(str.charAt(0))) {
        return false;
    }
    // ... followed by three lower case letters
    let pos = 1;
    while (pos < 4) {
        if (Locale._notLower(str.charAt(pos))) {
            return false;
        }
        pos++;
    }
    return true;
};
/**
 * Tell whether or not the given string is treated as a BCP-47 extension
 * singleton. A singleton here is exactly one lower case ASCII letter (a-z)
 * that introduces extension subtags. This includes 'x' for private use
 * as well as letters like 'u' for Unicode locale extensions and 't' for
 * transformed content.
 *
 * @private
 * @param {string} str the string to check
 * @return {boolean} true if the string is an extension singleton.
 */
Locale._isExtensionSingleton = function(str) {
    if (typeof(str) === 'undefined') {
        return false;
    }
    if (str.length !== 1) {
        return false;
    }
    const isLower = !Locale._notLower(str.charAt(0));
    return isLower;
};
/**
 * Map an ISO 3166 alpha2 region code to its ISO 3166 alpha3 equivalent
 * by looking it up in Locale.a2toa3regmap. When the table has no entry
 * for the given code, the argument is handed back unchanged.
 * @static
 * @param {string|undefined} alpha2 the alpha2 code to map
 * @return {string|undefined} the alpha3 equivalent of the given alpha2 code, or the alpha2
 * parameter if the alpha2 value is not found
 */
Locale.regionAlpha2ToAlpha3 = function(alpha2) {
    const alpha3 = Locale.a2toa3regmap[alpha2];
    if (alpha3) {
        return alpha3;
    }
    return alpha2;
};
/**
 * Map an ISO 639 alpha1 language code to its ISO 639 alpha3 equivalent
 * by looking it up in Locale.a1toa3langmap. When the table has no entry
 * for the given code, the argument is handed back unchanged.
 * @static
 * @param {string|undefined} alpha1 the alpha1 code to map
 * @return {string|undefined} the alpha3 equivalent of the given alpha1 code, or the alpha1
 * parameter if the alpha1 value is not found
 */
Locale.languageAlpha1ToAlpha3 = function(alpha1) {
    const alpha3 = Locale.a1toa3langmap[alpha1];
    if (alpha3) {
        return alpha3;
    }
    return alpha1;
};
export default Locale;