@formatjs/ecma402-abstract
Version:
A collection of implementations of ECMAScript abstract operations
469 lines (468 loc) • 18.3 kB
JavaScript
import { Decimal } from "@formatjs/bigdecimal";
import { S_UNICODE_REGEX } from "../regex.generated.js";
import "../types/number.js";
import { getPowerOf10 } from "./decimal-cache.js";
import "../types/plural-rules.js";
import { digitMapping } from "./digit-mapping.generated.js";
import { GetUnsignedRoundingMode } from "./GetUnsignedRoundingMode.js";
import { ToRawFixed } from "./ToRawFixed.js";
// S_UNICODE_REGEX is generated from: unicode-12.1.0/General_Category/Symbol/regex.js
// IE11 does not support the unicode flag; otherwise it would simply be /\p{S}/u.
// The two regexes below anchor that symbol class to the start / end of a string.
// Equivalent of /^\p{S}/u: the string starts with a symbol character.
const CARET_S_UNICODE_REGEX = new RegExp(`^${S_UNICODE_REGEX.source}`);
// Equivalent of /\p{S}$/u: the string ends with a symbol character.
const S_DOLLAR_UNICODE_REGEX = new RegExp(`${S_UNICODE_REGEX.source}$`);
// Matches the numeric placeholder inside a CLDR number pattern, e.g. `0` or `#,##0.00`.
// NOTE: the `g` flag makes this regex stateful (`lastIndex`) when used with `exec`/`test`.
const CLDR_NUMBER_PATTERN = /[#0](?:[.,][#0]+)*/g;
// Formats a number into an ordered list of `{ type, value }` parts.
//
// Parameters:
// - numberResult: the rounded number; this function reads `sign`, `exponent`, `magnitude`
//   and `roundedNumber`, and forwards the whole object to the helpers.
// - data: locale data; reads `numbers.{nu,decimal,currency,percent,symbols}`, `currencies`
//   and `units`.
// - pl: plural rules object, forwarded to `selectPlural` / `getCompactDisplayPattern`.
// - options: resolved options; reads `notation`, `style`, `numberingSystem`, `compactDisplay`,
//   `currency`, `currencyDisplay`, `currencySign`, `useGrouping`, `roundingIncrement`,
//   `roundingMode`, `unit` and `unitDisplay`.
//
// Returns an array of parts. Part 1 builds the parts of the number pattern itself
// (sign, digits, currency symbol, percent sign, compact literal); Part 2 wraps those parts
// in a unit pattern when the style is "unit" or currency with `currencyDisplay: "name"`.
export default function formatToParts(numberResult, data, pl, options) {
  const { sign, exponent, magnitude } = numberResult;
  const { notation, style, numberingSystem } = options;
  const defaultNumberingSystem = data.numbers.nu[0];
  // #region Part 1: partition and interpolate the CLDR number pattern.
  // ----------------------------------------------------------
  let compactNumberPattern = null;
  if (notation === "compact" && magnitude) {
    compactNumberPattern = getCompactDisplayPattern(numberResult, pl, data, style, options.compactDisplay, options.currencyDisplay, numberingSystem);
  }
  // This is used multiple times
  let nonNameCurrencyPart;
  if (style === "currency" && options.currencyDisplay !== "name") {
    const byCurrencyDisplay = data.currencies[options.currency];
    if (byCurrencyDisplay) {
      switch (options.currencyDisplay) {
        case "code":
          nonNameCurrencyPart = options.currency;
          break;
        case "symbol":
          nonNameCurrencyPart = byCurrencyDisplay.symbol;
          break;
        default:
          nonNameCurrencyPart = byCurrencyDisplay.narrow;
          break;
      }
    } else {
      // Fallback for unknown currency
      nonNameCurrencyPart = options.currency;
    }
  }
  let numberPattern;
  if (!compactNumberPattern) {
    // Note: if the style is unit, or is currency and the currency display is name,
    // its unit parts will be interpolated in part 2. So here we can fallback to decimal.
    if (style === "decimal" || style === "unit" || style === "currency" && options.currencyDisplay === "name") {
      // Shortcut for decimal
      const decimalData = data.numbers.decimal[numberingSystem] || data.numbers.decimal[defaultNumberingSystem];
      numberPattern = getPatternForSign(decimalData.standard, sign);
    } else if (style === "currency") {
      const currencyData = data.numbers.currency[numberingSystem] || data.numbers.currency[defaultNumberingSystem];
      // We replace number pattern part with `0` for easier postprocessing.
      numberPattern = getPatternForSign(currencyData[options.currencySign], sign);
    } else {
      // percent
      const percentPattern = data.numbers.percent[numberingSystem] || data.numbers.percent[defaultNumberingSystem];
      numberPattern = getPatternForSign(percentPattern, sign);
    }
  } else {
    numberPattern = compactNumberPattern;
  }
  // Extract the decimal number pattern string. It looks like "#,##0,00", which will later be
  // used to infer decimal group sizes.
  // CLDR_NUMBER_PATTERN is global, so `exec` advances its `lastIndex`; the global `replace`
  // right below runs the regex to exhaustion, which leaves `lastIndex` back at 0.
  const decimalNumberPattern = CLDR_NUMBER_PATTERN.exec(numberPattern)[0];
  // Now we start to substitute patterns
  // 1. replace strings like `0` and `#,##0.00` with `{0}`
  // 2. unquote characters (invariant: the quoted characters do not contain the special tokens)
  numberPattern = numberPattern.replace(CLDR_NUMBER_PATTERN, "{0}").replace(/'(.)'/g, "$1");
  // Handle currency spacing (both compact and non-compact).
  if (style === "currency" && options.currencyDisplay !== "name") {
    const currencyData = data.numbers.currency[numberingSystem] || data.numbers.currency[defaultNumberingSystem];
    // See `currencySpacing` substitution rule in TR-35.
    // Here we always assume the currencyMatch is "[:^S:]" and surroundingMatch is "[:digit:]".
    //
    // Example 1: for pattern "#,##0.00¤" with symbol "US$", we replace "¤" with the symbol,
    // but insert an extra non-break space before the symbol, because "[:^S:]" matches "U" in
    // "US$" and "[:digit:]" matches the latn numbering system digits.
    //
    // Example 2: for pattern "¤#,##0.00" with symbol "US$", there is no spacing between symbol
    // and number, because `$` does not match "[:^S:]".
    //
    // Implementation note: here we do the best effort to infer the insertion.
    // We also assume that `beforeInsertBetween` and `afterInsertBetween` will never be `;`.
    const afterCurrency = currencyData.currencySpacing.afterInsertBetween;
    if (afterCurrency && !S_DOLLAR_UNICODE_REGEX.test(nonNameCurrencyPart)) {
      numberPattern = numberPattern.replace("¤{0}", `¤${afterCurrency}{0}`);
    }
    const beforeCurrency = currencyData.currencySpacing.beforeInsertBetween;
    if (beforeCurrency && !CARET_S_UNICODE_REGEX.test(nonNameCurrencyPart)) {
      numberPattern = numberPattern.replace("{0}¤", `{0}${beforeCurrency}¤`);
    }
  }
  // The following tokens are special: `{0}`, `¤`, `%`, `-`, `+`, `{c:...}`.
  const numberPatternParts = numberPattern.split(/({c:[^}]+}|\{0\}|[¤%\-+])/g);
  const numberParts = [];
  const symbols = data.numbers.symbols[numberingSystem] || data.numbers.symbols[defaultNumberingSystem];
  for (const part of numberPatternParts) {
    if (!part) {
      continue;
    }
    switch (part) {
      case "{0}": {
        // We only need to handle scientific and engineering notation here.
        numberParts.push(...partitionNumberIntoParts(
          symbols,
          numberResult,
          notation,
          exponent,
          numberingSystem,
          // If compact number pattern exists, do not insert group separators.
          !compactNumberPattern && (options.useGrouping ?? true),
          decimalNumberPattern,
          style,
          options.roundingIncrement,
          GetUnsignedRoundingMode(options.roundingMode, sign === -1)
        ));
        break;
      }
      case "-":
        numberParts.push({
          type: "minusSign",
          value: symbols.minusSign
        });
        break;
      case "+":
        numberParts.push({
          type: "plusSign",
          value: symbols.plusSign
        });
        break;
      case "%":
        numberParts.push({
          type: "percentSign",
          value: symbols.percentSign
        });
        break;
      case "¤":
        // Computed above when handling currency spacing.
        numberParts.push({
          type: "currency",
          value: nonNameCurrencyPart
        });
        break;
      default:
        if (part.startsWith("{c:")) {
          // Strip the `{c:` prefix and the closing `}` to get the compact literal.
          numberParts.push({
            type: "compact",
            value: part.substring(3, part.length - 1)
          });
        } else {
          // literal
          numberParts.push({
            type: "literal",
            value: part
          });
        }
        break;
    }
  }
  // #endregion
  // #region Part 2: interpolate unit pattern if necessary.
  // ----------------------------------------------
  // Operand used for plural selection: the rounded number scaled back by its exponent.
  // Kept lazy so that styles which need no plural selection never compute it.
  const pluralOperand = () => numberResult.roundedNumber.times(getPowerOf10(exponent)).toNumber();
  switch (style) {
    case "currency": {
      // `currencyDisplay: 'name'` has similar pattern handling as units.
      if (options.currencyDisplay !== "name") {
        return numberParts;
      }
      const unitPattern = (data.numbers.currency[numberingSystem] || data.numbers.currency[defaultNumberingSystem]).unitPattern;
      // Select plural
      let unitName;
      const currencyNameData = data.currencies[options.currency];
      if (currencyNameData) {
        unitName = selectPlural(pl, pluralOperand(), currencyNameData.displayName);
      } else {
        // Fallback for unknown currency
        unitName = options.currency;
      }
      // Do {0} and {1} substitution
      const unitPatternParts = unitPattern.split(/(\{[01]\})/g);
      const result = [];
      for (const part of unitPatternParts) {
        switch (part) {
          case "{0}":
            result.push(...numberParts);
            break;
          case "{1}":
            result.push({
              type: "currency",
              value: unitName
            });
            break;
          default:
            if (part) {
              result.push({
                type: "literal",
                value: part
              });
            }
            break;
        }
      }
      return result;
    }
    case "unit": {
      const { unit, unitDisplay } = options;
      const simpleUnits = data.units.simple;
      const unitData = simpleUnits[unit];
      let unitPattern;
      if (unitData) {
        // Simple unit pattern
        unitPattern = selectPlural(pl, pluralOperand(), unitData[unitDisplay]);
      } else {
        // See: http://unicode.org/reports/tr35/tr35-general.html#perUnitPatterns
        // If cannot find unit in the simple pattern, it must be "per" compound pattern.
        // Implementation note: we are not following TR-35 here because we need to format to parts!
        const [numeratorUnit, denominatorUnit] = unit.split("-per-");
        const numeratorUnitPattern = selectPlural(pl, pluralOperand(), simpleUnits[numeratorUnit][unitDisplay]);
        const denominatorData = simpleUnits[denominatorUnit];
        const perUnitPattern = denominatorData.perUnit[unitDisplay];
        if (perUnitPattern) {
          // perUnitPattern exists, combine it with numeratorUnitPattern
          unitPattern = perUnitPattern.replace("{0}", numeratorUnitPattern);
        } else {
          // get compoundUnit pattern (e.g. "{0} per {1}"), replace {0} with numerator pattern and {1} with
          // the denominator pattern in singular form.
          const perPattern = data.units.compound.per[unitDisplay];
          const denominatorPattern = selectPlural(pl, 1, denominatorData[unitDisplay]);
          unitPattern = perPattern.replace("{0}", numeratorUnitPattern).replace("{1}", denominatorPattern.replace("{0}", ""));
        }
      }
      const result = [];
      // We need spacing around "{0}" because they are not treated as "unit" parts, but "literal".
      for (const part of unitPattern.split(/(\s*\{0\}\s*)/)) {
        const interpolateMatch = /^(\s*)\{0\}(\s*)$/.exec(part);
        if (interpolateMatch) {
          // Space before "{0}"
          if (interpolateMatch[1]) {
            result.push({
              type: "literal",
              value: interpolateMatch[1]
            });
          }
          // "{0}" itself
          result.push(...numberParts);
          // Space after "{0}"
          if (interpolateMatch[2]) {
            result.push({
              type: "literal",
              value: interpolateMatch[2]
            });
          }
        } else if (part) {
          result.push({
            type: "unit",
            value: part
          });
        }
      }
      return result;
    }
    default: return numberParts;
  }
  // #endregion
}
// Implements a subset of https://tc39.es/ecma402/#sec-partitionnotationsubpattern,
// plus the exponent parts needed for scientific / engineering notation.
// Returns the `{ type, value }` parts for the numeric portion only (no sign, currency or unit).
function partitionNumberIntoParts(symbols, numberResult, notation, exponent, numberingSystem, useGrouping, decimalNumberPattern, style, roundingIncrement, unsignedRoundingMode) {
  const { formattedString, roundedNumber } = numberResult;
  // NaN and non-finite values are emitted verbatim as a single part.
  if (roundedNumber.isNaN()) {
    return [{ type: "nan", value: formattedString }];
  }
  if (!roundedNumber.isFinite()) {
    return [{ type: "infinity", value: formattedString }];
  }
  const isCurrency = style === "currency";
  const parts = [];
  // Swap ASCII digits for the numbering system's digits when a mapping table exists.
  // TODO: Else use an implementation dependent algorithm to map n to the appropriate
  // representation of n in the given numbering system.
  const localDigits = digitMapping[numberingSystem];
  const digits = localDigits
    ? formattedString.replace(/\d/g, (digit) => localDigits[+digit] || digit)
    : formattedString;
  // Split around the decimal point; a string without one is all integer.
  const separatorAt = digits.indexOf(".");
  const hasFraction = separatorAt > 0;
  const integer = hasFraction ? digits.slice(0, separatorAt) : digits;
  const fraction = hasFraction ? digits.slice(separatorAt + 1) : undefined;
  // #region Grouping integer digits
  // The weird compact and x >= 10000 check is to ensure consistency with Node.js and Chrome.
  // Note that `de` does not have compact form for thousands, but Node.js does not insert grouping separator
  // unless the rounded number is greater than 10000:
  // NumberFormat('de', {notation: 'compact', compactDisplay: 'short'}).format(1234) //=> "1234"
  // NumberFormat('de').format(1234) //=> "1.234"
  const isAtLeastTenThousand = () => roundedNumber.greaterThanOrEqualTo(1e4);
  let grouped;
  switch (useGrouping) {
    case "always":
      grouped = true;
      break;
    case "min2":
      grouped = isAtLeastTenThousand();
      break;
    default:
      // Covers "auto" and any other truthy value; falsy values disable grouping.
      grouped = useGrouping ? notation !== "compact" || isAtLeastTenThousand() : false;
      break;
  }
  if (grouped) {
    // For currency we should use `currencyGroup` instead of generic `group`.
    const groupSeparator = isCurrency && symbols.currencyGroup != null ? symbols.currencyGroup : symbols.group;
    // > There may be two different grouping sizes: The primary grouping size used for the least
    // > significant integer group, and the secondary grouping size used for more significant groups.
    // > If a pattern contains multiple grouping separators, the interval between the last one and the
    // > end of the integer defines the primary grouping size, and the interval between the last two
    // > defines the secondary grouping size. All others are ignored.
    const [integerPattern] = decimalNumberPattern.split(".");
    const patternGroups = integerPattern.split(",");
    const groupCount = patternGroups.length;
    const primarySize = groupCount > 1 ? patternGroups[groupCount - 1].length : 3;
    const secondarySize = groupCount > 2 ? patternGroups[groupCount - 2].length : 3;
    // Peel groups off the least significant end, prepending so that the list ends up
    // ordered from most to least significant.
    // TODO: handle surrogate pairs in some numbering system digits
    const chunks = [];
    let end = integer.length;
    for (let size = primarySize; end - size > 0; size = secondarySize) {
      chunks.unshift(integer.slice(end - size, end));
      end -= size;
    }
    chunks.unshift(integer.slice(0, end));
    chunks.forEach((chunk, index) => {
      if (index > 0) {
        parts.push({ type: "group", value: groupSeparator });
      }
      parts.push({ type: "integer", value: chunk });
    });
  } else {
    parts.push({ type: "integer", value: integer });
  }
  // #endregion
  if (fraction !== undefined) {
    const decimalSeparator = isCurrency && symbols.currencyDecimal != null ? symbols.currencyDecimal : symbols.decimal;
    parts.push(
      { type: "decimal", value: decimalSeparator },
      { type: "fraction", value: fraction }
    );
  }
  const usesExponent = notation === "scientific" || notation === "engineering";
  if (usesExponent && roundedNumber.isFinite()) {
    parts.push({ type: "exponentSeparator", value: symbols.exponential });
    const isNegativeExponent = exponent < 0;
    if (isNegativeExponent) {
      parts.push({ type: "exponentMinusSign", value: symbols.minusSign });
    }
    // The sign was emitted as its own part above, so only the magnitude is formatted.
    const exponentMagnitude = isNegativeExponent ? -exponent : exponent;
    const { formattedString: exponentDigits } = ToRawFixed(new Decimal(exponentMagnitude), 0, 0, roundingIncrement, unsignedRoundingMode);
    parts.push({ type: "exponentInteger", value: exponentDigits });
  }
  return parts;
}
// Picks the sub-pattern of a CLDR `positive;negative` pattern that matches `sign`.
// - A pattern without `;` gets an implicit negative form: the same pattern prefixed with `-`.
// - sign 0 selects the first sub-pattern, sign -1 the second.
// - Any other sign yields a "plus" pattern: the negative sub-pattern with every `-` turned
//   into `+`, or, when it contains no `-`, the first sub-pattern prefixed with `+`.
function getPatternForSign(pattern, sign) {
  const fullPattern = pattern.includes(";") ? pattern : `${pattern};-${pattern}`;
  const [unsigned, negative] = fullPattern.split(";");
  if (sign === 0) {
    return unsigned;
  }
  if (sign === -1) {
    return negative;
  }
  if (negative.includes("-")) {
    return negative.replace(/-/g, "+");
  }
  return `+${unsigned}`;
}
// Finds the CLDR pattern for compact notation based on the magnitude of the number and the style.
//
// The compact table is `data.numbers.currency[nu].short` for currency styles whose
// `currencyDisplay` is not "name", and `data.numbers.decimal[nu][compactDisplay]` otherwise
// (`nu` falls back to the locale's default numbering system).
//
// Example: the CLDR pattern "0K" is returned as "0{c:K}", and "¤000K" as "¤0{c:K}":
// - The `{c:...}` token wraps the compact literal.
// - The first run of zeros is normalized to a single zero to match CLDR_NUMBER_PATTERN.
// - Only the sub-pattern matching the sign of the number is returned.
//
// Returning null means the compact display pattern cannot be found.
function getCompactDisplayPattern(numberResult, pl, data, style, compactDisplay, currencyDisplay, numberingSystem) {
  const { roundedNumber, sign, magnitude } = numberResult;
  const defaultNumberingSystem = data.numbers.nu[0];
  let patternsByThreshold;
  if (style === "currency" && currencyDisplay !== "name") {
    const byNumberingSystem = data.numbers.currency;
    const currencyData = byNumberingSystem[numberingSystem] || byNumberingSystem[defaultNumberingSystem];
    // NOTE: compact notation ignores currencySign!
    patternsByThreshold = currencyData.short;
  } else {
    const byNumberingSystem = data.numbers.decimal;
    const byCompactDisplay = byNumberingSystem[numberingSystem] || byNumberingSystem[defaultNumberingSystem];
    patternsByThreshold = byCompactDisplay[compactDisplay];
  }
  const compactPluralRules = findCompactPluralRules(patternsByThreshold, magnitude);
  if (!compactPluralRules) {
    return null;
  }
  const pattern = selectPlural(pl, roundedNumber.toNumber(), compactPluralRules);
  // See https://unicode.org/reports/tr35/tr35-numbers.html#Compact_Number_Formats
  // > If the value is precisely “0”, either explicit or defaulted, then the normal number format
  // > pattern for that sort of object is supplied.
  if (pattern === "0") {
    return null;
  }
  // Wrap every run of characters that is not whitespace, `;`, `-`, `+`, a digit or `¤` in
  // `{c:...}`, then collapse the first run of zeros into a single `0`.
  return getPatternForSign(pattern, sign).replace(/([^\s;\-+\d¤]+)/g, "{c:$1}").replace(/0+/, "0");
}
// Looks up the plural rules for `10 ** magnitude` in a compact pattern table keyed by threshold
// ("1000", "10000", ...). Returns undefined when the table is missing or has no usable entry.
function findCompactPluralRules(patternsByThreshold, magnitude) {
  if (!patternsByThreshold) {
    return undefined;
  }
  const value = 10 ** magnitude;
  const exactMatch = patternsByThreshold[String(value)];
  if (exactMatch) {
    return exactMatch;
  }
  // GH #4236: If magnitude exceeds available patterns, use the largest available.
  // Thresholds are compared as numbers: comparing the keys as strings is lexicographic, and
  // `Object.keys` does not sort keys of 2^32 - 1 and above numerically, so the last key is
  // not guaranteed to be the largest.
  let largestKey;
  let largestThreshold = -Infinity;
  for (const key of Object.keys(patternsByThreshold)) {
    const threshold = Number(key);
    if (threshold > largestThreshold) {
      largestThreshold = threshold;
      largestKey = key;
    }
  }
  if (largestKey !== undefined && value > largestThreshold) {
    return patternsByThreshold[largestKey];
  }
  return undefined;
}
// Returns the entry of `rules` for the plural category that `pl.select(x)` reports,
// falling back to `rules.other` when that entry is missing or falsy.
function selectPlural(pl, x, rules) {
  const category = pl.select(x);
  const matched = rules[category];
  if (matched) {
    return matched;
  }
  return rules.other;
}