numfmt
Version:
Full Excel style number formatting
710 lines (686 loc) • 21.6 kB
JavaScript
/* eslint-disable array-element-newline */
import { currencySymbols, reCurrencySymbols } from './constants.js';
import { defaultLocale, getLocale } from './locale.js';
/**
* @typedef {object} ParseData
* @property {number | boolean} v - the value
* @property {string} [z] - number format pattern
*/
/*
This is a list of the allowed date formats. The test file contains
the full list of permutations and the resulting values and formats.
Legend:
"-" - Date separator (any of "/" | "-" | " " | "."⁽¹⁾ | ", "⁽²⁾)
" " - Whitespace
"j" - Day without leading zero (1-31)
"d" - Day with leading zero (00-31)
"D" - Abbreviated day name ("Sun"-"Sat")
"l" - Full day name ("Sunday"-"Saturday")
"n" - Month without leading zero (1-12)
"m" - Month with leading zero (01-12)
"F" - Full month name ("January"-"December")
"M" - Abbreviated month name ("Jan"-"Dec")
"y" - Year without century (00-99)
"Y" - Year of our lord (1900-9999)
"x" - Time of day (all formats: "10 PM", "10:11:12", ...)
"!" - Only use in "date-first" locales: 4.2.2000 = 4 feb.
"?" - Only use in "month-first" locales: 2.4.2000 = 4 feb.
¹ Only considered valid if there are three or more sections to the date.
² Comma is only allowed if followed by a space.
Time is appended to each of these as they are inserted into the
collection of valid dates below.
*/
// Every date layout the parser accepts, written with the legend letters
// described above. A leading "!" restricts a layout to day-first locales and
// a leading "?" to month-first locales; unmarked layouts are used by both.
const okDateFormats = [
  // day-month-year
  '!d-m-y', '!d-m-Y', '!j-m-y', '!j-m-Y',
  '!d-n-y', '!d-n-Y', '!j-n-y', '!j-n-Y',
  // month-day-year
  '?m-d-y', '?m-d-Y', '?m-j-y', '?m-j-Y',
  '?n-d-y', '?n-d-Y', '?n-j-y', '?n-j-Y',
  // unambiguous
  'd-M-y', 'd-M-Y', 'j-M-y', 'j-M-Y',
  'M-d-y', 'M-d-Y', 'M-j-y', 'M-j-Y',
  'd-F-y', 'd-F-Y', 'F-d-y', 'F-d-Y',
  'F-j-y', 'F-j-Y', 'j-F-y', 'j-F-Y',
  'y-F-d', 'y-F-j', 'y-M-d', 'y-M-j',
  'Y-F-d', 'Y-F-j', 'Y-M-d', 'Y-m-d',
  'Y-M-j', 'Y-m-j', 'Y-n-d', 'Y-n-j',
  'j-F', // 2-April
  'j-M', // 2-Apr
  'd-F', // 02-April
  'd-M', // 02-Apr
  'n-d', // 4-02
  'n-j', // 4-2
  'n-Y', // 4-1908
  'm-d', // 04-02
  'm-j', // 04-2
  'm-Y', // 04-1908
  'M-Y', // Apr-1908
  'M-y', // Apr-08
  'F-y', // April-08
  'F-Y', // April-1908
  'Y-M', // 1908-Apr
  'Y-n', // 1908-4
  'Y-m', // 1908-04
  'Y-F' // 1908-April
];
// Legend letter -> Excel format token, used when the parsed date shows no
// zero padding.
const tx0 = {
  j: 'd',
  d: 'd',
  D: 'ddd',
  l: 'dddd',
  n: 'm',
  m: 'm',
  M: 'mmm',
  F: 'mmmm',
  y: 'yy',
  Y: 'yyyy'
};
// Same mapping, except that numeric day and month become two-digit tokens.
const tx00 = { ...tx0, j: 'dd', d: 'dd', n: 'mm', m: 'mm' };
// The accepted date formats are kept as token trees (tries) so that parsing
// needs minimal looping and branching: one for day-first matching and one
// for month-first matching.
const dateTrieDM = {};
const dateTrieMD = {};
/**
 * Insert one date format into a token trie, one character per level.
 *
 * The markers "!" and "?" are not stored as trie levels; they only change the
 * type written at the end of the path (4 and 2 respectively). The node at the
 * end of the path receives that type in its `$` property.
 *
 * @param {string} f The format to insert
 * @param {object} node The trie node to insert below (mutated)
 * @param {number} [allowType=1] Type stored at the leaf when no marker is seen
 */
function packDate (f, node, allowType = 1) {
  const pattern = f || '';
  let cursor = node;
  let leafType = allowType;
  for (let pos = 0; pos < pattern.length; pos++) {
    const token = pattern[pos];
    if (token === '!') {
      leafType = 4;
    }
    else if (token === '?') {
      leafType = 2;
    }
    else {
      // reuse an existing branch so shared prefixes are stored once
      cursor[token] = cursor[token] || {};
      cursor = cursor[token];
    }
  }
  cursor.$ = leafType;
}
/**
 * Register a date format in a trie together with its accepted variants:
 * the bare date, the date with a full ("l") or abbreviated ("D") weekday
 * before or after it, and each of those followed by a time ("x").
 *
 * Only trailing time is registered: Excel allows time first, but Sheets and
 * GRID do not.
 *
 * @param {string} fmt The date format to add
 * @param {object} trie The trie to add it to (mutated)
 */
function addFormatToTrie (fmt, trie) {
  const variants = [ fmt ];
  for (const weekday of [ 'l', 'D' ]) {
    // weekday suffixed first, then prefixed
    variants.push(`${fmt} ${weekday}`, `${weekday} ${fmt}`);
  }
  for (const variant of variants) {
    packDate(variant, trie);
    // the same variant with time suffixed
    packDate(`${variant} x`, trie);
  }
}
// Populate both tries: formats marked "?" are left out of the day-first trie,
// formats marked "!" are left out of the month-first trie, and unmarked
// formats go into both.
for (const fmt of okDateFormats) {
  const marker = fmt[0];
  if (marker !== '?') {
    addFormatToTrie(fmt, dateTrieDM);
  }
  if (marker !== '!') {
    addFormatToTrie(fmt, dateTrieMD);
  }
}
// UTC year captured once at module load; parseDate falls back to it when the
// input carries no year.
const currentYear = new Date().getUTCFullYear();
// Period and comma: each one is accepted as a grouping character when the
// other is the decimal separator (see dec2group).
const PT = '.';
const CM = ',';
const SP = ' ';
const NS = ' ';
const NN = ' ';
// Apostrophe, accepted as a grouping character.
const AP = "'";
// Arabic thousands separator, accepted as a grouping character.
const AG = '٬';
// Grouping characters accepted for a given decimal separator. parseNumber
// looks this up with the locale's decimal symbol and falls back to
// [ AP, AG ] for decimal symbols that are not listed here.
const dec2group = {
'.': [ CM, NS, NN, AP, AG ],
',': [ PT, NS, NN, AP, AG ],
'٫': [ PT, NS, NN, AP, AG ]
};
// Test for a single ASCII digit character. Falsy input (such as the
// `undefined` produced by reading past the end of a string) is handed back
// unchanged.
const isDigit = d => {
  if (!d) {
    return d;
  }
  return d.length === 1 && d >= '0' && d <= '9';
};
/**
 * Parse a numeric string input and return its value and format. If the input
 * was not recognized or valid, the function returns a `null`, for valid input
 * it returns an object with two properties:
 *
 * * `v`: the parsed value.
 * * `z`: the number format of the input (if applicable).
 *
 * The input is read in four stages: a prefix (sign, opening parenthesis,
 * currency symbol, percent), the digits with grouping and decimal symbols,
 * an optional exponent, and a suffix (currency symbol, closing parenthesis,
 * percent). Parentheses mark a negative number and must be balanced.
 *
 * @see parseValue
 * @param {string} value The number to parse
 * @param {object} [options={}] Options
 * @param {string} [options.locale=""]
 *    A BCP 47 string tag. Locale default is english with a `\u00a0`
 *    grouping symbol (see [addLocale](#addLocale))
 * @returns {ParseData | null} An object of the parsed value and a corresponding format string
 */
export function parseNumber (value, options = {}) {
  const l10n = getLocale(options.locale || '') || defaultLocale;
  // we base everything on the decimal separator
  const dec = l10n.decimal;
  // base allowed grouping chars on decimal
  const grp = [ ...(dec2group[dec] || [ AP, AG ]) ];
  if (!grp.includes(l10n.group) && l10n.group !== SP && l10n.group !== dec) {
    grp.push(l10n.group);
  }

  let num = '';
  let exp = '';
  let sign = 1;
  let format = '';
  let minus = false;
  let openParen = false;
  let closeParen = false;
  let percent = false;
  let currency = false;
  let currencySymbol = null;
  let currencyTrailing = false;
  let i = 0;

  // prefix
  const prefixChars = [ SP, NS, NN, '+', '%', '(', '-' ].concat(currencySymbols);
  while (prefixChars.includes(value[i])) {
    const char = value[i];
    if (char === '-') {
      // a minus can't be repeated or combined with parentheses
      if (minus || openParen) { return null; }
      minus = true;
      sign = -1;
    }
    else if (reCurrencySymbols.test(char)) {
      if (currency) { return null; }
      currency = true;
      currencySymbol = char;
    }
    else if (char === '(') {
      if (openParen || minus) { return null; }
      openParen = true;
      sign = -1;
    }
    else if (char === '%') {
      if (percent) { return null; }
      percent = true;
    }
    i++;
  }

  // number
  let haveDecimal = false;
  let g;
  if (value[i] === dec || isDigit(value[i])) {
    while (i < value.length) {
      const ch = value[i];
      // can maybe allow space as the grouping operator if we find that it is
      // immediately followed by a digit or decimal?
      if (!g && grp.includes(ch)) {
        // the first grouping char seen becomes the only one allowed
        g = ch;
      }
      else if (g && g === ch) {
        // skip repeated grouping chars
      }
      else if (ch === dec) {
        if (haveDecimal) {
          break;
        }
        num += '.';
        haveDecimal = true;
      }
      else if (isDigit(ch)) {
        num += ch;
      }
      else {
        break;
      }
      i++;
    }
  }

  // exponent
  if (value[i] === 'e' || value[i] === 'E') {
    exp += value[i];
    i++;
    if (value[i] === '+' || value[i] === '-') {
      exp += value[i];
      i++;
    }
    const d = i;
    while (isDigit(value[i])) {
      exp += value[i];
      i++;
    }
    if (d === i) {
      // contains no digits
      return null;
    }
  }

  // suffix
  const suffixChars = [ SP, NS, NN, '%', '$', ')' ].concat(currencySymbols);
  while (suffixChars.includes(value[i])) {
    const char = value[i];
    // only 1 occurrence of these is allowed
    if (reCurrencySymbols.test(char)) {
      if (currency) { return null; }
      currency = true;
      currencySymbol = char;
      currencyTrailing = true;
    }
    else if (char === ')') {
      if (closeParen || !openParen) { return null; }
      closeParen = true;
    }
    else if (char === '%') {
      if (percent) { return null; }
      percent = true;
    }
    i++;
  }

  // anything left over means this is not a number
  if (i !== value.length) {
    return null;
  }
  // an opening parenthesis must be closed: "(5" is not a number
  if (openParen && !closeParen) {
    return null;
  }

  // is number ok?
  let numberValue = parseFloat(num + exp);
  if (!Number.isFinite(numberValue)) {
    return null;
  }

  if (exp) {
    if (percent || currency) {
      return null;
    }
    // allow parens and minus, but not %$
    format = '0.00E+00';
  }
  else if (percent) {
    if (currency) {
      // Sheets allows this: $123% => $1.23 (Excel does not)
      return null;
    }
    // numpart dictates how "deep" the format is: "0" vs "0.00"
    format = num.includes('.')
      ? '0.00%'
      : '0%';
    // scale by re-reading the digits with a decimal exponent rather than
    // multiplying by 0.01, so "12.3%" gives 0.123 and not 0.12300000000000001
    numberValue = parseFloat(num + 'e-2');
  }
  else if (currency) {
    // numpart dictates how "deep" the format is: "0" vs "0.00"
    const currencyFormat = num.includes('.')
      ? '#,##0.00'
      : '#,##0';
    if (currencyTrailing) {
      format = currencyFormat + currencySymbol;
    }
    else {
      format = currencySymbol + currencyFormat;
    }
  }
  else if (g) {
    format = num.includes('.')
      ? '#,##0.00'
      : '#,##0';
  }

  // we may want to lower the fidelity of the number: +num.toFixed(13)
  const ret = { v: numberValue * sign };
  if (format) {
    ret.z = format;
  }
  return ret;
}
/**
 * Check that a year/month/day triple is a date that exists.
 *
 * February gets 29 days in Gregorian leap years and, to agree with Excel,
 * in the year 1900 as well.
 *
 * @param {number} y The year
 * @param {number} m The month (1-12)
 * @param {number} d The day of the month
 * @returns {boolean} True if the date is valid
 */
export function isValidDate (y, m, d) {
  // day can't be 0 and month must be 1-12
  if (d < 1 || m < 1 || m > 12) {
    return false;
  }
  let lastDay;
  switch (m) {
    case 2: {
      const leap = (y % 4 === 0 && y % 100 !== 0) || y % 400 === 0;
      // 1900 is a leap year in Excel
      lastDay = (leap || y === 1900) ? 29 : 28;
      break;
    }
    case 4: case 6: case 9: case 11:
      lastDay = 30;
      break;
    case 1: case 3: case 5: case 7: case 8: case 10: case 12:
      lastDay = 31;
      break;
    default:
      // not a whole month number: nothing further to check
      return true;
  }
  return !(d > lastDay);
}
// Find the first lookup row whose text (row[0]) is a prefix of `str`.
// Returns [ matchedText, row ], or [ '', null ] when nothing matches.
// When `skipPeriod` is set and the row is an abbreviated day ("D") or month
// ("M"), a "." directly after the match is included in the matched text,
// because the locale abbreviates with a period by convention.
// NOTE: ideally this would also equate ’ with ' and match all whitespace.
const matchRec = (str, data, skipPeriod = false) => {
  const hit = data.find(row => str.startsWith(row[0]));
  if (!hit) {
    return [ '', null ];
  }
  const [ text, , kind ] = hit;
  const isAbbreviation = kind === 'D' || kind === 'M';
  const takePeriod = skipPeriod && isAbbreviation && str[text.length] === '.';
  const end = text.length + (takePeriod ? 1 : 0);
  return [ str.slice(0, end), hit ];
};
// Match `str` against the format trie below `node`, one token at a time.
//
// Each key of `node` is tried in insertion order. When the start of `str`
// fits a token, the consumed text is removed and matching continues
// recursively in that token's subtree with what was learned added to `data`
// (day, month, _mon, year, sep, time, tf) and the token appended to
// `data.path`. The "$" key marks the end of a format and only matches once
// the input is used up. Returns the accumulated data object for the first
// branch that matches completely, or undefined if none does.
const nextToken = (str, node, data, lData) => {
  const path = data.path || '';
  for (const t of Object.keys(node)) {
    const child = node[t];
    if (!child) {
      continue;
    }
    // drop the consumed text and continue below this token
    const descend = (consumed, found, trail = t) => nextToken(
      str.slice(consumed.length),
      child,
      { ...data, ...found, path: path + trail },
      lData
    );
    let r;
    switch (t) {
      case '$':
      case '€':
        // if string is done, then we can return
        if (!str) {
          r = data;
        }
        break;
      case '-': {
        const m = /^(\s*([./-]|,\s)\s*|\s+)/.exec(str);
        if (m) {
          const sep = (m[1] === '-' || m[1] === '/' || m[1] === '.') ? m[1] : ' ';
          // don't allow mixing date separators
          if (!data.sep || data.sep === sep) {
            r = descend(m[0], { sep }, m[0].replace(/\s+/g, ' '));
          }
        }
        break;
      }
      case ' ': {
        const m = /^[,.]?\s+/.exec(str);
        if (m) {
          r = descend(m[0], {}, m[0].replace(/\s+/g, ' '));
        }
        break;
      }
      case 'j':
      case 'd': {
        const m = /^(0?[1-9]|1\d|2\d|3[01])\b/.exec(str);
        if (m) {
          // day is kept as text so a leading zero can be detected later
          r = descend(m[0], { day: m[0] });
        }
        break;
      }
      case 'n':
      case 'm': {
        const m = /^(0?[1-9]|1[012])\b/.exec(str);
        if (m) {
          r = descend(m[0], { month: +m[0], _mon: m[0] });
        }
        break;
      }
      case 'F':
      case 'M': {
        const [ m, match ] = matchRec(str, lData.mon, lData.mp);
        if (match && match[2] === t) {
          r = descend(m, { month: match[1], _mon: m });
        }
        break;
      }
      case 'l':
      case 'D': {
        const [ m, match ] = matchRec(str, lData.day, lData.dp);
        if (match && match[2] === t) {
          // the value is ignored
          r = descend(m, {});
        }
        break;
      }
      case 'y': {
        const m = /^\d\d\b/.exec(str);
        if (m) {
          // two-digit years: 30-99 are 19xx, 00-29 are 20xx
          const y = (+m[0] >= 30) ? +m[0] + 1900 : +m[0] + 2000;
          r = descend(m[0], { year: y });
        }
        break;
      }
      case 'Y': {
        const m = /^\d\d\d\d\b/.exec(str);
        if (m) {
          r = descend(m[0], { year: +m[0] });
        }
        break;
      }
      case 'x': {
        // time takes all of the remaining input
        const time = parseTime(str, { locale: lData.locale });
        if (time) {
          r = nextToken('', child, { ...data, time: time.v, tf: time.z, path: path + t }, lData);
        }
        break;
      }
      default:
        throw new Error(`Unknown date token "${t}"`);
    }
    // reject invalid dates so we continue traversing the tree; parts not yet
    // known default to a leap year (1916), January, and day 1
    if (r && isValidDate(data.year || 1916, data.month || 1, data.day ? +data.day : 1)) {
      return r;
    }
  }
};
// Normalize a date string (or a locale's month/day name) for matching:
// collapse runs of whitespace to single spaces and trim, turn every
// typographic apostrophe into a plain one, drop one trailing period, and
// lowercase the result.
const normDateStr = s => (
  s.replace(/\s+/g, ' ').trim()
    // global flag: all apostrophes are normalized, not just the first
    .replace(/’/g, "'")
    .replace(/\.$/, '')
    .toLowerCase()
);
// Turn a list of names into lookup rows of the form
// [ normalizedName, oneBasedIndex, symbol ], ordered longest name first so
// that prefix matching tries longer names before shorter ones.
const getLookups = (arr, sym) => {
  const longestFirst = (a, b) => b[0].length - a[0].length;
  return arr
    .map((name, index) => [ normDateStr(name), index + 1, sym ])
    .sort(longestFirst);
};
/**
 * Parse a date or datetime string input and return its value and format. If
 * the input was not recognized or valid, the function returns a `null`, for
 * valid input it returns an object with two properties:
 *
 * - `v`: the parsed value.
 * - `z`: the number format of the input (if applicable).
 *
 * Input without a year is placed in the current (UTC) year, and input
 * without a day on the first of the month. A date that does not exist in
 * the resulting year (29 February in a non-leap year) is rejected.
 *
 * @see parseValue
 * @param {string} value The date to parse
 * @param {object} [options={}] Options
 * @param {string} [options.locale=""]
 *    A BCP 47 string tag. Locale default is english with a `\u00a0`
 *    grouping symbol (see [addLocale](#addLocale))
 * @returns {ParseData | null} An object of the parsed value and a corresponding format string
 */
export function parseDate (value, options = {}) {
  const l10n = getLocale(options.locale || '') || defaultLocale;
  const lData = {
    mon: getLookups(l10n.mmmm, 'F').concat(getLookups(l10n.mmm, 'M')),
    // does the locale write abbreviated months with a trailing period?
    mp: l10n.mmm[0].at(-1) === '.',
    day: getLookups(l10n.dddd, 'l').concat(getLookups(l10n.ddd, 'D')),
    // does the locale write abbreviated weekdays with a trailing period?
    dp: l10n.ddd[0].at(-1) === '.',
    locale: options.locale
  };

  // possible shortcut: quickly dismiss if there isn't a number?
  const date = nextToken(
    normDateStr(value),
    l10n.preferMDY ? dateTrieMD : dateTrieDM,
    { path: '' },
    lData
  );
  if (!date) {
    return null;
  }
  // disallow matches where two tokens are separated by a period
  if (date.sep === '.' && date.path.length === 3) {
    return null;
  }

  const year = +(date.year ?? currentYear);
  if (!date.day) {
    date.day = 1;
  }
  if (year < 1900) {
    return null;
  }
  // The tokenizer validates year-less dates against a leap year, so a
  // "29 Feb" can get here even when the year it will be placed in has no
  // such day. Date.UTC would silently roll it over to 1 March.
  if (!isValidDate(year, date.month, +date.day)) {
    return null;
  }

  // offset between days-since-1970 and the serial date; years past 9999
  // keep -Infinity and therefore fail the range check below
  let epoch = -Infinity;
  if (year <= 1900 && date.month <= 2) {
    // one less before the (nonexistent) 29 Feb 1900 that the isValidDate
    // helper accepts as "a leap year in Excel"
    epoch = 25568;
  }
  else if (year < 10000) {
    epoch = 25569;
  }

  const dateValue = (Date.UTC(year, date.month - 1, date.day) / 864e5) + epoch + (date.time || 0);
  if (dateValue >= 0 && dateValue <= 2958465) {
    const lead0 = (
      // either has a leading zero
      (date._mon[0] === '0' || date.day[0] === '0') ||
      // both are 2-digits long
      (date._mon.length === 2 && date.day.length === 2)
    );
    // translate the matched token path into an Excel format pattern
    const format = date.path.replace(/[jdlDnmMFyYx]/g, a => {
      if (a === 'x') { return date.tf || ''; }
      return (lead0 ? tx00[a] : tx0[a]) || a;
    });
    return { v: dateValue, z: format };
  }
  return null;
}
// Normalize an AM/PM marker for comparison: remove all whitespace and
// periods, then lowercase ("A. M." and "am" compare equal).
const normAMPMStr = s => s.replace(/[\s.]+/g, '').toLowerCase();
/**
 * Parse a time string input and return its value and format. If the input was
 * not recognized or valid, the function returns a `null`, for valid input it
 * returns an object with two properties:
 *
 * - `v`: the parsed value, as a fraction of a day.
 * - `z`: the number format of the input (if applicable).
 *
 * An AM/PM marker (the locale's own, or "a"/"am"/"p"/"pm") switches to the
 * 12-hour clock, where hour 12 means midnight for AM and noon for PM.
 * Fractional seconds count towards the value but are not part of the format.
 *
 * @see parseValue
 * @param {string} value The time to parse
 * @param {object} [options={}] Options
 * @param {string} [options.locale=""]
 *    A BCP 47 string tag. Locale default is english with a `\u00a0`
 *    grouping symbol (see [addLocale](#addLocale))
 * @returns {ParseData | null} An object of the parsed value and a corresponding format string
 */
export function parseTime (value, options = {}) {
  const l10n = getLocale(options.locale || '') || defaultLocale;
  // hours[:minutes[:seconds]][.fraction], followed by a non-digit or the end
  const parts = /^\s*([10]?\d|2[0-4])(?::([0-5]\d|\d))?(?::([0-5]\d|\d))?(\.\d{1,10})?(?=\s*[^\s\d]|$)/.exec(value);
  if (!parts) {
    return null;
  }

  // whatever follows the digits must be an AM/PM marker, or nothing
  let ampm = '';
  const tail = normAMPMStr(value.slice(parts[0].length));
  if (tail === normAMPMStr(l10n.ampm[0]) || tail === 'a' || tail === 'am') {
    ampm = 'a';
  }
  else if (tail === normAMPMStr(l10n.ampm[1]) || tail === 'p' || tail === 'pm') {
    ampm = 'p';
  }
  else if (tail) {
    return null;
  }

  const [ , h, m, s, f ] = parts;
  // don't allow milliseconds without seconds
  if (f && !s) {
    return null;
  }
  // single number must also include AM/PM part
  if (!ampm && !m && !s) {
    return null;
  }

  let hrs = +(h || 0);
  if (ampm) {
    // AM/PM part must align with hours
    if (hrs >= 13) {
      return null;
    }
    // 12-hour clock: 12 AM is 0h and 12 PM is 12h, every other PM hour
    // is shifted by twelve
    hrs = (hrs % 12) + (ampm === 'p' ? 12 : 0);
  }
  const min = +(m || 0);
  const sec = +(s || 0);
  const mss = +(f || 0);
  return {
    v: ((hrs * 60 * 60) + (min * 60) + sec + mss) / (60 * 60 * 24),
    z: (
      (h.length === 2 ? 'hh' : 'h') +
      ':mm' +
      (s ? ':ss' : '') +
      (ampm ? ' AM/PM' : '')
    )
  };
}
/**
 * Parse a string input and return its boolean value. If the input was not
 * recognized or valid, the function returns a `null`, for valid input it
 * returns an object with one property:
 *
 * - `v`: the parsed value.
 *
 * The words "true" and "false" are always understood, as are the locale's
 * own words for them. Case and surrounding whitespace are ignored.
 *
 * @see parseValue
 * @param {string} value The supposed boolean to parse
 * @param {object} [options={}] Options
 * @param {string} [options.locale=""]
 *    A BCP 47 string tag. Locale default is english with a `\u00a0`
 *    grouping symbol (see [addLocale](#addLocale))
 * @returns {ParseData | null} An object of the parsed value and a corresponding format string
 */
export function parseBool (value, options = {}) {
  const locale = getLocale(options.locale || '') || defaultLocale;
  const word = value.trim().toLowerCase();

  const localTrue = locale.bool[0].toLowerCase();
  if ([ 'true', localTrue ].includes(word)) {
    return { v: true };
  }

  const localFalse = locale.bool[1].toLowerCase();
  if ([ 'false', localFalse ].includes(word)) {
    return { v: false };
  }

  return null;
}
/**
 * Attempt to parse a "spreadsheet input" string input and return its value and
 * format. If the input was not recognized or valid, the function returns a
 * `null`, for valid input it returns an object with two properties:
 *
 * - `v`: The parsed value. For dates, this will be an Excel style serial date.
 * - `z`: (Optionally) the number format string of the input. This property will
 *        not be present if it amounts to the `General` format.
 *
 * `parseValue()` recognizes a wide range of dates and date-times, times,
 * numbers, and booleans. Some examples:
 *
 * ```js
 * // basic number
 * parseValue("-123");// { v: -123 }
 * // formatted number
 * parseValue("$1,234"); // { v: 1234, z: "$#,##0" }
 * // a percent
 * parseValue("12.3%"); // { v: 0.123, z: "0.00%" }
 * // a date
 * parseValue("07 October 1984"); // { v: 30962, z: 'dd mmmm yyyy' }
 * // an ISO formatted date-time
 * parseValue("1984-09-10 11:12:13.1234"); // { v: 30935.46681855787, z: "yyyy-mm-dd hh:mm:ss" }
 * // a boolean
 * parseValue("false"); // { v: false }
 * ```
 *
 * The format string in the output may not correspond exactly to the input.
 * Rather, it is composed of certain elements which the input controls. This
 * is comparable to how Microsoft Excel and Google Sheets parse pasted input.
 * Some things you may expect:
 *
 * - Whitespace is ignored.
 * - Decimal fractions are always represented by `.00` regardless of how many
 *   digits were shown in the input.
 * - Negatives denoted by parentheses [`(1,234)`] will not include the
 *   parentheses in the format string (the value will still be negative.)
 * - All "scientific notation" returns the same format: `0.00E+00`.
 *
 * Internally the input is offered to `parseNumber`, `parseDate`, `parseTime`
 * and `parseBool`, in that order, and the first result is returned. They work
 * in the same way except with a more limited scope. You may want to call one
 * of those functions directly if you are limiting input to a smaller scope.
 *
 * The options are handed unchanged to each of those parsers, which resolve
 * `options.locale` themselves: decimal and grouping symbols, month and day
 * names, the order of date parts, AM/PM markers and boolean words all come
 * from the locale. Be careful what options you pass the functions.
 *
 * @param {string} value The value to parse
 * @param {object} [options={}] Options
 * @param {string} [options.locale=""]
 *    A BCP 47 string tag. Locale default is english with a `\u00a0`
 *    grouping symbol (see [addLocale](#addLocale))
 * @returns {ParseData | null} An object of the parsed value and a corresponding format string
 */
export function parseValue (value, options) {
  const parsers = [ parseNumber, parseDate, parseTime, parseBool ];
  let result = null;
  for (const parse of parsers) {
    result = parse(value, options);
    // first parser that recognizes the input wins
    if (result != null) {
      break;
    }
  }
  return result;
}