UNPKG

numfmt

Version:

Full Excel style number formatting

github.com/borgar/numfmt

557 lines (508 loc) • 17 kB

JavaScript

/* eslint-disable padded-blocks */
import { resolveLocale } from './locale.js';
import {
  u_YEAR, u_MONTH, u_DAY, u_HOUR, u_MIN, u_SEC, u_DSEC, u_CSEC, u_MSEC,
  EPOCH_1900, EPOCH_1317,
  TOKEN_AMPM, TOKEN_BREAK, TOKEN_CALENDAR, TOKEN_CHAR, TOKEN_COLOR,
  TOKEN_COMMA, TOKEN_CONDITION, TOKEN_DATETIME, TOKEN_DBNUM, TOKEN_DIGIT,
  TOKEN_DURATION, TOKEN_ERROR, TOKEN_ESCAPED, TOKEN_EXP, TOKEN_FILL,
  TOKEN_GENERAL, TOKEN_GROUP, TOKEN_HASH, TOKEN_LOCALE, TOKEN_MINUS,
  TOKEN_NATNUM, TOKEN_PAREN, TOKEN_PERCENT, TOKEN_PLUS, TOKEN_POINT,
  TOKEN_QMARK, TOKEN_SCALE, TOKEN_SKIP, TOKEN_SLASH, TOKEN_SPACE,
  TOKEN_STRING, TOKEN_TEXT, TOKEN_ZERO
} from './constants.js';

/**
 * Records the maximum and minimum width of a digit pattern on `part`.
 *
 * Writes `part[prefix + '_max']` (the full pattern length) and
 * `part[prefix + '_min']` (the pattern length with every "#" removed).
 *
 * @param {string} str - The digit pattern, e.g. "#,##0" pieces joined.
 * @param {object} part - The section descriptor to annotate (mutated).
 * @param {string} prefix - Property name prefix ("int", "frac", "man", ...).
 * @returns {object} The same `part` object.
 */
function minMaxPad (str, part, prefix) {
  part[prefix + '_max'] = str.length;
  part[prefix + '_min'] = str.replace(/#/g, '').length;
  return part;
}

/**
 * Appends an item to a token list.
 *
 * @param {string|object} s - A token object, or a bare string which is
 *   wrapped as `{ type: 'string', value: s }`.
 * @param {Array<object>} tokens - The list to append to (mutated).
 */
function add (s, tokens) {
  // allow adding string tokens without wrapping
  if (typeof s === 'string') {
    tokens.push({ type: 'string', value: s });
  }
  else {
    tokens.push(s);
  }
}

/**
 * Tests whether a token is a digit placeholder for the active pattern.
 *
 * "#", "0" and "?" tokens always count; a literal digit token only counts
 * while the denominator pattern ('den') is being collected.
 *
 * @param {object|null|undefined} token - Token to test (may be missing).
 * @param {string} activePattern - Name of the pattern currently being built.
 * @returns {boolean} True if the token is a number operator.
 */
function isNumOp (token, activePattern) {
  const type = token && token.type;
  return (
    (type === TOKEN_HASH || type === TOKEN_ZERO || type === TOKEN_QMARK) ||
    (type === TOKEN_DIGIT && activePattern === 'den')
  );
}

/**
 * Parses one section of a tokenized format pattern into a section descriptor.
 *
 * Tokens are consumed from the start of `inputTokens` until a TOKEN_BREAK
 * token is hit or the list is exhausted. Each token is read through its
 * `type`, `value` and `raw` properties.
 *
 * The returned descriptor carries, among others:
 * - `tokens`: the output token list used for rendering,
 * - `pattern`: the concatenated `raw` text of the consumed tokens,
 * - `tokensUsed`: the index at which parsing stopped (the position of the
 *   TOKEN_BREAK token if one was hit, otherwise `inputTokens.length`),
 * - `int_pattern` / `frac_pattern` / `man_pattern` / `num_pattern` /
 *   `den_pattern` and their derived `*_min` / `*_max` / `*_p` fields,
 * - flags such as `general`, `text`, `percent`, `grouping`, `fractions`,
 *   `dec_fractions`, `exponential`, `date`, `date_eval`, `clock`.
 *
 * @param {Array<object>} inputTokens - Tokens of the format pattern.
 * @returns {object} The parsed section descriptor.
 * @throws {SyntaxError} On an error/unknown token, a grouping separator in a
 *   denominator, or a structurally invalid pattern (see the check after the
 *   main loop).
 */
export function parseFormatSection (inputTokens) {
  const outputTokens = [];

  const part = {
    scale: 1,
    percent: false,
    text: false,
    date: 0,
    date_eval: false,
    date_system: EPOCH_1900,
    sec_decimals: 0,
    general: false,
    clock: 24,
    int_pattern: [],
    frac_pattern: [],
    man_pattern: [],
    den_pattern: [],
    num_pattern: [],
    tokens: outputTokens
  };

  let currentPattern = 'int';
  let lastNumberChunk = null;
  const dateChunks = [];
  let last;
  let haveLocale = false;
  let index = -1;
  let patternSource = '';
  let haveSlash = false;

  while (++index < inputTokens.length) {
    const token = inputTokens[index];
    const type = token.type || TOKEN_ERROR;
    patternSource += token.raw;

    if (type === TOKEN_GENERAL) {
      part.general = true;
      add(token, outputTokens);
    }

    // new partition
    else if (isNumOp(token, currentPattern)) {
      const pt = part[currentPattern + '_pattern'];
      if (isNumOp(last, currentPattern) || last?.type === TOKEN_GROUP) {
        // append to current
        pt.push((pt.pop() || '') + token.value);
        lastNumberChunk.num += token.value;
      }
      else {
        // new number section
        pt.push(token.value);
        lastNumberChunk = { type: currentPattern, num: token.value };
        add(lastNumberChunk, outputTokens);
      }
    }

    else if (type === TOKEN_PAREN) {
      if (token.value === '(') {
        part.parens = true;
      }
      add(token.value, outputTokens);
    }

    else if (type === TOKEN_DIGIT) {
      // just print it
      add(token.value, outputTokens);
    }

    // vulgar fractions
    else if (type === TOKEN_SLASH) {
      haveSlash = true;
      if (part[currentPattern + '_pattern'].length) {
        if (!lastNumberChunk) {
          // need to have a numerator present
          throw new SyntaxError('Format pattern is missing a numerator');
        }
        part.fractions = true;
        // ... we just passed the numerator - correct that item
        part.num_pattern.push(part[currentPattern + '_pattern'].pop());
        lastNumberChunk.type = 'num';
        // next up... the denominator
        currentPattern = 'den';
        add({ type: 'div' }, outputTokens);
      }
      else {
        add(token.value, outputTokens);
      }
    }

    else if (type === TOKEN_COMMA) {
      add(',', outputTokens);
    }

    else if (type === TOKEN_SCALE) {
      // one factor of 0.001 per character of the scale token's raw text
      part.scale = 0.001 ** token.raw.length;
    }

    else if (type === TOKEN_GROUP) {
      if (currentPattern === 'int') {
        part.grouping = true;
      }
      if (currentPattern === 'den') {
        throw new SyntaxError('Cannot group denominator digits');
      }
      // else we just ignore it!
    }

    else if (type === TOKEN_SPACE) {
      add(token, outputTokens);
    }

    else if (type === TOKEN_BREAK) {
      // leave the ";" hanging: `index` stays on the break token
      break;
    }

    else if (type === TOKEN_TEXT) {
      // @
      part.text = true;
      add(token, outputTokens);
    }

    else if (type === TOKEN_PLUS || type === TOKEN_MINUS) {
      add(token, outputTokens);
    }

    // [h] [m] [s]
    else if (type === TOKEN_DURATION) {
      const tokenValue = token.value.toLowerCase(); // deal with in tokenizer
      const startsWith = tokenValue[0];
      const bit = { type: '', size: 0, date: 1, pad: tokenValue.length };
      if (startsWith === 'h') {
        bit.size = u_HOUR;
        bit.type = 'hour-elap';
      }
      else if (startsWith === 'm') {
        bit.size = u_MIN;
        bit.type = 'min-elap';
      }
      else {
        bit.size = u_SEC;
        bit.type = 'sec-elap';
      }
      // signal date calc and track smallest needed unit
      part.date = part.date | bit.size;
      dateChunks.push(bit);
      add(bit, outputTokens);
    }

    // Note: In locales where decimal symbol is set to "," Excel will expect
    //       "," rather than a ".". This must be solved by re-localizing the
    //       pattern before using it.

    // .0 .00 .000
    else if (part.date && type === TOKEN_POINT && inputTokens[index + 1]?.type === TOKEN_ZERO) {
      // Up to three following zero tokens are consumed here by advancing
      // `index` directly, so their text is appended to the pattern source
      // manually below.
      let dec = 1;
      index++;
      let raw = '0';
      if (inputTokens[index + 1]?.type === TOKEN_ZERO) {
        raw += '0';
        dec = 2;
        index++;
      }
      if (inputTokens[index + 1]?.type === TOKEN_ZERO) {
        raw += '0';
        dec = 3;
        index++;
      }
      patternSource += raw;
      const size = [ u_SEC, u_DSEC, u_CSEC, u_MSEC ][dec];
      part.date = part.date | size;
      part.date_eval = true;
      part.sec_decimals = Math.max(part.sec_decimals, dec);
      add({
        type: 'subsec',
        size: size,
        decimals: dec,
        date: 1
      }, outputTokens);
    }

    else if (type === TOKEN_CALENDAR) {
      // signal date system (ignored if defined with [$-xxx])
      if (!haveLocale) {
        // Use Hijri calendar system
        if (token.value === 'B2' || token.value === 'b2') {
          // TODO: B2 does more than this
          // it switches locale to [$-060401] (ar) which affects display (RTL)
          part.date_system = EPOCH_1317;
        }
        // Use Gregorian calendar system
        else {
          // B1 | b1
          // signal date system (ignored if defined with [$-xxx])
          part.date_system = EPOCH_1900;
        }
      }
    }

    // hh:mm:ss YYYY-MM-DD
    else if (type === TOKEN_DATETIME) {
      // Excel is "mostly" case insensitive here except it checks the last used
      // date token. Which, if it was s or h, minutes is used. The same is true
      // if we hit m or s, and last is m.
      // m and mm are spurious, mmm is always month
      const bit = { type: '', size: 0, date: 1 };
      const value = token.value.toLowerCase(); // deal with in tokenizer?
      const startsWith = value[0];
      if (value === 'y' || value === 'yy') {
        bit.size = u_YEAR;
        bit.type = 'year-short';
      }
      else if (startsWith === 'y' || startsWith === 'e') {
        bit.size = u_YEAR;
        bit.type = 'year';
      }
      else if (value === 'b' || value === 'bb') {
        bit.size = u_YEAR;
        bit.type = 'b-year-short';
      }
      else if (startsWith === 'b') {
        bit.size = u_YEAR;
        bit.type = 'b-year';
      }
      else if (value === 'd' || value === 'dd') {
        bit.size = u_DAY;
        bit.type = 'day';
        bit.pad = /dd/.test(value);
      }
      else if (value === 'ddd' || value === 'aaa') {
        bit.size = u_DAY;
        bit.type = 'weekday-short';
      }
      else if (startsWith === 'd' || startsWith === 'a') {
        bit.size = u_DAY;
        bit.type = 'weekday';
      }
      else if (startsWith === 'h') {
        bit.size = u_HOUR;
        bit.type = 'hour';
        bit.pad = /hh/i.test(value);
      }
      else if (startsWith === 'm') {
        if (value.length === 3) {
          bit.size = u_MONTH;
          bit.type = 'monthname-short';
        }
        else if (value.length === 5) {
          bit.size = u_MONTH;
          bit.type = 'monthname-single';
        }
        else if (value.length >= 4) {
          bit.size = u_MONTH;
          bit.type = 'monthname';
        }
        // m or mm can be either minute or month based on context
        const last_date_chunk = dateChunks[dateChunks.length - 1];
        if (!bit.type && last_date_chunk && !last_date_chunk.used &&
            (last_date_chunk.size & (u_HOUR | u_SEC))) {
          // if this value follows hour or second, it is a minute
          last_date_chunk.used = true;
          bit.size = u_MIN;
          bit.type = 'min';
          bit.pad = /mm/.test(value);
        }
        // if we still don't know, we treat as a month
        // and defer, a later 'sec' value may switch it
        if (!bit.type) {
          bit.size = u_MONTH;
          bit.type = 'month';
          bit.pad = /mm/.test(value);
          bit.indeterminate = true;
        }
      }
      else if (startsWith === 's') {
        bit.size = u_SEC;
        bit.type = 'sec';
        bit.pad = /ss/.test(value);
        // if last date chunk was m, flag this used
        const last_date_chunk = dateChunks[dateChunks.length - 1];
        if (last_date_chunk && last_date_chunk.size & u_MIN) {
          bit.used = true;
        }
        // if last date chunk is undecided, we know that it is a minute
        else if (last_date_chunk && last_date_chunk.indeterminate) {
          delete last_date_chunk.indeterminate;
          last_date_chunk.size = u_MIN;
          last_date_chunk.type = 'min';
          bit.used = true;
        }
      }
      else if (startsWith === 'g') {
        // TODO: Don't know what this does? (yet!)
      }
      // signal date calc and track smallest needed unit
      part.date = part.date | bit.size;
      part.date_eval = true;
      dateChunks.push(bit);
      add(bit, outputTokens);
    }

    // AM/PM
    // See: https://github.com/SheetJS/sheetjs/issues/676
    else if (type === TOKEN_AMPM) {
      part.clock = 12;
      part.date = part.date | u_HOUR;
      part.date_eval = true;
      // deal with in tokenizer?
      token.short = token.value === 'A/P';
      add(token, outputTokens);
    }

    // escaped character, string
    else if (type === TOKEN_STRING || type === TOKEN_ESCAPED || type === TOKEN_CHAR) {
      add(token.value, outputTokens);
    }

    // condition
    else if (type === TOKEN_CONDITION) {
      part.condition = [
        token.value[0], // operator
        parseFloat(token.value[1]) // operand
      ];
    }

    // locale code -- we allow std. "en-US" style codes
    // https://stackoverflow.com/questions/54134729/what-does-the-130000-in-excel-locale-code-130000-mean/54540455#54540455
    else if (type === TOKEN_LOCALE) {
      // everything before the first "-" is emitted as a literal (currency);
      // everything after it is the locale code
      const bits = token.value.split('-');
      const code = bits.length < 2 ? '' : bits.slice(1).join('-');

      const currency = bits[0];
      if (currency) {
        add(currency, outputTokens);
      }

      const l4e = resolveLocale(code);
      if (l4e) {
        part.locale = l4e;
      }

      const wincode = parseInt(code, 16);
      if (isFinite(wincode) && (wincode & 0xff0000)) {
        const cal = (wincode >> 16) & 0xff;
        // only Hijri is supported atm.
        if (cal === 6) {
          part.date_system = EPOCH_1317;
        }
      }

      haveLocale = true; // ignore any B2 & B1 tokens
    }

    // color
    else if (type === TOKEN_COLOR) {
      let cm;
      let v = token.value.toLowerCase();
      if ((cm = /^color\s*(\d+)$/i.exec(v))) {
        v = parseInt(cm[1], 10);
      }
      part.color = v;
    }

    // percentage
    else if (type === TOKEN_PERCENT) {
      part.scale = 100;
      part.percent = true;
      add('%', outputTokens);
    }

    // decimal fraction
    else if (type === TOKEN_POINT) {
      add(token, outputTokens);
      if (!part.date) {
        part.dec_fractions = true;
        currentPattern = 'frac';
      }
    }

    // exponent
    else if (type === TOKEN_EXP) {
      // Exponent pattern requires symbol to directly follow "E" but the
      // signature symbol, however, prefixes the first digit of the mantissa
      part.exponential = true;
      part.exp_plus = token.value.includes('+');
      currentPattern = 'man';
      add({ type: 'exp', plus: part.exp_plus }, outputTokens);
    }

    // skip width
    else if (type === TOKEN_SKIP) {
      add(token, outputTokens);
    }

    // fill space with next char
    else if (type === TOKEN_FILL) {
      add(token, outputTokens);
    }

    else if (type === TOKEN_DBNUM || type === TOKEN_NATNUM) {
      // UNSUPPORTED:
      // - DBNum1 = NatNum4
      // - DBNum2 = NatNum5
      // - DBNum3 = either NatNum6 or NatNum3?
      // - DBNum3 = NatNum10
      // NatNum: https://www.openoffice.org/api/docs/common/ref/com/sun/star/i18n/NativeNumberMode.html
    }

    else if (type === TOKEN_ERROR) {
      throw new SyntaxError(`Illegal character: ${patternSource}`);
    }

    else {
      throw new SyntaxError(`Unknown token ${type} in ${patternSource}`);
    }

    // advance parser
    last = token;
  }

  part.tokensUsed = index;
  part.pattern = patternSource;

  // Quickly determine if this pattern is condition only
  // if so, then add String(value) but using the condition
  if (/^((?:\[[^\]]+\])+)(;|$)/.test(part.pattern) &&
      !/^\[(?:h+|m+|s+)\]/.test(part.pattern)) {
    add({ type: 'text' }, outputTokens);
  }

  // Make sure we don't have an illegal pattern. We could support some of this
  // but we side with Excel and don't because they make no sense.
  if (
    (part.fractions && part.dec_fractions) ||
    (part.grouping && !part.int_pattern.length) ||
    (part.fractions && part.exponential) ||
    (part.fractions && (part.den_pattern.length * part.num_pattern.length) === 0) ||
    (haveSlash && !part.fractions && !part.date) ||
    (part.exponential && ((part.int_pattern.length || part.frac_pattern.length) * part.man_pattern.length) === 0)
  ) {
    throw new SyntaxError(`Invalid pattern: ${patternSource}`);
  }

  const intPattern = part.int_pattern.join('');
  const manPattern = part.man_pattern.join('');
  const fracPattern = part.frac_pattern.join('');

  minMaxPad(intPattern, part, 'int');
  // int_min is then overridden: it becomes the 1-based distance from the right
  // end of the pattern to its leftmost "0"/"?"/digit character
  let min = 0;
  for (let i = 0; i < intPattern.length; i++) {
    const ch = intPattern[intPattern.length - 1 - i];
    if (/^[0-9?]/.test(ch)) {
      min = i + 1;
    }
  }
  part.int_min = min;
  minMaxPad(fracPattern, part, 'frac');
  minMaxPad(manPattern, part, 'man');

  let num_pat = part.num_pattern.join('');
  // only the first denominator chunk is used for the denominator pattern
  let den_pat = part.den_pattern[0] || '';
  const enforce_padded = den_pat.includes('?') || num_pat.includes('?');
  // numerical denominator padding type is inherited from numerator padding type
  if (enforce_padded) {
    den_pat = den_pat.replace(/\d/g, '?');
    den_pat = den_pat.replace(/#$/g, '?');
    minMaxPad(num_pat, part, 'num');
    minMaxPad(den_pat, part, 'den');
    // note: the numerator min/max above are computed before this rewrite
    num_pat = num_pat.replace(/#$/g, '?');
  }
  else {
    minMaxPad(num_pat, part, 'num');
    minMaxPad(den_pat, part, 'den');
  }

  part.int_p = intPattern;
  part.man_p = manPattern;
  part.num_p = num_pat;
  part.den_p = den_pat;

  if (part.den_pattern.length) {
    // detect and set rounding factor for denominator
    part.denominator = parseInt(part.den_pattern.join('').replace(/\D/g, ''), 10);
  }

  part.integer = !!intPattern.length;

  if (!part.integer && !part.exponential && fracPattern.length) {
    // if no integer has been found, we inject one
    const pointIdx = part.tokens.findIndex(d => d.type === 'point');
    part.tokens.splice(pointIdx, 0, { type: 'int', value: '#' });
    part.integer = true;
    part.int_pattern = [ '#' ];
    part.int_p = '#';
  }

  // extra whitespace rules for vulgar fractions
  if (part.fractions) {
    // fragment bits affect surrounding whitespace
    // if either bit is "#", the whitespace around it, and
    // the div symbol, is removed if the bit is not shown
    for (let i = 0; i < outputTokens.length - 1; i++) {
      const tok = outputTokens[i];
      if (tok.type !== 'string' && tok.type !== 'space') {
        continue;
      }
      const nextType = outputTokens[i + 1].type;
      if (nextType === 'num') {
        tok.rule = 'num+int';
      }
      else if (nextType === 'div') {
        tok.rule = 'num';
      }
      else if (nextType === 'den') {
        tok.rule = 'den';
      }
    }
  }

  // if the number is fragmented, grouping should not be applied
  if (part.grouping && part.int_pattern.length > 1) {
    part.grouping = false;
  }

  return part;
}