UNPKG

@datatables/type-detector

Version:

Raw data type detection.

github.com/DataTables/cloudtables-type-detect/

DataTables/Type-Detection

1,025 lines (1,024 loc) • 48.4 kB

JavaScript

"use strict"; var __assign = (this && this.__assign) || function () { __assign = Object.assign || function(t) { for (var s, i = 1, n = arguments.length; i < n; i++) { s = arguments[i]; for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p]; } return t; }; return __assign.apply(this, arguments); }; exports.__esModule = true; var moment = require("../node_modules/moment/moment"); var TypeDetect = /** @class */ (function () { function TypeDetect(decimalCharacter, thousandsSeparator) { if (decimalCharacter === void 0) { decimalCharacter = '.'; } if (thousandsSeparator === void 0) { thousandsSeparator = ','; } this.decimalCharacter = decimalCharacter; this.thousandsSeparator = thousandsSeparator; this.langOpts = { abbrDays: { deDE: /^(mo\.|di\.|mi\.|do\.|fr\.|sa\.|so\.)$/gi, en: /^(mon|tue|wed|thu|fri|sat|sun)$/gi, esES: /^(lun\.|mar\.|mié\.|jue\.|vie\.|sáb\.|dom\.)$/gi, frFR: /^(lun\.|mar\.|mer\.|jeu\.|ven\.|sam\.|dim\.)$/gi }, abbrMonths: { deDE: /^(jan\.|feb\.|märz\.|apr\.|mai\.|juni\.|juli\.|aug\.|sep\.|okt\.|nov\.|dez\.)$/gi, en: /^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)$/gi, esES: /^(ene\.|feb\.|mar\.|abr\.|may\.|jun\.|jul\.|ago\.|sep\.|oct\.|nov\.|dic\.)$/gi, frFR: /^(janv\.|févr\.|mars\.|avr\.|mai\.|juin\.|juil\.|août\.|sept\.|oct\.|nov\.|dec\.)$/gi }, days: { deDE: /(montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag)/gi, en: /(monday|tuesday|wednesday|thursday|friday|saturday|sunday)/gi, esES: /(lunes|martes|miércoles|jueves|viernes|sábado|domingo)/gi, frFR: /(lundi|mardi|mercredi|jeudi|vendredi|samedi|dimanche)/gi }, months: { deDE: /^(januar|februar|märz|april|mai|juni|juli|august|september|oktober|november|dezember)$/gi, en: /^(january|february|march|april|may|june|july|august|september|october|november|december)$/gi, esES: /^(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)$/gi, frFR: /^(janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre)$/gi }, postFixes: { deDE: /^[0-9]+(st|nd|rd|th)$/gi, en: /^[0-9]+(st|nd|rd|th)$/gi, esES: /^[0-9]+(st|nd|rd|th)$/gi, frFR: /^[0-9]+(st|nd|rd|th)$/gi } }; } /** * Detects the type and additional details from the overall data set. * * @param data The dataset to have a type detected * @returns string - the type that has been detected and any additional details for that type */ TypeDetect.prototype.typeDetect = function (data) { var details = { dp: null, format: null, locale: null, postfix: null, prefix: null, type: null }; // If there is no data then return blank details if (!data || data.length === 0) { return details; } // Identify possible prefix and postfix - will only be used if certain types are identified var possPrefix = this._getPrefix(data); var possPostfix = this._getPostfix(data); var potentialType = this._getType(data, possPrefix, possPostfix); // If the type is a datetime, date or time then the format needs to be set. if (potentialType.indexOf('date_') === 0 || potentialType.indexOf('time_') === 0 || potentialType.indexOf('datetime_') === 0) { var splitdate = potentialType.split('_'); details.format = splitdate[1]; // If a format cannot be determined then default back to a string type if (details.format === null) { details.type = 'string'; } else { details.type = splitdate[0]; details.locale = splitdate[2]; } } else if (potentialType.includes('number')) { // If the type is a number then use the previously identified postfix and prefix details.type = 'number'; details.prefix = possPrefix; details.postfix = possPostfix; // Also determine the number of decimal places details.dp = this._getDP(data, possPostfix); } else if (potentialType.includes('sequence')) { // If the type is a number then use the previously identified postfix and prefix details.type = 'sequence'; details.prefix = ''; details.postfix = ''; // Also determine the number of decimal places details.dp = this._getDP(data, ''); // Sequences cannot have decimal places if (details.dp > 0) { details.type = 'number'; } } else { details.type = potentialType; } return details; }; /** * Extends the language options that are available by default. * * @param langOpts The extra language options that are to be added to/override the existing language options * @returns self for chaining */ TypeDetect.prototype.i18n = function (langOpts) { if (langOpts.abbrDays) { this.langOpts.abbrDays = __assign(__assign({}, this.langOpts.abbrDays), langOpts.abbrDays); } if (langOpts.abbrMonths) { this.langOpts.abbrMonths = __assign(__assign({}, this.langOpts.abbrMonths), langOpts.abbrMonths); } if (langOpts.days) { this.langOpts.days = __assign(__assign({}, this.langOpts.days), langOpts.days); } if (langOpts.months) { this.langOpts.months = __assign(__assign({}, this.langOpts.months), langOpts.months); } if (langOpts.postFixes) { this.langOpts.postFixes = __assign(__assign({}, this.langOpts.postFixes), langOpts.postFixes); } return this; }; TypeDetect.prototype._isBoolean = function (d) { var unqiue = Array.from(new Set(d)); // Remove null and undefined values unqiue = unqiue.filter(function (u) { return u !== undefined && u !== null; }); // Can only be boolean if we have 1 or 2 entries if (unqiue.length < 1 || unqiue.length > 2) { return false; } // Remove all non "boolean" values var filtered = unqiue.filter(function (v) { if (typeof v === 'string') { v = v.toLocaleLowerCase(); } return v === false || v === true || v === 'false' || v === 'true' || v === 0 || v === 1 || v === '0' || v === '1' || v === 'f' || v === 't' || v === 'no' || v === 'yes' || v === 'off' || v === 'on' || v === '×' || v === '✓' || v === '' || v === 'X' || v === 'x'; }); // Must still be 1 or 2 entries after the filtering return unqiue.length === filtered.length && (filtered.length === 1 || filtered.length === 2); }; TypeDetect.prototype._isEmpty = function (d) { return d === undefined || d === null || d === ''; }; /** * Gets the actual type of the data as a string * * @param data The array of data to be processed * @param prefix Any prefix that has been detected within the dataset * @param postfix Any postfix that has been detected within the dataset * @returns string - the actual type of the data and occasionally the * format of the data in dates this is underscore separated */ TypeDetect.prototype._getType = function (data, prefix, postfix) { var types = []; var dateSuggestion = null; var postFixRegExp = new RegExp(this._escapeRegExp(postfix) + '$'); var thousandsRegExp = new RegExp(this._escapeRegExp(this.thousandsSeparator), 'g'); // A type can only be set if all of the data fits it for (var i = 0; i < data.length; i++) { var el = data[i]; if (this._isEmpty(el)) { continue; } var type = typeof el; var tempEl = el; if (Array.isArray(tempEl)) { return 'array'; } else if (type === 'object' && tempEl.excel === undefined) { return 'object'; } if (type === 'object') { type = typeof el.value; tempEl = el.value; } // Need a temp el to replace and one to track var tempElReplaced = tempEl; // If the prefix exists, replace it within the temporary el if (prefix.length > 0 && tempEl.indexOf(prefix) === 0 && tempEl.length !== prefix.length) { tempElReplaced = tempElReplaced.replace(prefix, ''); } // If the postfix exists replace it within the temporary el if (postfix.length > 0 && tempElReplaced.indexOf(postfix) === tempElReplaced.length - postfix.length && tempElReplaced.length !== prefix.length) { tempElReplaced = tempElReplaced.replace(postFixRegExp, ''); } // Replace any thousands separators in the temporary element if (type === 'string') { tempElReplaced = tempElReplaced.replace(thousandsRegExp, ''); } // Replace any decimal characters in the temporary element if (this.decimalCharacter !== '.' && tempElReplaced.includes(this.decimalCharacter)) { tempElReplaced = tempElReplaced.split(this.decimalCharacter).join('.'); } // Check if there are any html tags if (type === 'string' && tempEl.match(/<(“[^”]*”|'[^’]*’|[^'”>])*>/g) !== null) { type = 'html'; } // Check if it is a url else if (type === 'string' && tempEl.match(/:\/\//g) !== null) { type = 'string'; } // At this point if the remaining value within tempEl can be converted to a number then it is a number else if (type === 'string' && (!isNaN(+el) && el.length > 0 || !isNaN(+tempElReplaced) && tempElReplaced.length > 0)) { if (data.length === 1 && prefix.length + postfix.length > 3 * tempElReplaced.length) { type = 'string'; } else { type = 'number'; } } else if (type === 'string' && !types.includes('string')) { // Data/time format detection // Note that if there is already a data point which doesn't match, then the column as a whole // can't be a date time, hence the second condition above. var format = this._getDateFormat(typeof el === 'string' ? el : el.value, dateSuggestion); if (!format) { type = 'string'; } else if (dateSuggestion !== null && (format === 'mixed' || format === 'string')) { return format; } else if (typeof format !== 'string') { // If there is a suggested format and it doesn't match this format then check all // of the previous data points against this new format as it could work for them if (dateSuggestion !== null && format.momentFormat !== dateSuggestion.momentFormat) { for (var j = 0; j < i; j++) { if (!moment(!(typeof el === 'object') ? data[j] : data[j].value, format.momentFormat, format.locales.length > 0 ? format.locales[0].substring(0, 2) : 'en').isValid()) { // This didn't work for the previous data, // but did the previous suggestion work for this point? if (!moment(!(typeof el === 'object') ? data[i] : data[i].value, dateSuggestion.momentFormat, dateSuggestion.locales.length > 0 ? dateSuggestion.locales[0].substring(0, 2) : 'en').isValid()) { return 'mixed'; } else { format = dateSuggestion; } } } } // if there is a suggested format if (dateSuggestion !== null) { // if the suggested format is shorter than this then it has probably // identified a short representation and so it should be used if (dateSuggestion.format.length < format.format.length) { format.format = dateSuggestion.format; } // Otherwise it must use the new format so remove the old one from potential types else { types.splice(types.indexOf('date-' + dateSuggestion), 1); } } var leadingtoken = 'date_'; if (format.momentFormat.includes(':')) { var tempFormat = format.momentFormat.replace(' A', 'A').replace(' a', 'a'); leadingtoken = tempFormat.includes(' ') || tempFormat.includes('T') ? 'datetime_' : 'time_'; } // Set the type for this format and the suggestion for the next type = leadingtoken + format.momentFormat + '_' + format.locales.join('-'); dateSuggestion = format; } } // If this type has not been identified yet then add it to the array if (!types.includes(type)) { types.push(type); } if (types.length === 2 && (!types.includes('string') || !types.includes('html'))) { return 'mixed'; } } // If more than one type has been identified then it must be mixed if (types.length === 2 && types.includes('string') && types.includes('html')) { return 'html'; } else if (types.length > 1) { return 'mixed'; } else if (this._isBoolean(data)) { return 'boolean'; } else if (types[0] === 'number') { return this._isSequence(data) ? 'sequence' : types[0]; } // Otherwise if only numbers have been found then that is the type else if (types.length && (types[0].includes('date') || types[0].includes('time') || types[0].includes('html'))) { return types[0]; } // If no other types are found then default to string return 'string'; }; /** * Determines a date format for a value that is passed in. * A big major condition here is that some combinations of tokens are not allowed. * For example, `2021 2021` will not be picked up as `YYYY YYYY` and * `2021 21` will not be picked up as `YYYY YY` Full details on this are provided in the readme * https://github.com/DataTables/Type-Detection#tokens * * @param el The potential date that is to have a value determined * @param suggestion The previously suggested format for other values in the same field * @returns the suggested format for the field */ TypeDetect.prototype._getDateFormat = function (el, suggestion) { // Current format object to store the details of this element var format = { format: [], hasDay: false, hasMonth: false, hasYear: false, latestLocale: null, latestToken: null, locales: [], momentFormat: '', separators: [], split: [], tokensUsed: [] }; var defaultFormatFormat = { definite: false, firm: false, value: '' }; var origEl = el; // Get the possible locales var locales = format.locales.length > 0 ? format.locales : Object.keys(this.langOpts.abbrDays); for (var _i = 0, locales_1 = locales; _i < locales_1.length; _i++) { var locale = locales_1[_i]; if (el.match(this.langOpts.days[locale]) !== null) { el = el.replace(this.langOpts.days[locale], '{day}'); format.locales.push(locale); } } var charSplit = el.split(''); var separators = ['-', '/', ':', ',', ' ', '+']; var prev = ''; // Iterate over all of the characters for (var _a = 0, charSplit_1 = charSplit; _a < charSplit_1.length; _a++) { var char = charSplit_1[_a]; if (separators.includes(char)) { // If the character is a separator if (prev.match(/[0-9]T[0-9]/g)) { var prevSplit = prev.split('T'); format.split.push(prevSplit[0]); format.separators.push('T'); format.format.push(defaultFormatFormat); // Add a part to identify format.split.push(prevSplit[1]); } else { // Add the characters that appeared since the last separator to the split array format.split.push(prev); } format.format.push(defaultFormatFormat); // Add a part to identify format.separators.push(char); // Note the Separator at this point prev = ''; // Clear the previous characters } else { // The character is not a separator so note it can continue to the next one prev += char; } } // Add the final part to the split format.split.push(prev); format.format.push(defaultFormatFormat); var formatSplitLength = format.split.length; // If a previous format (suggestion) has been detected and the two have a different number of parts // then the format cannot be consistent so return mixed if (suggestion !== null && formatSplitLength !== suggestion.split.length) { return 'mixed'; } // Iterate over every part of the potential datetime for (var i = 0; i < format.split.length; i++) { // If there has been a previous suggestion and it is firm for this part if (suggestion !== null && suggestion.format[i].firm === true) { // Copy into the current format format.locales = suggestion.locales; format.format[i] = suggestion.format[i]; var value = format.format[i].value; // Used a lot so store in temp variable format.tokensUsed.push(value); // Set flags for days, months and years if (value === 'DD' || value === 'D' || value === 'Do') { format.hasDay = true; } else if (value === 'YYYY' || value === 'YY' || value === 'Y') { format.hasYear = true; } else if (value === 'MM' || value === 'M' || value === 'MMM' || value === 'MMMM') { format.hasMonth = true; } continue; } var spl = format.split[i]; // If the value of this part is an empty string then it is a straightforward set if (spl.length === 0) { format.format[i] = { definite: true, firm: true, value: '' }; continue; } if (spl === '{day}') { this._setDateFormat(format, i, 'dddd', true, true); } else if (spl === 'T') { this._setDateFormat(format, i, 'T', true, true); } else if (format.separators[i - 1] === ':' && spl.match(/^\d\d\.\d\d\dZ?$/)) { // Seconds with milliseconds this._setDateFormat(format, i, 'ss.SSS', true, true); } // Some tokens are numbers else if (!isNaN(+spl)) { // If the previous separator was a plus and offset has not been included yet if (format.separators[i - 1] === '+' && !format.tokensUsed.includes('ZZ') && !format.tokensUsed.includes('Z') || // Or the previous separator was a minus, immediately preceeded by a ' ' format.separators[i - 1] === '-' && format.separators[i - 2] === ' ' && format.split[i - 1] === '' || // Or the previous separator was a minus and the one immediately before that was a ':' format.separators[i - 1] === '-' && format.separators[i - 2] === ':') { // The current token must be a time offset format = this._determineTokenFormat(format, i, 'ZZ', 'Z'); } else if (i > 1 && format.format[i - 1].value.includes('Z')) { format.format[i] = { definite: true, firm: true, value: '' }; continue; } // If the current separator is a colon then it must be immediately followed by an hour, // minute or second token. This has to be in that order. else if (format.separators[i] === ':') { // Have to check for all 4 tokens here to ensure not 2 hours even though we will only set HH or H // These can be converted later to hh and h if AM/PM is detected later. if (!format.tokensUsed.includes('HH') && !format.tokensUsed.includes('H') && !format.tokensUsed.includes('hh') && !format.tokensUsed.includes('h')) { format = this._determineTokenFormat(format, i, 'HH', 'H'); } else if (!format.tokensUsed.includes('mm') && !format.tokensUsed.includes('m')) { format = this._determineTokenFormat(format, i, 'mm', 'm'); } else if (!format.tokensUsed.includes('ss') && !format.tokensUsed.includes('s')) { format = this._determineTokenFormat(format, i, 'ss', 's'); } } // If the last separator was a colon then it is the end of the time so can only be // a minute or second token else if (i > 0 && format.separators[i - 1] === ':') { if (!format.tokensUsed.includes('mm') && !format.tokensUsed.includes('m')) { format = this._determineTokenFormat(format, i, 'mm', 'm'); } else if (!format.tokensUsed.includes('ss') && !format.tokensUsed.includes('s')) { format = this._determineTokenFormat(format, i, 'ss', 's'); } } // If it's not a colon then can attempt to detect year // Can't attempt to detect months or days at this point // as values under 31 could be a day or a year, and under // 12 could be month, day or year. These are determined later in the code. // Only year can be 4 characters long and a number else if (!format.tokensUsed.includes('YYYY') && spl.length === 4) { format = this._setDateFormat(format, i, 'YYYY', true, true, undefined, 'hasYear'); continue; } // Alternatively could be 2 digits if greater than 31 // Only years can be greater than 31, months and days mean // that we cannot determine this below that value else if (!format.tokensUsed.includes('YY') && +spl > 31) { format = this._setDateFormat(format, i, 'YY', true, false, undefined, 'hasYear'); } } // Some tokens are strings else { // Check for AM/PM var addMinutes = ''; if (spl.match(/\d\d(am|pm)/i)) { // Minutes then am/pm // TODO this should go into the tokeniser above. It would require a rewrite // of this whole date/time parser. addMinutes = 'mm'; spl = spl.replace(/\d\d/, ''); } // Put conditions into variables to avoid evaluating them multiple times each. var condUpper = !format.tokensUsed.includes('A') && (spl === 'AM' || spl === 'PM'); var condLower = !format.tokensUsed.includes('a') && (spl === 'am' || spl === 'pm'); if (condUpper || condLower) { if (condUpper) { format = this._setDateFormat(format, i, addMinutes + 'A', true, true); } else { format = this._setDateFormat(format, i, addMinutes + 'a', true, true); } // If this is found then need to make sure that any hours found are 12 hour for (var j = 0; j < i; j++) { var formatFormat = format.format[j]; if (formatFormat.firm === false && formatFormat.value.includes('H')) { format = this._setDateFormat(format, j, formatFormat.value.toLocaleLowerCase(), true, true); break; } } } // Check for other string tokens else { // Get the possible locales locales = format.locales.length > 0 ? format.locales : Object.keys(this.langOpts.abbrDays); var tokensThisRound = []; var localesThisRound = []; for (var _b = 0, locales_2 = locales; _b < locales_2.length; _b++) { var locale = locales_2[_b]; format.latestToken = null; format.latestLocale = null; if ((!format.tokensUsed.includes('Do') || tokensThisRound.includes('Do')) && spl.match(this.langOpts.postFixes[locale])) { format = this._setDateFormat(format, i, 'Do', true, true, locale, 'hasDay'); } else if ((!format.tokensUsed.includes('MMMM') || tokensThisRound.includes('MMMM')) && spl.match(this.langOpts.months[locale]) && this.langOpts.months[locale] !== undefined) { format = this._setDateFormat(format, i, 'MMMM', true, spl === 'may' ? false : true, locale, 'hasMonth'); } else if ((!format.tokensUsed.includes('MMM') || tokensThisRound.includes('MMM')) && spl.match(this.langOpts.abbrMonths[locale])) { format = this._setDateFormat(format, i, 'MMM', true, spl === 'may' ? false : true, locale, 'hasMonth'); } else if ((!format.tokensUsed.includes('dddd') || tokensThisRound.includes('dddd')) && spl.match(this.langOpts.days[locale])) { this._setDateFormat(format, i, 'dddd', true, true, locale); } else if ((!format.tokensUsed.includes('ddd') || tokensThisRound.includes('ddd')) && spl.match(this.langOpts.abbrDays[locale])) { this._setDateFormat(format, i, 'ddd', true, true, locale); } if (format.latestToken !== null) { tokensThisRound.push(format.latestToken); } if (format.latestLocale !== null) { localesThisRound.push(format.latestLocale); } } var newLocales = []; for (var _c = 0, localesThisRound_1 = localesThisRound; _c < localesThisRound_1.length; _c++) { var locale = localesThisRound_1[_c]; if (format.locales.includes(locale)) { newLocales.push(locale); } } format.locales = localesThisRound; } } } var empties = []; // Find the unknown parts for (var i = 0; i < format.format.length; i++) { if (format.format[i].definite === false) { empties.push(i); } } // If there is no day, month or year then need to attempt to identify the month // Year will have been established above if it is greater than 31 // Day values could be either month or year so couldn't say for sure if (!format.hasDay && !format.hasMonth && !format.hasYear) { var possMonth = []; // If a value is less than 12 then it could be a month for (var _d = 0, empties_1 = empties; _d < empties_1.length; _d++) { var empty = empties_1[_d]; if (!isNaN(+format.split[empty]) && +format.split[empty] <= 12) { possMonth.push(empty); } } if (possMonth.length === 1) { // Can now declare this a month if the others are over format = format.split[possMonth[0]].indexOf('0') === 0 ? this._setDateFormat(format, possMonth[0], 'MM', true, false, undefined, 'hasMonth') : this._setDateFormat(format, possMonth[0], 'M', true, false, undefined, 'hasMonth'); } } var changeMade = true; var allIdentified = false; while (changeMade && !allIdentified) { changeMade = false; allIdentified = true; for (var i = 0; i < format.format.length; i++) { if (format.format[i].value.length === 0 && format.format[i].definite === false && !isNaN(+format.split[i]) && (!format.hasDay || !format.hasYear || !format.hasMonth)) { // Year and Day - must be month if (format.hasYear && format.hasDay && +format.split[i] <= 12) { format = this._determineTokenFormat(format, i, 'MM', 'M', undefined, 'hasMonth'); changeMade = true; } // Day and Month - must be year else if (format.hasDay && format.hasMonth) { format = this._determineTokenFormat(format, i, 'YY', 'Y', undefined, 'hasYear'); changeMade = true; } // Month and Year - must be day else if (format.hasMonth && format.hasYear) { format = this._determineTokenFormat(format, i, 'DD', 'D', undefined, 'hasDay'); changeMade = true; } // Year or Day - must be month else if ((format.hasYear || format.hasDay) && +format.split[i] <= 12) { format = this._determineTokenFormat(format, i, 'MM', 'M', undefined, 'hasMonth'); changeMade = true; } // Month then assume day next else if (format.hasMonth && +format.split[i] <= 31) { format = this._determineTokenFormat(format, i, 'DD', 'D', undefined, 'hasDay'); changeMade = true; } // Last resort assume Month else if (+format.split[i] <= 12) { format = this._determineTokenFormat(format, i, 'MM', 'M', undefined, 'hasMonth'); changeMade = true; } else { allIdentified = false; } } } } // Construct momentFormat from parts and separator for (var i = 0; i < format.split.length - 1; i++) { if (!format.format[i].definite) { return 'string'; } format.momentFormat += format.format[i].value; // We don't want to add the current separator if the next token is a Z // or the current token is a Z and the next separator is a : as this is included in the Z token if (!(format.format[i + 1].value.includes('Z') || format.format[i].value.includes('Z') && format.separators[i] === ':')) { format.momentFormat += format.separators[i]; } } // Faster to do this here than add a check every loop format.momentFormat += format.format[format.split.length - 1].value; // If there is a format and it is valid then return it if (format.momentFormat.length > 0 && moment(origEl, format.momentFormat, format.locales.length > 0 ? format.locales[0].substring(0, 2) : 'en').isValid()) { return format; } return null; }; TypeDetect.prototype._isSequence = function (data) { var thousandsRegExp = new RegExp(this.thousandsSeparator, 'g'); data = data .map(function (a) { return typeof a === 'string' ? a.replace(thousandsRegExp, '') : a; }) .sort(function (a, b) { if (+a > +b) { return 1; } else if (+b > +a) { return -1; } else return 0; }); if (data.length < 3 || isNaN(+data[1]) || isNaN(+data[0])) { return false; } var initGap = +data[1] - +data[0]; for (var i = 2; i < data.length; i++) { if (isNaN(data[i]) || +data[i] !== +data[i - 1] + initGap) { return false; } } return true; }; /** * Determine whether to use a double or single token if there is a leading 0 * * @param format The format object currently being constructed * @param idx The part number * @param a The first option if double token * @param b The second option if single token * @param has Any flag to be set * @returns the updated format */ TypeDetect.prototype._determineTokenFormat = function (format, idx, a, b, locale, has) { if (has === void 0) { has = ''; } return format.split[idx].indexOf('0') === 0 ? this._setDateFormat(format, idx, a, true, true, locale, has) : this._setDateFormat(format, idx, b, true, false, locale, has); }; /** * Set's all of the relevant values when a new token is determined * * @param format The format object currently being constructed * @param idx The part number * @param value The token to be set * @param definite Whether this is definitely the token's type * @param firm Whether this is the exact token * @param locale Optional, any locale that is associated with this value/token combination * @param has Any flag to be set * @returns the updated format */ TypeDetect.prototype._setDateFormat = function (format, idx, value, definite, firm, locale, has) { if (has === void 0) { has = ''; } format.format[idx] = { definite: definite, firm: firm, value: value }; format.tokensUsed.push(value); format.latestToken = value; format.latestLocale = locale !== undefined ? locale : null; if (locale !== undefined && !format.locales.includes(locale)) { format.locales.push(locale); } // Set the flag if it has been defined if (has.length > 0) { format[has] = true; } return format; }; /** * Identifies a common prefix amongst an array of data * * @param data The data that is to be parsed to determine a prefix * @returns string, the prefix that has been identified */ TypeDetect.prototype._getPrefix = function (data) { // Find the first not-null value var prefix = this._firstNonNull(data); if (prefix === undefined) { return ''; } // If the first item is an object, then we are parsing excel data so our // interaction with it is slightly different. if (typeof prefix === 'object') { if (!prefix.excel) { return ''; } var idx = prefix.excel.indexOf('"'); // Can check for a quotation mark before going any further // If there is not one at the start then no prefix if (idx !== 0) { return ''; } var lastIdx = prefix.excel.lastIndexOf('"'); // There needs to be at least 2 quotation marks if (idx !== lastIdx) { var excelPrefix = prefix.excel.match(/^"[^"]*"/ig)[0]; for (var i = 1; i < data.length; i++) { if (data[i] === '') { continue; } if (data[i].excel.indexOf(excelPrefix) !== 0) { return ''; } } return excelPrefix.replace(/"/g, ''); } return ''; } else { prefix = this._determinePrefix(data); // Anything left at this point is present at the start of every value // and _may_ be considered a prefix, but this will depend on the type var matches = prefix.match(/^[^0-9]+/g); return matches !== null ? matches[0] : ''; } }; TypeDetect.prototype._determinePrefix = function (data, postfix) { if (postfix === void 0) { postfix = false; } var first = this._firstNonNull(data); var prefix = postfix ? first.split('').reverse().join('') : first; for (var _i = 0, data_1 = data; _i < data_1.length; _i++) { var d = data_1[_i]; if (this._isEmpty(d)) { continue; } // There can't be a prefix if the type isn't a string if (typeof d !== 'string') { return ''; } var el = postfix ? d.split('').reverse().join('') : d; // If the whole prefix is at the start of the value then it isn't going // to be shortened further so we can proceed to the next value if (el.indexOf(prefix) === 0) { continue; } // Gradually increase the prefix in length to check that it matches the start of the value for (var j = 0; j < prefix.length; j++) { // If this portion of prefix is at the beginning of the string then carry on // Otherwise it isn't the same across all of the elements so slice it down // to what has passed so far and check again if (el.indexOf(prefix.slice(0, j)) !== 0) { prefix = prefix.slice(0, j - 1); // If the length of the prefix is 0 then there has been no match between this value // and the previous value. There is therefore no need to iterate through the remaining // values as we can tell at this point that there isn't going to be a common prefix // across the entire dataset if (prefix.length === 0) { return ''; } } } } return prefix; }; /** * Find the first non-empty value in an array * * @param d Array to find * @returns undefined if nothing found, otherwise the value found */ TypeDetect.prototype._firstNonNull = function (d) { var _this = this; return d.find(function (a) { return !_this._isEmpty(a); }); }; /** * Identifies a common postfix amongst an array of data * * @param data The data that is to be parsed to determine a postfix * @returns string, the postfix that has been identified */ TypeDetect.prototype._getPostfix = function (data) { var _this = this; // Find the first not-null value var postfix = data.find(function (d) { return !_this._isEmpty(d); }); if (postfix === undefined) { return ''; } if (postfix && typeof postfix === 'object') { if (!postfix.excel) { return ''; } var idx = postfix.excel.indexOf('"'); // Can check for a quotation mark before going any further // If there is not one then there is no postfix with excel if (idx === -1) { return ''; } var lastIdx = postfix.excel.lastIndexOf('"'); // There need to be at least 2 quotation marks and the last one must be at the end if (idx !== lastIdx && lastIdx === postfix.excel.length - 1) { var excelPostfix = postfix.excel.match(/"[^"]*"$/ig)[0]; for (var i = 1; i < data.length; i++) { if (data[i] === '') { continue; } if (data[i].excel.match(new RegExp(this._escapeRegExp(excelPostfix) + '$')) === null) { return ''; } } return excelPostfix.replace(/"/g, ''); } return ''; } // If the type of the first element is not a string then there cannot be a postfix // Need to check this now before the string operations else if (typeof postfix === 'string') { // Reverse the string, a postfix is a prefix working from the other end of the string // So, can use the same algorithm as above in `getPrefix()` to do this var possPost = this._determinePrefix(data, true); // Anything left at this point is present at the end of every value, once it is // reversed and _may_ be considered a postfix, but this will depend on the type var matches = possPost.split('').reverse().join('').match(/[^0-9]+$/g); return matches !== null ? matches[matches.length - 1] : ''; } return ''; }; /** * Identifies the highest number of decimal places within the dataset * * @param data The data that is to be parsed to determine the number of decimal places * @param postfix The datas postfix that is stripped from the data to accurately determine number of decimal places * @returns number, the highest number of decimal places in the entire dataset */ TypeDetect.prototype._getDP = function (data, postfix) { var highestDP = 0; var replaceRegex = new RegExp(this._escapeRegExp(postfix) + '$'); for (var _i = 0, data_2 = data; _i < data_2.length; _i++) { var el = data_2[_i]; if (this._isEmpty(el)) { continue; } // Replace the postfix as it's characters do not count as decimal places var split = (typeof el === 'object' ? el.value : el) .toString() .replace(replaceRegex, '') .split(this.decimalCharacter); // Check that there is a decimal place and also if there are // more than previously seen - the highest value should be used if (split.length > 1 && split[1].length > highestDP) { highestDP = split[1].length; } } return highestDP; }; TypeDetect.prototype._escapeRegExp = function (val) { var escapeRegExp = new RegExp('(\\' + ['/', '.', '*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '\\', '$', '^', '-'].join('|\\') + ')', 'g'); return val.replace(escapeRegExp, '\\$1'); }; return TypeDetect; }()); exports["default"] = TypeDetect; // This method is a good start towards more advanced excel format detection. // Cloudtables is not yet equipped to deal with this much info yet though so it can wait here incase that point comes. // interface IExcelFormat { // dp: number; // fraction: boolean; // scaleThousands: number; // thousandsSeparated: boolean; // leadingZeros: number; // forceLeadingZeros: boolean; // colours: object[]; // hideZeroValues: boolean; // hideAllValues: boolean; // includesDateTime: boolean; // } // public getAdvancedExcelFormat(el: any): any { // if (typeof el !== 'object' || Object.keys(el).length !== 2) { // return false; // } // let value = el.value; // let code = el.excel; // let format: IExcelFormat = { // colours: [], // dp: 0, // forceLeadingZeros: false, // fraction: false, // hideAllValues: false, // hideZeroValues: false, // includesDateTime: false, // leadingZeros: 0, // scaleThousands: 0, // thousandsSeparated: false // }; // if (code.includes('.')) { // format.dp = code.split('.')[1].length; // } // if (code.includes('/') && code.match(/[a-z]/ig) === null) { // format.fraction = true; // } // if (code.includes(',')) { // if (code.lastIndexOf(',') !== code.length - 1) { // format.thousandsSeparated = true; // } // let tempCode = code; // while (tempCode.lastIndexOf(',') === tempCode.length - 1) { // tempCode = tempCode.slice(0, -1); // format.scaleThousands++; // } // } // if (code.indexOf('"') === 0 && code.split('"')[1].indexOf('0') === 0) { // format.forceLeadingZeros = true; // } // if (code.split('"')[(code.split('"').length > 1 ? 1 : 0)].match(/^0+/g) !== null) { // format.leadingZeros = code.split('"')[(code.split('"').length > 1 ? 1 : 0)].match(/^0+/g)[0].length; // } // let tempCode = code; // while (tempCode.includes('[')) { // let match = tempCode.match(/\[[^\[]*\]/ig)[0]; // let colour = match.split(/(\[|\])/ig)[1]; // let condition = null; // tempCode = tempCode.replace(match, ''); // if (tempCode.includes('[')) { // match = tempCode.match(/\[[^\[]*\]/ig)[0]; // condition = match.split(/(\[|\])/ig)[1]; // tempCode = tempCode.replace(match, ''); // } // format.colours.push({colour, condition}); // } // if (code.match(/[^0-9\#\?]*/ig) !== null && code.match(/[^0-9\#\?]*/ig)[0].length === code.length) { // format.includesDateTime = true; // } // return format; // }