/*
 * @formant/ava
 * Version:
 * A framework for automated visual analytics.
 * 356 lines (355 loc) • 13.2 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.analyzeField = exports.analyzeType = exports.analyzeDate = exports.analyzeNumber = exports.analyzeString = exports.isTime = exports.isNominal = exports.isInterval = exports.isContinuous = exports.isDiscrete = exports.isUnique = exports.isOrdinal = exports.isConst = exports.isDateFieldInfo = exports.isNumberFieldInfo = exports.isStringFieldInfo = void 0;
var statistics_1 = require("../../statistics");
var utils_1 = require("../../utils");
/**
 * Check whether a field info is a StringFieldInfo.
 * @param x - field info object; only its `recommendation` property is read
 * @returns `true` when `x.recommendation` is exactly `'string'`
 */
function isStringFieldInfo(x) {
    var recommended = x.recommendation;
    return recommended === 'string';
}
exports.isStringFieldInfo = isStringFieldInfo;
/**
 * Check whether a field info is a NumberFieldInfo.
 * @param x - field info object; only its `recommendation` property is read
 * @returns `true` when `x.recommendation` is `'integer'` or `'float'`
 */
function isNumberFieldInfo(x) {
    var numericKinds = ['integer', 'float'];
    return numericKinds.indexOf(x.recommendation) !== -1;
}
exports.isNumberFieldInfo = isNumberFieldInfo;
/**
 * Check whether a field info is a DateFieldInfo.
 * @param x - field info object; only its `recommendation` property is read
 * @returns `true` when `x.recommendation` is exactly `'date'`
 */
function isDateFieldInfo(x) {
    var recommended = x.recommendation;
    return recommended === 'date';
}
exports.isDateFieldInfo = isDateFieldInfo;
/**
 * Checks if a field is constant, i.e. it has exactly one distinct value.
 * @param info - The {@link FieldInfo} to process; only `distinct` is read
 * @returns `true` when `info.distinct` is exactly `1`
 */
function isConst(info) {
    var distinctCount = info.distinct;
    return distinctCount === 1;
}
exports.isConst = isConst;
/**
 * Checks if field is an ordinal.
 *
 * A string field is treated as ordinal when its values share a common prefix
 * and/or suffix and the part of every value left after stripping them matches
 * one and the same sequence pattern: digits, Chinese numerals, Chinese weekday
 * characters, a single lower-case letter, or a single upper-case letter
 * (for example `Q1`, `Q2`, `Q3`).
 *
 * @param info - Field Info; reads `rawData`, `recommendation` and, through
 * `isConst`, `distinct`
 * @returns `true` when the field looks ordinal, otherwise `false`
 */
function isOrdinal(info) {
    var rawData = info.rawData, recommendation = info.recommendation;
    if (recommendation !== 'string')
        return false;
    if (isConst(info))
        return false;
    var list = rawData.filter(function (item) { return !(0, utils_1.isNil)(item) && (0, utils_1.isBasicType)(item); });
    if (list.length === 0)
        return false;
    // The scan below indexes characters, so it only makes sense for strings.
    // A non-string item (number, boolean, ...) has no indexable characters and
    // can never share a prefix/suffix with a non-empty string, so the field is
    // not ordinal. Bailing out here also prevents the scan from spinning on
    // `undefined === undefined` comparisons.
    var minLength = Infinity;
    for (var i = 0; i < list.length; i += 1) {
        if (typeof list[i] !== 'string')
            return false;
        minLength = Math.min(minLength, list[i].length);
    }
    // True when every item has the same character `offset` positions from the
    // start (or from the end when `fromEnd` is set).
    var sharesCharAt = function (offset, fromEnd) {
        var head = list[0];
        var expected = fromEnd ? head[head.length - 1 - offset] : head[offset];
        return list.every(function (item) {
            var char = fromEnd ? item[item.length - 1 - offset] : item[offset];
            return char === expected;
        });
    };
    // Both scans are bounded by the shortest item so they always terminate,
    // even when all items are identical.
    var prefixLength = 0;
    while (prefixLength < minLength && sharesCharAt(prefixLength, false))
        prefixLength += 1;
    var suffixLength = 0;
    while (suffixLength < minLength && sharesCharAt(suffixLength, true))
        suffixLength += 1;
    // Without any shared prefix or suffix the values are not considered ordinal.
    if (prefixLength === 0 && suffixLength === 0)
        return false;
    var patterns = [/\d+/, /(零|一|二|三|四|五|六|七|八|九|十)+/, /(一|二|三|四|五|六|日)/, /^[a-z]$/, /^[A-Z]$/];
    // Keep only the varying middle part of each value.
    var arr = list.map(function (item) {
        return item.slice(prefixLength, item.length - suffixLength);
    });
    // Ordinal when a single pattern matches every varying part.
    return patterns.some(function (p) {
        return arr.every(function (item) { return p.test(item); });
    });
}
exports.isOrdinal = isOrdinal;
/**
 * Checks if a field is unique, i.e. its distinct-value count equals its total count.
 * @param info - The {@link FieldInfo} to process; reads `distinct` and `count`
 * @returns `true` when `info.distinct` strictly equals `info.count`
 */
function isUnique(info) {
    var distinctCount = info.distinct;
    var totalCount = info.count;
    return distinctCount === totalCount;
}
exports.isUnique = isUnique;
/**
 * Checks if a field is discrete.
 * @param info - The {@link FieldInfo} to process; only `recommendation` is read
 * @returns `true` when `info.recommendation` is exactly `'integer'`
 */
function isDiscrete(info) {
    var recommended = info.recommendation;
    return recommended === 'integer';
}
exports.isDiscrete = isDiscrete;
/**
 * Checks if a field is continuous.
 * @param info - The {@link FieldInfo} to process; only `recommendation` is read
 * @returns `true` when `info.recommendation` is exactly `'float'`
 */
function isContinuous(info) {
    var recommended = info.recommendation;
    return recommended === 'float';
}
exports.isContinuous = isContinuous;
/**
 * Checks if a field is an interval, i.e. a numeric field.
 * @param info - The {@link FieldInfo} to process; only `recommendation` is read
 * @returns `true` when `info.recommendation` is `'integer'` or `'float'`
 */
function isInterval(info) {
    switch (info.recommendation) {
        case 'integer':
        case 'float':
            return true;
        default:
            return false;
    }
}
exports.isInterval = isInterval;
/**
 * Checks if a field is nominal.
 *
 * Boolean fields are always nominal; string fields are nominal unless
 * `isOrdinal` reports them as ordinal; every other recommendation is not
 * nominal.
 * @param info - The {@link FieldInfo} to process
 * @returns `true` when the field is nominal
 */
function isNominal(info) {
    switch (info.recommendation) {
        case 'boolean':
            return true;
        case 'string':
            return !isOrdinal(info);
        default:
            return false;
    }
}
exports.isNominal = isNominal;
/**
 * Checks if a field is a time field.
 * @param info - Field Info; only `recommendation` is read
 * @returns `true` when `info.recommendation` is exactly `'date'`
 */
function isTime(info) {
    var recommended = info.recommendation;
    return recommended === 'date';
}
exports.isTime = isTime;
/**
 * Analyze string field info.
 * @param value - data: an array of strings (each item's `length` is read and
 * each item is tested against regular expressions)
 * @returns an object with `maxLength`, `minLength` and `meanLength` (computed
 * from the item lengths by the statistics helpers) plus the flags
 * `containsChar` (any ASCII letter), `containsDigit` (any `0-9`) and
 * `containsSpace` (any whitespace)
 */
function analyzeString(value) {
    var lenArray = value.map(function (item) { return item.length; });
    return {
        maxLength: (0, statistics_1.max)(lenArray),
        minLength: (0, statistics_1.min)(lenArray),
        meanLength: (0, statistics_1.mean)(lenArray),
        // `[A-Za-z]`, not `[A-z]`: the range A-z also spans the ASCII
        // punctuation between `Z` and `a` ([ \ ] ^ _ `), which are not letters.
        containsChar: value.some(function (item) { return /[A-Za-z]/.test(item); }),
        containsDigit: value.some(function (item) { return /[0-9]/.test(item); }),
        containsSpace: value.some(function (item) { return /\s/.test(item); }),
    };
}
exports.analyzeString = analyzeString;
/**
 * Analyze number field info.
 *
 * Delegates every statistic to the statistics helpers; `quantile` is called
 * with the percent values 5, 25, 50, 75 and 95.
 * @param value - data: an array of numbers
 * @returns an object with `minimum`, `maximum`, `mean`, `percentile5`,
 * `percentile25`, `percentile50`, `percentile75`, `percentile95`, `sum`,
 * `variance`, `standardDeviation` and `zeros` (how many items are strictly
 * equal to `0`)
 */
function analyzeNumber(value) {
    var stats = statistics_1;
    var summary = {
        minimum: (0, stats.min)(value),
        maximum: (0, stats.max)(value),
        mean: (0, stats.mean)(value),
    };
    // Percentiles are added in ascending order, right after the mean.
    [5, 25, 50, 75, 95].forEach(function (percent) {
        summary['percentile' + percent] = (0, stats.quantile)(value, percent);
    });
    summary.sum = (0, stats.sum)(value);
    summary.variance = (0, stats.variance)(value);
    summary.standardDeviation = (0, stats.standardDeviation)(value);
    summary.zeros = value.reduce(function (count, item) {
        return item === 0 ? count + 1 : count;
    }, 0);
    return summary;
}
exports.analyzeNumber = analyzeNumber;
/**
 * Analyze date field info.
 *
 * Every item is converted to a timestamp, and the ORIGINAL items at the
 * positions of the smallest and largest timestamps are returned.
 * @param value - data: an array of date-like values
 * @param isInteger - when true, items whose string form has exactly 8
 * characters are read as `YYYYMMDD` (e.g. `20200131`); defaults to `false`
 * @returns an object with `minimum` and `maximum`, both taken from `value`
 */
function analyzeDate(value, isInteger) {
    if (isInteger === void 0) { isInteger = false; }
    var list = value.map(function (item) {
        if (isInteger) {
            var str = "".concat(item);
            if (str.length === 8) {
                // `substring` takes (start, end), not (start, length): the
                // month is characters 4-5 and the day is characters 6-7.
                var year = str.substring(0, 4);
                var month = str.substring(4, 6);
                var day = str.substring(6, 8);
                return new Date("".concat(year, "/").concat(month, "/").concat(day)).getTime();
            }
        }
        return new Date(item).getTime();
    });
    return {
        minimum: value[(0, statistics_1.minIndex)(list)],
        maximum: value[(0, statistics_1.maxIndex)(list)],
    };
}
exports.analyzeDate = analyzeDate;
/**
 * Determine what type a value is, may be one of [integer float date string null].
 *
 * Checks run in this order: nil, number, date (object or date string), then
 * numeric string; anything else is reported as `'string'`.
 * @param value - the value to classify
 * @param strictDatePattern - forwarded unchanged to `isDateString`
 * @returns `'null'`, `'integer'`, `'float'`, `'date'` or `'string'`
 */
function analyzeType(value, strictDatePattern) {
    if ((0, utils_1.isNil)(value)) {
        return 'null';
    }
    if ((0, utils_1.isNumber)(value)) {
        return (0, utils_1.isInteger)(value) ? 'integer' : 'float';
    }
    // Recognize dates first so that date strings are not classified as plain strings.
    var looksLikeDate = (0, utils_1.isDate)(value) || (0, utils_1.isDateString)(value, strictDatePattern);
    if (looksLikeDate) {
        return 'date';
    }
    // A numeric string is a float when it contains a decimal point, otherwise an integer.
    if ((0, utils_1.isString)(value) && (0, utils_1.isNumberString)(value)) {
        return value.includes('.') ? 'float' : 'integer';
    }
    return 'string';
}
exports.analyzeType = analyzeType;
/**
 * Analyze field info.
 *
 * Builds a summary object for one field (an array of raw values): total and
 * distinct counts, the detected type (or 'mixed'), a recommended type, the
 * number of missing values, per-type sub-analyses for mixed fields (`meta`),
 * type-specific statistics, and the list of levels of measurement.
 * @param value - data: array of raw values; nil items are counted as missing
 * @param strictDatePattern - forwarded to `analyzeType` and to most recursive
 * `analyzeField` calls
 * @returns the assembled field info object
 * @public
 */
function analyzeField(value, strictDatePattern) {
// Normalize every nil item to `null` so missing values share one representation.
var list = value.map(function (item) { return ((0, utils_1.isNil)(item) ? null : item); });
// presumably a value -> occurrence-count map; `valueMap.null` is used below as the missing count — TODO confirm
var valueMap = (0, statistics_1.valueMap)(list);
var recommendation;
// Only filter when the map reports nulls; otherwise reuse `list` as is.
var nonNullArray = valueMap.null ? list.filter(function (item) { return item !== null; }) : list;
// Per-item type names, then the distinct type names with 'null' excluded.
var typeArray = list.map(function (item) { return analyzeType(item, strictDatePattern); });
var types = Object.keys((0, statistics_1.valueMap)(typeArray)).filter(function (item) { return item !== 'null'; });
// generate recommendation
switch (types.length) {
case 0:
// No non-null type at all.
recommendation = 'null';
break;
case 1:
recommendation = types[0];
// an integer field may be a date field
if (recommendation === 'integer') {
var data = list.filter(function (item) { return item !== null; });
// NOTE(review): isDateString is called without strictDatePattern here — confirm this is intended.
if (data.map(function (num) { return "".concat(num); }).every(function (str) { return (0, utils_1.isDateString)(str); })) {
recommendation = 'date';
}
}
break;
case 2:
// float combined with integer or date is recommended as float.
if ((types.includes('integer') || types.includes('date')) && types.includes('float')) {
recommendation = 'float';
break;
}
if (types.includes('integer') && types.includes('date')) {
// an integer field may be a date field
var data = list.filter(function (item) { return item !== null; });
// NOTE(review): isDateString is called without strictDatePattern here as well — confirm.
if (data.map(function (num) { return "".concat(num); }).every(function (str) { return (0, utils_1.isDateString)(str); })) {
recommendation = 'date';
}
else {
recommendation = 'integer';
}
break;
}
// Any other pair of types falls back to string.
recommendation = 'string';
break;
default:
// Three or more distinct types fall back to string.
recommendation = 'string';
}
var uniqueArray = (0, utils_1.unique)(nonNullArray);
var fieldInfo = {
count: value.length,
distinct: uniqueArray.length,
// A single detected type is reported as is; none becomes 'null'; several become 'mixed'.
type: types.length <= 1 ? types[0] || 'null' : 'mixed',
recommendation: recommendation,
missing: valueMap.null || 0,
rawData: value,
valueMap: valueMap,
};
// For mixed fields, analyze each detected type separately. Each branch takes
// its items from the remaining pool and then removes them from that pool, so
// the outcome depends on the order of `types`.
if (types.length > 1) {
var meta_1 = {};
var restNotNullArray_1 = nonNullArray;
types.forEach(function (item) {
if (item === 'date') {
// NOTE(review): the isDateString calls in this loop omit strictDatePattern — confirm.
meta_1.date = analyzeField(restNotNullArray_1.filter(function (item) { return (0, utils_1.isDateString)(item); }), strictDatePattern);
restNotNullArray_1 = restNotNullArray_1.filter(function (item) { return !(0, utils_1.isDateString)(item); });
}
else if (item === 'integer') {
meta_1.integer = analyzeField(restNotNullArray_1.filter(function (item) { return (0, utils_1.isIntegerString)(item) && !(0, utils_1.isDateString)(item); }), strictDatePattern);
restNotNullArray_1 = restNotNullArray_1.filter(function (item) { return !(0, utils_1.isIntegerString)(item); });
}
else if (item === 'float') {
meta_1.float = analyzeField(restNotNullArray_1.filter(function (item) { return (0, utils_1.isFloatString)(item) && !(0, utils_1.isDateString)(item); }), strictDatePattern);
restNotNullArray_1 = restNotNullArray_1.filter(function (item) { return !(0, utils_1.isFloatString)(item); });
}
else if (item === 'string') {
// NOTE(review): this recursive call does not pass strictDatePattern, unlike the three above — confirm.
meta_1.string = analyzeField(restNotNullArray_1.filter(function (item) { return analyzeType(item, strictDatePattern) === 'string'; }));
restNotNullArray_1 = restNotNullArray_1.filter(function (item) { return analyzeType(item, strictDatePattern) !== 'string'; });
}
});
fieldInfo.meta = meta_1;
}
// A non-date field with exactly two distinct values may be recommended as boolean.
if (fieldInfo.distinct === 2 && fieldInfo.recommendation !== 'date') {
// Temporary threshold: with 100 or more rows, two distinct values are taken as boolean outright.
if (list.length >= 100) {
fieldInfo.recommendation = 'boolean';
}
else if ((0, utils_1.isBoolean)(uniqueArray, true)) {
fieldInfo.recommendation = 'boolean';
}
}
// The checks below read the local `recommendation`, not `fieldInfo.recommendation`,
// so a field switched to 'boolean' above still receives the statistics of its
// original recommendation. NOTE(review): confirm this is intended.
if (recommendation === 'string') {
// Items are stringified before the string statistics are computed.
Object.assign(fieldInfo, analyzeString(nonNullArray.map(function (item) { return "".concat(item); })));
}
if (recommendation === 'integer' || recommendation === 'float') {
// `item * 1` coerces each item (e.g. a numeric string) to a number.
Object.assign(fieldInfo, analyzeNumber(nonNullArray.map(function (item) { return item * 1; })));
}
if (recommendation === 'date') {
// The second argument tells analyzeDate that the raw items were detected as integers.
Object.assign(fieldInfo, analyzeDate(nonNullArray, fieldInfo.type === 'integer'));
}
// Collect every level of measurement whose predicate accepts the finished field info.
var levelOfMeasurements = [];
if (isNominal(fieldInfo))
levelOfMeasurements.push('Nominal');
if (isOrdinal(fieldInfo))
levelOfMeasurements.push('Ordinal');
if (isInterval(fieldInfo))
levelOfMeasurements.push('Interval');
if (isDiscrete(fieldInfo))
levelOfMeasurements.push('Discrete');
if (isContinuous(fieldInfo))
levelOfMeasurements.push('Continuous');
if (isTime(fieldInfo))
levelOfMeasurements.push('Time');
fieldInfo.levelOfMeasurements = levelOfMeasurements;
return fieldInfo;
}
exports.analyzeField = analyzeField;