UNPKG

@formant/ava

Version:

A framework for automated visual analytics.

91 lines (90 loc) 4.5 kB
import { __read, __spreadArray } from "tslib";
import { get, isNil, isString, orderBy } from 'lodash';
import { distinct, mean } from '../../../data';
import { categoryOutlier } from '../../algorithms';
import { IQR_K, SIGNIFICANCE_BENCHMARK } from '../../constant';
import { calculatePValue, calculateOutlierThresholds, preValidation, getAlgorithmCommonInput, getNonSignificantInsight, } from '../util';

/**
 * Detect outliers in a list of measure values.
 *
 * Two strategies are supported, selected by `outlierParameter.method`:
 *  - any value other than 'p-value' (including undefined): inter-quartile-range
 *    detection via `categoryOutlier.IQR`, with multiplier `iqrK` (falls back to
 *    `IQR_K` when `iqrK` is null/undefined);
 *  - 'p-value': candidates are visited from the farthest to the nearest to the
 *    mean and kept while `1 - calculatePValue(...)` stays at or above
 *    `confidenceInterval` (defaults to `SIGNIFICANCE_BENCHMARK`).
 *
 * @param {number[]} values - measure values; assumed numeric (TODO confirm against callers).
 * @param {{method?: string, iqrK?: number, confidenceInterval?: number}} [outlierParameter]
 * @returns {{outliers: Array<{index: number, value: number, significance: number}>, thresholds: number[]}}
 *   `index` refers to the position of the outlier in `values`.
 */
export var findOutliers = function (values, outlierParameter) {
    var _a = outlierParameter || {},
        method = _a.method,
        iqrK = _a.iqrK,
        _b = _a.confidenceInterval,
        confidenceInterval = _b === void 0 ? SIGNIFICANCE_BENCHMARK : _b;
    var outliers = [];
    var thresholds = [];

    if (method !== 'p-value') {
        var IQRResult = categoryOutlier.IQR(values, { k: iqrK !== null && iqrK !== void 0 ? iqrK : IQR_K });
        var lowerOutlierIndexes = IQRResult.lower.indexes;
        var upperOutlierIndexes = IQRResult.upper.indexes;
        // The empirical percentile of a value must be taken from its rank in
        // ascending order. Ranking against the unsorted input made the reported
        // significance depend on the row position instead of on how extreme
        // the value is. Sort a copy once so the caller's array is untouched.
        var ascendingValues = values.slice().sort(function (a, b) { return a - b; });
        __spreadArray(__spreadArray([], __read(lowerOutlierIndexes), false), __read(upperOutlierIndexes), false).forEach(function (index) {
            var value = values[index];
            // Rank of the first occurrence (1-based) divided by the sample size.
            var pValue = (ascendingValues.indexOf(value) + 1) / values.length;
            // two-sided
            var significance = pValue > 0.5 ? pValue : 1 - pValue;
            outliers.push({
                index: index,
                value: value,
                significance: significance,
            });
        });
        thresholds.push(IQRResult.lower.threshold, IQRResult.upper.threshold);
        return { outliers: outliers, thresholds: thresholds };
    }

    var candidates = values.map(function (item, index) { return { index: index, value: item }; });
    // The mean does not depend on the candidate, so compute it once rather
    // than once per element inside the sort iteratee.
    var center = mean(values);
    var sortedCandidates = orderBy(candidates, function (item) { return Math.abs(center - item.value); }, ['desc']);
    thresholds.push.apply(thresholds, __spreadArray([], __read(calculateOutlierThresholds(values, confidenceInterval, 'two-sided')), false));
    for (var i = 0; i < sortedCandidates.length; i += 1) {
        var candidate = sortedCandidates[i];
        var value = candidate.value, index = candidate.index;
        var pValue = calculatePValue(values, value, 'two-sided');
        var significance = 1 - pValue;
        // Candidates are ordered by decreasing distance from the mean, so the
        // scan stops at the first one that is not significant enough.
        if (significance < confidenceInterval) {
            break;
        }
        outliers.push({
            index: index,
            value: value,
            significance: significance,
        });
    }
    return { outliers: outliers, thresholds: thresholds };
};

/**
 * Build "category_outlier" insights for the given extractor input.
 *
 * Returns the result of `getNonSignificantInsight` when validation fails,
 * when dimension or measure is missing, when `distinct(values) === 1`, or
 * when no outlier is found; otherwise returns one insight object per outlier.
 *
 * @param {object} props - extractor input; `props.data` rows are indexed by the
 *   dimension and measure names, and outlier options are read from
 *   `props.options.algorithmParameter.outlier`.
 * @returns {*} non-significant insight result, or an array of outlier insights.
 */
export var getCategoryOutlierInfo = function (props) {
    var valid = preValidation(props);
    var insightType = 'category_outlier';
    // preValidation signals failure by returning a string, which is forwarded as detailInfo.
    if (isString(valid)) {
        return getNonSignificantInsight({ detailInfo: valid, insightType: insightType, infoType: 'verificationFailure' });
    }

    var _a = getAlgorithmCommonInput(props),
        dimension = _a.dimension,
        values = _a.values,
        measure = _a.measure;
    if (isNil(dimension) || isNil(measure)) {
        return getNonSignificantInsight({
            detailInfo: 'Measure or dimension is empty.',
            insightType: insightType,
            infoType: 'verificationFailure',
        });
    }
    if (distinct(values) === 1) {
        return getNonSignificantInsight({
            insightType: insightType,
            infoType: 'noInsight',
            customInfo: { info: 'Each value in the data is the same. No outliers were found.' },
        });
    }

    var data = props.data;
    var outlierParameter = get(props, 'options.algorithmParameter.outlier');
    var outliers = findOutliers(values, outlierParameter).outliers;
    if (outliers.length === 0) {
        var usedMethod = outlierParameter === null || outlierParameter === void 0 ? void 0 : outlierParameter.method;
        var info = "No outliers were found using method ".concat(usedMethod !== 'p-value' ? 'Inter Quartile Range' : 'Normal Distribution test', ".");
        return getNonSignificantInsight({ insightType: insightType, infoType: 'noInsight', customInfo: { info: info } });
    }

    var categoryOutliers = outliers.map(function (item) {
        var index = item.index, significance = item.significance;
        return {
            type: insightType,
            dimension: dimension,
            measure: measure,
            significance: significance,
            index: index,
            x: data[index][dimension],
            y: data[index][measure],
            significantInsight: true,
        };
    });
    return categoryOutliers;
};