UNPKG

@formant/ava

Version:

A framework for automated visual analytics.

244 lines (243 loc) 14.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.enumerateInsights = exports.extractInsightsFromSubspace = exports.extractInsightsForCorrelation = exports.extractInsightsFor1M1DCombination = void 0; var tslib_1 = require("tslib"); var lodash_1 = require("lodash"); var constant_1 = require("../constant"); var insights_1 = require("../insights"); var aggregate_1 = require("../utils/aggregate"); var homogeneous_1 = require("../insights/extractors/homogeneous"); var preprocess_1 = require("./preprocess"); var util_1 = require("./util"); /** calculate the Impact which reflects the importance of the subject of an insight against the entire dataset */ function computeSubspaceImpact(data, subspace, impactMeasureReferences, measures) { if (!(measures === null || measures === void 0 ? void 0 : measures.length) || !subspace) return 1; var impactValues = measures.map(function (measure) { var measureValue = (0, preprocess_1.calculateImpactValue)(data, measure); var referenceKey = "".concat(measure.fieldName, "@").concat(measure.method); var referenceValue = impactMeasureReferences[referenceKey]; return measureValue / referenceValue; }); return Math.max.apply(Math, tslib_1.__spreadArray([], tslib_1.__read(impactValues), false)); } /** extract patterns from a specific subject */ function extractPatternsFromSubject(data, subjectInfo, fieldPropsMap, options) { var measures = subjectInfo.measures, dimensions = subjectInfo.dimensions; var enumInsightTypes = (options === null || options === void 0 ? void 0 : options.insightTypes) || constant_1.PATTERN_TYPES; var patterns = {}; enumInsightTypes.forEach(function (insightType) { var insightExtractorChecker = insights_1.ExtractorCheckers[insightType]; var isValid = true; // Check whether the data requirements of the extractor are met if (insightExtractorChecker) { if ((0, lodash_1.isString)(insightExtractorChecker({ data: data, subjectInfo: subjectInfo, fieldPropsMap: fieldPropsMap }))) isValid = false; } if (isValid && insights_1.insightPatternsExtractor) { var _a = options || {}, algorithmParameter = _a.algorithmParameter, dataProcessInfo = _a.dataProcessInfo; var extractorOptions = { algorithmParameter: algorithmParameter, dataProcessInfo: dataProcessInfo, // Validation has been done in method extractInsights dataValidation: false, // Select only significant insights filterInsight: true, }; var extractedPatterns = (0, insights_1.insightPatternsExtractor)({ data: data, dimensions: dimensions.map(function (dim) { return ({ fieldName: dim }); }), measures: measures, insightType: insightType, options: extractorOptions, }); patterns[insightType] = extractedPatterns; } else { patterns[insightType] = undefined; } }); return patterns; } function extractInsightsFor1M1DCombination(data, dimensions, measures, subspace, referenceInfo, options) { var fieldPropsMap = referenceInfo.fieldPropsMap; var insights = []; dimensions.forEach(function (dimension) { var _a, _b; var insightsPerDim = []; var isTimeField = (_b = (_a = fieldPropsMap[dimension]) === null || _a === void 0 ? void 0 : _a.levelOfMeasurements) === null || _b === void 0 ? void 0 : _b.includes('Time'); measures.forEach(function (measure) { var childSubjectInfo = { dimensions: [dimension], subspace: subspace, measures: [measure] }; var aggregatedData = (0, aggregate_1.aggregate)(data, dimension, [measure], isTimeField); var patterns = extractPatternsFromSubject(aggregatedData, childSubjectInfo, fieldPropsMap, options); var patternsArray = (0, lodash_1.flatten)(Object.values(patterns).filter(function (item) { return (item === null || item === void 0 ? void 0 : item.length) > 0; })).sort(function (a, b) { return b.significance - a.significance; }); if (patternsArray.length) { var insight = { subspace: subspace, dimensions: [ { fieldName: dimension, }, ], measures: [measure], patterns: patternsArray, data: aggregatedData, score: patternsArray[0].significance, }; insightsPerDim.push(insight); } else { insightsPerDim.push(null); } }); insights.push(insightsPerDim); }); return insights; } exports.extractInsightsFor1M1DCombination = extractInsightsFor1M1DCombination; function extractInsightsForCorrelation(data, dimensions, measures, subspace, referenceInfo, options) { var _a; var fieldPropsMap = referenceInfo.fieldPropsMap; var insights = []; var measureNum = measures.length; if (measureNum >= 2) { for (var i = 0; i < measureNum - 1; i += 1) { for (var j = i + 1; j < measureNum; j += 1) { var childSubjectInfo = { dimensions: dimensions, subspace: subspace, measures: [measures[i], measures[j]] }; var patterns = extractPatternsFromSubject(data, childSubjectInfo, fieldPropsMap, tslib_1.__assign(tslib_1.__assign({}, options), { insightTypes: ['correlation'] })); var patternsArray = (_a = patterns === null || patterns === void 0 ? void 0 : patterns.correlation) === null || _a === void 0 ? void 0 : _a.sort(function (a, b) { return b.significance - a.significance; }); if (patternsArray === null || patternsArray === void 0 ? void 0 : patternsArray.length) { var insight = { subspace: subspace, dimensions: dimensions.map(function (d) { return ({ fieldName: d }); }), measures: [measures[i], measures[j]], patterns: patternsArray, data: data, score: patternsArray[0].significance, }; insights.push(insight); } } } } return insights; } exports.extractInsightsForCorrelation = extractInsightsForCorrelation; /** recursive extraction in data subspace */ function extractInsightsFromSubspace(data, dimensions, measures, subspace, referenceInfo, insightsHeap, homogeneousInsightsHeap, options) { var _a, _b; /** subspace pruning */ if (!(data === null || data === void 0 ? void 0 : data.length)) { return []; } // calculate impact score var impactMeasureReferences = referenceInfo.impactMeasureReferences, fieldPropsMap = referenceInfo.fieldPropsMap; var subspaceImpact = computeSubspaceImpact(data, subspace, impactMeasureReferences, options === null || options === void 0 ? void 0 : options.impactMeasures); // pruning1: check the subpace impact limit if (subspaceImpact < constant_1.INSIGHT_SCORE_BENCHMARK) { return []; } // pruning2: check if the impact score is greater than the minimum score in heap var impactScoreWeight = !!(options === null || options === void 0 ? void 0 : options.impactWeight) && options.impactWeight >= 0 && options.impactWeight < 1 ? options.impactWeight : constant_1.IMPACT_SCORE_WEIGHT; var optimalScore = subspaceImpact * impactScoreWeight + 1 * (1 - impactScoreWeight); if (insightsHeap.length >= insightsHeap.limit) { var minScoreInHeap = (_a = insightsHeap.peek()) === null || _a === void 0 ? void 0 : _a.score; if (optimalScore <= minScoreInHeap) { return []; } } /** insight extraction */ var insights = []; /** Combination: 1M * 1D */ var insightsFor1M1DCombination = extractInsightsFor1M1DCombination(data, dimensions, measures, subspace, referenceInfo, options); insightsFor1M1DCombination.forEach(function (insightsPerDim) { var insightsForMeasures = insightsPerDim .filter(function (item) { return !!item; }) .map(function (item) { return (tslib_1.__assign(tslib_1.__assign({}, item), { score: item.score * (1 - impactScoreWeight) + subspaceImpact * impactScoreWeight })); }); insights.push.apply(insights, tslib_1.__spreadArray([], tslib_1.__read(insightsPerDim), false)); (0, util_1.addInsightsToHeap)(insightsForMeasures, insightsHeap); }); // Combination 3: 1M * 1M */ if ((options.insightTypes || constant_1.PATTERN_TYPES).includes('correlation')) { var extracted = extractInsightsForCorrelation(data, dimensions, measures, subspace, referenceInfo, options); var insightsForCorrelation = extracted === null || extracted === void 0 ? void 0 : extracted.map(function (item) { return (tslib_1.__assign(tslib_1.__assign({}, item), { score: item.score * (1 - impactScoreWeight) + subspaceImpact * impactScoreWeight })); }); (0, util_1.addInsightsToHeap)(insightsForCorrelation, insightsHeap); } /** extract homogeneous insight in measures */ if (options === null || options === void 0 ? void 0 : options.homogeneous) { insightsFor1M1DCombination.forEach(function (insightsPerDim, dimIndex) { var homogeneousPatternsForMeasures = (0, homogeneous_1.extractHomogeneousPatternsForMeasures)(measures, insightsPerDim); if (homogeneousPatternsForMeasures.length > 0) { var homogeneousInsights = homogeneousPatternsForMeasures.map(function (pattern) { return ({ subspace: subspace, dimensions: [{ fieldName: dimensions[dimIndex] }], measures: measures, patterns: [pattern], data: data, score: pattern.significance * (1 - impactScoreWeight) + subspaceImpact * impactScoreWeight, }); }); homogeneousInsightsHeap.addAll(homogeneousInsights); } }); } /** subspace search */ if (!(options === null || options === void 0 ? void 0 : options.ignoreSubspace)) { var searchedDimensions_1 = subspace.map(function (item) { return item.dimension; }); var remainDimensionFields_1 = (((_b = options === null || options === void 0 ? void 0 : options.dimensions) === null || _b === void 0 ? void 0 : _b.map(function (dimension) { return dimension.fieldName; })) || Object.values(fieldPropsMap) .filter(function (item) { return item.domainType === 'dimension'; }) .map(function (item) { return item.name; })).filter(function (field) { return !searchedDimensions_1.includes(field); }); if (remainDimensionFields_1.length > 0) { remainDimensionFields_1.forEach(function (dimension) { var siblingGroupInsights = []; var groupedData = (0, lodash_1.groupBy)(data, dimension); var breakdownValues = (0, lodash_1.uniq)(fieldPropsMap[dimension].rawData); var dimensionsInSubspace = remainDimensionFields_1.filter(function (item) { return item !== dimension; }); if (breakdownValues.length > 1) { breakdownValues.forEach(function (value) { var childSubspace = tslib_1.__spreadArray(tslib_1.__spreadArray([], tslib_1.__read(subspace), false), [{ dimension: dimension, value: value }], false); var subspaceInsights = extractInsightsFromSubspace(groupedData[value], dimensionsInSubspace, measures, childSubspace, referenceInfo, insightsHeap, homogeneousInsightsHeap, options); siblingGroupInsights.push(subspaceInsights); }); } /** extract homegenehous insight in sibling group */ if (options === null || options === void 0 ? void 0 : options.homogeneous) { dimensionsInSubspace.forEach(function (dim) { measures.forEach(function (measure) { var siblingGroupInsightsArr = siblingGroupInsights.map(function (siblingItem) { return (siblingItem.find(function (insight) { return (!!insight && insight.dimensions.length === 1 && insight.dimensions[0].fieldName === dim && insight.measures.length === 1 && insight.measures[0].fieldName === measure.fieldName); }) || null); }); var homogeneousPatternsForSiblingGroups = (0, homogeneous_1.extractHomogeneousPatternsForSiblingGroups)(breakdownValues, siblingGroupInsightsArr); var insightsForSiblingGroup = homogeneousPatternsForSiblingGroups.map(function (pattern) { return ({ subspace: subspace, dimensions: [{ fieldName: dimension }, { fieldName: dim }], measures: [measure], patterns: [pattern], data: data, score: pattern.significance * (1 - impactScoreWeight) + subspaceImpact * impactScoreWeight, }); }); homogeneousInsightsHeap.addAll(insightsForSiblingGroup); }); }); } }); } } return insights; } exports.extractInsightsFromSubspace = extractInsightsFromSubspace; /** insight subject enumeration in the data */ function enumerateInsights(data, dimensions, measures, referenceInfo, insightsHeap, metaInsightsHeap, options) { if (options === void 0) { options = {}; } var initSubspace = []; extractInsightsFromSubspace(data, dimensions, measures, initSubspace, referenceInfo, insightsHeap, metaInsightsHeap, options); } exports.enumerateInsights = enumerateInsights;