@formant/ava
Version:
A framework for automated visual analytics.
244 lines (243 loc) • 14.5 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.enumerateInsights = exports.extractInsightsFromSubspace = exports.extractInsightsForCorrelation = exports.extractInsightsFor1M1DCombination = void 0;
var tslib_1 = require("tslib");
var lodash_1 = require("lodash");
var constant_1 = require("../constant");
var insights_1 = require("../insights");
var aggregate_1 = require("../utils/aggregate");
var homogeneous_1 = require("../insights/extractors/homogeneous");
var preprocess_1 = require("./preprocess");
var util_1 = require("./util");
/**
 * Calculate the impact of a subspace, i.e. how large its measure values are
 * relative to the reference values recorded for the same measures.
 *
 * For every impact measure the value computed by `calculateImpactValue` on
 * `data` is divided by the reference stored under `<fieldName>@<method>`;
 * the largest of those ratios is the impact.
 *
 * @param data rows handed to `calculateImpactValue` for each measure
 * @param subspace subspace descriptor; when falsy the impact is 1
 * @param impactMeasureReferences reference values keyed by `<fieldName>@<method>`
 * @param measures impact measures; when missing or empty the impact is 1
 * @returns the largest measure-to-reference ratio, or 1 for the guard cases above
 */
function computeSubspaceImpact(data, subspace, impactMeasureReferences, measures) {
    var hasNoMeasures = measures == null || !measures.length;
    if (hasNoMeasures || !subspace) {
        return 1;
    }
    var ratios = measures.map(function (impactMeasure) {
        var calculateImpactValue = preprocess_1.calculateImpactValue;
        var valueInSubspace = calculateImpactValue(data, impactMeasure);
        var lookupKey = "".concat(impactMeasure.fieldName, "@", impactMeasure.method);
        return valueInSubspace / impactMeasureReferences[lookupKey];
    });
    return Math.max.apply(Math, ratios);
}
/**
 * Extract patterns of every requested insight type from one subject.
 *
 * The insight types come from `options.insightTypes`, falling back to
 * `PATTERN_TYPES`. For each type the matching entry of `ExtractorCheckers`
 * (if any) is consulted first; a string result from the checker means the
 * data requirements are not met and the type is skipped.
 *
 * @param data rows of the subject
 * @param subjectInfo subject descriptor; `measures` and `dimensions` are read from it
 * @param fieldPropsMap field properties, forwarded to the checker
 * @param options optional; `insightTypes`, `algorithmParameter` and `dataProcessInfo` are read
 * @returns an object with one key per requested insight type, holding the
 *   extractor result or `undefined` when the type was skipped
 */
function extractPatternsFromSubject(data, subjectInfo, fieldPropsMap, options) {
    var subjectMeasures = subjectInfo.measures;
    var subjectDimensions = subjectInfo.dimensions;
    var requestedTypes = (options == null ? undefined : options.insightTypes) || constant_1.PATTERN_TYPES;
    return requestedTypes.reduce(function (collected, insightType) {
        var checkRequirements = insights_1.ExtractorCheckers[insightType];
        // Types without a checker are always considered eligible.
        var requirementsMet = !checkRequirements ||
            !(0, lodash_1.isString)(checkRequirements({ data: data, subjectInfo: subjectInfo, fieldPropsMap: fieldPropsMap }));
        var runExtractor = insights_1.insightPatternsExtractor;
        if (!requirementsMet || !runExtractor) {
            collected[insightType] = undefined;
            return collected;
        }
        var settings = options || {};
        collected[insightType] = runExtractor({
            data: data,
            dimensions: subjectDimensions.map(function (fieldName) { return ({ fieldName: fieldName }); }),
            measures: subjectMeasures,
            insightType: insightType,
            options: {
                algorithmParameter: settings.algorithmParameter,
                dataProcessInfo: settings.dataProcessInfo,
                // Validation has been done in method extractInsights
                dataValidation: false,
                // Select only significant insights
                filterInsight: true,
            },
        });
        return collected;
    }, {});
}
/**
 * Extract insights for every (dimension, measure) pair.
 *
 * For each pair the data is aggregated by the dimension, patterns are
 * extracted from the aggregate, and all non-empty pattern lists are merged
 * and ordered by descending significance.
 *
 * @param data rows of the current subspace
 * @param dimensions dimension field names to break the data down by
 * @param measures measures to evaluate against each dimension
 * @param subspace subspace descriptor, copied onto each insight
 * @param referenceInfo `fieldPropsMap` is read from it
 * @param options forwarded to `extractPatternsFromSubject`
 * @returns one array per dimension, holding one entry per measure: the
 *   insight (scored by its most significant pattern) or `null` when no
 *   pattern was found
 */
function extractInsightsFor1M1DCombination(data, dimensions, measures, subspace, referenceInfo, options) {
    var fieldPropsMap = referenceInfo.fieldPropsMap;
    var bySignificanceDesc = function (left, right) { return right.significance - left.significance; };
    var hasEntries = function (candidate) { return candidate != null && candidate.length > 0; };
    return dimensions.reduce(function (perDimension, dimension) {
        var props = fieldPropsMap[dimension];
        var levels = props == null ? undefined : props.levelOfMeasurements;
        var isTimeField = levels == null ? undefined : levels.includes('Time');
        var perMeasure = measures.reduce(function (slots, measure) {
            var subject = { dimensions: [dimension], subspace: subspace, measures: [measure] };
            var aggregatedData = (0, aggregate_1.aggregate)(data, dimension, [measure], isTimeField);
            var patternsByType = extractPatternsFromSubject(aggregatedData, subject, fieldPropsMap, options);
            var ranked = (0, lodash_1.flatten)(Object.values(patternsByType).filter(hasEntries)).sort(bySignificanceDesc);
            // Keep a null placeholder so each slot still lines up with its measure.
            slots.push(ranked.length
                ? {
                    subspace: subspace,
                    dimensions: [{ fieldName: dimension }],
                    measures: [measure],
                    patterns: ranked,
                    data: aggregatedData,
                    score: ranked[0].significance,
                }
                : null);
            return slots;
        }, []);
        perDimension.push(perMeasure);
        return perDimension;
    }, []);
}
exports.extractInsightsFor1M1DCombination = extractInsightsFor1M1DCombination;
/**
 * Extract correlation insights for every unordered pair of measures.
 *
 * Each pair is passed to `extractPatternsFromSubject` with `insightTypes`
 * forced to `['correlation']`; the caller's `options` object is not modified.
 *
 * @param data rows of the current subspace, attached unchanged to each insight
 * @param dimensions dimension field names of the subject
 * @param measures measures to pair up; fewer than two yields no insights
 * @param subspace subspace descriptor, copied onto each insight
 * @param referenceInfo `fieldPropsMap` is read from it
 * @param options remaining extraction options, forwarded with `insightTypes` overridden
 * @returns one insight per measure pair that produced at least one correlation
 *   pattern, scored by its most significant pattern
 */
function extractInsightsForCorrelation(data, dimensions, measures, subspace, referenceInfo, options) {
    var fieldPropsMap = referenceInfo.fieldPropsMap;
    var found = [];
    var total = measures.length;
    if (!(total >= 2)) {
        return found;
    }
    var rankDescending = function (left, right) { return right.significance - left.significance; };
    var toDimensionRef = function (fieldName) { return ({ fieldName: fieldName }); };
    for (var first = 0; first < total - 1; first += 1) {
        for (var second = first + 1; second < total; second += 1) {
            var subject = { dimensions: dimensions, subspace: subspace, measures: [measures[first], measures[second]] };
            var pairOptions = tslib_1.__assign({}, options, { insightTypes: ['correlation'] });
            var extracted = extractPatternsFromSubject(data, subject, fieldPropsMap, pairOptions);
            var correlations = extracted == null ? undefined : extracted.correlation;
            if (correlations == null) {
                continue;
            }
            // Sorting happens in place, on the list returned by the extractor.
            correlations.sort(rankDescending);
            if (!correlations.length) {
                continue;
            }
            found.push({
                subspace: subspace,
                dimensions: dimensions.map(toDimensionRef),
                measures: [measures[first], measures[second]],
                patterns: correlations,
                data: data,
                score: correlations[0].significance,
            });
        }
    }
    return found;
}
exports.extractInsightsForCorrelation = extractInsightsForCorrelation;
/**
 * Recursively extract insights from a data subspace.
 *
 * Steps, in order:
 *  1. Prune: return early when the subspace has no rows, when its impact is
 *     below `INSIGHT_SCORE_BENCHMARK`, or when `insightsHeap` is full and even
 *     a significance of 1 could not exceed the score of `insightsHeap.peek()`.
 *  2. Extract 1-measure x 1-dimension insights and, when 'correlation' is among
 *     the insight types, correlation insights; offer both to `insightsHeap`
 *     with a score blended from significance and subspace impact.
 *  3. When `options.homogeneous` is set, add homogeneous patterns across
 *     measures and across sibling subspaces to `homogeneousInsightsHeap`.
 *  4. Unless `options.ignoreSubspace` is set, recurse into every value of each
 *     dimension that `subspace` has not fixed yet.
 *
 * @param data rows of the current subspace
 * @param dimensions dimension field names to analyse in this subspace
 * @param measures measures to analyse
 * @param subspace list of `{ dimension, value }` entries fixed so far
 * @param referenceInfo `impactMeasureReferences` and `fieldPropsMap` are read from it
 * @param insightsHeap receives scored insights via `addInsightsToHeap`; its
 *   `length`, `limit` and `peek()` are used for pruning
 * @param homogeneousInsightsHeap receives homogeneous insights via `addAll`
 * @param options optional; `impactMeasures`, `impactWeight`, `insightTypes`,
 *   `homogeneous`, `ignoreSubspace` and `dimensions` are read. An
 *   `impactWeight` in [0, 1) is honoured (including 0); anything else falls
 *   back to `IMPACT_SCORE_WEIGHT`.
 * @returns the 1M x 1D results of this subspace only (not of child
 *   subspaces), flattened across dimensions, with unblended scores and `null`
 *   placeholders for measures without patterns; `[]` when pruned
 */
function extractInsightsFromSubspace(data, dimensions, measures, subspace, referenceInfo, insightsHeap, homogeneousInsightsHeap, options) {
    /** subspace pruning */
    if (!data || !data.length) {
        return [];
    }
    // `options` may be omitted by direct callers; read every setting through a
    // safe view while still forwarding the original value to the helpers.
    var settings = options || {};
    var impactMeasureReferences = referenceInfo.impactMeasureReferences;
    var fieldPropsMap = referenceInfo.fieldPropsMap;
    // calculate impact score
    var subspaceImpact = computeSubspaceImpact(data, subspace, impactMeasureReferences, settings.impactMeasures);
    // pruning1: check the subspace impact limit
    if (subspaceImpact < constant_1.INSIGHT_SCORE_BENCHMARK) {
        return [];
    }
    // An explicit weight of 0 is a valid choice (score = significance only), so
    // it must not be discarded by a truthiness test.
    var requestedWeight = settings.impactWeight;
    var isUsableWeight = (requestedWeight === 0 || !!requestedWeight) && requestedWeight >= 0 && requestedWeight < 1;
    var impactScoreWeight = isUsableWeight ? requestedWeight : constant_1.IMPACT_SCORE_WEIGHT;
    /** blend a pattern significance with the impact of this subspace */
    var blendScore = function (significance) {
        return significance * (1 - impactScoreWeight) + subspaceImpact * impactScoreWeight;
    };
    var withBlendedScore = function (insight) {
        return tslib_1.__assign({}, insight, { score: blendScore(insight.score) });
    };
    // pruning2: the best score reachable here (significance 1) must exceed the
    // score at the top of a full heap, otherwise nothing found here can enter it
    var optimalScore = blendScore(1);
    if (insightsHeap.length >= insightsHeap.limit) {
        var heapTop = insightsHeap.peek();
        var minScoreInHeap = heapTop == null ? undefined : heapTop.score;
        if (optimalScore <= minScoreInHeap) {
            return [];
        }
    }
    /** insight extraction */
    var insights = [];
    /** Combination: 1M * 1D */
    var insightsFor1M1DCombination = extractInsightsFor1M1DCombination(data, dimensions, measures, subspace, referenceInfo, options);
    insightsFor1M1DCombination.forEach(function (insightsPerDim) {
        var insightsForMeasures = insightsPerDim.filter(function (item) { return !!item; }).map(withBlendedScore);
        // The returned list keeps the raw per-measure slots (null placeholders,
        // unblended scores); the sibling-group lookup of the parent call reads them.
        insights.push.apply(insights, insightsPerDim);
        (0, util_1.addInsightsToHeap)(insightsForMeasures, insightsHeap);
    });
    /** Combination: 1M * 1M */
    if ((settings.insightTypes || constant_1.PATTERN_TYPES).includes('correlation')) {
        var extracted = extractInsightsForCorrelation(data, dimensions, measures, subspace, referenceInfo, options);
        var insightsForCorrelation = extracted == null ? undefined : extracted.map(withBlendedScore);
        (0, util_1.addInsightsToHeap)(insightsForCorrelation, insightsHeap);
    }
    /** extract homogeneous insight in measures */
    if (settings.homogeneous) {
        insightsFor1M1DCombination.forEach(function (insightsPerDim, dimIndex) {
            var homogeneousPatternsForMeasures = (0, homogeneous_1.extractHomogeneousPatternsForMeasures)(measures, insightsPerDim);
            if (homogeneousPatternsForMeasures.length > 0) {
                var homogeneousInsights = homogeneousPatternsForMeasures.map(function (pattern) { return ({
                    subspace: subspace,
                    dimensions: [{ fieldName: dimensions[dimIndex] }],
                    measures: measures,
                    patterns: [pattern],
                    data: data,
                    score: blendScore(pattern.significance),
                }); });
                homogeneousInsightsHeap.addAll(homogeneousInsights);
            }
        });
    }
    /** subspace search */
    if (settings.ignoreSubspace) {
        return insights;
    }
    var searchedDimensions = subspace.map(function (item) { return item.dimension; });
    var candidateFields = (settings.dimensions == null
        ? undefined
        : settings.dimensions.map(function (dimension) { return dimension.fieldName; })) ||
        Object.values(fieldPropsMap)
            .filter(function (item) { return item.domainType === 'dimension'; })
            .map(function (item) { return item.name; });
    var remainDimensionFields = candidateFields.filter(function (field) { return !searchedDimensions.includes(field); });
    remainDimensionFields.forEach(function (dimension) {
        var siblingGroupInsights = [];
        var groupedData = (0, lodash_1.groupBy)(data, dimension);
        // Breakdown values come from the field's `rawData`, so a value may have no
        // rows in `groupedData`; the recursive call then prunes itself.
        var breakdownValues = (0, lodash_1.uniq)(fieldPropsMap[dimension].rawData);
        var dimensionsInSubspace = remainDimensionFields.filter(function (item) { return item !== dimension; });
        if (breakdownValues.length > 1) {
            breakdownValues.forEach(function (value) {
                var childSubspace = subspace.concat([{ dimension: dimension, value: value }]);
                var subspaceInsights = extractInsightsFromSubspace(groupedData[value], dimensionsInSubspace, measures, childSubspace, referenceInfo, insightsHeap, homogeneousInsightsHeap, options);
                siblingGroupInsights.push(subspaceInsights);
            });
        }
        /** extract homogeneous insight in sibling group */
        if (settings.homogeneous) {
            dimensionsInSubspace.forEach(function (dim) {
                measures.forEach(function (measure) {
                    // For each sibling subspace pick its insight for (dim, measure), or null.
                    var siblingGroupInsightsArr = siblingGroupInsights.map(function (siblingItem) {
                        return (siblingItem.find(function (insight) {
                            return (!!insight &&
                                insight.dimensions.length === 1 &&
                                insight.dimensions[0].fieldName === dim &&
                                insight.measures.length === 1 &&
                                insight.measures[0].fieldName === measure.fieldName);
                        }) || null);
                    });
                    var homogeneousPatternsForSiblingGroups = (0, homogeneous_1.extractHomogeneousPatternsForSiblingGroups)(breakdownValues, siblingGroupInsightsArr);
                    var insightsForSiblingGroup = homogeneousPatternsForSiblingGroups.map(function (pattern) { return ({
                        subspace: subspace,
                        dimensions: [{ fieldName: dimension }, { fieldName: dim }],
                        measures: [measure],
                        patterns: [pattern],
                        data: data,
                        score: blendScore(pattern.significance),
                    }); });
                    homogeneousInsightsHeap.addAll(insightsForSiblingGroup);
                });
            });
        }
    });
    return insights;
}
exports.extractInsightsFromSubspace = extractInsightsFromSubspace;
/**
 * Enumerate insight subjects in the data, starting from the empty subspace.
 *
 * Delegates to `extractInsightsFromSubspace`; results are delivered through
 * the two heaps and nothing is returned.
 *
 * @param data rows of the whole dataset
 * @param dimensions dimension field names to analyse
 * @param measures measures to analyse
 * @param referenceInfo reference information forwarded to the extraction
 * @param insightsHeap heap forwarded as the insights heap
 * @param metaInsightsHeap heap forwarded as the homogeneous insights heap
 * @param options extraction options; an omitted (undefined) value becomes `{}`
 */
function enumerateInsights(data, dimensions, measures, referenceInfo, insightsHeap, metaInsightsHeap, options) {
    var effectiveOptions = typeof options === 'undefined' ? {} : options;
    var rootSubspace = [];
    extractInsightsFromSubspace(data, dimensions, measures, rootSubspace, referenceInfo, insightsHeap, metaInsightsHeap, effectiveOptions);
}
exports.enumerateInsights = enumerateInsights;