@visactor/vmind
Version:
<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu
320 lines (280 loc) • 17.3 kB
JavaScript
;
/**
 * Interop helper for default imports: modules flagged with `__esModule` are
 * returned untouched, anything else is wrapped as `{ default: mod }`.
 * An already-installed `this.__importDefault` takes precedence.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
  if (mod && mod.__esModule) {
    return mod;
  }
  return { default: mod };
};
Object.defineProperty(exports, "__esModule", {
value: !0
}), exports.mergeDataTable = exports.mergeClusterDataView = exports.canMergeClusterResult = exports.getSplitDataViewOfDataTable = exports.canMergeDataTable = exports.getCtxByValidColumnRatio = exports.revisedUnMatchedFieldInfo = exports.getCtxByRangeValueTranser = exports.getCtxByFilterRowWithNonEmptyValues = exports.getCtxByfilterSameDataItem = exports.getCtxBymeasureAutoTransfer = exports.sortDataTableByDate = exports.getCtxByneedNumericalFields = exports.getCtxByfilterSameValueColumn = exports.transferFieldInfo = void 0;
const string_similarity_js_1 = __importDefault(require("string-similarity-js"));
const field_1 = require("../../utils/field");
const types_1 = require("../../types");
const vutils_1 = require("@visactor/vutils");
const text_1 = require("../../utils/text");
const common_1 = require("../../utils/common");
const cluster_1 = require("../../utils/cluster");
const dayjs_1 = __importDefault(require("dayjs"));

/**
 * Returns a shallow copy of `context` without the listed fields: their
 * entries are dropped from `fieldInfo` and every row of `dataTable` is
 * reduced (via `pick`) to the remaining field names.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @param {string[]} cleanFieldKey - Field names to remove.
 * @returns {object} `context` itself when `cleanFieldKey` is empty,
 *   otherwise a new object with trimmed `fieldInfo` / `dataTable`.
 */
const removeFieldInfoInCtx = (context, cleanFieldKey) => {
  if (!cleanFieldKey.length) {
    return context;
  }
  const { fieldInfo = [], dataTable = [] } = context || {};
  const keptFieldInfo = fieldInfo.filter((info) => !cleanFieldKey.includes(info.fieldName));
  const keptFieldNames = keptFieldInfo.map((info) => info.fieldName);
  const trimmedDataTable = dataTable.map((dataItem) => (0, vutils_1.pick)(dataItem, keptFieldNames));
  return Object.assign(Object.assign({}, context), {
    fieldInfo: keptFieldInfo,
    dataTable: trimmedDataTable
  });
};

/**
 * Completes field metadata in place.
 *
 * Unless a field has both `role` and `location`, `role` is derived from
 * its `type` with `getRoleByFieldType` and `location` is set to that role.
 * When `fieldMapping` has a truthy entry for the field name, the field's
 * own alias is kept if it is neither null nor undefined; otherwise the
 * mapped alias is used.
 *
 * @param {object} context - Object whose `fieldInfo` entries are mutated.
 * @param {object} [fieldMapping] - Lookup from field name to `{ alias }`.
 * @returns {object} The same `context` object.
 */
const transferFieldInfo = (context, fieldMapping) => {
  (context.fieldInfo || []).forEach((info) => {
    const hasRoleAndLocation = info.role && info.location;
    if (!hasRoleAndLocation) {
      info.role = (0, field_1.getRoleByFieldType)(info.type);
      info.location = info.role;
    }
    const mapped = fieldMapping == null ? undefined : fieldMapping[info.fieldName];
    if (mapped) {
      const currentAlias = info.alias;
      info.alias = currentAlias !== null && currentAlias !== undefined ? currentAlias : mapped.alias;
    }
  });
  return context;
};
exports.transferFieldInfo = transferFieldInfo;
/**
 * Removes columns that carry no distinguishing information.
 *
 * - More than one row: every non-measure field is dropped unless some
 *   later row holds a valid value that differs from the first row's value.
 * - Exactly one row: dimension fields whose single value is invalid are
 *   dropped.
 * - Otherwise (no rows or no fields): a shallow copy is returned.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @returns {object} A new context without the filtered fields.
 */
const getCtxByfilterSameValueColumn = (context) => {
  const { fieldInfo = [], dataTable = [] } = context || {};
  const newContext = Object.assign({}, context);
  if (!dataTable.length || !fieldInfo.length) {
    return newContext;
  }

  let cleanFieldKey;
  if (dataTable.length > 1) {
    const firstRow = dataTable[0];
    const laterRows = dataTable.slice(1);
    cleanFieldKey = fieldInfo
      .filter((info) => info.role !== types_1.ROLE.MEASURE)
      .filter((info) => {
        const reference = firstRow[info.fieldName];
        const hasOtherValue = laterRows.some(
          (row) => (0, common_1.isValidData)(row[info.fieldName]) && row[info.fieldName] !== reference
        );
        return !hasOtherValue;
      })
      .map((info) => info.fieldName);
  } else {
    cleanFieldKey = fieldInfo
      .filter(
        (info) => info.role === types_1.ROLE.DIMENSION && !(0, common_1.isValidData)(dataTable[0][info.fieldName])
      )
      .map((info) => info.fieldName);
  }
  return removeFieldInfoInCtx(newContext, cleanFieldKey);
};
exports.getCtxByfilterSameValueColumn = getCtxByfilterSameValueColumn;
/**
 * Empties the context when it has no measure field.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @returns {object} `context` itself when at least one field has the
 *   measure role; otherwise a copy with empty `dataTable` and `fieldInfo`.
 */
const getCtxByneedNumericalFields = (context) => {
  const hasMeasure = context.fieldInfo.some((info) => info.role === types_1.ROLE.MEASURE);
  if (hasMeasure) {
    return context;
  }
  return Object.assign(Object.assign({}, context), {
    dataTable: [],
    fieldInfo: []
  });
};
exports.getCtxByneedNumericalFields = getCtxByneedNumericalFields;
/**
 * Sorts `context.dataTable` in place, ascending by the first dimension
 * field whose type is DATE. Does nothing when no such field exists.
 *
 * Each cell is stringified, passed through `convertStringToDateValue` and
 * parsed with dayjs. Pairs in which either date is invalid compare as
 * equal, and so do two identical dates, so the stable sort keeps those
 * rows in their input order.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @returns {object} The same `context` object (its `dataTable` is mutated).
 */
const sortDataTableByDate = (context) => {
  const { dataTable = [], fieldInfo = [] } = context || {};
  const dateField = fieldInfo.find(
    (info) => info.role === types_1.ROLE.DIMENSION && info.type === types_1.DataType.DATE
  );
  if (!dateField) {
    return context;
  }
  const toDate = (row) => (0, dayjs_1.default)((0, common_1.convertStringToDateValue)(`${row[dateField.fieldName]}`));
  dataTable.sort((a, b) => {
    const dateA = toDate(a);
    const dateB = toDate(b);
    if (!dateA.isValid() || !dateB.isValid()) {
      return 0;
    }
    if (dateA.isBefore(dateB)) {
      return -1;
    }
    // Equal dates must compare as 0; returning 1 for both argument orders
    // would make the comparator inconsistent.
    return dateA.isAfter(dateB) ? 1 : 0;
  });
  return context;
};
exports.sortDataTableByDate = sortDataTableByDate;
/**
 * Normalizes, in place, the cell values of every field whose role is not
 * DIMENSION so that each cell ends up as a number or null.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`;
 *   rows of `dataTable` are mutated.
 * @param {*} text - When it is a non-empty string it is searched for
 *   literal occurrences of ratio values (see the RATIO handling below).
 * @returns {object} The same `context` that was passed in.
 */
const getCtxBymeasureAutoTransfer = (context, text) => {
const {fieldInfo: fieldInfo = [], dataTable: dataTable = []} = context || {}, isStringText = text && "string" == typeof text;
// Nothing is touched unless there is at least one row and one field.
return dataTable.length >= 1 && fieldInfo.length && fieldInfo.forEach((info => {
if (info.role !== types_1.ROLE.DIMENSION) for (let i = 0; i < dataTable.length; i++) {
let value = dataTable[i][info.fieldName];
// Case 1: a string that Number() cannot parse. Extract the first number
// found in it and keep that number only if the extracted text covers more
// than 90% of the original length or is at most 2 characters shorter;
// otherwise the cell becomes null.
if ("string" == typeof dataTable[i][info.fieldName] && isNaN(Number(value))) {
const extractionValue = `${(0, text_1.extractFirstNumberInString)(value)}`, beforeLen = value.length, curLen = extractionValue.length;
dataTable[i][info.fieldName] = "null" !== extractionValue && (curLen / beforeLen > .9 || beforeLen - curLen <= 2) ? Number(extractionValue) : null;
// Case 2: a numeric string is converted with Number().
// Case 3: anything that is neither a string nor a number becomes null.
} else "string" == typeof dataTable[i][info.fieldName] ? (value = Number(value),
dataTable[i][info.fieldName] = value) : (0, vutils_1.isNumber)(value) || (value = null,
dataTable[i][info.fieldName] = null);
// Re-read the normalized cell; the remaining steps apply only to numeric
// cells of RATIO-typed fields.
if (value = dataTable[i][info.fieldName], info.type === types_1.DataType.RATIO && (0,
vutils_1.isNumber)(value)) {
if (isStringText) {
const ratioValue = 100 * value;
// Replace the cell with 100 * value when the text contains
// `${ratioValue}%` or `${value}倍` and does not contain `${value}` itself.
!text.includes(`${ratioValue}%`) && !text.includes(`${value}倍`) || text.includes(`${value}`) || (dataTable[i][info.fieldName] = ratioValue,
value = ratioValue);
}
// Scale by the field's ratioGranularity: "%" divides by 100, "‰" by 1000.
"%" === (null == info ? void 0 : info.ratioGranularity) ? dataTable[i][info.fieldName] = value / 100 : "‰" === (null == info ? void 0 : info.ratioGranularity) && (dataTable[i][info.fieldName] = value / 1e3);
}
}
})), context;
};
exports.getCtxBymeasureAutoTransfer = getCtxBymeasureAutoTransfer;
/**
 * Drops duplicate rows, where two rows are duplicates when their
 * `JSON.stringify` output is identical. When the context has exactly one
 * field, only the first remaining row is kept.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @returns {object} A shallow copy of `context` with the reduced table.
 */
const getCtxByfilterSameDataItem = (context) => {
  const { dataTable = [], fieldInfo } = context || {};
  const uniqueRows = (0, common_1.uniqBy)(dataTable, (item) => JSON.stringify(item));
  const isSingleField = fieldInfo.length === 1;
  const newDataTable = isSingleField && uniqueRows.length > 0 ? [uniqueRows[0]] : uniqueRows;
  return Object.assign(Object.assign({}, context), {
    dataTable: newDataTable
  });
};
exports.getCtxByfilterSameDataItem = getCtxByfilterSameDataItem;
/**
 * Tells whether a row holds a valid value for at least one listed field.
 *
 * @param {object} dataItem - A single row of the data table.
 * @param {object[]} fieldInfo - Fields to inspect (by `fieldName`).
 * @returns {boolean} True as soon as one inspected cell is valid.
 */
const isDataItemWithNonEmptyValues = (dataItem, fieldInfo) => {
  for (const info of fieldInfo) {
    if ((0, common_1.isValidData)(dataItem[info.fieldName])) {
      return true;
    }
  }
  return false;
};

/**
 * Keeps only the rows that have a valid value in at least one measure
 * field.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @returns {object} A shallow copy of `context` with the filtered table.
 */
const getCtxByFilterRowWithNonEmptyValues = (context) => {
  const { dataTable = [], fieldInfo } = context || {};
  const measureFieldInfo = fieldInfo.filter((info) => info.role === types_1.ROLE.MEASURE);
  const rowsWithMeasure = dataTable.filter((item) => isDataItemWithNonEmptyValues(item, measureFieldInfo));
  return Object.assign(Object.assign({}, context), {
    dataTable: rowsWithMeasure
  });
};
exports.getCtxByFilterRowWithNonEmptyValues = getCtxByFilterRowWithNonEmptyValues;
/**
 * Collapses an array-valued cell into a single value.
 *
 * Invalid entries are discarded first. Supported `type` values:
 * "avg" (average of valid entries), "filter" (always null), "max", "min",
 * "first", "last"; any other value joins the valid entries with "-".
 *
 * "max" and "min" return null when no valid entry is left, because
 * `Math.max()` / `Math.min()` with no arguments would otherwise produce
 * -Infinity / Infinity.
 *
 * @param {Array} cell - Raw array found in a measure cell.
 * @param {string} type - Reduction strategy.
 * @returns {*} The reduced value.
 */
const transferRangeData = (cell, type) => {
  const validCell = cell.filter((v) => (0, common_1.isValidData)(v));
  switch (type) {
    case "avg":
      return (0, common_1.average)(validCell);
    case "filter":
      return null;
    case "max":
      return validCell.length ? Math.max(...validCell) : null;
    case "min":
      return validCell.length ? Math.min(...validCell) : null;
    case "first":
      return validCell[0];
    case "last":
      return validCell[validCell.length - 1];
    default:
      return validCell.join("-");
  }
};

/**
 * Replaces array-valued measure cells with a single value computed by
 * `transferRangeData`. Rows are shallow-copied; the input table is not
 * modified. Non-measure fields and non-array cells are left as they are.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @param {string} type - Reduction strategy forwarded to `transferRangeData`.
 * @returns {object} A shallow copy of `context` with the converted table.
 */
const getCtxByRangeValueTranser = (context, type) => {
  const { dataTable = [], fieldInfo = [] } = context || {};
  const measureFieldInfo = fieldInfo.filter((info) => info.role === types_1.ROLE.MEASURE);
  const newDataTable = dataTable.map((item) => {
    const newItem = Object.assign({}, item);
    measureFieldInfo.forEach((info) => {
      const cell = item[info.fieldName];
      if (!(0, vutils_1.isString)(cell) && (0, vutils_1.isArray)(cell)) {
        newItem[info.fieldName] = transferRangeData(cell, type);
      }
    });
    return newItem;
  });
  return Object.assign(Object.assign({}, context), {
    dataTable: newDataTable
  });
};
exports.getCtxByRangeValueTranser = getCtxByRangeValueTranser;
/**
 * Repairs `fieldInfo` entries whose `fieldName` does not appear as a key
 * in any row of `dataTable`.
 *
 * When the table has keys unknown to `fieldInfo`, and the number of such
 * keys equals the number of field names missing from the table, each
 * missing field name is paired with the remaining table key that has the
 * highest string-similarity score (the first candidate wins ties). The
 * matched entry is mutated: `fieldName` becomes the table key and `alias`
 * becomes the previous field name. In all other cases nothing changes.
 *
 * Set membership is computed with plain `has` checks instead of
 * `Set.prototype.intersection` / `difference`, which do not exist on
 * runtimes that predate those ES2025 methods.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @returns {object} The same `context` object.
 */
const revisedUnMatchedFieldInfo = (context) => {
  const { dataTable, fieldInfo } = context;

  const dataTableFieldSet = new Set();
  dataTable.forEach((item) => {
    Object.keys(item).forEach((key) => dataTableFieldSet.add(key));
  });

  const fieldInfoMapping = {};
  fieldInfo.forEach((info) => {
    fieldInfoMapping[info.fieldName] = info;
  });
  const fieldNames = Object.keys(fieldInfoMapping);
  const fieldNameSet = new Set(fieldNames);

  // Table keys that no fieldInfo entry describes.
  const dataTableUnMatch = new Set([...dataTableFieldSet].filter((key) => !fieldNameSet.has(key)));
  if (dataTableUnMatch.size === 0) {
    return context;
  }
  // Field names that never occur as a table key.
  const fieldNameUnMatch = fieldNames.filter((name) => !dataTableFieldSet.has(name));
  if (dataTableUnMatch.size !== fieldNameUnMatch.length) {
    return context;
  }

  fieldNameUnMatch.forEach((name) => {
    const candidateList = [...dataTableUnMatch];
    let bestScore = -1;
    let matchedName = candidateList[0];
    candidateList.forEach((candidate) => {
      const score = (0, string_similarity_js_1.default)(name, candidate);
      if (score > bestScore) {
        bestScore = score;
        matchedName = candidate;
      }
    });
    fieldInfoMapping[name].fieldName = matchedName;
    fieldInfoMapping[name].alias = name;
    // A table key can be assigned to one field only.
    dataTableUnMatch.delete(matchedName);
  });
  return context;
};
exports.revisedUnMatchedFieldInfo = revisedUnMatchedFieldInfo;
/**
 * Removes sparsely populated measure columns.
 *
 * For each measure field the number of valid cells is counted. A field is
 * removed when it has no valid cell at all, or when its count divided by
 * the largest count among all measure fields is not greater than `ratio`.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @param {number} [ratio=.2] - Threshold relative to the best-filled measure.
 * @returns {object} Result of `removeFieldInfoInCtx` for the sparse fields.
 */
const getCtxByValidColumnRatio = (context, ratio = .2) => {
  const { dataTable = [], fieldInfo } = context || {};
  const measureStats = fieldInfo
    .filter((info) => info.role === types_1.ROLE.MEASURE)
    .map((info) => ({
      fieldName: info.fieldName,
      validCount: dataTable.filter((item) => (0, common_1.isValidData)(item[info.fieldName])).length
    }));
  const maxCount = measureStats.reduce((best, stat) => Math.max(stat.validCount, best), 0);
  const sparseFieldNames = measureStats
    .filter((stat) => !stat.validCount || stat.validCount / maxCount <= ratio)
    .map((stat) => stat.fieldName);
  return removeFieldInfoInCtx(context, sparseFieldNames);
};
exports.getCtxByValidColumnRatio = getCtxByValidColumnRatio;
/**
 * Decides whether two contexts describe tables with the same schema.
 *
 * Both contexts need a truthy `summary` and the same non-zero number of
 * fields, and every field of A must have a counterpart in B with an equal
 * `fieldName`, `type`, `unit`, `ratioGranularity` and `alias`.
 *
 * @param {object} ctxA - First context (`fieldInfo`, `summary`).
 * @param {object} ctxB - Second context (`fieldInfo`, `summary`).
 * @returns {boolean} True when the two tables can be merged.
 */
const canMergeDataTable = (ctxA, ctxB) => {
  const { fieldInfo: fieldInfoA = [], summary: summaryA } = ctxA || {};
  const { fieldInfo: fieldInfoB = [], summary: summaryB } = ctxB || {};

  if (fieldInfoA.length !== fieldInfoB.length || !fieldInfoA.length) {
    return false;
  }
  if (!summaryA || !summaryB) {
    return false;
  }

  const comparedKeys = ["fieldName", "type", "unit", "ratioGranularity", "alias"];
  const describesSameField = (item, itemB) => comparedKeys.every((key) => itemB[key] === item[key]);
  return fieldInfoA.every((item) => fieldInfoB.some((itemB) => describesSameField(item, itemB)));
};
exports.canMergeDataTable = canMergeDataTable;
/**
 * Splits a table into several "data views" by clustering measure columns
 * according to which rows they are filled in, and promotes the best view.
 *
 * @param {object} context - Object carrying `fieldInfo` and `dataTable`.
 * @param {number} [threshold=.4] - Forwarded unchanged to
 *   `agglomerativeHierarchicalClustering`.
 * @returns {object} `context` itself when clustering yields no clusters or
 *   no view survives; otherwise a copy whose `fieldInfo` / `dataTable` come
 *   from the top-ranked view, with `originDataTable` holding the input
 *   table and `clusterResult` holding every surviving view.
 */
const getSplitDataViewOfDataTable = (context, threshold = .4) => {
const {dataTable: dataTable = [], fieldInfo: fieldInfo} = context || {}, measureFieldInfo = fieldInfo.filter((info => info.role === types_1.ROLE.MEASURE)), dimensionFieldInfo = fieldInfo.filter((info => info.role === types_1.ROLE.DIMENSION)), clusterDataItem = [];
// One item per measure field: a 0/1 vector with one entry per row
// (1 = the cell is valid), used as the clustering input.
measureFieldInfo.forEach(((item, index) => {
clusterDataItem.push({
id: item.fieldName,
value: dataTable.map((data => (0, common_1.isValidData)(data[item.fieldName]) ? 1 : 0))
});
}));
const {clusters: clusters} = (0, cluster_1.agglomerativeHierarchicalClustering)(clusterDataItem, threshold);
if (!clusters.length) return context;
const dataViewList = [];
return clusters.forEach((cluster => {
// A view contains every dimension field plus the measures of this cluster.
const clusterIds = cluster.children.map((v => v.id)), clusterFieldInfo = [ ...dimensionFieldInfo, ...measureFieldInfo.filter((info => clusterIds.includes(info.fieldName))) ], clusterFieldIds = clusterFieldInfo.map((v => v.fieldName));
let newContext = {
dataTable: dataTable.map((v => (0, vutils_1.pick)(v, clusterFieldIds))),
fieldInfo: clusterFieldInfo
};
// Clean the view, in this order: drop rows without any valid measure,
// drop duplicate rows, drop columns flagged by getCtxByfilterSameValueColumn.
[ exports.getCtxByFilterRowWithNonEmptyValues, exports.getCtxByfilterSameDataItem, exports.getCtxByfilterSameValueColumn ].forEach((func => {
newContext = func(newContext);
}));
// validCellCount is a net score (+1 per valid cell, -1 per invalid cell);
// validMeasureCellCount counts valid cells of measure fields only.
let validCellCount = 0, validMeasureCellCount = 0;
newContext.dataTable.forEach((item => {
newContext.fieldInfo.forEach((info => {
const isValid = (0, common_1.isValidData)(item[info.fieldName]);
validCellCount += isValid ? 1 : -1, validMeasureCellCount += isValid && info.role === types_1.ROLE.MEASURE ? 1 : 0;
}));
}));
const dataView = {
fieldInfo: newContext.fieldInfo,
dataTable: newContext.dataTable,
validColumnLength: clusterIds.length,
validRowLength: newContext.dataTable.length,
validMeasureCellCount: validMeasureCellCount,
validCellCount: validCellCount
};
// Only views with a positive net score are kept.
validCellCount > 0 && dataViewList.push(dataView);
// Rank views in descending order of validCellCount, then validRowLength,
// then validColumnLength.
// NOTE(review): the comparator never returns 0, so fully tied views
// compare as -1 in both directions - confirm tie order is irrelevant.
})), dataViewList.sort(((a, b) => a.validCellCount < b.validCellCount || a.validCellCount === b.validCellCount && a.validRowLength < b.validRowLength || a.validCellCount === b.validCellCount && a.validRowLength === b.validRowLength && a.validColumnLength < b.validColumnLength ? 1 : -1)),
0 === dataViewList.length ? context : Object.assign(Object.assign({}, context), {
originDataTable: dataTable,
fieldInfo: dataViewList[0].fieldInfo,
dataTable: dataViewList[0].dataTable,
clusterResult: dataViewList
});
};
exports.getSplitDataViewOfDataTable = getSplitDataViewOfDataTable;
/**
 * Checks whether a list of data views can be merged into one row: the
 * list must be non-empty, and every view must hold exactly one row and no
 * field of type DATE or TIME.
 *
 * @param {object[]} clusterResult - Data views (`fieldInfo`, `dataTable`).
 * @returns {boolean} True when all views satisfy the conditions.
 */
const canMergeClusterResult = (clusterResult) => {
  if (!clusterResult.length) {
    return false;
  }
  const isTemporalField = (info) => [types_1.DataType.DATE, types_1.DataType.TIME].includes(info.type);
  return clusterResult.every((dataView) => {
    const { fieldInfo, dataTable } = dataView;
    if (dataTable.length !== 1) {
      return false;
    }
    return !fieldInfo.some(isTemporalField);
  });
};
exports.canMergeClusterResult = canMergeClusterResult;
/**
 * Merges the measure fields of several data views into a single-row table.
 *
 * For every view, its measure fields are appended to the merged field
 * list and their values are copied from the view's first row. Later views
 * overwrite earlier values stored under the same field name.
 *
 * @param {object[]} clusterResult - Data views (`fieldInfo`, `dataTable`).
 * @returns {{fieldInfo: object[], dataTable: object[]}} Merged measures and
 *   a table containing exactly one row.
 */
const mergeClusterDataView = (clusterResult) => {
  const mergedRow = {};
  const mergedFieldInfo = [];
  for (const dataView of clusterResult) {
    const { fieldInfo, dataTable } = dataView;
    const measureFields = fieldInfo.filter((info) => info.role === types_1.ROLE.MEASURE);
    mergedFieldInfo.push(...measureFields);
    for (const field of measureFields) {
      mergedRow[field.fieldName] = dataTable[0][field.fieldName];
    }
  }
  return {
    fieldInfo: mergedFieldInfo,
    dataTable: [mergedRow]
  };
};
exports.mergeClusterDataView = mergeClusterDataView;
/**
 * Compares two field lists irrespective of order.
 *
 * The lists are the same when they have equal length and every field of
 * `a` has an entry in `b` with the same `fieldName` whose `role`, `type`,
 * `unit` and `dateGranularity` are strictly equal. The entry used from `b`
 * is the first one with a matching `fieldName`.
 *
 * @param {object[]} a - First field list.
 * @param {object[]} b - Second field list.
 * @returns {boolean} True when both lists describe the same fields.
 */
const isSameFields = (a, b) => {
  if (a.length !== b.length) {
    return false;
  }
  const comparedKeys = ["role", "type", "unit", "dateGranularity"];
  return a.every((info) => {
    const matchInB = b.find((item) => item.fieldName === info.fieldName);
    if (!matchInB) {
      return false;
    }
    return comparedKeys.every((key) => matchInB[key] === info[key]);
  });
};
/**
 * Splits two strings around the longer of their common prefix and common
 * suffix (the prefix wins ties).
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {{strA: string, strB: string, commonStr: string}} `commonStr` is
 *   the shared prefix or suffix; `strA` / `strB` are what remains of `a` /
 *   `b` once that shared part is removed.
 */
function longestCommonSubstringAtEdges(a, b) {
  const limit = Math.min(a.length, b.length);

  let startLen = 0;
  while (startLen < limit && a[startLen] === b[startLen]) {
    startLen++;
  }

  let endLen = 0;
  while (endLen < limit && a[a.length - 1 - endLen] === b[b.length - 1 - endLen]) {
    endLen++;
  }

  if (startLen >= endLen) {
    return {
      strA: a.substring(startLen),
      strB: b.substring(startLen),
      commonStr: a.substring(0, startLen)
    };
  }
  return {
    strA: a.substring(0, a.length - endLen),
    strB: b.substring(0, b.length - endLen),
    // The shared suffix is the LAST endLen characters of `a`; slicing from
    // index endLen would return an unrelated tail.
    commonStr: a.substring(a.length - endLen)
  };
}
/**
 * Concatenates two tables and adds a string dimension that records which
 * table each row came from.
 *
 * The two summaries are split with `longestCommonSubstringAtEdges`: the
 * shared part names the new field, and the differing parts become the
 * value of that field for rows of A and of B respectively. The merged
 * field list is the new dimension followed by A's fields.
 *
 * @param {object} ctxA - First context (`dataTable`, `fieldInfo`,
 *   `summary`, `textRange`).
 * @param {object} ctxB - Second context (`dataTable`, `summary`,
 *   `textRange`).
 * @returns {object} Merged `dataTable`, `fieldInfo`, `summary` and
 *   `textRange` (`[rangeA[0], rangeB[1]]`, or null if either range is
 *   missing).
 */
const mergeDataTable = (ctxA, ctxB) => {
  const { dataTable: tableA, summary: summaryA, textRange: rangeA } = ctxA;
  const { dataTable: tableB, summary: summaryB, textRange: rangeB } = ctxB;
  const { strA, strB, commonStr } = longestCommonSubstringAtEdges(summaryA, summaryB);

  const mergedSummary = `${summaryA} and ${summaryB}`;
  const sourceField = {
    fieldName: commonStr,
    description: mergedSummary,
    role: types_1.ROLE.DIMENSION,
    type: types_1.DataType.STRING
  };

  // Copy every row and label it with the part of the summary unique to its table.
  const labelRows = (rows, label) => rows.map((row) => Object.assign(Object.assign({}, row), {
    [commonStr]: label
  }));
  const mergedTable = [...labelRows(tableA, strA), ...labelRows(tableB, strB)];

  let textRange = null;
  if (rangeA && rangeB) {
    textRange = [rangeA[0], rangeB[1]];
  }

  return {
    dataTable: mergedTable,
    fieldInfo: [sourceField, ...ctxA.fieldInfo],
    summary: mergedSummary,
    textRange: textRange
  };
};
exports.mergeDataTable = mergeDataTable;
//# sourceMappingURL=utils.js.map