UNPKG

@visactor/vmind

Version:

<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu

320 lines (280 loc) 17.3 kB
"use strict"; var __importDefault = this && this.__importDefault || function(mod) { return mod && mod.__esModule ? mod : { default: mod }; }; Object.defineProperty(exports, "__esModule", { value: !0 }), exports.mergeDataTable = exports.mergeClusterDataView = exports.canMergeClusterResult = exports.getSplitDataViewOfDataTable = exports.canMergeDataTable = exports.getCtxByValidColumnRatio = exports.revisedUnMatchedFieldInfo = exports.getCtxByRangeValueTranser = exports.getCtxByFilterRowWithNonEmptyValues = exports.getCtxByfilterSameDataItem = exports.getCtxBymeasureAutoTransfer = exports.sortDataTableByDate = exports.getCtxByneedNumericalFields = exports.getCtxByfilterSameValueColumn = exports.transferFieldInfo = void 0; const string_similarity_js_1 = __importDefault(require("string-similarity-js")), field_1 = require("../../utils/field"), types_1 = require("../../types"), vutils_1 = require("@visactor/vutils"), text_1 = require("../../utils/text"), common_1 = require("../../utils/common"), cluster_1 = require("../../utils/cluster"), dayjs_1 = __importDefault(require("dayjs")), removeFieldInfoInCtx = (context, cleanFieldKey) => { if (!cleanFieldKey.length) return context; const {fieldInfo: fieldInfo = [], dataTable: dataTable = []} = context || {}, newFieldInfo = fieldInfo.filter((info => !cleanFieldKey.includes(info.fieldName))), fieldNameList = newFieldInfo.map((info => info.fieldName)), newDataTable = dataTable.map((dataItem => (0, vutils_1.pick)(dataItem, fieldNameList))); return Object.assign(Object.assign({}, context), { fieldInfo: newFieldInfo, dataTable: newDataTable }); }, transferFieldInfo = (context, fieldMapping) => ((context.fieldInfo || []).forEach((info => { var _a, _b; info.role && info.location || (info.role = (0, field_1.getRoleByFieldType)(info.type), info.location = info.role), (null == fieldMapping ? void 0 : fieldMapping[info.fieldName]) && (info.alias = null !== (_a = info.alias) && void 0 !== _a ? _a : null === (_b = fieldMapping[info.fieldName]) || void 0 === _b ? void 0 : _b.alias); })), context); exports.transferFieldInfo = transferFieldInfo; const getCtxByfilterSameValueColumn = context => { const {fieldInfo: fieldInfo = [], dataTable: dataTable = []} = context || {}, newContext = Object.assign({}, context); if (dataTable.length > 1 && fieldInfo.length) { const cleanFieldKey = []; return fieldInfo.forEach((info => { if (info.role === types_1.ROLE.MEASURE) return; let shouldFilter = !0; const prev = dataTable[0][info.fieldName]; for (let i = 1; i < dataTable.length; i++) if ((0, common_1.isValidData)(dataTable[i][info.fieldName]) && dataTable[i][info.fieldName] !== prev) { shouldFilter = !1; break; } shouldFilter && cleanFieldKey.push(info.fieldName); })), removeFieldInfoInCtx(newContext, cleanFieldKey); } if (dataTable.length && fieldInfo.length) { const cleanFieldKey = []; return fieldInfo.forEach((info => { info.role !== types_1.ROLE.DIMENSION || (0, common_1.isValidData)(dataTable[0][info.fieldName]) || cleanFieldKey.push(info.fieldName); })), removeFieldInfoInCtx(newContext, cleanFieldKey); } return newContext; }; exports.getCtxByfilterSameValueColumn = getCtxByfilterSameValueColumn; const getCtxByneedNumericalFields = context => -1 === context.fieldInfo.findIndex((info => info.role === types_1.ROLE.MEASURE)) ? Object.assign(Object.assign({}, context), { dataTable: [], fieldInfo: [] }) : context; exports.getCtxByneedNumericalFields = getCtxByneedNumericalFields; const sortDataTableByDate = context => { const {dataTable: dataTable, fieldInfo: fieldInfo} = context, dateField = fieldInfo.find((info => info.role === types_1.ROLE.DIMENSION && info.type === types_1.DataType.DATE)); return dateField && dataTable.sort(((a, b) => { const dateA = (0, dayjs_1.default)((0, common_1.convertStringToDateValue)(`${a[dateField.fieldName]}`)), dateB = (0, dayjs_1.default)((0, common_1.convertStringToDateValue)(`${b[dateField.fieldName]}`)); return dateA.isValid() && dateB.isValid() ? dateA.isBefore(dateB) ? -1 : 1 : 0; })), context; }; exports.sortDataTableByDate = sortDataTableByDate; const getCtxBymeasureAutoTransfer = (context, text) => { const {fieldInfo: fieldInfo = [], dataTable: dataTable = []} = context || {}, isStringText = text && "string" == typeof text; return dataTable.length >= 1 && fieldInfo.length && fieldInfo.forEach((info => { if (info.role !== types_1.ROLE.DIMENSION) for (let i = 0; i < dataTable.length; i++) { let value = dataTable[i][info.fieldName]; if ("string" == typeof dataTable[i][info.fieldName] && isNaN(Number(value))) { const extractionValue = `${(0, text_1.extractFirstNumberInString)(value)}`, beforeLen = value.length, curLen = extractionValue.length; dataTable[i][info.fieldName] = "null" !== extractionValue && (curLen / beforeLen > .9 || beforeLen - curLen <= 2) ? Number(extractionValue) : null; } else "string" == typeof dataTable[i][info.fieldName] ? (value = Number(value), dataTable[i][info.fieldName] = value) : (0, vutils_1.isNumber)(value) || (value = null, dataTable[i][info.fieldName] = null); if (value = dataTable[i][info.fieldName], info.type === types_1.DataType.RATIO && (0, vutils_1.isNumber)(value)) { if (isStringText) { const ratioValue = 100 * value; !text.includes(`${ratioValue}%`) && !text.includes(`${value}倍`) || text.includes(`${value}`) || (dataTable[i][info.fieldName] = ratioValue, value = ratioValue); } "%" === (null == info ? void 0 : info.ratioGranularity) ? dataTable[i][info.fieldName] = value / 100 : "‰" === (null == info ? void 0 : info.ratioGranularity) && (dataTable[i][info.fieldName] = value / 1e3); } } })), context; }; exports.getCtxBymeasureAutoTransfer = getCtxBymeasureAutoTransfer; const getCtxByfilterSameDataItem = context => { const {dataTable: dataTable = [], fieldInfo: fieldInfo} = context || {}; let newDataTable = (0, common_1.uniqBy)(dataTable, (item => JSON.stringify(item))); return 1 === fieldInfo.length && (newDataTable = newDataTable.length > 0 ? [ newDataTable[0] ] : newDataTable), Object.assign(Object.assign({}, context), { dataTable: newDataTable }); }; exports.getCtxByfilterSameDataItem = getCtxByfilterSameDataItem; const isDataItemWithNonEmptyValues = (dataItem, fieldInfo) => fieldInfo.some((info => (0, common_1.isValidData)(dataItem[info.fieldName]))), getCtxByFilterRowWithNonEmptyValues = context => { const {dataTable: dataTable = [], fieldInfo: fieldInfo} = context || {}, measureFieldInfo = fieldInfo.filter((info => info.role === types_1.ROLE.MEASURE)); return Object.assign(Object.assign({}, context), { dataTable: dataTable.filter((item => isDataItemWithNonEmptyValues(item, measureFieldInfo))) }); }; exports.getCtxByFilterRowWithNonEmptyValues = getCtxByFilterRowWithNonEmptyValues; const transferRangeData = (cell, type) => { const validCell = cell.filter((v => (0, common_1.isValidData)(v))); switch (type) { case "avg": return (0, common_1.average)(validCell); case "filter": return null; case "max": return Math.max(...validCell); case "min": return Math.min(...validCell); case "first": return validCell[0]; case "last": return validCell[validCell.length - 1]; default: return validCell.join("-"); } }, getCtxByRangeValueTranser = (context, type) => { const {dataTable: dataTable = [], fieldInfo: fieldInfo} = context || {}; return Object.assign(Object.assign({}, context), { dataTable: dataTable.map((item => { const newItem = Object.assign({}, item); return fieldInfo.forEach((info => { info.role === types_1.ROLE.MEASURE && !(0, vutils_1.isString)(item[info.fieldName]) && (0, vutils_1.isArray)(item[info.fieldName]) && (newItem[info.fieldName] = transferRangeData(item[info.fieldName], type)); })), newItem; })) }); }; exports.getCtxByRangeValueTranser = getCtxByRangeValueTranser; const revisedUnMatchedFieldInfo = context => { const {dataTable: dataTable, fieldInfo: fieldInfo} = context, dataTableFieldSet = new Set; dataTable.forEach((item => { Object.keys(item).forEach((key => dataTableFieldSet.add(key))); })); const fieldInfoMapping = {}; fieldInfo.forEach((info => { fieldInfoMapping[info.fieldName] = info; })); const fieldNameSet = new Set(Object.keys(fieldInfoMapping)), intersectionName = dataTableFieldSet.intersection(fieldNameSet); if (intersectionName.size !== dataTableFieldSet.size) { const dataTableUnMatch = dataTableFieldSet.difference(intersectionName), fieldNameUnMatch = fieldNameSet.difference(intersectionName); if (dataTableUnMatch.size !== fieldNameUnMatch.size) return context; fieldNameUnMatch.forEach((name => { const candidateList = [ ...dataTableUnMatch ]; let min = -1, matchedName = candidateList[0]; candidateList.forEach((v => { const score = (0, string_similarity_js_1.default)(name, v); score > min && (min = score, matchedName = v); })), fieldInfoMapping[name].fieldName = matchedName, fieldInfoMapping[name].alias = name, dataTableUnMatch.delete(matchedName); })); } return context; }; exports.revisedUnMatchedFieldInfo = revisedUnMatchedFieldInfo; const getCtxByValidColumnRatio = (context, ratio = .2) => { const {dataTable: dataTable = [], fieldInfo: fieldInfo} = context || {}; let maxCount = 0; const invalidFieldInfo = fieldInfo.filter((info => info.role === types_1.ROLE.MEASURE)).map((info => { const validCount = dataTable.map((item => item[info.fieldName])).filter((item => (0, common_1.isValidData)(item))).length; return maxCount = Math.max(validCount, maxCount), { fieldName: info.fieldName, validCount: validCount }; })).filter((info => !info.validCount || info.validCount / maxCount <= ratio)); return removeFieldInfoInCtx(context, invalidFieldInfo.map((v => v.fieldName))); }; exports.getCtxByValidColumnRatio = getCtxByValidColumnRatio; const canMergeDataTable = (ctxA, ctxB) => { const {fieldInfo: fieldInfoA = [], summary: summaryA} = ctxA || {}, {fieldInfo: fieldInfoB = [], summary: summaryB} = ctxB || {}; return !!(fieldInfoA.length === fieldInfoB.length && fieldInfoA.length && summaryA && summaryB) && fieldInfoA.every((item => fieldInfoB.find((itemB => itemB.fieldName === item.fieldName && itemB.type === item.type && (null == itemB ? void 0 : itemB.unit) === (null == item ? void 0 : item.unit) && (null == itemB ? void 0 : itemB.ratioGranularity) === (null == item ? void 0 : item.ratioGranularity) && (null == itemB ? void 0 : itemB.alias) === (null == item ? void 0 : item.alias))))); }; exports.canMergeDataTable = canMergeDataTable; const getSplitDataViewOfDataTable = (context, threshold = .4) => { const {dataTable: dataTable = [], fieldInfo: fieldInfo} = context || {}, measureFieldInfo = fieldInfo.filter((info => info.role === types_1.ROLE.MEASURE)), dimensionFieldInfo = fieldInfo.filter((info => info.role === types_1.ROLE.DIMENSION)), clusterDataItem = []; measureFieldInfo.forEach(((item, index) => { clusterDataItem.push({ id: item.fieldName, value: dataTable.map((data => (0, common_1.isValidData)(data[item.fieldName]) ? 1 : 0)) }); })); const {clusters: clusters} = (0, cluster_1.agglomerativeHierarchicalClustering)(clusterDataItem, threshold); if (!clusters.length) return context; const dataViewList = []; return clusters.forEach((cluster => { const clusterIds = cluster.children.map((v => v.id)), clusterFieldInfo = [ ...dimensionFieldInfo, ...measureFieldInfo.filter((info => clusterIds.includes(info.fieldName))) ], clusterFieldIds = clusterFieldInfo.map((v => v.fieldName)); let newContext = { dataTable: dataTable.map((v => (0, vutils_1.pick)(v, clusterFieldIds))), fieldInfo: clusterFieldInfo }; [ exports.getCtxByFilterRowWithNonEmptyValues, exports.getCtxByfilterSameDataItem, exports.getCtxByfilterSameValueColumn ].forEach((func => { newContext = func(newContext); })); let validCellCount = 0, validMeasureCellCount = 0; newContext.dataTable.forEach((item => { newContext.fieldInfo.forEach((info => { const isValid = (0, common_1.isValidData)(item[info.fieldName]); validCellCount += isValid ? 1 : -1, validMeasureCellCount += isValid && info.role === types_1.ROLE.MEASURE ? 1 : 0; })); })); const dataView = { fieldInfo: newContext.fieldInfo, dataTable: newContext.dataTable, validColumnLength: clusterIds.length, validRowLength: newContext.dataTable.length, validMeasureCellCount: validMeasureCellCount, validCellCount: validCellCount }; validCellCount > 0 && dataViewList.push(dataView); })), dataViewList.sort(((a, b) => a.validCellCount < b.validCellCount || a.validCellCount === b.validCellCount && a.validRowLength < b.validRowLength || a.validCellCount === b.validCellCount && a.validRowLength === b.validRowLength && a.validColumnLength < b.validColumnLength ? 1 : -1)), 0 === dataViewList.length ? context : Object.assign(Object.assign({}, context), { originDataTable: dataTable, fieldInfo: dataViewList[0].fieldInfo, dataTable: dataViewList[0].dataTable, clusterResult: dataViewList }); }; exports.getSplitDataViewOfDataTable = getSplitDataViewOfDataTable; const canMergeClusterResult = clusterResult => !!clusterResult.length && clusterResult.every((dataView => { const {fieldInfo: fieldInfo, dataTable: dataTable} = dataView; return 1 === dataTable.length && -1 === fieldInfo.findIndex((info => [ types_1.DataType.DATE, types_1.DataType.TIME ].includes(info.type))); })); exports.canMergeClusterResult = canMergeClusterResult; const mergeClusterDataView = clusterResult => { const newFieldInfo = [], newDataTable = [ {} ]; return clusterResult.forEach((dataView => { const {fieldInfo: fieldInfo, dataTable: dataTable} = dataView, measureFields = fieldInfo.filter((info => info.role === types_1.ROLE.MEASURE)); newFieldInfo.push(...measureFields), measureFields.forEach((field => { newDataTable[0][field.fieldName] = dataTable[0][field.fieldName]; })); })), { fieldInfo: newFieldInfo, dataTable: newDataTable }; }; exports.mergeClusterDataView = mergeClusterDataView; const isSameFields = (a, b) => a.length === b.length && a.every(((info, index) => { const matchInB = b.find((item => item.fieldName === info.fieldName)); return matchInB && matchInB.role === info.role && matchInB.type === info.type && matchInB.unit === info.unit && matchInB.dateGranularity === info.dateGranularity; })); function longestCommonSubstringAtEdges(a, b) { let startLen = 0; for (;startLen < a.length && startLen < b.length && a[startLen] === b[startLen]; ) startLen++; let endLen = 0; for (;endLen < a.length && endLen < b.length && a[a.length - 1 - endLen] === b[b.length - 1 - endLen]; ) endLen++; return startLen >= endLen ? { strA: a.substring(startLen, a.length), strB: b.substring(startLen, b.length), commonStr: a.substring(0, startLen) } : { strA: a.substring(0, a.length - endLen), strB: b.substring(0, b.length - endLen), commonStr: a.substring(endLen, a.length) }; } const mergeDataTable = (ctxA, ctxB) => { const {dataTable: tableA, summary: summaryA, textRange: rangeA} = ctxA, {dataTable: tableB, summary: summaryB, textRange: rangeB} = ctxB, {strA: strA, strB: strB, commonStr: commonStr} = longestCommonSubstringAtEdges(summaryA, summaryB), newFieldInfo = { fieldName: commonStr, description: `${summaryA} and ${summaryB}`, role: types_1.ROLE.DIMENSION, type: types_1.DataType.STRING }, newDataTable = [ ...tableA.map((v => Object.assign(Object.assign({}, v), { [commonStr]: strA }))), ...tableB.map((v => Object.assign(Object.assign({}, v), { [commonStr]: strB }))) ], textRange = rangeA && rangeB ? [ rangeA[0], rangeB[1] ] : null; return { dataTable: newDataTable, fieldInfo: [ newFieldInfo, ...ctxA.fieldInfo ], summary: `${summaryA} and ${summaryB}`, textRange: textRange }; }; exports.mergeDataTable = mergeDataTable; //# sourceMappingURL=utils.js.map