UNPKG

@visactor/vmind

Version:

<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu

293 lines (260 loc) 15.3 kB
import stringSimilarity from "string-similarity-js";
import { getRoleByFieldType } from "../../utils/field";
import { DataType, ROLE } from "../../types";
import { isArray, isNumber, isString, pick } from "@visactor/vutils";
import { extractFirstNumberInString } from "../../utils/text";
import { isValidData, uniqBy, average, convertStringToDateValue } from "../../utils/common";
import { agglomerativeHierarchicalClustering } from "../../utils/cluster";
import dayjs from "dayjs";

/**
 * Returns a shallow copy of `context` with the listed fields removed from
 * `fieldInfo` and every row of `dataTable` reduced (via `pick`) to the
 * remaining field names.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @param {string[]} cleanFieldKey - Field names to remove.
 * @returns {object} `context` itself when nothing has to be removed, otherwise a new object.
 */
const removeFieldInfoInCtx = (context, cleanFieldKey) => {
  if (!cleanFieldKey.length) {
    return context;
  }
  const { fieldInfo = [], dataTable = [] } = context || {};
  const newFieldInfo = fieldInfo.filter(info => !cleanFieldKey.includes(info.fieldName));
  const fieldNameList = newFieldInfo.map(info => info.fieldName);
  const newDataTable = dataTable.map(dataItem => pick(dataItem, fieldNameList));
  return Object.assign({}, context, { fieldInfo: newFieldInfo, dataTable: newDataTable });
};

/**
 * Fills in `role`/`location` on every field info that lacks either of them
 * (both are set to the role derived from the field type) and copies an alias
 * from `fieldMapping` when the field has none. Mutates the field infos in place.
 *
 * @param {object} context - Context whose `fieldInfo` entries are updated.
 * @param {object} [fieldMapping] - Map from field name to an object that may carry `alias`.
 * @returns {object} The same `context` object.
 */
export const transferFieldInfo = (context, fieldMapping) => {
  (context.fieldInfo || []).forEach(info => {
    if (!(info.role && info.location)) {
      info.role = getRoleByFieldType(info.type);
      info.location = info.role;
    }
    const mapping = fieldMapping == null ? undefined : fieldMapping[info.fieldName];
    if (mapping) {
      // An alias already present on the field wins over the mapped one.
      info.alias = info.alias !== null && info.alias !== undefined ? info.alias : mapping.alias;
    }
  });
  return context;
};

/**
 * Drops non-measure columns that carry no information.
 *
 * With more than one row, a non-measure field is dropped when no later row
 * holds a valid value that differs from the first row's value. With exactly
 * one row, a dimension field is dropped when its value in that row is not valid.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @returns {object} A new context object (shallow copy) with the columns removed.
 */
export const getCtxByfilterSameValueColumn = context => {
  const { fieldInfo = [], dataTable = [] } = context || {};
  const newContext = Object.assign({}, context);

  if (dataTable.length > 1 && fieldInfo.length) {
    const cleanFieldKey = [];
    fieldInfo.forEach(info => {
      if (info.role === ROLE.MEASURE) {
        return;
      }
      const prev = dataTable[0][info.fieldName];
      let shouldFilter = true;
      for (let i = 1; i < dataTable.length; i++) {
        const current = dataTable[i][info.fieldName];
        if (isValidData(current) && current !== prev) {
          shouldFilter = false;
          break;
        }
      }
      if (shouldFilter) {
        cleanFieldKey.push(info.fieldName);
      }
    });
    return removeFieldInfoInCtx(newContext, cleanFieldKey);
  }

  if (dataTable.length && fieldInfo.length) {
    const cleanFieldKey = [];
    fieldInfo.forEach(info => {
      if (info.role === ROLE.DIMENSION && !isValidData(dataTable[0][info.fieldName])) {
        cleanFieldKey.push(info.fieldName);
      }
    });
    return removeFieldInfoInCtx(newContext, cleanFieldKey);
  }

  return newContext;
};

/**
 * Empties the context when it contains no measure field at all.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @returns {object} `context` unchanged when a measure field exists, otherwise a
 *   copy whose `dataTable` and `fieldInfo` are empty arrays.
 */
export const getCtxByneedNumericalFields = context => {
  const hasMeasure = context.fieldInfo.some(info => info.role === ROLE.MEASURE);
  return hasMeasure ? context : Object.assign({}, context, { dataTable: [], fieldInfo: [] });
};

/**
 * Sorts `context.dataTable` in place, ascending, by the first dimension field
 * whose type is `DataType.DATE`. Does nothing when there is no such field.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @returns {object} The same `context` object.
 */
export const sortDataTableByDate = context => {
  const { dataTable, fieldInfo } = context;
  const dateField = fieldInfo.find(info => info.role === ROLE.DIMENSION && info.type === DataType.DATE);
  if (dateField) {
    dataTable.sort((a, b) => {
      const dateA = dayjs(convertStringToDateValue(`${a[dateField.fieldName]}`));
      const dateB = dayjs(convertStringToDateValue(`${b[dateField.fieldName]}`));
      if (!dateA.isValid() || !dateB.isValid()) {
        return 0;
      }
      if (dateA.isBefore(dateB)) {
        return -1;
      }
      // Equal dates must compare as 0 so the comparator stays consistent and
      // the (stable) sort keeps their original relative order.
      return dateA.isAfter(dateB) ? 1 : 0;
    });
  }
  return context;
};

/**
 * Normalises the cells of every non-dimension field to numbers (or `null`),
 * mutating the rows of `context.dataTable` in place.
 *
 * - Non-numeric strings: the first number found in the string is used when it
 *   covers most of the string (more than 90% of its length, or at most two
 *   characters shorter); otherwise the cell becomes `null`.
 * - Numeric strings are converted with `Number`.
 * - Any other non-number value becomes `null`.
 * - For `DataType.RATIO` fields the value is additionally rescaled according
 *   to how it appears in `text` and to the field's `ratioGranularity`.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @param {string} [text] - Original text the data was extracted from.
 * @returns {object} The same `context` object.
 */
export const getCtxBymeasureAutoTransfer = (context, text) => {
  const { fieldInfo = [], dataTable = [] } = context || {};
  const isStringText = text && typeof text === "string";

  if (dataTable.length >= 1 && fieldInfo.length) {
    fieldInfo.forEach(info => {
      if (info.role === ROLE.DIMENSION) {
        return;
      }
      for (let i = 0; i < dataTable.length; i++) {
        const row = dataTable[i];
        let value = row[info.fieldName];

        if (typeof value === "string" && Number.isNaN(Number(value))) {
          const extractionValue = `${extractFirstNumberInString(value)}`;
          const beforeLen = value.length;
          const curLen = extractionValue.length;
          const isMostlyNumber = curLen / beforeLen > 0.9 || beforeLen - curLen <= 2;
          row[info.fieldName] = extractionValue !== "null" && isMostlyNumber ? Number(extractionValue) : null;
        } else if (typeof value === "string") {
          row[info.fieldName] = Number(value);
        } else if (!isNumber(value)) {
          row[info.fieldName] = null;
        }

        value = row[info.fieldName];
        if (info.type === DataType.RATIO && isNumber(value)) {
          if (isStringText) {
            const ratioValue = 100 * value;
            // The text shows the value as a percentage (or multiple) but not as
            // the raw number, so the percentage form is taken as the real one.
            // NOTE(review): a text containing `${value}倍` always contains
            // `${value}` too, so only the percentage test can trigger here.
            const mentionedScaled = text.includes(`${ratioValue}%`) || text.includes(`${value}倍`);
            if (mentionedScaled && !text.includes(`${value}`)) {
              row[info.fieldName] = ratioValue;
              value = ratioValue;
            }
          }
          if (info.ratioGranularity === "%") {
            row[info.fieldName] = value / 100;
          } else if (info.ratioGranularity === "‰") {
            row[info.fieldName] = value / 1e3;
          }
        }
      }
    });
  }
  return context;
};

/**
 * Removes duplicated rows (rows with the same `JSON.stringify` output). When
 * the context has exactly one field, only the first remaining row is kept.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @returns {object} A copy of `context` with the de-duplicated `dataTable`.
 */
export const getCtxByfilterSameDataItem = context => {
  const { dataTable = [], fieldInfo = [] } = context || {};
  let newDataTable = uniqBy(dataTable, item => JSON.stringify(item));
  if (fieldInfo.length === 1 && newDataTable.length > 0) {
    newDataTable = [newDataTable[0]];
  }
  return Object.assign({}, context, { dataTable: newDataTable });
};

/**
 * Tells whether `dataItem` holds a valid value for at least one of the fields.
 *
 * @param {object} dataItem - One row of the data table.
 * @param {object[]} fieldInfo - Fields to inspect.
 * @returns {boolean} `true` when any inspected field has a valid value.
 */
const isDataItemWithNonEmptyValues = (dataItem, fieldInfo) =>
  fieldInfo.some(info => isValidData(dataItem[info.fieldName]));

/**
 * Keeps only the rows that hold a valid value in at least one measure field.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @returns {object} A copy of `context` with the filtered `dataTable`.
 */
export const getCtxByFilterRowWithNonEmptyValues = context => {
  const { dataTable = [], fieldInfo = [] } = context || {};
  const measureFieldInfo = fieldInfo.filter(info => info.role === ROLE.MEASURE);
  return Object.assign({}, context, {
    dataTable: dataTable.filter(item => isDataItemWithNonEmptyValues(item, measureFieldInfo))
  });
};

/**
 * Collapses an array-valued cell into a single value.
 *
 * @param {Array} cell - Values of the cell; invalid entries are ignored.
 * @param {string} type - One of "avg", "filter", "max", "min", "first", "last";
 *   any other value joins the valid entries with "-".
 * @returns {*} The collapsed value. "filter" always yields `null`; "max" and
 *   "min" yield `null` when the cell has no valid entry.
 */
const transferRangeData = (cell, type) => {
  const validCell = cell.filter(v => isValidData(v));
  switch (type) {
    case "avg":
      return average(validCell);
    case "filter":
      return null;
    case "max":
      // Math.max() of nothing is -Infinity, which is not a meaningful cell value.
      return validCell.length ? Math.max(...validCell) : null;
    case "min":
      // Math.min() of nothing is Infinity, which is not a meaningful cell value.
      return validCell.length ? Math.min(...validCell) : null;
    case "first":
      return validCell[0];
    case "last":
      return validCell[validCell.length - 1];
    default:
      return validCell.join("-");
  }
};

/**
 * Replaces every array-valued measure cell by a single value computed with
 * `transferRangeData`. Rows are copied; the input rows are not modified.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @param {string} type - Collapsing strategy, see `transferRangeData`.
 * @returns {object} A copy of `context` with the converted `dataTable`.
 */
export const getCtxByRangeValueTranser = (context, type) => {
  const { dataTable = [], fieldInfo = [] } = context || {};
  const newDataTable = dataTable.map(item => {
    const newItem = Object.assign({}, item);
    fieldInfo.forEach(info => {
      const cell = item[info.fieldName];
      if (info.role === ROLE.MEASURE && !isString(cell) && isArray(cell)) {
        newItem[info.fieldName] = transferRangeData(cell, type);
      }
    });
    return newItem;
  });
  return Object.assign({}, context, { dataTable: newDataTable });
};

/**
 * Re-aligns field infos whose `fieldName` does not appear as a key in the data
 * table. Each unmatched field info is renamed to the most similar unmatched
 * data-table key (by `stringSimilarity`) and keeps its old name as `alias`.
 * Nothing is changed when the numbers of unmatched names on both sides differ.
 * Mutates the field infos in place.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @returns {object} The same `context` object.
 */
export const revisedUnMatchedFieldInfo = context => {
  const { dataTable, fieldInfo } = context;

  const dataTableFieldSet = new Set();
  dataTable.forEach(item => {
    Object.keys(item).forEach(key => dataTableFieldSet.add(key));
  });

  const fieldInfoMapping = {};
  fieldInfo.forEach(info => {
    fieldInfoMapping[info.fieldName] = info;
  });
  const fieldNameSet = new Set(Object.keys(fieldInfoMapping));

  // Computed with plain `has` checks instead of Set.prototype.intersection /
  // difference, which are missing on runtimes older than Node 22.
  const dataTableUnMatch = new Set([...dataTableFieldSet].filter(key => !fieldNameSet.has(key)));
  if (dataTableUnMatch.size === 0) {
    return context;
  }
  const fieldNameUnMatch = [...fieldNameSet].filter(name => !dataTableFieldSet.has(name));
  if (dataTableUnMatch.size !== fieldNameUnMatch.length) {
    return context;
  }

  fieldNameUnMatch.forEach(name => {
    const candidateList = [...dataTableUnMatch];
    let bestScore = -1;
    let matchedName = candidateList[0];
    candidateList.forEach(candidate => {
      const score = stringSimilarity(name, candidate);
      if (score > bestScore) {
        bestScore = score;
        matchedName = candidate;
      }
    });
    fieldInfoMapping[name].fieldName = matchedName;
    fieldInfoMapping[name].alias = name;
    // A data-table key can be assigned to one field info only.
    dataTableUnMatch.delete(matchedName);
  });
  return context;
};

/**
 * Removes measure columns that are (almost) empty: a measure is dropped when it
 * has no valid cell, or when its number of valid cells divided by the highest
 * valid-cell count among all measures is at most `ratio`.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @param {number} [ratio=0.2] - Threshold relative to the best-filled measure.
 * @returns {object} `context` itself when nothing is removed, otherwise a new object.
 */
export const getCtxByValidColumnRatio = (context, ratio = 0.2) => {
  const { dataTable = [], fieldInfo = [] } = context || {};
  let maxCount = 0;
  const measureStats = fieldInfo
    .filter(info => info.role === ROLE.MEASURE)
    .map(info => {
      const validCount = dataTable.filter(item => isValidData(item[info.fieldName])).length;
      maxCount = Math.max(validCount, maxCount);
      return { fieldName: info.fieldName, validCount };
    });
  // `maxCount` is final here because the whole `map` pass has completed.
  const invalidFieldInfo = measureStats.filter(info => !info.validCount || info.validCount / maxCount <= ratio);
  return removeFieldInfoInCtx(context, invalidFieldInfo.map(v => v.fieldName));
};

/**
 * Tells whether two contexts can be merged by `mergeDataTable`: both need a
 * summary and the same, non-zero number of fields, and every field of `ctxA`
 * needs a counterpart in `ctxB` with the same name, type, unit,
 * ratioGranularity and alias.
 *
 * @param {object} ctxA - First context.
 * @param {object} ctxB - Second context.
 * @returns {boolean} `true` when the contexts are mergeable.
 */
export const canMergeDataTable = (ctxA, ctxB) => {
  const { fieldInfo: fieldInfoA = [], summary: summaryA } = ctxA || {};
  const { fieldInfo: fieldInfoB = [], summary: summaryB } = ctxB || {};
  const isComparable = Boolean(
    fieldInfoA.length === fieldInfoB.length && fieldInfoA.length && summaryA && summaryB
  );
  if (!isComparable) {
    return false;
  }
  return fieldInfoA.every(item =>
    fieldInfoB.some(
      itemB =>
        itemB.fieldName === item.fieldName &&
        itemB.type === item.type &&
        itemB.unit === item.unit &&
        itemB.ratioGranularity === item.ratioGranularity &&
        itemB.alias === item.alias
    )
  );
};

/**
 * Splits the data table into data views by clustering the measure fields on
 * their valid/invalid pattern across rows, cleans each view and selects the
 * best one as the new `fieldInfo`/`dataTable`.
 *
 * Views are ranked by `validCellCount` (valid cells minus invalid cells), then
 * by row count, then by the number of clustered measure fields, all descending.
 * Views whose `validCellCount` is not positive are discarded.
 *
 * @param {object} context - Context holding `fieldInfo` and `dataTable`.
 * @param {number} [threshold=0.4] - Threshold forwarded to the clustering helper.
 * @returns {object} `context` itself when no cluster or no usable view exists,
 *   otherwise a copy carrying the best view plus `originDataTable` and
 *   `clusterResult` (all views, best first).
 */
export const getSplitDataViewOfDataTable = (context, threshold = 0.4) => {
  const { dataTable = [], fieldInfo = [] } = context || {};
  const measureFieldInfo = fieldInfo.filter(info => info.role === ROLE.MEASURE);
  const dimensionFieldInfo = fieldInfo.filter(info => info.role === ROLE.DIMENSION);

  // One 0/1 vector per measure: 1 where the row holds a valid value.
  const clusterDataItem = measureFieldInfo.map(item => ({
    id: item.fieldName,
    value: dataTable.map(data => (isValidData(data[item.fieldName]) ? 1 : 0))
  }));

  const { clusters } = agglomerativeHierarchicalClustering(clusterDataItem, threshold);
  if (!clusters.length) {
    return context;
  }

  const cleaners = [getCtxByFilterRowWithNonEmptyValues, getCtxByfilterSameDataItem, getCtxByfilterSameValueColumn];
  const dataViewList = [];

  clusters.forEach(cluster => {
    const clusterIds = cluster.children.map(v => v.id);
    const clusterFieldInfo = [
      ...dimensionFieldInfo,
      ...measureFieldInfo.filter(info => clusterIds.includes(info.fieldName))
    ];
    const clusterFieldIds = clusterFieldInfo.map(v => v.fieldName);

    let newContext = {
      dataTable: dataTable.map(v => pick(v, clusterFieldIds)),
      fieldInfo: clusterFieldInfo
    };
    cleaners.forEach(func => {
      newContext = func(newContext);
    });

    let validCellCount = 0;
    let validMeasureCellCount = 0;
    newContext.dataTable.forEach(item => {
      newContext.fieldInfo.forEach(info => {
        const isValid = isValidData(item[info.fieldName]);
        // Invalid cells count against the view.
        validCellCount += isValid ? 1 : -1;
        if (isValid && info.role === ROLE.MEASURE) {
          validMeasureCellCount += 1;
        }
      });
    });

    if (validCellCount > 0) {
      dataViewList.push({
        fieldInfo: newContext.fieldInfo,
        dataTable: newContext.dataTable,
        validColumnLength: clusterIds.length,
        validRowLength: newContext.dataTable.length,
        validMeasureCellCount,
        validCellCount
      });
    }
  });

  // Descending on each key; fully tied views compare as 0 so the comparator is
  // consistent and tied views keep their cluster order.
  dataViewList.sort(
    (a, b) =>
      b.validCellCount - a.validCellCount ||
      b.validRowLength - a.validRowLength ||
      b.validColumnLength - a.validColumnLength
  );

  if (dataViewList.length === 0) {
    return context;
  }
  return Object.assign({}, context, {
    originDataTable: dataTable,
    fieldInfo: dataViewList[0].fieldInfo,
    dataTable: dataViewList[0].dataTable,
    clusterResult: dataViewList
  });
};

/**
 * Tells whether all data views can be merged into one row: there must be at
 * least one view, each with exactly one row and without any DATE or TIME field.
 *
 * @param {object[]} clusterResult - Data views, each with `fieldInfo` and `dataTable`.
 * @returns {boolean} `true` when the views are mergeable.
 */
export const canMergeClusterResult = clusterResult => {
  if (!clusterResult.length) {
    return false;
  }
  const temporalTypes = [DataType.DATE, DataType.TIME];
  return clusterResult.every(
    ({ fieldInfo, dataTable }) =>
      dataTable.length === 1 && !fieldInfo.some(info => temporalTypes.includes(info.type))
  );
};

/**
 * Merges single-row data views into one view: the measure fields of all views
 * are concatenated and their values from each view's first row are gathered
 * into one row. Non-measure fields are not carried over.
 *
 * @param {object[]} clusterResult - Data views, each with `fieldInfo` and `dataTable`.
 * @returns {{fieldInfo: object[], dataTable: object[]}} The merged view.
 */
export const mergeClusterDataView = clusterResult => {
  const newFieldInfo = [];
  const mergedRow = {};
  clusterResult.forEach(({ fieldInfo, dataTable }) => {
    const measureFields = fieldInfo.filter(info => info.role === ROLE.MEASURE);
    newFieldInfo.push(...measureFields);
    measureFields.forEach(field => {
      mergedRow[field.fieldName] = dataTable[0][field.fieldName];
    });
  });
  return { fieldInfo: newFieldInfo, dataTable: [mergedRow] };
};

/**
 * Tells whether two field-info lists have the same length and every field of
 * `a` has a same-named field in `b` with equal role, type, unit and
 * dateGranularity.
 *
 * @param {object[]} a - First field-info list.
 * @param {object[]} b - Second field-info list.
 * @returns {boolean} `true` when the lists describe the same fields.
 */
const isSameFields = (a, b) =>
  a.length === b.length &&
  a.every(info => {
    const matchInB = b.find(item => item.fieldName === info.fieldName);
    return (
      matchInB &&
      matchInB.role === info.role &&
      matchInB.type === info.type &&
      matchInB.unit === info.unit &&
      matchInB.dateGranularity === info.dateGranularity
    );
  });

/**
 * Finds the longer of the common prefix and the common suffix of two strings
 * and splits both strings around it. The prefix wins on a tie.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {{strA: string, strB: string, commonStr: string}} `commonStr` is the
 *   shared prefix or suffix; `strA`/`strB` are what remains of `a`/`b` once it
 *   is removed.
 */
function longestCommonSubstringAtEdges(a, b) {
  let startLen = 0;
  while (startLen < a.length && startLen < b.length && a[startLen] === b[startLen]) {
    startLen++;
  }
  let endLen = 0;
  while (endLen < a.length && endLen < b.length && a[a.length - 1 - endLen] === b[b.length - 1 - endLen]) {
    endLen++;
  }
  if (startLen >= endLen) {
    return {
      strA: a.substring(startLen, a.length),
      strB: b.substring(startLen, b.length),
      commonStr: a.substring(0, startLen)
    };
  }
  return {
    strA: a.substring(0, a.length - endLen),
    strB: b.substring(0, b.length - endLen),
    // The common suffix is the LAST `endLen` characters of `a`.
    commonStr: a.substring(a.length - endLen, a.length)
  };
}

/**
 * Merges two contexts with identical fields into one table. The part shared by
 * both summaries (common prefix or suffix) becomes the name of a new string
 * dimension, and each row receives the non-shared part of its own summary as
 * the value of that dimension.
 *
 * @param {object} ctxA - First context (`dataTable`, `fieldInfo`, `summary`, optional `textRange`).
 * @param {object} ctxB - Second context, same shape.
 * @returns {{dataTable: object[], fieldInfo: object[], summary: string, textRange: (Array|null)}}
 *   The merged context; `textRange` spans from the start of A's range to the
 *   end of B's range, or is `null` when either range is missing.
 */
export const mergeDataTable = (ctxA, ctxB) => {
  const { dataTable: tableA, summary: summaryA, textRange: rangeA } = ctxA;
  const { dataTable: tableB, summary: summaryB, textRange: rangeB } = ctxB;
  const { strA, strB, commonStr } = longestCommonSubstringAtEdges(summaryA, summaryB);

  const newFieldInfo = {
    fieldName: commonStr,
    description: `${summaryA} and ${summaryB}`,
    role: ROLE.DIMENSION,
    type: DataType.STRING
  };
  const newDataTable = [
    ...tableA.map(v => Object.assign({}, v, { [commonStr]: strA })),
    ...tableB.map(v => Object.assign({}, v, { [commonStr]: strB }))
  ];
  const textRange = rangeA && rangeB ? [rangeA[0], rangeB[1]] : null;

  return {
    dataTable: newDataTable,
    fieldInfo: [newFieldInfo, ...ctxA.fieldInfo],
    summary: `${summaryA} and ${summaryB}`,
    textRange
  };
};
//# sourceMappingURL=utils.js.map