UNPKG

@visactor/vmind

Version:

<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu

70 lines (63 loc) 3.2 kB
import { AtomName } from "../../types/atom";
import { BaseAtom } from "../base";
import { isArray, merge } from "@visactor/vutils";
import { canMergeClusterResult, getSplitDataViewOfDataTable } from "./utils";
import { pipelines } from "./dataClean";
import { Factory } from "../../core/factory";

/**
 * Atom that cleans every dataset found in `context.datasets`.
 *
 * Each dataset is passed through the shared `pipelines` steps and, when
 * `hierarchicalClustering` is enabled, split into one dataset per retained
 * cluster data view. The cleaned datasets replace `context.datasets`.
 */
export class MultipleDataCleanAtom extends BaseAtom {
  /**
   * @param context - Initial context, forwarded unchanged to `BaseAtom`.
   * @param option - Atom options, forwarded unchanged to `BaseAtom`.
   */
  constructor(context, option) {
    super(context, option);
    this.name = AtomName.MULTIPLE_DATA_CLEAN;
  }

  /**
   * Merges the given context over a default that has an empty `datasets` list.
   *
   * @param context - Caller-supplied context.
   * @returns The merged context object.
   */
  buildDefaultContext(context) {
    const defaults = { datasets: [] };
    return merge({}, defaults, context);
  }

  /**
   * @returns The default option set for this atom. Pipeline steps are looked up
   *   in these options by their `key`; a value of `false` skips the step.
   */
  buildDefaultOptions() {
    return {
      filterSameValueColumn: true,
      needNumericalFields: true,
      measureAutoTransfer: true,
      filterSameDataItem: true,
      filterRowWithEmptyValues: true,
      rangeValueTransfer: "last",
      hierarchicalClustering: true,
      clusterThreshold: 0.4,
      filterRatioInDataset: 0.6
    };
  }

  /**
   * @param context - Incoming context update.
   * @returns `true` when the update carries a different `datasets` reference
   *   than the one currently held by this atom.
   */
  shouldRunByContextUpdate(context) {
    return context.datasets !== this.context.datasets;
  }

  /**
   * Cleans all datasets without calling an LLM and stores the outcome in the
   * context.
   *
   * @returns The atom context after `datasets` has been replaced by the
   *   cleaned list.
   */
  _runWithOutLLM() {
    const sourceDatasets = this.context.datasets;
    const minRatio = this.options.filterRatioInDataset;
    const cleaned = [];

    sourceDatasets.forEach((dataset) => {
      let current = Object.assign({}, dataset);

      pipelines.forEach(({ key, func }) => {
        // NOTE(review): the "measureAutoTransfer" step receives the dataset's
        // `text` instead of the option of the same name, so
        // `options.measureAutoTransfer` is never consulted here — confirm
        // whether that is intended.
        let stepOption;
        if (key === "measureAutoTransfer") {
          stepOption = dataset == null ? undefined : dataset.text;
        } else {
          stepOption = this.options[key];
        }

        // Only an explicit `false` disables a step.
        if (stepOption === false) {
          return;
        }

        // Copy first, then run the step, then overlay the step's result.
        const base = Object.assign({}, current);
        const patch = func(current, stepOption);
        current = Object.assign(base, patch);
      });

      if (this.options.hierarchicalClustering) {
        const { clusterResult: views = [] } = getSplitDataViewOfDataTable(
          current,
          this.options.clusterThreshold
        );

        if (views.length) {
          // Ratios are measured against the first cluster's valid-cell count.
          const referenceCount = views[0].validCellCount;
          const template = current;

          const retained = views.filter((view) => {
            if (view.validCellCount / referenceCount >= minRatio) {
              return true;
            }
            return (
              view.validMeasureCellCount ===
              view.validColumnLength * view.validRowLength
            );
          });

          current = retained.map((view) =>
            Object.assign(Object.assign({}, template), {
              dataTable: view.dataTable,
              fieldInfo: view.fieldInfo
            })
          );
        }
      }

      // A lone cluster is treated like a plain, unsplit dataset.
      if (isArray(current) && current.length === 1) {
        current = current[0];
      }

      if (isArray(current)) {
        cleaned.push(...current);
        return;
      }

      // Unsplit datasets are kept only when their table has rows.
      const table = current.dataTable;
      if (table != null && table.length > 0) {
        cleaned.push(current);
      }
    });

    this.updateContext({ datasets: cleaned });
    return this.context;
  }
}

/**
 * Registers `MultipleDataCleanAtom` with the atom factory under
 * `AtomName.MULTIPLE_DATA_CLEAN`.
 */
export const registerMultipleDataCleanAtom = () => {
  Factory.registerAtom(AtomName.MULTIPLE_DATA_CLEAN, MultipleDataCleanAtom);
};
//# sourceMappingURL=multiple.js.map