@visactor/vmind
Version:
<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu
70 lines (63 loc) • 3.2 kB
JavaScript
import { AtomName } from "../../types/atom";
import { BaseAtom } from "../base";
import { isArray, merge } from "@visactor/vutils";
import { canMergeClusterResult, getSplitDataViewOfDataTable } from "./utils";
import { pipelines } from "./dataClean";
import { Factory } from "../../core/factory";
/**
 * Atom that cleans every dataset in `context.datasets` without calling an LLM.
 *
 * Each dataset is run through the shared `pipelines` steps and, when
 * `options.hierarchicalClustering` is truthy, split into one dataset per
 * retained cluster returned by `getSplitDataViewOfDataTable`.
 */
export class MultipleDataCleanAtom extends BaseAtom {
  /**
   * @param context Initial context, forwarded to `BaseAtom`.
   * @param option Atom options, forwarded to `BaseAtom`.
   */
  constructor(context, option) {
    super(context, option);
    this.name = AtomName.MULTIPLE_DATA_CLEAN;
  }

  /**
   * Builds the context, guaranteeing a `datasets` entry (empty array by default).
   *
   * @param context Caller-supplied context merged over the defaults.
   * @returns The merged context object.
   */
  buildDefaultContext(context) {
    return merge({}, { datasets: [] }, context);
  }

  /**
   * Default options for this atom.
   *
   * Options are looked up by pipeline key in `_runWithOutLLM`; a value of
   * `false` disables the matching step. `hierarchicalClustering`,
   * `clusterThreshold` and `filterRatioInDataset` drive the cluster split.
   *
   * @returns A fresh options object.
   */
  buildDefaultOptions() {
    return {
      filterSameValueColumn: true,
      needNumericalFields: true,
      measureAutoTransfer: true,
      filterSameDataItem: true,
      filterRowWithEmptyValues: true,
      rangeValueTransfer: "last",
      hierarchicalClustering: true,
      clusterThreshold: 0.4,
      filterRatioInDataset: 0.6
    };
  }

  /**
   * Tells whether an incoming context should trigger a re-run.
   *
   * @param context Candidate context.
   * @returns `true` when `context.datasets` is not the same reference as the
   *   current `this.context.datasets` (identity comparison, not a deep one).
   */
  shouldRunByContextUpdate(context) {
    return context.datasets !== this.context.datasets;
  }

  /**
   * Cleans all datasets and stores the outcome back into the context.
   *
   * @returns The atom context after `datasets` has been replaced by the
   *   cleaned (and possibly split) datasets.
   */
  _runWithOutLLM() {
    const { datasets } = this.context;
    const { filterRatioInDataset } = this.options;
    const result = [];
    // A missing `datasets` entry is treated as "nothing to clean" instead of throwing.
    const sourceDatasets = datasets == null ? [] : datasets;

    sourceDatasets.forEach(dataset => {
      let newDataset = Object.assign({}, dataset);

      pipelines.forEach(({ key, func }) => {
        const isMeasureTransfer = key === "measureAutoTransfer";
        // The measure-transfer step receives the dataset text as its argument,
        // so the boolean flag has to be checked separately to be honoured.
        if (isMeasureTransfer && this.options.measureAutoTransfer === false) {
          return;
        }
        let currentOption;
        if (isMeasureTransfer) {
          currentOption = dataset == null ? undefined : dataset.text;
        } else {
          currentOption = this.options[key];
        }
        if (currentOption !== false) {
          // Copy first, then call the step, so the step's output overrides the snapshot.
          newDataset = Object.assign(Object.assign({}, newDataset), func(newDataset, currentOption));
        }
      });

      if (this.options.hierarchicalClustering) {
        const { clusterResult = [] } = getSplitDataViewOfDataTable(newDataset, this.options.clusterThreshold);
        if (clusterResult.length) {
          // The first cluster's valid-cell count is the reference for the ratio filter.
          const maxValidCount = clusterResult[0].validCellCount;
          const cleanedDataset = newDataset;
          newDataset = clusterResult
            .filter(dataView => {
              const { validCellCount, validMeasureCellCount, validColumnLength, validRowLength } = dataView;
              const isLargeEnough = validCellCount / maxValidCount >= filterRatioInDataset;
              const isFullyMeasured = validMeasureCellCount === validColumnLength * validRowLength;
              return isLargeEnough || isFullyMeasured;
            })
            .map(dataView =>
              Object.assign(Object.assign({}, cleanedDataset), {
                dataTable: dataView.dataTable,
                fieldInfo: dataView.fieldInfo
              })
            );
        }
      }

      // A single surviving cluster is handled like an unsplit dataset.
      if (isArray(newDataset) && newDataset.length === 1) {
        newDataset = newDataset[0];
      }

      if (isArray(newDataset)) {
        result.push(...newDataset);
      } else if (newDataset.dataTable != null && newDataset.dataTable.length > 0) {
        // Unsplit datasets are kept only when they still contain rows.
        result.push(newDataset);
      }
    });

    this.updateContext({ datasets: result });
    return this.context;
  }
}
/**
 * Registers `MultipleDataCleanAtom` with the atom `Factory` under
 * `AtomName.MULTIPLE_DATA_CLEAN`.
 *
 * @returns {void}
 */
export const registerMultipleDataCleanAtom = () => {
  const { MULTIPLE_DATA_CLEAN: atomName } = AtomName;
  Factory.registerAtom(atomName, MultipleDataCleanAtom);
};
//# sourceMappingURL=multiple.js.map