@visactor/vmind
Version:
<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu
70 lines (64 loc) • 3.5 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", {
value: !0
}), exports.registerMultipleDataCleanAtom = exports.MultipleDataCleanAtom = void 0;
const atom_1 = require("../../types/atom"), base_1 = require("../base"), vutils_1 = require("@visactor/vutils"), utils_1 = require("./utils"), dataClean_1 = require("./dataClean"), factory_1 = require("../../core/factory");
/**
 * Atom that applies the data-clean pipelines to every dataset held in
 * `context.datasets` and, when hierarchical clustering is enabled, replaces a
 * dataset with the data views returned by `getSplitDataViewOfDataTable`.
 */
class MultipleDataCleanAtom extends base_1.BaseAtom {
  /**
   * @param context Initial context, forwarded to `BaseAtom`.
   * @param option Atom options, forwarded to `BaseAtom`.
   */
  constructor(context, option) {
    super(context, option);
    this.name = atom_1.AtomName.MULTIPLE_DATA_CLEAN;
  }

  /**
   * Builds the context by merging the caller's context over a default that
   * contains an empty `datasets` list.
   *
   * @param context Caller-supplied context.
   * @returns The merged context object.
   */
  buildDefaultContext(context) {
    const defaults = { datasets: [] };
    return (0, vutils_1.merge)({}, defaults, context);
  }

  /**
   * Default options of this atom.
   *
   * `hierarchicalClustering`, `clusterThreshold` and `filterRatioInDataset`
   * are read directly by `_runWithOutLLM`; the remaining entries are looked up
   * by pipeline key (presumably matching the keys of `dataClean_1.pipelines` —
   * TODO confirm against `./dataClean`).
   *
   * @returns A fresh options object.
   */
  buildDefaultOptions() {
    return {
      filterSameValueColumn: true,
      needNumericalFields: true,
      measureAutoTransfer: true,
      filterSameDataItem: true,
      filterRowWithEmptyValues: true,
      rangeValueTransfer: "last",
      hierarchicalClustering: true,
      clusterThreshold: 0.4,
      filterRatioInDataset: 0.6
    };
  }

  /**
   * Reports whether an incoming context carries a different `datasets`
   * reference than the current one (identity comparison, not deep equality).
   *
   * @param context Incoming context.
   * @returns `true` when `context.datasets` is not the same reference.
   */
  shouldRunByContextUpdate(context) {
    return context.datasets !== this.context.datasets;
  }

  /**
   * Cleans every dataset of the current context and stores the outcome back
   * through `updateContext({ datasets })`.
   *
   * Per dataset:
   * 1. each pipeline step whose option is not `false` is applied in order and
   *    its result is shallow-merged over the working copy;
   * 2. when `options.hierarchicalClustering` is truthy, the dataset is split
   *    into data views and only the views passing the ratio filter are kept;
   * 3. a list with a single view is unwrapped; a single dataset is kept only
   *    when its `dataTable` is non-empty, a list of views is appended as-is.
   *
   * @returns The atom context after the update.
   */
  _runWithOutLLM() {
    const { datasets } = this.context;
    const { filterRatioInDataset } = this.options;
    const result = [];

    datasets.forEach((dataset) => {
      let newDataset = Object.assign({}, dataset);

      dataClean_1.pipelines.forEach(({ key, func }) => {
        // Fix: an option explicitly set to `false` disables its step. This
        // used to be ignored for "measureAutoTransfer", whose gate was only
        // the dataset text, so the option could never turn the step off.
        if (this.options[key] === false) {
          return;
        }
        // The "measureAutoTransfer" step receives the dataset text as its
        // argument instead of the option value.
        const currentOption = key === "measureAutoTransfer"
          ? (dataset == null ? undefined : dataset.text)
          : this.options[key];
        // A text that is literally `false` still skips the step, as before.
        if (currentOption !== false) {
          newDataset = Object.assign(Object.assign({}, newDataset), func(newDataset, currentOption));
        }
      });

      if (this.options.hierarchicalClustering) {
        const { clusterResult = [] } = (0, utils_1.getSplitDataViewOfDataTable)(newDataset, this.options.clusterThreshold);
        if (clusterResult.length) {
          // The first view is used as the reference size (presumably the
          // list is ordered by valid cell count — TODO confirm in ./utils).
          const maxValidCount = clusterResult[0].validCellCount;
          const cleanedDataset = newDataset;
          newDataset = clusterResult
            .filter((dataView) => {
              const { validCellCount, validMeasureCellCount, validColumnLength, validRowLength } = dataView;
              const isLargeEnough = validCellCount / maxValidCount >= filterRatioInDataset;
              const isFullyMeasured = validMeasureCellCount === validColumnLength * validRowLength;
              return isLargeEnough || isFullyMeasured;
            })
            .map((dataView) => Object.assign(Object.assign({}, cleanedDataset), {
              dataTable: dataView.dataTable,
              fieldInfo: dataView.fieldInfo
            }));
        }
      }

      // A single surviving view is treated like a plain dataset.
      if ((0, vutils_1.isArray)(newDataset) && newDataset.length === 1) {
        newDataset = newDataset[0];
      }

      if ((0, vutils_1.isArray)(newDataset)) {
        result.push(...newDataset);
      } else {
        const table = newDataset.dataTable;
        // Datasets without any row are dropped.
        if (table != null && table.length > 0) {
          result.push(newDataset);
        }
      }
    });

    this.updateContext({
      datasets: result
    });
    return this.context;
  }
}
exports.MultipleDataCleanAtom = MultipleDataCleanAtom;
/**
 * Registers `MultipleDataCleanAtom` in the atom factory under
 * `AtomName.MULTIPLE_DATA_CLEAN`.
 */
const registerMultipleDataCleanAtom = () => {
  const { MULTIPLE_DATA_CLEAN: atomName } = atom_1.AtomName;
  factory_1.Factory.registerAtom(atomName, MultipleDataCleanAtom);
};
exports.registerMultipleDataCleanAtom = registerMultipleDataCleanAtom;
//# sourceMappingURL=multiple.js.map