UNPKG

@visactor/vmind

Version:

<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu

70 lines (64 loc) 3.5 kB
"use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.registerMultipleDataCleanAtom = exports.MultipleDataCleanAtom = void 0;

const atom_1 = require("../../types/atom");
const base_1 = require("../base");
const vutils_1 = require("@visactor/vutils");
const utils_1 = require("./utils");
const dataClean_1 = require("./dataClean");
const factory_1 = require("../../core/factory");

/**
 * Atom that runs the data-clean pipeline over every dataset held in
 * `context.datasets` and replaces that list with the cleaned result.
 */
class MultipleDataCleanAtom extends base_1.BaseAtom {
    /**
     * @param context Forwarded unchanged to the BaseAtom constructor.
     * @param option Forwarded unchanged to the BaseAtom constructor.
     */
    constructor(context, option) {
        super(context, option);
        this.name = atom_1.AtomName.MULTIPLE_DATA_CLEAN;
    }

    /**
     * Builds the starting context by handing `{}`, `{ datasets: [] }` and the
     * supplied context to the vutils `merge` helper, in that order.
     * NOTE(review): presumably later sources win over earlier ones - confirm
     * against the vutils `merge` contract.
     *
     * @param context Caller-supplied context.
     * @returns Whatever `merge` returns for those three arguments.
     */
    buildDefaultContext(context) {
        const merge = vutils_1.merge;
        const defaults = { datasets: [] };
        return merge({}, defaults, context);
    }

    /**
     * @returns A fresh object holding the default option values of this atom.
     */
    buildDefaultOptions() {
        return {
            filterSameValueColumn: true,
            needNumericalFields: true,
            measureAutoTransfer: true,
            filterSameDataItem: true,
            filterRowWithEmptyValues: true,
            rangeValueTransfer: "last",
            hierarchicalClustering: true,
            clusterThreshold: 0.4,
            filterRatioInDataset: 0.6
        };
    }

    /**
     * @param context Incoming context to compare against the current one.
     * @returns `true` when `context.datasets` is not the very same reference
     *   as `this.context.datasets` (strict identity, no deep comparison).
     */
    shouldRunByContextUpdate(context) {
        const incoming = context.datasets;
        const current = this.context.datasets;
        return incoming !== current;
    }

    /**
     * Cleans every dataset of the current context and stores the outcome via
     * `updateContext({ datasets })`.
     *
     * Per dataset:
     *  1. every entry of `pipelines` (from ./dataClean) is applied in order,
     *     unless the option resolved for that entry is strictly `false`;
     *  2. when `options.hierarchicalClustering` is truthy, the dataset is
     *     handed to `getSplitDataViewOfDataTable` and, if that yields any
     *     cluster, replaced by the clusters that pass the ratio filter;
     *  3. a single surviving cluster is unwrapped, several are all appended,
     *     and a plain dataset is appended only when its `dataTable` has a
     *     length greater than zero.
     *
     * @returns `this.context`, read after `updateContext` has been called.
     */
    _runWithOutLLM() {
        const { datasets } = this.context;
        const { filterRatioInDataset } = this.options;
        const cleaned = [];

        datasets.forEach((dataset) => {
            let working = Object.assign({}, dataset);

            dataClean_1.pipelines.forEach(({ key, func }) => {
                let stepOption;
                if (key === "measureAutoTransfer") {
                    // NOTE(review): this step receives the dataset's `text`
                    // and is gated on it; `this.options.measureAutoTransfer`
                    // is never read in this method - confirm that is intended.
                    stepOption = dataset == null ? void 0 : dataset.text;
                }
                else {
                    stepOption = this.options[key];
                }
                if (stepOption === false) {
                    return;
                }
                // Snapshot first, then run the step, then overlay its result:
                // the same order the nested Object.assign calls imply.
                const snapshot = Object.assign({}, working);
                const patch = func(working, stepOption);
                working = Object.assign(snapshot, patch);
            });

            if (this.options.hierarchicalClustering) {
                const splitDataView = utils_1.getSplitDataViewOfDataTable;
                const { clusterResult = [] } = splitDataView(working, this.options.clusterThreshold);

                if (clusterResult.length) {
                    // The first cluster's cell count is the reference for the ratio test.
                    const maxValidCount = clusterResult[0].validCellCount;
                    // Every kept cluster is layered on top of the pre-split dataset.
                    const template = working;

                    const keepCluster = (dataView) => {
                        const { validCellCount, validMeasureCellCount, validColumnLength, validRowLength } = dataView;
                        if (validCellCount / maxValidCount >= filterRatioInDataset) {
                            return true;
                        }
                        return validMeasureCellCount === validColumnLength * validRowLength;
                    };
                    const toDataset = (dataView) => {
                        const overrides = {
                            dataTable: dataView.dataTable,
                            fieldInfo: dataView.fieldInfo
                        };
                        return Object.assign(Object.assign({}, template), overrides);
                    };

                    working = clusterResult.filter(keepCluster).map(toDataset);
                }
            }

            // Exactly one surviving cluster is treated like a plain dataset.
            if ((0, vutils_1.isArray)(working) && working.length === 1) {
                working = working[0];
            }

            if ((0, vutils_1.isArray)(working)) {
                cleaned.push(...working);
                return;
            }

            const table = working.dataTable;
            const rowCount = table === null || table === void 0 ? void 0 : table.length;
            if (rowCount > 0) {
                cleaned.push(working);
            }
        });

        this.updateContext({ datasets: cleaned });
        return this.context;
    }
}

exports.MultipleDataCleanAtom = MultipleDataCleanAtom;

/**
 * Registers MultipleDataCleanAtom with the Factory under
 * `AtomName.MULTIPLE_DATA_CLEAN`.
 */
const registerMultipleDataCleanAtom = () => {
    const atomName = atom_1.AtomName.MULTIPLE_DATA_CLEAN;
    factory_1.Factory.registerAtom(atomName, MultipleDataCleanAtom);
};

exports.registerMultipleDataCleanAtom = registerMultipleDataCleanAtom;
//# sourceMappingURL=multiple.js.map