UNPKG

@ndbx/runtime

Version:

The `@ndbx/runtime` package provides a runtime environment to embed NodeBox visualizations directly into React applications. NodeBox is a powerful tool for creating interactive and generative visualizations, and this runtime allows you to integrate those visualizations into your own application.

188 lines (163 loc) 4.8 kB
/**
 * Sample data using various sampling strategies.
 *
 * This node provides multiple methods for sampling data:
 * - Size-based: Random count, percentage, first/last n rows, every n rows
 * - Statistical: Stratified, Quartile
 *
 * @category Data Transformation
 */
import { quantile } from "https://esm.sh/d3-array@3.2.4";

// Node definition: declares one table input, the sampling parameters and one
// table output on `node`, then installs `node.onRender`, which reads the
// current parameter values, samples the input rows and writes the result to
// the output.
// NOTE(review): `node` is supplied by the host runtime; the port factories
// (tableIn/stringIn/numberIn/tableOut) and pushSection/popSection are used
// exactly as in the original — their semantics are not visible from this file.
export default function (node) {
  const dataIn = node.tableIn({ name: "data", label: "Input Data" });

  // Sampling Strategy Selection
  node.pushSection({ name: "Sampling Strategy" });
  const strategyIn = node.stringIn({
    name: "strategy",
    label: "Strategy",
    value: "random_count",
    choices: [
      ["random_count", "Random (Count)"],
      ["random_percent", "Random (Percentage)"],
      ["first_n", "First N Rows"],
      ["last_n", "Last N Rows"],
      ["systematic", "Every Nth Row"],
      ["stratified", "Stratified"],
      ["quartile", "Quartile-based"],
    ],
  });
  node.popSection();

  // Size-based Parameters
  node.pushSection({ name: "Size Parameters" });
  const countIn = node.numberIn({
    name: "count",
    label: "Count",
    value: 100,
  });
  const percentageIn = node.numberIn({
    name: "percentage",
    label: "Percentage",
    value: 10,
    min: 0,
    max: 100,
  });
  const stepIn = node.numberIn({
    name: "step",
    label: "Step Size",
    value: 1,
  });
  node.popSection();

  // Statistical Parameters
  node.pushSection({ name: "Statistical Parameters" });
  const groupByIn = node.stringIn({
    name: "groupBy",
    label: "Group Attribute",
  });
  const quartileIn = node.stringIn({
    name: "quartile",
    label: "Quartile",
    value: "1",
    choices: [
      ["0", "Q1 (0-25%)"],
      ["1", "Q2 (25-50%)"],
      ["2", "Q3 (50-75%)"],
      ["3", "Q4 (75-100%)"],
    ],
  });
  node.popSection();

  // Seed for reproducibility
  node.pushSection({ name: "Random" });
  const seedIn = node.numberIn({
    name: "seed",
    label: "Random Seed",
    value: 1234,
  });
  node.popSection();

  const dataOut = node.tableOut({ name: "output", label: "Sampled Data" });

  /**
   * Create a seeded pseudo-random generator (Lehmer / Park-Miller "minimal
   * standard": multiplier 16807, modulus 2^31 - 1).
   *
   * The generator is only valid while its state stays an integer in
   * [1, modulus - 1], so the seed is normalized into that range first:
   * fractional seeds are truncated, negative seeds are wrapped, and seeds
   * that reduce to 0 (or are not finite numbers) fall back to 1. Integer
   * seeds already inside the valid range produce the same sequence as before.
   *
   * @param {number} seed - Requested seed.
   * @returns {() => number} Function returning the next value in [0, 1).
   */
  function seededRandom(seed) {
    const MODULUS = 2147483647;
    const MULTIPLIER = 16807;

    let state = Number.isFinite(seed) ? Math.trunc(seed) % MODULUS : 1;
    if (state < 0) state += MODULUS;
    // A zero state would stay zero forever and yield negative "random" values.
    if (state === 0) state = 1;

    return function () {
      state = (state * MULTIPLIER) % MODULUS;
      return (state - 1) / (MODULUS - 1);
    };
  }

  /**
   * Convert a user-supplied row count into a non-negative whole number.
   *
   * @param {number} value - Raw count parameter.
   * @returns {number} 0 for NaN or negative input, otherwise the truncated value.
   */
  function toRowCount(value) {
    const numeric = Number(value);
    if (Number.isNaN(numeric)) return 0;
    return Math.max(0, Math.trunc(numeric));
  }

  // Sampling functions

  /**
   * Pick `count` distinct rows at random (without replacement).
   *
   * @param {Array} data - Rows to sample from.
   * @param {number} count - Number of rows wanted.
   * @param {() => number} random - Generator returning values in [0, 1).
   * @returns {Array} `data` itself when `count` covers every row; otherwise
   *   the chosen rows in the order their indices were drawn.
   */
  function randomSample(data, count, random) {
    if (count >= data.length) return data;

    // Rejection sampling: the Set discards indices that were already drawn.
    const sampled = new Set();
    while (sampled.size < count) {
      sampled.add(Math.floor(random() * data.length));
    }
    return Array.from(sampled, (i) => data[i]);
  }

  /**
   * Sample each group in proportion to its share of the data.
   *
   * Group sizes are rounded independently, so the total number of returned
   * rows can differ slightly from `count`, and groups whose share rounds to
   * zero are omitted.
   *
   * @param {Array<Object>} data - Rows to sample from.
   * @param {number} count - Target total number of rows.
   * @param {string} stratifyBy - Property whose value defines the group.
   * @param {() => number} random - Generator returning values in [0, 1).
   * @returns {Array<Object>} Sampled rows, grouped in first-seen group order.
   */
  function stratifiedSample(data, count, stratifyBy, random) {
    // Group data by stratification column
    const groups = new Map();
    for (const row of data) {
      const key = row[stratifyBy];
      if (!groups.has(key)) groups.set(key, []);
      groups.get(key).push(row);
    }

    // Calculate proportional sample sizes for each group
    const total = data.length;
    const result = [];
    for (const group of groups.values()) {
      const groupCount = Math.round((group.length / total) * count);
      if (groupCount > 0) {
        // Push row by row: spreading a very large array into push() can
        // exceed the engine's argument limit.
        for (const row of randomSample(group, groupCount, random)) {
          result.push(row);
        }
      }
    }
    return result;
  }

  /**
   * Keep the rows whose `column` value falls inside one quartile.
   *
   * The lower bound is inclusive. The upper bound is exclusive for the first
   * three quartiles, so a row never lands in two quartiles, and inclusive for
   * the top quartile, so the maximum value is not dropped.
   *
   * @param {Array<Object>} data - Rows to filter.
   * @param {number} quartile - Quartile index, 0 (lowest) to 3 (highest).
   * @param {string} column - Property holding the value to rank by.
   * @returns {Array<Object>} Rows inside the requested quartile.
   */
  function quartileSample(data, quartile, column) {
    const values = data.map((row) => row[column]);
    const lowerP = quartile * 0.25;
    const lowerBound = quantile(values, lowerP);
    const upperBound = quantile(values, lowerP + 0.25);
    const isTopQuartile = quartile >= 3;

    return data.filter((row) => {
      const value = row[column];
      if (!(value >= lowerBound)) return false;
      return isTopQuartile ? value <= upperBound : value < upperBound;
    });
  }

  node.onRender = () => {
    const data = dataIn.value;
    if (!data || data.length === 0) {
      dataOut.set([]);
      return;
    }

    const random = seededRandom(seedIn.value);
    let result = [];

    switch (strategyIn.value) {
      case "random_count":
        result = randomSample(data, countIn.value, random);
        break;

      case "random_percent": {
        const count = Math.round((data.length * percentageIn.value) / 100);
        result = randomSample(data, count, random);
        break;
      }

      case "first_n":
        result = data.slice(0, toRowCount(countIn.value));
        break;

      case "last_n": {
        // slice(-0) is slice(0) and would return every row, so a count of
        // zero has to be handled explicitly.
        const count = toRowCount(countIn.value);
        result = count === 0 ? [] : data.slice(-count);
        break;
      }

      case "systematic": {
        // A step of 0 (or NaN) would make `i % step` NaN and silently drop
        // every row; fall back to keeping each row instead.
        const rawStep = stepIn.value;
        const step =
          Number.isFinite(rawStep) && rawStep !== 0 ? Math.abs(rawStep) : 1;
        result = data.filter((_, i) => i % step === 0);
        break;
      }

      case "stratified":
        if (!groupByIn.value) {
          result = data;
          break;
        }
        result = stratifiedSample(data, countIn.value, groupByIn.value, random);
        break;

      case "quartile":
        // The quartile strategy ranks rows by the "Group Attribute" column.
        if (!groupByIn.value) {
          result = data;
          break;
        }
        result = quartileSample(
          data,
          Number.parseInt(quartileIn.value, 10),
          groupByIn.value,
        );
        break;

      default:
        result = data;
    }

    dataOut.set(result);
  };
}