UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

60 lines (59 loc) 3.31 kB
import type { ClusterReport } from './clusterer';
import type { Table } from '../../../util/files';

/**
 * Describes a conventional histogram, for example one built by {@link histogramFromNumbers}.
 * Use {@link histograms2table} to turn histograms into a {@link Table}.
 * As explained for {@link histogramFromNumbers}, a dedicated bin for the minimum is always present.
 */
export interface Histogram {
    /** Human-readable label that tells what this histogram is about. */
    readonly name: string;
    /** Values located in each bin (indexed by bin). */
    bins: number[];
    /**
     * The configured size of every bin.
     * Kept explicitly so that floating point arithmetic or differing rounding schemes
     * cannot cause semantic confusion about the size.
     */
    binSize: number;
    /** Smallest value encountered (the inclusive minimum of the underlying value range). */
    min: number;
    /** Largest value encountered (the inclusive maximum of the underlying value range). */
    max: number;
    /** Average of the included numbers. */
    mean: number;
    /** Standard deviation of the included numbers. */
    std: number;
    /** Median of the included numbers. */
    median: number;
}

/**
 * Collects column-wise histogram information from a {@link ClusterReport}.
 *
 * Suppose you want histograms for the Assignments feature:
 * unless restricted with `filter`, one histogram is produced for each clustered value.
 *
 * @param report                      - The report from which the histogram information is collected
 * @param binSize                     - Size of each bin ({@link histogramFromNumbers} explains why the bin-count is not specified instead)
 * @param relateValuesToNumberOfLines - If true, every value (e.g., `<-` appeared in file 'x' exactly `N` times) is divided by the number of lines in the file 'x'
 * @param filter                      - If given, histograms are only produced for the given values
 */
export declare function histogramsFromClusters(
    report: ClusterReport,
    binSize: number,
    relateValuesToNumberOfLines: boolean,
    ...filter: string[]
): Histogram[];

/**
 * Builds a histogram from a list of numbers.
 *
 * Several histograms of different datasets have to be created and compared with each other.
 * Therefore, this function does not take the desired number of bins and derive the bin-size
 * from the data (via `Math.ceil((max - min + 1) / bins)`); the bin-size has to be given instead.
 * An extra bin for the minimum value is *always* present.
 */
export declare function histogramFromNumbers(
    name: string,
    binSize: number,
    values: number[]
): Histogram;

/**
 * Converts an array of histograms created by {@link histogramFromNumbers} into a CSV table.
 * All of them must share the same bin-size for this function to work.
 *
 * Columns of the resulting table:
 * - `bin` - The corresponding bin number
 * - `from` - The exclusive lower bound of the bin
 * - `to` - The inclusive upper bound of the bin
 * - one column named after each histogram, holding its count of values in the corresponding bin
 *
 * @param histograms     - The histograms to convert (assumed to have the same ranges and bins)
 * @param countAsDensity - If true, the count is divided by the total number of values (individually for each histogram, similar to the pgfplots `hist/density` option)
 */
export declare function histograms2table(
    histograms: Histogram[],
    countAsDensity?: boolean
): Table;