/*
 * Package: @eagleoutice/flowr
 * Version: (not specified)
 * Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * Original listing: 60 lines (59 loc) • 3.31 kB
 * Language: TypeScript
 */
import type { ClusterReport } from './clusterer';
import type { Table } from '../../../util/files';
/**
* A conventional histogram (e.g., created by {@link histogramFromNumbers}).
* Can be converted to a {@link Table} by {@link histograms2table}.
* As described in {@link histogramFromNumbers}, there will always be a special extra bin for the minimum value.
*
* Besides the bins themselves, a histogram carries summary statistics
* (`min`, `max`, `mean`, `std`, `median`) of the numbers it includes.
* Only `name` is declared `readonly`; all other fields are mutable.
*/
export interface Histogram {
/** A name intended for humans to know what the histogram is about. */
readonly name: string;
/**
* Number of values located in each bin, one entry per bin.
* NOTE(review): the position of the special minimum bin within this array is not visible here — presumably it is the first entry; confirm against the implementation.
*/
bins: number[];
/** The configured size of each bin (stored explicitly to avoid semantic confusion with floating point arithmetic/problems with different rounding schemes) */
binSize: number;
/** Minimum value encountered (inclusive minimum of the underlying value range) */
min: number;
/** Maximum value encountered (inclusive maximum of the underlying value range) */
max: number;
/** Average of the included numbers */
mean: number;
/**
* Standard deviation of the included numbers.
* NOTE(review): whether this is the population or the sample standard deviation is not visible here — confirm against the implementation.
*/
std: number;
/** Median of the included numbers */
median: number;
}
/**
* Produces column-wise histogram information based on a {@link ClusterReport}.
*
* As an example, suppose you want histograms for the Assignments feature:
* by default, one histogram is produced for each clustered value (e.g., for `<-`).
* Pass `filter` to restrict the output to selected values.
*
* @param report - The report to collect histogram information from
* @param binSize - Size of each bin (see {@link histogramFromNumbers} for details on why we do not specify the bin-count)
* @param relateValuesToNumberOfLines - If true, each value (like `<-` appeared in file 'x' exactly `N` times) will be divided by the number of lines in the file 'x'.
* @param filter - If given, only produce histograms for the given values (a rest parameter, i.e., the values are passed as additional trailing arguments)
* @returns The produced histograms, one per clustered value (restricted to the values named in `filter`, if any are given)
*/
export declare function histogramsFromClusters(report: ClusterReport, binSize: number, relateValuesToNumberOfLines: boolean, ...filter: string[]): Histogram[];
/**
* Produces a histogram from a list of numbers.
*
* Because we need to create several histograms of different datasets and want to compare them,
* this function deliberately does *not* take the desired number of bins and derive the bin-size from the data
* (which would be something like `Math.ceil((max - min + 1) / bins)`).
* Instead, we require the bin-size to be given.
* There *always* will be an extra bin for the minimum value.
*
* @param name - Name of the histogram, intended for humans (see the `name` field of {@link Histogram})
* @param binSize - Size of each bin
* @param values - The numbers to build the histogram from
* (NOTE(review): the behavior for an empty array is not visible from this declaration — confirm against the implementation)
* @returns The histogram of the given numbers
*/
export declare function histogramFromNumbers(name: string, binSize: number, values: number[]): Histogram;
/**
* Takes an array of histograms created by {@link histogramFromNumbers} and produces a CSV table from it.
* All histograms must have the same bin-size for this function to work.
*
* The table has the following columns:
* - `bin` - The corresponding bin number
* - `from` - The exclusive lower bound of the bin
* - `to` - The inclusive upper bound of the bin
* - one column per histogram, named after the histogram and containing its count of values in the corresponding bin
*
* @param histograms - The histograms to convert (assumed to have the same ranges and bins)
* @param countAsDensity - If true, the count is divided by the total number of values (individually for each histogram, similar to pgfplots `hist/density` option).
* Optional (NOTE(review): the default used when omitted is not visible in this declaration — presumably `false`, i.e., raw counts; confirm against the implementation)
* @returns The resulting {@link Table}
*/
export declare function histograms2table(histograms: Histogram[], countAsDensity?: boolean): Table;