UNPKG

herta

Version:

Advanced mathematics framework for scientific, engineering, and financial applications

465 lines (394 loc) 13.4 kB
/** * Advanced statistical analysis module for herta.js * Provides statistical functions for scientific data analysis */ const Decimal = require('decimal.js'); // Statistics module const statistics = {}; /** * Descriptive statistics functions */ statistics.descriptive = {}; /** * Calculate the mean of an array of values * @param {Array} data - Array of numerical values * @returns {number} - The mean value */ statistics.descriptive.mean = function (data) { if (!Array.isArray(data) || data.length === 0) { throw new Error('Input must be a non-empty array'); } const sum = data.reduce((acc, val) => acc + val, 0); return sum / data.length; }; /** * Calculate the median of an array of values * @param {Array} data - Array of numerical values * @returns {number} - The median value */ statistics.descriptive.median = function (data) { if (!Array.isArray(data) || data.length === 0) { throw new Error('Input must be a non-empty array'); } // Sort the data const sorted = [...data].sort((a, b) => a - b); const mid = Math.floor(sorted.length / 2); if (sorted.length % 2 === 0) { // Even number of elements return (sorted[mid - 1] + sorted[mid]) / 2; } // Odd number of elements return sorted[mid]; }; /** * Calculate the variance of an array of values * @param {Array} data - Array of numerical values * @param {boolean} [sample=true] - If true, calculate sample variance, otherwise population variance * @returns {number} - The variance */ statistics.descriptive.variance = function (data, sample = true) { if (!Array.isArray(data) || data.length === 0) { throw new Error('Input must be a non-empty array'); } const mean = statistics.descriptive.mean(data); const squaredDiffs = data.map((val) => (val - mean) ** 2); const sum = squaredDiffs.reduce((acc, val) => acc + val, 0); return sample ? sum / (data.length - 1) : sum / data.length; }; /** * Calculate the standard deviation of an array of values * @param {Array} data - Array of numerical values * @param {boolean} [sample=true] - If true, calculate sample standard deviation, otherwise population * @returns {number} - The standard deviation */ statistics.descriptive.standardDeviation = function (data, sample = true) { return Math.sqrt(statistics.descriptive.variance(data, sample)); }; /** * Calculate the covariance between two arrays of values * @param {Array} dataX - First array of numerical values * @param {Array} dataY - Second array of numerical values * @param {boolean} [sample=true] - If true, calculate sample covariance, otherwise population * @returns {number} - The covariance */ statistics.descriptive.covariance = function (dataX, dataY, sample = true) { if (!Array.isArray(dataX) || !Array.isArray(dataY) || dataX.length !== dataY.length) { throw new Error('Inputs must be arrays of equal length'); } const meanX = statistics.descriptive.mean(dataX); const meanY = statistics.descriptive.mean(dataY); let sum = 0; for (let i = 0; i < dataX.length; i++) { sum += (dataX[i] - meanX) * (dataY[i] - meanY); } return sample ? sum / (dataX.length - 1) : sum / dataX.length; }; /** * Calculate the correlation coefficient between two arrays of values * @param {Array} dataX - First array of numerical values * @param {Array} dataY - Second array of numerical values * @returns {number} - The correlation coefficient (Pearson's r) */ statistics.descriptive.correlation = function (dataX, dataY) { const covariance = statistics.descriptive.covariance(dataX, dataY); const stdDevX = statistics.descriptive.standardDeviation(dataX); const stdDevY = statistics.descriptive.standardDeviation(dataY); return covariance / (stdDevX * stdDevY); }; /** * Probability distributions */ statistics.distributions = {}; /** * Normal (Gaussian) distribution functions */ statistics.distributions.normal = { /** * Calculate the probability density function (PDF) of the normal distribution * @param {number} x - The value to evaluate * @param {number} [mean=0] - The mean of the distribution * @param {number} [stdDev=1] - The standard deviation of the distribution * @returns {number} - The PDF value */ pdf(x, mean = 0, stdDev = 1) { const variance = stdDev * stdDev; return (1 / Math.sqrt(2 * Math.PI * variance)) * Math.exp(-((x - mean) ** 2) / (2 * variance)); }, /** * Calculate the cumulative distribution function (CDF) of the normal distribution * @param {number} x - The value to evaluate * @param {number} [mean=0] - The mean of the distribution * @param {number} [stdDev=1] - The standard deviation of the distribution * @returns {number} - The CDF value */ cdf(x, mean = 0, stdDev = 1) { // Error function approximation for normal CDF const z = (x - mean) / (stdDev * Math.sqrt(2)); return 0.5 * (1 + statistics.special.erf(z)); }, /** * Generate random samples from a normal distribution * @param {number} [mean=0] - The mean of the distribution * @param {number} [stdDev=1] - The standard deviation of the distribution * @param {number} [n=1] - Number of samples to generate * @returns {Array|number} - Array of samples or a single sample if n=1 */ sample(mean = 0, stdDev = 1, n = 1) { // Box-Muller transform for normal distribution sampling function generateSample() { let u1; let u2; do { u1 = Math.random(); u2 = Math.random(); } while (u1 <= Number.EPSILON); const z0 = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2); return mean + stdDev * z0; } if (n === 1) { return generateSample(); } const samples = []; for (let i = 0; i < n; i++) { samples.push(generateSample()); } return samples; } }; /** * Student's t-distribution functions */ statistics.distributions.t = { /** * Calculate the probability density function (PDF) of the t-distribution * @param {number} x - The value to evaluate * @param {number} dof - Degrees of freedom * @returns {number} - The PDF value */ pdf(x, dof) { if (dof <= 0) { throw new Error('Degrees of freedom must be positive'); } const numerator = statistics.special.gamma((dof + 1) / 2); const denominator = Math.sqrt(dof * Math.PI) * statistics.special.gamma(dof / 2); return (numerator / denominator) * (1 + (x * x) / dof) ** (-(dof + 1) / 2); } }; /** * Hypothesis testing functions */ statistics.hypothesis = {}; /** * Perform a one-sample t-test * @param {Array} data - Sample data * @param {number} mu - Population mean to test against * @param {Object} [options] - Additional options * @returns {Object} - Test results including t-statistic, p-value, etc. */ statistics.hypothesis.tTest = function (data, mu, options = {}) { const defaultOptions = { alpha: 0.05, alternative: 'two-sided' // 'two-sided', 'less', 'greater' }; const config = { ...defaultOptions, ...options }; const n = data.length; const mean = statistics.descriptive.mean(data); const stdDev = statistics.descriptive.standardDeviation(data); const se = stdDev / Math.sqrt(n); const tStat = (mean - mu) / se; const dof = n - 1; // Calculate p-value based on alternative hypothesis let pValue; if (config.alternative === 'two-sided') { // Two-tailed test pValue = 2 * (1 - statistics.distributions.t.cdf(Math.abs(tStat), dof)); } else if (config.alternative === 'less') { // One-tailed test (less than) pValue = statistics.distributions.t.cdf(tStat, dof); } else if (config.alternative === 'greater') { // One-tailed test (greater than) pValue = 1 - statistics.distributions.t.cdf(tStat, dof); } const reject = pValue < config.alpha; return { tStat, pValue, dof, mean, stdDev, se, mu, reject, alternative: config.alternative, alpha: config.alpha }; }; /** * Special mathematical functions for statistics */ statistics.special = { /** * Error function * @param {number} x - Input value * @returns {number} - Error function value */ erf(x) { // Abramowitz and Stegun approximation (maximum error: 1.5×10^−7) const sign = x >= 0 ? 1 : -1; const t = 1.0 / (1.0 + 0.3275911 * Math.abs(x)); const y = 1.0 - (((((1.061405429 * t - 1.453152027) * t) + 1.421413741) * t - 0.284496736) * t + 0.254829592) * t * Math.exp(-x * x); return sign * y; }, /** * Gamma function approximation * @param {number} z - Input value * @returns {number} - Gamma function value */ gamma(z) { // Lanczos approximation for the gamma function if (z < 0.5) { return Math.PI / (Math.sin(Math.PI * z) * statistics.special.gamma(1 - z)); } z -= 1; const p = [0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313, -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7]; let x = p[0]; for (let i = 1; i < p.length; i++) { x += p[i] / (z + i); } const t = z + p.length - 1.5; return Math.sqrt(2 * Math.PI) * t ** (z + 0.5) * Math.exp(-t) * x; } }; /** * Time series analysis functions */ statistics.timeSeries = { /** * Calculate the moving average of a time series * @param {Array} data - Time series data * @param {number} window - Window size for moving average * @returns {Array} - Moving average values */ movingAverage(data, window) { if (!Array.isArray(data) || window <= 0 || window > data.length) { throw new Error('Invalid input parameters'); } const result = []; for (let i = 0; i <= data.length - window; i++) { const windowData = data.slice(i, i + window); const avg = statistics.descriptive.mean(windowData); result.push(avg); } return result; }, /** * Calculate the exponential moving average of a time series * @param {Array} data - Time series data * @param {number} alpha - Smoothing factor (0 < alpha < 1) * @returns {Array} - Exponential moving average values */ exponentialMovingAverage(data, alpha) { if (!Array.isArray(data) || alpha <= 0 || alpha >= 1) { throw new Error('Invalid input parameters'); } const result = [data[0]]; for (let i = 1; i < data.length; i++) { const ema = alpha * data[i] + (1 - alpha) * result[i - 1]; result.push(ema); } return result; }, /** * Calculate the autocorrelation of a time series * @param {Array} data - Time series data * @param {number} lag - Lag value * @returns {number} - Autocorrelation value */ autocorrelation(data, lag) { if (!Array.isArray(data) || lag <= 0 || lag >= data.length) { throw new Error('Invalid input parameters'); } const mean = statistics.descriptive.mean(data); let numerator = 0; let denominator = 0; for (let i = 0; i < data.length - lag; i++) { numerator += (data[i] - mean) * (data[i + lag] - mean); } for (let i = 0; i < data.length; i++) { denominator += (data[i] - mean) ** 2; } return numerator / denominator; } }; /** * Machine learning utilities */ statistics.ml = { /** * Perform simple linear regression * @param {Array} x - Independent variable values * @param {Array} y - Dependent variable values * @returns {Object} - Regression results including slope, intercept, r-squared, etc. */ linearRegression(x, y) { if (!Array.isArray(x) || !Array.isArray(y) || x.length !== y.length) { throw new Error('Inputs must be arrays of equal length'); } const n = x.length; const meanX = statistics.descriptive.mean(x); const meanY = statistics.descriptive.mean(y); let numerator = 0; let denominator = 0; for (let i = 0; i < n; i++) { numerator += (x[i] - meanX) * (y[i] - meanY); denominator += (x[i] - meanX) ** 2; } const slope = numerator / denominator; const intercept = meanY - slope * meanX; // Calculate R-squared let ssTotal = 0; let ssResidual = 0; for (let i = 0; i < n; i++) { const yPred = slope * x[i] + intercept; ssTotal += (y[i] - meanY) ** 2; ssResidual += (y[i] - yPred) ** 2; } const rSquared = 1 - (ssResidual / ssTotal); return { slope, intercept, rSquared, predict(newX) { if (Array.isArray(newX)) { return newX.map((val) => slope * val + intercept); } return slope * newX + intercept; } }; }, /** * Normalize data using z-score (standard score) * @param {Array} data - Data to normalize * @returns {Array} - Normalized data */ zScoreNormalize(data) { const mean = statistics.descriptive.mean(data); const stdDev = statistics.descriptive.standardDeviation(data); return data.map((val) => (val - mean) / stdDev); }, /** * Min-max normalization of data to [0, 1] range * @param {Array} data - Data to normalize * @returns {Array} - Normalized data */ minMaxNormalize(data) { const min = Math.min(...data); const max = Math.max(...data); const range = max - min; return data.map((val) => (val - min) / range); } }; module.exports = statistics;