UNPKG

@ai-on-browser/data-analysis-models

Version:

Data analysis model package without any dependencies

351 lines (332 loc) 7.82 kB
/**
 * Returns Davies-Bouldin index.
 *
 * For every cluster the scatter is the q-th moment of the Minkowski (order p) distances
 * between its members and its centroid. The index is the mean, over clusters, of the worst
 * (largest) ratio `(scatter_k + scatter_l) / distance(centroid_k, centroid_l)`.
 * When all samples share a single category there is no other cluster to compare with and
 * the result is `-Infinity`.
 *
 * @param {Array<Array<number>>} data Original data
 * @param {*[]} pred Predicted categories
 * @param {number} p Order of the Minkowski distance used for point-centroid and centroid-centroid distances
 * @param {number} q Order of the moment used to average the point-centroid distances
 * @returns {number} Davies-Bouldin index
 */
export function davisBouldinIndex(data, pred, p = 2, q = 1) {
	const n = pred.length
	const dim = data[0].length
	const classes = [...new Set(pred)]
	const nc = classes.length

	const counts = Array(nc).fill(0)
	const centroid = []
	// y[i] is the position in `classes` of the category of sample i.
	const y = []
	for (let k = 0; k < nc; k++) {
		centroid[k] = Array(dim).fill(0)
		for (let i = 0; i < n; i++) {
			if (pred[i] !== classes[k]) {
				continue
			}
			y[i] = k
			counts[k]++
			for (let d = 0; d < dim; d++) {
				centroid[k][d] += data[i][d]
			}
		}
		centroid[k] = centroid[k].map(v => v / counts[k])
	}

	// Scatter of each cluster: (mean of ||x - centroid||_p ^ q) ^ (1 / q).
	const s = Array(nc).fill(0)
	for (let i = 0; i < n; i++) {
		const ci = centroid[y[i]]
		const powSum = data[i].reduce((acc, v, d) => acc + Math.abs(v - ci[d]) ** p, 0)
		s[y[i]] += powSum ** (q / p)
	}
	for (let k = 0; k < s.length; k++) {
		s[k] = (s[k] / counts[k]) ** (1 / q)
	}

	// r is symmetric: entries with l < k were already filled while processing row l.
	const r = Array.from({ length: nc }, () => [])
	let db = 0
	for (let k = 0; k < nc; k++) {
		let max_r = -Infinity
		for (let l = 0; l < k; l++) {
			if (r[k][l] > max_r) {
				max_r = r[k][l]
			}
		}
		for (let l = k + 1; l < nc; l++) {
			const m = centroid[k].reduce((acc, v, d) => acc + Math.abs(v - centroid[l][d]) ** p, 0)
			r[k][l] = r[l][k] = (s[k] + s[l]) / m ** (1 / p)
			if (r[k][l] > max_r) {
				max_r = r[k][l]
			}
		}
		db += max_r
	}
	return db / nc
}

/**
 * Returns Silhouette coefficient.
 *
 * For sample i, `a` is the mean Euclidean distance to the other members of its own cluster
 * and `b` is the smallest mean distance to the members of any other cluster; the coefficient
 * is `(b - a) / max(a, b)`.
 * A sample that is alone in its cluster, or for which `a` and `b` are both zero, gets 0
 * instead of NaN. When all samples share a single category `b` stays infinite and the
 * coefficient of every non-isolated sample is NaN.
 *
 * @param {Array<Array<number>>} data Original data
 * @param {*[]} pred Predicted categories
 * @returns {number[]} Silhouette coefficient
 */
export function silhouetteCoefficient(data, pred) {
	const n = pred.length
	const classes = [...new Set(pred)]
	const dist = (a, b) => Math.sqrt(a.reduce((acc, v, k) => acc + (v - b[k]) ** 2, 0))

	// Symmetric pairwise distance table (the diagonal is never read).
	const d = []
	for (let i = 0; i < n; i++) {
		d[i] = []
		for (let j = 0; j < i; j++) {
			d[i][j] = d[j][i] = dist(data[i], data[j])
		}
	}

	const s = []
	for (let i = 0; i < n; i++) {
		let a = 0
		let own = 0
		for (let j = 0; j < n; j++) {
			if (j === i || pred[j] !== pred[i]) {
				continue
			}
			a += d[i][j]
			own++
		}
		if (own === 0) {
			// Singleton cluster: the mean intra-cluster distance would be 0 / 0.
			s[i] = 0
			continue
		}
		a /= own

		let b = Infinity
		for (let k = 0; k < classes.length; k++) {
			if (pred[i] === classes[k]) {
				continue
			}
			let bk = 0
			let c = 0
			for (let j = 0; j < n; j++) {
				if (pred[j] !== classes[k]) {
					continue
				}
				bk += d[i][j]
				c++
			}
			bk /= c
			if (bk < b) {
				b = bk
			}
		}

		const denom = Math.max(a, b)
		// a === b === 0 happens when the sample coincides with both its own and a foreign cluster.
		s[i] = denom === 0 ? 0 : (b - a) / denom
	}
	return s
}

/**
 * Returns Dunn index.
 *
 * The index is the smallest inter-cluster distance divided by the largest intra-cluster
 * distance. Distances are Euclidean.
 *
 * Intra-cluster distance types:
 * - `'max'`: largest distance between two members of the cluster
 * - `'mean'`: mean distance over all pairs of distinct members (0 for a singleton cluster)
 * - `'centroid'`: mean distance between the members and the cluster centroid
 *
 * @param {Array<Array<number>>} data Original data
 * @param {*[]} pred Predicted categories
 * @param {'max' | 'mean' | 'centroid'} intra_d Intra-cluster distance type
 * @param {'centroid'} inter_d Inter-cluster distance type
 * @returns {number} Dunn index
 */
export function dunnIndex(data, pred, intra_d = 'max', inter_d = 'centroid') {
	const n = pred.length
	const dim = data[0].length
	const classes = [...new Set(pred)]
	const nc = classes.length
	const dist = (a, b) => Math.sqrt(a.reduce((acc, v, k) => acc + (v - b[k]) ** 2, 0))

	const counts = Array(nc).fill(0)
	for (let i = 0; i < n; i++) {
		counts[classes.indexOf(pred[i])]++
	}

	// Centroids are only needed when one of the distance types is centroid based.
	const centroid = []
	if (intra_d === 'centroid' || inter_d === 'centroid') {
		for (let k = 0; k < nc; k++) {
			centroid[k] = Array(dim).fill(0)
			for (let i = 0; i < n; i++) {
				if (pred[i] !== classes[k]) {
					continue
				}
				for (let d = 0; d < dim; d++) {
					centroid[k][d] += data[i][d]
				}
			}
			centroid[k] = centroid[k].map(v => v / counts[k])
		}
	}

	// dk[k]: intra-cluster distance ("diameter") of cluster k.
	const dk = []
	for (let k = 0; k < nc; k++) {
		if (intra_d === 'max') {
			let max_d = 0
			for (let i = 0; i < n; i++) {
				if (pred[i] !== classes[k]) {
					continue
				}
				for (let j = 0; j < i; j++) {
					if (pred[j] !== classes[k]) {
						continue
					}
					const d = dist(data[i], data[j])
					if (max_d < d) {
						max_d = d
					}
				}
			}
			dk[k] = max_d
		} else if (intra_d === 'mean') {
			// Sum each unordered pair once; there are counts * (counts - 1) / 2 such pairs.
			let sd = 0
			for (let i = 0; i < n; i++) {
				if (pred[i] !== classes[k]) {
					continue
				}
				for (let j = 0; j < i; j++) {
					if (pred[j] !== classes[k]) {
						continue
					}
					sd += dist(data[i], data[j])
				}
			}
			// A singleton cluster has no pairs; use 0 rather than 0 / 0.
			dk[k] = counts[k] < 2 ? 0 : (2 * sd) / (counts[k] * (counts[k] - 1))
		} else if (intra_d === 'centroid') {
			let sd = 0
			for (let i = 0; i < n; i++) {
				if (pred[i] !== classes[k]) {
					continue
				}
				sd += dist(data[i], centroid[k])
			}
			dk[k] = sd / counts[k]
		}
	}

	// di[k][l] (l < k): inter-cluster distance.
	const di = Array.from({ length: nc }, () => [])
	if (inter_d === 'centroid') {
		for (let k = 0; k < nc; k++) {
			for (let l = 0; l < k; l++) {
				di[k][l] = dist(centroid[k], centroid[l])
			}
		}
	}

	const max_dk = Math.max(...dk)
	let min_di = Infinity
	for (let k = 0; k < nc; k++) {
		for (let l = 0; l < k; l++) {
			if (min_di > di[k][l]) {
				min_di = di[k][l]
			}
		}
	}
	return min_di / max_dk
}

/**
 * Returns Purity.
 *
 * Each predicted cluster contributes the size of its most frequent true category;
 * the total is divided by the number of samples.
 *
 * @param {*[]} pred Predicted categories
 * @param {*[]} t True categories
 * @returns {number} Purity
 */
export function purity(pred, t) {
	const n = pred.length
	const clusters = [...new Set(pred)]
	let matched = 0
	for (const cluster of clusters) {
		// A Map keeps labels of different types apart (1 vs '1') and is not affected by
		// inherited property names such as 'constructor'.
		const count = new Map()
		let max_cnt = 0
		for (let i = 0; i < n; i++) {
			if (pred[i] !== cluster) {
				continue
			}
			const c = (count.get(t[i]) || 0) + 1
			count.set(t[i], c)
			if (max_cnt < c) {
				max_cnt = c
			}
		}
		matched += max_cnt
	}
	return matched / n
}

/**
 * Counts the unordered sample pairs by how the two labelings treat them.
 *
 * @param {*[]} pred Predicted categories
 * @param {*[]} t True categories
 * @returns {{tp: number, fp: number, fn: number, tn: number}}
 *   tp: same predicted and same true category,
 *   fp: same predicted but different true category,
 *   fn: different predicted but same true category,
 *   tn: different predicted and different true category
 */
function countPairAgreements(pred, t) {
	const n = pred.length
	let tp = 0
	let fp = 0
	let fn = 0
	let tn = 0
	for (let i = 0; i < n; i++) {
		for (let j = i + 1; j < n; j++) {
			const samePred = pred[i] === pred[j]
			const sameTrue = t[i] === t[j]
			if (samePred && sameTrue) {
				tp++
			} else if (samePred) {
				fp++
			} else if (sameTrue) {
				fn++
			} else {
				tn++
			}
		}
	}
	return { tp, fp, fn, tn }
}

/**
 * Returns Rand index.
 *
 * Fraction of sample pairs on which both labelings agree (both put the pair together or
 * both keep it apart). With fewer than two samples there are no pairs and the result is NaN.
 *
 * @param {*[]} pred Predicted categories
 * @param {*[]} t True categories
 * @returns {number} Rand index
 */
export function randIndex(pred, t) {
	const n = pred.length
	const { tp, tn } = countPairAgreements(pred, t)
	return (tp + tn) / ((n * (n - 1)) / 2)
}

/**
 * Returns Dice index.
 *
 * Computed over sample pairs as
 * `(1 + beta^2) * tp / ((1 + beta^2) * tp + beta^2 * fn + fp)`.
 *
 * @param {*[]} pred Predicted categories
 * @param {*[]} t True categories
 * @param {number} [beta] Positive real factor. Recall is considered `beta` times as important as precision.
 * @returns {number} Dice index
 */
export function diceIndex(pred, t, beta = 1) {
	const { tp, fp, fn } = countPairAgreements(pred, t)
	return ((1 + beta ** 2) * tp) / ((1 + beta ** 2) * tp + beta ** 2 * fn + fp)
}

/**
 * Returns Jaccard index.
 *
 * Computed over sample pairs as `tp / (tp + fp + fn)`.
 *
 * @param {*[]} pred Predicted categories
 * @param {*[]} t True categories
 * @returns {number} Jaccard index
 */
export function jaccardIndex(pred, t) {
	const { tp, fp, fn } = countPairAgreements(pred, t)
	return tp / (tp + fp + fn)
}

/**
 * Returns Fowlkes-Mallows index.
 *
 * Computed over sample pairs as `tp / sqrt((tp + fp) * (tp + fn))`.
 *
 * @param {*[]} pred Predicted categories
 * @param {*[]} t True categories
 * @returns {number} Fowlkes-Mallows index
 */
export function fowlkesMallowsIndex(pred, t) {
	const { tp, fp, fn } = countPairAgreements(pred, t)
	return tp / Math.sqrt((tp + fp) * (tp + fn))
}