@ai-on-browser/data-analysis-models
Version:
Data analysis model package without any dependencies
452 lines (426 loc) • 13.3 kB
JavaScript
/**
 * Built-in distance functions, keyed by metric name.
 *
 * Each function takes two numeric vectors and iterates over the components
 * of the first one, reading the component at the same position in the second.
 */
const metrics = {
	// Square root of the summed squared component differences.
	euclid: (a, b) => Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0)),
	// Sum of the absolute component differences.
	manhattan: (a, b) => a.reduce((s, v, i) => s + Math.abs(v - b[i]), 0),
	// Largest absolute component difference. The fold is seeded with 0 (an
	// absolute difference is never negative), so two empty vectors are at
	// distance 0 like with the other metrics instead of -Infinity.
	chebyshev: (a, b) => a.reduce((s, v, i) => Math.max(s, Math.abs(v - b[i])), 0),
}
/**
* @typedef {object} AgglomerativeClusterNode
* @property {number[]} [point] Data point of leaf node
* @property {number} [index] Data index of leaf node
* @property {number} [distance] Distance between children nodes
* @property {number[]} [distances] Distances from the data point of a leaf node to every training data point, indexed by data index
* @property {number} size Number of leaf nodes
* @property {AgglomerativeClusterNode[]} [children] Children nodes
* @property {AgglomerativeClusterNode[]} leafs Leaf nodes
*/
/**
 * Base class for agglomerative (bottom-up) hierarchical clustering.
 *
 * `fit` repeatedly merges the two closest clusters until a single root node
 * remains. The linkage criterion is supplied by subclasses, which override
 * `distance` (initial distance between two nodes) and `update` (distance
 * between an untouched node and a freshly merged node).
 */
class AgglomerativeClustering {
	/**
	 * @param {'euclid' | 'manhattan' | 'chebyshev' | function (number[], number[]): number} metric Metric name, or a custom distance function
	 */
	constructor(metric = 'euclid') {
		this._root = null
		this._metric = metric
		// A function is used as-is; anything else is looked up in the built-in metric table.
		this._d = typeof metric === 'function' ? metric : metrics[metric]
	}

	/**
	 * Fit model parameters.
	 *
	 * Builds the merge tree and stores its root. Fitting an empty array
	 * leaves the model without a root.
	 * @param {Array<Array<number>>} points Training data
	 */
	fit(points) {
		// Every data point starts out as its own single-leaf cluster.
		const clusters = points.map((point, index) => ({
			point,
			index,
			distances: points.map(other => this._d(point, other)),
			size: 1,
			get leafs() {
				return [this]
			},
		}))

		// Symmetric cluster-to-cluster distance matrix. The diagonal is never
		// assigned, so it stays a hole that `forEach` skips below.
		const distances = clusters.map(() => [])
		for (let i = 0; i < clusters.length; i++) {
			for (let j = 0; j < i; j++) {
				distances[i][j] = distances[j][i] = this.distance(clusters[i], clusters[j])
			}
		}

		while (clusters.length > 1) {
			const n = clusters.length

			// Locate the closest pair. Only a strictly smaller distance
			// replaces the current candidate, so ties keep the first pair found.
			let min_i = 0
			let min_j = 1
			let min_d = distances[0][1]
			for (let i = 1; i < n; i++) {
				distances[i].forEach((d, j) => {
					if (d < min_d) {
						min_i = i
						min_j = j
						min_d = d
					}
				})
			}

			// Sizes must be captured before the merged node replaces slot min_i.
			const min_i_leafs = clusters[min_i].size
			const min_j_leafs = clusters[min_j].size

			// The merged cluster reuses slot min_i: refresh that row and column
			// for every other cluster, then drop column min_j from its row.
			distances.forEach((dr, k) => {
				if (k !== min_j && k !== min_i) {
					dr[min_i] = this.update(
						min_i_leafs,
						min_j_leafs,
						clusters[k].size,
						dr[min_i],
						dr[min_j],
						distances[min_j][min_i]
					)
					distances[min_i][k] = dr[min_i]
					dr.splice(min_j, 1)
				}
			})
			// Finally remove column min_j from the merged row and row min_j itself.
			distances[min_i].splice(min_j, 1)
			distances.splice(min_j, 1)

			clusters[min_i] = {
				distance: min_d,
				size: min_i_leafs + min_j_leafs,
				children: [clusters[min_i], clusters[min_j]],
				get leafs() {
					return [...this.children[0].leafs, ...this.children[1].leafs]
				},
			}
			clusters.splice(min_j, 1)
		}
		this._root = clusters[0]
	}

	/**
	 * Returns the specified number of clusters.
	 *
	 * Starting from the root, the node with the largest merge distance is
	 * replaced by its two children until enough nodes are collected or only
	 * leaf nodes remain. Returns an empty array when the model has no root
	 * (not fitted yet, or fitted with no data).
	 * @param {number} number Number of clusters
	 * @returns {AgglomerativeClusterNode[]} Cluster nodes
	 */
	getClusters(number) {
		if (this._root == null) {
			return []
		}
		const scanNodes = [this._root]
		while (scanNodes.length < number) {
			// Start below every possible distance so that nodes merged at
			// distance 0 (e.g. duplicate points) can still be split.
			let max_distance = -Infinity
			let max_distance_idx = -1
			for (let i = 0; i < scanNodes.length; i++) {
				const node = scanNodes[i]
				if (node.children && node.distance > max_distance) {
					max_distance_idx = i
					max_distance = node.distance
				}
			}
			if (max_distance_idx === -1) {
				// Nothing left to split.
				break
			}
			const max_distance_node = scanNodes[max_distance_idx]
			scanNodes.splice(max_distance_idx, 1, max_distance_node.children[0], max_distance_node.children[1])
		}
		return scanNodes
	}

	/**
	 * Returns predicted categories.
	 *
	 * Each training data index receives the position of its cluster in the
	 * array returned by `getClusters(k)`.
	 * @param {number} k Number of clusters
	 * @returns {number[]} Predicted values
	 */
	predict(k) {
		const p = []
		const clusters = this.getClusters(k)
		for (let i = 0; i < clusters.length; i++) {
			for (const leaf of clusters[i].leafs) {
				p[leaf.index] = i
			}
		}
		return p
	}

	/**
	 * Returns a distance between two nodes.
	 * @abstract
	 * @param {AgglomerativeClusterNode} c1 Node
	 * @param {AgglomerativeClusterNode} c2 Node
	 * @returns {number} Distance
	 */
	distance(c1, c2) {
		throw new Error('Not Implemented')
	}

	/**
	 * Returns the component-wise mean of a list of vectors.
	 * The dimension is taken from the first vector.
	 * @param {Array<Array<number>>} d Vectors
	 * @returns {number[]} Mean vector
	 */
	_mean(d) {
		const m = Array(d[0].length).fill(0)
		for (let i = 0; i < d.length; i++) {
			for (let k = 0; k < d[i].length; k++) {
				m[k] += d[i][k]
			}
		}
		return m.map(v => v / d.length)
	}

	/**
	 * Builds a distance updater of the form
	 * `ala * ka + alb * kb + bt * ab + gm * |ka - kb|`.
	 * @param {number} ala Coefficient of the distance to node A
	 * @param {number} alb Coefficient of the distance to node B
	 * @param {number} bt Coefficient of the distance between A and B
	 * @param {number} gm Coefficient of the absolute difference of both distances
	 * @returns {function (number, number, number): number} Updater taking `(ka, kb, ab)`
	 */
	_lanceWilliamsUpdater(ala, alb, bt, gm) {
		return (ka, kb, ab) => ala * ka + alb * kb + bt * ab + gm * Math.abs(ka - kb)
	}

	/**
	 * Returns new distance.
	 * @abstract
	 * @param {number} ca Number of datas in a merging node A
	 * @param {number} cb Number of datas in a merging node B
	 * @param {number} ck Number of datas in a current node
	 * @param {number} ka Distance between node A and current node
	 * @param {number} kb Distance between node B and current node
	 * @param {number} ab Distance between node A and node B
	 * @returns {number} New distance between current node and merged node
	 */
	update(ca, cb, ck, ka, kb, ab) {
		throw new Error('Not Implemented')
	}
}
/**
 * Agglomerative clustering with complete linkage: two nodes are as far
 * apart as their most distant pair of leaves.
 */
export class CompleteLinkageAgglomerativeClustering extends AgglomerativeClustering {
	/**
	 * Returns the largest leaf-to-leaf distance between two nodes.
	 * @param {AgglomerativeClusterNode} c1 First node
	 * @param {AgglomerativeClusterNode} c2 Second node
	 * @returns {number} Maximum pairwise leaf distance
	 */
	distance(c1, c2) {
		const leftLeafs = c1.leafs
		const rightLeafs = c2.leafs
		// For every leaf on the left, the farthest leaf on the right.
		const farthestPerLeaf = leftLeafs.map(left => {
			const toRight = rightLeafs.map(right => left.distances[right.index])
			return Math.max(...toRight)
		})
		return Math.max(...farthestPerLeaf)
	}

	/**
	 * Returns the distance between the current node and the node obtained
	 * by merging A and B. The coefficients used make the formula equal to
	 * the larger of `ka` and `kb`.
	 * @param {number} ca Leaf count of merging node A
	 * @param {number} cb Leaf count of merging node B
	 * @param {number} ck Leaf count of the current node
	 * @param {number} ka Distance from node A to the current node
	 * @param {number} kb Distance from node B to the current node
	 * @param {number} ab Distance from node A to node B
	 * @returns {number} Distance from the current node to the merged node
	 */
	update(ca, cb, ck, ka, kb, ab) {
		const updater = this._lanceWilliamsUpdater(0.5, 0.5, 0, 0.5)
		return updater(ka, kb, ab)
	}
}
/**
 * Agglomerative clustering with single linkage: two nodes are as close
 * as their nearest pair of leaves.
 */
export class SingleLinkageAgglomerativeClustering extends AgglomerativeClustering {
	/**
	 * Returns the smallest leaf-to-leaf distance between two nodes.
	 * @param {AgglomerativeClusterNode} c1 First node
	 * @param {AgglomerativeClusterNode} c2 Second node
	 * @returns {number} Minimum pairwise leaf distance
	 */
	distance(c1, c2) {
		const leftLeafs = c1.leafs
		const rightLeafs = c2.leafs
		// For every leaf on the left, the nearest leaf on the right.
		const nearestPerLeaf = leftLeafs.map(left => {
			const toRight = rightLeafs.map(right => left.distances[right.index])
			return Math.min(...toRight)
		})
		return Math.min(...nearestPerLeaf)
	}

	/**
	 * Returns the distance between the current node and the node obtained
	 * by merging A and B. The coefficients used make the formula equal to
	 * the smaller of `ka` and `kb`.
	 * @param {number} ca Leaf count of merging node A
	 * @param {number} cb Leaf count of merging node B
	 * @param {number} ck Leaf count of the current node
	 * @param {number} ka Distance from node A to the current node
	 * @param {number} kb Distance from node B to the current node
	 * @param {number} ab Distance from node A to node B
	 * @returns {number} Distance from the current node to the merged node
	 */
	update(ca, cb, ck, ka, kb, ab) {
		const updater = this._lanceWilliamsUpdater(0.5, 0.5, 0, -0.5)
		return updater(ka, kb, ab)
	}
}
/**
 * Agglomerative clustering with group-average linkage: the distance
 * between two nodes is the mean of all leaf-to-leaf distances.
 */
export class GroupAverageAgglomerativeClustering extends AgglomerativeClustering {
	/**
	 * Returns the mean distance over every pair made of one leaf from each node.
	 * @param {AgglomerativeClusterNode} c1 First node
	 * @param {AgglomerativeClusterNode} c2 Second node
	 * @returns {number} Average pairwise leaf distance
	 */
	distance(c1, c2) {
		const leftLeafs = c1.leafs
		const rightLeafs = c2.leafs
		let total = 0
		for (const left of leftLeafs) {
			// Sum one row first, then add it to the running total.
			let rowSum = 0
			for (const right of rightLeafs) {
				rowSum = rowSum + left.distances[right.index]
			}
			total = total + rowSum
		}
		const pairCount = leftLeafs.length * rightLeafs.length
		return total / pairCount
	}

	/**
	 * Returns the distance between the current node and the node obtained
	 * by merging A and B: `ka` and `kb` weighted by the relative leaf
	 * counts of A and B.
	 * @param {number} ca Leaf count of merging node A
	 * @param {number} cb Leaf count of merging node B
	 * @param {number} ck Leaf count of the current node
	 * @param {number} ka Distance from node A to the current node
	 * @param {number} kb Distance from node B to the current node
	 * @param {number} ab Distance from node A to node B
	 * @returns {number} Distance from the current node to the merged node
	 */
	update(ca, cb, ck, ka, kb, ab) {
		const weightA = ca / (ca + cb)
		const weightB = cb / (ca + cb)
		const updater = this._lanceWilliamsUpdater(weightA, weightB, 0, 0)
		return updater(ka, kb, ab)
	}
}
/**
 * Agglomerative clustering with Ward's criterion: the distance between two
 * nodes is the growth of the summed squared deviations caused by merging them.
 */
export class WardsAgglomerativeClustering extends AgglomerativeClustering {
	/**
	 * Returns the increase in summed squared distance-to-mean obtained when
	 * the leaves of both nodes are pooled into one group.
	 * @param {AgglomerativeClusterNode} c1 First node
	 * @param {AgglomerativeClusterNode} c2 Second node
	 * @returns {number} Pooled deviation minus the deviation of each node
	 */
	distance(c1, c2) {
		const pointsA = c1.leafs.map(leaf => leaf.point)
		const pointsB = c2.leafs.map(leaf => leaf.point)
		const pooled = [...pointsA, ...pointsB]

		const centerA = this._mean(pointsA)
		const centerB = this._mean(pointsB)
		const centerPooled = this._mean(pooled)

		// Sum of squared metric distances from each point to a center.
		const deviation = (pts, center) => pts.reduce((sum, pt) => sum + this._d(pt, center) ** 2, 0)

		const withinA = deviation(pointsA, centerA)
		const withinB = deviation(pointsB, centerB)
		const withinPooled = deviation(pooled, centerPooled)
		return withinPooled - withinA - withinB
	}

	/**
	 * Returns the distance between the current node and the node obtained
	 * by merging A and B, with weights derived from all three leaf counts.
	 * @param {number} ca Leaf count of merging node A
	 * @param {number} cb Leaf count of merging node B
	 * @param {number} ck Leaf count of the current node
	 * @param {number} ka Distance from node A to the current node
	 * @param {number} kb Distance from node B to the current node
	 * @param {number} ab Distance from node A to node B
	 * @returns {number} Distance from the current node to the merged node
	 */
	update(ca, cb, ck, ka, kb, ab) {
		const weightA = (ck + ca) / (ck + ca + cb)
		const weightB = (ck + cb) / (ck + ca + cb)
		const weightAB = -ck / (ck + ca + cb)
		const updater = this._lanceWilliamsUpdater(weightA, weightB, weightAB, 0)
		return updater(ka, kb, ab)
	}
}
/**
 * Agglomerative clustering with centroid linkage: nodes are compared
 * through the mean points of their leaves.
 */
export class CentroidAgglomerativeClustering extends AgglomerativeClustering {
	/**
	 * Returns the squared metric distance between the mean points of two nodes.
	 * @param {AgglomerativeClusterNode} c1 First node
	 * @param {AgglomerativeClusterNode} c2 Second node
	 * @returns {number} Squared distance between both centroids
	 */
	distance(c1, c2) {
		const pointsA = c1.leafs.map(leaf => leaf.point)
		const pointsB = c2.leafs.map(leaf => leaf.point)
		const centroidA = this._mean(pointsA)
		const centroidB = this._mean(pointsB)
		const gap = this._d(centroidA, centroidB)
		return gap * gap
	}

	/**
	 * Returns the distance between the current node and the node obtained
	 * by merging A and B, weighting `ka` and `kb` by the relative leaf
	 * counts of A and B and subtracting a share of `ab`.
	 * @param {number} ca Leaf count of merging node A
	 * @param {number} cb Leaf count of merging node B
	 * @param {number} ck Leaf count of the current node
	 * @param {number} ka Distance from node A to the current node
	 * @param {number} kb Distance from node B to the current node
	 * @param {number} ab Distance from node A to node B
	 * @returns {number} Distance from the current node to the merged node
	 */
	update(ca, cb, ck, ka, kb, ab) {
		const weightA = ca / (ca + cb)
		const weightB = cb / (ca + cb)
		const weightAB = (-ca * cb) / ((ca + cb) * (ca + cb))
		const updater = this._lanceWilliamsUpdater(weightA, weightB, weightAB, 0)
		return updater(ka, kb, ab)
	}
}
/**
 * Agglomerative clustering with weighted-average linkage: both halves of a
 * merged node count equally, whatever their sizes.
 */
export class WeightedAverageAgglomerativeClustering extends AgglomerativeClustering {
	/**
	 * Returns the distance between two nodes, obtained by recursively
	 * averaging the distances to the two children of any non-leaf node.
	 * @param {AgglomerativeClusterNode} c1 First node
	 * @param {AgglomerativeClusterNode} c2 Second node
	 * @returns {number} Recursively averaged distance
	 */
	distance(c1, c2) {
		const averaged = (a, b) => {
			// Expand the second node first whenever it is not a leaf.
			if (b.size !== 1) {
				return (averaged(a, b.children[0]) + averaged(a, b.children[1])) / 2
			}
			// The second node is a leaf: expand the first one, with roles swapped.
			if (a.size !== 1) {
				return (averaged(b, a.children[0]) + averaged(b, a.children[1])) / 2
			}
			// Two leaves: read the precomputed point-to-point distance.
			return a.distances[b.index]
		}
		return averaged(c1, c2)
	}

	/**
	 * Returns the distance between the current node and the node obtained
	 * by merging A and B: the plain average of `ka` and `kb`.
	 * @param {number} ca Leaf count of merging node A
	 * @param {number} cb Leaf count of merging node B
	 * @param {number} ck Leaf count of the current node
	 * @param {number} ka Distance from node A to the current node
	 * @param {number} kb Distance from node B to the current node
	 * @param {number} ab Distance from node A to node B
	 * @returns {number} Distance from the current node to the merged node
	 */
	update(ca, cb, ck, ka, kb, ab) {
		const updater = this._lanceWilliamsUpdater(0.5, 0.5, 0, 0)
		return updater(ka, kb, ab)
	}
}
/**
 * Agglomerative clustering with median linkage.
 */
export class MedianAgglomerativeClustering extends AgglomerativeClustering {
	/**
	 * Returns the squared metric distance between the midpoint of both
	 * nodes' mean points and the mean point of the second node.
	 *
	 * NOTE(review): this measures midpoint-to-centroid rather than
	 * centroid-to-centroid, so the value is smaller than the squared
	 * centroid distance - confirm this scaling is intended.
	 * @param {AgglomerativeClusterNode} c1 First node
	 * @param {AgglomerativeClusterNode} c2 Second node
	 * @returns {number} Squared midpoint-to-centroid distance
	 */
	distance(c1, c2) {
		const pointsA = c1.leafs.map(leaf => leaf.point)
		const centroidA = this._mean(pointsA)
		const pointsB = c2.leafs.map(leaf => leaf.point)
		const centroidB = this._mean(pointsB)
		// Point halfway between both centroids.
		const midpoint = centroidA.map((value, axis) => (value + centroidB[axis]) / 2)
		const gap = this._d(midpoint, centroidB)
		return gap ** 2
	}

	/**
	 * Returns the distance between the current node and the node obtained
	 * by merging A and B: the average of `ka` and `kb` minus a quarter of `ab`.
	 * @param {number} ca Leaf count of merging node A
	 * @param {number} cb Leaf count of merging node B
	 * @param {number} ck Leaf count of the current node
	 * @param {number} ka Distance from node A to the current node
	 * @param {number} kb Distance from node B to the current node
	 * @param {number} ab Distance from node A to node B
	 * @returns {number} Distance from the current node to the merged node
	 */
	update(ca, cb, ck, ka, kb, ab) {
		const updater = this._lanceWilliamsUpdater(0.5, 0.5, -0.25, 0)
		return updater(ka, kb, ab)
	}
}