/*
 * @woosh/meep-engine
 * Version:
 * Pure JavaScript game engine. Fully featured and production ready.
 * 251 lines (203 loc) • 8.29 kB
 * JavaScript
 */
import { assert } from "../assert.js";
import { randomIntegerBetween } from "../math/random/randomIntegerBetween.js";
import { seededRandom } from "../math/random/seededRandom.js";
import { graph_compute_distance_matrix } from "./graph_compute_distance_matrix.js";
/**
 * Hard cap on the number of assign/re-centre rounds. The refinement loop
 * exits earlier as soon as a round leaves every medoid unchanged; this bound
 * only guarantees termination when medoids keep moving.
 * @type {number}
 */
const MAX_ITERATIONS = 32;

/**
 * Marker stored in the assignment array for a node that has not been placed
 * into any cluster yet. Valid cluster indices are 0..k-1, so k has to remain
 * strictly smaller than this value for the marker to stay unambiguous.
 * Equal to the largest value representable in a Uint32Array element.
 * @type {number}
 */
const UNASSIGNED_CLUSTER = 0xFFFFFFFF;
/**
 * Find the medoid of a set of nodes: the member with minimum sum of
 * distances to every other member.
 *
 * Non-finite distances (e.g. `Infinity` for a pair of members with no path
 * between them) are counted as "unreachable" instead of being added to the sum.
 * Candidates are ranked first by how many members they cannot reach (fewer is
 * better) and then by the sum of their finite distances. Without this, a single
 * unreachable member makes every candidate's sum infinite and the first member
 * would win regardless of the cluster's structure.
 *
 * When all distances are finite this is exactly "minimum sum of distances";
 * ties are resolved in favour of the member that appears first in `members`.
 *
 * @param {number[]} members cluster member node indices
 * @param {SquareMatrix} m_distances matrix whose columns for each member index are filled;
 *        read as `getCellValue(candidate, other_member)`
 * @returns {number} node index of the medoid (`undefined` if `members` is empty)
 */
function find_cluster_medoid(members, m_distances) {
    const n = members.length;

    let best_member = members[0];
    let best_unreachable = Number.POSITIVE_INFINITY;
    let best_sum = Number.POSITIVE_INFINITY;

    for (let i = 0; i < n; i++) {
        const candidate = members[i];

        // number of other members with no finite distance from the candidate
        let unreachable = 0;
        // sum over the members that do have a finite distance
        let sum = 0;

        for (let j = 0; j < n; j++) {
            if (i === j) {
                continue;
            }

            const distance = m_distances.getCellValue(candidate, members[j]);

            if (Number.isFinite(distance)) {
                sum += distance;
            } else {
                unreachable++;
            }
        }

        // Lexicographic comparison on (unreachable, sum); strict so the earliest member wins ties
        const is_better = unreachable < best_unreachable
            || (unreachable === best_unreachable && sum < best_sum);

        if (is_better) {
            best_unreachable = unreachable;
            best_sum = sum;
            best_member = candidate;
        }
    }

    return best_member;
}
/**
 * K-medoids clustering over pre-built index structures.
 *
 * Seeds are picked farthest-first, then up to MAX_ITERATIONS rounds of
 * "assign every node to its nearest seed" / "move each seed to the medoid of
 * its cluster" are run, stopping early once no seed moves.
 *
 * @param {MultiNode<T>[]} node_array nodes of the graph; positions in this array are the node indices
 * @param {number} k number of clusters, must satisfy 0 <= k <= node_array.length (expected to be an integer)
 * @param {number} random_seed seed for the random number generator used to pick the first seed node
 * @param {number[]|Uint32Array} node_cluster_assignments pre-filled with UNASSIGNED_CLUSTER;
 *        for k > 0 it holds the cluster index of every node on return
 * @param {Map<T, number>} node_index_map
 * @param {Graph<MultiNode<T>>} graph
 * @returns {number[][]} one array of node indices per cluster (empty array when k is 0)
 * @throws {Error} if k is negative or NaN, if k exceeds the number of nodes, or if seeding
 *         cannot find an unassigned node (node_cluster_assignments was not pre-filled)
 */
export function graph_k_means_cluster_detailed(
    node_array,
    k,
    random_seed,
    node_cluster_assignments,
    node_index_map,
    graph
) {
    const total_node_count = node_array.length;

    // Negative or NaN k would slip past the checks below and later fail with an
    // unrelated TypeError while building cluster member lists; fail early instead.
    if (!(k >= 0)) {
        throw new Error(`K must be a non-negative number, got ${k}`);
    }

    if (k > total_node_count) {
        throw new Error(`Not enough nodes in the graph, K(=${k}) > |V| (=${total_node_count})`);
    }

    // Cluster indices are stored alongside UNASSIGNED_CLUSTER in the same array, so
    // k must fit strictly below the sentinel to avoid collisions.
    assert.lessThan(k, UNASSIGNED_CLUSTER, `k must be less than UNASSIGNED_CLUSTER(=${UNASSIGNED_CLUSTER})`);

    if (k === 0) {
        return [];
    }

    const random = seededRandom(random_seed);

    /**
     * Current medoid (centroid) node index for each cluster
     * @type {number[]}
     */
    const seeds = [];

    // Farthest-first seeding: pick the first seed uniformly at random, then each
    // subsequent seed is the node whose minimum distance to any existing seed is
    // largest. This reliably spreads seeds across disconnected components and
    // avoids the common K-means failure mode where random initialisation puts
    // every seed into the same component.
    {
        // randomIntegerBetween is inclusive on both ends, so the max index is count-1
        const first_seed = randomIntegerBetween(random, 0, total_node_count - 1);

        seeds[0] = first_seed;
        node_cluster_assignments[first_seed] = 0;
    }

    for (let i = 1; i < k; i++) {
        // Distances are recomputed from all seeds chosen so far on every round
        const m_partial = graph_compute_distance_matrix(graph, node_array, seeds, node_index_map);

        let best_node = -1;
        // -1 so that any candidate, including one at distance 0, beats the initial value
        let best_min_dist = -1;

        for (let node_index = 0; node_index < total_node_count; node_index++) {
            if (node_cluster_assignments[node_index] !== UNASSIGNED_CLUSTER) {
                // already a seed
                continue;
            }

            let min_dist = Number.POSITIVE_INFINITY;

            for (let j = 0; j < i; j++) {
                const d = m_partial.getCellValue(node_index, seeds[j]);

                if (d < min_dist) {
                    min_dist = d;
                }
            }

            if (min_dist > best_min_dist) {
                best_min_dist = min_dist;
                best_node = node_index;
            }
        }

        if (best_node === -1) {
            // Only possible when every node was skipped as "already a seed", i.e. the
            // assignment array was not reset. Storing -1 as a seed would corrupt later steps.
            throw new Error(`Unable to pick seed ${i}: no unassigned node found, node_cluster_assignments must be pre-filled with UNASSIGNED_CLUSTER`);
        }

        seeds[i] = best_node;
        node_cluster_assignments[best_node] = i;
    }

    /**
     * Node indices belonging to each cluster. Rebuilt every iteration.
     * @type {number[][]}
     */
    let cluster_members = [];

    for (let iter = 0; iter < MAX_ITERATIONS; iter++) {
        // 1. Compute distances from every current seed to every node (BFS from each seed)
        const m_distances_from_seeds = graph_compute_distance_matrix(graph, node_array, seeds, node_index_map);

        // 2. Assign every node to its nearest seed
        for (let node_index = 0; node_index < total_node_count; node_index++) {
            // A node with no distance below Infinity to any seed stays in cluster 0
            let closest_cluster = 0;
            let closest_distance = Number.POSITIVE_INFINITY;

            for (let cluster_index = 0; cluster_index < k; cluster_index++) {
                const cluster_seed = seeds[cluster_index];

                // Matrix columns are the seed (target) indices, rows are source nodes.
                // We need the distance from node_index to the seed, so read m[node_index, seed].
                const distance = m_distances_from_seeds.getCellValue(node_index, cluster_seed);

                // strict comparison: on equal distance the lower cluster index wins
                if (distance < closest_distance) {
                    closest_distance = distance;
                    closest_cluster = cluster_index;
                }
            }

            node_cluster_assignments[node_index] = closest_cluster;
        }

        // 3. Rebuild cluster member lists
        cluster_members = [];

        for (let i = 0; i < k; i++) {
            cluster_members[i] = [];
        }

        for (let node_index = 0; node_index < total_node_count; node_index++) {
            cluster_members[node_cluster_assignments[node_index]].push(node_index);
        }

        // 4. Recompute each seed as the medoid of its cluster
        let seeds_changed = false;

        for (let cluster_index = 0; cluster_index < k; cluster_index++) {
            const members = cluster_members[cluster_index];

            if (members.length <= 1) {
                // 0 or 1 member: medoid is trivially the current seed
                continue;
            }

            // BFS from every member to compute within-cluster pairwise distances
            const m_within = graph_compute_distance_matrix(graph, node_array, members, node_index_map);

            const new_seed = find_cluster_medoid(members, m_within);

            if (new_seed !== seeds[cluster_index]) {
                seeds[cluster_index] = new_seed;
                seeds_changed = true;
            }
        }

        if (!seeds_changed) {
            // medoids are stable, the assignment computed above is final
            break;
        }
    }

    return cluster_members;
}
/**
 * Split a graph into K clusters with K-medoids (the graph flavour of K-means).
 *
 * This is the convenience entry point: it snapshots the graph's nodes into an
 * array, builds the node -> index lookup and a fresh assignment buffer, and
 * hands everything over to {@link graph_k_means_cluster_detailed}, which
 * alternates between assigning nodes to their nearest medoid and re-picking
 * each medoid until medoids stop moving or MAX_ITERATIONS is hit.
 *
 * Distances are BFS-based and so treat all edges as unit-weight; see
 * {@link graph_compute_distance_matrix}.
 *
 * @template T
 * @param {Graph<T>} graph
 * @param {number} k number of desired parts
 * @param {number} random_seed seed for random number generator, useful for restarting partitioning
 * @returns {number[][]}
 */
export function graph_k_means_cluster(graph, k, random_seed) {
    const nodes = Array.from(graph.getNodes());

    /**
     * Reverse lookup from a node to its position in `nodes`
     * @type {Map<T, number>}
     */
    const index_by_node = new Map();

    nodes.forEach((node, index) => {
        index_by_node.set(node, index);
    });

    /**
     * One slot per node, every slot starts out as UNASSIGNED_CLUSTER.
     * Uint32 storage keeps the 0xFFFFFFFF sentinel apart from real cluster
     * indices for any practical k.
     * @type {Uint32Array}
     */
    const assignments = new Uint32Array(nodes.length).fill(UNASSIGNED_CLUSTER);

    return graph_k_means_cluster_detailed(nodes, k, random_seed, assignments, index_by_node, graph);
}