UNPKG

louvain-algorithm

Version:

Community finding algorithm.

442 lines (369 loc) 23.8 kB
// -------------------------------------------- Louvain Algorithm -------------------------------------------- // [Description] // This algorithm is divided in 2 phases: Modularity Optimization and Community Aggregation. Just after the first is // completed, the second takes place. Louvain will iteratively go through both until we get an optimized partition of the network. // Modularity Optimization - At the beginning of this phase, the algorithm will randomly order all the nodes in the network such that, one by one, // it will remove and insert it in a different community. This will continue until no significant variation in modularity is // achieved (given by a constant defined below - __MIN). // Community Aggregation - After finalizing the first pass, every node belonging to the same community is merged into a single giant one and // the links connecting these will be formed by the sum of the ones previously connecting nodes from the same different communities. From now on, // there will also exist self-loops that represent the sum of all links in a given community (strictly connecting nodes inside of it) before being // collapsed into a single one. jLouvain = function (nds, edgs, __MIN) { // A function expression can be stored in a variable. After it has been // stored this way, it can be used as a function. Functions stored in variables do not need // names. They are always invoked using the variable name. // Constants // let __MIN = 0.0000001; // Below this difference of actual versus previous modularity, Louvain algorithm iteration stops. // Global Variables let original_graph_nodes; // Defined in the core() of the algorithm. let original_graph_edges; // Defined in the core() of the algorithm. let original_graph = {}; // Defined in the core() of the algorithm. let partition_init; // Defined in the core() of the algorithm. May not be used (depending on the user input). let edge_index = {}; // edge_index[edge.source+'_'+edge.target] = ... Attributes an index to each edge. This index // is the position of the edge in the graph.edges array. // ----------------------------------------- Helpers ----------------------------------------- function make_set(array) { // Receives an array with repeated values. Returns one filtered (and ordered) with only the different ones. let set = {}; array.forEach(function (d) { set[d] = true; }); return Object.keys(set); // Object.keys receives an array or an object. It returns an array with the respective // array's position or keys, respectively. Moreover, it eliminates repeated values (present in array) in the final set. } // Set -> {1: true, 2: true, 3: true...} Returns an ARRAY of the keys (each key corresponds to a node). function obj_values(obj) { let vals = []; for (let key in obj) { if (obj.hasOwnProperty(key)) { vals.push(obj[key]); } } return vals; } // Returns an ARRAY of the values of the input object (in the same initial order). hasOwnProperty returns true or // false depending on the presence of such property in obj.. function get_degree_for_node(graph, node) { // Node is a number ID. Graph is an object with 3 properties (nodes, // edges and _assoc_mat). _assoc_mat is an object, not an array! let neighbours = graph._assoc_mat[node] ? Object.keys(graph._assoc_mat[node]) : []; // In case we are looking // for a node not connected, it defines neighbours as an empty array. let weight = 0; neighbours.forEach(function (neighbour) { let value = graph._assoc_mat[node][neighbour] || 1; if (node === neighbour) { // In case we have already performed community aggregation, graph._assoc_mat[node][neighbour] will be different of 0. value *= 2; } weight += value; }); return weight; } // Returns the sum of the weights of all links connecting to node. function get_neighbours_of_node(graph, node) { if (typeof graph._assoc_mat[node] === 'undefined') { // In case we are looking for a node not connected, the // function returns an empty array. return []; } return Object.keys(graph._assoc_mat[node]); // Returns the position of each value that exists: // var object1 = [2,,0,0,,2] -> Array ["0", "2", "3", "5"] } // Prints an ARRAY with all neighbours of input node ID. function get_edge_weight(graph, node1, node2) { return graph._assoc_mat[node1] ? graph._assoc_mat[node1][node2] : undefined; } // Returns specific weight of the edge defined by node1 and node2. function get_graph_size(graph) { let size = 0; graph.edges.forEach(function (edge) { size += edge.weight; }); return size; } // Returns the sum of the property "weight" of all edges present in graph.edges. function add_edge_to_graph(graph, edge) { // Edge is an object that specifies the source node, target node, and weight. update_assoc_mat(graph, edge); // Updating assoc_mat with the new edge's weight. if (edge_index[edge.source+'_'+edge.target]) { // There is no weight to update in edge_index. graph.edges[edge_index[edge.source+'_'+edge.target]].weight = edge.weight; // Because it is already in edges // from example.html and edge_index (because of the next part). Update the weight in graph.edges. } else { graph.edges.push(edge); // Add edge to graph.edges. edge_index[edge.source+'_'+edge.target] = graph.edges.length - 1; // Update edge_index with new value. } } // edge_index accumulates only the new edges that are added to graph.edges. function make_assoc_mat(edge_list) { let mat = {}; // It is not an array. It is a list: // Object { {source: 3, target: 5, weight: 1.5}, {source: 1, target: 2, weight: 1.99}, {source: 30, target: 2, weight: 3.14} ...} edge_list.forEach(function (edge) { mat[edge.source] = mat[edge.source] || {}; // Important because many edges share the same nodes. In // order to include an element in a 2D matrix, we need to 1st create a list to insert it. mat[edge.source][edge.target] = edge.weight || 1; mat[edge.target] = mat[edge.target] || {}; mat[edge.target][edge.source] = edge.weight || 1; }); return mat; // It is not an array (1 object containing others): Object { 1: Object { 2: 3 }, 2: Object { 2: 3 } } } // make_assoc_mat is used once in the core.edges (to create _assoc_mat). Do not forget even objects inside objects are key/value pairs. function update_assoc_mat(graph, edge) { // assoc_mat is not an array. graph._assoc_mat[edge.source] = graph._assoc_mat[edge.source] || {}; // In case we are updating a node without connections. graph._assoc_mat[edge.source][edge.target] = edge.weight; graph._assoc_mat[edge.target] = graph._assoc_mat[edge.target] || {}; graph._assoc_mat[edge.target][edge.source] = edge.weight; } // Matrix where i is the source and j the target node of the respective edge. The numeric value corresponds to the edge weight. function clone(obj) { if (obj === null || typeof(obj) !== 'object') return obj; let temp = obj.constructor(); for (let key in obj) { temp[key] = clone(obj[key]); } return temp; } // Copy paste operation. This is important because: I have an object x. I'd like to copy it as object y, such that changes to y do not modify x. function shuffle(a) { let j, x, i; for (i = a.length - 1; i > 0; i--) { j = Math.floor(Math.random() * (i + 1)); x = a[i]; a[i] = a[j]; a[j] = x; } return a; } // Returns the input vector but randomly shuffled. // ----------------------------------------- Algorithm ----------------------------------------- function init_status(graph, status, part) { // Aim of this function is to initialize network properties after Louvain // first execution or to update them after community aggregation. // Part refers to an initial partition that may be input by // the user with the initial graph data. // Defining Status status['nodes_to_com'] = {}; // Nodes linked to the communities they belong. Key: Value pair. It takes the // value of -1 if node is not assigned to a community. status['internals'] = {}; // Sum of the weights of all links inside a specified community. status['degrees'] = {}; // Sum of the weights of the links incident in each community. status['gdegrees'] = {}; // Sum of the weights of the links incident in each node. status['loops'] = {}; // Loop weight for each node. status['total_weight'] = get_graph_size(graph); // Sum of the property "weight" of all edges present in graph. // Goal of next if condition is to update the status features above. if (typeof part === 'undefined') { // No part input. graph.nodes.forEach(function (node, i) { status.nodes_to_com[node] = i; // Each node belongs to a different community. let deg = get_degree_for_node(graph, node); // Sum of the weights of all links connecting to i. if (deg < 0) throw 'Bad graph type, use positive weights!'; status.degrees[i] = deg; // Sum of the weights of the links incident in each community. status.gdegrees[node] = deg; // Sum of the weights of the links incident in each node. // When every node is part of a different community, degrees = gdegrees. status.loops[node] = get_edge_weight(graph, node, node) || 0; // Inner loop edge weight. status.internals[i] = status.loops[node]; // This condition of if should be satisfied during community aggregation phase. // Variable "i" is used for community assignments and "node" for node specific variables. }); } else { // In case there is a partition as function argument. graph.nodes.forEach(function (node) { // There are status features that are node specific. let com = part[node]; status.nodes_to_com[node] = com; let deg = get_degree_for_node(graph, node); status.degrees[com] = (status.degrees[com] || 0) + deg; // Sum of the weights of the links incident in // each community is calculated by summing the weights of the edges incident in each node of the community. status.gdegrees[node] = deg; // Sum of the weights of the links incident in each node. let inc = 0.0; let neighbours = get_neighbours_of_node(graph, node); // Printing all the neighbours of input node. neighbours.forEach(function (neighbour) { let weight = graph._assoc_mat[node][neighbour]; if (weight <= 0) { throw "Bad graph type, use positive weights"; } if (part[neighbour] === com) { // Following calculations are done only if the neighbour belongs to // the same community as the input node under analysis. if (neighbour === node) { inc += weight; } else { inc += weight / 2.0; // Next time, neighbor will be the node and vice-versa. } } }); status.internals[com] = (status.internals[com] || 0) + inc; // With inc we calculate the sum of the // weights inside each community by summing the edges between connected nodes and belonging to the same community. }); } } function __modularity(status) { // It is possible to calculate network's modularity only using graph.status. let links = status.total_weight; // Total weight of the graph's edges. let result = 0.0; let communities = make_set(obj_values(status.nodes_to_com)); // Array with all the (non-repeated & ordered) communities present in the graph. communities.forEach(function (com) { // Iterating over all different communities. let in_degree = status.internals[com] || 0; // Sum of the weights of the links inside each community. let degree = status.degrees[com] || 0; // Sum of the weights of the links incident in each community. if (links > 0) { result = result + in_degree / links - Math.pow((degree / (2.0 * links)), 2); } }); return result; // Modularity of a given partition (defined by status). } function __neighcom(node, graph, status) { // Communities in the neighborhood of a given node. let weights = {}; let neighborhood = get_neighbours_of_node(graph, node); neighborhood.forEach(function (neighbour) { if (neighbour !== node) { let weight = graph._assoc_mat[node][neighbour] || 1; // weight is a number! let neighbourcom = status.nodes_to_com[neighbour]; weights[neighbourcom] = (weights[neighbourcom] || 0) + weight; // weights is an array! } }); return weights; // Each key corresponds to a different community. The respective value is the sum of all links // connecting "node" to other nodes present in the respective cluster. } function __insert(node, com, weight, status) { // Inserting a node in community com (connected by a given weight) and modifying graph status. status.nodes_to_com[node] = +com; // Updating node community. status.degrees[com] = (status.degrees[com] || 0) + (status.gdegrees[node] || 0); // Updating the sum of the edges incident in community com. status.internals[com] = (status.internals[com] || 0) + weight + (status.loops[node] || 0); // Updating the sum of internal edges. } function __remove(node, com, weight, status) { // Removing node from community com and modifying status. status.degrees[com] = ((status.degrees[com] || 0) - (status.gdegrees[node] || 0)); status.internals[com] = ((status.internals[com] || 0) - weight - (status.loops[node] || 0)); status.nodes_to_com[node] = -1; // Important to renumber communities after removing an edge. } // After inserting or removing a node from a community it is fundamental to update community ID. When node is removed, it will be placed in community -1. function __renumber(dict) { // dict = status.nodes_to_com let count = 0; let ret = clone(dict); // Function output (deep copy) let new_values = {}; let dict_keys = Object.keys(dict); // Getting node IDs. {1: 1, 2: 2, 3: 3...} dict_keys.forEach(function (key) { let value = dict[key]; // Node's community. let new_value = typeof new_values[value] === 'undefined' ? -1 : new_values[value]; if (new_value === -1) { new_values[value] = count; new_value = count; count = count + 1; } ret[key] = new_value; // {1: , 2: , 3: ,...} }); return ret; // Returns an object similar to nodes_to_com. Although, each node's community is defined in an // ordered way like the nodes. Every single community will come across count. Communities already identified // in previous nodes will be assigned to future ones. } function __one_level(graph, status) { // Computes one level of the communities dendogram (without including community aggregation). let modif = true; // Modifications made in terms of community members. let cur_mod = __modularity(status); // Current modularity (between -1 and 1). let new_mod = cur_mod; // New modularity value (between -1 and 1). while (modif) { // This cycle is not the one that removes or inserts nodes. cur_mod = new_mod; modif = false; // Only if best community is different from the actual one, the cycle may proceed. let shuffledNodes = shuffle(graph.nodes); shuffledNodes.forEach(function (node) { let com_node = status.nodes_to_com[node]; // Returning community of the input node. let degc_totw = (status.gdegrees[node] || 0) / (status.total_weight * 2.0); // To be used below. Defined here in order to avoid (unnecessary) repeated calculation. let neigh_communities = __neighcom(node, graph, status); // Returning an array of the communities in the neighborhood of input node. __remove(node, com_node, (neigh_communities[com_node] || 0.0), status); // function __remove(node, com, weight, status) {}. Status (which // includes nodes_to_com) is updated (inside __remove). let best_com = com_node; let best_increase = 0; let neigh_communities_entries = Object.keys(neigh_communities); // Make iterable. // Checking whether modularity increased by inserting removed node in each neighbor community (once at a time). neigh_communities_entries.forEach(function (com) { let incr = neigh_communities[com] - (status.degrees[com] || 0.0) * degc_totw; // DeltaQ - Fundamental equation. This way, // it is only needed to calculate those 2 community specific values. if (incr > best_increase) { // Only the placement of the node in the community with higher increase will remain. best_increase = incr; best_com = com; // Identifying the community the node fits the best. } }); __insert(node, best_com, neigh_communities[best_com] || 0, status); // We insert the node in the // community there was a greater global modularity improvement. Status (which includes nodes_to_com) is updated (inside __insert). if (best_com !== com_node) { modif = true; // Only in this situation the algorithm will keep looking for new ways of // improving modularity (by inserting nodes into different communities). } }); new_mod = __modularity(status); if (new_mod - cur_mod < __MIN) { // Even if best_com !== com_node, if new_mod - cur_mod < __MIN // cycle is broken. break; } } } // Community aggregation: function induced_graph(partition, graph) { // partition has status.nodes_to_com format. let ret = {nodes: [], edges: [], _assoc_mat: {}}; // Output. let w_prec, weight; // Add nodes from partition values let partition_values = obj_values(partition); // obj_values returns an array with each node community. ret.nodes = ret.nodes.concat(make_set(partition_values)); // Returns an ordered array without repeated values. Inserting community aggregated nodes // as in the input. array1.concat(array2) -> returns an array which is array1 and array2 joined. graph.edges.forEach(function (edge) { weight = edge.weight || 1; // For every edge placed between the same 2 nodes, the final weight is summed. let com1 = partition[edge.source]; // Source node community. let com2 = partition[edge.target]; // Target node community. w_prec = (get_edge_weight(ret, com1, com2) || 0); // get_edge_weight(graph, node1, node2) {}. let new_weight = (w_prec + weight); // new_weight is not summing to itself. add_edge_to_graph(ret, {'source': com1, 'target': com2, 'weight': new_weight}); // Inserting community aggregated edges. }); edge_index = {}; // Reset edge_index. return ret; // Returns final graph (ret) after community aggregation pass. This has updated nodes and edges. } // Partitioning drawn dendogram at an input level. function partition_at_level(dendogram, level) { let partition = clone(dendogram[0]); // partition = __renumber(status.nodes_to_com) for (let i = 1; i < level + 1; i++) { // If it is not possible to cut at the specified level, the function will // cut at the nearest below. Object.keys(partition).forEach(function (key) { let node = key; let com = partition[key]; partition[node] = dendogram[i][com]; // CHANGE: com -> key. Once there is an init_status() before // partition_at_level(), it is the same. var com = partition[key]; }); } return partition; // A graph can be partitioned in different ways. } // Mother Function. function generate_dendogram(graph, part_init) { if (graph.edges.length === 0) { // In case we have a graph with no edges. Each node is a different community. let part = {}; graph.nodes.forEach(function (node) { part[node] = node; }); return part; } let status = {}; init_status(original_graph, status, part_init); let mod; // Modularity before 1 level partition. let status_list = []; // Set of partitions at different hierarchical levels: dendogram. __one_level(original_graph, status); // Computes 1 level of the communities dendogram. Current status to determine when to stop. let new_mod = __modularity(status); // Modularity after 1 level partition. let partition = __renumber(status.nodes_to_com); // Decreasing number of communities due to __one_level. status_list.push(partition); mod = new_mod; let current_graph = induced_graph(partition, original_graph); // Graph that results from partitioning the original. Graph // after 1st pass. Community aggregation. while (true) { // Keeps partitioning the graph until no significant modularity increase occurs. init_status(current_graph, status); __one_level(current_graph, status); new_mod = __modularity(status); if (new_mod - mod < __MIN) { break; } partition = __renumber(status.nodes_to_com); status_list.push(partition); mod = new_mod; current_graph = induced_graph(partition, current_graph); } return status_list; // Dendogram is a set of ordered partitions. } if (nds.length > 0) { original_graph_nodes = nds; // Global variable. original_graph_edges = edgs; // Global variable. let assoc_mat = make_assoc_mat(edgs); original_graph = { // Global variable. Graph is an object with node (node), edge (edges) and weight (_assoc_mat) properties. 'nodes': original_graph_nodes, 'edges': original_graph_edges, '_assoc_mat': assoc_mat }; } let dendogram = generate_dendogram(original_graph, partition_init); // Global variables. return partition_at_level(dendogram, dendogram.length - 1); }; module.exports = { jLouvain: jLouvain };