UNPKG

graphinius

Version:

Generic graph library in Typescript

327 lines (292 loc) 8.67 kB
import * as $I from './interfaces'; import {TypedGraph} from '../core/typed/TypedGraph'; import { ITypedNode } from '../core/typed/TypedNode'; export const sortFuncs = { asc: (se1: $I.SimilarityEntry, se2: $I.SimilarityEntry) => se1.sim - se2.sim, desc: (se1: $I.SimilarityEntry, se2: $I.SimilarityEntry) => se2.sim - se1.sim }; export const cutFuncs = { above: (sim: number, threshold: number) => sim >= threshold, below: (sim: number, threshold: number) => sim <= threshold }; /*----------------------------------*/ /* SIMILARITY FUNCTIONS */ /*----------------------------------*/ export function sim(algo: Function, a: Set<any>, b: Set<any>) { return algo(a, b); } /** * @description similarity between set & particular node * sorted by similarity DESC * * @param algo similarity function to use * @param s source set * @param t target sets to measure similarity to * @param cfg object */ export function simSource(algo: Function, s: string, t: $I.SetOfSets, cfg: $I.SimilarityConfig = {}) : $I.SimilarityResult { const sort = cfg.sort || sortFuncs.desc; const cutFunc = cfg.cutFunc || cutFuncs.above; let result: $I.SimilarityResult = []; const start = t[s]; for ( let [k,v] of Object.entries(t)) { if ( k === s ) { continue; } const sim: $I.Similarity = algo(start, v); if ( cfg.cutoff == null || cutFunc(sim.sim, cfg.cutoff ) ) { result.push({from: s, to: k, ...sim}); } } result.sort(sort); if ( cfg.knn != null && cfg.knn <= result.length ) { result = result.slice(0, cfg.knn); } return result; } /** * @description pairwise is a *symmetrical* algorithm, so we only need to * compute similarities in one direction * * @param algo similarity function to use * @param s all sets * @param cfg object */ export function simPairwise(algo: Function, s: $I.SetOfSets, cfg: $I.SimilarityConfig = {}) : $I.SimilarityResult { const sort = cfg.sort || sortFuncs.desc; const cutFunc = cfg.cutFunc || cutFuncs.above; let result: $I.SimilarityResult = []; const keys = Object.keys(s); for ( let i in keys ) { for ( let j = 0; j < +i; j++) { const from = keys[i]; const to = keys[j]; if ( from === to ) { continue; } const sim = algo(s[keys[i]], s[keys[j]], i, j); if ( cfg.cutoff == null || cutFunc(sim.sim, cfg.cutoff ) ) { result.push({from, to, ...sim}); } } } result.sort(sort); if ( cfg.knn != null && cfg.knn <= result.length ) { result = result.slice(0, cfg.knn); } return result; } /** * @description similarity of individuals of one subset to another * @description kNN relates to each s1-node's subset * * @param algo * @param s1 * @param s2 * @param cfg * * @returns an array of Similarity entries */ export function simSubsets(algo: Function, s1: $I.SetOfSets, s2: $I.SetOfSets, cfg: $I.SimilarityConfig = {}) : $I.SimilarityResult { const sort = cfg.sort || sortFuncs.desc; const cutFunc = cfg.cutFunc || cutFuncs.above; let result: $I.SimilarityResult = []; const keys1 = Object.keys(s1); const keys2 = Object.keys(s2); for ( let i in keys1 ) { let subRes = []; for ( let j in keys2 ) { const from = keys1[i]; const to = keys2[j]; if ( from === to ) { continue; } const sim = algo(s1[keys1[i]], s2[keys2[j]]); if ( cfg.cutoff == null || cutFunc(sim.sim, cfg.cutoff) ) { subRes.push({from, to, ...sim}); } } subRes.sort(sort); if ( cfg.knn != null && cfg.knn <= subRes.length ) { subRes = subRes.slice(0, cfg.knn); } result = result.concat(subRes); } return result.sort(sort); } // /** // * @description similarity of two groups to one another // * just collects sets & calls sim() // * // * @param algo // * @param s1 // * @param s2 // * @param config // * // * @returns an array of Similarity entries // */ // export function simGroups(algo: Function, s1: $I.SetOfSets, s2: $I.SetOfSets, config: $I.SimilarityConfig = {}) : $I.Similarity { // throw new Error('not implemented yet'); // return {isect: 0, sim: 0}; // } /** * @description top-K per node * * @param algo similarity function to use * @param s all sets * @param cfg * * @returns most similar neighbor per node * * @todo there are no duplicates in this array, similarities might differ in different directions -> adapt! */ export function knnNodeArray(algo: Function, s: $I.SetOfSets, cfg: $I.SimilarityConfig) : $I.TopKArray { const sort = cfg.sort || sortFuncs.desc; const c = cfg.cutoff || 0; const topK: $I.TopKArray = []; const dupes = {}; for ( let node of Object.keys(s) ) { const topKEntries: $I.SimilarityEntry[] = simSource(algo, node, s, {knn: cfg.knn || 1, sort: cfg.sort}); topKEntries.forEach(e => { // console.log(e); if ( c == null || e.sim < c ) { return; } if (!cfg.dup && ( dupes[e.from] && dupes[e.from][e.to] || dupes[e.to] && dupes[e.to][e.from] ) ) { return; } topK.push(e); dupes[e.from] = dupes[e.from] || {}; dupes[e.from][e.to] = true; }); } return topK.sort(sort); } /** * * @param algo * @param s * @param cfg */ export function knnNodeDict(algo: Function, s: $I.SetOfSets, cfg: $I.SimilarityConfig) { const sort = cfg.sort || sortFuncs.desc; const c = cfg.cutoff || 0; const topK: $I.TopKDict = {}; for ( let node of Object.keys(s) ) { const topKEntries: $I.SimilarityEntry[] = simSource(algo, node, s, {knn: cfg.knn || 1, sort: cfg.sort}); topKEntries.forEach(e => { if ( c == null || e.sim < c) { return; } delete e.from; topK[node] = topK[node] || []; topK[node].push(e); }); for ( let arr of Object.values(topK) ) { arr.sort(sort); } } return topK; } /** * @description Returns similarities of 2 node sets depending on shared preferences * @description default cutoff similarity is 1e-6 * * @param g graph * @param algo similarity function to use * @param cfg config object of type SimPerSharedPrefConfig * * @returns something * * @todo type return value * @todo get rid of graph somehow (transfer method to other class...!) */ export function viaSharedPrefs(g: TypedGraph, algo: Function, cfg: $I.SimPerSharedPrefConfig ) { const sort = cfg.sort || sortFuncs.desc; const cutoff = cfg.co == null ? 1e-6 : cfg.co; const cutFunc = cfg.cutFunc || cutFuncs.above; const sims = []; const t1Set = g.getNodesT(cfg.t1); const t2Set = g.getNodesT(cfg.t2); const prefCache = new Map<string, Set<ITypedNode>>(); for ( let [t1Name, t1Node] of t1Set.entries() ) { for ( let [t2Name, t2Node] of t2Set.entries() ) { let prefSet1: Set<ITypedNode>, prefSet2: Set<ITypedNode>; if ( prefCache.get(t1Node.id) ) { prefSet1 = prefCache.get(t1Node.id); } else { prefSet1 = g[cfg.d1](t1Node, cfg.e1.toUpperCase()); prefCache.set(t1Node.id, prefSet1); } if ( prefCache.get(t2Node.id) ) { prefSet2 = prefCache.get(t2Node.id); } else { prefSet2 = g[cfg.d2](t2Node, cfg.e2.toUpperCase()); prefCache.set(t2Node.id, prefSet2); } if ( !prefSet1 || !prefSet2 || prefSet1.size === 0 || prefSet2.size === 0 ) { continue; } const sim = algo(prefSet1, prefSet2); if ( cutFunc(sim.sim, cutoff) ) { sims.push({from: t1Name, to: t2Name, ...sim}); } } } return sims.sort(sort); } /** * @description returns Set of elements in B that are not in A * @param a * @param b */ export function getBsNotInA(a: Set<ITypedNode>, b: Set<ITypedNode>) : Set<ITypedNode> { let result = new Set<ITypedNode>(); let sa = new Set(), sb = new Set(); for ( let e of a ) sa.add(e.label); // for ( let e of b ) sb.add(e.label); for ( let e of b ) { if ( !sa.has(e.label) ) { result.add(e); } } return result; } /** * @description works, but we would have to completely re-vamp $G typed traversals * in order to speed the code up by a factor of ~2... * @todo Fuck speed for the moment -> concern yourself with optimization -> * !!! AFTER THE FUCKING DEMO !!! * @todo I think this doesn't pay off in any way... */ // function simUint32(a: Uint32Array, b: Uint32Array) : Similarity { // a = a.sort(); // b = b.sort(); // const union = []; // let // i = 0, // j = 0; // while ( i < a.length || j < b.length ) { // if ( i >= a.length ) { // union.push(b[j++]); // } // else if ( j >= b.length ) { // union.push(a[i++]); // } // else { // union.push(a[i]); // if (a[i++] !== b[j]) { // union.push(b[j++]); // } // else { // j++; // } // } // } // const intersectSize = a.length + b.length - union.length; // return {isect: intersectSize, sim: intersectSize / union.length}; // }