UNPKG

graphinius

Version:

Generic graph library in Typescript

197 lines (167 loc) 4.47 kB
import * as $I from './interfaces';
import {IBaseNode} from '../core/base/BaseNode';

/** Number of significant digits every similarity value is rounded to. */
const PRECISION = 5;

/** Lookup table of all score-based similarity functions defined in this module. */
export const scoreSimFuncs = {
  cosine,
  cosineSets,
  euclidean,
  euclideanSets,
  pearson,
  pearsonSets
};


/*----------------------------------*/
/*   VECTOR SIMILARITY MEASURES     */
/*----------------------------------*/

/**
 * Euclidean distance between two equally sized score vectors.
 *
 * Note that the returned `sim` is a distance: 0 means identical vectors and
 * larger values mean less similar ones.
 *
 * @param a first score vector
 * @param b second score vector
 * @returns `{sim}` holding the distance rounded to PRECISION significant digits
 * @throws Error if the vectors differ in length
 */
function euclidean(a: number[], b: number[]): {sim: number} {
  if (a.length !== b.length) {
    throw new Error('Vectors must be of same size');
  }
  // Vectors of 1e4+ entries are copied into Float32Arrays, so their entries are
  // rounded to single precision before the differences are taken.
  const at = a.length < 1e4 ? a : new Float32Array(a);
  const bt = b.length < 1e4 ? b : new Float32Array(b);

  let sum = 0;
  for (let i = 0; i < at.length; i++) {
    const diff = at[i] - bt[i];
    sum += diff * diff;
  }
  return {sim: +Math.sqrt(sum).toPrecision(PRECISION)};
}


/**
 * Cosine similarity between two equally sized score vectors.
 *
 * Inputs are copied into Float32Arrays, i.e. rounded to single precision.
 * If either vector has zero magnitude (this includes empty vectors) the
 * division is 0/0 and `sim` is NaN.
 *
 * @param a first score vector
 * @param b second score vector
 * @returns `{sim}` rounded to PRECISION significant digits
 * @throws Error if the vectors differ in length
 */
function cosine(a: number[], b: number[]): {sim: number} {
  if (a.length !== b.length) {
    throw new Error('Vectors must be of same size');
  }
  const fa1 = new Float32Array(a);
  const fa2 = new Float32Array(b);

  let numerator = 0, dena = 0, denb = 0;
  for (let i = 0; i < fa1.length; i++) {
    numerator += fa1[i] * fa2[i];
    dena += fa1[i] * fa1[i];
    denb += fa2[i] * fa2[i];
  }
  const denominator = Math.sqrt(dena) * Math.sqrt(denb);
  return {sim: +(numerator / denominator).toPrecision(PRECISION)};
}


/** Arithmetic mean of `values` (NaN for an empty array). */
function meanOf(values: number[]): number {
  let sum = 0;
  for (let i = 0; i < values.length; i++) {
    sum += values[i];
  }
  return sum / values.length;
}


/**
 * Pearson correlation between two equally sized score vectors.
 *
 * Each mean is handled independently: a supplied mean is used as given
 * (including a legitimate mean of 0), a missing one is computed from the
 * corresponding vector. If either vector has no variance around its mean
 * the denominator is 0 and `sim` is NaN.
 *
 * @param a scores of user A for common targets
 * @param b scores of user B for common targets
 * @param a_mean avg rating for user a across ALL their ratings
 * @param b_mean avg rating for user b across ALL their ratings
 * @returns `{sim}` rounded to PRECISION significant digits
 * @throws Error if the vectors differ in length
 */
function pearson(a: number[], b: number[], a_mean?: number, b_mean?: number): {sim: number} {
  if (a.length !== b.length) {
    throw new Error('Vectors must be of same size');
  }
  // `== null` matches only undefined/null, so an explicit mean of 0 is kept
  // (a truthiness check would silently discard it).
  const mean_a = a_mean == null ? meanOf(a) : a_mean;
  const mean_b = b_mean == null ? meanOf(b) : b_mean;

  let numerator = 0, diff_a_sq = 0, diff_b_sq = 0;
  for (let i = 0; i < a.length; i++) {
    const a_diff = a[i] - mean_a;
    const b_diff = b[i] - mean_b;
    numerator += a_diff * b_diff;
    diff_a_sq += a_diff * a_diff;
    diff_b_sq += b_diff * b_diff;
  }
  const denominator = Math.sqrt(diff_a_sq) * Math.sqrt(diff_b_sq);
  return {sim: +(numerator / denominator).toPrecision(PRECISION)};
}


/*----------------------------------*/
/*  SCORED-SET SIMILARITY MEASURES  */
/*----------------------------------*/

/**
 * Cosine similarity of two scored sets, computed over the targets both share.
 *
 * @param a entries whose text before the first '#' is the target ID and whose
 *          text after the last '#' is the numeric score
 * @param b entries in the same format
 * @returns `{sim: 0}` if no target is shared, otherwise the result of `cosine`
 */
function cosineSets(a: Set<string>, b: Set<string>): {sim: number} {
  const [aa, ba] = extractCommonTargetScores(a, b);
  if (!aa.length || !ba.length) {
    return {sim: 0};
  }
  return cosine(aa, ba);
}


/**
 * Euclidean distance of two scored sets, computed over the targets both share.
 *
 * @param a entries whose text before the first '#' is the target ID and whose
 *          text after the last '#' is the numeric score
 * @param b entries in the same format
 * @returns `{sim: 0}` if no target is shared, otherwise the result of `euclidean`
 */
function euclideanSets(a: Set<string>, b: Set<string>): {sim: number} {
  const [aa, ba] = extractCommonTargetScores(a, b);
  if (!aa.length || !ba.length) {
    return {sim: 0};
  }
  return euclidean(aa, ba);
}


/**
 * Pearson correlation of two scored sets, computed over the targets both
 * share but centered on each set's mean over ALL of its entries.
 *
 * @param a entries whose text before the first '#' is the target ID and whose
 *          text after the last '#' is the numeric score
 * @param b entries in the same format
 * @returns `{sim: 0}` if no target is shared, otherwise the result of `pearson`
 */
function pearsonSets(a: Set<string>, b: Set<string>): {sim: number} {
  const [aa, ba, a_mean, b_mean] = extractCommonTargetScores(a, b);
  if (!aa.length || !ba.length) {
    return {sim: 0};
  }
  return pearson(aa, ba, a_mean, b_mean);
}


/**
 * Parses one scored set into a target -> score map plus the sum of all scores.
 * For repeated target IDs the entry iterated last wins in the map, while every
 * entry contributes to the sum.
 */
function parseScoredSet(entries: Set<string>): {scores: Map<string, number>, total: number} {
  const scores = new Map<string, number>();
  let total = 0;
  for (const entry of entries) {
    const parts = entry.split('#');
    // the first segment is the target ID, the last segment the score
    const score = +parts[parts.length - 1];
    total += score;
    scores.set(parts[0], score);
  }
  return {scores, total};
}


/**
 * Extracts the scores both sets hold for their common targets.
 *
 * Both vectors are built from the same sorted list of shared target IDs, so
 * they always have equal length and position i refers to the same target in
 * each of them.
 *
 * @param a entries whose text before the first '#' is the target ID and whose
 *          text after the last '#' is the numeric score
 * @param b entries in the same format
 * @returns `[a_vec, b_vec, a_mean, b_mean]`: the aligned score vectors for the
 *          shared targets, and each set's score sum divided by its size
 *          (NaN for an empty set)
 */
function extractCommonTargetScores(a: Set<string>, b: Set<string>): [number[], number[], number, number] {
  const parsedA = parseScoredSet(a);
  const parsedB = parseScoredSet(b);

  const commonTargets = Array.from(parsedA.scores.keys())
    .filter(id => parsedB.scores.has(id))
    .sort();

  // every ID in commonTargets is a key of both maps, so get() cannot miss
  const a_vec = commonTargets.map(id => parsedA.scores.get(id) as number);
  const b_vec = commonTargets.map(id => parsedB.scores.get(id) as number);

  return [a_vec, b_vec, parsedA.total / a.size, parsedB.total / b.size];
}