/*
 * siafun
 * Version:
 * A collection of structure induction algorithms
 * 700 lines (697 loc) • 30.3 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
const _ = require("lodash");
const util_1 = require("./util");
/**
 * Unfinished entry point for inferring a hierarchy from pattern occurrences.
 * Every entry of `occs` is converted with `toPattern`; the hierarchy itself is
 * not built yet, so the function currently returns nothing.
 * Assumes that all occurrences of a segment are of the same length!
 * @param occs list of occurrence lists, one per pattern
 */
function inferHierarchyFromPatternOccurrences(occs) {
  const patterns = [];
  occs.forEach(occurrences => {
    patterns.push(toPattern(occurrences));
  });
  //TODO NOW BUILD HIERARCHY (process/split the patterns, then assemble them)
}
exports.inferHierarchyFromPatternOccurrences = inferHierarchyFromPatternOccurrences;
/**
 * Exhaustive variant of the matrix-based inference: builds every combination
 * of the per-segment pattern partitions (cartesian product), rates each
 * combination by the entropy of its distribution of limits and returns the
 * combination with the lowest rating. Logs intermediate results.
 * @param matrix matrix handed to `matrixToPatterns`
 * @returns the candidate (one partition per segment) with the minimal rating
 */
function inferHierarchyFromMatrix2(matrix) {
  const allPatterns = matrixToPatterns(matrix);
  const partitionCounts = allPatterns.map(partitions => partitions.length);
  console.log(partitionCounts);
  const firstLengths = allPatterns.map(partitions => partitions[0][0].l);
  console.log(firstLengths);
  const candidates = util_1.cartesianProduct(allPatterns);
  console.log(candidates.length);
  //only patterns longer than one element contribute limits
  const limits = candidates.map(candidate => {
    const relevant = _.flatten(candidate).filter(p => p.l > 1);
    return getDistributionOfLimits(relevant);
  });
  console.log(limits.length);
  limits.forEach(distribution => {
    console.log(JSON.stringify(distribution));
  });
  const ratings = limits.map(distribution => util_1.getEntropy(distribution));
  console.log(ratings);
  const lowest = _.min(ratings);
  return candidates[ratings.indexOf(lowest)];
}
exports.inferHierarchyFromMatrix2 = inferHierarchyFromMatrix2;
/**
 * Infers the best combination of pattern partitions for the given matrix
 * using the greedy search in `searchForBestCombination`. Logs the limit
 * distribution of the first segment's partitions and the chosen combination.
 * @param matrix matrix handed to `matrixToPatterns`
 * @returns the selected partition for every segment
 */
function inferHierarchyFromMatrix(matrix) {
  const allPatterns = matrixToPatterns(matrix);
  const firstSegmentPatterns = _.flatten(allPatterns[0]).filter(p => p.l > 1);
  const limits = getDistributionOfLimits(firstSegmentPatterns);
  console.log(limits);
  const best = searchForBestCombination(allPatterns);
  console.log(JSON.stringify(best));
  return best;
}
exports.inferHierarchyFromMatrix = inferHierarchyFromMatrix;
/**
 * Greedy search for the combination of partitions with the lowest rating.
 * Starts with the first partition of every segment. In each round every
 * segment in turn may switch to the partition that lowers the rating most;
 * the search stops as soon as a round brings no improvement.
 * @param patterns for every segment, the list of its possible partitions
 * @returns the chosen partition of every segment
 */
function searchForBestCombination(patterns) {
  let bestIndexes = patterns.map(() => 0);
  let bestRating = getRating(patterns, bestIndexes);
  for (;;) {
    let roundIndexes = bestIndexes;
    let roundRating = bestRating;
    patterns.forEach((options, i) => {
      //all selections that differ from the current one only at position i
      const variants = options.map((_o, j) => roundIndexes.map((k, l) => (l == i ? j : k)));
      const ratings = variants.map(v => getRating(patterns, v));
      const lowest = _.min(ratings);
      if (lowest < roundRating) {
        roundIndexes = variants[ratings.indexOf(lowest)];
        roundRating = lowest;
      }
    });
    if (!(roundRating < bestRating)) {
      break;
    }
    bestIndexes = roundIndexes;
    bestRating = roundRating;
  }
  return bestIndexes.map((b, i) => patterns[i][b]);
}
/**
 * Rates a selection of partitions: the entropy of the distribution of limits
 * of all selected patterns that are longer than one element.
 * @param patterns for every segment, the list of its possible partitions
 * @param indexes for every segment, the index of the selected partition
 */
function getRating(patterns, indexes) {
  const chosen = patterns.map((options, i) => options[indexes[i]]);
  const relevant = _.flatten(chosen).filter(p => p.l > 1);
  return util_1.getEntropy(getDistributionOfLimits(relevant));
}
/**
 * Quick inference: takes the first possible partition of every segment and
 * builds a hierarchy from it. Logs the distribution of limits of these
 * patterns before any simplification.
 * @param matrix matrix to analyse; its length is the number of leaves
 * @param simplify when truthy, the patterns go through `simplifyPatterns` first
 * @param labels optional labels passed on to the hierarchy construction
 */
function quicklyInferHierarchyFromMatrix(matrix, simplify, labels) {
  const firstPatterns = getFirstPatterns(matrix);
  console.log(JSON.stringify(getDistributionOfLimits(firstPatterns)));
  const patterns = simplify ? simplifyPatterns(firstPatterns) : firstPatterns;
  return constructHierarchyFromPatterns(patterns, matrix.length, labels);
}
exports.quicklyInferHierarchyFromMatrix = quicklyInferHierarchyFromMatrix;
/**
 * Returns a matrix that contains only the n "best" segments of the given
 * matrix. A segment is ranked by the length of the first pattern of its first
 * partition (very reductive), longest first.
 *
 * Each segment is paired with the pattern derived from that very segment.
 * Zipping the segments with the flattened list of all first patterns would
 * misalign the pairs as soon as one segment yields more than one pattern
 * (initial/final residues), and could even pair a pattern with no segment.
 * @param matrix matrix to reduce
 * @param n number of segments to keep
 * @returns a matrix of the same size with only the kept segments set to 1
 */
function keepNBestSegments(matrix, n) {
  const ranked = matrixToSegments(matrix).map(segment => ({
    segment,
    //first pattern of the first possible partition of this segment
    length: alignmentToPatterns(segment)[0][0].l
  }));
  const best = _.reverse(_.sortBy(ranked, r => r.length));
  return segmentsToMatrix(best.slice(0, n).map(r => r.segment), getSize(matrix));
}
exports.keepNBestSegments = keepNBestSegments;
/**
 * Builds a matrix from the transitive closure of the first patterns of the
 * given matrix (very reductive: only the first partition of each segment is
 * considered).
 * @param matrix matrix to analyse
 * @param simplify when truthy, the patterns go through `simplifyPatterns` first
 * @returns a matrix of the same size as the input
 */
function getTransitiveMatrix(matrix, simplify) {
  const firstPatterns = getFirstPatterns(matrix);
  const patterns = simplify ? simplifyPatterns(firstPatterns) : firstPatterns;
  const transitive = makePatternsTransitive(patterns);
  return patternsToMatrix(transitive, getSize(matrix));
}
exports.getTransitiveMatrix = getTransitiveMatrix;
/**
 * Returns, as one flat list, the patterns of the first possible partition of
 * every segment found in the matrix.
 */
function getFirstPatterns(matrix) {
  const firstPartitions = matrixToPatterns(matrix).map(partitions => partitions[0]);
  return _.flatten(firstPartitions);
}
exports.getFirstPatterns = getFirstPatterns;
/**
 * Extracts the segments of the matrix and returns, for each of them, all
 * possible partitions into patterns.
 */
function matrixToPatterns(matrix) {
  const segments = matrixToSegments(matrix);
  return segments.map(segment => alignmentToPatterns(segment));
}
/**
 * Simplifies a list of patterns in several stages: sorting, merging of
 * adjacent patterns, removal of sub-segments, synchronisation of multiples
 * and removal of multiples. After every stage patterns shorter than
 * `minLength` are dropped and the intermediate result is logged.
 * @param patterns patterns to simplify
 * @param minLength minimal pattern length to keep (default 2)
 * @returns the simplified patterns
 */
function simplifyPatterns(patterns, minLength = 2) {
  const keepLongEnough = ps => ps.filter(p => p.l >= minLength);
  const dump = value => console.log(JSON.stringify(value));
  console.log("full", patterns.length);
  //sort by length, beginning point, and first vector
  let current = keepLongEnough(sortPatterns(patterns));
  dump(current);
  const stages = [
    ["merged", mergeAdjacent],
    ["subsegs", removeSubsegs],
    ["sync", syncMultiples],
    ["remove", removeMultiples]
  ];
  for (const [label, stage] of stages) {
    current = keepLongEnough(stage(current));
    console.log(label, current.length);
    dump(current);
  }
  const timeline = getDistributionOfLimits(current.filter(s => s.l > 1));
  dump(timeline);
  //open ideas: rate the agreement of each pattern's limits with all other
  //limits, and remove pattern overlaps / included patterns
  return current;
}
exports.simplifyPatterns = simplifyPatterns;
/**
 * Counts how often every position occurs as a limit (beginning or end of an
 * occurrence) of the given patterns.
 * @returns an array indexed by position, reaching up to the largest limit
 */
function getDistributionOfLimits(patterns) {
  const limits = _.flatten(patterns.map(s => getLimits(s)));
  const counts = _.range(0, _.max(limits) + 1).map(() => 0);
  for (const limit of limits) {
    counts[limit]++;
  }
  return counts;
}
/**
 * Returns the indexes of the n largest strictly positive values of the array,
 * largest first. Values tied with the last accepted value are included as
 * well, so more than n indexes may be returned.
 *
 * For n <= 0 an empty list is returned (the previous-element comparison is
 * only made when there is a previous element).
 * @param array numbers to search
 * @param n number of maxima wanted
 */
function indexesOfNMax(array, n) {
  const positives = _.reverse(_.sortBy(array.map((a, i) => [a, i]), 0))
    .filter(m => m[0] > 0); //filter out <= 0
  return _.takeWhile(positives, (m, i) => i < n || (i > 0 && m[0] == positives[i - 1][0]))
    .map(m => m[1]);
}
/**
 * Walks through the patterns and merges each one into its predecessor in the
 * result whenever `merge` succeeds; otherwise the pattern is appended.
 * The input needs to be sorted from long to short.
 */
function mergeAdjacent(pattern) {
  const result = [];
  pattern.forEach(s => {
    const merged = result.length > 0 ? merge(_.last(result), s) : null;
    if (merged) {
      result[result.length - 1] = merged;
    }
    else {
      result.push(s);
    }
  });
  return result;
}
/**
 * Removes every pattern whose pattern graph is a sub-graph of the strictly
 * larger graph of another pattern.
 */
function removeSubsegs(pattern) {
  const graphs = pattern.map(s => toPatternGraph(s));
  const sizes = graphs.map(g => size(g));
  const isCovered = i => graphs.some((g, j) => i != j && sizes[i] < sizes[j] && subGraph(graphs[i], g));
  return pattern.filter((_s, i) => !isCovered(i));
}
/** Returns the total number of entries in all value lists of the graph. */
function size(graph) {
  const targetLists = _.values(graph);
  return _.flatten(targetLists).length;
}
/**
 * Aligns starting points and vectors of all patterns that are seamless multiples.
 * The patterns need to be sorted from long to short.
 *
 * Every pattern s is compared with each pattern t that precedes it in the list.
 * The pattern objects are modified in place (p and ts) and the same array is
 * returned. Passes over the list are repeated until a full pass makes no
 * adjustment.
 * @param pattern list of patterns ({p, l, ts})
 * @returns the given list
 */
function syncMultiples(pattern) {
//seamlessness is determined once, before any adjustment is made
const seamlesses = pattern.map(s => seamless(s));
let adjusted = true;
while (adjusted) {
adjusted = false;
//t runs over all patterns before s in the list (index j < i)
pattern.forEach((s, i) => pattern.slice(0, i).forEach((t, j) => {
//both seamless, t.l a multiple of s.l, sufficient overlap, and not yet in
//sync (different beginning or a differing number of copies)
if (seamlesses[i] && seamlesses[j]
&& multiple(t.l, s.l)
&& overlapSize(s, t) >= Math.max(s.l, t.l)
&& (s.p != t.p || t.ts.length + 1 != Math.floor((s.ts.length + 1) / (t.l / s.l)))) {
//adjust beginnings: whichever pattern starts later is moved back to the earlier start
if (s.p < t.p)
moveBeginning(t, s.p - t.p);
if (t.p < s.p)
moveBeginning(s, t.p - s.p);
//adjust vectors to cover same range
//console.log((t.ts.length+1), Math.floor((s.ts.length+1)/(t.l/s.l)))
//factor: how many occurrences of s fit into one occurrence of t
const factor = t.l / s.l;
//copies: number of s-sized occurrences needed to cover the longer of the two
const copies = Math.max((t.ts.length + 1) * factor, s.ts.length + 1);
//console.log(copies, factor)
//regenerate both vector lists as regular multiples of the pattern lengths
t.ts = _.range(1, Math.floor(copies / factor)).map(i => t.l * i);
s.ts = _.range(1, copies).map(i => s.l * i);
adjusted = true;
}
}));
}
return pattern;
}
/**
 * Moves the beginning of the pattern by delta and regenerates its vectors as
 * consecutive multiples of the pattern length, with the number of vectors
 * adjusted by ceil(delta / l). The pattern needs to be seamless and is
 * modified in place.
 */
function moveBeginning(s, delta) {
  const skippedCopies = Math.ceil(delta / s.l);
  const end = s.ts.length + 1 - skippedCopies;
  s.p += delta;
  s.ts = _.range(1, end).map(i => i * s.l);
}
/**
 * Removes patterns that are a multiple of a later pattern in the list and are
 * entirely covered by it: both seamless, the length a multiple of the other
 * length, sufficient overlap, same beginning and all vectors contained.
 */
function removeMultiples(pattern) {
  const coveredBy = (s, t) => seamless(s) && seamless(t) && multiple(s.l, t.l)
    && overlapSize(s, t) >= Math.max(s.l, t.l)
    && s.p == t.p && s.ts.every(u => _.includes(t.ts, u));
  return pattern.filter((s, i) => {
    const later = pattern.slice(i + 1);
    return !later.some(t => coveredBy(s, t));
  });
}
/**
 * Returns true if the occurrences of the pattern follow each other without
 * gaps or overlaps, i.e. the first vector and every step between successive
 * vectors equal the pattern length.
 */
function seamless(s) {
  return s.ts.every((t, i) => {
    const step = i == 0 ? t : t - s.ts[i - 1];
    return step == s.l;
  });
}
/** Returns true if n is a multiple of m, i.e. n modulo m is zero. */
function multiple(n, m) {
  const remainder = util_1.modForReal(n, m);
  return remainder == 0;
}
/** Returns the number of positions that are covered by occurrences of both patterns. */
function overlapSize(s, t) {
  const coveredByS = _.flatten(getOccurrences(s));
  const coveredByT = _.flatten(getOccurrences(t));
  return _.intersection(coveredByS, coveredByT).length;
}
/*//removes whole occurrences in s if they are fully described by t
function removeOverlaps(pattern: Pattern[]) {
return pattern.map(s =>
pattern.filter(t => t != s && t.l <= s.l).reduce((r,t) =>
difference(r, t)
, s));
}
export function difference(s: SegGraph, t: SegGraph) {
s = _.cloneDeep(s);
_.keys(t).forEach(k => { if (s[k]) s[k] = _.difference(s[k], t[k]) });
return cleanUp(s);
}
function cleanUp(s: SegGraph) {
_.keys(s).forEach(k => { if (s[k].length == 0) delete s[k] });
const firstImage: number[] = [];
const origin = _.takeWhile(_.keys(s), k =>
!subset(s[k], firstImage) ? firstImage.push(...s[k]) : false);
console.log(JSON.stringify(firstImage.length))
console.log(JSON.stringify(origin.length))
console.log(JSON.stringify(origin.map(o => s[o])))
const reached = origin.concat(_.flatten(origin.map(o => s[o])));
_.difference(_.keys(s), reached).forEach(k => delete s[k] );
return s;
}*/
//all these functions can be optimized due to the lists being sorted...
/**
 * Returns true if, for every key of s, the value list of s is a subset of
 * the value list stored under the same key in t.
 */
function subGraph(s, t) {
  for (const key of _.keys(s)) {
    if (!subset(s[key], t[key])) {
      return false;
    }
  }
  return true;
}
exports.subGraph = subGraph;
/**
 * Returns true if every element of s1 is included in s2.
 * Could be optimized for sorted lists.
 */
function subset(s1, s2) {
  const hasMissingElement = s1.some(element => !_.includes(s2, element));
  return !hasMissingElement;
}
/**
 * Converts a pattern into a graph: every position of every occurrence except
 * the last one is mapped to the corresponding positions in all later
 * occurrences.
 * @param s pattern ({p, l, ts})
 * @returns an object mapping positions to lists of later positions
 */
function toPatternGraph(s) {
  const offsets = [0].concat(s.ts);
  const graph = {};
  for (const p of _.range(s.p, s.p + s.l)) {
    //the last occurrence has no later counterpart and gets no entry
    offsets.slice(0, -1).forEach((t, i) => {
      graph[p + t] = offsets.slice(i + 1).map(u => u + p);
    });
  }
  return graph;
}
exports.toPatternGraph = toPatternGraph;
/**
 * Converts a pattern graph back into a pattern.
 * The first key is taken as the beginning, its value list (relative to the
 * beginning) as the vectors, and the number of leading keys whose value list
 * has as many entries as the first one as the length.
 * The key is parsed explicitly as a decimal number.
 * @param s graph mapping positions to lists of later positions
 * @returns the pattern ({p, l, ts})
 */
function graphToPattern(s) {
  const keys = _.keys(s);
  const p = parseInt(keys[0], 10);
  const ts = s[p].map(t => t - p);
  const l = _.takeWhile(keys, k => s[k].length == ts.length).length;
  return { p: p, l: l, ts: ts };
}
exports.graphToPattern = graphToPattern;
/**
 * For every position of the matrix, sums up the lengths of all first patterns
 * that have an occurrence beginning at that position.
 * @returns an array with one sum per matrix row
 */
function getEdges(matrix) {
  const segs = getFirstPatterns(matrix);
  const result = _.range(0, matrix.length).map(() => 0);
  for (const s of segs) {
    for (const t of [0].concat(s.ts)) {
      const position = s.p + t;
      //occurrences beginning beyond the matrix are ignored
      if (position < result.length) {
        result[position] = result[position] + s.l;
      }
    }
  }
  return result;
}
exports.getEdges = getEdges;
/**
 * Constructs a hierarchy from the given patterns: the patterns are made
 * transitive, then every occurrence of every pattern is grouped within a flat
 * list of `size` leaves. The result is simplified and optionally labelled.
 * @param patterns patterns to build the hierarchy from
 * @param size number of leaves
 * @param labels optional labels replacing the leaf indexes
 */
function constructHierarchyFromPatterns(patterns, size, labels) {
  const transitive = makePatternsTransitive(patterns);
  const hierarchy = _.range(0, size);
  for (const s of transitive) {
    for (const t of _.concat([0], s.ts)) {
      const start = s.p + t;
      groupAdjacentLeavesInTree(hierarchy, _.range(start, start + s.l));
    }
  }
  return labelHierarchy(simplifyHierarchy(hierarchy), labels);
}
/**
 * Removes the overlaps between the patterns, logs the result, and then adds
 * transitivity to the remaining patterns.
 */
function makePatternsTransitive(patterns) {
  const withoutOverlaps = removePatternOverlaps(patterns);
  console.log("noov", JSON.stringify(withoutOverlaps));
  return addTransitivity(withoutOverlaps);
}
/**
 * Rates a hierarchy: the mean length of all nodes spanning more than one leaf
 * multiplied by the number of such nodes.
 */
function rateHierarchy(tree) {
  const groupLengths = getNodeLengths(tree).filter(n => n > 1);
  const meanLength = _.mean(groupLengths);
  return meanLength * groupLengths.length;
}
exports.rateHierarchy = rateHierarchy;
/**
 * Lists the number of children of every node of the tree in pre-order.
 * Leaves (anything that is not an array) count as 0.
 */
function getChildrenCounts(tree) {
  if (!Array.isArray(tree)) {
    return [0];
  }
  const countsOfChildren = tree.map(child => getChildrenCounts(child));
  return [tree.length].concat(_.flatten(countsOfChildren));
}
/**
 * Lists the number of leaves below every node of the tree in pre-order.
 * Leaves (anything that is not an array) count as 1.
 */
function getNodeLengths(tree) {
  if (!Array.isArray(tree)) {
    return [1];
  }
  const ownLength = _.flattenDeep(tree).length;
  const lengthsOfChildren = tree.map(child => getNodeLengths(child));
  return [ownLength].concat(_.flatten(lengthsOfChildren));
}
/**
 * Simplifies a hierarchy by replacing every node that has exactly one child
 * with the (simplified) child. Non-array values are returned as they are.
 */
function simplifyHierarchy(hierarchy) {
  if (!Array.isArray(hierarchy)) {
    return hierarchy;
  }
  if (hierarchy.length == 1) {
    return simplifyHierarchy(hierarchy[0]);
  }
  return hierarchy.map(child => simplifyHierarchy(child));
}
/**
 * Replaces every leaf of the hierarchy with the label stored at the leaf's
 * index. Without labels the hierarchy is returned unchanged.
 */
function labelHierarchy(hierarchy, labels) {
  if (!labels) {
    return hierarchy;
  }
  return Array.isArray(hierarchy)
    ? hierarchy.map(child => labelHierarchy(child, labels))
    : labels[hierarchy];
}
/**
 * Groups the given leaves into one sub-array of the tree (in place).
 * If all leaves are direct children of the current node, they are replaced by
 * a single child holding them, starting at the position of the first leaf;
 * otherwise the search continues in every child. Non-array nodes are ignored.
 */
function groupAdjacentLeavesInTree(tree, leaves) {
  if (!Array.isArray(tree)) {
    return;
  }
  const allDirectChildren = leaves.every(leaf => _.includes(tree, leaf));
  if (allDirectChildren) {
    const start = tree.indexOf(leaves[0]);
    tree.splice(start, leaves.length, leaves);
    return;
  }
  //recur
  tree.forEach(child => groupAdjacentLeavesInTree(child, leaves));
}
/**
 * Adds transitivity: whenever a pattern is contained in an occurrence of a
 * pattern earlier in the list, it inherits the occurrences of that pattern.
 * Overlaps have to be removed before, and the patterns need to be sorted from
 * longest to shortest. The patterns are modified in place.
 * @returns the given list
 */
function addTransitivity(patterns) {
  patterns.forEach((s, i) => {
    //earlier patterns, nearest first
    const earlier = _.reverse(patterns.slice(0, i));
    earlier.forEach(t => {
      const ps = getInternalPositions(s, t);
      if (ps.length > 0) {
        //move ref point of child pattern to first occurrence
        const firstOccurrence = t.p + ps[0];
        if (firstOccurrence < s.p) {
          moveRefPoint(s, firstOccurrence - s.p);
        }
        //update translation vectors
        const inherited = _.flatten(ps.map(p => getPoints(t).map(u => u + p - s.p)));
        const vectors = _.uniq(_.sortBy(_.concat(s.ts, inherited)));
        s.ts = vectors.filter(v => v != 0);
      }
    });
  });
  return patterns;
}
exports.addTransitivity = addTransitivity;
/**
 * Returns the sorted, unique relative positions at which an occurrence of s
 * is fully contained in an occurrence of t.
 */
function getInternalPositions(s, t) {
  const occurrencesOfT = getOccurrences(t);
  const found = [];
  for (const so of getOccurrences(s)) {
    for (const to of occurrencesOfT) {
      if (so.every(p => _.includes(to, p))) {
        const relative = so[0] - to[0];
        if (relative >= 0) {
          found.push(relative);
        }
      }
    }
  }
  return _.sortBy(_.uniq(found));
}
/**
 * Moves the reference point of the pattern by delta (in place) and shifts all
 * vectors by the same amount in the opposite direction.
 */
function moveRefPoint(s, delta) {
  s.ts = s.ts.map(vector => vector - delta);
  s.p += delta;
}
/** Returns all the occurrences of a pattern as ranges of consecutive indexes. */
function getOccurrences(s) {
  const starts = [s.p, ...s.ts.map(t => s.p + t)];
  return starts.map(start => _.range(start, start + s.l));
}
/**
 * Removes any pattern overlaps, starting with the longest pattern and
 * adjusting shorter ones to fit within limits: the first remaining pattern is
 * accepted, the accepted patterns are made transitive and unpacked, and all
 * remaining patterns are divided at the limits of the accepted pattern.
 * @param patterns patterns to process
 * @param minSegLength minimal length of a pattern to be kept (default 3)
 * @param minDist minimal distance from parents to be kept (default 2)
 * @param divFactor factor that all vectors have to be a multiple of (default 1)
 * @returns the accepted patterns
 */
function removePatternOverlaps(patterns, minSegLength = 3, minDist = 2, divFactor = 1) {
  let result = [];
  let remaining = filterAndSortPatterns(patterns, minSegLength, minDist, divFactor, result);
  console.log(JSON.stringify(remaining));
  while (remaining.length > 0) {
    const next = remaining.shift();
    result.push(next);
    result = unpack(addTransitivity(result));
    const boundaries = _.uniq(getLimits(next));
    for (const boundary of boundaries) {
      remaining = _.flatten(remaining.map(s => divideAtPos(s, boundary)));
    }
    remaining = filterAndSortPatterns(remaining, minSegLength, minDist, divFactor, result);
  }
  return result;
}
/**
 * Keeps the patterns that are long enough, far enough from their parents and
 * whose vectors are all multiples of divFactor, then sorts them with
 * `sortPatterns` relative to the reference patterns.
 */
function filterAndSortPatterns(patterns, minSegLength, minDist, divFactor, refPatterns) {
  const accepted = patterns.filter(s => {
    if (!(s.l >= minSegLength)) {
      return false;
    }
    if (!(minDistFromParents(s, refPatterns) >= minDist)) {
      return false;
    }
    return s.ts.every(t => util_1.modForReal(t, divFactor) == 0);
  });
  return sortPatterns(accepted, refPatterns);
}
/**
 * Sorts patterns by min(dist from parents, length) in descending order; ties
 * are resolved by position and then by smallest vector, both ascending.
 * @param patterns patterns to sort (not modified)
 * @param parentCandidates possible parents used for the distance (default none)
 */
function sortPatterns(patterns, parentCandidates = []) {
  const byVector = _.sortBy(patterns, s => s.ts[0]);
  const byPosition = _.sortBy(byVector, s => s.p);
  //reversed here so that the final reversal restores ascending tie order
  const descendingPosition = _.reverse(byPosition);
  const byRank = _.sortBy(descendingPosition, s => Math.min(s.l, minDistFromParents(s, parentCandidates)));
  return _.reverse(byRank);
}
/**
 * Returns the smallest distance of the pattern from the candidates that fully
 * contain it, also considering the pattern's own vectors (distance from the
 * diagonal). Returns Infinity if no candidate contains the pattern.
 */
function minDistFromParents(pattern, parentCandidates) {
  const parents = parentCandidates.filter(p => containedBy(pattern, p));
  if (parents.length == 0) {
    return Infinity;
  }
  const distances = parents.map(p => getDistance(pattern, p));
  return _.min(distances.concat(pattern.ts));
}
exports.minDistFromParents = minDistFromParents;
/**
 * Returns true if p1 is fully contained by p2, i.e. every position covered by
 * an occurrence of p1 is also covered by an occurrence of p2.
 */
function containedBy(p1, p2) {
  const inner = _.flatten(getOccurrences(p1));
  const outer = _.flatten(getOccurrences(p2));
  const uncovered = _.difference(inner, outer);
  return uncovered.length == 0;
}
exports.containedBy = containedBy;
/**
 * Returns the smallest absolute difference between any vector of p1 and any
 * vector of p2.
 */
function getDistance(p1, p2) {
  const gaps = [];
  p1.ts.forEach(t => {
    p2.ts.forEach(u => gaps.push(Math.abs(t - u)));
  });
  return _.min(gaps);
}
exports.getDistance = getDistance;
/**
 * Unpacks patterns into patterns with a single vector each: one for every
 * pair of an occurrence and a later occurrence of the same pattern.
 * Duplicates (same JSON representation) are removed.
 */
function unpack(patterns) {
  const pairs = [];
  patterns.forEach(p => {
    [0].concat(p.ts).forEach((t, i) => {
      //vectors from index i on lead to occurrences later than the one at t
      p.ts.slice(i).forEach(u => {
        pairs.push({ p: p.p + t, l: p.l, ts: [u - t] });
      });
    });
  });
  return _.uniqBy(pairs, q => JSON.stringify(q));
}
exports.unpack = unpack;
/**
 * Converts patterns into a symmetric matrix of the given size in which
 * corresponding positions of any two different occurrences of a pattern are
 * set to 1.
 * @param patterns patterns to draw
 * @param size [rows, columns] of the resulting matrix
 */
function patternsToMatrix(patterns, size) {
  const matrix = getZeroMatrix(size);
  for (const s of patterns) {
    const offsets = [0].concat(s.ts);
    for (const i of _.range(0, s.l)) {
      for (const t of offsets) {
        for (const u of offsets) {
          if (t != u) {
            matrix[s.p + t + i][s.p + u + i] = 1;
            matrix[s.p + u + i][s.p + t + i] = 1;
          }
        }
      }
    }
  }
  return matrix;
}
exports.patternsToMatrix = patternsToMatrix;
/**
 * Extracts diagonal segments from the part of the matrix above the main
 * diagonal: positive cells that follow each other diagonally form one
 * segment.
 * @returns the segments as lists of [row, column] points, longest first
 */
function matrixToSegments(matrix) {
  const cells = _.flatten(matrix.map((row, i) => row.map((val, j) => [i, j, val])));
  const active = cells.filter(c => c[1] > c[0] && c[2] > 0);
  //order the cells diagonal by diagonal
  const byDiagonal = _.sortBy(active, c => c[1] - c[0]);
  const segments = [];
  byDiagonal.forEach(c => {
    const previous = _.last(_.last(segments)) || [0, 0, 0];
    const point = c.slice(0, 2);
    if (c[0] - previous[0] == 1 && c[1] - previous[1] == 1) {
      _.last(segments).push(point);
    }
    else {
      segments.push([point]);
    }
  });
  return _.reverse(_.sortBy(segments, s => s.length));
}
/**
 * Converts segments back into a symmetric matrix of the given size: every
 * point of every segment and its mirrored counterpart are set to 1.
 * @param segments lists of [row, column] points
 * @param size [rows, columns] of the resulting matrix
 */
function segmentsToMatrix(segments, size) {
  const matrix = getZeroMatrix(size);
  for (const segment of segments) {
    for (const point of segment) {
      matrix[point[0]][point[1]] = 1;
      matrix[point[1]][point[0]] = 1;
    }
  }
  return matrix;
}
/**
 * Returns the size of the matrix as [number of rows, length of the first row].
 * An empty matrix yields [0, 0] instead of failing on the missing first row.
 */
function getSize(matrix) {
  const rows = matrix.length;
  const columns = rows > 0 ? matrix[0].length : 0;
  return [rows, columns];
}
/**
 * Creates a matrix filled with zeros.
 * @param size [rows, columns]
 */
function getZeroMatrix(size) {
  const makeRow = () => _.range(0, size[1]).map(() => 0);
  return _.range(0, size[0]).map(() => makeRow());
}
/**
 * Converts an alignment (list of [row, column] points on one diagonal) into
 * its tiles, starting without offset, and attaches the residue `r`: the
 * remainder of the alignment length divided by the tile length, or 0 if the
 * alignment is not longer than one tile.
 *
 * The residue is attached with Object.assign; Object.apply would call the
 * Object constructor and always yield an empty object.
 * @param a alignment; the interval is taken from its first point
 * @returns the list of tiles from `getTiles` with an additional property `r`
 */
function alignmentToPattern(a) {
  const interval = a[0][1] - a[0][0];
  const length = Math.min(a.length, interval);
  const rest = a.length > length ? util_1.modForReal(a.length, length) : 0;
  return Object.assign(getTiles(a[0][0], a.length, interval), { r: rest });
}
exports.alignmentToPattern = alignmentToPattern;
/**
 * Returns all possible partitions into patterns for the given alignment.
 * The partitions include full occurrences and initial and final residues.
 * An alignment that is not longer than its interval yields one partition
 * with a single pattern; otherwise one partition is created for every
 * possible offset.
 * @param a alignment (list of [row, column] points on one diagonal)
 * @returns a list of partitions, each a list of patterns
 */
function alignmentToPatterns(a) {
  const start = a[0][0];
  const interval = a[0][1] - start;
  const length = Math.min(a.length, interval);
  if (!(a.length > length)) {
    return [[{ p: start, l: length, ts: [interval] }]];
  }
  const numSolutions = util_1.modForReal(a.length, length) + 1;
  return _.range(0, numSolutions).map(offset => getTiles(start, a.length, interval, offset));
}
exports.alignmentToPatterns = alignmentToPatterns;
/**
 * Partitions a stretch of the given length, starting at point, into patterns:
 * an optional initial residue of `offset` elements, the main tiles of
 * `interval` elements each, and an optional final residue.
 *
 * The final residue is placed right after the last full tile. When no full
 * tile fits, it starts directly after the offset (taking the last main
 * vector in that case would yield NaN, since there is none).
 * @param point position at which the stretch begins
 * @param length number of elements in the stretch
 * @param interval tile length, also used as the vector of the residues
 * @param offset length of the initial residue (default 0)
 * @returns the list of patterns ({p, l, ts})
 */
function getTiles(point, length, interval, offset = 0) {
  const segs = [];
  //initial residue
  if (offset > 0) {
    segs.push({ p: point, l: offset, ts: [interval] });
  }
  //main tiles
  const remainingLength = length - offset;
  const numCopies = Math.floor(remainingLength / interval);
  if (numCopies > 0) {
    const vectors = _.range(1, numCopies + 1).map(i => i * interval);
    segs.push({ p: point + offset, l: interval, ts: vectors });
  }
  //final residue
  const rest = util_1.modForReal(remainingLength, interval);
  if (rest > 0) {
    const tiledLength = numCopies > 0 ? numCopies * interval : 0;
    segs.push({ p: point + offset + tiledLength, l: rest, ts: [interval] });
  }
  return segs;
}
/**
 * Converts a list of occurrences into a pattern. The beginning and the
 * length (last minus first entry) are taken from the first occurrence; the
 * vectors are the beginnings of the other occurrences relative to it.
 */
function toPattern(occurrences) {
  const first = occurrences[0];
  const start = first[0];
  const vectors = occurrences.slice(1).map(o => o[0] - start);
  return { p: start, l: _.last(first) - start, ts: vectors };
}
/** offset: position relative to the beginning of the pattern (s.p) */
/*function splitUp(s: Pattern, offset = 0): Pattern[] {
const period = _.min(s.ts);
const regular = s.ts.every(t => multiple(t, period));
if (regular && offset < ) {
console.log(s)
const segs = [];
//initial residue
if (offset > 0) {
segs.push({p: s.p, l: offset, ts: s.ts[0]});
s = {p: s.p+offset, l: s.l-offset, ts: s.ts};
}
//complete patterns
segs.push({p: s.p+offset, l: period, ts: s.ts});
//final residue
if (s) segs.push({p: s.l-_.last()});
return segs;
}
}*/
/**
 * Divides the pattern wherever the absolute position pos lies strictly
 * inside one of its occurrences. The relative locations are applied from the
 * largest to the smallest, always dividing the first resulting part.
 * @returns the resulting parts ([s] if pos lies inside no occurrence)
 */
function divideAtPos(s, pos) {
  const inside = [];
  for (const start of getPoints(s)) {
    if (start < pos && pos < start + s.l) {
      inside.push(pos - start);
    }
  }
  const locs = _.reverse(_.uniq(inside));
  let parts = [s];
  for (const loc of locs) {
    parts = divide(parts[0], loc).concat(parts.slice(1));
  }
  return parts;
}
/**
 * Divides the pattern s at the relative position loc into two patterns that
 * keep the vectors of s. If loc does not lie strictly inside the pattern,
 * s is returned undivided.
 */
function divide(s, loc) {
  const inside = 0 < loc && loc < s.l;
  if (!inside) {
    return [s];
  }
  const head = { p: s.p, l: loc, ts: s.ts };
  const tail = { p: s.p + loc, l: s.l - loc, ts: s.ts };
  return [head, tail];
}
/**
 * Merges two patterns of equal length that share at least one beginning of
 * an occurrence. The merged pattern begins at the earlier of the two and has
 * the sorted union of all vectors relative to that beginning.
 * @returns the merged pattern, or undefined if the patterns cannot be merged
 */
function merge(s1, s2) {
  if (s1.l != s2.l || !commonPoint(s1, s2)) {
    //patterns of different lengths are not merged (might be tried as well)
    return undefined;
  }
  const p = _.min([s1.p, s2.p]);
  const shifted = [s1, s2].map(s => s.ts.map(t => t + s.p - p));
  const ts = _.sortBy(_.uniq(_.flatten(shifted)));
  return { p: p, l: s1.l, ts: ts };
}
/** Returns true if at least one occurrence of s1 begins where an occurrence of s2 begins. */
function commonPoint(s1, s2) {
  const shared = _.intersection(getPoints(s1), getPoints(s2));
  return shared.length > 0;
}
/**
 * Returns the limits of all occurrences of s: for every occurrence its
 * beginning followed by the position right after its end.
 */
function getLimits(s) {
  const limits = [];
  for (const start of getPoints(s)) {
    limits.push(start, start + s.l);
  }
  return limits;
}
/** Returns all the points at which occurrences of s begin. */
function getPoints(s) {
  const translated = s.ts.map(t => s.p + t);
  return [s.p, ...translated];
}
/**
 * Infers a hierarchy BOTTOM-UP from a sequence of numbers representing types.
 *
 * The most common pair of adjacent types is repeatedly replaced by a new type
 * until `getMostCommonPair` finds no more pair. New types are either created
 * from the pair or obtained by extending an existing new type. Finally the
 * new types in the remaining sequence are replaced by their contents.
 * @param typeSequence sequence of numbers representing types
 * @param unequalPairsOnly passed on to `getMostCommonPair`
 * @param log when truthy, intermediate results are written to the console
 * @returns the sequence with all new types replaced by nested lists
 */
function inferHierarchyFromTypeSequence(typeSequence, unequalPairsOnly, log) {
//generate new types by merging into binary tree
//maps the index of every new type to the list of types it consists of
const newTypes = new Map();
let currentSequence = _.clone(typeSequence);
//indexes of new types begin right after the largest type of the input
let currentIndex = _.max(typeSequence) + 1;
let currentPair = getMostCommonPair(currentSequence, unequalPairsOnly);
while (currentPair != null) {
//replace every occurrence of the pair in the sequence with the new index
currentSequence = currentSequence.reduce((s, t) => s.length > 0 && _.isEqual([_.last(s), t], currentPair) ?
_.concat(_.initial(s), currentIndex)
: _.concat(s, t), []);
//contents of all new types other than the ones of the two pair members
const otherPreviousTypes = _.difference([...newTypes.values()], [newTypes.get(currentPair[0]), newTypes.get(currentPair[1])]);
//console.log(JSON.stringify(currentSequence));
/*console.log(newTypes.get(currentPair[0]), newTypes.get(currentPair[1]),
currentPair.every(u => !_.includes(currentSequence, u)),
currentPair.every(u => !_.includes(_.flatten(otherPreviousTypes), u)))*/
//amend type if possible
//contents of the pair members, if they are new types themselves
const firstNew = newTypes.get(currentPair[0]);
const secondNew = newTypes.get(currentPair[1]);
//true if t still occurs in the sequence or in one of the other new types
const occursPreviously = (t) => _.includes(currentSequence, t)
|| _.includes(_.flatten(otherPreviousTypes), t);
const firstOccursInType = firstNew && occursPreviously(currentPair[0]);
const secondOccursInType = secondNew && occursPreviously(currentPair[1]);
//amend only if at least one member is a new type and no member that is a
//new type is still in use elsewhere
if ((firstNew || secondNew) && !firstOccursInType && !secondOccursInType) {
let operation;
if (firstNew && secondNew) {
//check if first/second type contain each other
operation = _.intersection(firstNew, currentPair).length > 0 ? 'push'
: _.intersection(secondNew, currentPair).length > 0 ? 'unshift'
: 'concat';
}
else {
operation = firstNew ? 'push' : 'unshift';
}
if (operation === 'concat') {
//join the contents of both members and drop the two old types
newTypes.set(currentIndex, _.concat(firstNew, secondNew));
newTypes.delete(currentPair[0]);
newTypes.delete(currentPair[1]);
if (log)
console.log(currentIndex, ': concat', JSON.stringify(newTypes.get(currentIndex)));
//currentSequence = currentSequence.map(s => s === currentIndex ? currentPair[0] : s);
}
else if (operation === 'push') {
//append the second member to the contents of the first and drop the first
newTypes.set(currentIndex, _.concat(firstNew, currentPair[1]));
newTypes.delete(currentPair[0]);
if (log)
console.log(currentIndex, ': push', JSON.stringify(newTypes.get(currentIndex)));
//currentSequence = currentSequence.map(s => s === currentIndex ? currentPair[0] : s);
}
else {
//prepend the first member to the contents of the second and drop the second
newTypes.set(currentIndex, _.concat([currentPair[0]], secondNew));
newTypes.delete(currentPair[1]);
if (log)
console.log(currentIndex, ': unshift', JSON.stringify(newTypes.get(currentIndex)));
//currentSequence = currentSequence.map(s => s === currentIndex ? currentPair[1] : s);
}
//else add a new type
}
else {
newTypes.set(currentIndex, currentPair);
if (log)
console.log(currentIndex, ':', JSON.stringify(newTypes.get(currentIndex)));
}
if (log)
console.log(JSON.stringify(currentSequence));
currentPair = getMostCommonPair(currentSequence, unequalPairsOnly);
currentIndex++;
}
//combine types that only occur in one context
//types are visited from the highest index to the lowest
_.reverse(_.sortBy([...newTypes.keys()])).forEach(t => {
//new types whose contents include t
const parents = [...newTypes.keys()]
.filter(n => _.includes(_.flattenDeep(newTypes.get(n)), t));
//number of occurrences of t in all new types and in the sequence
const occs = _.flattenDeep(_.concat([...newTypes.values()], currentSequence))
.reduce((c, u) => u == t ? c + 1 : c, 0);
//inline t into its only parent and remove it
if (parents.length == 1 && occs <= 1) {
newTypes.set(parents[0], replaceInTree(newTypes.get(parents[0]), t, newTypes.get(t)));
newTypes.delete(t);
}
});
//now flatten all types
[...newTypes.keys()].forEach(t => newTypes.set(t, _.flattenDeep(newTypes.get(t))));
//create hierarchy
let hierarchy = _.clone(currentSequence);
if (log)
console.log(_.reverse(_.sortBy([...newTypes.keys()])));
hierarchy = replaceTypesRecursively(hierarchy, newTypes);
//print types and occurrences
_.reverse(_.sortBy([...newTypes.keys()])).forEach(t => {
//occurrences are counted by splitting the serialized hierarchy at the serialized type
const seq = JSON.stringify(replaceTypesRecursively([t], newTypes)[0]);
const occs = JSON.stringify(hierarchy).split(seq).length - 1;
if (occs && log)
console.log(t, occs, seq);
});
if (log)
console.log(JSON.stringify(hierarchy));
return hierarchy;
}
exports.inferHierarchyFromTypeSequence = inferHierarchyFromTypeSequence;
/**
 * Returns a deep copy of the hierarchy in which all types are replaced by
 * their contents, working from the highest type index to the lowest.
 * @param hierarchy tree to expand (not modified)
 * @param types map from type index to the contents of the type
 */
function replaceTypesRecursively(hierarchy, types) {
  const copy = _.cloneDeep(hierarchy);
  const descendingTypes = _.reverse(_.sortBy([...types.keys()]));
  return descendingTypes.reduce((tree, t) => replaceInTree(tree, t, types.get(t)), copy);
}
/**
 * Returns a copy of the tree in which every node equal to subtree is replaced
 * by the replacement. Values without a (non-zero) length are returned as
 * they are.
 */
function replaceInTree(tree, subtree, replacement) {
  if (!tree.length) {
    return tree;
  }
  return tree.map(node => {
    if (_.isEqual(node, subtree)) {
      return replacement;
    }
    return replaceInTree(node, subtree, replacement);
  });
}
/**
 * Returns the pair of adjacent elements that occurs most often in the array,
 * where directly consecutive occurrences of the same pair count only once.
 * @param array sequence to search
 * @param unequalOnly when true, pairs of two equal elements are ignored
 * @returns the pair, or undefined if no pair occurs more than once
 */
function getMostCommonPair(array, unequalOnly = false) {
  const pairs = array.slice(1).map((a, i) => JSON.stringify([array[i], a]));
  let candidates = _.uniq(pairs);
  if (unequalOnly) {
    candidates = candidates.filter(c => {
      const pair = JSON.parse(c);
      return pair[0] != pair[1];
    });
  }
  const freqs = candidates.map(c => {
    const disjunct = [];
    for (const index of util_1.allIndexesOf(pairs, c)) {
      //skip an occurrence that directly follows the last counted one
      if (index != _.last(disjunct) + 1) {
        disjunct.push(index);
      }
    }
    return disjunct.length;
  });
  const maxFreq = _.max(freqs);
  if (maxFreq > 1) {
    return JSON.parse(candidates[freqs.indexOf(maxFreq)]);
  }
}