UNPKG

siafun

Version:
347 lines (344 loc) 16.3 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const _ = require("lodash"); const util_1 = require("./util"); const smith_waterman_1 = require("./smith-waterman"); function getSWOptionsString(options) { return (options.maxIterations ? options.maxIterations : 't') //t to be backwards-compatible with iterative t/f save files + '_' + (options.maxThreshold ? options.maxThreshold : '') //0 == undefined + '_' + (options.endThreshold ? options.endThreshold : '') //0 == undefined + '_' + (options.minSegmentLength ? options.minSegmentLength : '') //0 == undefined + '_' + (options.similarityThreshold != null ? options.similarityThreshold : '') + '_' + (options.onlyDiagonals ? 't' : '') + '_' + (options.nLongest ? options.nLongest : '') + '_' + (options.maxGapSize != null ? options.maxGapSize : '') + '_' + (options.maxGaps ? options.maxGaps : '') + '_' + (options.maxGapRatio ? options.maxGapRatio : '') + '_' + (options.minDistance ? options.minDistance : ''); } function getSimpleSmithWatermanPath(points, points2, options) { const matrices = new smith_waterman_1.SmithWaterman(options.similarityThreshold) .run(points, points2); return getBestAlignment(matrices, options); } exports.getSimpleSmithWatermanPath = getSimpleSmithWatermanPath; function getMultiSWOccurrences(points, points2, options) { const file = 'sw_' + getSWOptionsString(options) + '.json'; return util_1.loadOrPerformAndCache(file, () => Object.assign(getSmithWatermanOccurrences2(points, options, points2), { points2: points2 }), options); } exports.getMultiSWOccurrences = getMultiSWOccurrences; function getSmithWatermanOccurrences(points, options) { const file = 'sw_' + getSWOptionsString(options) + '.json'; return util_1.loadOrPerformAndCache(file, () => getSmithWatermanOccurrences2(points, options), options); } exports.getSmithWatermanOccurrences = getSmithWatermanOccurrences; function getSmithWatermanOccurrences2(points, options, points2) { const symmetric = !points2 || _.isEqual(points2, points); const padding = options.minDistance ? options.minDistance - 1 : 0; if (symmetric) points2 = points; const allMatrices = []; const speedyResult = tryAndGetFromUnlimited(options, symmetric); if (speedyResult) return speedyResult; let selectedAlignments = []; const ignoredPoints = new Set(); //ignore diagonal if symmetric (with padding depending on minDistance) if (symmetric) getPaddedArea(points.map((_p, i) => [i, i]), padding, symmetric, points.length - 1, points.length - 1).forEach(p => ignoredPoints.add(p.join(','))); let matrices = getAdjustedSWMatrices(points, points2, options.similarityThreshold, ignoredPoints); allMatrices.push(_.clone(matrices)); let iterations = 0; let max = _.max(_.flatten(matrices.scoreMatrix)); const maxThreshold = options.maxThreshold || 0; while (max > maxThreshold && (!options.maxIterations || iterations < options.maxIterations)) { iterations++; //extract alignments const currentAlignments = getAlignments(matrices, options, symmetric); if (currentAlignments.length == 0) break; selectedAlignments.push(...currentAlignments); //update ignored points (with all found ones, not only long ones) currentAlignments.forEach(a => getPaddedArea(a, padding, symmetric, points.length - 1, points2.length - 1) .forEach(p => ignoredPoints.add(p.join(',')))); //prepare for next iteration if (!options.maxIterations || iterations < options.maxIterations) { matrices = getAdjustedSWMatrices(points, points2, options.similarityThreshold, ignoredPoints); max = _.max(_.flatten(matrices.scoreMatrix)); if (max > 0) { allMatrices.push(_.clone(matrices)); } } } return getResult(selectedAlignments, options, points, points2, symmetric, padding, allMatrices); } function getResult(alignments, options, points, points2, symmetric, padding, matrices) { let result = { points: points, patterns: [], matrices: matrices, segmentMatrix: [] }; //keep longest while respecting min dist alignments = reduceSegments(alignments, options, points.length, points2.length, symmetric, padding); //create segment matrix result.segmentMatrix = util_1.createPointMatrix(_.flatten(alignments), points, points2, symmetric); //convert to patterns result.patterns = util_1.toPatterns(alignments, points, points2); return result; } function tryAndGetFromUnlimited(options, symmetric) { const unlimitedLongestOptions = _.clone(options); unlimitedLongestOptions.nLongest = undefined; const cached = util_1.loadCached('sw_' + getSWOptionsString(unlimitedLongestOptions) + '.json', options.cacheDir); if (cached) { const points = cached.points; const points2 = cached.points2 || points; //already sorted and spaced out, so just keep nLongest const alignments = cached.patterns.map(p => patternToAlignment(p, points, points2)).slice(0, options.nLongest); let result = { points: points, patterns: [], matrices: cached.matrices, segmentMatrix: [] }; //create segment matrix result.segmentMatrix = util_1.createPointMatrix(_.flatten(alignments), points, points2, symmetric); //convert to patterns result.patterns = util_1.toPatterns(alignments, points, points2); return result; } } function reduceSegments(alignments, options, numPoints1, numPoints2, symmetric, padding) { //sort, longest first alignments = _.reverse(_.sortBy(alignments, a => a.length)); const reduced = []; const matrix = util_1.getEmptyMatrix(numPoints1, numPoints2); //keep longest while respecting min dist while (alignments.length > 0 && (!options.nLongest || reduced.length < options.nLongest)) { const currentAlignment = alignments.shift(); const nooverlap = currentAlignment.filter(p => matrix[p[0]][p[1]] == 0); //add if not covered by any previous segment if (nooverlap.length == currentAlignment.length) { reduced.push(currentAlignment); getPaddedArea(currentAlignment, padding, symmetric, numPoints1 - 1, numPoints2 - 1).forEach(p => matrix[p[0]][p[1]] = 1); } else if (nooverlap.length >= options.minSegmentLength) { //add for later const i = alignments.findIndex(a => a.length == nooverlap.length); alignments.splice(i, 0, nooverlap); } } return reduced; } function patternToAlignment(pattern, points, points2) { const stringPoints = points.map(p => JSON.stringify(p)); const occ1Indexes = pattern.occurrences[0].map(p => stringPoints.indexOf(JSON.stringify(p))); const stringPoints2 = points2.map(p => JSON.stringify(p)); const occ2Indexes = pattern.occurrences[1].map(p => stringPoints2.indexOf(JSON.stringify(p))); return _.zip(occ1Indexes, occ2Indexes); } function getPaddedArea(points, padding, symmetric, maxX, maxY) { return _.flatten(points.map(p => { const ps = [p]; ps.push(..._.flatten(_.range(1, padding + 1) .map(d => [[p[0] + d, p[1]], [p[0], p[1] + d]]))); if (symmetric) _.cloneDeep(ps).forEach(i => ps.push(_.reverse(i))); return ps; })).filter(p => p[0] <= maxX && p[1] <= maxY); } exports.getPaddedArea = getPaddedArea; function getAdjustedSWMatrices(points, points2, similarityThreshold, ignoredPoints) { //TODO MAKE SURE NO SLICING NEEDS TO HAPPEN (JUST RUN WITH COLLAPSED TEMPORAL FEATURES??) //points = points.map(p => p.slice(0,p.length-1)); points = points.map(p => p.slice(1)); points2 = points2.map(p => p.slice(1)); let matrices = new smith_waterman_1.SmithWaterman(similarityThreshold) .run(points, points2, ignoredPoints); if (points === points2) { //make lower matrix 0 matrices.scoreMatrix = matrices.scoreMatrix.map((r, i) => r.map((c, j) => j < i ? 0 : c)); } /*if (points.length <= points2.length) { //make lower matrix 0 matrices.scoreMatrix = matrices.scoreMatrix.map((r,i) => r.map((c,j) => j < i ? 0 : c)); } else { matrices.scoreMatrix = matrices.scoreMatrix.map((r,i) => r.map((c,j) => j > i ? 0 : c)); }*/ return matrices; } function getBestAlignment(matrices, options) { const flat = _.flatten(matrices.scoreMatrix); const index = flat.indexOf(_.max(flat)); const numCols = matrices.scoreMatrix[0].length; const [i, j] = [_.floor(index / numCols), util_1.modForReal(index, numCols)]; const alignment = getAlignment(matrices, i, j, options); //keep only matches return alignment.filter(([i, j]) => matrices.traceMatrix[i][j] == smith_waterman_1.TRACES.DIAGONAL && (i == 0 || j == 0 || matrices.scoreMatrix[i][j] > matrices.scoreMatrix[i - 1][j - 1])); } function getAlignments(matrices, options, symmetric) { let currentMatrix = _.cloneDeep(matrices.scoreMatrix); let currentMatrices = _.cloneDeep(matrices); currentMatrices.scoreMatrix = currentMatrix; const alignments = []; let maxes = _.flatten(currentMatrix.map((r, i) => r.map((v, j) => [v, i, j]))); maxes = maxes.filter(vij => options.maxThreshold ? vij[0] > options.maxThreshold : vij[0] > 0); maxes = _.reverse(_.sortBy(maxes, vij => vij[0])); while (maxes.length > 0) { const [i, j] = [maxes[0][1], maxes[0][2]]; let currentAlignment = getAlignment(currentMatrices, i, j, options); if ((!options.minSegmentLength || currentAlignment.length >= options.minSegmentLength)) alignments.push(currentAlignment); removeAlignmentCoverage(currentAlignment, currentMatrix, symmetric, options.onlyDiagonals); const nextMax = maxes.findIndex(m => currentMatrix[m[1]][m[2]] != 0); maxes = maxes.slice(nextMax > 1 ? nextMax : 1); } return alignments; } function removeAlignmentCoverage(alignment, matrix, symmetric, diagonal) { //remove diagonal bleeding at end of alignment (until next match) if (alignment.length > 0 && diagonal) { alignment = _.clone(alignment); let current = _.last(alignment); let currentValue = matrix[current[0]][current[1]]; let next = [current[0] + 1, current[1] + 1]; while (next[0] < matrix.length && next[1] < matrix.length && matrix[next[0]][next[1]] < currentValue) { alignment.push(next); currentValue = matrix[next[0]][next[1]]; next = [next[0] + 1, next[1] + 1]; } } //remove horizontal and vertical bleeding (any gaps) //TODO (maybe only in diagonal mode??) alignment.forEach(([i, j]) => { let ii = i + 1, jj = j + 1; let currentValue = matrix[i][j]; while (ii < matrix.length && matrix[ii][j] <= currentValue + smith_waterman_1.GAP_SCORE) { currentValue = matrix[ii][j]; matrix[ii][j] = 0; if (symmetric) matrix[j][ii] = 0; ii++; } currentValue = matrix[i][j]; while (jj < matrix[0].length && matrix[i][jj] <= currentValue + smith_waterman_1.GAP_SCORE) { currentValue = matrix[i][jj]; matrix[i][jj] = 0; if (symmetric) matrix[jj][i] = 0; jj++; } matrix[i][j] = 0; if (symmetric) matrix[j][i] = 0; }); } function getAlignment(matrices, i, j, options) { const maxGapSize = options.maxGapSize || 0; const maxGaps = options.maxGaps || 0; const maxGapRatio = options.maxGapRatio || 1; //find ij trace in matrix let currentValue = matrices.scoreMatrix[i][j]; let currentTrace = matrices.traceMatrix[i][j]; let pointsOnAlignment = [[i, j]]; let numGaps = 0; let currentGapSize = 0; let totalGapSize = 0; let gapRatio = 0; while ((!options.endThreshold || currentValue >= options.endThreshold) && (currentGapSize <= maxGapSize) && (numGaps <= maxGaps) && (gapRatio <= maxGapRatio)) { //reset current location in matrix if (currentTrace === smith_waterman_1.TRACES.DIAGONAL) { [i, j] = [i - 1, j - 1]; } else if (currentTrace === smith_waterman_1.TRACES.UP && !options.onlyDiagonals) { [i, j] = [i - 1, j]; } else if (currentTrace === smith_waterman_1.TRACES.LEFT && !options.onlyDiagonals) { [i, j] = [i, j - 1]; } if (i >= 0 && j >= 0 && matrices.scoreMatrix[i][j] !== currentValue) { //next alignment found currentValue = matrices.scoreMatrix[i][j]; currentTrace = matrices.traceMatrix[i][j]; if (!options.onlyDiagonals || (currentTrace == smith_waterman_1.TRACES.DIAGONAL && //only add in strict diagonal version if it was a match ((i == 0 || j == 0) ? currentValue > 0 : currentValue > matrices.scoreMatrix[i - 1][j - 1]))) { //first point pointsOnAlignment.push([i, j]); currentGapSize = 0; } else { if (currentGapSize == 0) numGaps++; currentGapSize++; totalGapSize++; } //TODO adjust for nondiagonals... gapRatio = totalGapSize / (pointsOnAlignment.length + totalGapSize); } else break; } //sort by first/second component pointsOnAlignment = _.sortBy(pointsOnAlignment, p => p[1]); pointsOnAlignment = _.sortBy(pointsOnAlignment, p => p[0]); //fill gaps if appropriate if (options.onlyDiagonals && options.fillGaps) { const f = _.first(pointsOnAlignment); //lowest index const l = _.last(pointsOnAlignment); //highest index pointsOnAlignment = _.zip(_.range(f[0], l[0] + 1), _.range(f[1], l[1] + 1)); } return pointsOnAlignment; } /*getSmithWatermanOccurrences(options): number[][][] { let points = quantizedPoints.map(p => p.slice(0,3)); let result = new SmithWaterman().run(points, points); let sw = result.scoreMatrix; let trace = result.traceMatrix; //make lower matrix 0 sw = sw.map((r,i) => r.map((c,j) => j < i ? 0 : c)); var max: number, i: number, j: number; [i, j, max] = getIJAndMax(sw); var segments: number[][][] = []; while (max > options.maxThreshold) { //find ij trace in matrix let currentValue = max; let currentTrace = trace[i][j]; let currentSegments = [[i],[j]]; while (currentValue > options.endThreshold) { //reset current location in matrix sw[i][j] = 0;//-= 3; if (currentTrace === TRACES.DIAGONAL) { [i,j] = [i-1,j-1]; } else if (currentTrace === TRACES.UP) { [i,j] = [i-1,j]; } else if (currentTrace === TRACES.LEFT) { [i,j] = [i,j-1]; } else { break; } currentSegments[0].push(i); currentSegments[1].push(j); currentValue = sw[i][j]; currentTrace = trace[i][j]; } //sort ascending currentSegments.forEach(o => o.sort((a,b) => a-b)); //remove duplicates currentSegments = currentSegments.map(occ => _.uniq(occ)); //let allPoints = _.union(_.flatten(segments.map(s => _.union(...s)))); //let newPoints = currentSegments.map(occ => _.difference(occ, allPoints)); //only add if longer than minSegmentLength if (currentSegments[0].length > options.minSegmentLength && currentSegments[1].length > options.minSegmentLength) { //(newPoints[0].length > minSegmentLength && newPoints[1].length > minSegmentLength) { //TODO ONLY ADD IF DIFFERENCE FROM EXISTING ONES SMALL ENOUGH!!!!! segments.push(currentSegments); } let ijMax = getIJAndMax(sw); i = ijMax[0]; j = ijMax[1]; max = ijMax[2]; } //filter out wanted segments if (options.patternIndices) { segments = segments.filter((s,i) => options.patternIndices.indexOf(i) >= 0); } console.log(JSON.stringify(segments)); return segments; }*/