// @givengine/give-tree
// Version:
// Node implementation of interval-tree based cache data structures: base class
// 346 lines (326 loc) • 13.1 kB
// JavaScript
/**
* @license
* Copyright 2018-2019 The Regents of the University of California.
* All Rights Reserved.
*
* Created by Xiaoyi Cao
* Department of Bioengineering
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
const GiveTreeNode = require('./giveTreeNode')
/**
* @typedef {import('@givengine/chrom-region')} ChromRegion
*
* Class for data storage.
*
* Every record serves as a bin with a start and an end coordinate, and
* all records combined form a division of the chromosome (no gap, no
* overlap), with the start values of the records as the dividing points.
*
* For example:
* ```
* bins: << | | | | >>
* Data: << ------------]
* [-----------------]
* [------------]
* [------------------------------------]
* [--------------------------]
* [-------]
* [------------------------- >>
* [---------------]
* [----------------- >>
* [-]
* ```
*
* Records can have value of:
*
* `null`: data not loaded yet; when an upper layer encounters this, the
* code there needs to retrieve the potential data;
*
* `false`: there is no data in this bin;
*
* A `DataNode` instance:
* the instance of a class described in this file
*
* @class
* @alias module:DataNode
* @implements {GiveTreeNode}
* @property {Array<ChromRegion>} startList - A list of data entries
* that __start exactly at__ the start coordinate of this node.
* `startList` will become an empty array only if the previous bin is
* `null` (because otherwise this bin can be merged with the previous
* one), or this is the first bin of the storage unit;
* @property {Array<ChromRegion>} continuedList - A list of data entries
* that __continue into__ the start coordinate of this node. This array
* will be sorted by the actual starting points, `[]` will have the same
* effect as `undefined`. This is used in `DataNode.traverse`
* only at the first node. See `DataNode.traverse` for details.
*/
class DataNode extends GiveTreeNode {
  /**
   * @constructor
   * @param {object} props - properties that will be passed to the
   *    individual implementations. For `DataNode`, three properties
   *    will be used:
   * @param {number} props.start - for `this.start`
   * @param {Array<ChromRegion>} [props.startList] - for
   *    `this.startList`, defaults to `[]`
   * @param {Array<ChromRegion>} [props.continuedList] - for
   *    `this.continuedList`, defaults to `[]`
   */
  constructor (props) {
    super(props)
    this._start = props.start
    this.startList = props.startList || []
    this.continuedList = props.continuedList || []
  }

  /**
   * Implementing GiveTreeNode methods
   */

  /**
   * Whether this data node has data stored. Because data node is populated
   * with actual data, it will always return `true` (always has data).
   *
   * @type {boolean}
   */
  get hasData () {
    return true
  }

  /**
   * The start coordinate of this node (the value of `props.start` given to
   * the constructor).
   *
   * @type {number}
   */
  get start () {
    return this._start
  }

  /**
   * Insert data under this node
   *
   * @param {Array<ChromRegion>|null} data - the sorted array of data
   *    entries (each should be an extension of `ChromRegion`).
   *    `data === null` or an empty array means there is no data to insert;
   *    both are handled the same way.
   *
   *    __NOTICE:__ any data overlapping this node should appear either
   *    here or in `props.continuedList`, otherwise `continuedList` in data
   *    entries may not work properly.
   *
   *    After insertion, every entry within `data` whose `.start` value is
   *    __not greater than__ `this.start` will either be deleted from the
   *    array or skipped over via `props.dataIndex`. See `props.dataIndex`
   *    for details.
   * @param {ChromRegion} chrRange - not used by `DataNode`.
   * @param {Object} [props] - additional properties being passed onto nodes.
   * @param {Array<ChromRegion>} [props.continuedList] - the list of data
   *    entries that started in earlier regions and are passed on from
   *    there. This is useful when data for multiple regions are inserted
   *    at the same time. It is __replaced__ on `props` with the list of
   *    entries that overlap `this.start`, followed by the new
   *    `this.startList`.
   * @param {function(ChromRegion):boolean} [props.callback] - the callback
   *    function handed to `this.constructor._traverseData` for the entries
   *    being consumed.
   * @param {number} [props.dataIndex] - current index of `data` to start
   *    insertion. This is to optimize large insertions.
   *
   *    If this is specified, after insertion it will be moved to the first
   *    data entry whose `.start` is greater than `this.start`, if no
   *    such entry exists, it will be moved to `data.length`. `data` itself
   *    is left untouched.
   *
   *    If this is not specified, after insertion, `data[0]` will become the
   *    first data entry whose `.start` is greater than `this.start`.
   *    Or `data` will become `[]` if no such entry exists.
   * @returns {DataNode} Always return `this`.
   */
  insert (data, chrRange, props) {
    props = props || {}
    // `null` / `undefined` means "no data": treat it as an empty list so
    // the slicing below does not throw.
    data = data || []
    const useDataIndex = typeof props.dataIndex === 'number'
    const firstIndex = useDataIndex ? props.dataIndex : 0

    // 1. Walk over everything in `data` that has a `start` value smaller
    //    than `this.start`; those entries belong to the continued list.
    const startIndex = this.constructor._traverseData(data, firstIndex,
      dataEntry => dataEntry.start < this.start, props.callback)

    // 2. Combine them with the incoming continued list and keep only the
    //    entries that still overlap `this` (`end` greater than
    //    `this.start`). `this.continuedList` gets its own copy so later
    //    changes of `props.continuedList` do not leak into this node.
    props.continuedList = (props.continuedList || [])
      .concat(data.slice(firstIndex, startIndex))
      .filter(entry => entry.end > this.start)
    this.continuedList = props.continuedList.slice()

    // 3. All `data` entries with the same `start` value as `this` become
    //    `this.startList`; they are also appended to `props.continuedList`
    //    for the nodes that follow.
    const nextIndex = this.constructor._traverseData(data, startIndex,
      dataEntry => dataEntry.start === this.start, props.callback)
    this.startList = data.slice(startIndex, nextIndex)
    props.continuedList = props.continuedList.concat(this.startList)

    if (useDataIndex) {
      // caller tracks progress through `props.dataIndex`
      props.dataIndex = nextIndex
    } else {
      // no index given: drop the consumed entries from `data` instead
      data.splice(0, nextIndex)
    }
    return this
  }

  /**
   * Remove data from this node.
   *
   * If `data` is an instance of this node's class with the same `start`
   * (and, when `exactMatch` is set, `this.constructor._compareData(data,
   * this)` is truthy), the whole node is cleared.
   *
   * Otherwise entries whose `start` equals `data.start` are removed from
   * `this.startList` (only when `data.start === this.start`) and from
   * `this.continuedList`. When `exactMatch` is set, an entry is only
   * removed if `this.constructor._compareData(data, entry)` is truthy.
   *
   * @param {ChromRegion|DataNode} data - the entry (or node) to remove.
   * @param {boolean} [exactMatch] - whether `_compareData` must also match.
   * @param {Object} [props] - additional properties.
   * @param {function(ChromRegion)} [props.callback] - called with every
   *    entry that gets removed from `startList` or `continuedList` (it is
   *    not called when the whole node is cleared).
   * @returns {DataNode|boolean} `false` if the node was cleared or ended up
   *    empty, `this` otherwise.
   */
  remove (data, exactMatch, props) {
    props = props || {}
    if (data instanceof this.constructor && this.start === data.start && (
      (!exactMatch) || this.constructor._compareData(data, this)
    )) {
      // this node should be removed
      this.clear()
      return false
    }

    // Decides whether `dataIn` matches `data` closely enough to be removed,
    // and reports it through `props.callback` if so.
    const shouldRemove = dataIn => {
      if (exactMatch && !this.constructor._compareData(data, dataIn)) {
        return false
      }
      if (typeof props.callback === 'function') {
        props.callback(dataIn)
      }
      return true
    }

    if (data.start === this.start) {
      this.startList = this.startList.filter(dataIn => !shouldRemove(dataIn))
    }
    this.continuedList = this.continuedList.filter(
      dataIn => !(dataIn.start === data.start && shouldRemove(dataIn))
    )
    return this.isEmpty ? false : this
  }

  /**
   * Empty this node: both `startList` and `continuedList` are reset to
   * new empty arrays.
   *
   * @param {*} [convertTo] - not used by `DataNode`.
   */
  clear (convertTo) {
    this.startList = []
    this.continuedList = []
  }

  /**
   * Traverse all nodes / data entries within `this` and calling
   * functions on them.
   *
   * When traversing, everything in 'continuedList' of *the starting record
   * only* will be processed first, then everything in 'startList' in all
   * overlapping records will be processed.
   *
   * @param {ChromRegion} chrRange - the chromosomal range
   *    to traverse. If falsy, every entry is processed.
   * @param {function} callback - the callback function, takes a
   *    `ChromRegion` object as its sole parameter and returns
   *    something that can be evaluated as a boolean value to determine
   *    whether the call shall continue (if `breakOnFalse === true`).
   * @param {function} [filter] - a filter function that takes a
   *    `ChromRegion` object as its sole parameter and returns whether
   *    the region should be included in traverse.
   * @param {boolean} [breakOnFalse=false] - whether the traverse should be
   *    stopped if `false` is returned from the callback function.
   * @param {Object} [props] - additional properties being
   *    passed onto nodes.
   * @param {boolean} [props.notFirstCall] - whether this is not the first
   *    call of a series of `traverse` calls. It is set to `true` on `props`
   *    once `continuedList` has been processed.
   * @param {...any} args - additional args being passed onto `callback`
   *    and `filter`
   * @returns {boolean} - whether future traverses should be conducted.
   */
  traverse (chrRange, callback, filter, breakOnFalse, props, ...args) {
    // `props` is optional; without this default `props.notFirstCall` throws
    props = props || {}

    // helper function
    const callFunc = entry => {
      // First determine if `chrRange` exists and does not overlap
      // `entry`. If so, return `true` to proceed with the next
      if (chrRange &&
        (chrRange.start >= entry.end || entry.start >= chrRange.end)
      ) {
        return true
      }
      // If `chrRange` does not exist or overlaps `entry`
      // call `callback` and return its value (applying `filter` and
      // `breakOnFalse`).
      return this.constructor._callFuncOnDataEntry(callback, filter,
        breakOnFalse, entry, props, ...args)
    }

    // needs to traverse on continuedList if `!props.notFirstCall`
    if (!props.notFirstCall) {
      if (!this.continuedList.every(callFunc)) {
        return false
      }
    }
    props.notFirstCall = true
    return this.startList.every(callFunc)
  }

  /**
   * Whether part of `chrRange` is not cached under this node. This
   * implementation always answers `false`.
   *
   * @param {ChromRegion} chrRange - not used by `DataNode`.
   * @param {Object} [props] - not used by `DataNode`.
   * @returns {boolean} Always `false`.
   */
  hasUncachedRange (chrRange, props) {
    return false
  }

  /**
   * Get the uncached ranges under this node. This implementation adds
   * nothing: it hands back the results accumulated so far.
   *
   * @param {ChromRegion} chrRange - not used by `DataNode`.
   * @param {Object} [props] - additional properties.
   * @param {Array} [props._result] - results accumulated so far.
   * @returns {Array} `props._result` if it is set, otherwise a new empty
   *    array.
   */
  getUncachedRange (chrRange, props) {
    return (props && props._result) || []
  }

  /**
   * Merge this node with `node`.
   *
   * If `node` doesn't have any data or anything in `startList`, merge.
   * Actually because of the structure of `DataNode`, nothing needs
   * to be changed in `this` if merge is successful. Just return `true`
   * to let the caller handle `node`.
   *
   * @param {DataNode|boolean|null} node - node to be merged.
   *    Note that this node has to be positioned after `this`.
   * @returns {boolean} whether the merge is successful
   */
  mergeAfter (node) {
    if (node === false || (
      node instanceof this.constructor && node.startList.length <= 0
    )) {
      return true
    } else if (node instanceof this.constructor) {
      // the node is not mergable, but its continuedList may be updated
      node.updateContinuedList(this.continuedList.concat(this.startList))
    }
    return false
  }

  /**
   * Whether this node is empty.
   *
   * If there is no entry in both `this.startList` and `this.continuedList`
   * then the node is considered empty.
   *
   * @type {boolean}
   */
  get isEmpty () {
    return this.startList.length <= 0 && this.continuedList.length <= 0
  }

  /**
   * Update the continued list (this happens mainly because of node merging)
   * @param {Array<ChromRegion>} [continuedList] - candidate entries; only
   *    those whose `end` is greater than `this.start` are kept. If falsy,
   *    `this.continuedList` is left unchanged.
   * @param {boolean} [throwIfNotConsistent] Throw an error if the final
   *    `continuedList` is inconsistent with the original (that is, shorter
   *    than the current `this.continuedList`).
   * @returns {Array<ChromRegion>} return a list concatenated with
   *    `this.startList` as a base for future `continuedList`s
   * @throws {Error} if `throwIfNotConsistent` is set and the filtered list
   *    is shorter than the current `this.continuedList`.
   */
  updateContinuedList (continuedList, throwIfNotConsistent) {
    if (continuedList) {
      continuedList = continuedList.filter(entry => (entry.end > this.start))
      if (throwIfNotConsistent &&
        this.continuedList.length > continuedList.length
      ) {
        throw new Error('ContinuedList inconsistent.')
      }
      this.continuedList = continuedList
    }
    return this.continuedList.concat(this.startList)
  }
}
module.exports = DataNode