UNPKG

interskiplist

Version:

Binary Search for Numeric & Character Intervals; great for CSS-Unicode-Range-like tasks

449 lines (404 loc) 19.7 kB
############################################################################################################ CND = require 'cnd' rpr = CND.rpr.bind CND badge = 'INTERSKIPLIST' log = CND.get_logger 'plain', badge info = CND.get_logger 'info', badge whisper = CND.get_logger 'whisper', badge alert = CND.get_logger 'alert', badge debug = CND.get_logger 'debug', badge warn = CND.get_logger 'warn', badge help = CND.get_logger 'help', badge urge = CND.get_logger 'urge', badge echo = CND.echo.bind CND σ_plus_א = Symbol.for '+א' σ_minus_א = Symbol.for '-א' σ_misfit = Symbol.for 'misfit' #........................................................................................................... { mix, } = require 'multimix006modern' # { mix, } = require 'multimix' types = require './types' { isa validate type_of } = types.export() #----------------------------------------------------------------------------------------------------------- @new = ( settings ) -> throw new Error "settings not supported" if settings? isl_settings = minIndex: σ_minus_א maxIndex: σ_plus_א compare: ( a, b ) -> return 0 if a is b and ( a is σ_plus_א or a is σ_minus_א ) return +1 if ( a is σ_plus_א ) or ( b is σ_minus_א ) return -1 if ( a is σ_minus_א ) or ( b is σ_plus_א ) return +1 if a > b return -1 if a < b return 0 #......................................................................................................... substrate = new ( require 'interval-skip-list' ) isl_settings substrate.toString = substrate.inspect = -> "{ interval-skip-list }" #......................................................................................................... R = '~isa': 'CND/interskiplist' '%self': substrate 'entry-by-ids': {} 'idx-by-names': {} 'ids-by-names': {} 'name-by-ids': {} 'idx-by-ids': {} 'ids': [] 'idx': -1 'min': null 'max': null 'fmin': null 'fmax': null 'indexes': {} #......................................................................................................... return R #----------------------------------------------------------------------------------------------------------- @copy = ( me ) -> R = @new() @add_index R, name for name of me[ 'indexes' ] @add R, CND.deep_copy entry for entry in @entries_of me return R #----------------------------------------------------------------------------------------------------------- @add = ( me, entry ) -> ### TAINT currently we keep the identity of `entry` and amend it; wouldn't it be better to copy? or deep copy? it and then amend it? ### throw new Error "expected 2 arguments, got #{arity}" unless ( arity = arguments.length ) is 2 throw new Error "expected a POD, got a #{type_of entry}" unless isa.object entry { lo, hi, id, name, } = entry throw new Error "expected setting for 'lo', found none" unless lo? throw new Error "expected setting for 'hi', found none" unless hi? lo = as_number lo hi = as_number hi name ?= '+' group_idx = ( me[ 'idx-by-names' ][ name ] = ( me[ 'idx-by-names' ][ name ] ? -1 ) + 1 ) global_idx = ( me[ 'idx' ] += +1 ) id ?= "#{name}[#{group_idx}]" entry[ 'lo' ] = lo entry[ 'hi' ] = hi entry[ 'idx' ] = global_idx entry[ 'id' ] = id entry[ 'name' ] = name entry[ 'size' ] = hi - lo + 1 entry[ 'tag' ] = normalize_tag entry[ 'tag' ] if entry[ 'tag' ]? me[ 'min' ] ?= lo me[ 'min' ] = Math.min me[ 'min' ], lo me[ 'max' ] ?= hi me[ 'max' ] = Math.max me[ 'max' ], lo if isa.float lo me[ 'fmin' ] ?= lo me[ 'fmin' ] = Math.min me[ 'fmin' ], lo if isa.float hi me[ 'fmax' ] ?= hi me[ 'fmax' ] = Math.max me[ 'fmax' ], lo me[ 'name-by-ids' ][ id ] = name me[ 'idx-by-ids' ][ id ] = global_idx me[ 'entry-by-ids' ][ id ] = entry ? null ( me[ 'ids-by-names' ][ name ] ?= [] ).push id me[ '%self' ].insert id, lo, hi me[ 'ids' ].push id #......................................................................................................... @_index_entry me, entry #......................................................................................................... return id #----------------------------------------------------------------------------------------------------------- @delete = ( me, id ) -> me[ '%self' ].remove id # #=========================================================================================================== # # SERIALIZATION # #----------------------------------------------------------------------------------------------------------- # @to_xjson = ( me ) -> # R = # 'index-keys': ( key for key of me[ 'indexes' ] ) # 'entries': ( entry for _, entry of me[ 'entry-by-ids' ] ) # return CND.XJSON.stringify R, null, ' ' # #----------------------------------------------------------------------------------------------------------- # @new_from_xjson = ( xjson ) -> # description = CND.XJSON.parse xjson # R = @new() # @add_index R, key for key in description[ 'index-keys' ] # @add R, entry for entry in description[ 'entries' ] # return R #=========================================================================================================== # INDEXING #----------------------------------------------------------------------------------------------------------- @add_index = ( me, name ) -> throw new Error "index for #{rpr name} already exists" if me[ 'indexes' ][ name ]? return me[ 'indexes' ][ name ] = {} #----------------------------------------------------------------------------------------------------------- @delete_index = ( me, name, fallback ) -> fallback = σ_misfit if fallback is undefined R = me[ 'indexes' ][ name ] ? fallback throw new Error "no index for field #{rpr name}" if R is σ_misfit delete me[ 'indexes' ][ name ] return R #----------------------------------------------------------------------------------------------------------- @find_ids = ( me, name, value ) -> throw new Error "no index for field #{rpr name}" unless ( index = me[ 'indexes' ][ name ] )? return [] unless ( R = index[ value ] )? return Object.assign [], R #----------------------------------------------------------------------------------------------------------- @find_entries = ( me, name, value ) -> R = @find_ids me, name, value R[ idx ] = me[ 'entry-by-ids' ][ id ] for id, idx in R return R #----------------------------------------------------------------------------------------------------------- @_index_entry = ( me, entry ) -> { id, } = entry #......................................................................................................... if ( indexes = me[ 'indexes' ] )? for name, value of entry continue unless ( index = indexes[ name ] ) ### TAINT this is a minimally viable product; indexing behavior should be configurable ### if name is 'tag' for tag in normalize_tag value ( index[ tag ] ?= [] ).push id else ( index[ value ] ?= [] ).push id #......................................................................................................... return null #=========================================================================================================== # COVER AND INTERSECT #----------------------------------------------------------------------------------------------------------- @match = ( me, points, settings = {} ) -> @_match_or_intersect me, 'match', points, settings @intersect = ( me, points, settings = {} ) -> @_match_or_intersect me, 'intersect', points, settings #----------------------------------------------------------------------------------------------------------- @_match_or_intersect = ( me, mode, points, settings ) -> # throw new Error "ISL.match, ISL.intersect on hold for revision" ### TAINT can probably be greatly simplified since advanced functionality here is not needed ### unless is_subset ( keys = Object.keys settings ), setting_keys_of_cover_and_intersect expected = setting_keys_of_cover_and_intersect.join ', ' got = keys.join ', ' throw new Error "expected settings out of #{expected}, got #{got}" { pick, } = settings if mode is 'match' then R = @_find_ids_with_all_points me, points else R = @_find_ids_with_any_points me, points return R if pick is 'id' R = @entries_of me, R if pick? R = ( entry[ pick ] for entry in R ) return reduce_tag R if pick is 'tag' return fuse R #----------------------------------------------------------------------------------------------------------- setting_keys_of_cover_and_intersect = [ 'pick', ] #=========================================================================================================== # #----------------------------------------------------------------------------------------------------------- @entries_of = ( me, ids = null ) -> unless ids? R = ( entry for _, entry of me[ 'entry-by-ids' ] ) else R = [] for id in ids throw new Error "unknown ID #{rpr id}" unless ( entry = me[ 'entry-by-ids' ][ id ] )? R.push entry return sort_entries_by_insertion_order me, R #----------------------------------------------------------------------------------------------------------- @_find_ids_with_any_points = ( me, points ) -> points = normalize_points points return me[ '%self' ].findContaining points... if points.length < 2 R = new Set() for point in points ids = me[ '%self' ].findContaining point R.add id for id in ids return sort_ids_by_insertion_order me, Array.from R #----------------------------------------------------------------------------------------------------------- @_find_ids_with_all_points = ( me, points ) -> points = normalize_points points return me[ '%self' ].findContaining points... #----------------------------------------------------------------------------------------------------------- @intervals_from_points = ( me, points, mixins... ) -> mixin = ( lohi ) -> return lohi unless mixins.length > 0 return Object.assign {}, mixins..., lohi points = [ points, ] unless isa.list points points = unique as_numbers points points.sort ( a, b ) -> return +1 if a > b return -1 if a < b return 0 R = [] last_point = null last_lo = null last_hi = null for point in points unless last_lo? last_lo = point last_hi = point last_point = point continue if point is last_point + 1 last_hi = point last_point = point continue R.push ( mixin { lo: last_lo, hi: last_hi, } ) last_lo = point last_hi = point last_point = point R.push ( mixin { lo: last_lo, hi: last_hi, } ) if last_lo? and last_hi? return R #=========================================================================================================== # AGGREGATION #----------------------------------------------------------------------------------------------------------- @aggregate = ( me, point, reducers = null ) -> ( @aggregate.use me, reducers ) point #----------------------------------------------------------------------------------------------------------- @aggregate.use = ( me, reducers, settings = {} ) => unless is_subset ( keys = Object.keys settings ), [ 'memoize', ] throw new Error "unknown keys in #{rpr keys}" #......................................................................................................... if ( memoize = settings[ 'memoize' ] ? yes ) then cache = {} else cache = null #......................................................................................................... if ( not reducers? ) or ( Object.keys reducers ).length is 0 my_mix = @aggregate._mix else ### TAINT this part must be rewritten ### mixins = [ {}, ] mixins.push @aggregate._reducers mixins.push reducers if reducers? fields = Object.assign {}, ( mixin.fields for mixin in mixins when mixin.fields? )... reducers = Object.assign mixins... reducers[ 'fields' ] = fields my_mix = mix.use reducers #......................................................................................................... mix_entries_of_point = ( point ) => point_count = if ( isa.list point ) then point.length else 1 throw new Error "need single point, got #{point_count}" unless point_count is 1 entries = @entries_of me, @_find_ids_with_any_points me, point return my_mix entries... #......................................................................................................... return mix_entries_of_point unless memoize #......................................................................................................... return ( point ) => return R if ( R = cache[ point ] )? return cache[ point ] = mix_entries_of_point point #----------------------------------------------------------------------------------------------------------- @aggregate._reducers = fields: idx: 'skip' id: 'skip' name: 'skip' lo: 'skip' hi: 'skip' size: 'skip' tag: 'tag' #----------------------------------------------------------------------------------------------------------- @aggregate._mix = mix.use @aggregate._reducers #=========================================================================================================== # HELPERS #----------------------------------------------------------------------------------------------------------- sort_entries_by_insertion_order = ( me, entries ) -> entries.sort ( a, b ) -> return +1 if a[ 'idx' ] > b[ 'idx' ] return -1 if a[ 'idx' ] < b[ 'idx' ] return 0 return entries #----------------------------------------------------------------------------------------------------------- sort_ids_by_insertion_order = ( me, ids ) -> idxs = me[ 'idx-by-ids' ] ids.sort ( a, b ) -> return +1 if idxs[ a ] > idxs[ b ] return -1 if idxs[ a ] < idxs[ b ] return 0 return ids #----------------------------------------------------------------------------------------------------------- as_number = ( x ) -> return x if x in [ -Infinity, +Infinity, ] or isa.float x unless ( type = type_of x ) is 'text' throw new Error "expected number or single character text, got a #{type}" unless ( length = ( Array.from x ).length ) is 1 throw new Error "expected single character text, got one of length #{length}" return x.codePointAt 0 #----------------------------------------------------------------------------------------------------------- as_numbers = ( list ) -> ( as_number x for x in list ) #----------------------------------------------------------------------------------------------------------- normalize_points = ( points ) -> points = [ points, ] unless isa.list points return as_numbers points #----------------------------------------------------------------------------------------------------------- normalize_tag = ( tag ) -> ### Given a single string or a list of strings, return a new list that contains all whitespace-delimited words in the strings ### return normalize_tag [ tag, ] unless isa.list tag R = [] for t in tag continue if t.length is 0 R.splice R.length, 0, ( t.split /\s+/ )... ### TAINT consider to return `unique R` instead ### return R #----------------------------------------------------------------------------------------------------------- unique = ( list ) -> ### Return a copy of `list´ that only contains the last occurrence of each value ### ### TAINT consider to modify, not copy `list` ### seen = new Set() R = [] for idx in [ list.length - 1 .. 0 ] by -1 element = list[ idx ] continue if seen.has element seen.add element R.unshift element return R #----------------------------------------------------------------------------------------------------------- append = ( a, b ) -> ### Append elements of list `b` to list `a` ### ### TAINT JS has `[]::concat` ### a.splice a.length, 0, b... return a #----------------------------------------------------------------------------------------------------------- meld = ( list, value ) -> ### When `value` is a list, `append` it to `list`; else, `push` `value` to `list` ### if isa.list value then append list, value else list.push value return list #----------------------------------------------------------------------------------------------------------- fuse = ( list ) -> ### Flatten `list`, then apply `unique` to it. Does not copy `list` but modifies it ### R = [] meld R, element for element in list R = unique R list.splice 0, list.length, R... return list #----------------------------------------------------------------------------------------------------------- reduce_tag = ( raw ) -> source = fuse raw R = [] exclude = null #......................................................................................................... for idx in [ source.length - 1 .. 0 ] by -1 tag = source[ idx ] continue if exclude? and exclude.has tag if tag.startsWith '-' break if tag is '-*' ( exclude ?= new Set() ).add tag[ 1 .. ] continue R.unshift tag #......................................................................................................... return R #----------------------------------------------------------------------------------------------------------- is_subset = ( subset, superset ) -> ### `is_subset subset, superset` returns whether `subset` is a subset of `superset`; this is true if each element of `subset` is also an element of `superset`. ### type_of_sub = type_of subset type_of_super = type_of superset unless type_of_sub is type_of_super throw new Error "expected two arguments of same type, got #{type_of_sub} and #{type_of_super}" switch type_of_sub when 'list' return false unless subset.length <= superset.length for element in subset return false unless element in superset return true when 'set' return false unless subset.size <= superset.size iterator = subset.values() loop { value, done, } = iterator.next() return true if done return false unless superset.has value # for element in # return false unless element in subset return true else throw new Error "expected lists or sets, got #{type_of_sub} and #{type_of_super}" return null