UNPKG

interskiplist

Version:

Binary Search for Numeric & Character Intervals; great for CSS-Unicode-Range-like tasks

732 lines (676 loc) 24.7 kB
(function() {
  //###########################################################################################################
  // InterSkipList: a thin bookkeeping layer (IDs, names, tags, field indexes, aggregation) on top of the
  // `interval-skip-list` package. All public functions are attached to `this` (the module's exports when
  // loaded as a CommonJS module) and take the structure created by `new()` as their first argument `me`.
  //###########################################################################################################
  const CND = require('cnd');
  const rpr = CND.rpr.bind(CND);
  const badge = 'INTERSKIPLIST';
  const log = CND.get_logger('plain', badge);
  const info = CND.get_logger('info', badge);
  const whisper = CND.get_logger('whisper', badge);
  const alert = CND.get_logger('alert', badge);
  const debug = CND.get_logger('debug', badge);
  const warn = CND.get_logger('warn', badge);
  const help = CND.get_logger('help', badge);
  const urge = CND.get_logger('urge', badge);
  const echo = CND.echo.bind(CND);
  // Sentinels handed to the substrate as its smallest / largest possible index.
  const σ_plus_א = Symbol.for('+א');
  const σ_minus_א = Symbol.for('-א');
  // Marks "no fallback given" so that `null` / `undefined` remain usable as explicit fallbacks.
  const σ_misfit = Symbol.for('misfit');
  //...........................................................................................................
  const { mix } = require('multimix006modern');
  // { mix, } = require 'multimix'
  const types = require('./types');
  const { isa, validate, type_of } = types.export();
  //...........................................................................................................
  // The only setting keys accepted by `match()` and `intersect()`.
  const setting_keys_of_cover_and_intersect = ['pick'];

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Create a new, empty interskiplist structure.
   *
   * @param {null|undefined} settings - must be omitted; settings are not supported.
   * @returns {object} the structure (`me`) expected by all other functions of this module.
   * @throws {Error} when `settings` is given.
   */
  this.new = function(settings) {
    if (settings != null) {
      throw new Error("settings not supported");
    }
    const isl_settings = {
      minIndex: σ_minus_א,
      maxIndex: σ_plus_א,
      // Three-way comparison that orders the two sentinels below / above every other value.
      compare: function(a, b) {
        if (a === b && (a === σ_plus_א || a === σ_minus_א)) {
          return 0;
        }
        if ((a === σ_plus_א) || (b === σ_minus_א)) {
          return +1;
        }
        if ((a === σ_minus_א) || (b === σ_plus_א)) {
          return -1;
        }
        if (a > b) {
          return +1;
        }
        if (a < b) {
          return -1;
        }
        return 0;
      }
    };
    //.........................................................................................................
    const substrate = new (require('interval-skip-list'))(isl_settings);
    // Keep the (potentially huge) substrate out of string / inspect output.
    substrate.toString = substrate.inspect = function() {
      return "{ interval-skip-list }";
    };
    //.........................................................................................................
    return {
      '~isa': 'CND/interskiplist',
      '%self': substrate,
      'entry-by-ids': {},
      'idx-by-names': {},
      'ids-by-names': {},
      'name-by-ids': {},
      'idx-by-ids': {},
      'ids': [],
      'idx': -1,
      'min': null,
      'max': null,
      'fmin': null,
      'fmax': null,
      'indexes': {}
    };
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Build a new structure with the same indexes as `me` and deep copies of all of its entries, re-added in
   * insertion order.
   *
   * @param {object} me - structure created by `new()`.
   * @returns {object} the new, independent structure.
   */
  this.copy = function(me) {
    const R = this.new();
    for (const name in me['indexes']) {
      this.add_index(R, name);
    }
    const entries = this.entries_of(me);
    for (let i = 0; i < entries.length; i++) {
      this.add(R, CND.deep_copy(entries[i]));
    }
    return R;
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Add an interval entry. `entry` is amended in place with normalized `lo`, `hi`, `idx`, `id`, `name`,
   * `size` and (when present) a normalized `tag` list.
   *
   * TAINT currently we keep the identity of `entry` and amend it; wouldn't it be better to copy? or deep
   * copy? it and then amend it?
   *
   * @param {object} me - structure created by `new()`.
   * @param {object} entry - must have `lo` and `hi` (numbers or single-character texts); may have `id`,
   *   `name` (defaults to `'+'`) and `tag`.
   * @returns {string} the ID of the entry (given, or generated as `name[n]`).
   * @throws {Error} on wrong arity, a non-object `entry`, or missing `lo` / `hi`.
   */
  this.add = function(me, entry) {
    const arity = arguments.length;
    if (arity !== 2) {
      throw new Error(`expected 2 arguments, got ${arity}`);
    }
    if (!isa.object(entry)) {
      throw new Error(`expected a POD, got a ${type_of(entry)}`);
    }
    let { lo, hi, id, name } = entry;
    if (lo == null) {
      throw new Error("expected setting for 'lo', found none");
    }
    if (hi == null) {
      throw new Error("expected setting for 'hi', found none");
    }
    lo = as_number(lo);
    hi = as_number(hi);
    if (name == null) {
      name = '+';
    }
    // Per-name counter (used for generated IDs) and global insertion counter.
    const previous_group_idx = me['idx-by-names'][name];
    const group_idx = (previous_group_idx != null ? previous_group_idx : -1) + 1;
    me['idx-by-names'][name] = group_idx;
    const global_idx = (me['idx'] += 1);
    if (id == null) {
      id = `${name}[${group_idx}]`;
    }
    entry['lo'] = lo;
    entry['hi'] = hi;
    entry['idx'] = global_idx;
    entry['id'] = id;
    entry['name'] = name;
    entry['size'] = hi - lo + 1;
    if (entry['tag'] != null) {
      entry['tag'] = normalize_tag(entry['tag']);
    }
    // Overall bounds. The upper bounds must track `hi` (they used to be compared against `lo`, which
    // under-reported `max` / `fmax`).
    me['min'] = me['min'] == null ? lo : Math.min(me['min'], lo);
    me['max'] = me['max'] == null ? hi : Math.max(me['max'], hi);
    // `fmin` / `fmax` only take boundaries accepted by `isa.float` into account
    // NOTE(review): presumably this excludes the infinities — confirm against `./types`.
    if (isa.float(lo)) {
      me['fmin'] = me['fmin'] == null ? lo : Math.min(me['fmin'], lo);
    }
    if (isa.float(hi)) {
      me['fmax'] = me['fmax'] == null ? hi : Math.max(me['fmax'], hi);
    }
    me['name-by-ids'][id] = name;
    me['idx-by-ids'][id] = global_idx;
    me['entry-by-ids'][id] = entry;
    const ids_by_names = me['ids-by-names'];
    if (ids_by_names[name] == null) {
      ids_by_names[name] = [];
    }
    ids_by_names[name].push(id);
    me['%self'].insert(id, lo, hi);
    me['ids'].push(id);
    //.........................................................................................................
    this._index_entry(me, entry);
    //.........................................................................................................
    return id;
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Remove the entry with the given ID from the substrate and from all bookkeeping tables (entries, names,
   * insertion indexes, ID lists and field indexes), so that `entries_of()`, `copy()` and `find_entries()`
   * no longer report it.
   *
   * The per-name counters and `min` / `max` / `fmin` / `fmax` are left untouched: generated IDs are never
   * reused, and the bounds are not shrunk on deletion.
   *
   * @param {object} me - structure created by `new()`.
   * @param {string} id - ID as returned by `add()`.
   * @returns {*} whatever the substrate's `remove()` returns.
   */
  this.delete = function(me, id) {
    const R = me['%self'].remove(id);
    if (me['entry-by-ids'][id] != null) {
      const name = me['name-by-ids'][id];
      delete me['entry-by-ids'][id];
      delete me['name-by-ids'][id];
      delete me['idx-by-ids'][id];
      remove_all(me['ids'], id);
      if (me['ids-by-names'][name] != null) {
        remove_all(me['ids-by-names'][name], id);
      }
      for (const index_name in me['indexes']) {
        const index = me['indexes'][index_name];
        for (const value in index) {
          remove_all(index[value], id);
          // An empty bucket and a missing bucket are equivalent for `find_ids()`.
          if (index[value].length === 0) {
            delete index[value];
          }
        }
      }
    }
    return R;
  };

  // #===========================================================================================================
  // # SERIALIZATION
  // #-----------------------------------------------------------------------------------------------------------
  // @to_xjson = ( me ) ->
  //   R =
  //     'index-keys': ( key for key of me[ 'indexes' ] )
  //     'entries':    ( entry for _, entry of me[ 'entry-by-ids' ] )
  //   return CND.XJSON.stringify R, null, ' '

  // #-----------------------------------------------------------------------------------------------------------
  // @new_from_xjson = ( xjson ) ->
  //   description = CND.XJSON.parse xjson
  //   R = @new()
  //   @add_index R, key for key in description[ 'index-keys' ]
  //   @add R, entry for entry in description[ 'entries' ]
  //   return R

  //===========================================================================================================
  // INDEXING
  //-----------------------------------------------------------------------------------------------------------
  /**
   * Register a field index. Only entries added afterwards are indexed.
   *
   * @param {object} me - structure created by `new()`.
   * @param {string} name - entry field to index.
   * @returns {object} the new, empty index.
   * @throws {Error} when an index of that name already exists.
   */
  this.add_index = function(me, name) {
    if (me['indexes'][name] != null) {
      throw new Error(`index for ${rpr(name)} already exists`);
    }
    return me['indexes'][name] = {};
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Remove a field index.
   *
   * @param {object} me - structure created by `new()`.
   * @param {string} name - name of the index.
   * @param {*} [fallback] - returned instead of throwing when there is no such index.
   * @returns {*} the removed index, or `fallback`.
   * @throws {Error} when there is no such index and no `fallback` was given.
   */
  this.delete_index = function(me, name, fallback = σ_misfit) {
    const index = me['indexes'][name];
    const R = index != null ? index : fallback;
    if (R === σ_misfit) {
      throw new Error(`no index for field ${rpr(name)}`);
    }
    delete me['indexes'][name];
    return R;
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Look up the IDs of all entries whose indexed field `name` has the given `value`.
   *
   * @param {object} me - structure created by `new()`.
   * @param {string} name - name of an existing index.
   * @param {*} value - field value to look up.
   * @returns {string[]} a fresh list of IDs (empty when nothing matches).
   * @throws {Error} when there is no index for `name`.
   */
  this.find_ids = function(me, name, value) {
    const index = me['indexes'][name];
    if (index == null) {
      throw new Error(`no index for field ${rpr(name)}`);
    }
    const R = index[value];
    if (R == null) {
      return [];
    }
    // Copy so that callers cannot modify the index.
    return Object.assign([], R);
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Like `find_ids()`, but returns the entries instead of their IDs.
   *
   * @param {object} me - structure created by `new()`.
   * @param {string} name - name of an existing index.
   * @param {*} value - field value to look up.
   * @returns {object[]} list of entries.
   */
  this.find_entries = function(me, name, value) {
    const R = this.find_ids(me, name, value);
    for (let idx = 0; idx < R.length; idx++) {
      R[idx] = me['entry-by-ids'][R[idx]];
    }
    return R;
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Record `entry` in every index whose name matches one of the entry's fields. The `tag` field is indexed
   * once per tag word; all other fields are indexed by their value.
   *
   * TAINT this is a minimally viable product; indexing behavior should be configurable
   *
   * @param {object} me - structure created by `new()`.
   * @param {object} entry - entry that already has its `id` set.
   * @returns {null}
   */
  this._index_entry = function(me, entry) {
    const { id } = entry;
    const indexes = me['indexes'];
    //.........................................................................................................
    if (indexes != null) {
      for (const name in entry) {
        const index = indexes[name];
        if (!index) {
          continue;
        }
        const value = entry[name];
        const keys = name === 'tag' ? normalize_tag(value) : [value];
        for (let i = 0; i < keys.length; i++) {
          const key = keys[i];
          if (index[key] == null) {
            index[key] = [];
          }
          index[key].push(id);
        }
      }
    }
    //.........................................................................................................
    return null;
  };

  //===========================================================================================================
  // COVER AND INTERSECT
  //-----------------------------------------------------------------------------------------------------------
  /**
   * Find what is stored for intervals looked up with all `points` in a single substrate query.
   * NOTE(review): presumably the substrate returns the intervals containing *all* points — confirm.
   *
   * @param {object} me - structure created by `new()`.
   * @param {number|string|Array<number|string>} points - one point or a list of points.
   * @param {object} [settings] - may contain `pick` (`'id'`, `'tag'`, or any entry field name).
   * @returns {Array} IDs, entries, or picked field values, depending on `settings.pick`.
   */
  this.match = function(me, points, settings = {}) {
    return this._match_or_intersect(me, 'match', points, settings);
  };

  /**
   * Find what is stored for intervals that are found for at least one of `points` (each point is looked up
   * separately and the results are united).
   *
   * @param {object} me - structure created by `new()`.
   * @param {number|string|Array<number|string>} points - one point or a list of points.
   * @param {object} [settings] - may contain `pick` (`'id'`, `'tag'`, or any entry field name).
   * @returns {Array} IDs, entries, or picked field values, depending on `settings.pick`.
   */
  this.intersect = function(me, points, settings = {}) {
    return this._match_or_intersect(me, 'intersect', points, settings);
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Shared implementation of `match()` and `intersect()`.
   *
   * TAINT can probably be greatly simplified since advanced functionality here is not needed
   *
   * @param {object} me - structure created by `new()`.
   * @param {string} mode - `'match'`; anything else is treated as `'intersect'`.
   * @param {number|string|Array<number|string>} points - one point or a list of points.
   * @param {object} settings - may only contain the key `pick`.
   * @returns {Array} with `pick: 'id'` the IDs; with `pick: 'tag'` the reduced tag list; with any other
   *   `pick` the flattened, de-duplicated field values; without `pick` the entries in insertion order.
   * @throws {Error} when `settings` has keys other than `pick`.
   */
  this._match_or_intersect = function(me, mode, points, settings) {
    const keys = Object.keys(settings);
    if (!is_subset(keys, setting_keys_of_cover_and_intersect)) {
      const expected = setting_keys_of_cover_and_intersect.join(', ');
      const got = keys.join(', ');
      throw new Error(`expected settings out of ${expected}, got ${got}`);
    }
    const { pick } = settings;
    const ids = mode === 'match'
      ? this._find_ids_with_all_points(me, points)
      : this._find_ids_with_any_points(me, points);
    if (pick === 'id') {
      return ids;
    }
    const entries = this.entries_of(me, ids);
    if (pick == null) {
      return fuse(entries);
    }
    const picked = [];
    for (let i = 0; i < entries.length; i++) {
      picked.push(entries[i][pick]);
    }
    if (pick === 'tag') {
      return reduce_tag(picked);
    }
    return fuse(picked);
  };

  //===========================================================================================================
  //-----------------------------------------------------------------------------------------------------------
  /**
   * Return entries in insertion order.
   *
   * @param {object} me - structure created by `new()`.
   * @param {string[]|null} [ids] - IDs to resolve; when omitted, all entries are returned.
   * @returns {object[]} list of entries, sorted by their `idx`.
   * @throws {Error} when one of `ids` is unknown.
   */
  this.entries_of = function(me, ids = null) {
    const entry_by_ids = me['entry-by-ids'];
    const R = [];
    if (ids == null) {
      for (const id in entry_by_ids) {
        R.push(entry_by_ids[id]);
      }
    } else {
      for (let i = 0; i < ids.length; i++) {
        const id = ids[i];
        const entry = entry_by_ids[id];
        if (entry == null) {
          throw new Error(`unknown ID ${rpr(id)}`);
        }
        R.push(entry);
      }
    }
    return sort_entries_by_insertion_order(me, R);
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Collect the IDs the substrate reports for each single point. With fewer than two points the substrate's
   * result is returned as-is; otherwise the united IDs are returned in insertion order.
   *
   * @param {object} me - structure created by `new()`.
   * @param {number|string|Array<number|string>} points - one point or a list of points.
   * @returns {string[]} list of IDs.
   */
  this._find_ids_with_any_points = function(me, points) {
    const normalized = normalize_points(points);
    if (normalized.length < 2) {
      return me['%self'].findContaining(...normalized);
    }
    const R = new Set();
    for (let i = 0; i < normalized.length; i++) {
      const ids = me['%self'].findContaining(normalized[i]);
      for (let j = 0; j < ids.length; j++) {
        R.add(ids[j]);
      }
    }
    return sort_ids_by_insertion_order(me, Array.from(R));
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Query the substrate once with all points.
   *
   * @param {object} me - structure created by `new()`.
   * @param {number|string|Array<number|string>} points - one point or a list of points.
   * @returns {string[]} whatever the substrate's `findContaining()` returns for the normalized points.
   */
  this._find_ids_with_all_points = function(me, points) {
    return me['%self'].findContaining(...normalize_points(points));
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Turn a collection of points into the minimal list of `{ lo, hi }` intervals of consecutive points
   * (`[ 1, 2, 3, 7 ]` becomes `[ { lo: 1, hi: 3 }, { lo: 7, hi: 7 } ]`).
   *
   * @param {object} me - not used; present for signature consistency.
   * @param {number|string|Array<number|string>} points - one point or a list of points, in any order.
   * @param {...object} mixins - when given, each interval is a fresh object made of the mixins' properties
   *   overlaid with `lo` and `hi`.
   * @returns {object[]} intervals in ascending order.
   */
  this.intervals_from_points = function(me, points, ...mixins) {
    const mixin = function(lohi) {
      if (!(mixins.length > 0)) {
        return lohi;
      }
      return Object.assign({}, ...mixins, lohi);
    };
    const sorted = unique(as_numbers(isa.list(points) ? points : [points]));
    sorted.sort(compare_values);
    const R = [];
    let lo = null;
    let hi = null;
    for (let i = 0; i < sorted.length; i++) {
      const point = sorted[i];
      if (lo == null) {
        // First point opens the first run.
        lo = point;
        hi = point;
      } else if (point === hi + 1) {
        // Point extends the current run.
        hi = point;
      } else {
        // Gap: emit the current run and open a new one.
        R.push(mixin({ lo: lo, hi: hi }));
        lo = point;
        hi = point;
      }
    }
    if ((lo != null) && (hi != null)) {
      R.push(mixin({ lo: lo, hi: hi }));
    }
    return R;
  };

  //===========================================================================================================
  // AGGREGATION
  //-----------------------------------------------------------------------------------------------------------
  /**
   * Mix the entries found for a single point into one object.
   *
   * @param {object} me - structure created by `new()`.
   * @param {number|string|Array<number|string>} point - a single point (or a list with one point).
   * @param {object|null} [reducers] - reducers passed on to `aggregate.use()`.
   * @returns {object} the result of mixing all entries found for `point`.
   */
  this.aggregate = function(me, point, reducers = null) {
    return (this.aggregate.use(me, reducers))(point);
  };

  //-----------------------------------------------------------------------------------------------------------
  /**
   * Build a function that aggregates the entries found for a single point.
   *
   * TAINT this part must be rewritten
   *
   * Results are memoized per point by default; the memo is never invalidated, so entries added after the
   * first lookup of a point are not reflected in later lookups of that point through the same function.
   *
   * @param {object} me - structure created by `new()`.
   * @param {object|null} reducers - custom reducers; merged over the default ones when non-empty.
   * @param {object} [settings] - may only contain `memoize` (defaults to `true`).
   * @returns {function} function of one point returning the mixed entries.
   * @throws {Error} when `settings` has keys other than `memoize`.
   */
  this.aggregate.use = (me, reducers, settings = {}) => {
    const keys = Object.keys(settings);
    if (!is_subset(keys, ['memoize'])) {
      throw new Error(`unknown keys in ${rpr(keys)}`);
    }
    const memoize = settings['memoize'] != null ? settings['memoize'] : true;
    //.........................................................................................................
    let my_mix;
    if ((reducers == null) || (Object.keys(reducers)).length === 0) {
      my_mix = this.aggregate._mix;
    } else {
      // `fields` must be merged separately, otherwise the custom `fields` would replace the defaults.
      const mixins = [{}, this.aggregate._reducers, reducers];
      const field_mixins = [];
      for (let i = 0; i < mixins.length; i++) {
        if (mixins[i].fields != null) {
          field_mixins.push(mixins[i].fields);
        }
      }
      const fields = Object.assign({}, ...field_mixins);
      const merged_reducers = Object.assign(...mixins);
      merged_reducers['fields'] = fields;
      my_mix = mix.use(merged_reducers);
    }
    //.........................................................................................................
    const mix_entries_of_point = (point) => {
      const point_count = (isa.list(point)) ? point.length : 1;
      if (point_count !== 1) {
        throw new Error(`need single point, got ${point_count}`);
      }
      const entries = this.entries_of(me, this._find_ids_with_any_points(me, point));
      return my_mix(...entries);
    };
    //.........................................................................................................
    if (!memoize) {
      return mix_entries_of_point;
    }
    //.........................................................................................................
    // A `Map` keeps the number `1` and the text `'1'` (code point 49) apart; a plain object would coerce
    // both to the same property name and return the wrong cached result.
    const cache = new Map();
    return (point) => {
      const key = (isa.list(point) && point.length === 1) ? point[0] : point;
      const cached = cache.get(key);
      if (cached != null) {
        return cached;
      }
      const R = mix_entries_of_point(point);
      cache.set(key, R);
      return R;
    };
  };

  //-----------------------------------------------------------------------------------------------------------
  // Default reducers: bookkeeping fields are skipped, tags are reduced with the `tag` reducer.
  this.aggregate._reducers = {
    fields: {
      idx: 'skip',
      id: 'skip',
      name: 'skip',
      lo: 'skip',
      hi: 'skip',
      size: 'skip',
      tag: 'tag'
    }
  };

  //-----------------------------------------------------------------------------------------------------------
  this.aggregate._mix = mix.use(this.aggregate._reducers);

  //===========================================================================================================
  // HELPERS
  //-----------------------------------------------------------------------------------------------------------
  /* Three-way comparison using `>` and `<`; incomparable values (e.g. `undefined`) compare as equal */
  function compare_values(a, b) {
    if (a > b) {
      return +1;
    }
    if (a < b) {
      return -1;
    }
    return 0;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Remove all occurrences of `value` from `list`, in place */
  function remove_all(list, value) {
    for (let idx = list.length - 1; idx >= 0; idx--) {
      if (list[idx] === value) {
        list.splice(idx, 1);
      }
    }
    return list;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Sort `entries` in place by their `idx` (insertion order) and return them */
  function sort_entries_by_insertion_order(me, entries) {
    entries.sort(function(a, b) {
      return compare_values(a['idx'], b['idx']);
    });
    return entries;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Sort `ids` in place by the insertion order recorded in `me` and return them */
  function sort_ids_by_insertion_order(me, ids) {
    const idxs = me['idx-by-ids'];
    ids.sort(function(a, b) {
      return compare_values(idxs[a], idxs[b]);
    });
    return ids;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Return `x` unchanged when it is an infinity or passes `isa.float`; return the code point of `x` when
  it is a text with exactly one code point; throw otherwise */
  function as_number(x) {
    if (x === -Infinity || x === +Infinity || isa.float(x)) {
      return x;
    }
    const type = type_of(x);
    if (type !== 'text') {
      throw new Error(`expected number or single character text, got a ${type}`);
    }
    // Count code points, not UTF-16 code units, so that astral characters are accepted.
    const length = (Array.from(x)).length;
    if (length !== 1) {
      throw new Error(`expected single character text, got one of length ${length}`);
    }
    return x.codePointAt(0);
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Apply `as_number` to each element of `list`, returning a new list */
  function as_numbers(list) {
    const R = [];
    for (let i = 0; i < list.length; i++) {
      R.push(as_number(list[i]));
    }
    return R;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Given a single point or a list of points, return a new list of numbers */
  function normalize_points(points) {
    return as_numbers(isa.list(points) ? points : [points]);
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Given a single string or a list of strings, return a new list that contains all whitespace-delimited
  words in the strings. Leading, trailing and repeated whitespace never produces empty words. */
  /* TAINT consider to return `unique R` instead */
  function normalize_tag(tag) {
    if (!isa.list(tag)) {
      return normalize_tag([tag]);
    }
    const R = [];
    for (let i = 0; i < tag.length; i++) {
      const t = tag[i];
      if (t.length === 0) {
        continue;
      }
      const words = t.split(/\s+/);
      for (let j = 0; j < words.length; j++) {
        // `split` yields '' at either end when `t` starts or ends with whitespace.
        if (words[j].length > 0) {
          R.push(words[j]);
        }
      }
    }
    return R;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Return a copy of `list` that only contains the last occurrence of each value */
  /* TAINT consider to modify, not copy `list` */
  function unique(list) {
    const seen = new Set();
    const R = [];
    // Walk backwards so that the *last* occurrence wins, then restore the original order.
    for (let idx = list.length - 1; idx >= 0; idx--) {
      const element = list[idx];
      if (seen.has(element)) {
        continue;
      }
      seen.add(element);
      R.push(element);
    }
    return R.reverse();
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Append elements of list `b` to list `a` */
  /* TAINT JS has `[]::concat` */
  function append(a, b) {
    a.splice(a.length, 0, ...b);
    return a;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* When `value` is a list, `append` it to `list`; else, `push` `value` to `list` */
  function meld(list, value) {
    if (isa.list(value)) {
      append(list, value);
    } else {
      list.push(value);
    }
    return list;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Flatten `list` (one level), then apply `unique` to it. Does not copy `list` but modifies it */
  function fuse(list) {
    const flat = [];
    for (let i = 0; i < list.length; i++) {
      meld(flat, list[i]);
    }
    list.splice(0, list.length, ...unique(flat));
    return list;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* Reduce a list of tags (or lists of tags) to the effective tags. `raw` is fused in place, then read
  from last to first: `-*` discards everything before it, `-x` discards every earlier `x`, and all other
  tags are kept in their original order. Missing values (entries without tags) are ignored. */
  function reduce_tag(raw) {
    const source = fuse(raw);
    const R = [];
    let exclude = null;
    //.........................................................................................................
    for (let idx = source.length - 1; idx >= 0; idx--) {
      const tag = source[idx];
      // Entries without a `tag` contribute `undefined`; calling `startsWith` on it would throw.
      if (tag == null) {
        continue;
      }
      if ((exclude != null) && exclude.has(tag)) {
        continue;
      }
      if (tag.startsWith('-')) {
        if (tag === '-*') {
          break;
        }
        if (exclude == null) {
          exclude = new Set();
        }
        exclude.add(tag.slice(1));
        continue;
      }
      R.unshift(tag);
    }
    //.........................................................................................................
    return R;
  }

  //-----------------------------------------------------------------------------------------------------------
  /* `is_subset subset, superset` returns whether `subset` is a subset of `superset`; this is true if each
  element of `subset` is also an element of `superset`. Both arguments must be lists, or both must be
  sets. */
  function is_subset(subset, superset) {
    const type_of_sub = type_of(subset);
    const type_of_super = type_of(superset);
    if (type_of_sub !== type_of_super) {
      throw new Error(`expected two arguments of same type, got ${type_of_sub} and ${type_of_super}`);
    }
    switch (type_of_sub) {
      case 'list':
        // No length shortcut here: a list may contain duplicates and still be a subset of a shorter list.
        for (let i = 0; i < subset.length; i++) {
          if (!superset.includes(subset[i])) {
            return false;
          }
        }
        return true;
      case 'set':
        if (subset.size > superset.size) {
          return false;
        }
        for (const value of subset) {
          if (!superset.has(value)) {
            return false;
          }
        }
        return true;
      default:
        throw new Error(`expected lists or sets, got ${type_of_sub} and ${type_of_super}`);
    }
  }

}).call(this);

//# sourceMappingURL=main.js.map