// @skiff-org/trawler
// Version:
// A modern search library for Skiff
// 606 lines (483 loc) • 13.4 kB
// JavaScript
/**!
* FlexSearch.js
* Copyright 2018-2021 Nextapps GmbH
* Author: Thomas Wilkerling
* Licence: Apache-2.0
* https://github.com/nextapps-de/flexsearch
*/
import { Index } from './index.js';
import Cache, { searchCache } from './cache.js';
import { create_object, is_array, is_string, is_object } from './common.js';
import apply_async from './async.js';
import { intersect, intersect_union } from './intersect.js';
/**
* @constructor
* @implements DocumentInterface
* @param {Object=} options
* @return {Document}
*/
export class Document {
    /**
     * Set up the per-field indexes plus the optional store, tag index and cache.
     *
     * The document descriptor is read from `options.document`, `options.doc` or,
     * when neither exists, from `options` itself.
     *
     * NOTE: `options.cache` is overwritten with `false` on the passed object so
     * that the per-field indexes do not create a cache of their own.
     *
     * @param {Object} options Must be an object; it is dereferenced immediately.
     */
    constructor(options) {
        const document = options.document || options.doc || options;
        let opt;
        // parsed field paths, parallel to `this.field`
        this.tree = [];
        // field names as given in the descriptor
        this.field = [];
        // marker[n] === true: path segment n was declared with a "[]" suffix
        this.marker = [];
        // id register, shared with every field index (see parse_descriptor)
        this.register = create_object();
        this.key = ((opt = document.key || document.id) && parse_tree(opt, this.marker)) || 'id';
        // `store: true` keeps whole documents; anything else is a list of paths
        this.storetree = (opt = document.store) && (opt !== true) && [];
        this.store = opt && create_object();
        this.tag = ((opt = document.tag) && parse_tree(opt, this.marker));
        this.tagindex = opt && create_object();
        this.cache = (opt = options.cache) && new Cache(opt);
        // do not apply cache again for the indexes
        options.cache = false;
        this.worker = options.worker;
        // this switch is used by recall of promise callbacks
        this.async = false;
        this.index = this.parse_descriptor(options, document);
    }

    /**
     * Index a document under `id`. When `id` is an object it is taken as the
     * document and the id is read from it via `this.key`.
     *
     * An id that is already registered is routed through `update()` unless
     * `_append` is set. A document that has no value at the tag path is
     * indexed without tag entries (previously this threw a TypeError).
     *
     * @param id
     * @param content
     * @param {boolean=} _append
     * @returns {Document|Promise}
     */
    add(id, content, _append) {
        if (is_object(id)) {
            content = id;
            id = parse_simple(content, this.key);
        }
        // 0 is a valid id, every other falsy id is rejected
        if (content && (id || (id === 0))) {
            if (!_append && this.register[id]) {
                return this.update(id, content);
            }
            for (let i = 0, tree, field; i < this.field.length; i++) {
                field = this.field[i];
                tree = this.tree[i];
                if (is_string(tree)) {
                    tree = [tree];
                }
                add_index(content, tree, this.marker, 0, this.index[field], id, tree[0], _append);
            }
            if (this.tag) {
                let tag = parse_simple(content, this.tag);
                // documents without a tag value simply get no tag entries
                if (tag) {
                    const dupes = create_object();
                    if (is_string(tag)) {
                        tag = [tag];
                    }
                    for (let i = 0, key, arr; i < tag.length; i++) {
                        key = tag[i];
                        // register every distinct tag of this document once
                        if (!dupes[key]) {
                            dupes[key] = 1;
                            arr = this.tagindex[key] || (this.tagindex[key] = []);
                            if (!_append || (arr.indexOf(id) === -1)) {
                                arr[arr.length] = id;
                            }
                        }
                    }
                }
            }
            // TODO: how to handle store when appending contents?
            if (this.store && (!_append || !this.store[id])) {
                let store;
                if (this.storetree) {
                    // keep only the configured paths of the document
                    store = create_object();
                    for (let i = 0, tree; i < this.storetree.length; i++) {
                        tree = this.storetree[i];
                        if (is_string(tree)) {
                            store[tree] = content[tree];
                        }
                        else {
                            store_value(content, store, tree, 0, tree[0]);
                        }
                    }
                }
                this.store[id] = store || content;
            }
        }
        return this;
    }

    /**
     * Add content to an id without replacing what is already indexed for it.
     */
    append(id, content) {
        return this.add(id, content, true);
    }

    /**
     * Replace a document: remove the id, then add the new content.
     */
    update(id, content) {
        return this.remove(id).add(id, content);
    }

    /**
     * Remove an id (or the id read from a document object) from every field
     * index, the tag index, the store and the register.
     *
     * @param id
     * @returns {Document}
     */
    remove(id) {
        if (is_object(id)) {
            id = parse_simple(id, this.key);
        }
        if (this.register[id]) {
            for (let i = 0; i < this.field.length; i++) {
                // workers does not share the register
                this.index[this.field[i]].remove(id, !this.worker);
            }
            if (this.tag) {
                for (let key in this.tagindex) {
                    const tag = this.tagindex[key];
                    const pos = tag.indexOf(id);
                    if (pos !== -1) {
                        if (tag.length > 1) {
                            tag.splice(pos, 1);
                        }
                        else {
                            // last id of this tag: drop the tag entry itself
                            delete this.tagindex[key];
                        }
                    }
                }
            }
            if (this.store) {
                delete this.store[id];
            }
            delete this.register[id];
        }
        return this;
    }

    /**
     * Search one or more field indexes, optionally combined with tags.
     *
     * Accepted call shapes: `search(query, limit, options)`,
     * `search(query, options)`, `search(options)` (query read from
     * `options.query`) and `search(query, fieldArray)`.
     *
     * The effective limit is `options.limit`, else the positional `limit`,
     * else 100 (the positional value used to be ignored whenever an options
     * object was passed).
     *
     * @param {!string|Object} query
     * @param {number|Object=} limit
     * @param {Object=} options
     * @param {Array<Array>=} _resolve For internal use only.
     * @returns {Promise|Array}
     */
    search(query, limit, options, _resolve) {
        if (!options) {
            if (!limit && is_object(query)) {
                options = /** @type {Object} */ (query);
                query = options.query;
            }
            else if (is_object(limit)) {
                options = /** @type {Object} */ (limit);
                limit = 0;
            }
        }
        let result = [], result_field = [];
        let pluck, enrich;
        let field, tag, bool, offset, count = 0;
        if (options) {
            if (is_array(options)) {
                field = options;
                options = null;
            }
            else {
                pluck = options.pluck;
                field = pluck || options.index || options.field /*|| (is_string(options) && [options])*/;
                tag = options.tag;
                enrich = this.store && options.enrich;
                bool = options.bool === 'and';
                // options win, then the positional argument, then the default
                limit = options.limit || limit || 100;
                offset = options.offset || 0;
                if (tag) {
                    if (is_string(tag)) {
                        tag = [tag];
                    }
                    // when tags is used and no query was set,
                    // then just return the tag indexes
                    if (!query) {
                        for (let i = 0, res; i < tag.length; i++) {
                            res = get_tag.call(this, tag[i], limit, offset, enrich);
                            if (res) {
                                result[result.length] = res;
                                count++;
                            }
                        }
                        return count ? result : [];
                    }
                }
                if (is_string(field)) {
                    field = [field];
                }
            }
        }
        field || (field = this.field);
        // "and" only matters when there is more than one thing to combine
        bool = bool && ((field.length > 1) || (tag && (tag.length > 1)));
        const promises = !_resolve && (this.worker || this.async) && [];
        // TODO solve this in one loop below
        for (let i = 0, res, key, len; i < field.length; i++) {
            let opt;
            key = field[i];
            if (!is_string(key)) {
                // field given as { field: name, ...per-field options }
                opt = key;
                key = key.field;
            }
            if (promises) {
                promises[i] = this.index[key].searchAsync(query, limit, opt || options);
                // just collect and continue
                continue;
            }
            else if (_resolve) {
                res = _resolve[i];
            }
            else {
                // inherit options also when search? it is just for laziness, Object.assign() has a cost
                res = this.index[key].search(query, limit, opt || options);
            }
            len = res && res.length;
            if (tag && len) {
                const arr = [];
                let tag_count = 0;
                if (bool) {
                    // prepare for intersection
                    arr[0] = [res];
                }
                for (let y = 0, tag_res; y < tag.length; y++) {
                    tag_res = this.tagindex[tag[y]];
                    // overwrites the field-level `len` on purpose: if no tag has
                    // entries, `len` ends up falsy and the field result is dropped
                    len = tag_res && tag_res.length;
                    if (len) {
                        tag_count++;
                        arr[arr.length] = bool ? [tag_res] : tag_res;
                    }
                }
                if (tag_count) {
                    if (bool) {
                        res = intersect(arr, limit || 100, offset || 0);
                    }
                    else {
                        res = intersect_union(res, arr);
                    }
                    len = res.length;
                }
            }
            if (len) {
                result_field[count] = key;
                result[count++] = res;
            }
            else if (bool) {
                // one empty field makes an "and" search empty
                return [];
            }
        }
        if (promises) {
            // Re-enter with the resolved per-field results. Returning the chained
            // promise lets a rejected field search reject the caller instead of
            // leaving the result pending forever. `options` is null when a plain
            // field array was passed, so hand the field list back in that case to
            // keep the results aligned with the fields they were requested for.
            return Promise.all(promises).then((found) => this.search(query, limit, options || field, found));
        }
        if (!count) {
            // fast path "not found"
            return [];
        }
        if (pluck && (!enrich || !this.store)) {
            // fast path optimization
            return result[0];
        }
        for (let i = 0, res; i < result_field.length; i++) {
            res = result[i];
            if (res.length) {
                if (enrich) {
                    res = apply_enrich.call(this, res);
                }
            }
            if (pluck) {
                return res;
            }
            result[i] = {
                'field': result_field[i],
                'result': res
            };
        }
        return result;
    }

    /**
     * @returns {boolean} Whether the id is present in the register.
     */
    contain(id) {
        return !!this.register[id];
    }

    /**
     * Read the stored document for an id (requires the store to be enabled).
     */
    get(id) {
        return this.store[id];
    }

    /**
     * Write a stored document for an id without touching any index.
     */
    set(id, data) {
        this.store[id] = data;
        return this;
    }

    /**
     * Serialize `this` into an exportable object.
     *
     * The result holds the tag index (`tag`), the register (`reg`), the store,
     * the field list and the result of `serialize()` of each field index.
     */
    serialize() {
        const result = {
            // the property is `tagindex` everywhere else in this class
            tag: this.tagindex,
            reg: this.register,
            store: this.store,
            field: this.field,
            index: {}
        };
        Object.entries(this.index).forEach(([key, index]) => {
            result.index[key] = index.serialize();
        });
        return result;
    }

    /**
     * Create a `Document` from a serialized object.
     *
     * @param {Object} obj Output of `serialize()`.
     * @param {Object} params Constructor options for the new instance.
     * @returns {Document}
     */
    static deserialize(obj, params) {
        // TODO add properties here?
        const result = new Document(params);
        // exports written before the `tagindex` fix carry no tags; keep the
        // (empty) tag index created by the constructor in that case
        if (obj.tag) {
            result.tagindex = obj.tag;
        }
        result.register = obj.reg;
        result.store = obj.store;
        result.field = obj.field;
        Object.entries(obj.index).forEach(([key, exportedIndex]) => {
            result.index[key] = Index.deserialize(exportedIndex);
            // every field index shares the document register
            result.index[key].register = obj.reg;
        });
        return result;
    }

    // Helper methods

    /**
     * Create one `Index` per described field and fill `this.tree`,
     * `this.field` and `this.storetree`.
     *
     * A field is either a name or an object `{ field: name, ...options }` whose
     * options are merged over `options` for that field only.
     *
     * @param {Object} options
     * @param {Object} document
     * @returns {Object} Map of field name to index (empty in worker mode).
     */
    parse_descriptor(options, document) {
        const index = create_object();
        let field = document.index || document.field || document;
        if (is_string(field)) {
            field = [field];
        }
        for (let i = 0, key; i < field.length; i++) {
            // declared per iteration so that one field's options cannot leak
            // into the plain-string fields that follow it
            let opt;
            key = field[i];
            if (!is_string(key)) {
                opt = key;
                key = key.field;
            }
            opt = is_object(opt) ? Object.assign({}, options, opt) : options;
            if (!this.worker) {
                index[key] = new Index(opt);
                index[key].register = this.register;
            }
            this.tree[i] = parse_tree(key, this.marker);
            this.field[i] = key;
        }
        if (this.storetree) {
            let store = document.store;
            if (is_string(store)) {
                store = [store];
            }
            for (let i = 0; i < store.length; i++) {
                this.storetree[i] = parse_tree(store[i], this.marker);
            }
        }
        return index;
    }
}
/**
 * Split a colon-separated field path into its segments.
 *
 * Empty segments are dropped. A segment containing "[]" loses its last two
 * characters and, if anything remains, `marker` is flagged at the position
 * the segment takes in the resulting path.
 *
 * @param {string} key Path such as "a:b" or "tags[]:name".
 * @param {Array<boolean>} marker Mutated in place.
 * @returns {Array<string>|string|undefined} The segment list when there is
 *   more than one segment, otherwise the single segment (or undefined).
 */
function parse_tree(key, marker){
    const path = [];
    for(const raw of key.split(':')){
        let segment = raw;
        if(segment.includes('[]')){
            segment = segment.slice(0, -2);
            if(segment){
                // position in the compacted path, not in the raw split
                marker[path.length] = true;
            }
        }
        if(segment){
            path.push(segment);
        }
    }
    return path.length > 1 ? path : path[0];
}
/**
 * Read a value from `obj` by a parsed path.
 *
 * @param {Object} obj
 * @param {string|Array<string>} tree A single property name or a segment list.
 * @returns {*} The value at the path; walking a segment list stops at the
 *   first falsy intermediate value and returns it.
 */
function parse_simple(obj, tree){
    if(is_string(tree)){
        return obj[tree];
    }
    let cursor = obj;
    let depth = 0;
    while(cursor && (depth < tree.length)){
        cursor = cursor[tree[depth]];
        depth += 1;
    }
    return cursor;
}
/**
 * Copy the value found at the path `tree` from `obj` into `store`, recreating
 * the nesting on the way.
 *
 * @param {Object|Array} obj Source container for the current step.
 * @param {Object|Array} store Target container for the current step.
 * @param {Array<string>} tree Path segments.
 * @param {number} pos Index of the segment being handled.
 * @param {string|number} key Property (or array index) to read from `obj`.
 */
function store_value(obj, store, tree, pos, key){
    const value = obj[key];
    // last segment: copy the value as it is, even when it is undefined
    if(pos === (tree.length - 1)){
        store[key] = value;
        return;
    }
    if(!value){
        return;
    }
    if(is_array(value)){
        const slots = new Array(value.length);
        store[key] = slots;
        for(let n = 0; n < value.length; n++){
            // same pos for every element: an array is not a path segment
            store_value(value, slots, tree, pos, n);
        }
        return;
    }
    // reuse a container created by an earlier path with the same prefix
    const nested = store[key] || (store[key] = create_object());
    const next = pos + 1;
    store_value(value, nested, tree, next, tree[next]);
}
/**
 * Walk `obj` along the path `tree` and pass the value found at the end to
 * `index.add`. Falsy values anywhere on the path are skipped.
 *
 * @param {Object|Array} obj Container for the current step.
 * @param {Array<string>} tree Path segments.
 * @param {Array<boolean>} marker Positions declared with "[]".
 * @param {number} pos Index of the segment being handled.
 * @param {Object} index Target exposing `add(id, content, append, skip_update)`.
 * @param id Document id.
 * @param {string|number} key Property (or array index) to read from `obj`.
 * @param {boolean=} _append Forwarded to `index.add`.
 */
function add_index(obj, tree, marker, pos, index, id, key, _append){
    const value = obj[key];
    if(!value){
        return;
    }
    const at_target = pos === (tree.length - 1);
    if(!at_target){
        if(is_array(value)){
            for(let n = 0; n < value.length; n++){
                // same pos for every element: an array is not a path segment
                add_index(value, tree, marker, pos, index, id, n, _append);
            }
        }
        else{
            const next = pos + 1;
            add_index(value, tree, marker, next, index, id, tree[next], _append);
        }
        return;
    }
    if(!is_array(value)){
        index.add(id, value, _append, /* skip_update: */ true);
        return;
    }
    if(marker[pos]){
        // marked field: each entry is appended and gets its own scoring context
        for(let n = 0; n < value.length; n++){
            index.add(id, value[n], /* append: */ true, /* skip update: */ true);
        }
        return;
    }
    // unmarked array: joined into one string, one scoring context
    index.add(id, value.join(' '), _append, /* skip_update: */ true);
}
/**
 * Return the ids registered for one tag, paginated by `offset` and `limit`.
 *
 * @this Document
 * @param {string} key Tag name.
 * @param {number} limit Maximum number of ids.
 * @param {number} offset Number of leading ids to skip.
 * @param {boolean=} enrich Map each id to `{id, doc}` via `apply_enrich`.
 * @returns {{tag: string, result: Array}|undefined} Undefined when the tag is
 *   unknown or nothing is left after the offset.
 */
function get_tag(key, limit, offset, enrich){
    const ids = this.tagindex[key];
    const remaining = ids && (ids.length - offset);
    if(!(remaining && (remaining > 0))){
        return;
    }
    let hits = ids;
    // slice only when needed, otherwise hand out the tag array itself
    if(offset || (remaining > limit)){
        hits = ids.slice(offset, offset + limit);
    }
    if(enrich){
        hits = apply_enrich.call(this, hits);
    }
    return {
        'tag': key,
        'result': hits
    };
}
/**
 * Pair every id of a result list with its entry from the document store.
 *
 * @this Document
 * @param {Array} res List of ids.
 * @returns {Array<{id: *, doc: *}>} New array of the same length and order.
 */
function apply_enrich(res){
    // arrow function keeps `this` bound to the Document passed via .call()
    return Array.from(res, (id) => ({
        'id': id,
        'doc': this.store[id]
    }));
}
// TODO move all of this into this file
// Expose the `searchCache` function imported from './cache.js' as a prototype
// method, so it is callable on every Document instance.
Document.prototype.searchCache = searchCache;
// Hand the prototype to the helper imported from './async.js'.
// NOTE(review): presumably this installs the `*Async` method variants (compare
// the `this.async` switch set in the constructor) — confirm against './async.js'.
apply_async(Document.prototype);