UNPKG

full-text-search-light

Version:

A full-text search written in pure JS with an easy-to-use API.

521 lines (391 loc) 15.5 kB
'use strict';

var debuglib = require('debug');
var jsonfile = require('jsonfile');
var merge = require('merge');

var debug = debuglib('full-text-search-light');

/**
 * Return a copy of `arr` without duplicates, keeping the first occurrence of
 * every value in its original order.
 *
 * @param {Array} arr - Values to de-duplicate; the array is not modified.
 * @returns {Array} New array holding each distinct value once.
 */
var unique = function (arr) {
    var a = [];
    for (var i = 0, l = arr.length; i < l; i++) {
        if (a.indexOf(arr[i]) === -1) {
            a.push(arr[i]);
        }
    }
    return a;
};

/**
 * Copy the own enumerable properties of `source` into an object without a
 * prototype, so that keys such as 'constructor' or 'toString' are never
 * resolved through Object.prototype.
 *
 * @param {Object} source - Plain object, e.g. one index level parsed from JSON.
 * @returns {Object} Prototype-less copy of `source`.
 */
function toDictionary(source) {
    var dictionary = Object.create(null);
    for (var key in source) {
        if (Object.prototype.hasOwnProperty.call(source, key)) {
            dictionary[key] = source[key];
        }
    }
    return dictionary;
}

/**
 * Full text search over strings, numbers, booleans and (nested) objects.
 *
 * @param {Object} [options] - Merged over the defaults.
 * @param {number} [options.index_amount=12] - Number of index levels that init() creates.
 * @param {boolean} [options.ignore_case=true] - Whether matching ignores case.
 * @constructor
 */
function FullTextSearchLight(options) {
    var default_options = {
        index_amount: 12,
        ignore_case: true
    };

    this.config = merge(default_options, options);
    this.indexes = [];
    this.data = [];
    this.data_ptr = 0;
    this.free_slots = [];
    this.single_data_counter = 0;
    this.init();
}

/**
 * Append `config.index_amount` empty, prototype-less dictionaries to
 * `this.indexes`.
 */
FullTextSearchLight.prototype.init = function () {
    // Create indexes
    for (var i = 0; i < this.config.index_amount; i++) {
        this.indexes.push(Object.create(null));
    }
};

/**
 * Build an instance from the object written by save()/saveSync().
 * Shared by load() and loadSync() so both restore the state identically.
 *
 * @param {Object} dataObject - Parsed content of a saved file.
 * @returns {FullTextSearchLight} Instance carrying the saved state.
 * @throws {Error} If `dataObject` has no `indexes` array.
 */
function restoreInstance(dataObject) {
    if (!dataObject || !Array.isArray(dataObject.indexes)) {
        throw new Error('Invalid full-text-search-light data: missing indexes');
    }

    var instance = new FullTextSearchLight(dataObject.config);

    // JSON.parse yields plain objects; convert every level back into a
    // prototype-less dictionary like the ones init() creates.
    instance.indexes = dataObject.indexes.map(function (level) {
        return toDictionary(level);
    });
    instance.data = dataObject.data;
    instance.data_ptr = dataObject.data_ptr;
    instance.free_slots = dataObject.free_slots;
    instance.single_data_counter = dataObject.single_data_counter;

    return instance;
}

/**
 * Asynchronously load a saved search from a JSON file.
 *
 * @param {string} path - File to read.
 * @param {function(Error, FullTextSearchLight=)} callback - Receives the read
 *        or restore error, or `null` and the restored instance.
 */
FullTextSearchLight.load = function (path, callback) {
    jsonfile.readFile(path, null, function (error, dataObject) {
        if (error) {
            callback(error);
            return;
        }

        var instance;
        try {
            instance = restoreInstance(dataObject);
        } catch (restoreError) {
            // Report malformed content through the callback instead of
            // throwing inside the jsonfile callback.
            callback(restoreError);
            return;
        }

        callback(null, instance);
    });
};

/**
 * Synchronously load a saved search from a JSON file.
 *
 * @param {string} path - File to read.
 * @returns {FullTextSearchLight} The restored instance.
 * @throws {Error} If the file cannot be read or has no `indexes` array.
 */
FullTextSearchLight.loadSync = function (path) {
    return restoreInstance(jsonfile.readFileSync(path));
};

/**
 * Asynchronously write this instance (config, indexes, data and counters) to
 * a JSON file.
 *
 * @param {string} path - File to write.
 * @param {function(Error=)} callback - Passed through to jsonfile.writeFile.
 */
FullTextSearchLight.prototype.save = function (path, callback) {
    jsonfile.writeFile(path, this, null, callback);
};

/**
 * Synchronously write this instance to a JSON file.
 *
 * @param {string} path - File to write.
 */
FullTextSearchLight.prototype.saveSync = function (path) {
    jsonfile.writeFileSync(path, this);
};

/**
 * Get or set `config.index_amount`.
 *
 * The setter only changes the configuration value; indexes that already exist
 * are neither rebuilt nor resized here.
 *
 * @param {number} [amount] - New amount; any falsy value (including 0) reads
 *        the current value instead.
 * @returns {number|undefined} The current amount when used as a getter.
 */
FullTextSearchLight.prototype.index_amount = function (amount) {
    if (amount) {
        this.config.index_amount = amount;
        return;
    }
    return this.config.index_amount;
};
/**
 * Turn an indexable primitive into the text that is stored in the indexes.
 * Used by both addToIndex() and remove() so the two always agree on the
 * parts they create.
 *
 * @param {string|number|boolean} value - Primitive to convert.
 * @param {boolean} ignore_case - Lowercase the text when exactly `true`.
 * @returns {string} Text to cut into index parts.
 */
function toIndexText(value, ignore_case) {
    var text = String(value);
    return ignore_case === true ? text.toLowerCase() : text;
}

/**
 * Read the id list stored for `part` in one index level, looking at own
 * properties only so inherited members are never mistaken for index entries.
 *
 * @param {Object} level - One index dictionary.
 * @param {string} part - Key to look up.
 * @returns {Array|undefined} The stored id list, or undefined if absent.
 */
function lookup(level, part) {
    return Object.prototype.hasOwnProperty.call(level, part) ? level[part] : undefined;
}

/**
 * Get or set `config.ignore_case`.
 *
 * @param {boolean} [bool] - New value; anything but `true`/`false` reads the
 *        current value instead.
 * @returns {boolean|undefined} The current value when used as a getter.
 */
FullTextSearchLight.prototype.ignore_case = function (bool) {
    if (bool === true || bool === false) {
        this.config.ignore_case = bool;
        return;
    }
    return this.config.ignore_case;
};

/**
 * Walk `object` recursively and call `func(key, value)` (with `this` bound to
 * the instance) for every Number, String or Boolean value found.
 *
 * @param {Object|Array} object - Structure to walk.
 * @param {function(string, *)} func - Called for each primitive value.
 * @param {function(string, Object): boolean} [filter] - Returning `false`
 *        skips that key (and everything below it).
 */
FullTextSearchLight.prototype.traverse = function (object, func, filter) {
    for (var key in object) {
        if (filter && filter(key, object) === false) {
            debug('Ignore field \'' + key + '\'');
            continue;
        }

        var value = object[key];

        // null and undefined have no constructor and nothing to index
        if (value === null || value === undefined) {
            continue;
        }

        // Only care about primitives
        if (value.constructor === Number || value.constructor === String || value.constructor === Boolean) {
            func.apply(this, [key, value]);
        }

        if (typeof value === 'object') {
            // Going one step down in the object tree
            this.traverse(value, func, filter);
        }
    }
};

/**
 * Set `result.match = true` if any primitive inside `obj` contains `search`.
 *
 * @param {Object|Array} obj - Structure to inspect.
 * @param {string} search - Search term (already lowercased by the caller when
 *        case is ignored).
 * @param {{match: boolean}} result - Receives the outcome.
 */
FullTextSearchLight.prototype.traverseCheck = function (obj, search, result) {
    var self = this;

    this.traverse(obj, function (key, value) {
        // Already matched
        if (result.match === true) {
            return;
        }

        var v = value;
        if (value.constructor === Number || value.constructor === Boolean) {
            v = value.toString();
        }
        if (self.config.ignore_case === true) {
            v = v.toLowerCase();
        }

        // Search term matched
        if (v.indexOf(search) > -1) {
            result.match = true;
        }
    });
};

/**
 * Store `obj` and add it to the indexes.
 *
 * @param {*} obj - String, number, boolean, or object/array of those.
 * @param {function(string, Object): boolean} [filter] - See traverse().
 * @returns {number} The data index under which `obj` was stored.
 */
FullTextSearchLight.prototype.add = function (obj, filter) {
    // Define data index
    var index = this.nextFreeIndex();
    debug('Next free index for ' + JSON.stringify(obj) + ': ' + index);

    // Store data
    this.data[index] = obj;

    // Add to index
    this.addToIndex(obj, index, filter);

    return index;
};

/**
 * Register data index `index` under every part of `obj`.
 *
 * Primitives are cut into parts of length 1..n (n = number of index levels);
 * objects, arrays and functions are traversed and each primitive inside is
 * indexed the same way.
 *
 * @param {*} obj - Value to index.
 * @param {number} index - Data index to register.
 * @param {function(string, Object): boolean} [filter] - See traverse().
 */
FullTextSearchLight.prototype.addToIndex = function (obj, index, filter) {
    var self = this;

    // Nothing to index (and no constructor to inspect)
    if (obj === null || obj === undefined) {
        return;
    }

    if (obj.constructor === String || obj.constructor === Number || obj.constructor === Boolean) {
        ++this.single_data_counter;

        debug(obj.constructor === String ? 'Type of data: String' : 'Type of data: Number | Boolean');

        // The text is the same for every level, so compute it once
        var text = toIndexText(obj, this.config.ignore_case);

        // Create all parts for all indexes
        for (var i = 0; i < this.indexes.length; i++) {
            var parts = this.cut(text, i + 1);
            debug('Parts for ' + JSON.stringify(obj) + ': ' + JSON.stringify(parts));

            // Stop if it is not splittable anymore
            if (parts.length === 0) {
                break;
            }

            var level = this.indexes[i];
            for (var j = 0; j < parts.length; j++) {
                var ids = lookup(level, parts[j]);
                if (!ids) {
                    ids = [];
                    level[parts[j]] = ids;
                }

                // Level 1...n index, no duplicates
                if (ids.indexOf(index) === -1) {
                    ids.push(index);
                }
            }
        }
        return;
    }

    // Add object
    if (obj.constructor === Object || obj.constructor === Array || obj.constructor === Function) {
        this.traverse(obj, function (key, value) {
            self.addToIndex(value, index, filter);
        }, filter);
    }
};

/**
 * Find all stored entries that contain `text`.
 *
 * Terms no longer than the number of index levels are answered by a single
 * dictionary lookup. Longer terms are cut into parts of the largest indexed
 * size; an entry is a candidate only if it is listed under every part, and
 * each candidate is then checked against the full term.
 *
 * @param {string|number|boolean} text - Search term.
 * @returns {Array} Matching stored entries (empty when nothing matches).
 */
FullTextSearchLight.prototype.search = function (text) {
    if (text === undefined || text === null || text === '') {
        return [];
    }

    var term = text;
    if (term.constructor === Number || term.constructor === Boolean) {
        term = term.toString();
    }
    if (this.config.ignore_case === true) {
        term = term.toLowerCase();
    }
    debug('Search for \'' + term + '\'');

    // Use the levels that actually exist; config.index_amount may have been
    // changed through the setter without the indexes being rebuilt.
    var index_count = this.indexes.length;
    if (index_count === 0) {
        return [];
    }

    var result = [];
    var i;
    var j;

    // 1) Search directly for the result
    if (term.length <= index_count) {
        var index_nr = term.length - 1;
        debug('Text length is ' + term.length + ' so search in index ' + index_nr);
        if (debug.enabled) {
            // Serializing a whole index is expensive; only do it when logging
            debug('Index ' + index_nr + ' is ' + JSON.stringify(this.indexes[index_nr]));
        }

        var ids = lookup(this.indexes[index_nr], term);
        debug('Found ids for keyword \'' + term + '\': ' + JSON.stringify(ids));

        if (!ids || ids.length === 0) {
            debug('Index found but no ids found');
            return [];
        }

        for (i = 0; i < ids.length; i++) {
            result.push(this.data[ids[i]]);
        }
        return result;
    }

    // ---------- Only reached if the search term is longer than the largest index ----------
    // 2) Search indirectly
    debug('No matching index found, take the index with the longest words');
    var last_index = this.indexes[index_count - 1];
    var parts = this.cut(term, index_count);
    debug('Search for: ' + JSON.stringify(parts));

    // The term is split into parts of the biggest indexed size. If not every
    // part has a match the result is not valid.
    // Example: 'simpler' is indexed with 3 levels and we search for 'sximp':
    //   parts are 'sxi', 'xim', 'imp'; only 'imp' matches, so there is no hit.
    var counter = Object.create(null);
    for (i = 0; i < parts.length; i++) {
        var part_ids = lookup(last_index, parts[i]);
        if (!part_ids) {
            debug('Nothing found for \'' + term + '\'');
            return [];
        }
        for (j = 0; j < part_ids.length; j++) {
            counter[part_ids[j]] = (counter[part_ids[j]] || 0) + 1;
        }
    }
    debug('Count occurence ' + JSON.stringify(counter));

    // An id listed under every part is a candidate
    var true_match_ids = [];
    for (var key in counter) {
        if (counter[key] === parts.length) {
            true_match_ids.push(key);
        }
    }
    debug('True matching ids: ' + JSON.stringify(true_match_ids));

    for (i = 0; i < true_match_ids.length; i++) {
        var id = true_match_ids[i];
        var entry = this.data[id];
        debug('Data for id \'' + id + '\': ' + JSON.stringify(entry));

        // Stale id without data: nothing to verify against
        if (entry === undefined || entry === null) {
            continue;
        }

        if (entry.constructor === String || entry.constructor === Number || entry.constructor === Boolean) {
            debug('Data[' + id + '] is primitive');
            debug('\'' + entry + '\' contains \'' + term + '\'?');

            // Check if the term is fully contained, honouring ignore_case the
            // same way the entry was indexed
            if (toIndexText(entry, this.config.ignore_case).indexOf(term) > -1) {
                debug('Yes');
                result.push(entry);
            }
            continue;
        }

        debug('Data[' + id + '] is object');

        // If its a complex object like an array...
        var resp = { match: false };
        this.traverseCheck(entry, term, resp);
        if (resp.match === true) {
            result.push(entry);
        }
    }

    return result;
};

/**
 * Clear the stored data at `data_index` and record the slot in `free_slots`.
 *
 * @param {number} data_index - Slot to clear.
 */
FullTextSearchLight.prototype.removeData = function (data_index) {
    // Remove data
    this.data[data_index] = undefined; // Just overwrite with undefined

    // Free for overwriting
    this.free_slots.push(data_index);
    debug('Add index data[' + data_index + '] to free slots: ' + JSON.stringify(this.free_slots));
};

/**
 * Remove the entry stored at `data_index` from the indexes and the data.
 * Does nothing if no data is stored at that index.
 *
 * @param {number} data_index - Index returned by add().
 */
FullTextSearchLight.prototype.remove = function (data_index) {
    debug('Remove data-index: ' + data_index);

    var obj = this.data[data_index];
    debug('Data for data-index \'' + data_index + '\' found: ' + JSON.stringify(obj));

    // Unknown or already removed: nothing is indexed for it
    if (obj === undefined || obj === null) {
        return;
    }

    var self = this;

    // Create all parts for all indexes and remove the data_index from each
    var removeParts = function (value) {
        var text = toIndexText(value, self.config.ignore_case);
        for (var i = 0; i < self.indexes.length; i++) {
            var parts = self.cut(text, i + 1);
            if (parts.length === 0) {
                break;
            }
            for (var j = 0; j < parts.length; j++) {
                self.removePrimitve(parts[j], data_index);
            }
        }
    };

    // Primitive
    if (obj.constructor === String || obj.constructor === Number || obj.constructor === Boolean) {
        removeParts(obj);
        this.removeData(data_index);
        return;
    }

    // Complex Object
    this.traverse(obj, function (key, value) {
        removeParts(value);
    });
    this.removeData(data_index);
};

/**
 * Remove `data_index` from the id list(s) stored for `text`.
 *
 * @param {string} text - Index part (or a longer text, which is cut into
 *        parts of the largest indexed size).
 * @param {number} data_index - Data index to remove.
 */
FullTextSearchLight.prototype.removePrimitve = function (text, data_index) {
    debug('Remove primitive \'' + text + '\'.');

    var index_count = this.indexes.length;
    if (index_count === 0 || text.length === 0) {
        return;
    }

    // 1) Search directly for the result
    if (text.length <= index_count) {
        var index_nr = text.length - 1;
        var level = this.indexes[index_nr];
        debug('Text length is ' + text.length + ' so search in index ' + index_nr);
        if (debug.enabled) {
            // Serializing a whole index is expensive; only do it when logging
            debug('Index ' + index_nr + ' is ' + JSON.stringify(level));
        }

        var ids = lookup(level, text);

        // The part may be gone already (two fields sharing a part) or may
        // never have been indexed (field skipped by a filter on add)
        if (!ids) {
            debug('No ids stored for \'' + text + '\'');
            return;
        }

        // Remove data_id out of index
        debug('Remove id \'' + data_index + '\' from ' + text + ':\'' + JSON.stringify(ids) + '\'');
        this.removeFromArray(ids, data_index);

        // Is empty can be deleted, no further need
        if (ids.length === 0) {
            delete level[text];
        }
        debug('Removed id, resulting ids are:' + JSON.stringify(ids));
        return;
    }

    // 2) Search indirectly
    var last_index = this.indexes[index_count - 1];
    var parts = this.cut(text, index_count);
    debug('Search for \'' + JSON.stringify(parts) + '\'');

    for (var i = 0; i < parts.length; i++) {
        var part_ids = lookup(last_index, parts[i]);

        // Nothing found for that part
        if (!part_ids) {
            continue;
        }

        debug('Remove \'' + data_index + '\' in ' + part_ids);
        this.removeFromArray(part_ids, data_index);

        // Is empty can be deleted, no further need
        if (part_ids.length === 0) {
            delete last_index[parts[i]];
        }
    }
};

/**
 * Remove every element loosely equal to `val` from `arr`, in place.
 *
 * @param {Array} arr - Array to modify.
 * @param {*} val - Value to remove.
 */
FullTextSearchLight.prototype.removeFromArray = function (arr, val) {
    // Iterate backwards so splicing does not shift unvisited elements
    for (var i = arr.length - 1; i > -1; i--) {
        // Loose comparison kept on purpose: a data index passed as '3' must
        // still match the stored number 3
        if (arr[i] == val) {
            arr.splice(i, 1);
        }
    }
};

/**
 * Discard all indexes and data and create fresh, empty indexes.
 */
FullTextSearchLight.prototype.drop = function () {
    this.indexes = [];
    this.data = [];
    this.data_ptr = 0;
    this.free_slots = [];
    this.init();
};

/**
 * Return the next data index and advance the pointer.
 *
 * @returns {number} Index to store the next entry under.
 */
FullTextSearchLight.prototype.nextFreeIndex = function () {
    return this.data_ptr++;
};

/**
 * Cut `text` into all distinct substrings of length `level`.
 *
 * @param {string} text - Text to cut.
 * @param {number} level - Length of every part; must be at least 1.
 * @returns {string[]} Distinct parts in order of first occurrence; empty if
 *          `text` is shorter than `level`.
 * @throws {Error} If `level` is below 1 or `text` is not a string.
 */
FullTextSearchLight.prototype.cut = function (text, level) {
    if (level < 1) {
        throw new Error("Can't divide a word in smaller parts then 1 chacator");
    }
    if (text === null || text === undefined || text.constructor !== String) {
        throw new Error("Can't handle non-strings");
    }

    var parts = [];
    for (var start = 0; start + level <= text.length; start++) {
        parts.push(text.substring(start, start + level));
    }
    return unique(parts);
};

module.exports = FullTextSearchLight;