UNPKG

elasticlunr

Version:

Lightweight full-text search engine in Javascript for browser search and offline search.

236 lines (201 loc) 5.59 kB
/*!
 * elasticlunr.InvertedIndex
 * Copyright (C) @YEAR Wei Song
 * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
 */

/**
 * elasticlunr.InvertedIndex is used for efficiently storing and looking up
 * the documents that contain a given token.
 *
 * The index is a trie keyed by the successive characters of a token. Every
 * node has the shape `{ docs: {ref: {tf: Number}}, df: Number }` plus one
 * child property per following character.
 *
 * @constructor
 */
elasticlunr.InvertedIndex = function () {
  this.root = { docs: {}, df: 0 };
};

/**
 * Loads a previously serialised inverted index.
 *
 * The `root` of the serialised data is adopted as-is (it is not copied).
 *
 * @param {Object} serialisedData The serialised inverted index to load.
 * @return {elasticlunr.InvertedIndex}
 */
elasticlunr.InvertedIndex.load = function (serialisedData) {
  var idx = new this();
  idx.root = serialisedData.root;

  return idx;
};

/**
 * Adds a {token: tokenInfo} pair to the inverted index.
 * If the token already exists for that document, its tokenInfo is replaced.
 *
 * tokenInfo format: { ref: 1, tf: 2 }
 * tokenInfo should contain the document's ref and the tf (token frequency)
 * of that token in the document.
 *
 * By default this function starts at the root of the current inverted index,
 * however it can start at any node of the inverted index if required.
 *
 * NOTE: documents are stored in a plain object, so a ref of "__proto__" is
 * not supported.
 *
 * @param {String} token
 * @param {Object} tokenInfo format: { ref: 1, tf: 2 }
 * @param {Object} root An optional node at which to start looking for the
 * correct place to enter the doc, by default the root of this
 * elasticlunr.InvertedIndex is used.
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.addToken = function (token, tokenInfo, root) {
  var node = root || this.root;

  // Walk the trie one character at a time, creating missing nodes.
  for (var idx = 0; idx < token.length; idx++) {
    var key = token[idx];

    if (!(key in node)) node[key] = { docs: {}, df: 0 };
    node = node[key];
  }

  var docRef = tokenInfo.ref;

  // Own-property check: a ref such as "constructor" or "toString" must not be
  // mistaken for an already indexed document via Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(node.docs, docRef)) {
    node.df += 1;
  }

  // New and existing documents alike end up with the latest tokenInfo.
  node.docs[docRef] = { tf: tokenInfo.tf };
};

/**
 * Checks whether a token is in this elasticlunr.InvertedIndex.
 *
 * A trie node existing for the token is enough, so prefixes of indexed
 * tokens are reported as present too.
 *
 * @param {String} token The token to be checked
 * @return {Boolean}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.hasToken = function (token) {
  return this.getNode(token) !== null;
};

/**
 * Retrieve a node from the inverted index for a given token.
 * If the token is empty or not found in this InvertedIndex, return null.
 *
 * @param {String} token The token to get the node for.
 * @return {Object}
 * @see InvertedIndex.prototype.get
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getNode = function (token) {
  if (!token) return null;

  var node = this.root;

  for (var i = 0; i < token.length; i++) {
    node = node[token[i]];
    if (!node) return null;
  }

  return node;
};

/**
 * Retrieve the documents of a given token.
 * If the token is not found, return {}.
 *
 * @param {String} token The token to get the documents for.
 * @return {Object}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getDocs = function (token) {
  var node = this.getNode(token);

  return node == null ? {} : node.docs;
};

/**
 * Retrieve the term frequency of a given token in a given document.
 * If the token or docRef is not found, return 0.
 *
 * @param {String} token The token to get the term frequency for.
 * @param {String|Integer} docRef
 * @return {Integer}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getTermFrequency = function (token, docRef) {
  var node = this.getNode(token);

  // Own-property check so inherited names (e.g. "toString") count as missing.
  if (node == null || !Object.prototype.hasOwnProperty.call(node.docs, docRef)) {
    return 0;
  }

  return node.docs[docRef].tf;
};

/**
 * Retrieve the document frequency of a given token.
 * If the token is not found, return 0.
 *
 * @param {String} token The token to get the document frequency for.
 * @return {Integer}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.getDocFreq = function (token) {
  var node = this.getNode(token);

  return node == null ? 0 : node.df;
};

/**
 * Remove the document identified by its ref from the given token in the
 * inverted index. Trie nodes themselves are left in place.
 *
 * @param {String} token The token to remove the document from.
 * @param {String} ref The ref of the document to remove from the given token.
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.removeToken = function (token, ref) {
  var node = this.getNode(token); // null for an empty or unknown token
  if (node == null) return;

  // Only documents that were really indexed may lower the document frequency.
  if (Object.prototype.hasOwnProperty.call(node.docs, ref)) {
    delete node.docs[ref];
    node.df -= 1;
  }
};

/**
 * Find all the possible suffixes of the given token using tokens currently in
 * the inverted index. If the token is not found, return an empty Array.
 *
 * @param {String} token The token to expand.
 * @param {Array} memo Optional accumulator the expansions are pushed onto
 * (used by the recursion); defaults to a new Array.
 * @param {Object} root Optional trie node corresponding to `token` (used by
 * the recursion); looked up from `token` when omitted.
 * @return {Array}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.expandToken = function (token, memo, root) {
  // Loose comparison with '' is kept on purpose to preserve existing behaviour.
  if (token == null || token == '') return [];

  var expansions = memo || [];

  if (root == void 0) {
    root = this.getNode(token);
    if (root == null) return expansions;
  }

  // A node with at least one document is a complete indexed token.
  if (root.df > 0) expansions.push(token);

  for (var key in root) {
    // 'docs' and 'df' are node bookkeeping, every other key is a child node.
    if (key === 'docs' || key === 'df') continue;

    this.expandToken(token + key, expansions, root[key]);
  }

  return expansions;
};

/**
 * Returns a representation of the inverted index ready for serialisation.
 *
 * @return {Object}
 * @memberOf InvertedIndex
 */
elasticlunr.InvertedIndex.prototype.toJSON = function () {
  return {
    root: this.root
  };
};