UNPKG

search-index

Version:

A network resilient, persistent full-text search library for the browser and Node.js

449 lines (409 loc) 12.5 kB
/**
 * Read-side API of the index: runs JSON queries against the underlying
 * inverted index (`ii`) and post-processes the hits (aggregations,
 * weighting, scoring, sorting, paging, document lookup).
 *
 * The cached entry points (DICTIONARY, DOCUMENTS, QUERY, SEARCH) go through
 * `cachePipeline`, which memoises resolved values per (label, params).
 */
export class Reader {
  #cache
  #docExistsSpace
  #ii

  /**
   * @param {Object} ops - options; only `ops.docExistsSpace` is read here. It
   *   is used as the first element of the store keys for stored documents.
   * @param {Object} cache - cache object exposing `has`, `get` and `set`.
   * @param {Object} ii - underlying index; this class calls its GET, AND, OR,
   *   NOT, BUCKETS, FACETS, DISTINCT, AGGREGATION_FILTER and STORE members.
   */
  constructor (ops, cache, ii) {
    this.#docExistsSpace = ops.docExistsSpace
    this.#ii = ii
    this.#cache = cache
  }

  /**
   * Removes duplicate entries from an array of JSON-serialisable items.
   * Entries are stringified so that a Set can detect structural duplicates
   * and are then parsed back, so the returned items are fresh copies in
   * first-seen order.
   */
  #dedupe = items =>
    Array.from(
      new Set(items.map(item => JSON.stringify(item))),
      str => JSON.parse(str)
    )

  /**
   * Reads a query in JSON format and translates it to a Promise pipeline:
   * run query -> format -> BUCKETS -> FACETS -> WEIGHT -> SCORE -> SORT ->
   * PAGE -> DOCUMENTS. Every stage except formatting and paging is only
   * active when the corresponding key is set in `options`.
   *
   * @param {*} q - query: a string/number token or an object using one of
   *   FIELD, VALUE, AND, GET, NOT, OR, ALL_DOCUMENTS.
   * @param {Object} [options] - PIPELINE, BUCKETS, FACETS, WEIGHT, SCORE,
   *   SORT, PAGE, DOCUMENTS.
   * @returns {Promise<Object>} result object with at least RESULT,
   *   RESULT_LENGTH and PAGING.
   */
  #parseJsonQuery = (q, options = {}) => {
    const runQuery = cmd => {
      // if string or object with only FIELD or VALUE, assume
      // that this is a GET
      if (typeof cmd === 'string' || typeof cmd === 'number') {
        return this.#ii.GET(cmd, options.PIPELINE)
      }
      if (cmd.FIELD) return this.#ii.GET(cmd)
      if (cmd.VALUE) return this.#ii.GET(cmd)

      // else:
      if (cmd.AND) {
        return this.#ii.AND(cmd.AND.map(runQuery), options.PIPELINE)
      }
      if (cmd.GET) return this.#ii.GET(cmd.GET, options.PIPELINE)
      if (cmd.NOT) {
        return this.#ii.NOT(
          runQuery(cmd.NOT.INCLUDE),
          runQuery(cmd.NOT.EXCLUDE)
        )
      }
      if (cmd.OR) return this.#ii.OR(cmd.OR.map(runQuery), options.PIPELINE)

      // TODO this should be ALL_DOCUMENTS, such that
      // ALL_DOCUMENTS=true returns everything (needs test)
      // It should be possible to combine ALL_DOCUMENTS with FACETS
      // and other aggregations
      if (cmd.ALL_DOCUMENTS) return this.ALL_DOCUMENTS(cmd.ALL_DOCUMENTS)
    }

    // Wraps the raw hits in a result object. Note the two shapes: a result
    // that already carries RESULT gets QUERY: { q, options }, a bare array
    // gets separate QUERY and OPTIONS keys.
    const formatResults = result =>
      result.RESULT
        ? Object.assign(result, {
          QUERY: { q, options },
          RESULT_LENGTH: result.RESULT.length
        })
        : {
            QUERY: q,
            OPTIONS: options,
            RESULT_LENGTH: result.length,
            RESULT: result
          }

    // APPEND DOCUMENTS IF SPECIFIED
    const appendDocuments = result =>
      options.DOCUMENTS
        ? this.DOCUMENTS(...result.RESULT.map(doc => doc._id)).then(
          documents =>
            Object.assign(result, {
              RESULT: result.RESULT.map((doc, i) =>
                Object.assign(doc, { _doc: documents[i] })
              )
            })
        )
        : result

    // SCORE IF SPECIFIED
    const score = result =>
      options.SCORE
        ? this.SCORE(result.RESULT, options.SCORE).then(scoredResult =>
          Object.assign(result, { RESULT: scoredResult })
        )
        : result

    // SORT IF SPECIFIED
    const sort = result =>
      Object.assign(
        result,
        options.SORT
          ? {
              // TODO: should this be async?
              RESULT: this.SORT(result.RESULT, options.SORT)
            }
          : {}
      )

    // BUCKETS IF SPECIFIED
    const buckets = result =>
      options.BUCKETS
        ? this.#ii.BUCKETS(...options.BUCKETS).then(bkts =>
          Object.assign(result, {
            BUCKETS: this.#ii.AGGREGATION_FILTER(bkts, result.RESULT, false)
          })
        )
        : result

    // FACETS IF SPECIFIED
    // TODO: FAST OPTION FOR WHEN ALL_DOCUMENTS IS SPECIFIED
    // TODO: This should be 3 cases: 1. needs filter, 2. no results,
    // 3. no need for filter
    const facets = result => {
      // no FACETS are specified
      if (!options.FACETS) return result

      // QUERY returned no results, and facets will therefore be empty
      if (!result.RESULT.length) {
        return Object.assign(result, { FACETS: [] })
      }

      // ALL_DOCUMENTS needs no filtering; every other query filters the
      // facets down to the current result set
      return this.FACETS(...options.FACETS).then(fcts =>
        Object.assign(result, {
          FACETS: q.ALL_DOCUMENTS
            ? fcts
            : this.#ii.AGGREGATION_FILTER(fcts, result.RESULT)
        })
      )
    }

    // PAGE (always applied; PAGE supplies defaults when options.PAGE is unset)
    const page = result => {
      let pageDetails
      // PAGE invokes the callback synchronously, so pageDetails is set
      // before it is read below
      const pagedResult = this.PAGE(result.RESULT, options.PAGE, pd => {
        pageDetails = pd
      })
      return Object.assign(result, {
        RESULT: pagedResult,
        PAGING: pageDetails
      })
    }

    // WEIGHT IF SPECIFIED
    // The weighted array is assigned onto the result (like the other steps)
    // rather than being overwritten by the unweighted one.
    const weight = result =>
      options.WEIGHT
        ? Object.assign(result, {
          RESULT: this.WEIGHT(result.RESULT, options.WEIGHT)
        })
        : result

    return runQuery(q)
      .then(formatResults)
      .then(buckets)
      .then(facets)
      .then(weight)
      .then(score)
      .then(sort)
      .then(page)
      .then(appendDocuments)
  }

  /**
   * Resolves to { RESULT, OPTIONS } where RESULT is the sorted list of unique
   * VALUEs returned by DISTINCT(token) and OPTIONS echoes `options`.
   */
  #DICTIONARY = (token, options = {}) =>
    this.DISTINCT(token).then(results => {
      const uniqueValues = [...new Set(results.map(entry => entry.VALUE))]
      return {
        // This should sort an array of strings and numbers in an intuitive
        // way (numbers numerically, strings alphabetically)
        RESULT: uniqueValues.sort((a, b) =>
          String(a).localeCompare(String(b), undefined, {
            numeric: true,
            sensitivity: 'base'
          })
        ),
        OPTIONS: options
      }
    })

  /**
   * Fetches the stored documents for the given ids, in the same order.
   * A lookup that rejects yields `null` in that position (deliberate
   * best-effort). With no ids, falls back to ALL_DOCUMENTS().
   */
  #DOCUMENTS = (...requestedDocs) =>
    requestedDocs.length
      ? Promise.all(
        requestedDocs.map(_id =>
          this.#ii.STORE.get([this.#docExistsSpace, _id]).catch(e => null)
        )
      )
      : this.ALL_DOCUMENTS()

  /**
   * Runs `q` (an iterable of query clauses) as an AND query. Scoring
   * defaults to TFIDF and sorting is on; keys in `qops` override these.
   */
  // TODO: maybe add a default page size?
  #SEARCH = (q, qops) =>
    this.#parseJsonQuery(
      { AND: [...q] },
      { SCORE: { TYPE: 'TFIDF' }, SORT: true, ...qops }
    )

  /**
   * Iterates the store over the `docExistsSpace` key range and resolves to
   * an array of { _id, _doc } built from each stored value.
   *
   * @param {number} [limit] - passed through as the iterator `limit`.
   */
  // TODO add aggregation to ALL_DOCUMENTS
  ALL_DOCUMENTS = limit =>
    this.#ii.STORE.iterator({
      gte: [this.#docExistsSpace, null],
      lte: [this.#docExistsSpace, undefined],
      limit
    })
      .all()
      .then(entries =>
        entries.map(([key, value]) => ({
          _id: value._id,
          _doc: value
        }))
      )

  /**
   * Memoises `func(...params)` under a key built from `funcLabel` and the
   * JSON-serialised params. The value is stored only once the promise has
   * resolved, so rejections are not cached.
   *
   * @returns {Promise<*>} the cached or freshly computed value.
   */
  cachePipeline = (func, funcLabel, ...params) => {
    const cacheKey = JSON.stringify({ funcLabel, params })
    if (this.#cache.has(cacheKey)) {
      return Promise.resolve(this.#cache.get(cacheKey))
    }
    return func(...params).then(res => {
      this.#cache.set(cacheKey, res)
      return res
    })
  }

  /** Cached version of the dictionary lookup; see #DICTIONARY. */
  DICTIONARY = (token, dops) =>
    this.cachePipeline(this.#DICTIONARY, '#DICTIONARY', token, dops)

  /** Resolves to the de-duplicated entries of `ii.DISTINCT(...tokens)`. */
  DISTINCT = (...tokens) =>
    this.#ii.DISTINCT(...tokens).then(this.#dedupe)

  /** Cached version of the document lookup; see #DOCUMENTS. */
  DOCUMENTS = (...docs) =>
    this.cachePipeline(this.#DOCUMENTS, '#DOCUMENTS', ...docs)

  /** Resolves to the stored document count; 0 when the store yields undefined. */
  DOCUMENT_COUNT = () =>
    this.#ii.STORE.get(['DOCUMENT_COUNT']).then((count = 0) => count)

  /**
   * Fetches the entries stored under ['DOC', _id] for each requested id.
   * A lookup that rejects yields `null` in that position.
   */
  DOCUMENT_VECTORS = (...requestedDocs) =>
    Promise.all(
      requestedDocs.map(_id =>
        this.#ii.STORE.get(['DOC', _id]).catch(e => null)
      )
    )

  /** Resolves to the de-duplicated entries of `ii.FACETS(...tokens)`. */
  FACETS = (...tokens) =>
    this.#ii.FACETS(...tokens).then(this.#dedupe)

  /**
   * Returns one page of `results`.
   *
   * @param {Array} results - full result list.
   * @param {Object} [options] - NUMBER (page offset, default 0; negative
   *   values page from the end) and SIZE (default 20).
   * @param {Function} [callback] - called synchronously with the effective
   *   paging details (NUMBER, SIZE, TOTAL, DOC_OFFSET).
   * @returns {Array} the slice of `results` for the requested page.
   */
  PAGE = (results, options = {}, callback = () => null) => {
    const paging = {
      // TODO: 'NUMBER' should probably be renamed 'PAGE_OFFSET'
      NUMBER: 0,
      SIZE: 20,
      ...options
    }
    paging.TOTAL = Math.ceil(results.length / paging.SIZE)
    paging.DOC_OFFSET = paging.NUMBER * paging.SIZE
    callback(paging)
    return results.slice(
      paging.DOC_OFFSET,
      // (when paging from the end with a negative page number)
      // handle end index correctly when (start + size) == 0
      paging.DOC_OFFSET + paging.SIZE || undefined
    )
  }

  /** Cached version of the JSON query pipeline; see #parseJsonQuery. */
  QUERY = (q, qops) =>
    this.cachePipeline(this.#parseJsonQuery, '#parseJsonQuery', q, qops)

  /**
   * Adds a `_score` to every result, computed from its `_match` entries.
   *
   * TYPE is one of:
   *  - 'TFIDF' (default): sum of idf * SCORE, where
   *    idf = ln((document count + 1) / number of results). Sets `_score` on
   *    the passed-in result objects.
   *  - 'PRODUCT' / 'SUM': product / sum of the numeric SCOREs.
   *  - 'CONCAT' / 'VALUE': string concatenation of the SCOREs / VALUEs.
   * All types except TFIDF return shallow copies of the results. Numeric
   * scores are rounded to 2 decimal places. An unknown TYPE resolves to null.
   *
   * @param {Array} results - hits carrying `_match` arrays.
   * @param {Object} [scoreOps] - TYPE and optional FIELDS (only matches
   *   whose FIELD is listed contribute to the score).
   * @returns {Promise<Array|null>}
   */
  // TODO: Total hits (length of _match)
  // TODO: better error handling: what if TYPE is 'XXXXX'
  // TODO: scoring precision (decimal places) should be an option
  SCORE = async (results, scoreOps = {}) => {
    // TODO: test for defaulting to TFIDF
    const ops = { TYPE: 'TFIDF', ...scoreOps }

    const filterFields = item =>
      !ops.FIELDS ? true : ops.FIELDS.includes(item.FIELD)
    const filterMatch = _match => (_match || []).filter(filterFields)
    const round = n => Number(n.toFixed(2)) // TODO: make precision an option

    switch (ops.TYPE) {
      case 'TFIDF': {
        const docCount = await this.DOCUMENT_COUNT()
        // idf is the same for every result, so compute it once
        const idf = Math.log((docCount + 1) / results.length)
        return results.map(result => {
          result._score = round(
            filterMatch(result._match).reduce(
              (acc, cur) => acc + idf * +cur.SCORE,
              0
            )
          )
          return result
        })
      }
      case 'PRODUCT':
        return results.map(r => ({
          ...r,
          _score: round(
            filterMatch(r._match).reduce((acc, cur) => acc * +cur.SCORE, 1)
          )
        }))
      case 'CONCAT':
        return results.map(r => ({
          ...r,
          _score: filterMatch(r._match).reduce(
            (acc, cur) => acc + cur.SCORE,
            ''
          )
        }))
      case 'SUM':
        return results.map(r => ({
          ...r,
          _score: round(
            filterMatch(r._match).reduce((acc, cur) => acc + +cur.SCORE, 0)
          )
        }))
      case 'VALUE':
        return results.map(r => ({
          ...r,
          _score: filterMatch(r._match).reduce(
            (acc, cur) => acc + cur.VALUE,
            ''
          )
        }))
      default:
        return null
    }
  }

  /** Cached version of the search pipeline; see #SEARCH. */
  SEARCH = (q, qops) => this.cachePipeline(this.#SEARCH, '#SEARCH', q, qops)

  /**
   * Sorts `results` in place by `_score` and returns the same array.
   * Results with equal scores end up ordered by ascending `_id`.
   *
   * @param {Array} results - hits carrying `_id` and `_score`.
   * @param {Object} [options] - DIRECTION ('DESCENDING' default, or
   *   'ASCENDING') and TYPE ('NUMERIC' default, or 'ALPHABETIC').
   * @returns {Array} the sorted `results` array.
   */
  SORT = (results, options) => {
    const sortOps = { DIRECTION: 'DESCENDING', TYPE: 'NUMERIC', ...options }

    const ascending = (x, y) => {
      if (x < y) return -1
      if (x > y) return 1
      return 0
    }
    const sortFunction = {
      NUMERIC: {
        DESCENDING: (a, b) => +b._score - +a._score,
        ASCENDING: (a, b) => +a._score - +b._score
      },
      ALPHABETIC: {
        DESCENDING: (a, b) => ascending(b._score, a._score),
        ASCENDING: (a, b) => ascending(a._score, b._score)
      }
    }

    // Two passes: order by _id first, then by score. Array.prototype.sort is
    // stable, so the _id order survives as the tie-break for equal scores.
    return results
      .sort((a, b) => ascending(a._id, b._id))
      .sort(sortFunction[sortOps.TYPE][sortOps.DIRECTION])
  }

  /**
   * Multiplies the SCORE of matching `_match` entries by the given weights.
   * The entries are updated in place and SCORE becomes a 2-decimal string.
   *
   * A weight applies to a match when every one of FIELD / VALUE that the
   * weight specifies is strictly equal to the match's. "Specified" means
   * neither undefined nor null, so falsy values such as 0 or '' can be
   * targeted. A weight specifying neither FIELD nor VALUE never applies.
   *
   * @param {Array} results - hits carrying `_match` arrays.
   * @param {Array} weights - objects with WEIGHT and FIELD and/or VALUE.
   * @returns {Array} a new array holding the same (updated) result objects.
   */
  WEIGHT = (results, weights) => {
    const isSpecified = v => v !== undefined && v !== null
    const applies = (w, m) => {
      const hasField = isSpecified(w.FIELD)
      const hasValue = isSpecified(w.VALUE)
      if (!hasField && !hasValue) return false
      return (
        (!hasField || w.FIELD === m.FIELD) &&
        (!hasValue || w.VALUE === m.VALUE)
      )
    }

    return results.map(r => {
      r._match = r._match.map(m => {
        weights.forEach(w => {
          if (applies(w, m)) m.SCORE = (w.WEIGHT * +m.SCORE).toFixed(2)
        })
        return m
      })
      return r
    })
  }
}