@tinacms/search-index
Version:
A network resilient, persistent full-text search library for the browser and Node.js
429 lines (394 loc) • 12 kB
JavaScript
// TODO: remove all '○' and '#'
const { EntryStream } = require('level-read-stream')
module.exports = (ops, cache) => {
// TODO add aggregation to ALL_DOCUMENTS
const ALL_DOCUMENTS = limit =>
new Promise((resolve, reject) => {
const result = []
new EntryStream(ops.fii.STORE, {
// gte: null,
// lte: undefined,
gte: ['DOC_RAW', null],
lte: ['DOC_RAW', undefined],
limit,
...ops.fii.LEVEL_OPTIONS
})
.on('data', d =>
result.push({
_id: d.value._id,
_doc: d.value
})
)
.on('end', () => resolve(result))
})
const DOCUMENTS = (...requestedDocs) =>
requestedDocs.length
? Promise.all(
requestedDocs.map(_id =>
ops.fii.STORE.get(['DOC_RAW', _id], ops.fii.LEVEL_OPTIONS).catch(e => null)
)
)
: ALL_DOCUMENTS()
const DOCUMENT_VECTORS = (...requestedDocs) =>
Promise.all(
requestedDocs.map(_id => ops.fii.STORE.get(['DOC', _id], ops.fii.LEVEL_OPTIONS).catch(e => null))
)
const DICTIONARY = token =>
DISTINCT(token).then(results =>
Array.from(
results.reduce((acc, cur) => acc.add(cur.VALUE), new Set())
).sort((a, b) =>
// This should sort an array of strings and
// numbers in an intuitive way (numbers numerically, strings
// alphabetically)
(a + '').localeCompare(b + '', undefined, {
numeric: true,
sensitivity: 'base'
})
)
)
const DISTINCT = (...tokens) =>
ops.fii.DISTINCT(...tokens).then(result => {
return [
// Stringify Set entries so that Set can determine duplicates
...result.reduce(
(acc, cur) =>
acc.add(
JSON.stringify(
Object.assign(cur, {
VALUE: cur.VALUE
})
)
),
new Set()
)
].map(JSON.parse)
}) // un-stringify
const FACETS = (...tokens) =>
ops.fii.FACETS(...tokens).then(result =>
[
// Stringify Set entries so that Set can determine duplicates
...result.reduce(
(acc, cur) =>
acc.add(
JSON.stringify(
Object.assign(cur, {
// VALUE: cur.VALUE.split('#')[0] // TODO: this is wrong
VALUE: cur.VALUE
})
)
),
new Set()
)
].map(JSON.parse)
) // un-stringify
const PAGE = (results, options) => {
options = Object.assign(
{
NUMBER: 0,
SIZE: 20
},
options || {}
)
const start = options.NUMBER * options.SIZE
// handle end index correctly when (start + size) == 0
// (when paging from the end with a negative page number)
const end = start + options.SIZE || undefined
return results.slice(start, end)
}
// score by tfidf by default
// TODO: Total hits (length of _match)
// TODO: better error handling: what if TYPE is 'XXXXX'
// TODO: scoring precision (decimal places) should be an option
const SCORE = (results, scoreOps = {}) => {
// TODO: test for defaulting to TFIDF
scoreOps = Object.assign(
{
TYPE: 'TFIDF'
},
scoreOps
)
const filterFields = item => {
if (!scoreOps.FIELDS) return true
return scoreOps.FIELDS.includes(item.FIELD)
}
const filterMatch = _match => (_match || []).filter(filterFields)
return new Promise(resolve =>
resolve(
scoreOps.TYPE === 'TFIDF'
? DOCUMENT_COUNT().then(docCount =>
results.map((result, _, resultSet) => {
const idf = Math.log((docCount + 1) / resultSet.length)
result._score = +(result._match || [])
.filter(filterFields)
.reduce((acc, cur) => acc + idf * +cur.SCORE, 0)
// TODO: make precision an option
.toFixed(2)
return result
})
)
: scoreOps.TYPE === 'PRODUCT'
? results.map(r => ({
...r,
_score: +filterMatch(r._match)
.reduce((acc, cur) => acc * +cur.SCORE, 1)
.toFixed(2)
}))
: scoreOps.TYPE === 'CONCAT'
? results.map(r => ({
...r,
_score: filterMatch(r._match).reduce(
(acc, cur) => acc + cur.SCORE,
''
)
}))
: scoreOps.TYPE === 'SUM'
? results.map(r => ({
...r,
_score: +filterMatch(r._match)
.reduce((acc, cur) => acc + +cur.SCORE, 0)
.toFixed(2) // TODO: make precision an option
}))
: scoreOps.TYPE === 'VALUE'
? results.map(r => ({
...r,
_score: filterMatch(r._match).reduce(
(acc, cur) => acc + cur.VALUE,
''
)
}))
: null
)
)
}
// TODO: maybe add a default page size?
const SEARCH = (q, qops) => {
return parseJsonQuery(
{
AND: [...q]
},
Object.assign(
{
SCORE: {
TYPE: 'TFIDF'
},
SORT: true
},
qops
)
)
}
const SORT = (results, options) => {
options = Object.assign(
{
DIRECTION: 'DESCENDING',
TYPE: 'NUMERIC'
},
options || {}
)
const sortFunction = {
NUMERIC: {
DESCENDING: (a, b) => +b._score - +a._score,
ASCENDING: (a, b) => +a._score - +b._score
},
ALPHABETIC: {
DESCENDING: (a, b) => {
if (a._score < b._score) return 1
if (a._score > b._score) return -1
return 0
},
ASCENDING: (a, b) => {
if (a._score < b._score) return -1
if (a._score > b._score) return 1
return 0
}
}
}
return results
.sort((a, b) => {
if (a._id < b._id) return -1
if (a._id > b._id) return 1
return 0
})
.sort(sortFunction[options.TYPE][options.DIRECTION])
}
const DOCUMENT_COUNT = () => ops.fii.STORE.get(['DOCUMENT_COUNT'], ops.fii.LEVEL_OPTIONS)
const WEIGHT = (results, weights) =>
results.map(r => {
r._match = r._match.map(m => {
weights.forEach(w => {
let doWeighting = false
// TODO: possible bug / edge case- does this work when weighting a field with value 0?
if (w.FIELD && w.VALUE) {
if (w.FIELD === m.FIELD && w.VALUE === m.VALUE) {
doWeighting = true
}
} else if (w.FIELD) {
if (w.FIELD === m.FIELD) {
doWeighting = true
}
} else if (w.VALUE) {
if (w.VALUE === m.VALUE) {
doWeighting = true
}
}
if (doWeighting) m.SCORE = (w.WEIGHT * +m.SCORE).toFixed(2)
})
return m
})
return r
})
// This function reads queries in a JSON format and then translates them to
// Promises
const parseJsonQuery = (q, options = {}) => {
const runQuery = cmd => {
// if string or object with only FIELD or VALUE, assume
// that this is a GET
if (typeof cmd === 'string' || typeof cmd === 'number') {
return ops.fii.GET(cmd, options.PIPELINE)
}
if (cmd.FIELD) return ops.fii.GET(cmd)
if (cmd.VALUE) return ops.fii.GET(cmd)
// else:
if (cmd.AND) return ops.fii.AND(cmd.AND.map(runQuery), options.PIPELINE)
if (cmd.GET) return ops.fii.GET(cmd.GET, options.PIPELINE)
if (cmd.NOT) {
return ops.fii.NOT(runQuery(cmd.NOT.INCLUDE), runQuery(cmd.NOT.EXCLUDE))
}
if (cmd.OR) return ops.fii.OR(cmd.OR.map(runQuery), options.PIPELINE)
// TODO this should be ALL_DOCUMENTS, such that
// ALL_DOCUMENTS=true returns everything (needs test)
// It should be possible to combine ALL_DOCUMENTS with FACETS
// and other aggregations
if (cmd.ALL_DOCUMENTS) return ALL_DOCUMENTS(cmd.ALL_DOCUMENTS)
}
const formatResults = result =>
result.RESULT
? Object.assign(result, {
RESULT_LENGTH: result.RESULT.length
})
: {
RESULT: result,
RESULT_LENGTH: result.length
}
// APPEND DOCUMENTS IF SPECIFIED
const appendDocuments = result =>
options.DOCUMENTS
? DOCUMENTS(...result.RESULT.map(doc => doc._id)).then(documents =>
Object.assign(result, {
RESULT: result.RESULT.map((doc, i) =>
Object.assign(doc, {
_doc: documents[i]
})
)
})
)
: result
// SCORE IF SPECIFIED
const score = result =>
options.SCORE
? SCORE(result.RESULT, options.SCORE).then(scoredResult =>
Object.assign(result, {
RESULT: scoredResult
})
)
: result
// SORT IF SPECIFIED
const sort = result =>
Object.assign(
result,
options.SORT
? {
RESULT: SORT(result.RESULT, options.SORT)
}
: {}
)
// BUCKETS IF SPECIFIED
const buckets = result =>
options.BUCKETS
? ops.fii.BUCKETS(...options.BUCKETS).then(bkts =>
Object.assign(result, {
BUCKETS: ops.fii.AGGREGATION_FILTER(bkts, result.RESULT)
})
)
: result
// FACETS IF SPECIFIED
// TODO: FAST OPTION FOR WHEN ALL_DOCUMENTS IS SPECIFIED
// TODO: This should be 3 cases: 1. needs filter, 2. no results,
// 3. no need for filter
const facets = result => {
// no FACETS are specified
if (!options.FACETS) return result
// QUERY returned no results, and facets will therefore be empty
if (!result.RESULT.length) {
return Object.assign(result, {
FACETS: [] // if empty result set then just return empty facets
})
}
// ALL_DOCUMENTS so no need to filter the facets
if (q.ALL_DOCUMENTS) {
return FACETS(...options.FACETS).then(fcts =>
Object.assign(result, {
FACETS: fcts
})
)
}
// else
return FACETS(...options.FACETS).then(fcts =>
Object.assign(result, {
FACETS: ops.fii.AGGREGATION_FILTER(fcts, result.RESULT)
})
)
}
// PAGE IF SPECIFIED
const page = result =>
Object.assign(
result,
options.PAGE ? { RESULT: PAGE(result.RESULT, options.PAGE) } : {}
)
// WEIGHT IF SPECIFIED
const weight = result =>
options.WEIGHT
? Object.assign(
{ RESULT: WEIGHT(result.RESULT, options.WEIGHT) },
result
)
: result
return runQuery(q)
.then(formatResults)
.then(buckets)
.then(facets)
.then(weight)
.then(score)
.then(sort)
.then(page)
.then(appendDocuments)
}
const tryCache = (q, cacheKey) =>
new Promise(resolve => {
cacheKey = JSON.stringify(cacheKey)
return cache.has(cacheKey)
? resolve(cache.get(cacheKey))
: q
.then(res => cache.set(cacheKey, res))
.then(() => resolve(cache.get(cacheKey)))
})
return {
ALL_DOCUMENTS,
DICTIONARY: token =>
tryCache(DICTIONARY(token), { DICTIONARY: token || null }),
DISTINCT,
DOCUMENTS: (...docs) =>
tryCache(DOCUMENTS(...docs), {
DOCUMENTS: docs
}),
DOCUMENT_COUNT,
DOCUMENT_VECTORS,
FACETS,
PAGE,
QUERY: (q, qops) => tryCache(parseJsonQuery(q, qops), { QUERY: [q, qops] }),
SCORE,
SEARCH: (q, qops) => tryCache(SEARCH(q, qops), { SEARCH: [q, qops] }),
SORT
}
}