minisearch
Version:
Tiny but powerful full-text search engine for browser and Node
1,356 lines (1,142 loc) • 79.1 kB
JavaScript
/* eslint-env jest */
import MiniSearch from './MiniSearch'
describe('MiniSearch', () => {
// Constructor contract: the `fields` option is mandatory, and a freshly built
// instance holds no documents.
describe('constructor', () => {
  it('throws error if fields option is missing', () => {
    const buildWithoutOptions = () => new MiniSearch()
    expect(buildWithoutOptions).toThrow('MiniSearch: option "fields" must be provided')
  })

  it('initializes the attributes', () => {
    const givenOptions = { fields: ['title', 'text'] }
    const instance = new MiniSearch(givenOptions)

    // Nothing has been added yet, so all per-document bookkeeping is empty
    expect(instance._documentCount).toEqual(0)
    expect(instance._documentIds.size).toEqual(0)
    expect(instance._fieldLength.size).toEqual(0)
    expect(instance._avgFieldLength.length).toEqual(0)

    // Each configured field gets a numeric ID, in the order given in `fields`
    expect(instance._fieldIds).toEqual({ title: 0, text: 1 })
    expect(instance._options).toMatchObject(givenOptions)
  })
})
// Covers MiniSearch#add: ID validation, the extractField / tokenize /
// processTerm hooks, and the arguments those hooks receive.
// Assertions use the canonical `toThrow` matcher rather than the
// `toThrowError` alias, which newer Jest versions no longer provide.
describe('add', () => {
  it('adds the document to the index', () => {
    const ms = new MiniSearch({ fields: ['text'] })
    ms.add({ id: 1, text: 'Nel mezzo del cammin di nostra vita' })
    expect(ms.documentCount).toEqual(1)
  })

  it('does not throw error if a field is missing', () => {
    // `title` is a configured field, but the document does not carry it
    const ms = new MiniSearch({ fields: ['title', 'text'] })
    ms.add({ id: 1, text: 'Nel mezzo del cammin di nostra vita' })
    expect(ms.documentCount).toEqual(1)
  })

  it('throws error if the document does not have the ID field', () => {
    const ms = new MiniSearch({ idField: 'foo', fields: ['title', 'text'] })
    expect(() => {
      ms.add({ text: 'I do not have an ID' })
    }).toThrow('MiniSearch: document does not have ID field "foo"')
  })

  it('throws error on duplicate ID', () => {
    const ms = new MiniSearch({ idField: 'foo', fields: ['title', 'text'] })
    ms.add({ foo: 'abc', text: 'Something' })
    expect(() => {
      ms.add({ foo: 'abc', text: 'I have a duplicate ID' })
    }).toThrow('MiniSearch: duplicate ID abc')
  })

  it('extracts the ID field using extractField', () => {
    // The ID is nested under `id.value`; every other field uses the default extractor
    const extractField = (document, fieldName) => {
      if (fieldName === 'id') { return document.id.value }
      return MiniSearch.getDefault('extractField')(document, fieldName)
    }
    const ms = new MiniSearch({ fields: ['text'], extractField })
    ms.add({ id: { value: 123 }, text: 'Nel mezzo del cammin di nostra vita' })
    const results = ms.search('vita')
    expect(results[0].id).toEqual(123)
  })

  it('rejects falsy terms', () => {
    // processTerm maps the term 'foo' to null; adding must cope with that
    const processTerm = term => term === 'foo' ? null : term
    const ms = new MiniSearch({ fields: ['title', 'text'], processTerm })
    expect(() => {
      ms.add({ id: 123, text: 'foo bar' })
    }).not.toThrow()
  })

  it('turns the field to string before tokenization', () => {
    const tokenize = jest.fn(x => x.split(/\W+/))
    const ms = new MiniSearch({ fields: ['id', 'tags', 'isBlinky'], tokenize })
    expect(() => {
      ms.add({ id: 123, tags: ['foo', 'bar'], isBlinky: false })
      ms.add({ id: 321, isBlinky: true })
    }).not.toThrow()
    // Numbers, arrays and booleans all reach the tokenizer as strings
    expect(tokenize).toHaveBeenCalledWith('123', 'id')
    expect(tokenize).toHaveBeenCalledWith('foo,bar', 'tags')
    expect(tokenize).toHaveBeenCalledWith('false', 'isBlinky')
    expect(tokenize).toHaveBeenCalledWith('321', 'id')
    expect(tokenize).toHaveBeenCalledWith('true', 'isBlinky')
  })

  it('passes document and field name to the field extractor', () => {
    const extractField = jest.fn((document, fieldName) => {
      if (fieldName === 'pubDate') {
        return document[fieldName] && document[fieldName].toLocaleDateString('it-IT')
      }
      // Dotted field names such as 'author.name' are resolved as a path
      return fieldName.split('.').reduce((doc, key) => doc && doc[key], document)
    })
    const tokenize = jest.fn(string => string.split(/\W+/))
    const ms = new MiniSearch({
      fields: ['title', 'pubDate', 'author.name'],
      storeFields: ['category'],
      extractField,
      tokenize
    })
    const document = {
      id: 1,
      title: 'Divina Commedia',
      pubDate: new Date(1320, 0, 1),
      author: { name: 'Dante Alighieri' },
      category: 'poetry'
    }
    ms.add(document)

    // The extractor is used for indexed fields and for stored fields alike
    expect(extractField).toHaveBeenCalledWith(document, 'title')
    expect(extractField).toHaveBeenCalledWith(document, 'pubDate')
    expect(extractField).toHaveBeenCalledWith(document, 'author.name')
    expect(extractField).toHaveBeenCalledWith(document, 'category')

    // Only indexed fields are tokenized; the stored-only `category` is not
    expect(tokenize).toHaveBeenCalledWith(document.title, 'title')
    expect(tokenize).toHaveBeenCalledWith(document.pubDate.toLocaleDateString('it-IT'), 'pubDate')
    expect(tokenize).toHaveBeenCalledWith(document.author.name, 'author.name')
    expect(tokenize).not.toHaveBeenCalledWith(document.category, 'category')
  })

  it('passes field value and name to tokenizer', () => {
    const tokenize = jest.fn(string => string.split(/\W+/))
    const ms = new MiniSearch({ fields: ['text', 'title'], tokenize })
    const document = { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' }
    ms.add(document)
    expect(tokenize).toHaveBeenCalledWith(document.text, 'text')
    expect(tokenize).toHaveBeenCalledWith(document.title, 'title')
  })

  it('passes field value and name to term processor', () => {
    const processTerm = jest.fn(term => term.toLowerCase())
    const ms = new MiniSearch({ fields: ['text', 'title'], processTerm })
    const document = { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' }
    ms.add(document)
    // Each term, in its original casing, is handed over with its field name
    document.text.split(/\W+/).forEach(term => {
      expect(processTerm).toHaveBeenCalledWith(term, 'text')
    })
    document.title.split(/\W+/).forEach(term => {
      expect(processTerm).toHaveBeenCalledWith(term, 'title')
    })
  })

  it('allows processTerm to expand a single term into several terms', () => {
    const processTerm = (string) => string === 'foobar' ? ['foo', 'bar'] : string
    const ms = new MiniSearch({ fields: ['title', 'text'], processTerm })
    expect(() => {
      ms.add({ id: 123, text: 'foobar' })
    }).not.toThrow()
    // 'bar' only exists as an expansion of 'foobar'
    expect(ms.search('bar')).toHaveLength(1)
  })
})
// Covers MiniSearch#remove: index clean-up, ID handling, custom hooks, and the
// warnings emitted when the document passed in differs from the indexed one.
// console.warn is replaced by a mock for every test so that warnings can be
// asserted on. Assertions use the canonical `toThrow` matcher rather than the
// `toThrowError` alias, which newer Jest versions no longer provide.
describe('remove', () => {
  const documents = [
    { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita ... cammin' },
    { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' },
    { id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria ... cammin' }
  ]

  let ms, _warn

  beforeEach(() => {
    ms = new MiniSearch({ fields: ['title', 'text'] })
    ms.addAll(documents)
    // Swap in the mock only after indexing, so it starts with no recorded calls
    _warn = console.warn
    console.warn = jest.fn()
  })

  afterEach(() => {
    console.warn = _warn
  })

  it('removes the document from the index', () => {
    expect(ms.documentCount).toEqual(3)
    ms.remove(documents[0])
    expect(ms.documentCount).toEqual(2)
    expect(ms.search('commedia').length).toEqual(0)
    expect(ms.search('vita').map(({ id }) => id)).toEqual([3])
    expect(console.warn).not.toHaveBeenCalled()
  })

  it('cleans up all data of the deleted document', () => {
    const otherDocument = { id: 4, title: 'Decameron', text: 'Umana cosa è aver compassione degli afflitti' }
    // Snapshot the field-length statistics, then add and remove a document:
    // the statistics must be back to the snapshot
    const originalFieldLength = new Map(ms._fieldLength)
    const originalAverageFieldLength = ms._avgFieldLength.slice()
    ms.add(otherDocument)
    ms.remove(otherDocument)
    expect(ms.documentCount).toEqual(3)
    expect(ms._fieldLength).toEqual(originalFieldLength)
    expect(ms._avgFieldLength).toEqual(originalAverageFieldLength)
  })

  it('does not remove terms from other documents', () => {
    // 'cammin' occurs in documents 1 and 3; only document 1 is removed
    ms.remove(documents[0])
    expect(ms.search('cammin').length).toEqual(1)
  })

  it('removes re-added document', () => {
    ms.remove(documents[0])
    ms.add(documents[0])
    ms.remove(documents[0])
    expect(console.warn).not.toHaveBeenCalled()
  })

  it('removes documents when using a custom extractField', () => {
    const extractField = (document, fieldName) => {
      const path = fieldName.split('.')
      return path.reduce((doc, key) => doc && doc[key], document)
    }
    const ms = new MiniSearch({ fields: ['text.value'], storeFields: ['id'], extractField })
    const document = { id: 123, text: { value: 'Nel mezzo del cammin di nostra vita' } }
    ms.add(document)
    expect(() => {
      ms.remove(document)
    }).not.toThrow()
    expect(ms.search('vita')).toEqual([])
  })

  it('cleans up the index', () => {
    const originalIdsSize = ms._documentIds.size
    ms.remove(documents[0])
    expect(ms._index.has('commedia')).toEqual(false)
    expect(ms._documentIds.size).toEqual(originalIdsSize - 1)
    // After the removal, 'vita' is only left in the title field
    expect(Array.from(ms._index.get('vita').keys())).toEqual([ms._fieldIds.title])
  })

  it('throws error if the document does not have the ID field', () => {
    const ms = new MiniSearch({ idField: 'foo', fields: ['title', 'text'] })
    expect(() => {
      ms.remove({ text: 'I do not have an ID' })
    }).toThrow('MiniSearch: document does not have ID field "foo"')
  })

  it('extracts the ID field using extractField', () => {
    // The ID is nested under `id.value`; every other field uses the default extractor
    const extractField = (document, fieldName) => {
      if (fieldName === 'id') { return document.id.value }
      return MiniSearch.getDefault('extractField')(document, fieldName)
    }
    const ms = new MiniSearch({ fields: ['text'], extractField })
    const document = { id: { value: 123 }, text: 'Nel mezzo del cammin di nostra vita' }
    ms.add(document)
    expect(() => {
      ms.remove(document)
    }).not.toThrow()
    expect(ms.search('vita')).toEqual([])
  })

  it('does not crash when the document has field named like default properties of object', () => {
    // 'constructor' exists on every plain object through the prototype chain
    const ms = new MiniSearch({ fields: ['constructor'] })
    const document = { id: 1 }
    ms.add(document)
    expect(() => {
      ms.remove(document)
    }).not.toThrow()
  })

  it('does not reassign IDs', () => {
    ms.remove(documents[0])
    ms.add(documents[0])
    expect(ms.search('commedia').map(result => result.id)).toEqual([documents[0].id])
    expect(ms.search('nova').map(result => result.id)).toEqual([documents[documents.length - 1].id])
  })

  it('rejects falsy terms', () => {
    // processTerm maps the term 'foo' to null; removal must cope with that
    const processTerm = term => term === 'foo' ? null : term
    const ms = new MiniSearch({ fields: ['title', 'text'], processTerm })
    const document = { id: 123, title: 'foo bar' }
    ms.add(document)
    expect(() => {
      ms.remove(document)
    }).not.toThrow()
  })

  it('allows processTerm to expand a single term into several terms', () => {
    const processTerm = (string) => string === 'foobar' ? ['foo', 'bar'] : string
    const ms = new MiniSearch({ fields: ['title', 'text'], processTerm })
    const document = { id: 123, title: 'foobar' }
    ms.add(document)
    expect(() => {
      ms.remove(document)
    }).not.toThrow()
    expect(ms.search('bar')).toHaveLength(0)
  })

  describe('when using custom per-field extraction/tokenizer/processing', () => {
    const documents = [
      { id: 1, title: 'Divina Commedia', tags: 'dante,virgilio', author: { name: 'Dante Alighieri' } },
      { id: 2, title: 'I Promessi Sposi', tags: 'renzo,lucia', author: { name: 'Alessandro Manzoni' } },
      { id: 3, title: 'Vita Nova', author: { name: 'Dante Alighieri' } }
    ]

    // These shadow the outer `ms` and `_warn` for the tests of this group
    let ms, _warn

    beforeEach(() => {
      ms = new MiniSearch({
        fields: ['title', 'tags', 'authorName'],
        extractField: (doc, fieldName) => {
          if (fieldName === 'authorName') {
            return doc.author.name
          } else {
            return doc[fieldName]
          }
        },
        tokenize: (field, fieldName) => {
          if (fieldName === 'tags') {
            return field.split(',')
          } else {
            return field.split(/\s+/)
          }
        },
        processTerm: (term, fieldName) => {
          if (fieldName === 'tags') {
            return term.toUpperCase()
          } else {
            return term.toLowerCase()
          }
        }
      })
      ms.addAll(documents)
      // The enclosing beforeEach has already run, so `_warn` captures the outer
      // mock here; the enclosing afterEach is the one restoring the real function
      _warn = console.warn
      console.warn = jest.fn()
    })

    afterEach(() => {
      console.warn = _warn
    })

    it('removes the document from the index', () => {
      expect(ms.documentCount).toEqual(3)
      ms.remove(documents[0])
      expect(ms.documentCount).toEqual(2)
      expect(ms.search('commedia').length).toEqual(0)
      expect(ms.search('vita').map(({ id }) => id)).toEqual([3])
      expect(console.warn).not.toHaveBeenCalled()
    })
  })

  describe('when the document was not in the index', () => {
    it('throws an error', () => {
      expect(() => ms.remove({ id: 99 }))
        .toThrow('MiniSearch: cannot remove document with ID 99: it is not in the index')
    })
  })

  describe('when the document has changed', () => {
    it('warns of possible index corruption', () => {
      expect(() => ms.remove({ id: 1, title: 'Divina Commedia cammin', text: 'something has changed' }))
        .not.toThrow()

      // One warning per term that was not indexed for that field, in this order
      expect(console.warn).toHaveBeenCalledTimes(4)
      ;[
        ['cammin', 'title'],
        ['something', 'text'],
        ['has', 'text'],
        ['changed', 'text']
      ].forEach(([term, field], i) => {
        expect(console.warn).toHaveBeenNthCalledWith(i + 1, `MiniSearch: document with ID 1 has changed before removal: term "${term}" was not present in field "${field}". Removing a document after it has changed can corrupt the index!`)
      })
    })

    it('does not throw error if console.warn is undefined', () => {
      // The enclosing afterEach puts the real console.warn back afterwards
      console.warn = undefined
      expect(() => ms.remove({ id: 1, title: 'Divina Commedia cammin', text: 'something has changed' }))
        .not.toThrow()
    })

    it('calls the custom logger if given', () => {
      const logger = jest.fn()
      ms = new MiniSearch({ fields: ['title', 'text'], logger })
      ms.addAll(documents)
      ms.remove({ id: 1, title: 'Divina Commedia', text: 'something' })
      expect(logger).toHaveBeenCalledWith('warn', 'MiniSearch: document with ID 1 has changed before removal: term "something" was not present in field "text". Removing a document after it has changed can corrupt the index!', 'version_conflict')
      // With a custom logger the warning must not also go to console.warn
      expect(console.warn).not.toHaveBeenCalled()
    })
  })
})
// Covers MiniSearch#removeAll: clearing the whole index when called without
// arguments, removing a given list of documents, and rejecting falsy arguments.
// Assertions use the canonical `toThrow` matcher rather than the
// `toThrowError` alias, which newer Jest versions no longer provide.
describe('removeAll', () => {
  const documents = [
    { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita ... cammin' },
    { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' },
    { id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria ... cammin' }
  ]

  let ms, _warn

  beforeEach(() => {
    // The index starts empty; each test adds the documents it needs
    ms = new MiniSearch({ fields: ['title', 'text'] })
    _warn = console.warn
    console.warn = jest.fn()
  })

  afterEach(() => {
    console.warn = _warn
  })

  it('removes all documents from the index if called with no argument', () => {
    // Reference instance: a JSON round-trip of the index while it is still empty
    const empty = MiniSearch.loadJSON(JSON.stringify(ms), {
      fields: ['title', 'text']
    })
    ms.addAll(documents)
    expect(ms.documentCount).toEqual(3)
    ms.removeAll()
    expect(ms).toEqual(empty)
  })

  it('removes the given documents from the index', () => {
    ms.addAll(documents)
    expect(ms.documentCount).toEqual(3)
    ms.removeAll([documents[0], documents[2]])
    expect(ms.documentCount).toEqual(1)
    expect(ms.search('commedia').length).toEqual(0)
    expect(ms.search('vita').length).toEqual(0)
    // Document 2 is the only one left
    expect(ms.search('lago').length).toEqual(1)
  })

  it('raises an error if called with a falsey argument', () => {
    ms.addAll(documents)
    expect(() => { ms.removeAll(null) }).toThrow()
    expect(() => { ms.removeAll(undefined) }).toThrow()
    expect(() => { ms.removeAll(false) }).toThrow()
    expect(() => { ms.removeAll('') }).toThrow()
    // An empty array is a valid argument, and none of the calls above removed anything
    expect(() => { ms.removeAll([]) }).not.toThrow()
    expect(ms.documentCount).toEqual(documents.length)
  })
})
// Covers MiniSearch#discard: hiding a document from results, bookkeeping
// compared with `remove`, and the conditions under which auto vacuum starts.
describe('discard', () => {
  // Fixture factories: every call yields fresh objects, so tests share no state
  const interestingDocs = () => [
    { id: 1, text: 'Some interesting stuff' },
    { id: 2, text: 'Some more interesting stuff' }
  ]
  const stuffDocs = () => [
    { id: 1, text: 'Some stuff' },
    { id: 2, text: 'Some additional stuff' }
  ]
  const countedDocs = () => [
    { id: 1, text: 'Document one' },
    { id: 2, text: 'Document two' },
    { id: 3, text: 'Document three' }
  ]

  // Auto vacuum settings shared by the "called multiple times" tests
  const eagerAutoVacuum = (minDirtCount) => ({ minDirtCount, minDirtFactor: 0, batchSize: 1, batchWait: 10 })

  const idsOf = (results) => results.map((doc) => doc.id)
  const presence = (index, ids) => ids.map((id) => index.has(id))

  // Resolves once no vacuum is running anymore, including enqueued ones
  const vacuumSettled = async (index) => {
    while (index.isVacuuming) {
      await index._currentVacuum
    }
  }

  it('prevents a document from appearing in search results', () => {
    const ms = new MiniSearch({ fields: ['text'] })
    ms.addAll(interestingDocs())

    expect(idsOf(ms.search('stuff'))).toEqual([1, 2])
    expect(presence(ms, [1, 2])).toEqual([true, true])

    ms.discard(1)

    expect(idsOf(ms.search('stuff'))).toEqual([2])
    expect(presence(ms, [1, 2])).toEqual([false, true])
  })

  it('raises error if a document with the given ID does not exist', () => {
    const ms = new MiniSearch({ fields: ['text'] })
    const discardUnknown = () => {
      ms.discard(99)
    }
    expect(discardUnknown).toThrow('MiniSearch: cannot discard document with ID 99: it is not in the index')
  })

  it('adjusts internal data to account for the document being discarded', () => {
    const ms = new MiniSearch({ fields: ['text'] })
    ms.addAll(interestingDocs())
    const clone = MiniSearch.loadJSON(JSON.stringify(ms), {
      fields: ['text']
    })

    // Discard on one instance, remove on its clone: the bookkeeping must match
    ms.discard(1)
    clone.remove({ id: 1, text: 'Some interesting stuff' })

    const comparedAttributes = [
      '_idToShortId',
      '_documentIds',
      '_fieldLength',
      '_storedFields',
      '_avgFieldLength',
      '_documentCount'
    ]
    for (const attribute of comparedAttributes) {
      expect(ms[attribute]).toEqual(clone[attribute])
    }
    expect(ms._dirtCount).toEqual(1)
  })

  it('allows adding a new version of the document afterwards', () => {
    const ms = new MiniSearch({ fields: ['text'], storeFields: ['text'] })
    ms.addAll(interestingDocs())

    // First replacement of document 1
    ms.discard(1)
    ms.add({ id: 1, text: 'Some new stuff' })
    expect(idsOf(ms.search('stuff'))).toEqual([1, 2])
    expect(idsOf(ms.search('new'))).toEqual([1])

    // Second replacement: terms of the previous version must no longer match
    ms.discard(1)
    expect(idsOf(ms.search('stuff'))).toEqual([2])
    ms.add({ id: 1, text: 'Some newer stuff' })
    expect(idsOf(ms.search('stuff'))).toEqual([1, 2])
    expect(idsOf(ms.search('new'))).toEqual([])
    expect(idsOf(ms.search('newer'))).toEqual([1])
  })

  it('leaves the index in the same state as removal when all terms are searched at least once', () => {
    const settings = { fields: ['text'], storeFields: ['text'] }
    const ms = new MiniSearch({ fields: ['text'], storeFields: ['text'] })
    ms.add({ id: 1, text: 'Some stuff' })
    const clone = MiniSearch.loadJSON(JSON.stringify(ms), settings)

    ms.discard(1)
    clone.remove({ id: 1, text: 'Some stuff' })

    // Right after discarding, the two instances still differ...
    expect(ms).not.toEqual(clone)

    // ...but once every term of the document was searched, the indexes agree
    const firstResults = ms.search('some stuff')
    expect(ms._index).toEqual(clone._index)

    // Results are the same after the first search
    expect(ms.search('stuff')).toEqual(firstResults)
  })

  it('triggers auto vacuum by default', () => {
    const ms = new MiniSearch({ fields: ['text'] })
    ms.add({ id: 1, text: 'Some stuff' })
    // Force a high dirt count instead of discarding many documents
    ms._dirtCount = 1000
    ms.discard(1)
    expect(ms.isVacuuming).toEqual(true)
  })

  it('triggers auto vacuum when the threshold is met', () => {
    const ms = new MiniSearch({
      fields: ['text'],
      autoVacuum: { minDirtCount: 2, minDirtFactor: 0, batchWait: 50, batchSize: 1 }
    })
    ms.addAll(stuffDocs().concat([{ id: 3, text: 'Even more stuff' }]))
    expect(ms.isVacuuming).toEqual(false)

    // The first discard stays below minDirtCount, the second one reaches it
    ms.discard(1)
    expect(ms.isVacuuming).toEqual(false)
    ms.discard(2)
    expect(ms.isVacuuming).toEqual(true)
  })

  it('does not trigger auto vacuum if disabled', () => {
    const ms = new MiniSearch({ fields: ['text'], autoVacuum: false })
    ms.addAll(stuffDocs())
    ms._dirtCount = 1000
    ms.discard(1)
    expect(ms.isVacuuming).toEqual(false)
  })

  it('applies default settings if autoVacuum is set to true', () => {
    const ms = new MiniSearch({ fields: ['text'], autoVacuum: true })
    ms.addAll(stuffDocs())
    ms._dirtCount = 1000
    ms.discard(1)
    expect(ms.isVacuuming).toEqual(true)
  })

  it('applies default settings if options are set to null', async () => {
    const ms = new MiniSearch({
      fields: ['text'],
      autoVacuum: { minDirtCount: null, minDirtFactor: null, batchWait: null, batchSize: null }
    })
    ms.addAll(stuffDocs())
    ms._dirtCount = 1000
    const discardOutcome = ms.discard(1)
    expect(ms.isVacuuming).toEqual(true)
    // NOTE(review): this awaits whatever `discard` returns, while the other
    // tests wait on `ms._currentVacuum` — confirm that `discard` returns a promise
    await discardOutcome
  })

  it('vacuums until under the dirt thresholds when called multiple times', async () => {
    const minDirtCount = 2
    const ms = new MiniSearch({
      fields: ['text'],
      autoVacuum: eagerAutoVacuum(minDirtCount)
    })
    const documents = Array.from({ length: 5 }, (_, i) => ({ id: i + 1, text: `Document number ${i}` }))
    ms.addAll(documents)
    expect(ms._dirtCount).toEqual(0)

    // Repeated discards are expected to start one auto vacuum and enqueue a
    // further one, so that the dirt left at the end is always less than
    // minDirtCount
    for (const { id } of documents) {
      ms.discard(id)
    }
    await vacuumSettled(ms)

    expect(ms._dirtCount).toBeLessThan(minDirtCount)
  })

  it('does not perform unnecessary vacuuming when called multiple times', async () => {
    const minDirtCount = 2
    const ms = new MiniSearch({
      fields: ['text'],
      autoVacuum: eagerAutoVacuum(minDirtCount)
    })
    const documents = countedDocs()
    ms.addAll(documents)

    // Repeated discards start one auto vacuum and enqueue a further one, which
    // is still subject to the minDirtCount/minDirtFactor conditions. After the
    // first vacuum only 1 dirty entry remains, which is under minDirtCount, so
    // the enqueued vacuum is expected to do nothing
    for (const { id } of documents) {
      ms.discard(id)
    }
    await vacuumSettled(ms)

    expect(ms._dirtCount).toBe(1)
  })

  it('enqueued vacuum runs without conditions if a manual vacuum was called while enqueued', async () => {
    const minDirtCount = 2
    const ms = new MiniSearch({
      fields: ['text'],
      autoVacuum: eagerAutoVacuum(minDirtCount)
    })
    const documents = countedDocs()
    ms.addAll(documents)

    // As in the previous test, the enqueued auto vacuum alone would do
    // nothing, because the dirt left after the first vacuum (1) is under
    // minDirtCount
    for (const { id } of documents) {
      ms.discard(id)
    }

    // A manual, unconditional vacuum requested before the enqueued one runs
    // is expected to make it run regardless of the dirt count
    ms.vacuum()
    await vacuumSettled(ms)

    expect(ms._dirtCount).toBe(0)
  })
})
// Covers MiniSearch#discardAll: hiding several documents at once and how the
// batch interacts with auto vacuum.
describe('discardAll', () => {
  const idsOf = (results) => results.map((doc) => doc.id)
  const eagerAutoVacuum = (minDirtCount) => ({ minDirtCount, minDirtFactor: 0, batchSize: 1, batchWait: 10 })

  it('prevents the documents from appearing in search results', () => {
    const ms = new MiniSearch({ fields: ['text'] })
    const everyId = [1, 2, 3]
    ms.addAll([
      { id: 1, text: 'Some interesting stuff' },
      { id: 2, text: 'Some more interesting stuff' },
      { id: 3, text: 'Some even more interesting stuff' }
    ])

    expect(idsOf(ms.search('stuff'))).toEqual([1, 2, 3])
    expect(everyId.map((id) => ms.has(id))).toEqual([true, true, true])

    ms.discardAll([1, 3])

    expect(idsOf(ms.search('stuff'))).toEqual([2])
    expect(everyId.map((id) => ms.has(id))).toEqual([false, true, false])
  })

  it('only triggers at most a single auto vacuum at the end', () => {
    const ms = new MiniSearch({ fields: ['text'], autoVacuum: eagerAutoVacuum(3) })
    const documents = Array.from({ length: 10 }, (_, index) => {
      const id = index + 1
      return { id, text: `Document ${id}` }
    })
    ms.addAll(documents)

    // Two discards stay under minDirtCount (3): no vacuum yet
    ms.discardAll([1, 2])
    expect(ms.isVacuuming).toEqual(false)

    // Eight more discards in one batch: a vacuum runs, but none is enqueued on top
    ms.discardAll([3, 4, 5, 6, 7, 8, 9, 10])
    expect(ms.isVacuuming).toEqual(true)
    expect(ms._enqueuedVacuum).toEqual(null)
  })

  it('does not change auto vacuum settings in case of errors', () => {
    const ms = new MiniSearch({ fields: ['text'], autoVacuum: eagerAutoVacuum(1) })
    ms.add({ id: 1, text: 'Some stuff' })

    // ID 3 was never added, so the batch fails
    const discardUnknown = () => { ms.discardAll([3]) }
    expect(discardUnknown).toThrow()
    expect(ms.isVacuuming).toEqual(false)

    // A later valid batch must still trigger auto vacuum
    ms.discardAll([1])
    expect(ms.isVacuuming).toEqual(true)
  })
})
// Covers MiniSearch#replace: swapping an indexed document for a new version.
describe('replace', () => {
  it('replaces an existing document with a new version', () => {
    const idsMatching = (index, query) => index.search(query).map((doc) => doc.id)
    const ms = new MiniSearch({ fields: ['text'] })
    ms.addAll([
      { id: 1, text: 'Some quite interesting stuff' },
      { id: 2, text: 'Some more interesting stuff' }
    ])

    // Before: 'quite' matches document 1, 'even' matches nothing
    expect(idsMatching(ms, 'stuff')).toEqual([1, 2])
    expect(idsMatching(ms, 'quite')).toEqual([1])
    expect(idsMatching(ms, 'even')).toEqual([])

    ms.replace({ id: 1, text: 'Some even more interesting stuff' })

    // After: the old term is gone and the new one matches document 1
    expect(idsMatching(ms, 'stuff')).toEqual([2, 1])
    expect(idsMatching(ms, 'quite')).toEqual([])
    expect(idsMatching(ms, 'even')).toEqual([1])
  })

  it('raises error if a document with the given ID does not exist', () => {
    const ms = new MiniSearch({ fields: ['text'] })
    const replaceUnknown = () => {
      ms.replace({ id: 1, text: 'Some stuff' })
    }
    // The message mentions "discard", not "replace"
    expect(replaceUnknown).toThrow('MiniSearch: cannot discard document with ID 1: it is not in the index')
  })
})
// Covers MiniSearch#vacuum: cleaning up after discards, and the behavior of
// calls made while another vacuum is still in progress.
describe('vacuum', () => {
  const stuffDocs = () => [
    { id: 1, text: 'Some stuff' },
    { id: 2, text: 'Some additional stuff' }
  ]

  it('cleans up discarded documents from the index', async () => {
    const settings = { fields: ['text'], storeFields: ['text'] }
    const ms = new MiniSearch({ fields: ['text'], storeFields: ['text'] })
    ms.addAll(stuffDocs())
    const clone = MiniSearch.loadJSON(JSON.stringify(ms), settings)

    // Discard on the original, remove on the clone
    ms.discard(1)
    ms.discard(2)
    clone.remove({ id: 1, text: 'Some stuff' })
    clone.remove({ id: 2, text: 'Some additional stuff' })
    expect(ms).not.toEqual(clone)

    // After vacuuming, the two instances must be equal
    await ms.vacuum({ batchSize: 1 })
    expect(ms).toEqual(clone)
    expect(ms.isVacuuming).toEqual(false)
  })

  it('schedules a second vacuum right after the current one completes, if one is ongoing', async () => {
    const ms = new MiniSearch({ fields: ['text'] })
    // Snapshot taken while the index is still empty
    const pristine = MiniSearch.loadJSON(JSON.stringify(ms), {
      fields: ['text']
    })
    ms.addAll(stuffDocs())
    ms.discard(1)
    ms.discard(2)
    ms.add({ id: 3, text: 'Even more stuff' })

    // The first vacuum is deliberately not awaited: document 3 is discarded
    // while it is ongoing, then a second vacuum is requested and awaited
    ms.vacuum({ batchSize: 1, batchWait: 50 })
    ms.discard(3)
    await ms.vacuum()

    expect(ms._index).toEqual(pristine._index)
    expect(ms.isVacuuming).toEqual(false)
  })

  it('does not enqueue more than one vacuum on top of the ongoing one', async () => {
    const ms = new MiniSearch({ fields: ['text'] })
    ms.addAll(stuffDocs())
    ms.discard(1)
    ms.discard(2)

    const ongoing = ms.vacuum({ batchSize: 1, batchWait: 50 })
    const enqueued = ms.vacuum()
    const enqueuedAgain = ms.vacuum()

    // The second and third calls hand back the very same promise
    expect(ongoing).not.toBe(enqueued)
    expect(enqueued).toBe(enqueuedAgain)
    expect(ms.isVacuuming).toEqual(true)

    await enqueuedAgain
    expect(ms.isVacuuming).toEqual(false)
  })

  it('allows batch size to be bigger than the term count', async () => {
    const ms = new MiniSearch({ fields: ['text'] })
    ms.addAll(stuffDocs())
    const oversizedBatch = ms.termCount + 1
    await ms.vacuum({ batchSize: oversizedBatch })
    expect(ms.isVacuuming).toEqual(false)
  })
})
// Covers MiniSearch#addAll: indexing a list of documents in a single call.
describe('addAll', () => {
  it('adds all the documents to the index', () => {
    const batch = [
      { id: 1, text: 'Nel mezzo del cammin di nostra vita' },
      { id: 2, text: 'Mi ritrovai per una selva oscura' }
    ]
    const index = new MiniSearch({ fields: ['text'] })

    index.addAll(batch)

    expect(index.documentCount).toEqual(batch.length)
  })
})
// Covers MiniSearch#addAllAsync: indexing a list of documents asynchronously,
// with and without an explicit chunk size.
describe('addAllAsync', () => {
  const verses = [
    'Nel mezzo',
    'del cammin',
    'di nostra vita',
    'Mi ritrovai',
    'per una',
    'selva oscura',
    'ché la',
    'diritta via',
    'era smarrita',
    'ahi quanto',
    'a dir',
    'qual era',
    'è cosa dura'
  ]

  // Builds the 13 documents afresh for each test, with IDs 1 to 13
  const buildDocuments = () => verses.map((text, position) => ({ id: position + 1, text }))

  it('adds all the documents to the index', async () => {
    const ms = new MiniSearch({ fields: ['text'] })
    const documents = buildDocuments()

    await ms.addAllAsync(documents)

    expect(ms.documentCount).toEqual(documents.length)
  })

  it('accepts a chunkSize option', async () => {
    const ms = new MiniSearch({ fields: ['text'] })
    const documents = buildDocuments()

    // 13 documents with a chunk size of 3, so the count is not a multiple of it
    await ms.addAllAsync(documents, { chunkSize: 3 })

    expect(ms.documentCount).toEqual(documents.length)
  })
})
// Covers MiniSearch#has: membership checks by ID, before and after documents
// are removed or discarded.
describe('has', () => {
  it('returns true if a document with the given ID was added to the index, false otherwise', () => {
    const commedia = { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' }
    const promessiSposi = { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' }
    const ms = new MiniSearch({ fields: ['title', 'text'] })
    ms.addAll([commedia, promessiSposi])

    expect([1, 2, 3].map((id) => ms.has(id))).toEqual([true, true, false])

    // One document leaves through `remove` (with an equal copy), the other through `discard`
    ms.remove(Object.assign({}, commedia))
    ms.discard(2)

    expect([1, 2].map((id) => ms.has(id))).toEqual([false, false])
  })

  it('works well with custom ID fields', () => {
    const commedia = { uid: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' }
    const promessiSposi = { uid: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' }
    const ms = new MiniSearch({ fields: ['title', 'text'], idField: 'uid' })
    ms.addAll([commedia, promessiSposi])

    expect([1, 2, 3].map((uid) => ms.has(uid))).toEqual([true, true, false])

    ms.remove(Object.assign({}, commedia))
    ms.discard(2)

    expect([1, 2].map((uid) => ms.has(uid))).toEqual([false, false])
  })
})
// Covers MiniSearch#getStoredFields: looking up stored fields by document ID.
describe('getStoredFields', () => {
  it('returns the stored fields for the given document ID, or undefined if the document is not in the index', () => {
    const commedia = { title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' }
    const promessiSposi = { title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' }
    const ms = new MiniSearch({ fields: ['title', 'text'], storeFields: ['title', 'text'] })
    ms.addAll([
      Object.assign({ id: 1 }, commedia),
      Object.assign({ id: 2 }, promessiSposi)
    ])

    // Only the fields listed in `storeFields` come back; the ID is not among them
    expect(ms.getStoredFields(1)).toEqual(commedia)
    expect(ms.getStoredFields(2)).toEqual(promessiSposi)
    expect(ms.getStoredFields(3)).toBe(undefined)

    // Discarding a document makes its stored fields unavailable as well
    ms.discard(1)
    expect(ms.getStoredFields(1)).toBe(undefined)
  })
})
describe('search', () => {
const documents = [
{ id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' },
{ id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como', lang: 'it', category: 'fiction' },
{ id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria', category: 'poetry' }
]
const ms = new MiniSearch({ fields: ['title', 'text'], storeFields: ['lang', 'category'] })
ms.addAll(documents)
it('returns scored results', () => {
const results = ms.search('vita')
expect(results.length).toBeGreaterThan(0)
expect(results.map(({ id }) => id).sort()).toEqual([1, 3])
expect(results[0].score).toBeGreaterThanOrEqual(results[1].score)
})
it('returns stored fields in the results', () => {
const results = ms.search('del')
expect(results.length).toBeGreaterThan(0)
expect(results.map(({ lang }) => lang).sort()).toEqual(['it', undefined, undefined])
expect(results.map(({ category }) => category).sort()).toEqual(['fiction', 'poetry', undefined])
})
it('returns empty array if there is no match', () => {
const results = ms.search('paguro')
expect(results).toEqual([])
})
it('returns empty array for empty search', () => {
const results = ms.search('')
expect(results).toEqual([])
})
it('returns empty results for terms that are not in the index', () => {
let results
expect(() => {
results = ms.search('sottomarino aeroplano')
}).not.toThrowError()
expect(results.length).toEqual(0)
})
it('boosts fields', () => {
const results = ms.search('vita', { boost: { title: 2 } })
expect(results.map(({ id }) => id)).toEqual([3, 1])
expect(results[0].score).toBeGreaterThan(results[1].score)
})
it('computes a meaningful score when fields are named liked default properties of object', () => {
const ms = new MiniSearch({ fields: ['constructor'] })
ms.add({ id: 1, constructor: 'something' })
ms.add({ id: 2, constructor: 'something else' })
const results = ms.search('something')
results.forEach((result) => {
expect(Number.isFinite(result.score)).toBe(true)
})
})
it('searches only selected fields', () => {
const results = ms.search('vita', { fields: ['title'] })
expect(results).toHaveLength(1)
expect(results[0].id).toEqual(3)
})
it('searches only selected fields even if other fields are boosted', () => {
const results = ms.search('vita', { fields: ['title'], boost: { text: 2 } })
expect(results).toHaveLength(1)
expect(results[0].id).toEqual(3)
})
it('combines results with OR by default', () => {
const results = ms.search('cammin como sottomarino')
expect(results.length).toEqual(2)
expect(results.map(({ id }) => id)).toEqual([2, 1])
})
it('combines results with AND if combineWith is AND', () => {
  const searchAll = (query) => ms.search(query, { combineWith: 'AND' })
  const both = searchAll('vita cammin')
  expect(both.length).toEqual(1)
  expect(both.map((hit) => hit.id)).toEqual([1])
  // A term that matches nothing empties the result, whichever side it is on.
  expect(searchAll('vita sottomarino').length).toEqual(0)
  expect(searchAll('sottomarino vita').length).toEqual(0)
})
it('combines results with AND_NOT if combineWith is AND_NOT', () => {
  const searchExcluding = (query) => ms.search(query, { combineWith: 'AND_NOT' })
  const withoutCammin = searchExcluding('vita cammin')
  expect(withoutCammin.length).toEqual(1)
  expect(withoutCammin.map((hit) => hit.id)).toEqual([3])
  // Term order matters here: swapping the two terms changes the outcome.
  expect(searchExcluding('vita sottomarino').length).toEqual(2)
  expect(searchExcluding('sottomarino vita').length).toEqual(0)
})
it('raises an error if combineWith is not a valid operator', () => {
  // 'XOR' is not an accepted operator, so the search itself must throw.
  const searchWithXor = () => {
    ms.search('vita cammin', { combineWith: 'XOR' })
  }
  expect(searchWithXor).toThrowError('Invalid combination operator: XOR')
})
it('returns empty results for empty search', () => {
  // The empty query yields nothing, with or without an explicit operator.
  expect(ms.search('')).toEqual([])
  for (const combineWith of ['OR', 'AND', 'AND_NOT']) {
    expect(ms.search('', { combineWith })).toEqual([])
  }
})
it('executes fuzzy search', () => {
  // Both query terms are misspelled on purpose; `fuzzy: 2` should still match.
  const hits = ms.search('camin memory', { fuzzy: 2 })
  const ids = hits.map((hit) => hit.id)
  expect(hits.length).toEqual(2)
  expect(ids).toEqual([1, 3])
})
it('executes fuzzy search with maximum fuzziness', () => {
  // Fractional `fuzzy` combined with an explicit `maxFuzzy` bound.
  const options = { fuzzy: 0.6, maxFuzzy: 3 }
  const hits = ms.search('comedia', options)
  const ids = hits.map((hit) => hit.id)
  expect(hits.length).toEqual(1)
  expect(ids).toEqual([1])
})
it('executes prefix search', () => {
  // 'que' is searched as a prefix and should hit documents 2 and 3.
  const hits = ms.search('que', { prefix: true })
  const ids = hits.map((hit) => hit.id)
  expect(hits.length).toEqual(2)
  expect(ids).toEqual([2, 3])
})
it('combines prefix search and fuzzy search', () => {
  // Prefix and fuzzy matching are enabled together for the same query.
  const options = { fuzzy: 0.25, prefix: true }
  const hits = ms.search('cammino quel', options)
  const ids = hits.map((hit) => hit.id)
  expect(hits.length).toEqual(3)
  expect(ids).toEqual([2, 1, 3])
})
it('assigns weights to prefix matches and fuzzy matches', () => {
  const query = 'cammino quel'
  const idsOf = (hits) => hits.map((hit) => hit.id)
  const searchWeighted = (weights) => ms.search(query, { fuzzy: true, prefix: true, weights })

  // Baseline: exact matching only.
  const exact = ms.search(query)
  expect(idsOf(exact)).toEqual([2])

  // Down-weighting prefix matches ranks the prefix-only hit last; the exact
  // hit keeps its score.
  const prefixLast = searchWeighted({ prefix: 0.1 })
  expect(idsOf(prefixLast)).toEqual([2, 1, 3])
  expect(prefixLast[0].score).toEqual(exact[0].score)

  // Down-weighting fuzzy matches flips the order of the two non-exact hits.
  const fuzzyLast = searchWeighted({ fuzzy: 0.1 })
  expect(idsOf(fuzzyLast)).toEqual([2, 3, 1])
  expect(fuzzyLast[0].score).toEqual(exact[0].score)
})
it('assigns weight lower than exact match to a match that is both a prefix and fuzzy match', () => {
  // Uses a dedicated index so the ranking depends only on these two documents.
  const docs = [
    { id: 1, text: 'Poi che la gente poverella crebbe' },
    { id: 2, text: 'Deus, venerunt gentes' }
  ]
  const index = new MiniSearch({ fields: ['text'] })
  index.addAll(docs)
  expect(index.documentCount).toEqual(docs.length)

  const exact = index.search('gente')
  const combined = index.search('gente', { fuzzy: 0.2, prefix: true })
  const combinedIds = combined.map((hit) => hit.id)
  expect(combinedIds).toEqual([1, 2])
  // The exact hit keeps its score; 'gentes' is reported as the matched term
  // of the second hit.
  expect(combined[0].score).toEqual(exact[0].score)
  expect(combined[1].match.gentes).toEqual(['text'])
})
it('accepts a function to compute fuzzy and prefix options from term', () => {
  // Only terms longer than 4 characters get fuzzy / prefix matching.
  const isLong = (term) => term.length > 4
  const fuzzy = jest.fn(term => (isLong(term) ? 2 : false))
  const prefix = jest.fn(term => isLong(term))
  const queryTerms = ['quel', 'comedia']

  const results = ms.search('quel comedia', { fuzzy, prefix })

  // Each callback receives (term, index, all terms), once per term, in order.
  queryTerms.forEach((term, position) => {
    expect(fuzzy).toHaveBeenNthCalledWith(position + 1, term, position, queryTerms)
  })
  queryTerms.forEach((term, position) => {
    expect(prefix).toHaveBeenNthCalledWith(position + 1, term, position, queryTerms)
  })
  expect(results.length).toEqual(2)
  expect(results.map((hit) => hit.id)).toEqual([2, 1])
})
it('boosts documents by calling boostDocument with document ID, term, and stored fields', () => {
  const boostFactor = 1.234
  const query = 'divina commedia nova'
  // Constant boost: every document gets the same factor.
  const boostDocument = jest.fn((_id, _term) => boostFactor)

  const resultsWithoutBoost = ms.search(query)
  const results = ms.search(query, { boostDocument })

  // Expected (document ID, term, stored fields) triples.
  const expectedCalls = [
    [1, 'divina', {}],
    [1, 'commedia', {}],
    [3, 'nova', { category: 'poetry' }]
  ]
  for (const callArgs of expectedCalls) {
    expect(boostDocument).toHaveBeenCalledWith(...callArgs)
  }
  expect(results[0].score).toBeCloseTo(resultsWithoutBoost[0].score * boostFactor)
})
it('boosts terms by calling boostTerm with normalized query term, term index in the query, and array of all query terms', () => {
  const query = 'Commedia nova'
  const boostFactors = { commedia: 1.5, nova: 1.1 }
  const boostTerm = jest.fn((term, _index, _allTerms) => boostFactors[term])

  const resultsWithoutBoost = ms.search(query)
  const results = ms.search(query, { boostTerm })

  // The callback sees the lowercased terms, not the raw query text.
  const normalizedTerms = ['commedia', 'nova']
  normalizedTerms.forEach((term, position) => {
    expect(boostTerm).toHaveBeenCalledWith(term, position, normalizedTerms)
  })
  expect(results[0].score).toBeCloseTo(resultsWithoutBoost[0].score * boostFactors.commedia)
  expect(results[1].score).toBeCloseTo(resultsWithoutBoost[1].score * boostFactors.nova)
})
it('skips document if boostDocument returns a falsy value', () => {
  const query = 'vita'
  const skippedId = 3
  // Returning null for document 3 should drop it from the results.
  const boostDocument = jest.fn((id, _term) => (id === skippedId ? null : 1))
  const idsOf = (hits) => hits.map((hit) => hit.id)

  const resultsWithoutBoost = ms.search(query)
  const results = ms.search(query, { boostDocument })

  expect(idsOf(resultsWithoutBoost)).toContain(skippedId)
  expect(idsOf(results)).not.toContain(skippedId)
})
it('uses a specific search-time tokenizer if specified', () => {
  // Tokenizer passed as a search option: splits the query on a literal 'X'.
  const tokenize = (text) => text.split('X')
  const hits = ms.search('divinaXcommedia', { tokenize })
  expect(hits.length).toBeGreaterThan(0)
  const sortedIds = hits.map((hit) => hit.id).sort()
  expect(sortedIds).toEqual([1])
})
it('uses a specific search-time term processing function if specified', () => {
  // Maps the digits 1 and 4 back to 'i' and 'a' before lowercasing.
  const processTerm = (term) => {
    const lettersOnly = term.replace(/1/g, 'i').replace(/4/g, 'a')
    return lettersOnly.toLowerCase()
  }
  const hits = ms.search('d1v1n4', { processTerm })
  expect(hits.length).toBeGreaterThan(0)
  const sortedIds = hits.map((hit) => hit.id).sort()
  expect(sortedIds).toEqual([1])
})
it('rejects falsy terms', () => {
  // 'quel' is turned into null, so only 'commedia' should be searched.
  const processTerm = (term) => {
    if (term === 'quel') { return null }
    return term
  }
  const hits = ms.search('quel commedia', { processTerm })
  expect(hits.length).toBeGreaterThan(0)
  const sortedIds = hits.map((hit) => hit.id).sort()
  expect(sortedIds).toEqual([1])
})
it('allows processTerm to expand a single term into several terms', () => {
  // Returning an array from processTerm replaces one term with several.
  const processTerm = (term) => {
    if (term === 'divinacommedia') { return ['divina', 'commedia'] }
    return term
  }
  const hits = ms.search('divinacommedia', { processTerm })
  expect(hits.length).toBeGreaterThan(0)
  const sortedIds = hits.map((hit) => hit.id).sort()
  expect(sortedIds).toEqual([1])
})
it('allows custom filtering of results on the basis of stored fields', () => {
  // The same predicate is used as the search filter and as the final check.
  const isPoetry = ({ category }) => category === 'poetry'
  const hits = ms.search('del', { filter: isPoetry })
  expect(hits.length).toBe(1)
  expect(hits.every(isPoetry)).toBe(true)
})
it('allows to define a default filter upon instantiation', () => {
  // The filter is supplied once through the constructor's searchOptions and
  // is not repeated in the search call.
  const isPoetry = ({ category }) => category === 'poetry'
  const filteredIndex = new MiniSearch({
    fields: ['title', 'text'],
    storeFields: ['category'],
    searchOptions: { filter: isPoetry }
  })
  filteredIndex.addAll(documents)
  const hits = filteredIndex.search('del')
  expect(hits.length).toBe(1)
  expect(hits.every(isPoetry)).toBe(true)
})
it('allows customizing BM25+ parameters', () => {
  const docs = [
    { id: 1, text: 'something very very very cool' },
    { id: 2, text: 'something cool' }
  ]
  const index = new MiniSearch({ fields: ['text'], searchOptions: { bm25: { k: 1.2, b: 0.7, d: 0.5 } } })
  index.addAll(docs)

  // Score of the hit at `position`, using the constructor defaults or an
  // explicit per-search override.
  const defaultScore = (query, position) => index.search(query)[position].score
  const overriddenScore = (query, position, bm25) => index.search(query, { bm25 })[position].score

  // Each override changes a single parameter (k, then b, then d) and is
  // expected to lower the score.
  expect(defaultScore('very', 0)).toBeGreaterThan(overriddenScore('very', 0, { k: 1, b: 0.7, d: 0.5 }))
  expect(defaultScore('something', 1)).toBeGreaterThan(overriddenScore('something', 1, { k: 1.2, b: 1, d: 0.5 }))
  expect(defaultScore('something', 1)).toBeGreaterThan(overriddenScore('something', 1, { k: 1.2, b: 0.7, d: 0.1 }))

  // Defaults are taken from the searchOptions passed to the constructor
  const other = new MiniSearch({ fields: ['text'], searchOptions: { bm25: { k: 1, b: 0.7, d: 0.5 } } })
  other.addAll(docs)
  expect(other.search('very')).toEqual(index.search('very', { bm25: { k: 1, b: 0.7, d: 0.5 } }))
})
it('allows searching for the special value `MiniSearch.wildcard` to match all terms', () => {
  const index = new MiniSearch({ fields: ['text'], storeFields: ['cool'] })
  index.addAll([
    { id: 1, text: 'something cool', cool: true },
    { id: 2, text: 'something else', cool: false },
    { id: 3, text: null, cool: true }
  ])
  const idsOf = (hits) => hits.map((hit) => hit.id)

  // The string "*" is just a normal term
  expect(index.search('*')).toEqual([])

  // The empty string is just a normal query
  expect(index.search('')).toEqual([])

  // The value `MiniSearch.wildcard` matches all terms
  expect(idsOf(index.search(MiniSearch.wildcard))).toEqual([1, 2, 3])

  // Filters and document boosting are still applied
  const filteredAndBoosted = index.search(MiniSearch.wildcard, {
    filter: (stored) => stored.cool,
    boostDocument: (id) => id
  })
  expect(idsOf(filteredAndBoosted)).toEqual([3, 1])
})
describe('when passing a query tree', () => {
it('searches according to the given combination', () => {
  // Builds an AND sub-query over the given terms.
  const allOf = (...queries) => ({ combineWith: 'AND', queries })
  const queryTree = {
    combineWith: 'OR',
    queries: [
      allOf('vita', 'cammin'),
      'como sottomarino',
      allOf('nova', 'pappagallo')
    ]
  }
  const hits = ms.search(queryTree)
  expect(hits.length).toEqual(2)
  expect(hits.map((hit) => hit.id)).toEqual([1, 2])
})
it('allows combining wildcard queries', () => {
  // Wildcard AND_NOT 'vita': expected to leave only document 2.
  const queryTree = {
    combineWith: 'AND_NOT',
    queries: [MiniSearch.wildcard, 'vita']
  }
  const hits = ms.search(queryTree)
  expect(hits.length).toEqual(1)
  expect(hits.map((hit) => hit.id)).toEqual([2])
})
it('uses the given options for each subquery, cascading them properly', () => {
  // Prefix search restricted to the title field.
  const titlePrefixQuery = {
    prefix: true,
    fields: ['title'],
    queries: ['vit']
  }
  // Both terms are misspelled; this sub-query sets no `fuzzy` of its own, so
  // the root-level `fuzzy: true` is the only fuzzy setting in the tree.
  const misspelledPairQuery = {
    combineWith: 'AND',
    queries: ['bago', 'coomo']
  }
  const queryTree = {
    combineWith: 'OR',
    fuzzy: true,
    queries: [titlePrefixQuery, misspelledPairQuery],
    weights: {
      fuzzy: 0.2,
      prefix: 0.75
    }
  }
  const hits = ms.search(queryTree)
  expect(hits.length).toEqual(2)
  expect(hits.map((hit) => hit.id)).toEqual([3, 2])
})
it('uses the search options in the second argument as default', () => {
let reference = ms.search({
queries: [
{ fields: ['text'], queries: ['vita'] },
{ fields: ['title'], queries: ['promessi'] }
]
})
// Boost field
let results = ms.search({
queries: [
{ fields: ['text'], queries: ['vita'] },
{ fields: ['title'], queries: ['promessi'] }
]
}, { boost: { title: 2 } })
expect(results.length).toEqual(reference.length)
expect(results.find((r) => r.id === 2).score)
.toBeGreaterThan(reference.find((r) => r.id === 2).score)
// Combine with AND
results = ms.search({
queries: [
{ fields: ['text'], queries: ['vita'] },
{ fields: ['title'], queries: ['promessi'] }