/*
 * minisearch
 * Version: (not specified)
 * Tiny but powerful full-text search engine for browser and Node
 * 596 lines (526 loc) • 23.6 kB
 * JavaScript
 */
/* eslint-env jest */
import MiniSearch from './MiniSearch.js'
describe('MiniSearch', () => {
// Constructor contract: the `fields` option is mandatory, and a freshly built
// instance exposes empty internal bookkeeping.
describe('constructor', () => {
  it('throws error if fields option is missing', () => {
    const buildWithoutOptions = () => new MiniSearch()
    expect(buildWithoutOptions).toThrow('MiniSearch: option "fields" must be provided')
  })

  it('initializes the attributes', () => {
    const config = { fields: ['title', 'text'] }
    const index = new MiniSearch(config)

    // No document has been added, so the counter is expected to be zero
    expect(index._documentCount).toEqual(0)
    // Each configured field is expected to map to its position in `fields`
    expect(index._fieldIds).toEqual({ title: 0, text: 1 })
    // Per-document bookkeeping is expected to start out empty
    expect(index._documentIds).toEqual({})
    expect(index._fieldLength).toEqual({})
    expect(index._averageFieldLength).toEqual({})
    // The options given at construction must be retained
    expect(index._options).toMatchObject(config)
  })
})
// Indexing a single document with `add`, including the arguments handed to the
// extractField / tokenize / processTerm hooks while indexing.
describe('add', () => {
  const verse = 'Nel mezzo del cammin di nostra vita'

  it('adds the document to the index', () => {
    const index = new MiniSearch({ fields: ['text'] })
    index.add({ id: 1, text: verse })
    expect(index.documentCount).toEqual(1)
  })

  it('does not throw error if a field is missing', () => {
    // `title` is configured as a field but absent from the document
    const index = new MiniSearch({ fields: ['title', 'text'] })
    index.add({ id: 1, text: verse })
    expect(index.documentCount).toEqual(1)
  })

  it('throws error if the document does not have the ID field', () => {
    const index = new MiniSearch({ idField: 'foo', fields: ['title', 'text'] })
    const addWithoutId = () => {
      index.add({ text: 'I do not have an ID' })
    }
    expect(addWithoutId).toThrowError('MiniSearch: document does not have ID field "foo"')
  })

  it('rejects falsy terms', () => {
    // Term processor that turns the term "foo" into null and leaves others untouched
    const processTerm = term => {
      if (term === 'foo') return null
      return term
    }
    const index = new MiniSearch({ fields: ['title', 'text'], processTerm })
    const addDocument = () => {
      index.add({ id: 123, text: 'foo bar' })
    }
    expect(addDocument).not.toThrowError()
  })

  it('passes document and field name to the field extractor', () => {
    // Walks a dotted path (e.g. "author.name") and joins array values with spaces
    const extractField = jest.fn((doc, path) => {
      let value = doc
      for (const key of path.split('.')) {
        value = value && value[key]
      }
      return Array.isArray(value) ? value.join(' ') : value
    })
    const tokenize = jest.fn(text => text.split(/\W+/))
    const index = new MiniSearch({ fields: ['title', 'tags', 'author.name'], extractField, tokenize })
    const book = {
      id: 1,
      title: 'Divina Commedia',
      tags: ['divina', 'commedia', 'dante', 'alighieri'],
      author: { name: 'Dante Alighieri' }
    }

    index.add(book)

    // Field name paired with the value the tokenizer should have received for it
    const expectedCalls = [
      ['title', book.title],
      ['tags', book.tags.join(' ')],
      ['author.name', book.author.name]
    ]
    for (const [field] of expectedCalls) {
      expect(extractField).toHaveBeenCalledWith(book, field)
    }
    for (const [field, value] of expectedCalls) {
      expect(tokenize).toHaveBeenCalledWith(value, field)
    }
  })

  it('passes field value and name to tokenizer', () => {
    const tokenize = jest.fn(text => text.split(/\W+/))
    const index = new MiniSearch({ fields: ['text', 'title'], tokenize })
    const book = { id: 1, title: 'Divina Commedia', text: verse }

    index.add(book)

    for (const field of ['text', 'title']) {
      expect(tokenize).toHaveBeenCalledWith(book[field], field)
    }
  })

  it('passes field value and name to term processor', () => {
    const processTerm = jest.fn(term => term.toLowerCase())
    const index = new MiniSearch({ fields: ['text', 'title'], processTerm })
    const book = { id: 1, title: 'Divina Commedia', text: verse }

    index.add(book)

    // Every word of every indexed field must have been processed together with its field name
    for (const field of ['text', 'title']) {
      for (const term of book[field].split(/\W+/)) {
        expect(processTerm).toHaveBeenCalledWith(term, field)
      }
    }
  })
})
// Removing documents: index clean-up, error cases, and the warnings emitted when
// the document handed to `remove` differs from the one that was indexed.
describe('remove', () => {
  const documents = [
    { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita ... cammin' },
    { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' },
    { id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria ... cammin' }
  ]
  const firstDocument = documents[0]

  let index
  let originalWarn

  beforeEach(() => {
    index = new MiniSearch({ fields: ['title', 'text'] })
    index.addAll(documents)
    // Swap console.warn for a mock so warnings can be asserted on; restored in afterEach
    originalWarn = console.warn
    console.warn = jest.fn()
  })

  afterEach(() => {
    console.warn = originalWarn
  })

  it('removes the document from the index', () => {
    expect(index.documentCount).toEqual(3)

    index.remove(firstDocument)

    expect(index.documentCount).toEqual(2)
    expect(index.search('commedia').length).toEqual(0)
    const vitaIds = index.search('vita').map(({ id }) => id)
    expect(vitaIds).toEqual([3])
    expect(console.warn).not.toHaveBeenCalled()
  })

  it('does not remove terms from other documents', () => {
    index.remove(firstDocument)
    // "cammin" also appears in the third document's text
    expect(index.search('cammin').length).toEqual(1)
  })

  it('removes re-added document', () => {
    index.remove(firstDocument)
    index.add(firstDocument)
    index.remove(firstDocument)
    expect(console.warn).not.toHaveBeenCalled()
  })

  it('cleans up the index', () => {
    const countDocumentIds = () => Object.keys(index._documentIds).length
    const idCountBefore = countDocumentIds()

    index.remove(firstDocument)

    expect(index._index.has('commedia')).toEqual(false)
    expect(countDocumentIds()).toEqual(idCountBefore - 1)
    // After removal, "vita" is expected to be indexed under the title field only
    const vitaFieldKeys = Object.keys(index._index.get('vita'))
    expect(vitaFieldKeys).toEqual([index._fieldIds.title.toString()])
  })

  it('throws error if the document does not have the ID field', () => {
    const customIdIndex = new MiniSearch({ idField: 'foo', fields: ['title', 'text'] })
    const removeWithoutId = () => {
      customIdIndex.remove({ text: 'I do not have an ID' })
    }
    expect(removeWithoutId).toThrowError('MiniSearch: document does not have ID field "foo"')
  })

  it('does not reassign IDs', () => {
    const lastDocument = documents[documents.length - 1]

    index.remove(firstDocument)
    index.add(firstDocument)

    const idsFor = query => index.search(query).map(result => result.id)
    expect(idsFor('commedia')).toEqual([firstDocument.id])
    expect(idsFor('nova')).toEqual([lastDocument.id])
  })

  it('rejects falsy terms', () => {
    // Term processor that turns the term "foo" into null and leaves others untouched
    const processTerm = term => {
      if (term === 'foo') return null
      return term
    }
    const filteringIndex = new MiniSearch({ fields: ['title', 'text'], processTerm })
    const entry = { id: 123, title: 'foo bar' }
    filteringIndex.add(entry)

    const removeEntry = () => {
      filteringIndex.remove(entry)
    }
    expect(removeEntry).not.toThrowError()
  })

  describe('when the document was not in the index', () => {
    it('throws an error', () => {
      const removeUnknown = () => index.remove({ id: 99 })
      expect(removeUnknown)
        .toThrow('MiniSearch: cannot remove document with ID 99: it is not in the index')
    })
  })

  describe('when the document has changed', () => {
    it('warns of possible index corruption', () => {
      const removeChanged = () => index.remove({ id: 1, title: 'Divina Commedia cammin', text: 'something has changed' })
      expect(removeChanged).not.toThrow()

      // One warning is expected per term that was not indexed for the field, in this order
      const unexpectedTerms = [
        ['cammin', 'title'],
        ['something', 'text'],
        ['has', 'text'],
        ['changed', 'text']
      ]
      expect(console.warn).toHaveBeenCalledTimes(4)
      unexpectedTerms.forEach(([term, field], position) => {
        expect(console.warn).toHaveBeenNthCalledWith(position + 1, `MiniSearch: document with ID 1 has changed before removal: term "${term}" was not present in field "${field}". Removing a document after it has changed can corrupt the index!`)
      })
    })

    it('does not throw error if console.warn is undefined', () => {
      // afterEach puts the real console.warn back
      console.warn = undefined
      const removeChanged = () => index.remove({ id: 1, title: 'Divina Commedia cammin', text: 'something has changed' })
      expect(removeChanged).not.toThrow()
    })
  })
})
// Bulk indexing: `addAll` must index every document in the given array.
describe('addAll', () => {
  it('adds all the documents to the index', () => {
    const batch = [
      { id: 1, text: 'Nel mezzo del cammin di nostra vita' },
      { id: 2, text: 'Mi ritrovai per una selva oscura' }
    ]
    const index = new MiniSearch({ fields: ['text'] })

    index.addAll(batch)

    const expectedCount = batch.length
    expect(index.documentCount).toEqual(expectedCount)
  })
})
// Asynchronous bulk indexing: the promise returned by `addAllAsync` must settle
// only after every document is indexed, with or without an explicit chunk size.
describe('addAllAsync', () => {
  // Builds a fresh 13-document fixture on each call: one text fragment per document, IDs 1..13
  const buildDocuments = () => [
    'Nel mezzo',
    'del cammin',
    'di nostra vita',
    'Mi ritrovai',
    'per una',
    'selva oscura',
    'ché la',
    'diritta via',
    'era smarrita',
    'ahi quanto',
    'a dir',
    'qual era',
    'è cosa dura'
  ].map((text, position) => ({ id: position + 1, text }))

  it('adds all the documents to the index', () => {
    const index = new MiniSearch({ fields: ['text'] })
    const documents = buildDocuments()
    const verifyCount = () => {
      expect(index.documentCount).toEqual(documents.length)
    }
    // Returning the promise makes Jest wait for the assertion
    return index.addAllAsync(documents).then(verifyCount)
  })

  it('accepts a chunkSize option', () => {
    const index = new MiniSearch({ fields: ['text'] })
    const documents = buildDocuments()
    const verifyCount = () => {
      expect(index.documentCount).toEqual(documents.length)
    }
    return index.addAllAsync(documents, { chunkSize: 3 }).then(verifyCount)
  })
})
// Search behaviour over a small fixed corpus: scoring, field boosting and
// restriction, OR/AND combination, fuzzy and prefix matching, document
// boosting, search-time tokenizer / term-processor overrides, and the match
// metadata attached to each result.
describe('search', () => {
  const documents = [
    { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' },
    { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' },
    { id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria' }
  ]
  const ms = new MiniSearch({ fields: ['title', 'text'] })
  ms.addAll(documents)

  // Extracts the document IDs from a result list, preserving result order
  const idsOf = results => results.map(({ id }) => id)

  // Numeric comparator for sorting IDs. A bare `.sort()` compares elements as
  // strings, which would misorder numeric IDs as soon as one reaches two digits.
  const ascending = (a, b) => a - b

  it('returns scored results', () => {
    const results = ms.search('vita')
    expect(results.length).toBeGreaterThan(0)
    // Order-independent check of which documents matched
    expect(idsOf(results).sort(ascending)).toEqual([1, 3])
    expect(results[0].score).toBeGreaterThanOrEqual(results[1].score)
  })

  it('returns empty array if there is no match', () => {
    const results = ms.search('paguro')
    expect(results).toEqual([])
  })

  it('returns empty array for empty search', () => {
    const results = ms.search('')
    expect(results).toEqual([])
  })

  it('returns empty results for terms that are not in the index', () => {
    let results
    expect(() => {
      results = ms.search('sottomarino aeroplano')
    }).not.toThrowError()
    expect(results.length).toEqual(0)
  })

  it('boosts fields', () => {
    const results = ms.search('vita', { boost: { title: 2 } })
    // Document 3 has "vita" in its title, so the title boost must rank it first
    expect(idsOf(results)).toEqual([3, 1])
    expect(results[0].score).toBeGreaterThan(results[1].score)
  })

  it('searches in the given fields', () => {
    const results = ms.search('vita', { fields: ['title'] })
    expect(results).toHaveLength(1)
    expect(results[0].id).toEqual(3)
  })

  it('combines results with OR by default', () => {
    const results = ms.search('cammin como sottomarino')
    expect(results.length).toEqual(2)
    expect(idsOf(results)).toEqual([2, 1])
  })

  it('combines results with AND if combineWith is AND', () => {
    const results = ms.search('vita cammin', { combineWith: 'AND' })
    expect(results.length).toEqual(1)
    expect(idsOf(results)).toEqual([1])
    // An unknown term must empty the result set regardless of its position in the query
    expect(ms.search('vita sottomarino', { combineWith: 'AND' }).length).toEqual(0)
    expect(ms.search('sottomarino vita', { combineWith: 'AND' }).length).toEqual(0)
  })

  it('executes fuzzy search', () => {
    const results = ms.search('camin memory', { fuzzy: 2 })
    expect(results.length).toEqual(2)
    expect(idsOf(results)).toEqual([1, 3])
  })

  it('executes prefix search', () => {
    const results = ms.search('que', { prefix: true })
    expect(results.length).toEqual(2)
    expect(idsOf(results)).toEqual([2, 3])
  })

  it('combines prefix search and fuzzy search', () => {
    const results = ms.search('cammino quel', { fuzzy: 0.25, prefix: true })
    expect(results.length).toEqual(3)
    expect(idsOf(results)).toEqual([2, 1, 3])
  })

  it('accepts a function to compute fuzzy and prefix options from term', () => {
    const fuzzy = jest.fn(term => term.length > 4 ? 2 : false)
    const prefix = jest.fn(term => term.length > 4)
    const results = ms.search('quel comedia', { fuzzy, prefix })
    // Each callback must receive the term, its position, and the full list of query terms
    expect(fuzzy).toHaveBeenNthCalledWith(1, 'quel', 0, ['quel', 'comedia'])
    expect(fuzzy).toHaveBeenNthCalledWith(2, 'comedia', 1, ['quel', 'comedia'])
    expect(prefix).toHaveBeenNthCalledWith(1, 'quel', 0, ['quel', 'comedia'])
    expect(prefix).toHaveBeenNthCalledWith(2, 'comedia', 1, ['quel', 'comedia'])
    expect(results.length).toEqual(2)
    expect(idsOf(results)).toEqual([2, 1])
  })

  it('boosts documents by calling boostDocument with document ID and term', () => {
    const query = 'divina commedia'
    const boostFactor = 1.234
    const boostDocument = jest.fn((id, term) => boostFactor)
    const resultsWithoutBoost = ms.search(query)
    const results = ms.search(query, { boostDocument })
    expect(boostDocument).toHaveBeenCalledWith(1, 'divina')
    expect(boostDocument).toHaveBeenCalledWith(1, 'commedia')
    expect(results[0].score).toEqual(resultsWithoutBoost[0].score * boostFactor)
  })

  it('skips document if boostDocument returns a falsy value', () => {
    const query = 'vita'
    const boostDocument = jest.fn((id, term) => id === 3 ? null : 1)
    const resultsWithoutBoost = ms.search(query)
    const results = ms.search(query, { boostDocument })
    expect(idsOf(resultsWithoutBoost)).toContain(3)
    expect(idsOf(results)).not.toContain(3)
  })

  it('uses a specific search-time tokenizer if specified', () => {
    const tokenize = (string) => string.split('X')
    const results = ms.search('divinaXcommedia', { tokenize })
    expect(results.length).toBeGreaterThan(0)
    expect(idsOf(results).sort(ascending)).toEqual([1])
  })

  it('uses a specific search-time term processing function if specified', () => {
    const processTerm = (string) => string.replace(/1/g, 'i').replace(/4/g, 'a').toLowerCase()
    const results = ms.search('d1v1n4', { processTerm })
    expect(results.length).toBeGreaterThan(0)
    expect(idsOf(results).sort(ascending)).toEqual([1])
  })

  it('rejects falsy terms', () => {
    const processTerm = (term) => term === 'quel' ? null : term
    const results = ms.search('quel commedia', { processTerm })
    expect(results.length).toBeGreaterThan(0)
    expect(idsOf(results).sort(ascending)).toEqual([1])
  })

  describe('match data', () => {
    // Separate corpus: document 3 additionally contains "vita" in its text field
    const documents = [
      { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' },
      { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' },
      { id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria ... vita' }
    ]
    const ms = new MiniSearch({ fields: ['title', 'text'] })
    ms.addAll(documents)

    it('reports information about matched terms and fields', () => {
      const results = ms.search('vita nova')
      expect(results.length).toBeGreaterThan(0)
      expect(results.map(({ match }) => match)).toEqual([
        { vita: ['title', 'text'], nova: ['title'] },
        { vita: ['text'] }
      ])
      expect(results.map(({ terms }) => terms)).toEqual([
        ['vita', 'nova'],
        ['vita']
      ])
    })

    it('reports correct info when combining terms with AND', () => {
      const results = ms.search('vita nova', { combineWith: 'AND' })
      expect(results.map(({ match }) => match)).toEqual([
        { vita: ['title', 'text'], nova: ['title'] }
      ])
      expect(results.map(({ terms }) => terms)).toEqual([
        ['vita', 'nova']
      ])
    })

    it('reports correct info for fuzzy and prefix queries', () => {
      const results = ms.search('vi nuova', { fuzzy: 0.2, prefix: true })
      // Match data must list the indexed terms that matched, not the raw query terms
      expect(results.map(({ match }) => match)).toEqual([
        { vita: ['title', 'text'], nova: ['title'] },
        { vita: ['text'] }
      ])
      expect(results.map(({ terms }) => terms)).toEqual([
        ['vita', 'nova'],
        ['vita']
      ])
    })

    it('passes only the query to tokenize', () => {
      const tokenize = jest.fn(string => string.split(/\W+/))
      const ms = new MiniSearch({ fields: ['text', 'title'], searchOptions: { tokenize } })
      const query = 'some search query'
      ms.search(query)
      expect(tokenize).toHaveBeenCalledWith(query)
    })

    it('passes only the term to processTerm', () => {
      const processTerm = jest.fn(term => term.toLowerCase())
      const ms = new MiniSearch({ fields: ['text', 'title'], searchOptions: { processTerm } })
      const query = 'some search query'
      ms.search(query)
      query.split(/\W+/).forEach(term => {
        expect(processTerm).toHaveBeenCalledWith(term)
      })
    })
  })
})
// Behaviour of the built-in tokenizer on accented Latin text, punctuation,
// digits, and non-Latin scripts.
describe('default tokenization', () => {
  it('splits on non-alphanumeric taking diacritics into account', () => {
    // Italian verse, one line per array entry, joined with newlines
    const poem = [
      'Se la vita è sventura,',
      'perché da noi si dura?',
      'Intatta luna, tale',
      'è lo stato mortale.',
      'Ma tu mortal non sei,',
      'e forse del mio dir poco ti cale'
    ].join('\n')
    const report = `The estimates range from roughly 1 in 100 to 1 in 100,000. The higher figures come from the working engineers, and the very low figures from management. What are the causes and consequences of this lack of agreement? Since 1 part in 100,000 would imply that one could put a Shuttle up each day for 300 years expecting to lose only one, we could properly ask "What is the cause of management's fantastic faith in the machinery?"`

    const index = new MiniSearch({ fields: ['text'] })
    index.addAll([
      { id: 1, text: poem },
      { id: 2, text: report }
    ])

    const hitCount = query => index.search(query).length

    expect(hitCount('perché')).toBeGreaterThan(0)
    // The same word cut off before its accented letter is expected not to match
    expect(hitCount('perch')).toEqual(0)
    // Words adjacent to punctuation and plain numbers must still be found
    for (const query of ['luna', '300', 'machinery']) {
      expect(hitCount(query)).toBeGreaterThan(0)
    }
  })

  it('supports non-latin alphabets', () => {
    const index = new MiniSearch({ fields: ['title'] })
    index.addAll([
      { id: 1, title: 'София София' },
      { id: 2, title: 'アネモネ' },
      { id: 3, title: '«τέχνη»' },
      { id: 4, title: 'سمت الرأس' },
      { id: 5, title: '123 45' }
    ])

    // Query paired with the ID of the only document expected to match it
    const expectations = [
      ['софия', 1],
      ['アネモネ', 2],
      ['τέχνη', 3],
      ['الرأس', 4],
      ['123', 5]
    ]
    for (const [query, expectedId] of expectations) {
      const matchedIds = index.search(query).map(({ id }) => id)
      expect(matchedIds).toEqual([expectedId])
    }
  })
})
// Auto-suggestions over a small fixed corpus: ranking, empty and unknown
// queries, multi-word queries, term order, and de-duplication of terms.
describe('autoSuggest', () => {
  const corpus = [
    { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' },
    { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' },
    { id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria' }
  ]
  const index = new MiniSearch({ fields: ['title', 'text'] })
  index.addAll(corpus)

  // Extracts the suggested strings from a suggestion list, preserving order
  const suggestionsOf = suggestions => suggestions.map(({ suggestion }) => suggestion)

  it('returns scored suggestions', () => {
    const suggestions = index.autoSuggest('com')
    expect(suggestions.length).toBeGreaterThan(0)
    expect(suggestionsOf(suggestions)).toEqual(['como', 'commedia'])
    const [best, runnerUp] = suggestions
    expect(best.score).toBeGreaterThan(runnerUp.score)
  })

  it('returns empty array if there is no match', () => {
    expect(index.autoSuggest('paguro')).toEqual([])
  })

  it('returns empty array for empty search', () => {
    expect(index.autoSuggest('')).toEqual([])
  })

  it('returns scored suggestions for multi-word queries', () => {
    const suggestions = index.autoSuggest('vita no')
    expect(suggestions.length).toBeGreaterThan(0)
    expect(suggestionsOf(suggestions)).toEqual(['vita nova', 'vita nostra'])
    const [best, runnerUp] = suggestions
    expect(best.score).toBeGreaterThan(runnerUp.score)
  })

  it('respects the order of the terms in the query', () => {
    const suggestions = index.autoSuggest('nostra vi')
    expect(suggestionsOf(suggestions)).toEqual(['nostra vita', 'vita'])
  })

  it('returns empty suggestions for terms that are not in the index', () => {
    let suggestions
    const suggest = () => {
      suggestions = index.autoSuggest('sottomarino aeroplano')
    }
    expect(suggest).not.toThrowError()
    expect(suggestions.length).toEqual(0)
  })

  it('does not duplicate suggested terms', () => {
    // With both fuzzy and prefix enabled, "vita" must still be listed only once
    const [best] = index.autoSuggest('vita', { fuzzy: true, prefix: true })
    expect(best.suggestion).toEqual('vita')
    expect(best.terms).toEqual(['vita'])
  })
})
// Round-tripping an index through JSON: `loadJSON` must rebuild an equivalent
// index, and must refuse to run without options.
describe('loadJSON', () => {
  const documents = [
    { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita' },
    { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como' },
    { id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria' }
  ]

  // Builds a populated index and returns it with its options and its JSON serialization
  const serializeIndex = () => {
    const options = { fields: ['title', 'text'] }
    const original = new MiniSearch(options)
    original.addAll(documents)
    return { options, original, json: JSON.stringify(original) }
  }

  it('loads a JSON-serialized search index', () => {
    const { options, original, json } = serializeIndex()

    const restored = MiniSearch.loadJSON(json, options)

    // Both the search results and the serialized form must survive the round trip
    expect(original.search('vita')).toEqual(restored.search('vita'))
    expect(original.toJSON()).toEqual(restored.toJSON())
  })

  it('raises an error if called without options', () => {
    const { json } = serializeIndex()
    const loadWithoutOptions = () => {
      MiniSearch.loadJSON(json)
    }
    expect(loadWithoutOptions).toThrowError('MiniSearch: loadJSON should be given the same options used when serializing the index')
  })
})
// `MiniSearch.getDefault`: looks up the default value of a named option and
// rejects unknown option names.
describe('getDefault', () => {
  it('returns the default value of the given option', () => {
    const functionOptions = ['extractField', 'tokenize', 'processTerm']
    const optionsWithoutDefault = ['searchOptions', 'fields']

    expect(MiniSearch.getDefault('idField')).toEqual('id')
    functionOptions.forEach(optionName => {
      expect(MiniSearch.getDefault(optionName)).toBeInstanceOf(Function)
    })
    // Known options that have no default are expected to yield undefined rather than throw
    optionsWithoutDefault.forEach(optionName => {
      expect(MiniSearch.getDefault(optionName)).toBe(undefined)
    })
  })

  it('throws an error if there is no option with the given name', () => {
    const lookUpUnknown = () => {
      MiniSearch.getDefault('foo')
    }
    expect(lookUpUnknown).toThrowError('MiniSearch: unknown option "foo"')
  })
})
})