UNPKG

@nozbe/microfuzz

Version:

A tiny, simple, fast fuzzy search library

github.com/Nozbe/microfuzz

Nozbe/microfuzz

305 lines (298 loc) • 11.1 kB

JavaScript

import createFuzzySearch, { normalizeText } from '../index'
import { experimentalSmartFuzzyMatch } from '../impl'

// Test suite for the fuzzy search factory (default export of '../index') and for
// `experimentalSmartFuzzyMatch` from '../impl'.
//
// The expected scores asserted below, by match kind (as named by the test titles):
//   0   - exact match
//   0.1 - full match
//   0.5 - "starts with" match
//   0.9 - contains query at word boundary, exactly
//   1   - contains query at word boundary
//   2   - contains query at any position
//   >2  - fuzzy match by letters (see the `s(...)` helper in the fuzzy test)
describe('createFuzzySearch', () => {
  /**
   * Searches the single-item collection `[text]` for `query` and asserts that exactly one
   * result comes back and that it refers to `text`.
   *
   * @param {string} text - The only item in the searched collection.
   * @param {string} query - The query passed to the search function.
   * @param {?number} [expectedScore] - When not null/undefined, `result.score` must be close
   *   to this value (`toBeCloseTo`); when omitted, the score is not checked.
   * @param {?Array<Array<number>>} [expectedIndices] - When truthy, `result.matches` must have
   *   exactly one entry, equal to this list of `[start, end]` pairs (e.g. `[[0, 2]]` for
   *   'foo' matched by 'foo').
   */
  const matches = (text, query, expectedScore, expectedIndices = null) => {
    // console.log(`${text} ${query}`)
    const results = createFuzzySearch([text])(query)
    expect(results.length).toBe(1)
    const [result] = results
    expect(result.item).toBe(text)
    if (expectedScore != null) {
      expect(result.score).toBeCloseTo(expectedScore)
    }
    if (expectedIndices) {
      expect(result.matches.length).toBe(1)
      expect(result.matches[0]).toEqual(expectedIndices)
    }
  }

  it(`can match by: exact match`, () => {
    matches('foo', 'foo', 0, [[0, 2]])
    matches('ABC', 'ABC', 0)
    matches('ąść', 'ąść', 0)
    matches('📚', '📚', 0)
    matches('123¡™£§', '123¡™£§', 0)
    matches('żabki', 'żabki', 0)
    matches('ząbki', 'ząbki', 0)
    matches('русский язык', 'русский язык', 0)
    matches('汉语', '汉语', 0)
    matches('日本語', '日本語', 0)
    matches('한국어', '한국어', 0)
    matches('ภาษาไทย', 'ภาษาไทย', 0)
    matches(' he ', ' he ', 0)
    matches(' he\n\t', ' he\n\t', 0) // hard space
  })

  it(`can match by: full match`, () => {
    matches('Foo', 'foo', 0.1)
    matches('FOO', 'foo', 0.1)
    matches('foo', 'Foo', 0.1)
    matches('foo', 'FOO', 0.1)
    matches('foo', 'foo ', 0.1)
    matches(' foo bar', 'foo bar', 0.1)
    // eslint-disable-next-line
    // matches('foo bar', 'foo bar', 0.1) // hard space
    matches('Żabki', 'żabki', 0.1, [[0, 4]])
    matches('Żabki', 'zabki', 0.1)
    matches('Ząbki', 'zabki', 0.1)
    matches('ZABKI', 'żąbki', 0.1)
    matches('Szczegół', 'szczegol', 0.1)
    matches('Язык', 'язык', 0.1, [[0, 3]])
    // Check for regression - previously highlight would be off by whitespace
    matches('foo ', 'foo', 0.1, [[0, 2]])
  })

  it(`can match by: "Starts with" match`, () => {
    // TODO: startsWith with exact diacritics should match more strongly (e.g. ża -> Żabka before Zabawa), but case-insensitively
    // (we want `to` to match Tom more than Couto)
    matches('Tomasz Kapelak', 'to', 0.5, [[0, 1]])
    matches('Żabka - oferta', 'Żab', 0.5, [[0, 2]])
    matches('Żabka - oferta', 'Zab', 0.5)
    matches('Żabka - oferta', 'zab', 0.5)
    matches('Szczegółowe', 'szcz', 0.5, [[0, 3]])
    matches('Русский язык', 'рус', 0.5, [[0, 2]])
    matches('汉语', '汉', 0.5, [[0, 0]])
    matches('日本語', '日', 0.5, [[0, 0]])
    // TODO: Fix Hangul highlighting
    // matches('한국어', '한', 0.5, [[0, 0]])
    matches('ภาษาไทย', 'ภ', 0.5, [[0, 0]])
    // Check for regression - previously highlight would be 1 longer
    matches('There is no icon', 'the ', 0.5, [[0, 2]])
  })

  it(`can match by: contains query (at word boundary) exactly`, () => {
    matches('[N4] Marketing', 'Market', 0.9, [[5, 10]])
    matches('Wypad do Żabki', 'Żabk', 0.9)
    matches('русский язык', 'язык', 0.9, [[8, 11]])
  })

  it(`can match by: contains query (at word boundary)`, () => {
    matches('[N4] Marketing', 'market', 1, [[5, 10]])
    matches('Wypad do Żabki', 'zabki', 1)
    matches('Myjcie ząbki!!', 'zabki', 1, [[7, 11]])
    matches('Русский Язык', 'язык', 1, [[8, 11]])
  })

  it(`can match by: contains query (at any position)`, () => {
    matches('Marco Couto', 'To', 2, [[9, 10]])
    matches('汉语', '语', 2, [[1, 1]])
    matches('日本語', '本', 2, [[1, 1]])
    // TODO: Fix Hangul highlighting
    // matches('한국어', '국', 2, [[1, 1]])
    matches('ภาษาไทย', 'า', 2, [[1, 1]])
  })

  it(`can match by words (in some order)`, () => {
    matches('Setting to disable fuzzy search', 'fuzzy setting', 1.9, [
      [0, 6],
      [19, 23],
    ])
    matches('Setting to disable fuzzy search', 'disable setting fuzzy search', 2.3, [
      [0, 6],
      [11, 17],
      [19, 23],
      [25, 30],
    ])
    // matches by words before matching by letters
    matches('Cloak Duck, test clock', 'clock test', 1.9, [
      [12, 15],
      [17, 21],
    ])
  })

  it(`can match by: contains letters awww yisss fuzzzzz`, () => {
    // The expected fuzzy score is written as a base of 2 plus one penalty per matched chunk.
    // score(of)
    const s = (score) => 2 + score
    // full word
    const w = 0.2
    // beginning of word
    const b = 0.4
    // middle of word
    const m = 0.8
    // middle of word (1 or 2 chars)
    const ms = 1.6

    // TODO: Matching diacritics should be scored better

    matches('Wypad do Żabki', 'wdż', s(b + ms + b), [
      [0, 0],
      [4, 4],
      [9, 9],
    ])
    matches('Wypad do Żabki', 'wdz', s(b + ms + b), [
      [0, 0],
      [4, 4],
      [9, 9],
    ])
    matches('Wypad do Żabki', 'wypażab', s(b + b), [
      [0, 3],
      [9, 11],
    ])
    matches('Wypad do Żabki', 'wypa żab', s(b + w + b), [
      [0, 3],
      [5, 5],
      [9, 11],
    ])
    matches('Marco Couto', 'mc', s(b + ms))
    matches('Marco Couto', 'm c', s(b + b))
    matches('Tomasz Kapelak', 'tokp', s(b + b + ms))
    // trying to match `Referral` (and not r, e in Marketing) is probably too complex/magic
    matches('[Marketing ] Referral Program', 'mrefp', s(ms + ms + ms + ms + b))
    matches('[Marketing ] Referral Program', 'm refp', s(ms + w + b + b))
    // No expected score passed: only asserts that the item matches at all.
    matches('Nozbe.com web site', 'website')
    matches('Książka 10 steps EN', '10en', s(w + ms + ms))
    matches('Won’t fix', 'wontfix')
    matches('[HR] JavaScript Developer', 'jsdev', s(b + ms + b), [
      [5, 5],
      [9, 9],
      [16, 18],
    ])
    matches('MacKay', 'mckay', s(b + m))
    matches('Русский Язык', 'уски', s(ms + ms))
    matches('Русский Язык', 'руя', s(b + b), [
      [0, 1],
      [8, 8],
    ])
    matches('汉语的，又称汉文、華文、唐文', '汉语唐', s(b + ms), [
      [0, 1],
      [12, 12],
    ])
    matches('日本語', '日語', s(b + ms), [
      [0, 0],
      [2, 2],
    ])
    matches('한국어', '한어', s(b + ms))
    // matches('한국어', '한어', 2, [[0, 0], [2, 2]]) // FIXME: Fix highlighting for Hangul
  })

  /**
   * Asserts that searching the single-item collection `[text]` for `query` returns no results.
   *
   * @param {string} text - The only item in the searched collection.
   * @param {string} query - The query that is expected not to match.
   */
  const noMatch = (text, query) => {
    const results = createFuzzySearch([text])(query)
    expect(results.length).toBe(0)
  }

  it(`can not match everything, okay :(`, () => {
    // no stemming
    noMatch('recognition', 'recognize')
    noMatch('production', 'produce')
    noMatch('żołądź', 'żołędzie')
    noMatch('take', 'took')
    noMatch('produce', 'reproduce')
    // no synonyms/alt spellings
    noMatch('McKay', 'MacKay')
    noMatch('mac', 'macintosh')
    noMatch('grey', 'gray')
    // no soundex
    noMatch('kay', 'kai')
    // no substitutions/typo autofix
    noMatch('leters', 'letters')
    noMatch('letters', 'lettesr')
    noMatch('referral', 'referarl')
  })

  it(`can search by key`, () => {
    expect(
      createFuzzySearch([{ t: 'foo' }, { t: 'foo2' }, { t: 'bar' }], { key: 't' })('foo'),
    ).toMatchObject([{ item: { t: 'foo' } }, { item: { t: 'foo2' } }])
  })

  it(`can search by many keys`, () => {
    const u1 = { name: 'foo1', alias: 'fooa1' }
    const u2 = { name: 'foo2', alias: 'bar' }
    const u3 = { name: 'bar', alias: '3foo' }
    const u4 = { name: 'bar', alias: 'bar' }
    // `matches` is expected to hold one entry per text returned by `getText`,
    // with `null` for a text that did not match.
    expect(
      createFuzzySearch([u1, u2, u3, u4], {
        getText: (item) => [item.name, item.alias],
      })('foo'),
    ).toMatchObject([
      {
        item: u1,
        matches: [[[0, 2]], [[0, 2]]],
      },
      { item: u2, matches: [[[0, 2]], null] },
      { item: u3, matches: [null, [[1, 3]]] },
    ])
  })

  it(`sorts searches by score`, () => {
    expect(
      createFuzzySearch([
        '[Marketing] Żabka etc.',
        'Zabawny Katar',
        'Żal Betoniarka',
        'Ząbka',
        'Żabka',
        'Żabowe Karabiny',
        'Żabka - oferta współpracy',
        'żabka',
        '[Marketing] żabka',
      ])('żabka'),
    ).toMatchObject([
      { item: 'żabka' /* score: 0 */ },
      { item: 'Ząbka' /* score: 0.1 */ },
      { item: 'Żabka' /* score: 0.1 */ },
      { item: 'Żabka - oferta współpracy' /* score: 0.5 */ },
      { item: '[Marketing] żabka' /* score: 0.9 */ },
      { item: '[Marketing] Żabka etc.' /* score: 1 */ },
      { item: 'Zabawny Katar' /* score: 2 */ },
      { item: 'Żabowe Karabiny' /* score: 2 */ },
      { item: 'Żal Betoniarka' /* score: 3 */ },
    ])
  })

  it(`sorts searches by score for many keys`, () => {
    expect(
      createFuzzySearch(
        [
          { name: 'Matt', alias: 'Matthias Obst-Mölinger' },
          { name: 'Marco Couto', alias: null },
          { name: 'Tomasz Kapelak', alias: 'Tom' },
          { name: 'tommy' },
          { name: 'Jacob Tom Belinger', alias: 'Jake' },
        ],
        {
          getText: (item) => [item.name, item.alias],
        },
      )('tom'),
    ).toMatchObject([
      { item: { name: 'Tomasz Kapelak', alias: 'Tom' } /* score: 0.1 */ },
      { item: { name: 'tommy' } /* score: 0.5 */ },
      { item: { name: 'Jacob Tom Belinger', alias: 'Jake' } /* score: 1 */ },
      { item: { name: 'Matt', alias: 'Matthias Obst-Mölinger' } /* score: 3 */ },
    ])
  })

  it(`returns empty array for empty query`, () => {
    expect(createFuzzySearch(['a', 'b', 'c', 'd'])('')).toEqual([])
  })

  /**
   * Runs `experimentalSmartFuzzyMatch` on the normalized `text` and `query` and asserts that it
   * returns a non-null result.
   *
   * @param {string} text - Text to search in; passed through `normalizeText` first.
   * @param {string} query - Query to look for; passed through `normalizeText` first.
   * @param {...Array<number>} expectedIndices - Optional `[start, end]` pairs. When at least one
   *   is given, the second element of the result must equal the full list of pairs.
   */
  const matchesNew = (text, query, ...expectedIndices) => {
    const result = experimentalSmartFuzzyMatch(normalizeText(text), normalizeText(query))
    expect(result).not.toBe(null)
    if (expectedIndices.length) {
      const [, indices] = result
      expect(indices).toEqual(expectedIndices)
    }
  }

  it(`experimentalSmartFuzzyMatch`, () => {
    matchesNew('Wypad do Żabki', 'wdż', [0, 0], [6, 6], [9, 9])
    matchesNew('Wypad do Żabki', 'wdz', [0, 0], [6, 6], [9, 9])
    matchesNew('Wypad do Żabki', 'wypażab', [0, 3], [9, 11])
    matchesNew('Wypad do Żabki', 'wypa żab', [0, 3], [8, 11])
    matchesNew(
      'Marco Couto',
      'mc',
      [0, 0],
      [3, 3], // NOTE: Ideally would be (6,6), but remaining query is len 1, so [3] matches
    )
    matchesNew('Marco Couto', 'm c', [0, 0], [5, 6])
    matchesNew('Tomasz Kapelak', 'tokp', [0, 1], [7, 7], [9, 9])
    matchesNew('[Marketing ] Referral Program', 'mrefp', [1, 1], [13, 15], [22, 22])
    matchesNew('[Marketing ] Referral Program', 'm refp', [1, 1], [12, 15], [22, 22])
    matchesNew('Nozbe.com web site', 'website', [10, 12], [14, 17])
    matchesNew('Książka 10 steps EN', '10en')
    // FIXME:
    // matchesNew('Won’t fix', 'wontfix')
    // matchesNew('[HR] JavaScript Developer', 'jsdev')
    matchesNew('MacKay', 'mckay')
    matchesNew('Русский Язык', 'усс')
    matchesNew('Русский Язык', 'руя')
    matchesNew('汉语的，又称汉文、華文、唐文', '汉语唐')
    matchesNew('日本語', '日語')
    matchesNew('한국어', '한어')
    // new cases
    matchesNew('GH - Growth Hacking', 'growha', [0, 0], [6, 8], [12, 13])
  })
})