UNPKG

sanity

Version:

Sanity is a real-time content infrastructure with a scalable, hosted backend featuring a Graph Oriented Query Language (GROQ), asset pipelines and fast edge caches

125 lines (109 loc) 4.28 kB
import {type SanityDocumentLike} from '@sanity/types'
import {compact, intersection, keyBy, partition, toLower, union, uniq, words} from 'lodash'

import {type SearchSpec, type WeightedHit} from '../common'

/** A `[score, story]` pair: the numeric score and a human-readable explanation of it. */
type SearchScore = [number, string]

/**
 * Takes a set of terms and a value and returns a [score, story] pair, where score is the sum of
 * the phrase score and the word score for that value and story is the explanation of both.
 *
 * Terms wrapped in double quotes are scored as phrases, all other terms as individual words.
 *
 * @param searchTerms - Raw search terms; quoted terms (`"like this"`) are treated as phrases
 * @param value - The text to score the terms against
 * @returns The aggregated score and a combined `word, phrase` explanation
 *
 * @internal
 */
export const calculateScore = (searchTerms: string[], value: string): SearchScore => {
  // Separate search terms by phrases (wrapped with quotes) and words.
  const {phrases: uniqueSearchPhrases, words: uniqueSearchWords} =
    partitionAndSanitizeSearchTerms(searchTerms)

  // Calculate an aggregated score of both phrase and word matches.
  const [phraseScore, phraseWhy] = calculatePhraseScore(uniqueSearchPhrases, value)
  const [wordScore, wordWhy] = calculateWordScore(uniqueSearchWords, value)
  return [phraseScore + wordScore, [wordWhy, phraseWhy].join(', ')]
}

/**
 * Converts an arbitrary field value to the text that gets scored.
 *
 * Strings are returned as-is and other values are JSON-serialized. Values without any content
 * (`null`, `undefined`, or anything `JSON.stringify` cannot serialize) yield an empty string so
 * that callers treat them as "no value" instead of scoring the literal text `null`.
 */
const stringify = (value: unknown): string => {
  if (value === null || value === undefined) {
    return ''
  }
  if (typeof value === 'string') {
    return value
  }
  // `JSON.stringify` returns `undefined` at runtime for functions and symbols,
  // despite its declared return type.
  return JSON.stringify(value) ?? ''
}

/**
 * Scores every hit against the search terms, using the weighted paths declared for its type.
 *
 * For the path at index `idx` of the matching spec, the value is read from the hit's `w${idx}`
 * property. Each path contributes `score * weight`; the hit's total score is the sum of all
 * path scores.
 *
 * @param searchSpec - Search specs, looked up by `typeName` using each hit's `_type`
 * @param hits - The documents to score
 * @param terms - Search terms to score against (defaults to none)
 * @returns One weighted hit per input hit, in input order. `resultIndex` is
 *   `hits.length - index`, so earlier hits get a higher value.
 *
 * @internal
 */
export function applyWeights(
  searchSpec: SearchSpec[],
  hits: SanityDocumentLike[],
  terms: string[] = [],
): WeightedHit[] {
  const specByType = keyBy(searchSpec, (spec) => spec.typeName)

  return hits.map((hit, index) => {
    // A hit whose type has no spec has nothing to be scored on: it gets no stories and a
    // total score of 0 rather than throwing.
    const typeSpec: SearchSpec | undefined = specByType[hit._type]
    const stories = (typeSpec?.paths ?? []).map((pathSpec, idx) => {
      const value = stringify(hit[`w${idx}`])
      if (!value) {
        return {path: pathSpec.path, score: 0, why: 'No match'}
      }

      const [score, why] = calculateScore(terms, value)
      return {
        path: pathSpec.path,
        score: score * pathSpec.weight,
        why: `${why} (*${pathSpec.weight})`,
      }
    })

    const totalScore = stories.reduce((acc, rank) => acc + rank.score, 0)

    return {hit, resultIndex: hits.length - index, score: totalScore, stories}
  })
}

/**
 * For phrases: score on the total number of matching characters.
 * E.g. given the phrases ["the fox", "of london"] for the target value "the wily fox of london"
 *
 * - "the fox" isn't included in the target value (score: 0)
 * - "of london" is included in the target value, and 9 out of 22 characters match (score: 9/22 = ~0.409)
 * - non-exact matches have their score divided in half (final score: ~0.205)
 *
 * The value is lower-cased and trimmed before matching; phrases are matched as given. A value
 * that is empty after trimming never matches and scores 0.
 *
 * @param uniqueSearchPhrases - Phrases to look for, without their wrapping quotes
 * @param value - The text to search in
 * @returns The phrase score and its explanation
 *
 * @internal
 */
export function calculatePhraseScore(uniqueSearchPhrases: string[], value: string): SearchScore {
  const sanitizedValue = value.toLowerCase().trim()
  const valueLength = sanitizedValue.length

  let fieldScore = 0
  let matchCount = 0

  // Only score against non-empty values: an empty value can only "include" an empty phrase,
  // and dividing by its zero length would turn the score into NaN.
  if (valueLength > 0) {
    for (const term of uniqueSearchPhrases) {
      if (sanitizedValue.includes(term)) {
        fieldScore += term.length / valueLength
        matchCount += term.length
      }
    }
  }

  return fieldScore === 1
    ? [1, '[Phrase] Exact match']
    : [fieldScore / 2, `[Phrase] Matched ${matchCount} of ${valueLength} characters`]
}

/**
 * For words: score on the total number of matching words.
 * E.g. given the words ["the", "fox", "of", "london"] for the target value "the wily fox of london"
 *
 * - 4 out of 5 words match (score: 4/5 = 0.8)
 * - non-exact matches have their score divided in half (final score: 0.4)
 *
 * The denominator is the number of distinct terms across both the value and the search words.
 * When there are no terms at all the score is 0.
 *
 * @param uniqueSearchTerms - Search words to look for
 * @param value - The text to search in; it is lower-cased and split into words
 * @returns The word score and its explanation
 *
 * @internal
 */
export function calculateWordScore(uniqueSearchTerms: string[], value: string): SearchScore {
  const uniqueValueTerms = uniq(compact(words(toLower(value))))

  const matches = intersection(uniqueSearchTerms, uniqueValueTerms)
  const all = union(uniqueValueTerms, uniqueSearchTerms)

  // Guard the division: with no search words and no words in the value, 0/0 would be NaN.
  const fieldScore = all.length === 0 ? 0 : matches.length / all.length

  return fieldScore === 1
    ? [1, '[Word] Exact match']
    : [
        fieldScore / 2,
        `[Word] Matched ${matches.length} of ${all.length} terms: [${matches.join(', ')}]`,
      ]
}

/**
 * Lower-cases and de-duplicates the search terms, then splits them into phrases (terms wrapped
 * in double quotes, returned without the quotes) and plain words.
 *
 * @param searchTerms - Raw search terms
 * @returns The sanitized phrases and words
 *
 * @internal
 */
export function partitionAndSanitizeSearchTerms(searchTerms: string[]): {
  phrases: string[]
  words: string[]
} {
  // Everything downstream of this is already lower-cased and unique.
  const uniqueSearchTerms = uniq(searchTerms.map(toLower))

  const [searchPhrases, searchWords] = partition(uniqueSearchTerms, (term) =>
    /^".*"$/.test(term),
  )

  return {
    phrases: searchPhrases.map(stripWrappingQuotes),
    words: searchWords,
  }
}

/** Removes one pair of double quotes wrapping the whole string, if present. */
function stripWrappingQuotes(str: string): string {
  return str.replace(/^"(.*)"$/, '$1')
}