sanity
Version:
Sanity is a real-time content infrastructure with a scalable, hosted backend featuring a Graph Oriented Query Language (GROQ), asset pipelines and fast edge caches
202 lines (170 loc) • 6.61 kB
text/typescript
import {DEFAULT_MAX_FIELD_DEPTH} from '@sanity/schema/_internal'
import {type CrossDatasetType, type SchemaType} from '@sanity/types'
import {compact, flatten, flow, toLower, trim, union, uniq, words} from 'lodash'
import {
deriveSearchWeightsFromType,
type SearchFactoryOptions,
type SearchOptions,
type SearchPath,
type SearchSort,
type SearchSpec,
type SearchTerms,
} from '../common'
import {FINDABILITY_MVI} from '../constants'
/**
 * GROQ parameters that accompany the query built by `createSearchQuery`.
 */
export interface SearchParams {
/** Document type names, referenced as `$__types` by the `_type in $__types` filter. */
__types: string[]
/** Upper bound of the result slice, referenced as `$__limit` by `[0...$__limit]`. */
__limit: number
/** Any other parameter: the per-term `t0`, `t1`, … values and caller-supplied params. */
[key: string]: unknown
}
/**
 * Everything `createSearchQuery` returns for a single search.
 */
export interface SearchQuery {
/** The GROQ query string, prefixed with `//` comment lines. */
query: string
/** Parameters referenced by the query (terms, types, limit and caller-supplied params). */
params: SearchParams
/** Request options; `createSearchQuery` sets `tag` here. */
options: Record<string, unknown>
/** Search specs derived from the requested types, limited to those with at least one path. */
searchSpec: SearchSpec[]
/** Terms extracted from the query string, in the order used for the `t0`, `t1`, … params. */
terms: string[]
}
/** Result limit `createSearchQuery` falls back to when the search options carry no `limit`. */
export const DEFAULT_LIMIT = 1000
/**
 * Merge the per-type path lists into one flat list, dropping duplicates and falsy entries.
 */
const combinePaths = (paths: string[][]): string[] => compact(union(flatten(paths)))
/**
 * Render a search path as a GROQ expression, wrapping it in its mapper function when one is
 * set. E.g. `{path: 'title', mapWith: 'lower'}` becomes `lower(title)`.
 */
const pathWithMapper = (searchPath: SearchPath): string => {
  const {mapWith, path} = searchPath
  if (mapWith) {
    return `${mapWith}(${path})`
  }
  return path
}
/**
 * Build the GROQ match constraints for the given search terms.
 *
 * All searchable paths across the specs are collected into one deduplicated list. Each term
 * then yields a single parenthesised group in which its parameter (`$t0`, `$t1`, …) is matched
 * against every path, joined by logical OR. When there are no paths, no constraints are
 * produced.
 */
function createConstraints(terms: string[], specs: SearchSpec[]) {
  const pathsPerSpec = specs.map((spec) => (spec.paths || []).map((cfg) => pathWithMapper(cfg)))
  const searchPaths = combinePaths(pathsPerSpec)

  const groups: string[] = []
  // Without any searchable path every group would be empty, so none are emitted
  if (searchPaths.length > 0) {
    terms.forEach((_term, termIndex) => {
      const matchers = searchPaths.map((searchPath) => `${searchPath} match $t${termIndex}`)
      groups.push(`(${matchers.join(' || ')})`)
    })
  }
  return groups
}
/**
 * Matches runs of characters that are neither whitespace nor one of the listed special
 * characters; every run becomes one token.
 */
const SPECIAL_CHARS = /([^!@#$%^&*(),\\/?";:{}|[\]+<>\s-])+/g

/**
 * Matches a run of dots at the start and/or the end of a token. The global flag is required:
 * without it `replace` stops after the first match, leaving trailing dots in place whenever
 * the token also has leading dots.
 */
const STRIP_EDGE_CHARS = /(^[.]+)|([.]+$)/g

/**
 * Split a string into tokens on whitespace and special characters, then strip dots from both
 * ends of every token. Dots inside a token are kept.
 *
 * A token made up solely of dots becomes an empty string and is still returned; callers that
 * do not want empty tokens must drop them (`extractTermsFromQuery` does so with `compact`).
 *
 * @param string - The text to tokenize
 * @returns The tokens in order of appearance; an empty array when nothing matches
 */
export function tokenize(string: string): string[] {
  return (string.match(SPECIAL_CHARS) || []).map((token) => token.replace(STRIP_EDGE_CHARS, ''))
}
/**
 * Convert a string into an array of tokenized terms.
 *
 * Any multi-word text wrapped in double quotes is treated as a "phrase": it is kept as a single
 * term (quotes included, lowercased and trimmed) and its special characters are not removed.
 * Quoted text containing a single word is tokenized like regular text.
 *
 * Phrases are returned first, followed by the remaining tokens; both groups are deduplicated.
 * E.g.`"the" "fantastic mr" fox fox book` =\> [`"fantastic mr"`, "the", "fox", "book"]
 *
 * Phrases wrapped in quotes are assigned relevance scoring differently from regular words.
 *
 * @param query - The raw search string
 * @returns Phrase terms followed by regular terms
 *
 * @internal
 */
export function extractTermsFromQuery(query: string): string[] {
  const quotedQueries: string[] = []
  const unquotedQuery = query.replace(/("[^"]*")/g, (match) => {
    if (words(match).length > 1) {
      quotedQueries.push(match)
      // Leave a space behind so that words on either side of the phrase are not fused together
      return ' '
    }
    return match
  })

  // Lowercase, trim and dedupe quoted queries
  const quotedTerms = uniq(quotedQueries.map((str) => trim(toLower(str))))

  /**
   * Convert (remaining) search query into an array of deduped, sanitized tokens.
   * All white space and special characters are removed; accented characters are kept as-is.
   * e.g. "The saint of Saint-Germain-des-Prés" =\> ['the', 'saint', 'of', 'germain', 'des', 'prés']
   */
  const remainingTerms = uniq(compact(tokenize(toLower(unquotedQuery))))

  return [...quotedTerms, ...remainingTerms]
}
/**
 * Serialize sort definitions into the argument list of a GROQ `order()` call.
 *
 * Each entry becomes `<field> <direction>`: the field is wrapped in `mapWith(...)` when a
 * mapper is set, the direction is lowercased and left out when absent. Entries are joined
 * with commas.
 */
function toOrderClause(orderBy: SearchSort[]): string {
  const clauses: string[] = []
  for (const {direction, field, mapWith} of orderBy || []) {
    const target = mapWith ? `${mapWith}(${field})` : field
    const parts = [target.trim(), (direction || '').toLowerCase().trim()]
    // Drop empty parts so that a missing direction leaves no trailing space
    clauses.push(parts.filter(Boolean).join(' '))
  }
  return clauses.join(',')
}
/**
 * Build the GROQ query, parameters and request options for a document search.
 *
 * @param searchTerms - The query string, the schema types to search within and an optional
 *   extra GROQ filter
 * @param searchOpts - Optional settings: extra filter and params, sort, limit, comments, tag,
 *   draft inclusion, max field depth and the unstable extended projection
 * @returns The GROQ query (prefixed with `//` comment lines), its parameters, the request
 *   options, the search specs derived from the given types and the extracted terms
 *
 * @internal
 */
export function createSearchQuery(
  searchTerms: SearchTerms<SchemaType | CrossDatasetType>,
  searchOpts: SearchOptions & SearchFactoryOptions = {},
): SearchQuery {
  const {filter, params, tag} = searchOpts

  // Derive the searchable paths per type; types without any searchable path are left out
  const specs = searchTerms.types
    .map((schemaType) =>
      deriveSearchWeightsFromType({
        schemaType,
        maxDepth: searchOpts.maxDepth || DEFAULT_MAX_FIELD_DEPTH,
        isCrossDataset: searchOpts.isCrossDataset,
      }),
    )
    .filter(({paths}) => paths.length)

  // Extract search terms from string query, factoring in phrases wrapped in quotes
  const terms = extractTermsFromQuery(searchTerms.query)

  // Construct search filters used in this GROQ query
  const filters = [
    '_type in $__types',
    searchOpts.includeDrafts === false && `!(_id in path('drafts.**'))`,
    ...createConstraints(terms, specs),
    filter ? `(${filter})` : '',
    searchTerms.filter ? `(${searchTerms.filter})` : '',
  ].filter(Boolean)

  // Per type, project every searchable path under a `w<index>` key
  const selections = specs.map((spec) => {
    const constraint = `_type == "${spec.typeName}" => `
    const fields = spec.paths.map((cfg, i) => `"w${i}": ${pathWithMapper(cfg)}`).join(',')
    return `${constraint}{ ${fields} }`
  })

  // Default to `_id asc` (GROQ default) if no search sort is provided. An empty sort array
  // gets the same default, as it would otherwise produce an invalid, argument-less `order()`.
  const sort: SearchSort[] = searchOpts.sort?.length
    ? searchOpts.sort
    : [{field: '_id', direction: 'asc'}]
  const sortOrder = toOrderClause(sort)

  const projectionFields = ['_type', '_id']
  const selection = selections.length > 0 ? `...select(${selections.join(',\n')})` : ''
  const finalProjection = projectionFields.join(', ') + (selection ? `, ${selection}` : '')

  let query =
    `*[${filters.join(' && ')}]` +
    `| order(${sortOrder})` +
    `[0...$__limit]` +
    `{${finalProjection}}`

  // Optionally prepend our query with an 'extended' projection.
  // Required if we want to sort on nested object or reference fields.
  // In future, creating the extended projection should be handled internally by `createSearchQuery`.
  if (searchOpts.__unstable_extendedProjection) {
    const extendedProjection = searchOpts.__unstable_extendedProjection
    const firstProjection = projectionFields.concat(extendedProjection).join(', ')

    query = [
      `*[${filters.join(' && ')}]{${firstProjection}}`,
      `order(${sortOrder})[0...$__limit]{${finalProjection}}`,
    ].join('|')
  }

  // Prepend GROQ comments. The findability comment is always present, so there is always at
  // least one comment line ahead of the query.
  const groqComments = [`findability-mvi:${FINDABILITY_MVI}`]
    .concat(searchOpts.comments || [])
    .map((s) => `// ${s}`)
    .join('\n')

  const updatedQuery = `${groqComments}\n${query}`

  const limit = searchOpts.limit ?? DEFAULT_LIMIT

  return {
    query: updatedQuery,
    params: {
      ...toGroqParams(terms),
      __types: specs.map((spec) => spec.typeName),
      __limit: limit,
      // Caller-supplied params come last and therefore win over the generated ones
      ...(params || {}),
    },
    options: {tag},
    searchSpec: specs,
    terms,
  }
}
/**
 * Map search terms to GROQ parameters: the term at index `i` is stored under the key `t{i}`
 * ("t" is short for term) with a trailing `*` appended.
 */
const toGroqParams = (terms: string[]): Record<string, string> => {
  const groqParams: Record<string, string> = {}
  terms.forEach((term, index) => {
    groqParams[`t${index}`] = `${term}*`
  })
  return groqParams
}