From 3eb6fd3860cc740ba2985fe40f886359962ba415 Mon Sep 17 00:00:00 2001
From: Robin Pyon
Date: Thu, 20 Jul 2023 16:36:14 +0100
Subject: [PATCH] fix(core): various fixes and improvements to studio search [v2] (#4704)

* fix(core): improve search weighting
* fix(core): filter out hits with no score on the client, don't exclude _id from whole word score generation
* fix(core): ensure that we use optimised search specs for both selections and weighting
* fix(core): allow search traversal depth to be set with SANITY_STUDIO_UNSTABLE_SEARCH_DEPTH environment variable
* fix(core): expose matched indices on array fields in search hit stories
* fix(core): add a cap on total search paths extracted per root-level object
* fix(core): naive first pass of limiting attributes pre-search query generation
* fixup! fix(core): add a cap on total search paths extracted per root-level object
* fix(core): always include user generated experimental search paths, display skipped paths in generated search specs
* fix(core): export all weighted search functions and types
* fixup! fix(core): add a cap on total search paths extracted per root-level object
* fix(core): reduce unnecessary whitespace from search query payloads
* fix(core): don't include duplicate _id and _type values in type selections
* fix(core): ignore duplicate paths when calculating maxAttributes
* docs(core): update docs for applyWeights/createSearchQuery
* fixup! fix(core): export all weighted search functions and types
* fix(core): only add paths to rolling list if not previously added, add support for defining MAX_UNIQUE_ATTRIBUTES via env var
* refactor(core): update consts to better match corresponding env vars
* fixup! fix(core): filter out hits with no score on the client, don't exclude _id from whole word score generation
* fixup! docs(core): update docs for applyWeights/createSearchQuery
* refactor(core): rename parentType to rootType, add clarifying comment
---
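A note on configuration: the limits introduced by this patch are read from environment variables at studio build time. The constants below are copied verbatim from the diffs that follow (createSearchQuery.ts and resolve.ts); since `Number(undefined)` is `NaN`, which is falsy, unset or non-numeric variables fall back to the defaults shown:

    // Maximum number of unique searchable attributes per search query (createSearchQuery.ts)
    const SEARCH_ATTR_LIMIT = Number(process.env.SANITY_STUDIO_UNSTABLE_SEARCH_ATTR_LIMIT) || 1000
    // Maximum traversal depth and extracted search paths per root-level object (resolve.ts)
    const SEARCH_DEPTH_LIMIT = Number(process.env.SANITY_STUDIO_UNSTABLE_SEARCH_DEPTH_LIMIT) || 15
    const SEARCH_PATH_LIMIT = Number(process.env.SANITY_STUDIO_UNSTABLE_SEARCH_PATH_LIMIT) || 500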
 .../@sanity/base/src/_exports/_internal.ts    |   2 +-
 packages/@sanity/base/src/search/index.ts     |  10 +-
 .../src/search/weighted/applyWeights.test.ts  |  33 ++-
 .../base/src/search/weighted/applyWeights.ts  | 141 +++++++++++--
 .../search/weighted/createSearchQuery.test.ts | 112 ++++++++--
 .../src/search/weighted/createSearchQuery.ts  | 170 ++++++++++-----
 .../weighted/createWeightedSearch.test.ts     |  12 +-
 .../@sanity/base/src/search/weighted/index.ts |   2 +
 .../@sanity/base/src/search/weighted/types.ts |   4 +-
 .../searchResultItem/DebugOverlay.tsx         |   3 +
 packages/@sanity/schema/package.json          |   1 +
 .../src/legacy/searchConfig/normalize.test.ts |  12 +-
 .../src/legacy/searchConfig/normalize.ts      |   3 +-
 .../src/legacy/searchConfig/resolve.test.ts   | 137 +++++++++++-
 .../schema/src/legacy/searchConfig/resolve.ts | 198 ++++++++++++------
 .../@sanity/schema/src/legacy/types/object.ts |   6 +-
 packages/@sanity/types/src/schema/types.ts    |   1 +
 17 files changed, 661 insertions(+), 186 deletions(-)

diff --git a/packages/@sanity/base/src/_exports/_internal.ts b/packages/@sanity/base/src/_exports/_internal.ts
index 6f317047a2f..2791d9550c4 100644
--- a/packages/@sanity/base/src/_exports/_internal.ts
+++ b/packages/@sanity/base/src/_exports/_internal.ts
@@ -15,7 +15,7 @@ export type {DocumentAvailability} from '../preview/types'
 export {AvailabilityReason} from '../preview/types'
 
 export {getSearchableTypes} from '../search/common/utils'
-export {createWeightedSearch} from '../search/weighted/createWeightedSearch'
+export {createSearchQuery, createWeightedSearch} from '../search/weighted'
 export type {WeightedHit} from '../search/weighted/types'
 
 export {createHookFromObservableFactory} from '../util/createHookFromObservableFactory'

diff --git a/packages/@sanity/base/src/search/index.ts b/packages/@sanity/base/src/search/index.ts
index a0b8907066a..034191824b3 100644
--- a/packages/@sanity/base/src/search/index.ts
+++ b/packages/@sanity/base/src/search/index.ts
@@ -6,14 +6,6 @@ import {versionedClient} from '../client/versionedClient'
 import {getSearchableTypes} from './common/utils'
 import {createWeightedSearch} from './weighted/createWeightedSearch'
 
-export type {
-  SearchOptions,
-  SearchSort,
-  SearchTerms,
-  SearchableType,
-  WeightedHit,
-} from './weighted/types'
-
 // Use >= 2021-03-25 for pt::text() support
 const searchClient = versionedClient.withConfig({
   apiVersion: '2021-03-25',
@@ -23,3 +15,5 @@ export default createWeightedSearch(getSearchableTypes(schema), searchClient, {
   unique: true,
   tag: 'search.global',
 })
+
+export * from './weighted'

diff --git a/packages/@sanity/base/src/search/weighted/applyWeights.test.ts b/packages/@sanity/base/src/search/weighted/applyWeights.test.ts
index 4cdca29b6ca..484f03c424d 100644
--- a/packages/@sanity/base/src/search/weighted/applyWeights.test.ts
+++ b/packages/@sanity/base/src/search/weighted/applyWeights.test.ts
@@ -2,6 +2,7 @@ import {
   calculatePhraseScore,
   calculateWordScore,
   partitionAndSanitizeSearchTerms,
+  calculateCharacterScore,
 } from './applyWeights'
 
 describe('calculatePhraseScore', () => {
@@ -11,7 +12,11 @@ describe('calculatePhraseScore', () => {
   it('should handle partial matches', () => {
     expect(calculatePhraseScore(['the fox'], 'the fox of foo')).toEqual([
       0.25,
-      '[Phrase] Matched 7 of 14 characters',
+      '[Phrase] 7/14 chars',
+    ])
+    expect(calculatePhraseScore(['the fox', 'fox of'], 'the fox of foo')).toEqual([
+      0.4642857142857143,
+      '[Phrase] 13/14 chars',
     ])
   })
 })
@@ -25,17 +30,25 @@ describe('calculateWordScore', () => {
     expect(calculateWordScore(['foo', 'bar'], 'bar & foo')).toEqual([1, '[Word] Exact match'])
   })
   it('should handle partial matches', () => {
-    expect(calculateWordScore(['foo'], 'bar foo')).toEqual([
-      0.25,
-      '[Word] Matched 1 of 2 terms: [foo]',
-    ])
-    expect(calculateWordScore(['foo', 'bar'], 'foo')).toEqual([
-      0.25,
-      `[Word] Matched 1 of 2 terms: [foo]`,
-    ])
+    expect(calculateWordScore(['foo'], 'bar foo')).toEqual([0.25, '[Word] 1/2 terms: [foo]'])
+    expect(calculateWordScore(['foo', 'bar'], 'foo')).toEqual([0.25, `[Word] 1/2 terms: [foo]`])
     expect(calculateWordScore(['foo', 'bar', 'baz'], 'foo foo bar')).toEqual([
       1 / 3,
-      `[Word] Matched 2 of 3 terms: [foo, bar]`,
+      `[Word] 2/3 terms: [foo, bar]`,
     ])
   })
 })
+
+describe('calculateCharacterScore', () => {
+  it('should handle exact matches', () => {
+    expect(calculateCharacterScore(['bar', 'foo'], 'bar foo')).toEqual([1, '[Char] Contains all'])
+  })
+
+  it('should handle partial matches', () => {
+    expect(calculateCharacterScore(['foo'], 'bar foo')).toEqual([0.25, '[Char] 3/6 chars'])
+    expect(calculateCharacterScore(['fo', 'ba'], 'bar foo')).toEqual([
+      0.3333333333333333,
+      '[Char] 4/6 chars',
+    ])
+  })
+})

diff --git a/packages/@sanity/base/src/search/weighted/applyWeights.ts b/packages/@sanity/base/src/search/weighted/applyWeights.ts
index af7d79d34ae..35428853387 100644
--- a/packages/@sanity/base/src/search/weighted/applyWeights.ts
+++ b/packages/@sanity/base/src/search/weighted/applyWeights.ts
@@ -3,37 +3,60 @@ import {SearchHit, WeightedHit, SearchSpec} from './types'
 
 type SearchScore = [number, string]
 
-// takes a set of terms and a value and returns a [score, story] pair where score is a value between 0, 1 and story is the explanation
+/**
+ * Calculates a score (between 0 and 1) indicating general search relevance of an array of
+ * search tokens within a specific string.
+ *
+ * @param searchTerms - All search terms
+ * @param value - The string to match against
+ * @returns A [score, story] pair containing the search score as well as a human readable explanation
+ * @internal
+ */
 export const calculateScore = (searchTerms: string[], value: string): SearchScore => {
   // Separate search terms by phrases (wrapped with quotes) and words.
   const {phrases: uniqueSearchPhrases, words: uniqueSearchWords} = partitionAndSanitizeSearchTerms(
     searchTerms
   )
-
-  // Calculate an aggregated score of both phrase and word matches.
+  // Calculate an aggregated score of words (partial + whole) and phrase matches.
+  const [charScore, charWhy] = calculateCharacterScore(uniqueSearchWords, value)
   const [phraseScore, phraseWhy] = calculatePhraseScore(uniqueSearchPhrases, value)
   const [wordScore, wordWhy] = calculateWordScore(uniqueSearchWords, value)
 
-  return [phraseScore + wordScore, [wordWhy, phraseWhy].join(', ')]
+  return [charScore + wordScore + phraseScore, [charWhy, wordWhy, phraseWhy].flat().join(', ')]
 }
 
 const stringify = (value: unknown): string =>
   typeof value === 'string' ? value : JSON.stringify(value)
 
+/**
+ * Applies path weights from a supplied SearchSpec to existing search hits to create _weighted_ hits
+ * augmented with search ranking and human readable explanations.
+ *
+ * @param searchSpec - SearchSpec containing path weighting
+ * @param hits - SearchHit objects to augment
+ * @param terms - All search terms
+ * @returns WeightedHit array containing search scores and ranking explanations
+ * @internal
+ */
 export function applyWeights(
   searchSpec: SearchSpec[],
   hits: SearchHit[],
   terms: string[] = []
 ): WeightedHit[] {
   const specByType = keyBy(searchSpec, (spec) => spec.typeName)
-  return hits.map((hit, index) => {
+
+  return hits.reduce<WeightedHit[]>((allHits, hit, index) => {
     const typeSpec = specByType[hit._type]
     const stories = typeSpec.paths.map((pathSpec, idx) => {
-      const value = stringify(hit[`w${idx}`])
+      const pathHit = ['_id', '_type'].includes(pathSpec.path) ? hit[pathSpec.path] : hit[idx]
+      const indices = Array.isArray(pathHit) ? findMatchingIndices(terms, pathHit) : null
+      // Only stringify non-falsy values so null values don't pollute search
+      const value = pathHit ? stringify(pathHit) : null
       if (!value) {
         return {path: pathSpec.path, score: 0, why: 'No match'}
       }
       const [score, why] = calculateScore(terms, value)
       return {
+        indices,
         path: pathSpec.path,
         score: score * pathSpec.weight,
        why: `${why} (*${pathSpec.weight})`,
@@ -41,10 +64,25 @@ export function applyWeights(
     })
 
     const totalScore = stories.reduce((acc, rank) => acc + rank.score, 0)
-
-    return {hit, resultIndex: hits.length - index, score: totalScore, stories: stories}
-  })
+    /*
+     * Filter out hits with no score.
+     * (only if search terms are present, otherwise we always show results)
+     *
+     * Due to how we generate search queries, in some cases it's possible to have returned search hits
+     * which shouldn't be displayed. This can happen when searching on multiple document types and
+     * user-configured `__experimental_search` paths are in play.
+     *
+     * Since search generates a GROQ query with filters that may refer to field names shared across
+     * multiple document types, it's possible that one document type searches on a field path
+     * that is hidden by another via `__experimental_search`.
+     */
+    if (terms.length === 0 || totalScore > 0) {
+      allHits.push({hit, resultIndex: hits.length - index, score: totalScore, stories: stories})
+    }
+    return allHits
+  }, [])
 }
+
 /**
  * For phrases: score on the total number of matching characters.
  * E.g. given the phrases ["the fox", "of london"] for the target value "the wily fox of london"
 *
 * - "the fox" isn't included in the target value (score: 0)
 * - "of london" is included in the target value, and 9 out of 22 characters match (score: 9/22 = ~0.409)
 * - non-exact matches have their score divided in half (final score: ~0.205)
+ *
+ * @param uniqueSearchPhrases - All search phrases
+ * @param value - The string to match against
+ * @returns SearchScore containing the search score as well as a human readable explanation
+ * @internal
  */
 export function calculatePhraseScore(uniqueSearchPhrases: string[], value: string): SearchScore {
   const sanitizedValue = value.toLowerCase().trim()
@@ -67,30 +110,70 @@ export function calculatePhraseScore(uniqueSearchPhrases: string[], value: strin
   return fieldScore === 1
     ? [1, '[Phrase] Exact match']
-    : [fieldScore / 2, `[Phrase] Matched ${matchCount} of ${sanitizedValue.length} characters`]
+    : [fieldScore / 2, `[Phrase] ${matchCount}/${sanitizedValue.length} chars`]
 }
 
 /**
  * For words: score on the total number of matching words.
+ * E.g. given the terms ["bar", "fo"] for the target value "food bar".
+ *
+ * - "fo" is included in the target value, and 2 out of 7 non-whitespace characters match (score: 2/7)
+ * - "bar" is included in the target value, and 3 out of 7 non-whitespace characters match (score: 3/7)
+ * - all values are accumulated and have their score divided in half (final score: ~0.357)
+ *
+ * @param uniqueSearchTerms - A string array of search terms
+ * @param value - The string to match against
+ * @returns SearchScore containing the search score as well as a human readable explanation
+ * @internal
+ */
+export function calculateCharacterScore(uniqueSearchTerms: string[], value: string): SearchScore {
+  const sanitizedValue = value.toLowerCase().trim()
+  const sanitizedValueCompact = sanitizedValue.replace(/ /g, '')
+
+  let fieldScore = 0
+  let matchCount = 0
+  uniqueSearchTerms.forEach((term) => {
+    if (sanitizedValue.includes(term)) {
+      fieldScore += term.length / sanitizedValueCompact.length
+      matchCount += term.length
+    }
+  })
+
+  return fieldScore === 1
+    ? [fieldScore, `[Char] Contains all`]
+    : [fieldScore / 2, `[Char] ${matchCount}/${sanitizedValueCompact.length} chars`]
+}
+
+/**
+ * Generate a score on the total number of matching _whole_ words.
  * E.g. given the words ["the", "fox", "of", "london"] for the target value "the wily fox of london"
 *
 * - 4 out of 5 words match (score: 4/5 = 0.8)
 * - non-exact matches have their score divided in half (final score: 0.4)
+ *
+ * @param uniqueSearchTerms - All search terms
+ * @param value - The string to match against
+ * @returns SearchScore containing the search score as well as a human readable explanation
+ * @internal
  */
 export function calculateWordScore(uniqueSearchTerms: string[], value: string): SearchScore {
   const uniqueValueTerms = uniq(compact(words(toLower(value))))
 
   const matches = intersection(uniqueSearchTerms, uniqueValueTerms)
   const all = union(uniqueValueTerms, uniqueSearchTerms)
-  const fieldScore = matches.length / all.length
+  const fieldScore = matches.length / all.length || 0
 
   return fieldScore === 1
     ? [1, '[Word] Exact match']
-    : [
-        fieldScore / 2,
-        `[Word] Matched ${matches.length} of ${all.length} terms: [${matches.join(', ')}]`,
-      ]
+    : [fieldScore / 2, `[Word] ${matches.length}/${all.length} terms: [${matches.join(', ')}]`]
 }
 
+/**
+ * Partition search terms by phrases (wrapped with quotes) and words.
+ *
+ * @param searchTerms - All search terms
+ * @returns Partitioned phrases and words
+ * @internal
+ */
 export function partitionAndSanitizeSearchTerms(
   searchTerms: string[]
 ): {
@@ -106,6 +189,34 @@
   }
 }
 
+/**
+ * Returns matching array indices of `values` containing _any_ member of `uniqueSearchTerms`.
+ * When comparing for matches, members of `values` are stringified, trimmed and lowercased.
+ *
+ * @param uniqueSearchTerms - All search terms
+ * @param values - Values to match against (members are stringified)
+ * @returns All matching indices in `values`
+ * @internal
+ */
+export function findMatchingIndices(uniqueSearchTerms: string[], values: unknown[]): number[] {
+  const {phrases: uniqueSearchPhrases, words: uniqueSearchWords} = partitionAndSanitizeSearchTerms(
+    uniqueSearchTerms
+  )
+
+  return values.reduce<number[]>((acc, val, index) => {
+    if (val) {
+      const contains = [...uniqueSearchPhrases, ...uniqueSearchWords].some((term) => {
+        const stringifiedValue = stringify(val).toLowerCase().trim()
+        return stringifiedValue.includes(term)
+      })
+      if (contains) {
+        acc.push(index)
+      }
+    }
+    return acc
+  }, [])
+}
+
 function stripWrappingQuotes(str: string) {
   return str.replace(/^"(.*)"$/, '$1')
 }
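How the pieces fit together: calculateScore sums the character, word and phrase partials, and applyWeights then multiplies the sum by the matching path weight. A worked example for the single term 'harry' against a title field (weight 10) holding 'Harry Potter' — the figures reproduce the 4.772727272727273 expected by createWeightedSearch.test.ts further below:

    // 'harry' covers 5 of the 11 non-whitespace characters; partial matches are halved
    const charScore = 5 / 11 / 2 // ≈ 0.2273
    // 'harry' matches 1 of 2 whole words; partial matches are halved
    const wordScore = 1 / 2 / 2 // = 0.25
    // the query contains no quoted phrases
    const phraseScore = 0
    // applyWeights multiplies by the path weight (title: 10)
    const weighted = (charScore + wordScore + phraseScore) * 10 // ≈ 4.7727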
diff --git a/packages/@sanity/base/src/search/weighted/createSearchQuery.test.ts b/packages/@sanity/base/src/search/weighted/createSearchQuery.test.ts
index 7d34fc59572..2e3ca037f62 100644
--- a/packages/@sanity/base/src/search/weighted/createSearchQuery.test.ts
+++ b/packages/@sanity/base/src/search/weighted/createSearchQuery.test.ts
@@ -1,6 +1,11 @@
 /* eslint-disable camelcase */
-import {createSearchQuery, DEFAULT_LIMIT, extractTermsFromQuery} from './createSearchQuery'
-import {SearchableType} from './types'
+import {
+  createSearchQuery,
+  createSearchSpecs,
+  DEFAULT_LIMIT,
+  extractTermsFromQuery,
+} from './createSearchQuery'
+import {SearchPath, SearchSpec, SearchableType} from './types'
 
 const testType: SearchableType = {
   name: 'basic-schema-test',
@@ -21,10 +26,10 @@ describe('createSearchQuery', () => {
     })
 
     expect(query).toEqual(
-      '*[_type in $__types && (title match $t0)]' +
+      '*[_type in $__types&&(title match $t0)]' +
         '| order(_id asc)' +
         '[$__offset...$__limit]' +
-        '{_type, _id, ...select(_type == "basic-schema-test" => { "w0": title })}'
+        '{_type,_id,...select(_type=="basic-schema-test"=>{"0":title})}'
     )
 
     expect(params).toEqual({
@@ -55,7 +60,7 @@ describe('createSearchQuery', () => {
       ],
     })
 
-    expect(query).toContain('*[_type in $__types && (title match $t0 || object.field match $t0)]')
+    expect(query).toContain('*[_type in $__types&&(title match $t0||object.field match $t0)]')
   })
 
   it('should have one match filter per term', () => {
@@ -64,7 +69,7 @@ describe('createSearchQuery', () => {
       types: [testType],
     })
 
-    expect(query).toContain('*[_type in $__types && (title match $t0) && (title match $t1)]')
+    expect(query).toContain('*[_type in $__types&&(title match $t0)&&(title match $t1)]')
     expect(params.t0).toEqual('term0*')
     expect(params.t1).toEqual('term1*')
   })
@@ -120,7 +125,7 @@ describe('createSearchQuery', () => {
     )
 
     expect(query).toContain(
-      '*[_type in $__types && (title match $t0) && (randomCondition == $customParam)]'
+      '*[_type in $__types&&(title match $t0)&&(randomCondition == $customParam)]'
     )
     expect(params.customParam).toEqual('custom')
   })
@@ -165,10 +170,10 @@ describe('createSearchQuery', () => {
     )
 
     expect(query).toEqual(
-      '*[_type in $__types && (title match $t0)]' +
+      '*[_type in $__types&&(title match $t0)]' +
         '| order(exampleField desc)' +
         '[$__offset...$__limit]' +
-        '{_type, _id, ...select(_type == "basic-schema-test" => { "w0": title })}'
+        '{_type,_id,...select(_type=="basic-schema-test"=>{"0":title})}'
     )
   })
@@ -179,10 +184,10 @@ describe('createSearchQuery', () => {
     })
 
     expect(query).toEqual(
-      '*[_type in $__types && (title match $t0)]' +
+      '*[_type in $__types&&(title match $t0)]' +
         '| order(_id asc)' +
         '[$__offset...$__limit]' +
-        '{_type, _id, ...select(_type == "basic-schema-test" => { "w0": title })}'
+        '{_type,_id,...select(_type=="basic-schema-test"=>{"0":title})}'
     )
   })
@@ -261,15 +266,12 @@ describe('createSearchQuery', () => {
        * As a workaround, we replace numbers with [] array syntax, so we at least get hits when the path matches anywhere in the array.
        * This is an improvement over before, where an illegal term was used (number-as-string, ala ["0"]),
        * which led to no hits at all. */
-
-      '*[_type in $__types && (cover[].cards[].title match $t0) && (cover[].cards[].title match $t1)]' +
+      '*[_type in $__types&&(cover[].cards[].title match $t0)&&(cover[].cards[].title match $t1)]' +
         '| order(_id asc)' +
         '[$__offset...$__limit]' +
         // at this point we could refilter using cover[0].cards[0].title.
         // This solution was discarded as it would increase the size of the query payload by up to 50%
-
-        // we still map out the path with number
-        '{_type, _id, ...select(_type == "numbers-in-path" => { "w0": cover[0].cards[0].title })}'
+        '{_type,_id,...select(_type=="numbers-in-path"=>{"0":cover[].cards[].title})}'
     )
   })
@@ -290,12 +292,71 @@ describe('createSearchQuery', () => {
       ],
     })
 
-    expect(query).toContain('*[_type in $__types && (pt::text(pteField) match $t0)')
-    expect(query).toContain('...select(_type == "type1" => { "w0": pt::text(pteField) })')
+    expect(query).toContain('*[_type in $__types&&(pt::text(pteField) match $t0)')
+    expect(query).toContain('...select(_type=="type1"=>{"0":pt::text(pteField)})')
   })
 })
 
+describe('createSearchSpecs', () => {
+  const author: SearchableType = {
+    name: 'author',
+    __experimental_search: [{path: ['name'], userProvided: true, weight: 1}],
+  }
+  const book: SearchableType = {
+    name: 'book',
+    __experimental_search: [
+      {path: ['author'], weight: 1},
+      {path: ['title'], weight: 1},
+    ],
+  }
+  const country: SearchableType = {
+    name: 'country',
+    __experimental_search: [
+      {path: ['anthem'], weight: 1},
+      {path: ['currency'], weight: 1},
+      {path: ['language'], weight: 1},
+      {path: ['location', 'lat'], weight: 1},
+      {path: ['location', 'lon'], weight: 1},
+      {path: ['industries', 'energy', 'natural', 'wind'], weight: 1},
+      {path: ['name'], weight: 1},
+    ],
+  }
+  const poem: SearchableType = {
+    name: 'poem',
+    __experimental_search: [
+      {path: ['author'], weight: 1},
+      {path: ['title'], weight: 1},
+    ],
+  }
+
+  it('should order paths by length', () => {
+    const {specs} = createSearchSpecs([author, book, country], true, 1000)
+    const {paths} = flattenSpecs(specs)
+    expect(paths[paths.length - 1].path).toEqual('industries.energy.natural.wind')
+  })
+
+  it('should not include duplicate paths when factoring maxAttributes', () => {
+    const {specs} = createSearchSpecs([book, poem], true, 1)
+    const {paths} = flattenSpecs(specs)
+    expect(paths).toHaveLength(2)
+  })
+
+  it('should always include user provided paths, regardless of attribute limit', () => {
+    const {specs} = createSearchSpecs([author], true, 0)
+    const {paths, skippedPaths} = flattenSpecs(specs)
+    expect(paths).toHaveLength(1)
+    expect(skippedPaths).toHaveLength(0)
+  })
+
+  it('should limit specs by a set number of attributes', () => {
+    const {specs} = createSearchSpecs([book, country], true, 1)
+    const {paths, skippedPaths} = flattenSpecs(specs)
+    expect(paths).toHaveLength(1)
+    expect(skippedPaths).toHaveLength(8)
+  })
+})
+
 describe('extractTermsFromQuery', () => {
   describe('should handle orphaned double quotes', () => {
     const tests: [string, string[]][] = [
@@ … @@
     expect(terms).toEqual([`foo`])
   })
 })
+
+function flattenSpecs(
+  specs: SearchSpec[]
+): {
+  paths: SearchPath[]
+  skippedPaths: SearchPath[]
+} {
+  return specs.reduce<{paths: SearchPath[]; skippedPaths: SearchPath[]}>(
+    (acc, val) => ({
+      paths: [...acc.paths, ...(val.paths || [])],
+      skippedPaths: [...acc.skippedPaths, ...(val.skippedPaths || [])],
+    }),
+    {paths: [], skippedPaths: []}
+  )
+}

diff --git a/packages/@sanity/base/src/search/weighted/createSearchQuery.ts b/packages/@sanity/base/src/search/weighted/createSearchQuery.ts
index 2b7e3c84a71..eefebdf14f2 100644
--- a/packages/@sanity/base/src/search/weighted/createSearchQuery.ts
+++ b/packages/@sanity/base/src/search/weighted/createSearchQuery.ts
@@ -1,4 +1,5 @@
 import {compact, flatten, flow, toLower, trim, union, uniq, words} from 'lodash'
+import {ExperimentalSearchPath} from '../../../../types'
 import {joinPath} from '../../util/searchUtils'
 import {tokenize} from '../common/tokenize'
 import type {
@@ -26,44 +27,110 @@ export interface SearchQuery {
   terms: string[]
 }
 
+interface IntermediateSearchType extends Omit<ExperimentalSearchPath, 'path'> {
+  path: string
+  pathLength: number
+  typeName: string
+}
+
+// Default number of documents to fetch
 export const DEFAULT_LIMIT = 1000
 
+// Maximum number of unique searchable attributes to include in a single search query (across all document types)
+const SEARCH_ATTR_LIMIT =
+  // eslint-disable-next-line no-process-env
+  Number(process.env.SANITY_STUDIO_UNSTABLE_SEARCH_ATTR_LIMIT) || 1000
+
 const combinePaths = flow([flatten, union, compact])
 
 /**
- * Create an object containing all available document types and weighted paths, used to construct a GROQ query for search.
- * System fields `_id` and `_type` are included by default.
+ * Create search specs from supplied searchable types.
+ * Search specs contain weighted paths which are used to construct GROQ queries for search.
  *
- * If `optimizeIndexPaths` is true, this will will convert all `__experimental_search` paths containing numbers
- * into array syntax. E.g. ['cover', 0, 'cards', 0, 'title'] => "cover[].cards[].title"
+ * @param types - Searchable document types to create specs from.
+ * @param optimizeIndexedPaths - If true, will convert all `__experimental_search` paths containing numbers into array syntax.
+ *   E.g. ['cover', 0, 'cards', 0, 'title'] => "cover[].cards[].title"
  *
- * This optimization will yield more search results than may be intended, but offers better performance over arrays with indices.
- * (which are currently unoptimizable by Content Lake)
+ *   This optimization will yield more search results than may be intended, but offers better performance over arrays with indices.
+ *   (which are currently unoptimizable by Content Lake)
+ * @param maxAttributes - Maximum number of _unique_ searchable attributes to include across all types.
+ *   User-provided paths (e.g. with __experimental_search) do not count towards this limit.
+ * @returns All matching search specs and `hasIndexedPaths`, a boolean indicating whether any paths contain indices.
+ * @internal
  */
-function createSearchSpecs(types: SearchableType[], optimizeIndexedPaths) {
+export function createSearchSpecs(
+  types: SearchableType[],
+  optimizeIndexedPaths: boolean,
+  maxAttributes: number
+): {
+  hasIndexedPaths: boolean
+  specs: SearchSpec[]
+} {
   let hasIndexedPaths = false
+  const addedPaths = []
 
-  const specs = types.map((type) => ({
-    typeName: type.name,
-    paths: type.__experimental_search.map((config) => {
-      const path = config.path.map((p) => {
-        if (typeof p === 'number') {
-          hasIndexedPaths = true
-          if (optimizeIndexedPaths) {
-            return [] as []
-          }
-        }
-        return p
-      })
-      return {
-        weight: config.weight,
-        path: joinPath(path),
-        mapWith: config.mapWith,
-      }
-    }),
-  }))
+  const specsByType = types
+    // Extract and flatten all paths
+    .reduce<IntermediateSearchType[]>((acc, val) => {
+      const newPaths = val.__experimental_search.map((config) => {
+        const path = config.path.map((p) => {
+          if (typeof p === 'number') {
+            hasIndexedPaths = true
+            if (optimizeIndexedPaths) {
+              return [] as []
+            }
+          }
+          return p
+        })
+        return {
+          ...config,
+          path: joinPath(path),
+          pathLength: path.length,
+          typeName: val.name,
+        }
+      })
+      return acc.concat(newPaths)
+    }, [])
+    // Sort by path length, typeName, path (asc)
+    .sort((a, b) => {
+      if (a.pathLength === b.pathLength) {
+        if (a.typeName === b.typeName) return a.path > b.path ? 1 : -1
+        return a.typeName > b.typeName ? 1 : -1
+      }
+      return a.pathLength > b.pathLength ? 1 : -1
+    })
+    // Reduce into specs (by type) and conditionally add unique paths up until the `maxAttributes` limit
+    .reduce<Record<string, SearchSpec>>((acc, val) => {
+      const isPathAdded = addedPaths.includes(val.path)
+      // Include the current path if it's already been added or within the `maxAttributes` limit.
+      // User provided paths are always included by default.
+      const includeSpec = isPathAdded || val.userProvided || addedPaths.length < maxAttributes
+      if (!isPathAdded && addedPaths.length < maxAttributes) {
+        addedPaths.push(val.path)
+      }
+
+      const searchPath: SearchPath = {
+        mapWith: val.mapWith,
+        path: val.path,
+        weight: val.weight,
+      }
+
+      acc[val.typeName] = {
+        ...acc[val.typeName],
+        ...(includeSpec && {
+          paths: (acc[val.typeName]?.paths || []).concat([searchPath]),
+        }),
+        ...(!includeSpec && {
+          skippedPaths: (acc[val.typeName]?.skippedPaths || []).concat([searchPath]),
+        }),
+        typeName: val.typeName,
+      }
+
+      return acc
+    }, {})
 
   return {
-    specs,
+    specs: Object.values(specsByType),
     hasIndexedPaths,
   }
 }
@@ -83,7 +150,7 @@ function createConstraints(terms: string[], specs: SearchSpec[]) {
     .map((_term, i) => combinedSearchPaths.map((joinedPath) => `${joinedPath} match $t${i}`))
     .filter((constraint) => constraint.length > 0)
 
-  return constraints.map((constraint) => `(${constraint.join(' || ')})`)
+  return constraints.map((constraint) => `(${constraint.join('||')})`)
 }
 
 /**
@@ -94,6 +161,10 @@ function createConstraints(terms: string[], specs: SearchSpec[]) {
  * E.g.`"the" "fantastic mr" fox fox book` => ["the", `"fantastic mr"`, "fox", "book"]
 *
 * Phrases wrapped in quotes are assigned relevance scoring differently from regular words.
+ *
+ * @param query - A string to convert into individual tokens
+ * @returns All extracted tokens
+ * @internal
  */
 export function extractTermsFromQuery(query: string): string[] {
   const quotedQueries = [] as string[]
@@ -108,7 +179,7 @@ export function extractTermsFromQuery(query: string): string[] {
   // Lowercase and trim quoted queries
   const quotedTerms = quotedQueries.map((str) => trim(toLower(str)))
 
-  /**
+  /*
    * Convert (remaining) search query into an array of deduped, sanitized tokens.
    * All white space and special characters are removed.
    * e.g. "The saint of Saint-Germain-des-Prés" => ['the', 'saint', 'of', 'germain', 'des', 'pres']
@@ -118,31 +189,31 @@ export function extractTermsFromQuery(query: string): string[] {
   return [...quotedTerms, ...remainingTerms]
 }
 
+/**
+ * Generate search query data based off provided search terms and options.
+ *
+ * @param searchTerms - SearchTerms containing a string query and any number of searchable document types.
+ * @param searchOpts - Optional search configuration.
+ * @returns GROQ query, params and options to be used to fetch search results.
+ * @internal
+ */
 export function createSearchQuery(
   searchTerms: SearchTerms,
   searchOpts: SearchOptions & WeightedSearchOptions = {}
 ): SearchQuery {
   const {filter, params, tag} = searchOpts
 
-  /**
-   * First pass: create initial search specs and determine if this subset of types contains
-   * any indexed paths in `__experimental_search`.
-   * e.g. "authors.0.title" or ["authors", 0, "title"]
-   */
-  const {specs: exactSearchSpecs, hasIndexedPaths} = createSearchSpecs(searchTerms.types, false)
-
   // Extract search terms from string query, factoring in phrases wrapped in quotes
   const terms = extractTermsFromQuery(searchTerms.query)
 
-  /**
-   * Second pass: create an optimized spec (with array indices removed), but only if types with any
-   * indexed paths have been previously found. Otherwise, passthrough original search specs.
+  /*
+   * Create an optimized search spec which removes array indices from __experimental_search paths.
+   * e.g. ["authors", 0, "title"] => "authors[].title"
    *
-   * These optimized specs are only used when building constraints in this search query.
+   * These optimized specs are used when building constraints in this search query and assigning
+   * weight to search hits.
    */
-  const optimizedSpecs = hasIndexedPaths
-    ? createSearchSpecs(searchTerms.types, true).specs
-    : exactSearchSpecs
+  const optimizedSpecs = createSearchSpecs(searchTerms.types, true, SEARCH_ATTR_LIMIT).specs
 
   // Construct search filters used in this GROQ query
   const filters = [
@@ -152,25 +223,28 @@ export function createSearchQuery(
     filter ? `(${filter})` : '',
   ].filter(Boolean)
 
-  const selections = exactSearchSpecs.map((spec) => {
-    const constraint = `_type == "${spec.typeName}" => `
-    const selection = `{ ${spec.paths.map((cfg, i) => `"w${i}": ${pathWithMapper(cfg)}`)} }`
+  // Construct individual type selections based on __experimental_search paths,
+  // but ignore _id and _type keys (as these are included in all types)
+  const selections = optimizedSpecs.map((spec) => {
+    const constraint = `_type=="${spec.typeName}"=>`
+    const selection = `{${spec.paths
+      .filter((cfg) => !['_id', '_type'].includes(cfg.path))
+      .map((cfg, i) => `"${i}":${pathWithMapper(cfg)}`)}}`
     return `${constraint}${selection}`
   })
-
-  const selection = selections.length > 0 ? `...select(${selections.join(',\n')})` : ''
+  const selection = selections.length > 0 ? `...select(${selections.join(',')})` : ''
 
   // Default to `_id asc` (GROQ default) if no search sort is provided
   const sortDirection = searchOpts?.sort?.direction || ('asc' as SortDirection)
   const sortField = searchOpts?.sort?.field || '_id'
 
   const query =
-    `*[${filters.join(' && ')}]` +
+    `*[${filters.join('&&')}]` +
     `| order(${sortField} ${sortDirection})` +
     `[$__offset...$__limit]` +
     // the following would improve search quality for paths-with-numbers, but increases the size of the query by up to 50%
     // `${hasIndexedPaths ? `[${createConstraints(terms, exactSearchSpec).join(' && ')}]` : ''}` +
-    `{_type, _id, ${selection}}`
+    `{_type,_id,${selection}}`
 
   // Prepend optional GROQ comments to query
   const groqComments = (searchOpts?.comments || []).map((s) => `// ${s}`).join('\n')
@@ -183,13 +257,13 @@ export function createSearchQuery(
     query: updatedQuery,
     params: {
       ...toGroqParams(terms),
-      __types: exactSearchSpecs.map((spec) => spec.typeName),
+      __types: optimizedSpecs.map((spec) => spec.typeName),
       __limit: limit,
       __offset: offset,
       ...(params || {}),
     },
     options: {tag},
-    searchSpec: exactSearchSpecs,
+    searchSpec: optimizedSpecs,
     terms,
   }
 }
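Usage sketch: callers hand createSearchQuery a SearchTerms object (query string plus searchable types) and optional options; createWeightedSearch passes the resulting query and params to the client, then pipes hits through applyWeights. The document type below is hypothetical, but its shape matches the fixtures in createSearchQuery.test.ts:

    import {createSearchQuery} from './createSearchQuery'

    // Hypothetical searchable type (shape follows SearchableType)
    const book = {
      name: 'book',
      __experimental_search: [{path: ['title'], weight: 10}],
    }

    const {query, params, terms} = createSearchQuery({query: 'harry "the wizard"', types: [book]})
    // terms: quoted phrases are kept whole, remaining words are tokenized, e.g.
    // '"the" "fantastic mr" fox fox book' => ['the', '"fantastic mr"', 'fox', 'book']
    // query: one `match $tN` constraint per term, with minimal whitespace, e.g.
    // '*[_type in $__types&&(title match $t0)&&(title match $t1)]| order(_id asc)...'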
diff --git a/packages/@sanity/base/src/search/weighted/createWeightedSearch.test.ts b/packages/@sanity/base/src/search/weighted/createWeightedSearch.test.ts
index 84593006076..b78469fcc1f 100644
--- a/packages/@sanity/base/src/search/weighted/createWeightedSearch.test.ts
+++ b/packages/@sanity/base/src/search/weighted/createWeightedSearch.test.ts
@@ -20,8 +20,8 @@ const mockSchema = Schema.compile({
 
 const searchHits = defer(() =>
   of([
-    {_id: 'id0', _type: 'book', w0: 'id0', w1: 'book', w2: 'Harry Potter'},
-    {_id: 'id1', _type: 'book', w0: 'id1', w1: 'book', w2: 'Harry'},
+    {_id: 'id0', _type: 'book', 0: 'id0', 1: 'book', 2: 'Harry Potter'},
+    {_id: 'id1', _type: 'book', 0: 'id1', 1: 'book', 2: 'Harry'},
   ])
 )
@@ -37,8 +37,8 @@ describe('createWeightedSearch', () => {
     // @todo: replace `toPromise` with `firstValueFrom` in rxjs 7+
     const result = await search({query: 'harry', types: []} as SearchTerms).toPromise()
 
-    expect(result[0].score).toEqual(10)
-    expect(result[1].score).toEqual(2.5)
+    expect(result[0].score).toEqual(20)
+    expect(result[1].score).toEqual(4.772727272727273)
   })
 
   it('should not order hits by score if skipSortByScore is enabled', async () => {
@@ -47,7 +47,7 @@ describe('createWeightedSearch', () => {
       skipSortByScore: true,
     }).toPromise()
 
-    expect(result[0].score).toEqual(2.5)
-    expect(result[1].score).toEqual(10)
+    expect(result[0].score).toEqual(4.772727272727273)
+    expect(result[1].score).toEqual(20)
   })
 })

diff --git a/packages/@sanity/base/src/search/weighted/index.ts b/packages/@sanity/base/src/search/weighted/index.ts
index 7267e52aa52..d72fd65de2d 100644
--- a/packages/@sanity/base/src/search/weighted/index.ts
+++ b/packages/@sanity/base/src/search/weighted/index.ts
@@ -1 +1,3 @@
 export {createWeightedSearch} from './createWeightedSearch'
+export {createSearchQuery} from './createSearchQuery'
+export * from './types'

diff --git a/packages/@sanity/base/src/search/weighted/types.ts b/packages/@sanity/base/src/search/weighted/types.ts
index b8c22447dab..8a58731b18d 100644
--- a/packages/@sanity/base/src/search/weighted/types.ts
+++ b/packages/@sanity/base/src/search/weighted/types.ts
@@ -33,6 +33,7 @@ export interface SearchPath {
 export interface SearchSpec {
   typeName: string
   paths: SearchPath[]
+  skippedPaths: SearchPath[]
 }
 
 /**
@@ -41,13 +42,14 @@ export interface SearchSpec {
 export interface SearchHit {
   _type: string
   _id: string
-  [key: string]: unknown
+  [key: string]: string
 }
 
 /**
 * @internal
 */
 export interface SearchStory {
+  indices?: number[]
   path: string
   score: number
   why: string

diff --git a/packages/@sanity/default-layout/src/navbar/search/components/searchResultItem/DebugOverlay.tsx b/packages/@sanity/default-layout/src/navbar/search/components/searchResultItem/DebugOverlay.tsx
index 9b28ded3bc2..5ca97bcb8de 100644
--- a/packages/@sanity/default-layout/src/navbar/search/components/searchResultItem/DebugOverlay.tsx
+++ b/packages/@sanity/default-layout/src/navbar/search/components/searchResultItem/DebugOverlay.tsx
@@ -39,6 +39,9 @@ export function DebugOverlay({data}: DebugScoreProps) {
             {story.path}
+            {!!story.indices?.length && (
+              {JSON.stringify(story.indices)}
+            )}
             {story.why}
           ))}

diff --git a/packages/@sanity/schema/package.json b/packages/@sanity/schema/package.json
index cbd192d2c6a..2ee2314dfe7 100644
--- a/packages/@sanity/schema/package.json
+++ b/packages/@sanity/schema/package.json
@@ -31,6 +31,7 @@
   "license": "MIT",
   "dependencies": {
     "@sanity/generate-help-url": "^3.0.0",
+    "@sanity/types": "2.35.0",
     "arrify": "^1.0.1",
     "humanize-list": "^1.0.1",
     "leven": "^3.1.0",

diff --git a/packages/@sanity/schema/src/legacy/searchConfig/normalize.test.ts b/packages/@sanity/schema/src/legacy/searchConfig/normalize.test.ts
index fa07b0faecb..40d569a6e3c 100644
--- a/packages/@sanity/schema/src/legacy/searchConfig/normalize.test.ts
+++ b/packages/@sanity/schema/src/legacy/searchConfig/normalize.test.ts
@@ -1,9 +1,9 @@
-import {normalizeSearchConfigs} from './normalize'
+import {normalizeUserSearchConfigs} from './normalize'
 
 describe('searchConfig.normalize', () => {
   describe('normalizeSearchConfigs', () => {
     it('should keep numbers as numbers in path segments', () => {
-      const normalized = normalizeSearchConfigs([
+      const normalized = normalizeUserSearchConfigs([
         {weight: 10, path: ['retain', 0, 'numbers']},
         {weight: 1, path: 'with.0.number'},
         {path: 'missing.weight'},
@@ -11,10 +11,10 @@ describe('searchConfig.normalize', () => {
       ])
 
       expect(normalized).toEqual([
-        {weight: 10, path: ['retain', 0, 'numbers'], mapWith: undefined},
-        {weight: 1, path: ['with', 0, 'number'], mapWith: undefined},
-        {weight: 1, path: ['missing', 'weight'], mapWith: undefined},
-        {weight: 2, path: ['map', 'with'], mapWith: 'datetime'},
+        {weight: 10, path: ['retain', 0, 'numbers'], mapWith: undefined, userProvided: true},
+        {weight: 1, path: ['with', 0, 'number'], mapWith: undefined, userProvided: true},
+        {weight: 1, path: ['missing', 'weight'], mapWith: undefined, userProvided: true},
+        {weight: 2, path: ['map', 'with'], mapWith: 'datetime', userProvided: true},
       ])
     })
   })

diff --git a/packages/@sanity/schema/src/legacy/searchConfig/normalize.ts b/packages/@sanity/schema/src/legacy/searchConfig/normalize.ts
index 9f09b9cf79b..178b6c68200 100644
--- a/packages/@sanity/schema/src/legacy/searchConfig/normalize.ts
+++ b/packages/@sanity/schema/src/legacy/searchConfig/normalize.ts
@@ -1,6 +1,6 @@
 import {isPlainObject, toPath} from 'lodash'
 
-export function normalizeSearchConfigs(configs) {
+export function normalizeUserSearchConfigs(configs) {
   if (!Array.isArray(configs)) {
     throw new Error(
       'The search config of a document type must be an array of search config objects'
@@ -17,6 +17,7 @@ export function normalizeSearchConfigs(configs) {
       weight: 'weight' in conf ? conf.weight : 1,
       path: toPath(conf.path).map(stringsToNumbers),
       mapWith: typeof conf.mapWith === 'string' ? conf.mapWith : undefined,
+      userProvided: true,
     }
   })
 }

diff --git a/packages/@sanity/schema/src/legacy/searchConfig/resolve.test.ts b/packages/@sanity/schema/src/legacy/searchConfig/resolve.test.ts
index 7454504c3b5..88fced9bdd2 100644
--- a/packages/@sanity/schema/src/legacy/searchConfig/resolve.test.ts
+++ b/packages/@sanity/schema/src/legacy/searchConfig/resolve.test.ts
@@ -1,6 +1,140 @@
-import {deriveFromPreview} from './resolve'
+import Schema from '../Schema'
+import {
+  deriveFromPreview,
+  getCachedStringFieldPaths,
+  pathCountSymbol,
+  stringFieldsSymbol,
+} from './resolve'
 
 describe('searchConfig.resolve', () => {
+  describe('getCachedStringFieldPaths', () => {
+    const mockSchema = Schema.compile({
+      name: 'default',
+      types: [
+        {
+          name: 'objA',
+          title: 'Circular Document A',
+          type: 'document',
+          fields: [
+            {name: 'description', type: 'text'},
+            {
+              name: 'nested',
+              type: 'object',
+              fields: [
+                {name: 'objA', type: 'objA'},
+                {name: 'nestedTitle', type: 'string'},
+              ],
+            },
+            {name: 'authors', type: 'array', of: [{type: 'string'}]},
+            {name: 'objB', type: 'objB'},
+            {name: 'objATitle', type: 'string'},
+            {name: 'title', type: 'string'},
+          ],
+        },
+        {
+          name: 'objB',
+          title: 'Circular Document B',
+          type: 'document',
+          fields: [
+            {
+              name: 'nested',
+              type: 'object',
+              fields: [
+                {name: 'objB', type: 'objB'},
+                {name: 'nestedTitle', type: 'string'},
+              ],
+            },
+            {name: 'objA', type: 'objA'},
+            {name: 'authors', type: 'array', of: [{type: 'string'}]},
+            {name: 'title', type: 'string'},
+            {name: 'description', type: 'text'},
+            {name: 'objBTitle', type: 'string'},
+          ],
+        },
+      ],
+    })
+
+    const objA = mockSchema.get('objA')
+
+    // Clear cached values
+    beforeEach(() => {
+      delete objA[stringFieldsSymbol]
+      delete objA[pathCountSymbol]
+    })
+
+    it('should always include _id and _type fields', () => {
+      expect(getCachedStringFieldPaths(objA, 0, 10)).toEqual(
+        expect.arrayContaining([
+          {path: ['_id'], weight: 1},
+          {path: ['_type'], weight: 1},
+        ])
+      )
+    })
+
+    it('should sort all other fields ahead of objects and arrays', () => {
+      expect(getCachedStringFieldPaths(objA, 2, 100)).toEqual([
+        {path: ['_id'], weight: 1},
+        {path: ['_type'], weight: 1},
+        {path: ['title'], weight: 10},
+        {path: ['description'], weight: 1},
+        {path: ['objATitle'], weight: 1},
+        //
+        {path: ['authors', []], weight: 1},
+        {path: ['nested', 'nestedTitle'], weight: 1},
+        {path: ['objB', 'description'], weight: 1},
+        {path: ['objB', 'objBTitle'], weight: 1},
+        {path: ['objB', 'title'], weight: 1},
+      ])
+    })
+
+    it('should limit on depth (1 level)', () => {
+      expect(getCachedStringFieldPaths(objA, 1, 250)).toEqual([
+        {path: ['_id'], weight: 1},
+        {path: ['_type'], weight: 1},
+        {path: ['title'], weight: 10},
+        {path: ['description'], weight: 1},
+        {path: ['objATitle'], weight: 1},
+      ])
+    })
+
+    it('should limit on depth (10 levels)', () => {
+      const paths = getCachedStringFieldPaths(objA, 10, 5000)
+      expect(paths).toEqual(
+        expect.arrayContaining([
+          // prettier-ignore
+          { path: ['objB', 'objA', 'objB', 'objA', 'objB', 'objA', 'objB', 'objA', 'objB', 'title'], weight: 1 },
+        ])
+      )
+      expect(paths).toEqual(
+        expect.arrayContaining([
+          // prettier-ignore
+          { path: ['nested', 'objA', 'nested', 'objA', 'nested', 'objA', 'nested', 'objA', 'objB', 'title'], weight: 1 },
+        ])
+      )
+    })
+
+    it('should include all root-level non-object/array fields even when dealing with recursive structures', () => {
+      expect(getCachedStringFieldPaths(objA, 500, 10)).toEqual(
+        expect.arrayContaining([
+          {path: ['_id'], weight: 1},
+          {path: ['_type'], weight: 1},
+          {path: ['description'], weight: 1},
+          {path: ['objATitle'], weight: 1},
+          {path: ['title'], weight: 10},
+        ])
+      )
+    })
+
+    it('should limit on the total number of search paths', () => {
+      expect(getCachedStringFieldPaths(objA, 100, 250)).toHaveLength(250)
+    })
+
+    it('should cache field paths by type', () => {
+      getCachedStringFieldPaths(objA, 100, 500)
+      const paths = getCachedStringFieldPaths(objA, 1, 1)
+      expect(paths).toHaveLength(500)
+    })
+  })
+
   describe('deriveFromPreview', () => {
     it('should split selected fields, and add default weights, keeping numbers as numbers', () => {
       const weightedPaths = deriveFromPreview({
@@ -9,6 +143,7 @@ describe('searchConfig.resolve', () => {
           title: 'cover.0.card.0.title',
           subtitle: 'singleField',
           description: 'nested.field',
+          // @ts-expect-error test-specific: ignored isn't a valid key
           ignored: 'anyField',
         },
       },

diff --git a/packages/@sanity/schema/src/legacy/searchConfig/resolve.ts b/packages/@sanity/schema/src/legacy/searchConfig/resolve.ts
index 0fb89f29467..d8ad56b7dc1 100644
--- a/packages/@sanity/schema/src/legacy/searchConfig/resolve.ts
+++ b/packages/@sanity/schema/src/legacy/searchConfig/resolve.ts
@@ -1,70 +1,135 @@
 import {uniqBy} from 'lodash'
+import {
+  ArraySchemaType,
+  isArraySchemaType,
+  isObjectSchemaType,
+  isBlockSchemaType,
+  isReferenceSchemaType,
+  ObjectSchemaType,
+  SchemaType,
+} from '@sanity/types'
 import {stringsToNumbers} from './normalize'
 
-const stringFieldsSymbol = Symbol('__cachedStringFields')
+interface SearchPath {
+  path: (string | number | [])[]
+  weight: number
+  mapWith?: string
+}
 
-const isReference = (type) => type.type && type.type.name === 'reference'
+export const stringFieldsSymbol = Symbol('__cachedStringFields')
+export const pathCountSymbol = Symbol('__cachedPathCount')
 
-const portableTextFields = ['style', 'list']
-const isPortableTextBlock = (type) =>
-  type.name === 'block' || (type.type && isPortableTextBlock(type.type))
-const isPortableTextArray = (type) =>
-  type.jsonType === 'array' && Array.isArray(type.of) && type.of.some(isPortableTextBlock)
+// Max number of levels to traverse per root-level object
+// eslint-disable-next-line no-process-env
+const SEARCH_DEPTH_LIMIT = Number(process.env.SANITY_STUDIO_UNSTABLE_SEARCH_DEPTH_LIMIT) || 15
+
+// Max number of search paths to extract per root-level object
+// eslint-disable-next-line no-process-env
+const SEARCH_PATH_LIMIT = Number(process.env.SANITY_STUDIO_UNSTABLE_SEARCH_PATH_LIMIT) || 500
+
+const BASE_WEIGHTS = [
+  {weight: 1, path: ['_id']},
+  {weight: 1, path: ['_type']},
+]
 
-function reduceType(type, reducer, acc, path = [], maxDepth) {
-  if (maxDepth < 0) {
+const PREVIEW_FIELD_WEIGHT_MAP = {
+  title: 10,
+  subtitle: 5,
+  description: 1.5,
+}
+
+const PORTABLE_TEXT_FIELDS = ['style', 'list']
+
+const isPortableTextArray = (type) =>
+  isArraySchemaType(type) && Array.isArray(type.of) && type.of.some(isBlockSchemaType)
+
+type SchemaTypeReducer = (
+  acc: SearchPath[],
+  type: SchemaType,
+  path: SearchPath['path']
+) => SearchPath[]
+
+// eslint-disable-next-line max-params
+function reduceType(
+  rootType: ObjectSchemaType,
+  type: SchemaType,
+  reducer: SchemaTypeReducer,
+  acc: SearchPath[],
+  path = [],
+  maxDepth: number
+) {
+  if (maxDepth < 0 || rootType[pathCountSymbol] < 0) {
     return acc
   }
   const accumulator = reducer(acc, type, path)
-  if (type.jsonType === 'array' && Array.isArray(type.of)) {
-    return reduceArray(type, reducer, accumulator, path, maxDepth)
+  if (isArraySchemaType(type) && Array.isArray(type.of)) {
+    return reduceArray(rootType, type, reducer, accumulator, path, maxDepth)
   }
-  if (type.jsonType === 'object' && Array.isArray(type.fields) && !isReference(type)) {
-    return reduceObject(type, reducer, accumulator, path, maxDepth)
+  if (isObjectSchemaType(type) && Array.isArray(type.fields) && !isReferenceSchemaType(type)) {
+    return reduceObject(rootType, type, reducer, accumulator, path, maxDepth)
   }
+  // Store and mutate count on the root type to handle circular recursive structures
+  rootType[pathCountSymbol] -= 1
   return accumulator
 }
 
-function reduceArray(arrayType, reducer, accumulator, path, maxDepth) {
+// eslint-disable-next-line max-params
+function reduceArray(
+  rootType: ObjectSchemaType,
+  arrayType: ArraySchemaType,
+  reducer: SchemaTypeReducer,
+  accumulator: SearchPath[],
+  path: SearchPath['path'],
+  maxDepth: number
+) {
   return arrayType.of.reduce(
-    (acc, ofType) => reduceType(ofType, reducer, acc, path, maxDepth - 1),
+    (acc, ofType) => reduceType(rootType, ofType, reducer, acc, path, maxDepth - 1),
     accumulator
   )
 }
 
-function reduceObject(objectType, reducer, accumulator, path, maxDepth) {
-  const isPtBlock = isPortableTextBlock(objectType)
-  return objectType.fields.reduce((acc, field) => {
-    // Don't include styles and list types as searchable paths for portable text blocks
-    if (isPtBlock && portableTextFields.includes(field.name)) {
-      return acc
-    }
-
-    const segment = [field.name].concat(field.type.jsonType === 'array' ? [[]] : [])
-    return reduceType(field.type, reducer, acc, path.concat(segment), maxDepth - 1)
-  }, accumulator)
-}
-
-const BASE_WEIGHTS = [
-  {weight: 1, path: ['_id']},
-  {weight: 1, path: ['_type']},
-]
-
-const PREVIEW_FIELD_WEIGHT_MAP = {
-  title: 10,
-  subtitle: 5,
-  description: 1.5,
+// eslint-disable-next-line max-params
+function reduceObject(
+  rootType: ObjectSchemaType,
+  objectType: ObjectSchemaType,
+  reducer: SchemaTypeReducer,
+  accumulator: SearchPath[],
+  path: SearchPath['path'],
+  maxDepth: number
+) {
+  return Array.from(objectType.fields)
+    .sort((a, b) => {
+      // Object fields with these types will be pushed to the end
+      const sortTypes = ['array', 'object']
+
+      const aIsObjectOrArray = sortTypes.includes(a.type.jsonType)
+      const bIsObjectOrArray = sortTypes.includes(b.type.jsonType)
+
+      // Sort by name when both (or neither) comparands are objects and/or arrays
+      if (aIsObjectOrArray) {
+        return bIsObjectOrArray ? a.name.localeCompare(b.name) : 1
+      }
+      return bIsObjectOrArray ? -1 : a.name.localeCompare(b.name)
+    })
+    .reduce((acc, field) => {
+      // Don't include styles and list types as searchable paths for portable text blocks
+      if (isBlockSchemaType(objectType) && PORTABLE_TEXT_FIELDS.includes(field.name)) {
+        return acc
+      }
+      const segment = ([field.name] as SearchPath['path']).concat(
+        isArraySchemaType(field.type) ? [[]] : []
+      )
+      return reduceType(rootType, field.type, reducer, acc, path.concat(segment), maxDepth - 1)
+    }, accumulator)
 }
 
 /**
 * @internal
 */
-export function deriveFromPreview(type: {
-  preview: {select: Record<string, string>}
-}): {weight?: number; path: (string | number)[]}[] {
+export function deriveFromPreview(type: ObjectSchemaType): SearchPath[] {
   const select = type?.preview?.select
 
   if (!select) {
@@ -79,26 +144,23 @@ export function deriveFromPreview(type: {
   }))
 }
 
-function getCachedStringFieldPaths(type, maxDepth) {
+export function getCachedStringFieldPaths(
+  type: ObjectSchemaType,
+  maxDepth: number,
+  maxSearchPaths: number
+): SearchPath[] {
+  type[pathCountSymbol] = maxSearchPaths
+
   if (!type[stringFieldsSymbol]) {
     type[stringFieldsSymbol] = uniqBy(
-      [
-        ...BASE_WEIGHTS,
-        ...deriveFromPreview(type),
-        ...getStringFieldPaths(type, maxDepth).map((path) => ({weight: 1, path})),
-        ...getPortableTextFieldPaths(type, maxDepth).map((path) => ({
-          weight: 1,
-          path,
-          mapWith: 'pt::text',
-        })),
-      ],
+      [...BASE_WEIGHTS, ...deriveFromPreview(type), ...getFieldSearchPaths(type, maxDepth)],
       (spec) => spec.path.join('.')
-    )
+    ).slice(0, maxSearchPaths)
   }
   return type[stringFieldsSymbol]
 }
 
-function getCachedBaseFieldPaths(type, maxDepth) {
+function getCachedBaseFieldPaths(type: ObjectSchemaType) {
   if (!type[stringFieldsSymbol]) {
     type[stringFieldsSymbol] = uniqBy([...BASE_WEIGHTS, ...deriveFromPreview(type)], (spec) =>
       spec.path.join('.')
@@ -107,24 +169,24 @@ function getCachedBaseFieldPaths(type, maxDepth) {
   return type[stringFieldsSymbol]
 }
 
-function getStringFieldPaths(type, maxDepth) {
-  const reducer = (accumulator, childType, path) =>
-    childType.jsonType === 'string' ? [...accumulator, path] : accumulator
-
-  return reduceType(type, reducer, [], [], maxDepth)
-}
-
-function getPortableTextFieldPaths(type, maxDepth) {
-  const reducer = (accumulator, childType, path) =>
-    isPortableTextArray(childType) ? [...accumulator, path] : accumulator
+function getFieldSearchPaths(type: ObjectSchemaType, maxDepth: number) {
+  const reducer: SchemaTypeReducer = (acc, childType, path) => {
+    if (childType.jsonType === 'string') {
+      return [...acc, {path, weight: 1}]
+    }
+    if (isPortableTextArray(childType)) {
+      return [...acc, {mapWith: 'pt::text', path, weight: 1}]
+    }
+    return acc
+  }
 
-  return reduceType(type, reducer, [], [], maxDepth)
+  return reduceType(type, type, reducer, [], [], maxDepth)
 }
 
-export function resolveSearchConfigForBaseFieldPaths(type) {
-  return getCachedBaseFieldPaths(type, 4)
+export function resolveSearchConfigForBaseFieldPaths(type: ObjectSchemaType): SearchPath[] {
+  return getCachedBaseFieldPaths(type)
 }
 
-export default function resolveSearchConfig(type) {
-  return getCachedStringFieldPaths(type, 4)
+export default function resolveSearchConfig(type: ObjectSchemaType): SearchPath[] {
+  return getCachedStringFieldPaths(type, SEARCH_DEPTH_LIMIT, SEARCH_PATH_LIMIT)
 }
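To make the new caps concrete: resolveSearchConfig(type) now delegates to getCachedStringFieldPaths(type, SEARCH_DEPTH_LIMIT, SEARCH_PATH_LIMIT). For the recursive objA fixture in resolve.test.ts above, a depth of 2 and a generous path cap yield plain string fields first, then object/array paths:

    // getCachedStringFieldPaths(objA, 2, 100) — expectations taken from resolve.test.ts
    // [
    //   {path: ['_id'], weight: 1},
    //   {path: ['_type'], weight: 1},
    //   {path: ['title'], weight: 10},       // boosted via deriveFromPreview weighting
    //   {path: ['description'], weight: 1},
    //   {path: ['objATitle'], weight: 1},
    //   {path: ['authors', []], weight: 1},  // array fields contribute `[]` segments
    //   {path: ['nested', 'nestedTitle'], weight: 1},
    //   ...
    // ]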
diff --git a/packages/@sanity/schema/src/legacy/types/object.ts b/packages/@sanity/schema/src/legacy/types/object.ts
index f57a430f360..c3d995ab9b1 100644
--- a/packages/@sanity/schema/src/legacy/types/object.ts
+++ b/packages/@sanity/schema/src/legacy/types/object.ts
@@ -1,7 +1,7 @@
 import {castArray, flatMap, keyBy, pick, startCase} from 'lodash'
 import createPreviewGetter from '../preview/createPreviewGetter'
 import guessOrderingConfig from '../ordering/guessOrderingConfig'
-import {normalizeSearchConfigs} from '../searchConfig/normalize'
+import {normalizeUserSearchConfigs} from '../searchConfig/normalize'
 import resolveSearchConfig from '../searchConfig/resolve'
 import {lazyGetter} from './utils'
 
@@ -65,12 +65,12 @@ export const ObjectType = {
     '__experimental_search',
     () => {
       const userProvidedSearchConfig = subTypeDef.__experimental_search
-        ? normalizeSearchConfigs(subTypeDef.__experimental_search)
+        ? normalizeUserSearchConfigs(subTypeDef.__experimental_search)
         : null
 
       if (userProvidedSearchConfig) {
         return userProvidedSearchConfig.map((entry) =>
-          entry === 'defaults' ? normalizeSearchConfigs(subTypeDef) : entry
+          entry === 'defaults' ? normalizeUserSearchConfigs(subTypeDef) : entry
         )
       }
       return resolveSearchConfig(parsed)

diff --git a/packages/@sanity/types/src/schema/types.ts b/packages/@sanity/types/src/schema/types.ts
index 2cac8598783..9104278f4d2 100644
--- a/packages/@sanity/types/src/schema/types.ts
+++ b/packages/@sanity/types/src/schema/types.ts
@@ -256,6 +256,7 @@ export interface ExperimentalSearchPath {
   path: (string | number)[]
   weight: number
   mapWith?: string
+  userProvided?: boolean
 }
 
 export interface ObjectSchemaTypeWithOptions extends ObjectSchemaType {