Skip to content

Commit

Permalink
docs(core): update docs for applyWeights/createSearchQuery
Browse files Browse the repository at this point in the history
  • Loading branch information
robinpyon committed Jul 14, 2023
1 parent 0883b84 commit 4e15ca7
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 18 deletions.
60 changes: 54 additions & 6 deletions packages/@sanity/base/src/search/weighted/applyWeights.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@ import {SearchHit, WeightedHit, SearchSpec} from './types'

type SearchScore = [number, string]

// takes a set of terms and a value and returns a [score, story] pair where score is a value between 0, 1 and story is the explanation
/**
* Calculates a score (between 0 and 1) indicating general search relevance of an array of
* search tokens within a specific string.
*
* @internal
* @param searchTerms - All search terms
* @param value - The string to match against
* @returns A [score, story] pair containing the search score as well as a human readable explanation
*/
export const calculateScore = (searchTerms: string[], value: string): SearchScore => {
// Separate search terms by phrases (wrapped with quotes) and words.
const {phrases: uniqueSearchPhrases, words: uniqueSearchWords} = partitionAndSanitizeSearchTerms(
Expand All @@ -19,6 +27,16 @@ export const calculateScore = (searchTerms: string[], value: string): SearchScor
const stringify = (value: unknown): string =>
typeof value === 'string' ? value : JSON.stringify(value)

/**
* Applies path weights from a supplied SearchSpec to existing search hits to create _weighted_ hits
* augmented with search ranking and human readable explanations.
*
* @internal
* @param searchSpec - SearchSpec containing path weighting
* @param hits - SearchHit objects to augment
* @param terms - All search terms
* @returns WeightedHit array containing search scores and ranking explanations
*/
export function applyWeights(
searchSpec: SearchSpec[],
hits: SearchHit[],
Expand Down Expand Up @@ -46,7 +64,7 @@ export function applyWeights(
})

const totalScore = stories.reduce((acc, rank) => acc + rank.score, 0)
/**
/*
* Filter out hits with no score.
* (only if search terms are present, otherwise we always show results)
*
Expand All @@ -66,12 +84,17 @@ export function applyWeights(
}

/**
* Score on the total number of matching characters.
 * Generate a score on the total number of matching characters.
* E.g. given the phrases ["the fox", "of london"] for the target value "the wily fox of london"
*
* - "the fox" isn't included in the target value (score: 0)
 * - "of london" is included in the target value, and 9 out of 22 characters match (score: 9/22 = ~0.409)
* - non-exact matches have their score divided in half (final score: ~0.204)
*
* @internal
* @param uniqueSearchPhrases - All search phrases
* @param value - The string to match against
* @returns SearchScore containing the search score as well as a human readable explanation
*/
export function calculatePhraseScore(uniqueSearchPhrases: string[], value: string): SearchScore {
const sanitizedValue = value.toLowerCase().trim()
Expand All @@ -91,12 +114,17 @@ export function calculatePhraseScore(uniqueSearchPhrases: string[], value: strin
}

/**
* Score on the total number of matching characters.
* Generate a score on the total number of matching characters.
* E.g. given the terms ["bar", "fo"] for the target value "food bar".
*
* - "fo" is included in the target value, and 2 out of 7 non-whitespace characters match (score: 2/7)
* - "bar" is included in the target value, and 3 out of 7 non-whitespace characters match (score: 3/7)
 * - all values are accumulated and have their score divided in half (final score: ~0.357)
*
* @internal
* @param uniqueSearchTerms - A string array of search terms
* @param value - The string to match against
* @returns SearchScore containing the search score as well as a human readable explanation
*/
export function calculateCharacterScore(uniqueSearchTerms: string[], value: string): SearchScore {
const sanitizedValue = value.toLowerCase().trim()
Expand All @@ -117,11 +145,16 @@ export function calculateCharacterScore(uniqueSearchTerms: string[], value: stri
}

/**
* Score on the total number of matching _whole_ words.
* Generate a score on the total number of matching _whole_ words.
* E.g. given the words ["the", "fox", "of", "london"] for the target value "the wily fox of london"
*
* - 4 out of 5 words match (score: 4/5 = 0.8)
* - non-exact matches have their score divided in half (final score: 0.4)
*
* @internal
* @param uniqueSearchTerms - All search terms
* @param value - The string to match against
* @returns SearchScore containing the search score as well as a human readable explanation
*/
export function calculateMatchingWordScore(
uniqueSearchTerms: string[],
Expand All @@ -137,6 +170,13 @@ export function calculateMatchingWordScore(
: [fieldScore / 2, `[Word] ${matches.length}/${all.length} terms: [${matches.join(', ')}]`]
}

/**
* Partition search terms by phrases (wrapped with quotes) and words.
*
* @internal
* @param searchTerms - All search terms
* @returns Partitioned phrases and words
*/
export function partitionAndSanitizeSearchTerms(
searchTerms: string[]
): {
Expand All @@ -152,8 +192,16 @@ export function partitionAndSanitizeSearchTerms(
}
}

/**
* Returns matching array indices of `values` containing _any_ member of `uniqueSearchTerms`.
* When comparing for matches, members of `values` are stringified, trimmed and lowercased.
*
* @internal
* @param uniqueSearchTerms - All search terms
* @param values - Values to match against (members are stringified)
* @returns All matching indices in `values`
*/
export function findMatchingIndices(uniqueSearchTerms: string[], values: unknown[]): number[] {
// Separate search terms by phrases (wrapped with quotes) and words.
const {phrases: uniqueSearchPhrases, words: uniqueSearchWords} = partitionAndSanitizeSearchTerms(
uniqueSearchTerms
)
Expand Down
23 changes: 11 additions & 12 deletions packages/@sanity/base/src/search/weighted/createSearchQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,16 @@ const combinePaths = flow([flatten, union, compact])
* Create search specs from supplied searchable types.
* Search specs contain weighted paths which are used to construct GROQ queries for search.
*
* @param types - The searchable document types to create specs from.
* @internal
* @param types - Searchable document types to create specs from.
 * @param optimizedIndexPaths - If true, will convert all `__experimental_search` paths containing numbers into array syntax.
* E.g. ['cover', 0, 'cards', 0, 'title'] => "cover[].cards[].title"
*
* This optimization will yield more search results than may be intended, but offers better performance over arrays with indices.
* (which are currently unoptimizable by Content Lake)
* @param maxAttributes - The maximum number of _unique_ searchable attributes to include across all types.
* @param maxAttributes - Maximum number of _unique_ searchable attributes to include across all types.
* User-provided paths (e.g. with __experimental_search) do not count towards this limit.
* @returns An object containing all matching search specs and `hasIndexedPaths`, a boolean indicating whether any paths contain indices.
* E.g. `['cover', 0, 'cards', 0, 'title']`
* @internal
* @returns All matching search specs and `hasIndexedPaths`, a boolean indicating whether any paths contain indices.
*/
export function createSearchSpecs(
types: SearchableType[],
Expand Down Expand Up @@ -159,9 +158,9 @@ function createConstraints(terms: string[], specs: SearchSpec[]) {
*
* Phrases wrapped in quotes are assigned relevance scoring differently from regular words.
*
* @param query - A string to convert into tokens.
* @returns An array of string tokens.
* @internal
* @param query - A string to convert into individual tokens
* @returns All extracted tokens
*/
export function extractTermsFromQuery(query: string): string[] {
const quotedQueries = [] as string[]
Expand All @@ -176,7 +175,7 @@ export function extractTermsFromQuery(query: string): string[] {
// Lowercase and trim quoted queries
const quotedTerms = quotedQueries.map((str) => trim(toLower(str)))

/**
/*
* Convert (remaining) search query into an array of deduped, sanitized tokens.
* All white space and special characters are removed.
* e.g. "The saint of Saint-Germain-des-Prés" => ['the', 'saint', 'of', 'germain', 'des', 'pres']
Expand All @@ -189,10 +188,10 @@ export function extractTermsFromQuery(query: string): string[] {
/**
* Generate search query data based off provided search terms and options.
*
* @param searchTerms - An object containing a string query and any number of searchable document types.
* @param searchOpts - Optional search configuration.
* @returns A object containing a GROQ query, params and options to be used to fetch search results.
* @internal
* @param searchTerms - SearchTerms containing a string query and any number of searchable document types.
* @param searchOpts - Optional search configuration.
* @returns GROQ query, params and options to be used to fetch search results.
*/
export function createSearchQuery(
searchTerms: SearchTerms,
Expand All @@ -203,7 +202,7 @@ export function createSearchQuery(
// Extract search terms from string query, factoring in phrases wrapped in quotes
const terms = extractTermsFromQuery(searchTerms.query)

/**
/*
* Create an optimized search spec which removes array indices from __experimental_search paths.
* e.g. ["authors", 0, "title"] => "authors[].title"
*
Expand Down
1 change: 1 addition & 0 deletions packages/@sanity/schema/src/legacy/searchConfig/resolve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export const pathCountSymbol = Symbol('__cachedPathCount')
const MAX_TRAVERSAL_DEPTH = Number(process.env.SANITY_STUDIO_UNSTABLE_SEARCH_DEPTH) || 15

// Max number of search paths to extract per root-level object
// eslint-disable-next-line no-process-env
const MAX_OBJECT_SEARCH_PATHS = Number(process.env.SANITY_STUDIO_UNSTABLE_SEARCH_PATH_LIMIT) || 500

const BASE_WEIGHTS = [
Expand Down

0 comments on commit 4e15ca7

Please sign in to comment.