Skip to content

Commit

Permalink
Separate stampy-search library
Browse files Browse the repository at this point in the history
  • Loading branch information
mruwnik committed Aug 22, 2023
1 parent 3898f56 commit 401aa88
Show file tree
Hide file tree
Showing 19 changed files with 13,308 additions and 223 deletions.
1 change: 1 addition & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
| sed s/{CODA_TOKEN}/${{ secrets.CODA_TOKEN }}/ \
| sed s/{CODA_INCOMING_TOKEN}/${{ secrets.CODA_INCOMING_TOKEN }}/ \
| sed s/{CODA_WRITES_TOKEN}/${{ secrets.CODA_WRITES_TOKEN }}/ \
| sed s/{ALLOW_ORIGINS}// \
> wrangler.toml
npm ci
npm run deploy
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ node_modules

.DS_store
wrangler.toml

stampy-search/dist
stampy-search/example/stampySearch.min.js
8 changes: 4 additions & 4 deletions app/components/search.tsx
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import {useState, useEffect, useRef, MutableRefObject, FocusEvent} from 'react'
import debounce from 'lodash/debounce'
import {AddQuestion} from '~/routes/questions/add'
import {Action, ActionType} from '~/routes/questions/actions'
import {MagnifyingGlass, Edit} from '~/components/icons-generated'
import {
setupSearch,
searchLive,
searchUnpublished,
Question as QuestionType,
SearchResult,
} from '~/hooks/search'
} from 'stampy-search'
import {AddQuestion} from '~/routes/questions/add'
import {Action, ActionType} from '~/routes/questions/actions'
import {MagnifyingGlass, Edit} from '~/components/icons-generated'
import AutoHeight from 'react-auto-height'
import Dialog from '~/components/dialog'

Expand Down
199 changes: 0 additions & 199 deletions app/hooks/search.tsx
Original file line number Diff line number Diff line change
@@ -1,199 +0,0 @@
/** Minimal description of a question: its page id and its title. */
export type Question = {
pageid: string
title: string
}
/**
 * A question returned by a search, together with its `score` and the name of
 * the `model` that produced it (the baseline search in this file uses 'plaintext').
 */
export type SearchResult = Question & {
score: number
model: string
url?: string
}
/**
 * The search that is currently awaiting results: the callbacks used to settle
 * its promise plus the query it was started with. `null` means nothing is pending.
 */
type Search = {
resolve: (value: null | SearchResult[] | PromiseLike<null | SearchResult[]>) => void
reject: (reason?: any) => void
query: string
} | null
/** Options accepted by `setupSearch`; every field is optional. */
type SearchConfig = {
// maximum number of results used when a search call does not pass its own limit
numResults?: number
// supplies the questions that the baseline (plain text) search runs over
getAllQuestions?: () => Question[]
// URL that `searchUnpublished` sends its request to
searchEndpoint?: string
}

/** Results of a search; `query` is optional and is compared with the pending search's query. */
type WorkerResultMessage = {
searchResults: SearchResult[]
query?: string
}
// NOTE(review): both members of this union have the same shape as written here.
// The worker listener in this file also reads a `status` field from incoming
// messages - confirm whether the second member was meant to describe that message.
export type WorkerMessage =
| WorkerResultMessage
| {
searchResults: SearchResult[]
query?: string
}

/**
 * Comparator that orders search results by descending score (best match first).
 */
const byScore = ({score: left}: SearchResult, {score: right}: SearchResult) => right - left

/** Baseline full-text search matching the query with each question as strings, weighting down:
 *    short words,
 *    wh* questions,
 *    distance,
 *    partial (prefix) match without full match
 *  normalized to ignore a/an/the, punctuation, and case
 *
 * @param searchQueryRaw the query exactly as typed by the user
 * @param questions the questions to search through; only `pageid` and `title` are read
 * @param numResults maximum number of results to return
 * @returns at most `numResults` questions with a positive score, best match first.
 *   An empty list is returned when the query is empty or contains nothing but
 *   ignored content (articles, punctuation).
 */
export const baselineSearch = async (
  searchQueryRaw: string,
  questions: Question[],
  numResults = 5
): Promise<SearchResult[]> => {
  if (!searchQueryRaw) {
    return []
  }

  // A query such as "the" or "?" normalizes to an empty string. Without this guard it
  // would become a single empty token of weight 0, and every "what is ..." question
  // would be returned with a score of 1 purely because of the definition boost.
  const searchQueryNormalized = normalize(searchQueryRaw)
  if (!searchQueryNormalized) {
    return []
  }

  // normalize() leaves only word characters separated by single spaces, so the
  // tokens can be embedded in a regular expression without escaping
  const searchQueryTokens = searchQueryNormalized.split(' ')
  const matchers = searchQueryTokens.map((token) => ({
    // 1-2 letter words, wh* words and "how" carry little information
    weight: token.match(/^(?:\w|\w\w|wh.*|how)$/) ? 0.2 : token.length,
    fullRe: new RegExp(`\\b${token}\\b`),
    prefixRe: new RegExp(`\\b${token}`),
  }))
  const isDefinitionRe = /^what (?:is|are)/
  const totalWeight = matchers.reduce((acc, {weight}) => acc + weight, 0.1) // extra total to avoid division by 0

  const scoringFn = (questionNormalized: string) => {
    let score = isDefinitionRe.test(questionNormalized) ? 0.1 : 0 // small boost to "What is x?" questions if there are many search results
    let prevPosition = -1
    for (const {weight, fullRe, prefixRe} of matchers) {
      const fullMatch = fullRe.exec(questionNormalized)
      const prefixMatch = prefixRe.exec(questionNormalized)
      const currPosition = fullMatch?.index ?? prefixMatch?.index ?? prevPosition
      // Full multiplier only when exactly one space separates the previous match
      // position from the current one, i.e. the matched words are adjacent.
      // NOTE(review): with the initial prevPosition of -1 the slice starts from the
      // end of the string, so the first token always gets 0.9 - confirm this is intended.
      const distanceMultiplier =
        questionNormalized.slice(prevPosition, currPosition).split(' ').length === 2 ? 1 : 0.9

      if (fullMatch) {
        score += weight * distanceMultiplier
      } else if (prefixMatch) {
        score += 0.9 * weight * distanceMultiplier
      } else {
        // tokens missing from the question count against it
        score -= 0.2 * weight
      }
      prevPosition = currPosition
    }

    return score / totalWeight
  }

  return questions
    .map(({pageid, title}) => {
      const normalized = normalize(title)
      return {
        pageid,
        title,
        normalized,
        model: 'plaintext',
        score: scoringFn(normalized),
      }
    })
    .sort(byScore)
    .slice(0, numResults)
    .filter(({score}) => score > 0)
}

/**
 * Reduce a question to a canonical form so that cosmetic differences do not
 * affect similarity comparison.
 *
 * The text is lower-cased; every character that is neither a word character nor
 * a space is dropped, as are the standalone words matched by `an?` / `the?`;
 * a trailing "s" is removed from words that have at least two word characters
 * before it; finally whitespace runs and underscores become single spaces and
 * the result is trimmed.
 */
const normalize = (question: string) => {
  const lowered = question.toLowerCase()
  const withoutNoise = lowered.replace(/[^\w ]|\b(?:an?|the?)\b/g, '')
  // cannot use lookbehind (?<=...) because not supported on Safari
  const withoutPlurals = withoutNoise.replace(/(\w{2})s\b/g, '$1')
  const singleSpaced = withoutPlurals.replace(/\s+|_|&\s*/g, ' ')
  return singleSpaced.trim()
}

// The search whose promise has not been settled yet; `null` when nothing is pending
let currentSearch: Search = null
// Worker used for searches; only assigned once the worker has reported that it is ready
let worker: Worker
// Defaults used for every option that the caller of `setupSearch` leaves out
const defaultSearchConfig = {
getAllQuestions: () => [] as Question[],
numResults: 5,
searchEndpoint: '/questions/search',
}
// Configuration currently in effect; replaced as a whole by `setupSearch`
let searchConfig = defaultSearchConfig

/**
 * Settle the pending search, if there is one.
 *
 * - Called without a `query`, this cancels the pending search: its promise
 *   resolves with `null`.
 * - Called with the query of the pending search, its promise resolves with
 *   `searchResults`.
 * - Called with any other query, the results are stale (they answer a search that
 *   has already been replaced) and are ignored, so that the newer search stays
 *   pending and can still receive its own results.
 */
const resolveSearch = ({searchResults, query}: WorkerResultMessage) => {
  if (!currentSearch) return

  const isCancellation = query === undefined
  if (!isCancellation && query !== currentSearch.query) {
    // late answer to an older query - must not settle (and thereby drop) the newer search
    return
  }

  currentSearch.resolve(isCancellation ? null : searchResults)
  currentSearch = null
}

// Worker that has been created but has not reported `ready` yet. Tracked separately
// because `worker` is only assigned once the worker is usable.
let startingWorker: Worker | undefined

/**
 * Start the search worker (`/tfWorker.js`) unless one is already running or starting.
 *
 * The worker becomes available through `worker` once it posts a message with
 * `status: 'ready'`; messages carrying `searchResults` are forwarded to
 * `resolveSearch`. Calling this repeatedly is safe: only one worker is created,
 * even when the calls happen before the first worker is ready.
 */
const initialiseWorker = () => {
  if (worker !== undefined || startingWorker !== undefined) return

  const workerInstance = new Worker('/tfWorker.js')
  startingWorker = workerInstance

  workerInstance.addEventListener('message', ({data}) => {
    if (data.status === 'ready') {
      worker = workerInstance
      startingWorker = undefined
    } else if (data.searchResults) {
      resolveSearch(data)
    }
  })
  workerInstance.addEventListener('error', () => {
    // startup failed - allow a later call to try again
    if (startingWorker === workerInstance) {
      startingWorker = undefined
    }
  })
}

/**
 * Search the live questions for `query`.
 *
 * Only one live search is pending at a time: starting a new one cancels the
 * previous one, whose promise then resolves with `null`. Queries of more than two
 * words are sent to the worker once it is ready; everything else is handled by
 * `baselineSearch` over `searchConfig.getAllQuestions()`. Until either the worker
 * or the list of questions is available, the search is retried every 100ms.
 *
 * @param query the text to search for
 * @param resultsNum maximum number of results; when omitted (or 0) the configured
 *   `numResults` is used
 * @returns the results, or `null` when this search was cancelled by a newer one.
 *   The promise rejects when running the search throws.
 */
export const searchLive = (query: string, resultsNum?: number): Promise<SearchResult[] | null> => {
  // Cancel any previous searches
  resolveSearch({searchResults: []})

  // Reject this search - but only while it is still the pending one
  const failSearch = (reason: unknown) => {
    const pending = currentSearch
    if (pending && pending.query === query) {
      currentSearch = null
      pending.reject(reason)
    }
  }

  const runSearch = () => {
    const numResults = resultsNum || searchConfig.numResults
    const wordCount = query.split(' ').length

    if (wordCount > 2 && worker) {
      worker.postMessage({query, numResults})
    } else {
      baselineSearch(query, searchConfig.getAllQuestions(), numResults)
        .then((res) => resolveSearch({searchResults: res, query}))
        .catch(failSearch)
    }
  }

  const waitTillSearchReady = () => {
    // This also runs from a timer, where a thrown error would otherwise be lost
    // and leave the promise pending forever
    try {
      if (query !== currentSearch?.query) {
        return // this search has been superseded by a newer one, so just give up
      } else if (worker || searchConfig.getAllQuestions().length > 0) {
        runSearch()
      } else {
        setTimeout(waitTillSearchReady, 100)
      }
    } catch (err) {
      failSearch(err)
    }
  }

  return new Promise((resolve, reject) => {
    currentSearch = {resolve, reject, query}
    waitTillSearchReady()
  })
}

/**
 * Ask the configured search endpoint for questions matching `question`.
 *
 * @param question the text to search for; URI-encoded before being sent
 * @param resultsNum maximum number of results; falls back to the configured
 *   `numResults` when omitted
 * @returns the JSON body of the response
 * @throws Error carrying the response text when the status is anything other than 200
 */
export const searchUnpublished = async (
  question: string,
  resultsNum?: number
): Promise<SearchResult[]> => {
  const limit = resultsNum || searchConfig.numResults
  const encodedQuestion = encodeURIComponent(question)
  const url = `${searchConfig.searchEndpoint}?question=${encodedQuestion}&numResults=${limit}`
  const response = await fetch(url)

  if (response.status != 200) {
    throw new Error(await response.text())
  }
  return await response.json()
}

/**
 * Configure the search module and start the search worker.
 *
 * Options missing from `config` take their values from the defaults.
 */
export const setupSearch = (config: SearchConfig) => {
  searchConfig = Object.assign({}, defaultSearchConfig, config)
  initialiseWorker()
}
2 changes: 1 addition & 1 deletion app/routes/questions/add.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ export const action = async ({request}: ActionArgs) => {
title = title[0].toUpperCase() + title.substring(1)
title = title.trim()

const result = 123 //await addQuestion(title, relatedQuestions)
const result = await addQuestion(title, relatedQuestions)
console.log('Added question "' + title + '", response:', result)

return redirect(redirectTo)
Expand Down
Loading

0 comments on commit 401aa88

Please sign in to comment.