-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Same citation numbers #109
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,9 +2,7 @@ import type { Citation } from "../types"; | |
import { Colours, A } from "./html"; | ||
|
||
|
||
// todo: memoize this if too slow. | ||
export const ProcessText: (text: string, base_count: number) => [string, Map<string, number>] = (text, base_count) => { | ||
|
||
export const formatCitations: (text: string) => string = (text) => { | ||
// ---------------------- normalize citation form ---------------------- | ||
// the general plan here is just to add parsing cases until we can respond | ||
// well to almost everything the LLM emits. We won't ever reach five nines, | ||
|
@@ -41,33 +39,29 @@ export const ProcessText: (text: string, base_count: number) => [string, Map<str | |
/\[\s*([a-z]+)\s*\]/g, | ||
(_match: string, x: string) => `[${x}]` | ||
) | ||
return response; | ||
} | ||
|
||
// -------------- map citations from strings into numbers -------------- | ||
|
||
export const findCitations: (text: string, citations: Citations[]) => Map<string, Citation> = (text, citations) => { | ||
// figure out what citations are in the response, and map them appropriately | ||
const cite_map = new Map<string, number>(); | ||
let cite_count = 0; | ||
const cite_map = new Map<string, Citation>(); | ||
|
||
// scan a regex for [x] over the response. If x isn't in the map, add it. | ||
// (note: we're actually doing this twice - once on parsing, once on render. | ||
// if that looks like a problem, we could swap from strings to custom ropes). | ||
const regex = /\[([a-z]+)\]/g; | ||
Aprillion marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let match; | ||
let response_copy = "" | ||
while ((match = regex.exec(response)) !== null) { | ||
if (!cite_map.has(match[1]!)) { | ||
cite_map.set(match[1]!, base_count + cite_count++); | ||
while ((match = regex.exec(text)) !== null) { | ||
const letter = match[1]; | ||
const citation = citations[letter.charCodeAt(0) - 'a'.charCodeAt(0)] | ||
if (!cite_map.has(letter!)) { | ||
cite_map.set(letter!, citation); | ||
} | ||
// replace [x] with [i] | ||
response_copy += response.slice(response_copy.length, match.index) + `[${cite_map.get(match[1]!)! + 1}]`; | ||
} | ||
|
||
response = response_copy + response.slice(response_copy.length); | ||
|
||
return [response, cite_map] | ||
return cite_map | ||
} | ||
|
||
export const ShowCitation: React.FC<{citation: Citation, i: number}> = ({citation, i}) => { | ||
export const ShowCitation: React.FC<{citation: Citation}> = ({citation}) => { | ||
|
||
var c_str = citation.title; | ||
|
||
|
@@ -82,22 +76,41 @@ export const ShowCitation: React.FC<{citation: Citation, i: number}> = ({citatio | |
: `https://duckduckgo.com/?q=${encodeURIComponent(citation.title)}`; | ||
|
||
return ( | ||
<A className={Colours[i % Colours.length] + " border-2 flex items-center rounded my-2 text-sm no-underline w-fit"} | ||
<A className={Colours[(citation.index - 1) % Colours.length] + " border-2 flex items-center rounded my-2 text-sm no-underline w-fit"} | ||
href={url}> | ||
<span className="mx-1"> [{i + 1}] </span> | ||
<span className="mx-1"> [{citation.index}] </span> | ||
<p className="mx-1 my-0"> {c_str} </p> | ||
</A> | ||
); | ||
}; | ||
|
||
export const ShowInTextCitation: React.FC<{citation: Citation, i: number}> = ({citation, i}) => { | ||
const url = citation.url && citation.url !== "" | ||
? citation.url | ||
: `https://duckduckgo.com/?q=${encodeURIComponent(citation.title)}`; | ||
return ( | ||
<A className={Colours[i % Colours.length] + " border-2 rounded text-sm no-underline w-min px-0.5 pb-0.5 ml-1 mr-0.5"} | ||
href={url}> | ||
[{i + 1}] | ||
</A> | ||
); | ||
export const CitationRef: React.FC<{citation: Citation}> = ({citation}) => { | ||
const url = citation.url && citation.url !== "" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I keep wanting to add a linter, but worry that it'll mess up other PRs... :/ |
||
? citation.url | ||
: `https://duckduckgo.com/?q=${encodeURIComponent(citation.title)}`; | ||
return ( | ||
<A className={Colours[(citation.index - 1) % Colours.length] + " border-2 rounded text-sm no-underline w-min px-0.5 pb-0.5 ml-1 mr-0.5"} | ||
href={url}> | ||
[{citation.index}] | ||
</A> | ||
); | ||
}; | ||
|
||
|
||
export const CitationsBlock: React.FC<{text: string, citations: Map<string, Citation>, textRenderer: (t: str) => any}> = ({text, citations, textRenderer}) => { | ||
Aprillion marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const regex = /\[([a-z]+)\]/g; | ||
return ( | ||
<p> { | ||
text.split(regex).map((part, i) => { | ||
// When splitting, the even parts are basic text sections, while the odd ones are | ||
// citations | ||
if (i % 2 == 0) { | ||
return textRenderer(part) | ||
} else { | ||
return (<CitationRef citation={citations.get(part)} />) | ||
} | ||
}) | ||
} | ||
</p> | ||
) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ import type { | |
Followup, | ||
SearchResult, | ||
} from "../types"; | ||
import { formatCitations, findCitations } from '../components/citations'; | ||
|
||
const MAX_FOLLOWUPS = 4; | ||
const DATA_HEADER = "data: " | ||
|
@@ -50,21 +51,22 @@ export async function* iterateData(res: Response) { | |
|
||
export const extractAnswer = async ( | ||
res: Response, | ||
baseReferencesIndex: number, | ||
setCurrent: (e: CurrentSearch) => void | ||
): Promise<SearchResult> => { | ||
var result: AssistantEntry = { | ||
role: "assistant", | ||
content: "", | ||
citations: [], | ||
base_count: baseReferencesIndex, | ||
citationsMap: Map<string, Citation>, | ||
}; | ||
var followups: Followup[] = []; | ||
for await (var data of iterateData(res)) { | ||
switch (data.state) { | ||
case "loading": | ||
// display loading phases, once citations are available toss them | ||
// into the current item. | ||
setCurrent({ phase: data.phase, ...result }); | ||
break; | ||
|
||
case "citations": | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This adds a separate step for parsing citations. |
||
result = { | ||
...result, | ||
citations: data?.citations || result?.citations || [], | ||
|
@@ -74,11 +76,12 @@ export const extractAnswer = async ( | |
|
||
case "streaming": | ||
// incrementally build up the response | ||
const content = formatCitations((result?.content || "") + data.content); | ||
result = { | ||
content, | ||
role: "assistant", | ||
content: (result?.content || "") + data.content, | ||
citations: result?.citations || [], | ||
base_count: result?.base_count || baseReferencesIndex, | ||
citationsMap: findCitations(content, result?.citations || []), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The LLM returns citations like "bla bla bla [a] ble ble [b][c]". These then need to be mapped to the appropriate numbers. This |
||
}; | ||
setCurrent({ phase: "streaming", ...result }); | ||
break; | ||
|
@@ -118,7 +121,6 @@ export const queryLLM = async ( | |
query: string, | ||
mode: string, | ||
history: HistoryEntry[], | ||
baseReferencesIndex: number, | ||
setCurrent: (e?: CurrentSearch) => void, | ||
sessionId: string | ||
): Promise<SearchResult> => { | ||
|
@@ -130,7 +132,7 @@ export const queryLLM = async ( | |
} | ||
|
||
try { | ||
return await extractAnswer(res, baseReferencesIndex, setCurrent); | ||
return await extractAnswer(res, setCurrent); | ||
} catch (e) { | ||
return { | ||
result: { role: "error", content: e ? e.toString() : "unknown error" }, | ||
|
@@ -191,7 +193,6 @@ export const runSearch = async ( | |
query: string, | ||
query_source: "search" | "followups", | ||
mode: string, | ||
baseReferencesIndex: number, | ||
entries: Entry[], | ||
setCurrent: (c: CurrentSearch) => void, | ||
sessionId: string | ||
|
@@ -208,7 +209,6 @@ export const runSearch = async ( | |
query, | ||
mode, | ||
history, | ||
baseReferencesIndex, | ||
setCurrent, | ||
sessionId | ||
); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was causing errors when the server returned an error (because of the empty query), which was then parsed as followups.