Skip to content

Commit

Permalink
#5241 - Improve assistant attribution functionality
Browse files Browse the repository at this point in the history
- Require less transfer thinking when referring to a chunk
- Handle cases where a model insists on referring to chunks as "document XXX"
  • Loading branch information
reckart committed Jan 21, 2025
1 parent 2c24d87 commit 03ff7c1
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ public List<MTextMessage> retrieve(ChatContext aAssistant, MTextMessage aMessage
var body = new StringBuilder();
for (var chunk : chunks) {
var reference = MReference.builder() //
//.withId(String.valueOf(references.size() + 1)) //
.withId(UUID.randomUUID().toString().substring(0,8)) //
// .withId(String.valueOf(references.size() + 1)) //
.withId(UUID.randomUUID().toString().substring(0, 8)) //
.withDocumentId(chunk.documentId()) //
.withDocumentName(chunk.documentName()) //
.withBegin(chunk.begin()) //
Expand All @@ -102,41 +102,32 @@ public List<MTextMessage> retrieve(ChatContext aAssistant, MTextMessage aMessage
.withRole(SYSTEM).internal() //
.withReferences(references.values());

// Works good with qwen72b but not with granite 8b
// msg.withMessage(join("\n", asList(
// "The document retriever found the following relevant information in the documents of this project.",
// "", //
// body.toString(), "",
// "It is critical to mention the source of each document text in the form `{{ref::ref-id}}`.")));

msg.withMessage(join("\n", asList(
"""
var instruction = """
Use the following documents from this project to respond.
It is absolutely critital to mention the `{{ref::ref-id}}` after each individual information from a document.
Here is an example of how to include the ref-id:
Here is an example:
Input:
{
"id": "{{ref::917}}"
"document": "The Eiffel Tower is located in Paris, France.",
"ref-id": "917"
}
{
"id": "{{ref::735}}"
"document": "It is one of the most famous landmarks in the world.",
"ref-id": "735"
}
{
"id": "{{ref::582}}"
"document": The Eiffel Tower was built from 1887 to 1889.",
"ref-id": "582"
}
Response:
The Eiffel Tower is a famous landmark located in Paris, France {{ref::917}} {{ref::735}}.
It was built from 1887 to 1889 {{ref::582}}.
Now, use the same pattern to process the following document:
""",
"", //
body.toString())));
""";
msg.withMessage(join("\n", asList(instruction, "", body.toString())));

return asList(msg.build());
}
Expand All @@ -145,8 +136,8 @@ private void renderChunkJson(StringBuilder body, Chunk chunk, MReference aRefere
{
try {
var data = new LinkedHashMap<String, String>();
data.put("id", "{{ref::" + aReference.id() + "}}");
data.put("document", chunk.text());
data.put("ref-id", aReference.id());
data.entrySet().removeIf(e -> isBlank(e.getValue()));
body.append(JSONUtil.toPrettyJsonString(data));
body.append("\n");
Expand Down
48 changes: 30 additions & 18 deletions inception/inception-assistant/src/main/ts/src/AssistantPanel.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -348,25 +348,37 @@
var pureHtml = DOMPurify.sanitize(rawHtml, { RETURN_DOM: false });
var refNum = 0;
// Replace all `{{ref::X}}` with the respective reference link
pureHtml = pureHtml.replace(
/\s*{{ref::([\w-]+)}}(\.*)/g,
(match, refId, dots) => {
const reference = message.references.find(
(ref) => ref.id === refId,
);
if (reference) {
refNum++;
return `${dots}<span class="reference badge rounded-pill text-bg-secondary mx-1" data-msg="${message.id}" data-ref="${reference.id}" title="${escapeXML(reference.documentName)}">${refNum}</span>`;
}
// Replaces every match of `pattern` in `text` with a numbered reference badge.
//
// The first capture group of the pattern is treated as a reference id and
// looked up in `message.references`; the second capture group (`dots`) is
// emitted in front of the badge. Every successful lookup increments the
// enclosing `refNum` counter, and the incremented value becomes the badge
// label. A match whose id has no corresponding reference is returned as-is.
function replaceReferences(text, pattern) {
  const toBadge = (match, refId, dots) => {
    const reference = message.references.find((ref) => ref.id === refId);

    // No reference with this id on the message: keep the original text.
    if (!reference) {
      return match;
    }

    refNum++;
    return `${dots}<span class="reference badge rounded-pill text-bg-secondary mx-1" data-msg="${message.id}" data-ref="${reference.id}" title="${escapeXML(reference.documentName)}">${refNum}</span>`;
  };

  return text.replace(pattern, toBadge);
}
// If no matching reference is found, keep the original text
// console.trace(
// `Reference with id ${refId} not found in message ${message.id}`
// );
return match;
},
);
// Our canonical reference format
const refIdReplacementPattern = /\s*{{ref::([\w-]+)}}(\.*)/g
// Some models (deepseek-r1) can't be bothered to properly use our reference syntax
// and keep referring to documents using the "Document XXXXXXXX" syntax...
const docIdReplacementPattern = /\s*[Dd]ocument[\s,]+([0-9a-f]{8})(\.*)/g
// Replace all references with the respective reference link
pureHtml = replaceReferences(pureHtml, refIdReplacementPattern);
pureHtml = replaceReferences(pureHtml, docIdReplacementPattern);
return pureHtml;
}
Expand Down

0 comments on commit 03ff7c1

Please sign in to comment.