Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#5241 - Improve assistant attribution functionality #5242

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
#5241 - Improve assistant attribution functionality
- Require less transfer thinking when referring to a chunk
- Handle cases where a model insists on referring to chunks as "document XXX"
reckart committed Jan 21, 2025
commit 03ff7c13643ffab766474a33f315b23a0cb8d029
Original file line number Diff line number Diff line change
@@ -81,8 +81,8 @@ public List<MTextMessage> retrieve(ChatContext aAssistant, MTextMessage aMessage
var body = new StringBuilder();
for (var chunk : chunks) {
var reference = MReference.builder() //
//.withId(String.valueOf(references.size() + 1)) //
.withId(UUID.randomUUID().toString().substring(0,8)) //
// .withId(String.valueOf(references.size() + 1)) //
.withId(UUID.randomUUID().toString().substring(0, 8)) //
.withDocumentId(chunk.documentId()) //
.withDocumentName(chunk.documentName()) //
.withBegin(chunk.begin()) //
@@ -102,41 +102,32 @@ public List<MTextMessage> retrieve(ChatContext aAssistant, MTextMessage aMessage
.withRole(SYSTEM).internal() //
.withReferences(references.values());

// Works well with qwen72b but not with granite 8b
// msg.withMessage(join("\n", asList(
// "The document retriever found the following relevant information in the documents of this project.",
// "", //
// body.toString(), "",
// "It is critical to mention the source of each document text in the form `{{ref::ref-id}}`.")));

msg.withMessage(join("\n", asList(
"""
var instruction = """
Use the following documents from this project to respond.
It is absolutely critital to mention the `{{ref::ref-id}}` after each individual information from a document.
Here is an example of how to include the ref-id:
Here is an example:

Input:
{
"id": "{{ref::917}}"
"document": "The Eiffel Tower is located in Paris, France.",
"ref-id": "917"
}
{
"id": "{{ref::735}}"
"document": "It is one of the most famous landmarks in the world.",
"ref-id": "735"
}
{
"id": "{{ref::582}}"
"document": The Eiffel Tower was built from 1887 to 1889.",
"ref-id": "582"
}

Response:
The Eiffel Tower is a famous landmark located in Paris, France {{ref::917}} {{ref::735}}.
It was built from 1887 to 1889 {{ref::582}}.

Now, use the same pattern to process the following document:
""",
"", //
body.toString())));
""";
msg.withMessage(join("\n", asList(instruction, "", body.toString())));

return asList(msg.build());
}
@@ -145,8 +136,8 @@ private void renderChunkJson(StringBuilder body, Chunk chunk, MReference aRefere
{
try {
var data = new LinkedHashMap<String, String>();
data.put("id", "{{ref::" + aReference.id() + "}}");
data.put("document", chunk.text());
data.put("ref-id", aReference.id());
data.entrySet().removeIf(e -> isBlank(e.getValue()));
body.append(JSONUtil.toPrettyJsonString(data));
body.append("\n");
48 changes: 30 additions & 18 deletions inception/inception-assistant/src/main/ts/src/AssistantPanel.svelte
Original file line number Diff line number Diff line change
@@ -348,25 +348,37 @@
var pureHtml = DOMPurify.sanitize(rawHtml, { RETURN_DOM: false });
var refNum = 0;

// Replace all `{{ref::X}}` with the respective reference link
pureHtml = pureHtml.replace(
/\s*{{ref::([\w-]+)}}(\.*)/g,
(match, refId, dots) => {
const reference = message.references.find(
(ref) => ref.id === refId,
);
if (reference) {
refNum++;
return `${dots}<span class="reference badge rounded-pill text-bg-secondary mx-1" data-msg="${message.id}" data-ref="${reference.id}" title="${escapeXML(reference.documentName)}">${refNum}</span>`;
}
/**
 * Replaces every match of `pattern` in `text` with a numbered reference badge.
 *
 * The first capture group of `pattern` is taken as the reference id and looked up
 * in `message.references`; the second capture group (named `dots` here) is emitted
 * in front of the badge. Each resolved reference increments the `refNum` counter of
 * the enclosing scope, and that number becomes the badge label.
 *
 * @param text the HTML string to scan
 * @param pattern a regular expression with two capture groups (id, trailing dots)
 * @returns the text with resolvable references replaced by badge markup
 */
function replaceReferences(text, pattern) {
  const toBadge = (match, refId, dots) => {
    const reference = message.references.find((ref) => ref.id === refId);

    // Unknown id: leave the matched text exactly as the model wrote it
    if (!reference) {
      return match;
    }

    refNum++;
    return `${dots}<span class="reference badge rounded-pill text-bg-secondary mx-1" data-msg="${message.id}" data-ref="${reference.id}" title="${escapeXML(reference.documentName)}">${refNum}</span>`;
  };

  return text.replace(pattern, toBadge);
}

// If no matching reference is found, keep the original text
// console.trace(
// `Reference with id ${refId} not found in message ${message.id}`
// );
return match;
},
);
// Our canonical reference format
const refIdReplacementPattern = /\s*{{ref::([\w-]+)}}(\.*)/g

// Some models (deepseek-r1) can't be bothered to properly use our reference syntax
// and keep referring to documents using the "Document XXXXXXXX" syntax...
const docIdReplacementPattern = /\s*[Dd]ocument[\s,]+([0-9a-f]{8})(\.*)/g

// Replace all references with the respective reference link
pureHtml = replaceReferences(pureHtml, refIdReplacementPattern);
pureHtml = replaceReferences(pureHtml, docIdReplacementPattern);

return pureHtml;
}