Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#5241 - Improve assistant attribution functionality #5242

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ public List<MTextMessage> retrieve(ChatContext aAssistant, MTextMessage aMessage
var body = new StringBuilder();
for (var chunk : chunks) {
var reference = MReference.builder() //
//.withId(String.valueOf(references.size() + 1)) //
.withId(UUID.randomUUID().toString().substring(0,8)) //
// .withId(String.valueOf(references.size() + 1)) //
.withId(UUID.randomUUID().toString().substring(0, 8)) //
.withDocumentId(chunk.documentId()) //
.withDocumentName(chunk.documentName()) //
.withBegin(chunk.begin()) //
Expand All @@ -102,41 +102,32 @@ public List<MTextMessage> retrieve(ChatContext aAssistant, MTextMessage aMessage
.withRole(SYSTEM).internal() //
.withReferences(references.values());

// Works well with qwen72b but not with granite 8b
// msg.withMessage(join("\n", asList(
// "The document retriever found the following relevant information in the documents of this project.",
// "", //
// body.toString(), "",
// "It is critical to mention the source of each document text in the form `{{ref::ref-id}}`.")));

msg.withMessage(join("\n", asList(
"""
var instruction = """
Use the following documents from this project to respond.
It is absolutely critital to mention the `{{ref::ref-id}}` after each individual information from a document.
Here is an example of how to include the ref-id:
Here is an example:

Input:
{
"id": "{{ref::917}}"
"document": "The Eiffel Tower is located in Paris, France.",
"ref-id": "917"
}
{
"id": "{{ref::735}}"
"document": "It is one of the most famous landmarks in the world.",
"ref-id": "735"
}
{
"id": "{{ref::582}}"
"document": The Eiffel Tower was built from 1887 to 1889.",
"ref-id": "582"
}

Response:
The Eiffel Tower is a famous landmark located in Paris, France {{ref::917}} {{ref::735}}.
It was built from 1887 to 1889 {{ref::582}}.

Now, use the same pattern to process the following document:
""",
"", //
body.toString())));
""";
msg.withMessage(join("\n", asList(instruction, "", body.toString())));

return asList(msg.build());
}
Expand All @@ -145,8 +136,8 @@ private void renderChunkJson(StringBuilder body, Chunk chunk, MReference aRefere
{
try {
var data = new LinkedHashMap<String, String>();
data.put("id", "{{ref::" + aReference.id() + "}}");
data.put("document", chunk.text());
data.put("ref-id", aReference.id());
data.entrySet().removeIf(e -> isBlank(e.getValue()));
body.append(JSONUtil.toPrettyJsonString(data));
body.append("\n");
Expand Down
48 changes: 30 additions & 18 deletions inception/inception-assistant/src/main/ts/src/AssistantPanel.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -348,25 +348,37 @@
var pureHtml = DOMPurify.sanitize(rawHtml, { RETURN_DOM: false });
var refNum = 0;

// Replace all `{{ref::X}}` with the respective reference link
pureHtml = pureHtml.replace(
/\s*{{ref::([\w-]+)}}(\.*)/g,
(match, refId, dots) => {
const reference = message.references.find(
(ref) => ref.id === refId,
);
if (reference) {
refNum++;
return `${dots}<span class="reference badge rounded-pill text-bg-secondary mx-1" data-msg="${message.id}" data-ref="${reference.id}" title="${escapeXML(reference.documentName)}">${refNum}</span>`;
}
/**
 * Replaces every match of `pattern` in `text` with a numbered reference badge.
 *
 * The pattern must expose the reference id as capture group 1 and any
 * trailing dots as capture group 2. Relies on `message`, `refNum` and
 * `escapeXML` from the enclosing scope; `refNum` is incremented once per
 * resolved reference so badges are numbered in order of appearance.
 */
function replaceReferences(text, pattern) {
  const toBadge = (match, refId, dots) => {
    const reference = message.references.find(
      (candidate) => candidate.id === refId,
    );

    // Unknown reference id: leave the matched text exactly as it was.
    // console.trace(
    //   `Reference with id ${refId} not found in message ${message.id}`
    // );
    if (!reference) {
      return match;
    }

    // Trailing dots are emitted before the badge so it follows the sentence end.
    refNum++;
    return `${dots}<span class="reference badge rounded-pill text-bg-secondary mx-1" data-msg="${message.id}" data-ref="${reference.id}" title="${escapeXML(reference.documentName)}">${refNum}</span>`;
  };

  return text.replace(pattern, toBadge);
}

// If no matching reference is found, keep the original text
// console.trace(
// `Reference with id ${refId} not found in message ${message.id}`
// );
return match;
},
);
// Our canonical reference format
const refIdReplacementPattern = /\s*{{ref::([\w-]+)}}(\.*)/g

// Some models (deepseek-r1) can't be bothered to properly use our reference syntax
// and keep referring to documents using the "Document XXXXXXXX" syntax...
const docIdReplacementPattern = /\s*[Dd]ocument[\s,]+([0-9a-f]{8})(\.*)/g

// Replace all references with the respective reference link
pureHtml = replaceReferences(pureHtml, refIdReplacementPattern);
pureHtml = replaceReferences(pureHtml, docIdReplacementPattern);

return pureHtml;
}
Expand Down
Loading