Fix: Token counting fix (#699)
# PR Checklist
- [ ] Have you checked if it works normally in all models? *Ignore this
if it doesn't use models.*
- [ ] Have you checked if it works normally in all web, local, and node
hosted versions? If it doesn't, have you blocked it in those versions?
- [ ] Have you added type definitions?

# Description

Emergency fix for PRs #680 and #696.


Source of the problem: every time a chat was sent with HypaV2 active, `hypaAllocatedTokens` worth of tokens was appended to `currentTokens`.

This amount was never subtracted at the end, even though it is nothing more than a virtual limit, like the reserved output tokens.
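
To illustrate the accounting pattern involved, here is a minimal TypeScript sketch (a simplified model, not the actual hypav2.ts code: `budgetWithMemory` and the word-count `tokenizeChats` are hypothetical stand-ins for the project's real tokenizer API):

```ts
interface OpenAIChat {
  role: string;
  content: string;
}

// Hypothetical stand-in: the real tokenizer counts model tokens, not words.
async function tokenizeChats(chats: OpenAIChat[]): Promise<number> {
  return chats.reduce((n, c) => n + c.content.split(/\s+/).length, 0);
}

async function budgetWithMemory(
  chats: OpenAIChat[],
  allocatedTokens: number,
  memoryPrompt: OpenAIChat
): Promise<number> {
  // Reserve a *virtual* allocation up front so summarization leaves room
  // for the memory text HypaV2 will inject later.
  let currentTokens = (await tokenizeChats(chats)) + allocatedTokens;

  // ... summarization would reduce currentTokens here ...

  // The bug: the virtual reservation was never backed out, so every send
  // reported allocatedTokens more than the prompt actually used.
  // The fix: remove the placeholder and count the real memory text instead.
  currentTokens -= allocatedTokens;
  currentTokens += await tokenizeChats([memoryPrompt]);
  return currentTokens;
}
```

The key point is that the placeholder added before summarization must be removed once the real memory text is known; only the tokens the memory actually consumes should remain in `currentTokens`.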
kwaroran authored Jan 1, 2025
2 parents 03acfdc + d204211 commit 0c5aab0
Showing 1 changed file with 17 additions and 15 deletions.
32 changes: 17 additions & 15 deletions src/ts/process/memory/hypav2.ts
@@ -331,16 +331,15 @@ export async function hypaMemoryV2(

let allocatedTokens = db.hypaAllocatedTokens;
let chunkSize = db.hypaChunkSize;
-// Since likely the first update break the token count, will continue with this for few updates.
-currentTokens = await tokenizer.tokenizeChats(chats) + allocatedTokens;
+currentTokens += allocatedTokens; // WARNING: VIRTUAL VALUE. This token is NOT real. This is a placeholder appended to calculate the maximum amount of HypaV2 memory retrieved data.
let mainPrompt = "";
const lastTwoChats = chats.slice(-2);
-// Error handling for infinite summarization attempts
+// Error handling for failed summarization
let summarizationFailures = 0;
const maxSummarizationFailures = 3;

// Find the index to start summarizing from
-let idx = 2; // first two should not be considered
+let idx = 2; // first two should not be considered([Start a new chat], Memory prompt)
if (data.mainChunks.length > 0) {
const lastMainChunk = data.mainChunks[data.mainChunks.length - 1];
const lastChatMemo = lastMainChunk.lastChatMemo;
@@ -359,7 +358,7 @@ export async function hypaMemoryV2(
const startIdx = idx;

console.log(
"Starting summarization iteration:",
"[HypaV2] Starting summarization iteration:",
"\nCurrent Tokens (before):", currentTokens,
"\nMax Context Tokens:", maxContextTokens,
"\nStartIdx:", startIdx,
@@ -375,7 +374,7 @@
const chatTokens = await tokenizer.tokenizeChat(chat);

console.log(
"Evaluating chat for summarization:",
"[HypaV2] Evaluating chat for summarization:",
"\nIndex:", idx,
"\nRole:", chat.role,
"\nContent:", chat.content,
@@ -399,7 +398,7 @@

const endIdx = idx - 1;
console.log(
"Summarization batch chosen with this:",
"[HypaV2] Summarization batch chosen with this:",
"\nStartIdx:", startIdx,
"\nEndIdx:", endIdx,
"\nNumber of chats in halfData:", halfData.length,
@@ -443,7 +442,7 @@
});

console.log(
"Summarization success:",
"[HypaV2] Summarization success:",
"\nSummary Data:", summaryData.data,
"\nSummary Token Count:", summaryDataToken
);
@@ -476,15 +475,14 @@
);

console.log(
"Chunks added:",
"[HypaV2] Chunks added:",
splitted,
"\nUpdated mainChunks count:", data.mainChunks.length,
"\nUpdated chunks count:", data.chunks.length
);

-// Update the currentTokens immediately, removing summarized portion.
currentTokens -= halfDataTokens;
-console.log("Current tokens after summarization deduction:", currentTokens);
+console.log("[HypaV2] tokens after summarization deduction:", currentTokens);
}

// Construct the mainPrompt from mainChunks
@@ -552,6 +550,11 @@ export async function hypaMemoryV2(
}

const fullResult = `<Past Events Summary>${mainPrompt}</Past Events Summary>\n<Past Events Details>${chunkResultPrompts}</Past Events Details>`;
+const fullResultTokens = await tokenizer.tokenizeChat({
+  role: "system",
+  content: fullResult,
+});
+currentTokens += fullResultTokens;

// Filter out summarized chats and prepend the memory prompt
const unsummarizedChats: OpenAIChat[] = [
@@ -563,18 +566,17 @@
...chats.slice(idx) // Use the idx to slice out the summarized chats
];

-// Add the last two chats back if they are not already included
+// Remove this later, as this is already done by the index
for (const chat of lastTwoChats) {
if (!unsummarizedChats.find((c) => c.memo === chat.memo)) {
unsummarizedChats.push(chat);
}
}

-// Recalculate currentTokens based on the final chat list
-currentTokens = await tokenizer.tokenizeChats(unsummarizedChats);
+currentTokens -= allocatedTokens; // Virtually added memory tokens got removed. Bad way, but had no choice.

console.log(
"Model being used: ",
"[HypaV2] Model being used: ",
db.hypaModel,
db.supaModelType,
"\nCurrent session tokens: ",
