fix: Fix message handling and summarization in HypaV2 (#711)
# PR Checklist
- [x] Have you checked if it works normally in all models? *Ignore this if it doesn't use models.*
- [x] Have you checked if it works normally in all web, local, and node hosted versions? If it doesn't, have you blocked it in those versions?
- [ ] Have you added type definitions?

# Description
This PR addresses several issues in the HypaV2 memory system:

1. Improved Summarization Error Handling
   - Before: When messages could not be summarized while the context size was exceeded, HypaV2 would continue and make API requests with an unintentionally truncated context.
   - After: HypaV2 now provides clear error messages asking users to adjust their settings when:
     - It is unable to summarize the last 4 messages
     - A single message exceeds the chunk size
2. Message Order Preservation
   - Before: The lastTwoChats logic was causing the first message (index 1) and the user input (index 2) to appear in the wrong order.
   - After: Fixed by adjusting the initial index to preserve the original message sequence.
3. Message Filtering
   - Added logic to skip unnecessary messages during summarization:
     - Excludes the [Start a new chat] message at index 0
     - Skips empty messages

<br>

Requesting review from @LightningHyperBlaze45654
This commit is contained in:
@@ -346,13 +346,17 @@ export async function hypaMemoryV2(
|
|||||||
memory?: SerializableHypaV2Data;
|
memory?: SerializableHypaV2Data;
|
||||||
}> {
|
}> {
|
||||||
const db = getDatabase();
|
const db = getDatabase();
|
||||||
currentTokens -= db.maxResponse
|
|
||||||
let data: HypaV2Data = {
|
let data: HypaV2Data = {
|
||||||
lastMainChunkID: 0,
|
lastMainChunkID: 0,
|
||||||
chunks: [],
|
chunks: [],
|
||||||
mainChunks: [],
|
mainChunks: [],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Subtract maxResponse from currentTokens to fix the token calculation issue.
|
||||||
|
// This is not a fundamental solution but rather a temporary fix.
|
||||||
|
// It is designed to minimize impact on other code.
|
||||||
|
currentTokens -= db.maxResponse;
|
||||||
|
|
||||||
if (room.hypaV2Data) {
|
if (room.hypaV2Data) {
|
||||||
if (isOldHypaV2Data(room.hypaV2Data)) {
|
if (isOldHypaV2Data(room.hypaV2Data)) {
|
||||||
console.log("Old HypaV2 data detected. Converting to new format...");
|
console.log("Old HypaV2 data detected. Converting to new format...");
|
||||||
@@ -375,7 +379,7 @@ export async function hypaMemoryV2(
|
|||||||
const maxSummarizationFailures = 3;
|
const maxSummarizationFailures = 3;
|
||||||
|
|
||||||
// Find the index to start summarizing from
|
// Find the index to start summarizing from
|
||||||
let idx = 2; // first two should not be considered([Start a new chat], Memory prompt)
|
let idx = 0;
|
||||||
if (data.mainChunks.length > 0) {
|
if (data.mainChunks.length > 0) {
|
||||||
const lastMainChunk = data.mainChunks[data.mainChunks.length - 1];
|
const lastMainChunk = data.mainChunks[data.mainChunks.length - 1];
|
||||||
const lastChatMemo = lastMainChunk.lastChatMemo;
|
const lastChatMemo = lastMainChunk.lastChatMemo;
|
||||||
@@ -383,9 +387,9 @@ export async function hypaMemoryV2(
|
|||||||
if (lastChatIndex !== -1) {
|
if (lastChatIndex !== -1) {
|
||||||
idx = lastChatIndex + 1;
|
idx = lastChatIndex + 1;
|
||||||
|
|
||||||
// Subtract tokens of removed chats
|
// Subtract tokens of summarized chats
|
||||||
const removedChats = chats.slice(0, lastChatIndex + 1);
|
const summarizedChats = chats.slice(0, lastChatIndex + 1);
|
||||||
for (const chat of removedChats) {
|
for (const chat of summarizedChats) {
|
||||||
currentTokens -= await tokenizer.tokenizeChat(chat);
|
currentTokens -= await tokenizer.tokenizeChat(chat);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -425,6 +429,20 @@ export async function hypaMemoryV2(
|
|||||||
"\nWould adding this exceed chunkSize?", (halfDataTokens + chatTokens > chunkSize)
|
"\nWould adding this exceed chunkSize?", (halfDataTokens + chatTokens > chunkSize)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Skip index 0 ([Start a new chat])
|
||||||
|
if (idx === 0) {
|
||||||
|
console.log("[HypaV2] Skipping index 0");
|
||||||
|
idx++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip if the content of this chat is empty
|
||||||
|
if (!chat.content.trim()) {
|
||||||
|
console.log(`[HypaV2] Skipping empty content of index ${idx}`);
|
||||||
|
idx++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if adding this chat would exceed our chunkSize limit
|
// Check if adding this chat would exceed our chunkSize limit
|
||||||
if (halfDataTokens + chatTokens > chunkSize) {
|
if (halfDataTokens + chatTokens > chunkSize) {
|
||||||
// Can't add this chat without going over chunkSize
|
// Can't add this chat without going over chunkSize
|
||||||
@@ -450,8 +468,22 @@ export async function hypaMemoryV2(
|
|||||||
|
|
||||||
// If no chats were added, break to avoid infinite loop
|
// If no chats were added, break to avoid infinite loop
|
||||||
if (halfData.length === 0) {
|
if (halfData.length === 0) {
|
||||||
console.log("HOW DID WE GET HERE???");
|
// Case 1: Can't summarize the last 4 chats
|
||||||
break;
|
if (idx >= chats.length - 4) {
|
||||||
|
return {
|
||||||
|
currentTokens: currentTokens,
|
||||||
|
chats: chats,
|
||||||
|
error: `[HypaV2] Input tokens (${currentTokens}) exceeds max context size (${maxContextTokens}), but can't summarize last 4 messages. Please increase max context size to at least ${currentTokens}.`
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Case 2: Chat too large for chunk size
|
||||||
|
const chatTokens = await tokenizer.tokenizeChat(chats[idx]);
|
||||||
|
return {
|
||||||
|
currentTokens: currentTokens,
|
||||||
|
chats: chats,
|
||||||
|
error: `[HypaV2] Message tokens (${chatTokens}) exceeds chunk size (${chunkSize}). Please increase chunk size to at least ${chatTokens}.`
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const stringlizedChat = halfData
|
const stringlizedChat = halfData
|
||||||
@@ -465,11 +497,11 @@ export async function hypaMemoryV2(
|
|||||||
console.log("Summarization failed:", summaryData.data);
|
console.log("Summarization failed:", summaryData.data);
|
||||||
summarizationFailures++;
|
summarizationFailures++;
|
||||||
if (summarizationFailures >= maxSummarizationFailures) {
|
if (summarizationFailures >= maxSummarizationFailures) {
|
||||||
console.error("Summarization failed multiple times. Aborting...");
|
console.error("[HypaV2] Summarization failed multiple times. Aborting...");
|
||||||
return {
|
return {
|
||||||
currentTokens: currentTokens,
|
currentTokens: currentTokens,
|
||||||
chats: chats,
|
chats: chats,
|
||||||
error: "Summarization failed multiple times. Aborting to prevent infinite loop.",
|
error: "[HypaV2] Summarization failed multiple times. Aborting to prevent infinite loop.",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
// If summarization fails, try again in next iteration
|
// If summarization fails, try again in next iteration
|
||||||
|
|||||||
Reference in New Issue
Block a user