fix: Fix message handling and summarization in HypaV2 (#711)

# PR Checklist
- [x] Have you checked if it works normally in all models? *Ignore this
if it doesn't use models.*
- [x] Have you checked if it works normally in all web, local, and node
hosted versions? If it doesn't, have you blocked it in those versions?
- [ ] Have you added type definitions?

# Description
This PR addresses several issues in the HypaV2 memory system:

1. Improved Summarization Error Handling
- Before: When the context size was exceeded but messages could not be summarized, HypaV2 would continue and make API requests with an unintentionally truncated context
- After: HypaV2 now returns clear error messages asking users to adjust their settings when:
  - It is unable to summarize the last 4 messages
  - A single message exceeds the chunk size

2. Message Order Preservation
- Before: The lastTwoChats logic caused the first message (index 1) and the user input (index 2) to appear in the wrong order
- After: Fixed by adjusting the initial summarization index so the original message sequence is preserved

3. Message Filtering
- Added logic to skip unnecessary messages during summarization (a condensed sketch of these guards follows this list):
  - Excludes the [Start a new chat] message at index 0
  - Skips empty messages
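
For reviewers who want the new control flow at a glance, here is a minimal, self-contained sketch of the skip and error guards described in items 1 and 3. It is illustrative only: `pickChatsToSummarize`, the `ChatMessage` type, and the simplified "keep the last 4 messages" loop bound are assumptions for this sketch, not the actual `hypaMemoryV2` code (see the diff below for that).

```ts
// Hypothetical helper -- not RisuAI code. It condenses the new guards:
// skip index 0 and empty messages, and fail loudly when nothing can be
// summarized instead of silently truncating the context.
interface ChatMessage {
  role: string;
  content: string;
}

function pickChatsToSummarize(
  chats: ChatMessage[],
  startIdx: number,
  chunkSize: number,
  tokenize: (chat: ChatMessage) => number
): { picked: ChatMessage[]; nextIdx: number; error?: string } {
  const picked: ChatMessage[] = [];
  let tokens = 0;
  let idx = startIdx;

  // Simplified bound: never pull the last 4 messages into the summary.
  while (idx < chats.length - 4) {
    // Skip [Start a new chat] at index 0 and empty messages.
    if (idx === 0 || !chats[idx].content.trim()) {
      idx++;
      continue;
    }
    const chatTokens = tokenize(chats[idx]);
    // Stop once adding this chat would exceed the chunk size.
    if (tokens + chatTokens > chunkSize) break;
    picked.push(chats[idx]);
    tokens += chatTokens;
    idx++;
  }

  // Nothing could be picked: surface a clear, actionable error
  // (mirrors the two failure cases added in the diff).
  if (picked.length === 0) {
    if (idx >= chats.length - 4) {
      return { picked, nextIdx: idx, error: "can't summarize the last 4 messages; increase max context size" };
    }
    return { picked, nextIdx: idx, error: "a single message exceeds chunk size; increase chunk size" };
  }
  return { picked, nextIdx: idx };
}
```

The real implementation additionally tracks summarization failures and converts legacy HypaV2 data, which the sketch omits.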
<br>
Requesting review from @LightningHyperBlaze45654
kwaroran committed on 2025-01-11 01:44:10 +09:00 (via GitHub)


```diff
@@ -346,13 +346,17 @@ export async function hypaMemoryV2(
   memory?: SerializableHypaV2Data;
 }> {
   const db = getDatabase();
-  currentTokens -= db.maxResponse
   let data: HypaV2Data = {
     lastMainChunkID: 0,
     chunks: [],
     mainChunks: [],
   };
+  // Subtract maxResponse from currentTokens to fix the token calculation issue.
+  // This is not a fundamental solution but rather a temporary fix.
+  // It is designed to minimize impact on other code.
+  currentTokens -= db.maxResponse;
   if (room.hypaV2Data) {
     if (isOldHypaV2Data(room.hypaV2Data)) {
       console.log("Old HypaV2 data detected. Converting to new format...");
@@ -375,7 +379,7 @@ export async function hypaMemoryV2(
   const maxSummarizationFailures = 3;
   // Find the index to start summarizing from
-  let idx = 2; // first two should not be considered([Start a new chat], Memory prompt)
+  let idx = 0;
   if (data.mainChunks.length > 0) {
     const lastMainChunk = data.mainChunks[data.mainChunks.length - 1];
     const lastChatMemo = lastMainChunk.lastChatMemo;
@@ -383,9 +387,9 @@ export async function hypaMemoryV2(
     if (lastChatIndex !== -1) {
       idx = lastChatIndex + 1;
-      // Subtract tokens of removed chats
-      const removedChats = chats.slice(0, lastChatIndex + 1);
-      for (const chat of removedChats) {
+      // Subtract tokens of summarized chats
+      const summarizedChats = chats.slice(0, lastChatIndex + 1);
+      for (const chat of summarizedChats) {
         currentTokens -= await tokenizer.tokenizeChat(chat);
       }
     }
@@ -425,6 +429,20 @@ export async function hypaMemoryV2(
       "\nWould adding this exceed chunkSize?", (halfDataTokens + chatTokens > chunkSize)
     );
+    // Skip index 0 ([Start a new chat])
+    if (idx === 0) {
+      console.log("[HypaV2] Skipping index 0");
+      idx++;
+      continue;
+    }
+    // Skip if the content of this chat is empty
+    if (!chat.content.trim()) {
+      console.log(`[HypaV2] Skipping empty content of index ${idx}`);
+      idx++;
+      continue;
+    }
     // Check if adding this chat would exceed our chunkSize limit
     if (halfDataTokens + chatTokens > chunkSize) {
       // Can't add this chat without going over chunkSize
@@ -450,8 +468,22 @@ export async function hypaMemoryV2(
     // If no chats were added, break to avoid infinite loop
     if (halfData.length === 0) {
       console.log("HOW DID WE GET HERE???");
-      break;
+      // Case 1: Can't summarize the last 4 chats
+      if (idx >= chats.length - 4) {
+        return {
+          currentTokens: currentTokens,
+          chats: chats,
+          error: `[HypaV2] Input tokens (${currentTokens}) exceeds max context size (${maxContextTokens}), but can't summarize last 4 messages. Please increase max context size to at least ${currentTokens}.`
+        };
+      }
+      // Case 2: Chat too large for chunk size
+      const chatTokens = await tokenizer.tokenizeChat(chats[idx]);
+      return {
+        currentTokens: currentTokens,
+        chats: chats,
+        error: `[HypaV2] Message tokens (${chatTokens}) exceeds chunk size (${chunkSize}). Please increase chunk size to at least ${chatTokens}.`
+      };
     }
     const stringlizedChat = halfData
@@ -465,11 +497,11 @@ export async function hypaMemoryV2(
       console.log("Summarization failed:", summaryData.data);
       summarizationFailures++;
       if (summarizationFailures >= maxSummarizationFailures) {
-        console.error("Summarization failed multiple times. Aborting...");
+        console.error("[HypaV2] Summarization failed multiple times. Aborting...");
         return {
           currentTokens: currentTokens,
           chats: chats,
-          error: "Summarization failed multiple times. Aborting to prevent infinite loop.",
+          error: "[HypaV2] Summarization failed multiple times. Aborting to prevent infinite loop.",
         };
       }
       // If summarization fails, try again in next iteration
```
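
Because the new failure paths return through the function's existing `{ currentTokens, chats, error? }` result shape rather than throwing, callers only need to check `error` before proceeding. A hedged sketch of caller-side handling (the `handleMemoryResult` helper and `notifyUser` callback are hypothetical, not the actual RisuAI call site):

```ts
// Hypothetical caller-side handling; only the result shape comes from this PR.
interface HypaMemoryResult {
  currentTokens: number;
  chats: { role: string; content: string }[];
  error?: string;
}

function handleMemoryResult(
  result: HypaMemoryResult,
  notifyUser: (msg: string) => void
): boolean {
  if (result.error) {
    // e.g. "[HypaV2] Message tokens (...) exceeds chunk size (...). Please increase chunk size..."
    notifyUser(result.error);
    return false; // abort instead of sending a request with truncated context
  }
  return true; // safe to continue with result.chats and result.currentTokens
}
```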