import { type memoryVector, HypaProcesser, similarity } from "./hypamemory";
import { TaskRateLimiter } from "./taskRateLimiter";
import { type EmbeddingText, HypaProcessorV2 } from "./hypamemoryv2";
import {
  type Chat,
  type character,
  type groupChat,
  getDatabase,
} from "src/ts/storage/database.svelte";
import { type OpenAIChat } from "../index.svelte";
import { requestChatData } from "../request";
import { chatCompletion, unloadEngine } from "../webllm";
import { parseChatML } from "src/ts/parser.svelte";
import { hypaV3ProgressStore } from "src/ts/stores.svelte";
import { type ChatTokenizer } from "src/ts/tokenizer";

export interface HypaV3Preset {
  name: string;
  settings: HypaV3Settings;
}

export interface HypaV3Settings {
  summarizationModel: string;
  summarizationPrompt: string;
  memoryTokensRatio: number;
  extraSummarizationRatio: number;
  maxChatsPerSummary: number;
  recentMemoryRatio: number;
  similarMemoryRatio: number;
  enableSimilarityCorrection: boolean;
  preserveOrphanedMemory: boolean;
  processRegexScript: boolean;
  doNotSummarizeUserMessage: boolean;
  // Experimental
  useExperimentalImpl: boolean;
  summarizationRequestsPerMinute: number;
  summarizationMaxConcurrent: number;
  embeddingRequestsPerMinute: number;
  embeddingMaxConcurrent: number;
}

interface HypaV3Data {
  summaries: Summary[];
  lastSelectedSummaries?: number[];
}

export interface SerializableHypaV3Data {
  summaries: {
    text: string;
    chatMemos: string[];
    isImportant: boolean;
  }[];
  lastSelectedSummaries?: number[];
}

interface Summary {
  text: string;
  chatMemos: Set<string>;
  isImportant: boolean;
}

interface SummaryChunk {
  text: string;
  summary: Summary;
}

export interface HypaV3Result {
  currentTokens: number;
  chats: OpenAIChat[];
  error?: string;
  memory?: SerializableHypaV3Data;
}

const logPrefix = "[HypaV3]";
const memoryPromptTag = "Past Events Summary";
const minChatsForSimilarity = 3;
const summarySeparator = "\n\n";

export async function hypaMemoryV3(
  chats: OpenAIChat[],
  currentTokens: number,
  maxContextTokens: number,
  room: Chat,
  char: character | groupChat,
  tokenizer: ChatTokenizer
): Promise<HypaV3Result> {
  const settings = getCurrentHypaV3Preset().settings;

  try {
    if (settings.useExperimentalImpl) {
      console.log(logPrefix, "Using experimental implementation.");

      return await hypaMemoryV3MainExp(
        chats,
        currentTokens,
        maxContextTokens,
        room,
        char,
        tokenizer
      );
    }

    return await hypaMemoryV3Main(
      chats,
      currentTokens,
      maxContextTokens,
      room,
      char,
      tokenizer
    );
  } catch (error) {
    if (error instanceof Error) {
      // Standard Error instance
      error.message = `${logPrefix} ${error.message}`;
      throw error;
    }

    // Fallback for non-Error object
    let errorMessage: string;

    try {
      errorMessage = JSON.stringify(error);
    } catch {
      errorMessage = String(error);
    }

    throw new Error(`${logPrefix} ${errorMessage}`);
  } finally {
    if (settings.summarizationModel !== "subModel") {
      try {
        unloadEngine();
      } catch {}
    }
  }
}

async function hypaMemoryV3MainExp(
  chats: OpenAIChat[],
  currentTokens: number,
  maxContextTokens: number,
  room: Chat,
  char: character | groupChat,
  tokenizer: ChatTokenizer
): Promise<HypaV3Result> {
  const db = getDatabase();
  const settings = getCurrentHypaV3Preset().settings;

  // Validate settings
  if (settings.recentMemoryRatio + settings.similarMemoryRatio > 1) {
    return {
      currentTokens,
      chats,
      error: `${logPrefix} The sum of Recent Memory Ratio and Similar Memory Ratio is greater than 1.`,
    };
  }

  // Initial token correction
  currentTokens -= db.maxResponse;

  // Load existing hypa data if available
  let data: HypaV3Data = {
    summaries: [],
    lastSelectedSummaries: [],
  };

  if (room.hypaV3Data) {
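    // Restore persisted memory; toHypaV3Data() converts each summary's serialized chatMemos array back into a Set.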
data = toHypaV3Data(room.hypaV3Data); } // Clean orphaned summaries if (!settings.preserveOrphanedMemory) { cleanOrphanedSummary(chats, data); } // Determine starting index let startIdx = 0; if (data.summaries.length > 0) { const lastSummary = data.summaries.at(-1); const lastChatIndex = chats.findIndex( (chat) => chat.memo === [...lastSummary.chatMemos].at(-1) ); if (lastChatIndex !== -1) { startIdx = lastChatIndex + 1; // Exclude tokens from summarized chats const summarizedChats = chats.slice(0, lastChatIndex + 1); for (const chat of summarizedChats) { currentTokens -= await tokenizer.tokenizeChat(chat); } } } console.log(logPrefix, "Starting index:", startIdx); // Reserve memory tokens const emptyMemoryTokens = await tokenizer.tokenizeChat({ role: "system", content: wrapWithXml(memoryPromptTag, ""), }); const memoryTokens = Math.floor( maxContextTokens * settings.memoryTokensRatio ); const shouldReserveMemoryTokens = data.summaries.length > 0 || currentTokens > maxContextTokens; let availableMemoryTokens = shouldReserveMemoryTokens ? memoryTokens - emptyMemoryTokens : 0; if (shouldReserveMemoryTokens) { currentTokens += memoryTokens; console.log(logPrefix, "Reserved memory tokens:", memoryTokens); } // If summarization is needed const summarizationMode = currentTokens > maxContextTokens; const targetTokens = maxContextTokens * (1 - settings.extraSummarizationRatio); const toSummarizeArray: OpenAIChat[][] = []; while (summarizationMode) { if (currentTokens <= targetTokens) { break; } if (chats.length - startIdx <= minChatsForSimilarity) { if (currentTokens <= maxContextTokens) { break; } else { return { currentTokens, chats, error: `${logPrefix} Cannot summarize further: input token count (${currentTokens}) exceeds max context size (${maxContextTokens}), but minimum ${minChatsForSimilarity} messages required.`, memory: toSerializableHypaV3Data(data), }; } } const toSummarize: OpenAIChat[] = []; let toSummarizeTokens = 0; let currentIndex = startIdx; console.log( logPrefix, "Evaluating summarization batch:", "\nCurrent Tokens:", currentTokens, "\nMax Context Tokens:", maxContextTokens, "\nStart Index:", startIdx, "\nMax Chats Per Summary:", settings.maxChatsPerSummary ); while ( toSummarize.length < settings.maxChatsPerSummary && currentIndex < chats.length - minChatsForSimilarity ) { const chat = chats[currentIndex]; const chatTokens = await tokenizer.tokenizeChat(chat); console.log( logPrefix, "Evaluating chat:", "\nIndex:", currentIndex, "\nRole:", chat.role, "\nContent:", "\n" + chat.content, "\nTokens:", chatTokens ); toSummarizeTokens += chatTokens; let shouldSummarize = true; if ( chat.name === "example_user" || chat.name === "example_assistant" || chat.memo === "NewChatExample" ) { console.log( logPrefix, `Skipping example chat at index ${currentIndex}` ); shouldSummarize = false; } if (chat.memo === "NewChat") { console.log(logPrefix, `Skipping new chat at index ${currentIndex}`); shouldSummarize = false; } if (chat.content.trim().length === 0) { console.log(logPrefix, `Skipping empty chat at index ${currentIndex}`); shouldSummarize = false; } if (settings.doNotSummarizeUserMessage && chat.role === "user") { console.log(logPrefix, `Skipping user role at index ${currentIndex}`); shouldSummarize = false; } if (shouldSummarize) { toSummarize.push(chat); } currentIndex++; } // Stop summarization if further reduction would go below target tokens (unless we're over max tokens) if ( currentTokens <= maxContextTokens && currentTokens - toSummarizeTokens < targetTokens ) { console.log( 
logPrefix, "Stopping summarization:", `\ncurrentTokens(${currentTokens}) - toSummarizeTokens(${toSummarizeTokens}) < targetTokens(${targetTokens})` ); break; } // Collect summarization batch if (toSummarize.length > 0) { console.log( logPrefix, "Collecting summarization batch:", "\nTarget:", toSummarize ); toSummarizeArray.push([...toSummarize]); } currentTokens -= toSummarizeTokens; startIdx = currentIndex; } // Process all collected summarization tasks if (toSummarizeArray.length > 0) { // Initialize rate limiter // Local model must be processed sequentially const rateLimiter = new TaskRateLimiter({ tasksPerMinute: settings.summarizationModel === "subModel" ? settings.summarizationRequestsPerMinute : 1000, maxConcurrentTasks: settings.summarizationModel === "subModel" ? settings.summarizationMaxConcurrent : 1, }); rateLimiter.taskQueueChangeCallback = (queuedCount) => { hypaV3ProgressStore.set({ open: true, miniMsg: `${rateLimiter.queuedTaskCount}`, msg: `${logPrefix} Summarizing...`, subMsg: `${rateLimiter.queuedTaskCount} queued`, }); }; const summarizationTasks = toSummarizeArray.map( (item) => () => summarize(item) ); // Start of performance measurement: summarize console.log( logPrefix, `Starting ${toSummarizeArray.length} summarization.` ); const summarizeStartTime = performance.now(); const batchResult = await rateLimiter.executeBatch( summarizationTasks ); const summarizeEndTime = performance.now(); console.debug( `${logPrefix} summarization completed in ${ summarizeEndTime - summarizeStartTime }ms` ); // End of performance measurement: summarize hypaV3ProgressStore.set({ open: false, miniMsg: "", msg: "", subMsg: "", }); // Note: // We can't save some successful summaries to the DB temporarily // because don't know the actual summarization model name. // It is possible that the user can change the summarization model. for (let i = 0; i < batchResult.results.length; i++) { const result = batchResult.results[i]; // Push consecutive successes if (!result.success || !result.data) { const errorMessage = !result.success ? result.error : "Empty summary returned"; console.log(logPrefix, "Summarization failed:", `\n${errorMessage}`); return { currentTokens, chats, error: `${logPrefix} Summarization failed: ${errorMessage}`, memory: toSerializableHypaV3Data(data), }; } const summaryText = result.data; data.summaries.push({ text: summaryText, chatMemos: new Set(toSummarizeArray[i].map((chat) => chat.memo)), isImportant: false, }); } } console.log( logPrefix, `${summarizationMode ? 
"Completed" : "Skipped"} summarization phase:`, "\nCurrent Tokens:", currentTokens, "\nMax Context Tokens:", maxContextTokens, "\nAvailable Memory Tokens:", availableMemoryTokens ); // Early return if no summaries if (data.summaries.length === 0) { const newChats: OpenAIChat[] = chats.slice(startIdx); console.log( logPrefix, "Exiting function:", "\nCurrent Tokens:", currentTokens, "\nAll chats, including memory prompt:", newChats, "\nMemory Data:", data ); return { currentTokens, chats: newChats, memory: toSerializableHypaV3Data(data), }; } const selectedSummaries: Summary[] = []; const randomMemoryRatio = 1 - settings.recentMemoryRatio - settings.similarMemoryRatio; // Select important summaries { const selectedImportantSummaries: Summary[] = []; for (const summary of data.summaries) { if (summary.isImportant) { const summaryTokens = await tokenizer.tokenizeChat({ role: "system", content: summary.text + summarySeparator, }); if (summaryTokens > availableMemoryTokens) { break; } selectedImportantSummaries.push(summary); availableMemoryTokens -= summaryTokens; } } selectedSummaries.push(...selectedImportantSummaries); console.log( logPrefix, "After important memory selection:", "\nSummary Count:", selectedImportantSummaries.length, "\nSummaries:", selectedImportantSummaries, "\nAvailable Memory Tokens:", availableMemoryTokens ); } // Select recent summaries const reservedRecentMemoryTokens = Math.floor( availableMemoryTokens * settings.recentMemoryRatio ); let consumedRecentMemoryTokens = 0; if (settings.recentMemoryRatio > 0) { const selectedRecentSummaries: Summary[] = []; // Target only summaries that haven't been selected yet const unusedSummaries = data.summaries.filter( (e) => !selectedSummaries.includes(e) ); // Add one by one from the end for (let i = unusedSummaries.length - 1; i >= 0; i--) { const summary = unusedSummaries[i]; const summaryTokens = await tokenizer.tokenizeChat({ role: "system", content: summary.text + summarySeparator, }); if ( summaryTokens + consumedRecentMemoryTokens > reservedRecentMemoryTokens ) { break; } selectedRecentSummaries.push(summary); consumedRecentMemoryTokens += summaryTokens; } selectedSummaries.push(...selectedRecentSummaries); console.log( logPrefix, "After recent memory selection:", "\nSummary Count:", selectedRecentSummaries.length, "\nSummaries:", selectedRecentSummaries, "\nReserved Tokens:", reservedRecentMemoryTokens, "\nConsumed Tokens:", consumedRecentMemoryTokens ); } // Select similar summaries let reservedSimilarMemoryTokens = Math.floor( availableMemoryTokens * settings.similarMemoryRatio ); let consumedSimilarMemoryTokens = 0; if (settings.similarMemoryRatio > 0) { const selectedSimilarSummaries: Summary[] = []; // Utilize unused token space from recent selection if (randomMemoryRatio <= 0) { const unusedRecentTokens = reservedRecentMemoryTokens - consumedRecentMemoryTokens; reservedSimilarMemoryTokens += unusedRecentTokens; console.log( logPrefix, "Additional available token space for similar memory:", "\nFrom recent:", unusedRecentTokens ); } // Target only summaries that haven't been selected yet const unusedSummaries = data.summaries.filter( (e) => !selectedSummaries.includes(e) ); // Dynamically generate embedding texts const ebdTexts: EmbeddingText[] = unusedSummaries.flatMap( (summary) => { const splitted = summary.text .split("\n\n") .filter((e) => e.trim().length > 0); return splitted.map((e) => ({ content: e.trim(), metadata: summary, })); } ); // Initialize embedding processor const processor = new HypaProcessorV2({ 
rateLimiter: new TaskRateLimiter({ tasksPerMinute: settings.embeddingRequestsPerMinute, maxConcurrentTasks: settings.embeddingMaxConcurrent, }), }); processor.progressCallback = (queuedCount) => { hypaV3ProgressStore.set({ open: true, miniMsg: `${queuedCount}`, msg: `${logPrefix} Similarity searching...`, subMsg: `${queuedCount} queued`, }); }; try { // Start of performance measurement: addTexts console.log( `${logPrefix} Starting addTexts with ${ebdTexts.length} chunks` ); const addStartTime = performance.now(); // Add EmbeddingTexts to processor for similarity search await processor.addTexts(ebdTexts); const addEndTime = performance.now(); console.debug( `${logPrefix} addTexts completed in ${addEndTime - addStartTime}ms` ); // End of performance measurement: addTexts } catch (error) { return { currentTokens, chats, error: `${logPrefix} Similarity search failed: ${error}`, memory: toSerializableHypaV3Data(data), }; } finally { hypaV3ProgressStore.set({ open: false, miniMsg: "", msg: "", subMsg: "", }); } const recentChats = chats .slice(-minChatsForSimilarity) .filter((chat) => chat.content.trim().length > 0); const queries: string[] = recentChats.flatMap((chat) => { return chat.content.split("\n\n").filter((e) => e.trim().length > 0); }); if (queries.length > 0) { const scoredSummaries = new Map(); try { // Start of performance measurement: similarity search console.log( `${logPrefix} Starting similarity search with ${recentChats.length} queries` ); const searchStartTime = performance.now(); const batchScoredResults = await processor.similaritySearchScoredBatch( queries ); const searchEndTime = performance.now(); console.debug( `${logPrefix} Similarity search completed in ${ searchEndTime - searchStartTime }ms` ); // End of performance measurement: similarity search for (const scoredResults of batchScoredResults) { for (const [ebdResult, similarity] of scoredResults) { const summary = ebdResult.metadata; scoredSummaries.set( summary, (scoredSummaries.get(summary) || 0) + similarity ); } } } catch (error) { return { currentTokens, chats, error: `${logPrefix} Similarity search failed: ${error}`, memory: toSerializableHypaV3Data(data), }; } finally { hypaV3ProgressStore.set({ open: false, miniMsg: "", msg: "", subMsg: "", }); } // Normalize scores if (scoredSummaries.size > 0) { const maxScore = Math.max(...scoredSummaries.values()); for (const [summary, score] of scoredSummaries.entries()) { scoredSummaries.set(summary, score / maxScore); } } // Sort in descending order const scoredArray = [...scoredSummaries.entries()].sort( ([, scoreA], [, scoreB]) => scoreB - scoreA ); while (scoredArray.length > 0) { const [summary] = scoredArray.shift(); const summaryTokens = await tokenizer.tokenizeChat({ role: "system", content: summary.text + summarySeparator, }); /* console.log( logPrefix, "Trying to add similar summary:", "\nSummary Tokens:", summaryTokens, "\nConsumed Similar Memory Tokens:", consumedSimilarMemoryTokens, "\nReserved Tokens:", reservedSimilarMemoryTokens, "\nWould exceed:", summaryTokens + consumedSimilarMemoryTokens > reservedSimilarMemoryTokens ); */ if ( summaryTokens + consumedSimilarMemoryTokens > reservedSimilarMemoryTokens ) { console.log( logPrefix, "Stopping similar memory selection:", `\nconsumedSimilarMemoryTokens(${consumedSimilarMemoryTokens}) + summaryTokens(${summaryTokens}) > reservedSimilarMemoryTokens(${reservedSimilarMemoryTokens})` ); break; } selectedSimilarSummaries.push(summary); consumedSimilarMemoryTokens += summaryTokens; } 
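      // Merge the similarity-ranked picks into the overall selection; chronological order is restored later by the index-based sort.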
selectedSummaries.push(...selectedSimilarSummaries); } console.log( logPrefix, "After similar memory selection:", "\nSummary Count:", selectedSimilarSummaries.length, "\nSummaries:", selectedSimilarSummaries, "\nReserved Tokens:", reservedSimilarMemoryTokens, "\nConsumed Tokens:", consumedSimilarMemoryTokens ); } // Select random summaries let reservedRandomMemoryTokens = Math.floor( availableMemoryTokens * randomMemoryRatio ); let consumedRandomMemoryTokens = 0; if (randomMemoryRatio > 0) { const selectedRandomSummaries: Summary[] = []; // Utilize unused token space from recent and similar selection const unusedRecentTokens = reservedRecentMemoryTokens - consumedRecentMemoryTokens; const unusedSimilarTokens = reservedSimilarMemoryTokens - consumedSimilarMemoryTokens; reservedRandomMemoryTokens += unusedRecentTokens + unusedSimilarTokens; console.log( logPrefix, "Additional available token space for random memory:", "\nFrom recent:", unusedRecentTokens, "\nFrom similar:", unusedSimilarTokens, "\nTotal added:", unusedRecentTokens + unusedSimilarTokens ); // Target only summaries that haven't been selected yet const unusedSummaries = data.summaries .filter((e) => !selectedSummaries.includes(e)) .sort(() => Math.random() - 0.5); // Random shuffle for (const summary of unusedSummaries) { const summaryTokens = await tokenizer.tokenizeChat({ role: "system", content: summary.text + summarySeparator, }); if ( summaryTokens + consumedRandomMemoryTokens > reservedRandomMemoryTokens ) { // Trying to select more random memory continue; } selectedRandomSummaries.push(summary); consumedRandomMemoryTokens += summaryTokens; } selectedSummaries.push(...selectedRandomSummaries); console.log( logPrefix, "After random memory selection:", "\nSummary Count:", selectedRandomSummaries.length, "\nSummaries:", selectedRandomSummaries, "\nReserved Tokens:", reservedRandomMemoryTokens, "\nConsumed Tokens:", consumedRandomMemoryTokens ); } // Sort selected summaries chronologically (by index) selectedSummaries.sort( (a, b) => data.summaries.indexOf(a) - data.summaries.indexOf(b) ); // Generate final memory prompt const memory = wrapWithXml( memoryPromptTag, selectedSummaries.map((e) => e.text).join(summarySeparator) ); const realMemoryTokens = await tokenizer.tokenizeChat({ role: "system", content: memory, }); // Release reserved memory tokens if (shouldReserveMemoryTokens) { currentTokens -= memoryTokens; } currentTokens += realMemoryTokens; console.log( logPrefix, "Final memory selection:", "\nSummary Count:", selectedSummaries.length, "\nSummaries:", selectedSummaries, "\nReal Memory Tokens:", realMemoryTokens, "\nAvailable Memory Tokens:", availableMemoryTokens ); if (currentTokens > maxContextTokens) { throw new Error( `Unexpected error: input token count (${currentTokens}) exceeds max context size (${maxContextTokens})` ); } // Save last selected summaries data.lastSelectedSummaries = selectedSummaries.map((selectedSummary) => data.summaries.findIndex((summary) => summary === selectedSummary) ); const newChats: OpenAIChat[] = [ { role: "system", content: memory, memo: "supaMemory", }, ...chats.slice(startIdx), ]; console.log( logPrefix, "Exiting function:", "\nCurrent Tokens:", currentTokens, "\nAll chats, including memory prompt:", newChats, "\nMemory Data:", data ); return { currentTokens, chats: newChats, memory: toSerializableHypaV3Data(data), }; } async function hypaMemoryV3Main( chats: OpenAIChat[], currentTokens: number, maxContextTokens: number, room: Chat, char: character | groupChat, tokenizer: 
    ChatTokenizer
): Promise<HypaV3Result> {
  const db = getDatabase();
  const settings = getCurrentHypaV3Preset().settings;

  // Validate settings
  if (settings.recentMemoryRatio + settings.similarMemoryRatio > 1) {
    return {
      currentTokens,
      chats,
      error: `${logPrefix} The sum of Recent Memory Ratio and Similar Memory Ratio is greater than 1.`,
    };
  }

  // Initial token correction
  currentTokens -= db.maxResponse;

  // Load existing hypa data if available
  let data: HypaV3Data = {
    summaries: [],
    lastSelectedSummaries: [],
  };

  if (room.hypaV3Data) {
    data = toHypaV3Data(room.hypaV3Data);
  }

  // Clean orphaned summaries
  if (!settings.preserveOrphanedMemory) {
    cleanOrphanedSummary(chats, data);
  }

  // Determine starting index
  let startIdx = 0;

  if (data.summaries.length > 0) {
    const lastSummary = data.summaries.at(-1);
    const lastChatIndex = chats.findIndex(
      (chat) => chat.memo === [...lastSummary.chatMemos].at(-1)
    );

    if (lastChatIndex !== -1) {
      startIdx = lastChatIndex + 1;

      // Exclude tokens from summarized chats
      const summarizedChats = chats.slice(0, lastChatIndex + 1);

      for (const chat of summarizedChats) {
        currentTokens -= await tokenizer.tokenizeChat(chat);
      }
    }
  }

  console.log(logPrefix, "Starting index:", startIdx);

  // Reserve memory tokens
  const emptyMemoryTokens = await tokenizer.tokenizeChat({
    role: "system",
    content: wrapWithXml(memoryPromptTag, ""),
  });
  const memoryTokens = Math.floor(
    maxContextTokens * settings.memoryTokensRatio
  );
  const shouldReserveEmptyMemoryTokens =
    data.summaries.length === 0 &&
    currentTokens + emptyMemoryTokens <= maxContextTokens;
  let availableMemoryTokens = shouldReserveEmptyMemoryTokens
    ? 0
    : memoryTokens - emptyMemoryTokens;

  if (shouldReserveEmptyMemoryTokens) {
    currentTokens += emptyMemoryTokens;
    console.log(logPrefix, "Reserved empty memory tokens:", emptyMemoryTokens);
  } else {
    currentTokens += memoryTokens;
    console.log(logPrefix, "Reserved max memory tokens:", memoryTokens);
  }

  // If summarization is needed
  const summarizationMode = currentTokens > maxContextTokens;
  const targetTokens =
    maxContextTokens * (1 - settings.extraSummarizationRatio);

  while (summarizationMode) {
    if (currentTokens <= targetTokens) {
      break;
    }

    if (chats.length - startIdx <= minChatsForSimilarity) {
      if (currentTokens <= maxContextTokens) {
        break;
      } else {
        return {
          currentTokens,
          chats,
          error: `${logPrefix} Cannot summarize further: input token count (${currentTokens}) exceeds max context size (${maxContextTokens}), but minimum ${minChatsForSimilarity} messages required.`,
          memory: toSerializableHypaV3Data(data),
        };
      }
    }

    const toSummarize: OpenAIChat[] = [];
    const endIdx = Math.min(
      startIdx + settings.maxChatsPerSummary,
      chats.length - minChatsForSimilarity
    );
    let toSummarizeTokens = 0;

    console.log(
      logPrefix,
      "Evaluating summarization batch:",
      "\nCurrent Tokens:",
      currentTokens,
      "\nMax Context Tokens:",
      maxContextTokens,
      "\nStart Index:",
      startIdx,
      "\nEnd Index:",
      endIdx,
      "\nChat Count:",
      endIdx - startIdx,
      "\nMax Chats Per Summary:",
      settings.maxChatsPerSummary
    );

    for (let i = startIdx; i < endIdx; i++) {
      const chat = chats[i];
      const chatTokens = await tokenizer.tokenizeChat(chat);

      console.log(
        logPrefix,
        "Evaluating chat:",
        "\nIndex:",
        i,
        "\nRole:",
        chat.role,
        "\nContent:",
        "\n" + chat.content,
        "\nTokens:",
        chatTokens
      );

      toSummarizeTokens += chatTokens;

      if (
        chat.name === "example_user" ||
        chat.name === "example_assistant" ||
        chat.memo === "NewChatExample"
      ) {
        console.log(logPrefix, `Skipping example chat at index ${i}`);
        continue;
      }

      if (chat.memo === "NewChat") {
        console.log(logPrefix, `Skipping new chat at index
${i}`); continue; } if (chat.content.trim().length === 0) { console.log(logPrefix, `Skipping empty chat at index ${i}`); continue; } if (settings.doNotSummarizeUserMessage && chat.role === "user") { console.log(logPrefix, `Skipping user role at index ${i}`); continue; } toSummarize.push(chat); } // Stop summarization if further reduction would go below target tokens (unless we're over max tokens) if ( currentTokens <= maxContextTokens && currentTokens - toSummarizeTokens < targetTokens ) { console.log( logPrefix, "Stopping summarization:", `\ncurrentTokens(${currentTokens}) - toSummarizeTokens(${toSummarizeTokens}) < targetTokens(${targetTokens})` ); break; } // Attempt summarization if (toSummarize.length > 0) { console.log( logPrefix, "Attempting summarization:", "\nTarget:", toSummarize ); try { const summarizeResult = await summarize(toSummarize); data.summaries.push({ text: summarizeResult, chatMemos: new Set(toSummarize.map((chat) => chat.memo)), isImportant: false, }); } catch (error) { console.log(logPrefix, "Summarization failed:", `\n${error}`); return { currentTokens, chats, error: `${logPrefix} Summarization failed: ${error}`, memory: toSerializableHypaV3Data(data), }; } } currentTokens -= toSummarizeTokens; startIdx = endIdx; } console.log( logPrefix, `${summarizationMode ? "Completed" : "Skipped"} summarization phase:`, "\nCurrent Tokens:", currentTokens, "\nMax Context Tokens:", maxContextTokens, "\nAvailable Memory Tokens:", availableMemoryTokens ); // Early return if no summaries if (data.summaries.length === 0) { // Generate final memory prompt const memory = wrapWithXml(memoryPromptTag, ""); const newChats: OpenAIChat[] = [ { role: "system", content: memory, memo: "supaMemory", }, ...chats.slice(startIdx), ]; console.log( logPrefix, "Exiting function:", "\nCurrent Tokens:", currentTokens, "\nAll chats, including memory prompt:", newChats, "\nMemory Data:", data ); return { currentTokens, chats: newChats, memory: toSerializableHypaV3Data(data), }; } const selectedSummaries: Summary[] = []; const randomMemoryRatio = 1 - settings.recentMemoryRatio - settings.similarMemoryRatio; // Select important summaries { const selectedImportantSummaries: Summary[] = []; for (const summary of data.summaries) { if (summary.isImportant) { const summaryTokens = await tokenizer.tokenizeChat({ role: "system", content: summary.text + summarySeparator, }); if (summaryTokens > availableMemoryTokens) { break; } selectedImportantSummaries.push(summary); availableMemoryTokens -= summaryTokens; } } selectedSummaries.push(...selectedImportantSummaries); console.log( logPrefix, "After important memory selection:", "\nSummary Count:", selectedImportantSummaries.length, "\nSummaries:", selectedImportantSummaries, "\nAvailable Memory Tokens:", availableMemoryTokens ); } // Select recent summaries const reservedRecentMemoryTokens = Math.floor( availableMemoryTokens * settings.recentMemoryRatio ); let consumedRecentMemoryTokens = 0; if (settings.recentMemoryRatio > 0) { const selectedRecentSummaries: Summary[] = []; // Target only summaries that haven't been selected yet const unusedSummaries = data.summaries.filter( (e) => !selectedSummaries.includes(e) ); // Add one by one from the end for (let i = unusedSummaries.length - 1; i >= 0; i--) { const summary = unusedSummaries[i]; const summaryTokens = await tokenizer.tokenizeChat({ role: "system", content: summary.text + summarySeparator, }); if ( summaryTokens + consumedRecentMemoryTokens > reservedRecentMemoryTokens ) { break; } 
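      // Summary fits the recent-memory budget: keep it and charge its tokens against the reservation.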
selectedRecentSummaries.push(summary); consumedRecentMemoryTokens += summaryTokens; } selectedSummaries.push(...selectedRecentSummaries); console.log( logPrefix, "After recent memory selection:", "\nSummary Count:", selectedRecentSummaries.length, "\nSummaries:", selectedRecentSummaries, "\nReserved Tokens:", reservedRecentMemoryTokens, "\nConsumed Tokens:", consumedRecentMemoryTokens ); } // Select similar summaries let reservedSimilarMemoryTokens = Math.floor( availableMemoryTokens * settings.similarMemoryRatio ); let consumedSimilarMemoryTokens = 0; if (settings.similarMemoryRatio > 0) { const selectedSimilarSummaries: Summary[] = []; // Utilize unused token space from recent selection if (randomMemoryRatio <= 0) { const unusedRecentTokens = reservedRecentMemoryTokens - consumedRecentMemoryTokens; reservedSimilarMemoryTokens += unusedRecentTokens; console.log( logPrefix, "Additional available token space for similar memory:", "\nFrom recent:", unusedRecentTokens ); } // Target only summaries that haven't been selected yet const unusedSummaries = data.summaries.filter( (e) => !selectedSummaries.includes(e) ); // Dynamically generate summary chunks const summaryChunks: SummaryChunk[] = []; unusedSummaries.forEach((summary) => { const splitted = summary.text .split("\n\n") .filter((e) => e.trim().length > 0); summaryChunks.push( ...splitted.map((e) => ({ text: e.trim(), summary, })) ); }); // Initialize embedding processor const processor = new HypaProcesserEx(db.hypaModel); processor.oaikey = db.supaMemoryKey; // Add summaryChunks to processor for similarity search try { await processor.addSummaryChunks(summaryChunks); } catch (error) { return { currentTokens, chats, error: `${logPrefix} Similarity search failed: ${error}`, memory: toSerializableHypaV3Data(data), }; } const scoredSummaries = new Map(); const recentChats = chats .slice(-minChatsForSimilarity) .filter((chat) => chat.content.trim().length > 0); if (recentChats.length > 0) { // Raw recent chat search const queries = recentChats.map((chat) => chat.content); if (settings.enableSimilarityCorrection && recentChats.length > 1) { // Raw + Summarized recent chat search // Summarizing is meaningful when there are more than 2 recent chats // Attempt summarization console.log( logPrefix, "Attempting summarization for similarity search:", "\nTarget:", recentChats ); try { const summarizeResult = await summarize(recentChats); queries.push(summarizeResult); } catch (error) { console.log(logPrefix, "Summarization failed:", `\n${error}`); return { currentTokens, chats, error: `${logPrefix} Summarization failed: ${error}`, memory: toSerializableHypaV3Data(data), }; } } try { for (const query of queries) { const scoredChunks = await processor.similaritySearchScoredEx(query); for (const [chunk, similarity] of scoredChunks) { const summary = chunk.summary; scoredSummaries.set( summary, (scoredSummaries.get(summary) || 0) + similarity ); } } } catch (error) { return { currentTokens, chats, error: `${logPrefix} Similarity search failed: ${error}`, memory: toSerializableHypaV3Data(data), }; } } // Sort in descending order const scoredArray = [...scoredSummaries.entries()].sort( ([, scoreA], [, scoreB]) => scoreB - scoreA ); while (scoredArray.length > 0) { const [summary] = scoredArray.shift(); const summaryTokens = await tokenizer.tokenizeChat({ role: "system", content: summary.text + summarySeparator, }); /* console.log( logPrefix, "Trying to add similar summary:", "\nSummary Tokens:", summaryTokens, "\nConsumed Similar Memory Tokens:", 
consumedSimilarMemoryTokens, "\nReserved Tokens:", reservedSimilarMemoryTokens, "\nWould exceed:", summaryTokens + consumedSimilarMemoryTokens > reservedSimilarMemoryTokens ); */ if ( summaryTokens + consumedSimilarMemoryTokens > reservedSimilarMemoryTokens ) { console.log( logPrefix, "Stopping similar memory selection:", `\nconsumedSimilarMemoryTokens(${consumedSimilarMemoryTokens}) + summaryTokens(${summaryTokens}) > reservedSimilarMemoryTokens(${reservedSimilarMemoryTokens})` ); break; } selectedSimilarSummaries.push(summary); consumedSimilarMemoryTokens += summaryTokens; } selectedSummaries.push(...selectedSimilarSummaries); console.log( logPrefix, "After similar memory selection:", "\nSummary Count:", selectedSimilarSummaries.length, "\nSummaries:", selectedSimilarSummaries, "\nReserved Tokens:", reservedSimilarMemoryTokens, "\nConsumed Tokens:", consumedSimilarMemoryTokens ); } // Select random summaries let reservedRandomMemoryTokens = Math.floor( availableMemoryTokens * randomMemoryRatio ); let consumedRandomMemoryTokens = 0; if (randomMemoryRatio > 0) { const selectedRandomSummaries: Summary[] = []; // Utilize unused token space from recent and similar selection const unusedRecentTokens = reservedRecentMemoryTokens - consumedRecentMemoryTokens; const unusedSimilarTokens = reservedSimilarMemoryTokens - consumedSimilarMemoryTokens; reservedRandomMemoryTokens += unusedRecentTokens + unusedSimilarTokens; console.log( logPrefix, "Additional available token space for random memory:", "\nFrom recent:", unusedRecentTokens, "\nFrom similar:", unusedSimilarTokens, "\nTotal added:", unusedRecentTokens + unusedSimilarTokens ); // Target only summaries that haven't been selected yet const unusedSummaries = data.summaries .filter((e) => !selectedSummaries.includes(e)) .sort(() => Math.random() - 0.5); // Random shuffle for (const summary of unusedSummaries) { const summaryTokens = await tokenizer.tokenizeChat({ role: "system", content: summary.text + summarySeparator, }); if ( summaryTokens + consumedRandomMemoryTokens > reservedRandomMemoryTokens ) { // Trying to select more random memory continue; } selectedRandomSummaries.push(summary); consumedRandomMemoryTokens += summaryTokens; } selectedSummaries.push(...selectedRandomSummaries); console.log( logPrefix, "After random memory selection:", "\nSummary Count:", selectedRandomSummaries.length, "\nSummaries:", selectedRandomSummaries, "\nReserved Tokens:", reservedRandomMemoryTokens, "\nConsumed Tokens:", consumedRandomMemoryTokens ); } // Sort selected summaries chronologically (by index) selectedSummaries.sort( (a, b) => data.summaries.indexOf(a) - data.summaries.indexOf(b) ); // Generate final memory prompt const memory = wrapWithXml( memoryPromptTag, selectedSummaries.map((e) => e.text).join(summarySeparator) ); const realMemoryTokens = await tokenizer.tokenizeChat({ role: "system", content: memory, }); // Release reserved memory tokens if (shouldReserveEmptyMemoryTokens) { currentTokens -= emptyMemoryTokens; } else { currentTokens -= memoryTokens; } currentTokens += realMemoryTokens; console.log( logPrefix, "Final memory selection:", "\nSummary Count:", selectedSummaries.length, "\nSummaries:", selectedSummaries, "\nReal Memory Tokens:", realMemoryTokens, "\nAvailable Memory Tokens:", availableMemoryTokens ); if (currentTokens > maxContextTokens) { throw new Error( `Unexpected error: input token count (${currentTokens}) exceeds max context size (${maxContextTokens})` ); } // Save last selected summaries data.lastSelectedSummaries = 
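  // Record, as indices into data.summaries, which summaries were included in this prompt.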
    selectedSummaries.map((selectedSummary) =>
      data.summaries.findIndex((summary) => summary === selectedSummary)
    );

  const newChats: OpenAIChat[] = [
    {
      role: "system",
      content: memory,
      memo: "supaMemory",
    },
    ...chats.slice(startIdx),
  ];

  console.log(
    logPrefix,
    "Exiting function:",
    "\nCurrent Tokens:",
    currentTokens,
    "\nAll chats, including memory prompt:",
    newChats,
    "\nMemory Data:",
    data
  );

  return {
    currentTokens,
    chats: newChats,
    memory: toSerializableHypaV3Data(data),
  };
}

function toHypaV3Data(serialData: SerializableHypaV3Data): HypaV3Data {
  return {
    ...serialData,
    summaries: serialData.summaries.map((summary) => ({
      ...summary,
      // Convert null back to undefined (JSON serialization converts undefined to null)
      chatMemos: new Set(
        summary.chatMemos.map((memo) => (memo === null ? undefined : memo))
      ),
    })),
  };
}

function toSerializableHypaV3Data(data: HypaV3Data): SerializableHypaV3Data {
  return {
    ...data,
    summaries: data.summaries.map((summary) => ({
      ...summary,
      chatMemos: [...summary.chatMemos],
    })),
  };
}

function cleanOrphanedSummary(chats: OpenAIChat[], data: HypaV3Data): void {
  // Collect all memos from current chats
  const currentChatMemos = new Set(chats.map((chat) => chat.memo));
  const originalLength = data.summaries.length;

  // Filter summaries - keep only those whose chatMemos are a subset of current chat memos
  data.summaries = data.summaries.filter((summary) => {
    return isSubset(summary.chatMemos, currentChatMemos);
  });

  const removedCount = originalLength - data.summaries.length;

  if (removedCount > 0) {
    console.log(logPrefix, `Cleaned ${removedCount} orphaned summaries.`);
  }
}

function isSubset<T>(subset: Set<T>, superset: Set<T>): boolean {
  for (const elem of subset) {
    if (!superset.has(elem)) {
      return false;
    }
  }

  return true;
}

function wrapWithXml(tag: string, content: string): string {
  return `<${tag}>\n${content}\n</${tag}>`;
}

export async function summarize(oaiMessages: OpenAIChat[]): Promise<string> {
  const db = getDatabase();
  const settings = getCurrentHypaV3Preset().settings;
  const strMessages = oaiMessages
    .map((chat) => `${chat.role}: ${chat.content}`)
    .join("\n");
  const summarizationPrompt =
    settings.summarizationPrompt.trim() === ""
      ? "[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output.]"
      : settings.summarizationPrompt;
  const formated: OpenAIChat[] = parseChatML(
    summarizationPrompt.replaceAll("{{slot}}", strMessages)
  ) ??
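  // Fallback when the summarization prompt is not valid ChatML: send the raw chat log as a user message and the prompt itself as a system message.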
  [
    {
      role: "user",
      content: strMessages,
    },
    {
      role: "system",
      content: summarizationPrompt,
    },
  ];

  // API
  if (settings.summarizationModel === "subModel") {
    console.log(logPrefix, `Using aux model ${db.subModel} for summarization.`);

    const response = await requestChatData(
      {
        formated,
        bias: {},
        useStreaming: false,
        noMultiGen: true,
      },
      "memory"
    );

    if (response.type === "streaming" || response.type === "multiline") {
      throw new Error("Unexpected response type");
    }

    if (response.type === "fail") {
      throw new Error(response.result);
    }

    if (!response.result || response.result.trim().length === 0) {
      throw new Error("Empty summary returned");
    }

    return response.result.trim();
  }

  // Local
  const content = await chatCompletion(formated, settings.summarizationModel, {
    max_tokens: 8192,
    temperature: 0,
    extra_body: {
      enable_thinking: false,
    },
  });

  if (!content || content.trim().length === 0) {
    throw new Error("Empty summary returned");
  }

  // Remove think content
  const thinkRegex = /<think>[\s\S]*?<\/think>/g;

  return content.replace(thinkRegex, "").trim();
}

export function getCurrentHypaV3Preset(): HypaV3Preset {
  const db = getDatabase();
  const preset = db.hypaV3Presets?.[db.hypaV3PresetId];

  if (!preset) {
    throw new Error("Preset not found. Please select a valid preset.");
  }

  return preset;
}

export function createHypaV3Preset(
  name = "New Preset",
  existingSettings = {}
): HypaV3Preset {
  const settings: HypaV3Settings = {
    summarizationModel: "subModel",
    summarizationPrompt: "",
    memoryTokensRatio: 0.2,
    extraSummarizationRatio: 0,
    maxChatsPerSummary: 6,
    recentMemoryRatio: 0.4,
    similarMemoryRatio: 0.4,
    enableSimilarityCorrection: false,
    preserveOrphanedMemory: false,
    processRegexScript: false,
    doNotSummarizeUserMessage: false,
    // Experimental
    useExperimentalImpl: false,
    summarizationRequestsPerMinute: 20,
    summarizationMaxConcurrent: 1,
    embeddingRequestsPerMinute: 100,
    embeddingMaxConcurrent: 1,
  };

  if (
    existingSettings &&
    typeof existingSettings === "object" &&
    !Array.isArray(existingSettings)
  ) {
    for (const [key, value] of Object.entries(existingSettings)) {
      if (key in settings && typeof value === typeof settings[key]) {
        settings[key] = value;
      }
    }
  }

  return {
    name,
    settings,
  };
}

interface SummaryChunkVector {
  chunk: SummaryChunk;
  vector: memoryVector;
}

class HypaProcesserEx extends HypaProcesser {
  // Maintain references to SummaryChunks and their associated memoryVectors
  summaryChunkVectors: SummaryChunkVector[] = [];

  async addSummaryChunks(chunks: SummaryChunk[]): Promise<void> {
    // Maintain the superclass's caching structure by adding texts
    const texts = chunks.map((chunk) => chunk.text);

    await this.addText(texts);

    // Create new SummaryChunkVectors
    const newSummaryChunkVectors: SummaryChunkVector[] = [];

    for (const chunk of chunks) {
      const vector = this.vectors.find((v) => v.content === chunk.text);

      if (!vector) {
        throw new Error(
          `Failed to create vector for summary chunk:\n${chunk.text}`
        );
      }

      newSummaryChunkVectors.push({
        chunk,
        vector,
      });
    }

    // Append new SummaryChunkVectors to the existing collection
    this.summaryChunkVectors.push(...newSummaryChunkVectors);
  }

  async similaritySearchScoredEx(
    query: string
  ): Promise<[SummaryChunk, number][]> {
    const queryVector = (await this.getEmbeds(query))[0];

    return this.summaryChunkVectors
      .map((scv) => ({
        chunk: scv.chunk,
        similarity: similarity(queryVector, scv.vector.embedding),
      }))
      .sort((a, b) => (a.similarity > b.similarity ? -1 : 0))
      .map((result) => [result.chunk, result.similarity]);
  }
}
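/*
 * Illustrative usage sketch (not part of the module). It shows how
 * createHypaV3Preset() merges partial settings: a key is copied only when it
 * exists in the defaults and the value's type matches. The `db` shape comes
 * from getCurrentHypaV3Preset() above; treating hypaV3PresetId as an array
 * index is an assumption, and the concrete values are arbitrary.
 *
 *   const preset = createHypaV3Preset("Long-term RP", {
 *     memoryTokensRatio: 0.3,   // copied: key exists and typeof matches (number)
 *     maxChatsPerSummary: "8",  // ignored: wrong type, default of 6 is kept
 *     unknownOption: true,      // ignored: not a known setting
 *   });
 *
 *   const db = getDatabase();
 *   db.hypaV3Presets.push(preset);
 *   db.hypaV3PresetId = db.hypaV3Presets.length - 1;
 *   // hypaMemoryV3() will now read these settings via getCurrentHypaV3Preset().
 */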