diff --git a/src/lib/ChatScreens/Chat.svelte b/src/lib/ChatScreens/Chat.svelte
index bb646480..95335e45 100644
--- a/src/lib/ChatScreens/Chat.svelte
+++ b/src/lib/ChatScreens/Chat.svelte
@@ -465,6 +465,7 @@
     }}>
+
diff --git a/src/lib/Others/AlertComp.svelte b/src/lib/Others/AlertComp.svelte
index 5584509d..b532562f 100644
--- a/src/lib/Others/AlertComp.svelte
+++ b/src/lib/Others/AlertComp.svelte
@@ -291,16 +291,10 @@
         {/each}
-
+
     {:else}
-        {#each DBState.db.characters[$selectedCharID].chats[DBState.db.characters[$selectedCharID].chatPage].hypaV2Data.chunks as chunk, i}
+        {#each DBState.db.characters[$selectedCharID].chats[DBState.db.characters[$selectedCharID].chatPage].hypaV2Data.mainChunks as chunk, i} // Summarized -> mainChunks
         {#if i === 0}
             Active
diff --git a/src/ts/process/memory/hypav2.ts b/src/ts/process/memory/hypav2.ts
index 32041286..ee6a151b 100644
--- a/src/ts/process/memory/hypav2.ts
+++ b/src/ts/process/memory/hypav2.ts
@@ -1,4 +1,9 @@
-import { getDatabase, type Chat, type character, type groupChat } from "src/ts/storage/database.svelte";
+import {
+    getDatabase,
+    type Chat,
+    type character,
+    type groupChat,
+} from "src/ts/storage/database.svelte";
 import type { OpenAIChat } from "../index.svelte";
 import type { ChatTokenizer } from "src/ts/tokenizer";
 import { requestChatData } from "../request";
@@ -11,59 +16,67 @@ export interface HypaV2Data {
     chunks: {
         text: string;
         targetId: string;
+        chatRange: [number, number]; // Start and end indices of chats summarized
     }[];
     mainChunks: {
         text: string;
         targetId: string;
+        chatRange: [number, number]; // Start and end indices of chats summarized
     }[];
 }

-async function summary(stringlizedChat: string): Promise<{ success: boolean; data: string }> {
+async function summary(
+    stringlizedChat: string
+): Promise<{ success: boolean; data: string }> {
     const db = getDatabase();
     console.log("Summarizing");
-    if (db.supaModelType === 'distilbart') {
+    if (db.supaModelType === "distilbart") {
         try {
             const sum = await runSummarizer(stringlizedChat);
             return { success: true, data: sum };
         } catch (error) {
             return {
                 success: false,
-                data: "SupaMemory: Summarizer: " + `${error}`
+                data: "SupaMemory: Summarizer: " + `${error}`,
             };
         }
     }

-    const supaPrompt = db.supaMemoryPrompt === '' ?
-        "[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output.]\n"
-        : db.supaMemoryPrompt;
-    let result = '';
+    const supaPrompt =
+        db.supaMemoryPrompt === ""
+            ? "[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output.]\n"
+            : db.supaMemoryPrompt;
+    let result = "";

-    if (db.supaModelType !== 'subModel') {
-        const promptbody = stringlizedChat + '\n\n' + supaPrompt + "\n\nOutput:";
+    if (db.supaModelType !== "subModel") {
+        const promptbody = stringlizedChat + "\n\n" + supaPrompt + "\n\nOutput:";
         const da = await globalFetch("https://api.openai.com/v1/completions", {
             headers: {
                 "Content-Type": "application/json",
-                "Authorization": "Bearer " + db.supaMemoryKey
+                Authorization: "Bearer " + db.supaMemoryKey,
             },
             method: "POST",
             body: {
-                "model": db.supaModelType === 'curie' ? "text-curie-001"
-                    : db.supaModelType === 'instruct35' ? 'gpt-3.5-turbo-instruct'
-                    : "text-davinci-003",
-                "prompt": promptbody,
-                "max_tokens": 600,
-                "temperature": 0
-            }
-        })
+                model:
+                    db.supaModelType === "curie"
+                        ? "text-curie-001"
+                        : db.supaModelType === "instruct35"
+                        ? "gpt-3.5-turbo-instruct"
+                        : "text-davinci-003",
+                prompt: promptbody,
+                max_tokens: 600,
+                temperature: 0,
+            },
+        });
         console.log("Using openAI instruct 3.5 for SupaMemory");
         try {
             if (!da.ok) {
                 return {
                     success: false,
-                    data: "SupaMemory: HTTP: " + JSON.stringify(da)
+                    data: "SupaMemory: HTTP: " + JSON.stringify(da),
                 };
             }
@@ -72,7 +85,7 @@ async function summary(stringlizedChat: string): Promise<{ success: boolean; dat
             if (!result) {
                 return {
                     success: false,
-                    data: "SupaMemory: HTTP: " + JSON.stringify(da)
+                    data: "SupaMemory: HTTP: " + JSON.stringify(da),
                 };
             }

@@ -80,34 +93,46 @@ async function summary(stringlizedChat: string): Promise<{ success: boolean; dat
         } catch (error) {
             return {
                 success: false,
-                data: "SupaMemory: HTTP: " + error
+                data: "SupaMemory: HTTP: " + error,
             };
         }
     } else {
-
-        let parsedPrompt = parseChatML(supaPrompt.replaceAll('{{slot}}', stringlizedChat))
+        let parsedPrompt = parseChatML(
+            supaPrompt.replaceAll("{{slot}}", stringlizedChat)
+        );
         const promptbody: OpenAIChat[] = parsedPrompt ?? [
             {
                 role: "user",
-                content: stringlizedChat
+                content: stringlizedChat,
             },
             {
                 role: "system",
-                content: supaPrompt
-            }
+                content: supaPrompt,
+            },
         ];
-        console.log("Using submodel: ", db.subModel, "for supaMemory model");
-        const da = await requestChatData({
-            formated: promptbody,
-            bias: {},
-            useStreaming: false,
-            noMultiGen: true
-        }, 'memory');
-        if (da.type === 'fail' || da.type === 'streaming' || da.type === 'multiline') {
+        console.log(
+            "Using submodel: ",
+            db.subModel,
+            "for supaMemory model"
+        );
+        const da = await requestChatData(
+            {
+                formated: promptbody,
+                bias: {},
+                useStreaming: false,
+                noMultiGen: true,
+            },
+            "memory"
+        );
+        if (
+            da.type === "fail" ||
+            da.type === "streaming" ||
+            da.type === "multiline"
+        ) {
             return {
                 success: false,
-                data: "SupaMemory: HTTP: " + da.result
+                data: "SupaMemory: HTTP: " + da.result,
             };
         }
         result = da.result;
@@ -115,6 +140,43 @@ async function summary(stringlizedChat: string): Promise<{ success: boolean; dat
     return { success: true, data: result };
 }

+function cleanInvalidChunks(
+    chats: OpenAIChat[],
+    data: HypaV2Data,
+    editedChatIndex?: number
+): void {
+    // If editedChatIndex is provided, remove chunks and mainChunks that summarize chats from that index onwards
+    if (editedChatIndex !== undefined) {
+        data.mainChunks = data.mainChunks.filter(
+            (chunk) => chunk.chatRange[1] < editedChatIndex
+        );
+        data.chunks = data.chunks.filter(
+            (chunk) => chunk.chatRange[1] < editedChatIndex
+        );
+    } else {
+        // Build a set of current chat memo IDs
+        const currentChatIds = new Set(chats.map((chat) => chat.memo));
+
+        // Filter mainChunks
+        data.mainChunks = data.mainChunks.filter((chunk) => {
+            // Check if all chat memos in the range exist
+            const [startIdx, endIdx] = chunk.chatRange;
+            for (let i = startIdx; i <= endIdx; i++) {
+                if (!currentChatIds.has(chats[i]?.memo)) {
+                    return false; // Chat no longer exists, remove this mainChunk
+                }
+            }
+            return true;
+        });
+
+        // Similarly for chunks
+        data.chunks = data.chunks.filter(() => {
+            // Since chunks are associated with mainChunks, they have been filtered already
+            return true;
+        });
+    }
+}
+
 export async function hypaMemoryV2(
     chats: OpenAIChat[],
     currentTokens: number,
@@ -122,12 +184,19 @@ export async function hypaMemoryV2(
     room: Chat,
     char: character | groupChat,
     tokenizer: ChatTokenizer,
-    arg: { asHyper?: boolean, summaryModel?: string, summaryPrompt?: string, hypaModel?: string } = {}
-): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?: string; memory?: HypaV2Data; }> {
-
+    editedChatIndex?: number
+): Promise<{
+    currentTokens: number;
+    chats: OpenAIChat[];
+    error?: string;
+    memory?: HypaV2Data;
+}> {
     const db = getDatabase();
     const data: HypaV2Data = room.hypaV2Data ?? { chunks: [], mainChunks: [] };

+    // Clean invalid chunks based on the edited chat index
+    cleanInvalidChunks(chats, data, editedChatIndex);
+
     let allocatedTokens = db.hypaAllocatedTokens;
     let chunkSize = db.hypaChunkSize;
     currentTokens += allocatedTokens + 50;
@@ -136,49 +205,40 @@ export async function hypaMemoryV2(
     // Error handling for infinite summarization attempts
     let summarizationFailures = 0;
     const maxSummarizationFailures = 3;
-    let lastMainChunkTargetId = '';
-
-    // Ensure correct targetId matching
-    const getValidChatIndex = (targetId: string) => {
-        return chats.findIndex(chat => chat.memo === targetId);
-    };
-
-    // Processing mainChunks
-    if (data.mainChunks.length > 0) {
-        const chunk = data.mainChunks[0];
-        const ind = getValidChatIndex(chunk.targetId);
-        if (ind !== -1) {
-            const removedChats = chats.splice(0, ind + 1);
-            console.log("removed chats", removedChats);
-            for (const chat of removedChats) {
-                currentTokens -= await tokenizer.tokenizeChat(chat);
-            }
-            mainPrompt = chunk.text;
-            const mpToken = await tokenizer.tokenizeChat({ role: 'system', content: mainPrompt });
-            allocatedTokens -= mpToken;
-        }
-    }
+    const summarizedIndices = new Set();

     // Token management loop
     while (currentTokens >= maxContextTokens) {
         let idx = 0;
-        let targetId = '';
+        let targetId = "";
         const halfData: OpenAIChat[] = [];
         let halfDataTokens = 0;
-        while (halfDataTokens < chunkSize && (idx <= chats.length - 4)) { // Ensure latest two chats are not added to summarization.
-            const chat = chats[idx];
-            halfDataTokens += await tokenizer.tokenizeChat(chat);
-            halfData.push(chat);
+        let startIdx = -1;
+
+        // Find the next batch of chats to summarize
+        while (
+            halfDataTokens < chunkSize &&
+            idx < chats.length - 2 // Ensure latest two chats are not added to summarization.
+        ) {
+            if (!summarizedIndices.has(idx)) {
+                const chat = chats[idx];
+                if (startIdx === -1) startIdx = idx;
+                halfDataTokens += await tokenizer.tokenizeChat(chat);
+                halfData.push(chat);
+                targetId = chat.memo;
+            }
             idx++;
-            targetId = chat.memo;
-            console.log("current target chat: ", chat);
         }
+        const endIdx = idx - 1; // End index of the chats being summarized

+        // Avoid summarizing the last two chats
         if (halfData.length < 3) break;

-        const stringlizedChat = halfData.map(e => `${e.role}: ${e.content}`).join('\n');
+        const stringlizedChat = halfData
+            .map((e) => `${e.role}: ${e.content}`)
+            .join("\n");
         const summaryData = await summary(stringlizedChat);

         if (!summaryData.success) {
@@ -187,7 +247,8 @@ export async function hypaMemoryV2(
                 return {
                     currentTokens: currentTokens,
                     chats: chats,
-                    error: "Summarization failed multiple times. Aborting to prevent infinite loop."
+                    error:
+                        "Summarization failed multiple times. Aborting to prevent infinite loop.",
                 };
             }
             continue;
@@ -195,117 +256,142 @@

         summarizationFailures = 0; // Reset failure counter on success

-        const summaryDataToken = await tokenizer.tokenizeChat({ role: 'system', content: summaryData.data });
+        const summaryDataToken = await tokenizer.tokenizeChat({
+            role: "system",
+            content: summaryData.data,
+        });
         mainPrompt += `\n\n${summaryData.data}`;
         currentTokens -= halfDataTokens;
         allocatedTokens -= summaryDataToken;

         data.mainChunks.unshift({
             text: summaryData.data,
-            targetId: targetId
+            targetId: targetId,
+            chatRange: [startIdx, endIdx],
         });

         // Split the summary into chunks based on double line breaks
-        const splitted = summaryData.data.split('\n\n').map(e => e.trim()).filter(e => e.length > 0);
+        const splitted = summaryData.data
+            .split("\n\n")
+            .map((e) => e.trim())
+            .filter((e) => e.length > 0);

         // Update chunks with the new summary
-        data.chunks.push(...splitted.map(e => ({
-            text: e,
-            targetId: targetId
-        })));
+        data.chunks.push(
+            ...splitted.map((e) => ({
+                text: e,
+                targetId: targetId,
+                chatRange: [startIdx, endIdx] as [number, number],
+            }))
+        );

-        // Remove summarized chats
-        chats.splice(0, idx);
+        // Mark the chats as summarized
+        for (let i = startIdx; i <= endIdx; i++) {
+            summarizedIndices.add(i);
+        }
     }

-    // Construct the mainPrompt from mainChunks until half of the allocatedTokens are used
+    // Construct the mainPrompt from mainChunks
     mainPrompt = "";
     let mainPromptTokens = 0;
     for (const chunk of data.mainChunks) {
-        const chunkTokens = await tokenizer.tokenizeChat({ role: 'system', content: chunk.text });
+        const chunkTokens = await tokenizer.tokenizeChat({
+            role: "system",
+            content: chunk.text,
+        });
         if (mainPromptTokens + chunkTokens > allocatedTokens / 2) break;
         mainPrompt += `\n\n${chunk.text}`;
         mainPromptTokens += chunkTokens;
-        lastMainChunkTargetId = chunk.targetId;
     }

     // Fetch additional memory from chunks
     const processor = new HypaProcesser(db.hypaModel);
     processor.oaikey = db.supaMemoryKey;

-    // Find the smallest index of chunks with the same targetId as lastMainChunkTargetId
-    const lastMainChunkIndex = data.chunks.reduce((minIndex, chunk, index) => {
-        if (chunk.targetId === lastMainChunkTargetId) {
-            return Math.min(minIndex, index);
-        }
-        return minIndex;
-    }, data.chunks.length);
-
-    // Filter chunks to only include those older than the last mainChunk's targetId
-    const olderChunks = lastMainChunkIndex !== data.chunks.length
-        ? data.chunks.slice(0, lastMainChunkIndex)
-        : data.chunks;
-
-    console.log("Older Chunks:", olderChunks);
-
-    // Add older chunks to processor for similarity search
-    await processor.addText(olderChunks.filter(v => v.text.trim().length > 0).map(v => "search_document: " + v.text.trim()));
+    // Add chunks to processor for similarity search
+    await processor.addText(
+        data.chunks
+            .filter((v) => v.text.trim().length > 0)
+            .map((v) => "search_document: " + v.text.trim())
+    );

     let scoredResults: { [key: string]: number } = {};
     for (let i = 0; i < 3; i++) {
         const pop = chats[chats.length - i - 1];
         if (!pop) break;
-        const searched = await processor.similaritySearchScored(`search_query: ${pop.content}`);
+        const searched = await processor.similaritySearchScored(
+            `search_query: ${pop.content}`
+        );
         for (const result of searched) {
             const score = result[1] / (i + 1);
             scoredResults[result[0]] = (scoredResults[result[0]] || 0) + score;
         }
     }

-    const scoredArray = Object.entries(scoredResults).sort((a, b) => b[1] - a[1]);
+    const scoredArray = Object.entries(scoredResults).sort(
+        (a, b) => b[1] - a[1]
+    );

     let chunkResultPrompts = "";
     let chunkResultTokens = 0;
-    while (allocatedTokens - mainPromptTokens - chunkResultTokens > 0 && scoredArray.length > 0) {
+    while (
+        allocatedTokens - mainPromptTokens - chunkResultTokens > 0 &&
+        scoredArray.length > 0
+    ) {
         const [text] = scoredArray.shift();
-        const tokenized = await tokenizer.tokenizeChat({ role: 'system', content: text.substring(14) });
-        if (tokenized > allocatedTokens - mainPromptTokens - chunkResultTokens) break;
-        chunkResultPrompts += text.substring(14) + '\n\n';
+        const tokenized = await tokenizer.tokenizeChat({
+            role: "system",
+            content: text.substring(14),
+        });
+        if (
+            tokenized >
+            allocatedTokens - mainPromptTokens - chunkResultTokens
+        )
+            break;
+        chunkResultPrompts += text.substring(14) + "\n\n";
         chunkResultTokens += tokenized;
     }

     const fullResult = `${mainPrompt}\n${chunkResultPrompts}`;

-    chats.unshift({
+    // Filter out summarized chats
+    const unsummarizedChats = chats.filter(
+        (_, idx) => !summarizedIndices.has(idx)
+    );
+
+    // Insert the memory system prompt at the beginning
+    unsummarizedChats.unshift({
         role: "system",
         content: fullResult,
-        memo: "supaMemory"
+        memo: "supaMemory",
     });

-    // Add the remaining chats after the last mainChunk's targetId
-    const lastTargetId = data.mainChunks.length > 0 ? data.mainChunks[0].targetId : null;
-    if (lastTargetId) {
-        const lastIndex = getValidChatIndex(lastTargetId);
-        if (lastIndex !== -1) {
-            const remainingChats = chats.slice(lastIndex + 1);
-            chats = [chats[0], ...remainingChats];
-        }
-    }
-
-    // Add last two chats if they exist and are not duplicates
-    if (lastTwoChats.length === 2) {
-        const [lastChat1, lastChat2] = lastTwoChats;
-        if (!chats.some(chat => chat.memo === lastChat1.memo)) {
-            chats.push(lastChat1);
-        }
-        if (!chats.some(chat => chat.memo === lastChat2.memo)) {
-            chats.push(lastChat2);
+    // Add the last two chats back if they were removed
+    const lastTwoChatsSet = new Set(lastTwoChats.map((chat) => chat.memo));
+    console.log(lastTwoChatsSet) // Not so sure if chat.memo is unique id.
+    for (const chat of lastTwoChats) {
+        if (!unsummarizedChats.find((c) => c.memo === chat.memo)) {
+            unsummarizedChats.push(chat);
         }
     }

-    console.log("model being used: ", db.hypaModel, db.supaModelType, "\nCurrent session tokens: ", currentTokens, "\nAll chats, including memory system prompt: ", chats, "\nMemory data, with all the chunks: ", data);
+    // Recalculate currentTokens
+    currentTokens = await tokenizer.tokenizeChats(unsummarizedChats);
+
+    console.log(
+        "Model being used: ",
+        db.hypaModel,
+        db.supaModelType,
+        "\nCurrent session tokens: ",
+        currentTokens,
+        "\nAll chats, including memory system prompt: ",
+        unsummarizedChats,
+        "\nMemory data, with all the chunks: ",
+        data
+    );
+
     return {
         currentTokens: currentTokens,
-        chats: chats,
-        memory: data
+        chats: unsummarizedChats,
+        memory: data,
     };
 }
diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts
index d767481d..a7fe4623 100644
--- a/src/ts/tokenizer.ts
+++ b/src/ts/tokenizer.ts
@@ -222,15 +222,15 @@ export async function tokenizeAccurate(data:string, consistantChar?:boolean) {

 export class ChatTokenizer {

-    private chatAdditonalTokens:number
+    private chatAdditionalTokens:number
     private useName:'name'|'noName'

-    constructor(chatAdditonalTokens:number, useName:'name'|'noName'){
-        this.chatAdditonalTokens = chatAdditonalTokens
+    constructor(chatAdditionalTokens:number, useName:'name'|'noName'){
+        this.chatAdditionalTokens = chatAdditionalTokens
         this.useName = useName
     }
     async tokenizeChat(data:OpenAIChat) {
-        let encoded = (await encode(data.content)).length + this.chatAdditonalTokens
+        let encoded = (await encode(data.content)).length + this.chatAdditionalTokens
         if(data.name && this.useName ==='name'){
             encoded += (await encode(data.name)).length + 1
         }
@@ -241,17 +241,24 @@ export class ChatTokenizer {
         }
         return encoded
     }
+    async tokenizeChats(data:OpenAIChat[]){
+        let encoded = 0
+        for(const chat of data){
+            encoded += await this.tokenizeChat(chat)
+        }
+        return encoded
+    }
     async tokenizeMultiModal(data:MultiModal){
         const db = getDatabase()
         if(!supportsInlayImage()){
-            return this.chatAdditonalTokens
+            return this.chatAdditionalTokens
         }
         if(db.gptVisionQuality === 'low'){
             return 87
         }
-        let encoded = this.chatAdditonalTokens
+        let encoded = this.chatAdditionalTokens
         let height = data.height ?? 0
         let width = data.width ?? 0