diff --git a/src/lib/ChatScreens/Chat.svelte b/src/lib/ChatScreens/Chat.svelte index c2f6f8ba..cf15196c 100644 --- a/src/lib/ChatScreens/Chat.svelte +++ b/src/lib/ChatScreens/Chat.svelte @@ -465,6 +465,7 @@ }}> + diff --git a/src/lib/Others/AlertComp.svelte b/src/lib/Others/AlertComp.svelte index 5584509d..7ef80344 100644 --- a/src/lib/Others/AlertComp.svelte +++ b/src/lib/Others/AlertComp.svelte @@ -287,20 +287,14 @@ {#if generationInfoMenuIndex === 0}
- {#each DBState.db.characters[$selectedCharID].chats[DBState.db.characters[$selectedCharID].chatPage].hypaV2Data.chunks as chunk} + {#each DBState.db.characters[$selectedCharID].chats[DBState.db.characters[$selectedCharID].chatPage].hypaV2Data.chunks as chunk, i} {/each} - +
{:else} - {#each DBState.db.characters[$selectedCharID].chats[DBState.db.characters[$selectedCharID].chatPage].hypaV2Data.chunks as chunk, i} + {#each DBState.db.characters[$selectedCharID].chats[DBState.db.characters[$selectedCharID].chatPage].hypaV2Data.mainChunks as chunk, i}
{#if i === 0} Active diff --git a/src/ts/process/index.svelte.ts b/src/ts/process/index.svelte.ts index 62bf88f7..dfd67895 100644 --- a/src/ts/process/index.svelte.ts +++ b/src/ts/process/index.svelte.ts @@ -803,9 +803,9 @@ export async function sendChat(chatProcessIndex = -1,arg:{ chats = hn.chats currentTokens = hn.tokens } - else if(DBState.db.hypav2){ //HypaV2 support needs to be changed like this. + else if(DBState.db.hypav2){ + console.log("Current chat's hypaV2 Data: ", currentChat.hypaV2Data) const sp = await hypaMemoryV2(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer) - console.log("All chats: ", chats) if(sp.error){ console.log(sp) alertError(sp.error) @@ -815,7 +815,9 @@ export async function sendChat(chatProcessIndex = -1,arg:{ currentTokens = sp.currentTokens currentChat.hypaV2Data = sp.memory ?? currentChat.hypaV2Data DBState.db.characters[selectedChar].chats[selectedChat].hypaV2Data = currentChat.hypaV2Data - console.log(currentChat.hypaV2Data) + + currentChat = DBState.db.characters[selectedChar].chats[selectedChat]; + console.log("[Expected to be updated] chat's HypaV2Data: ", currentChat.hypaV2Data) } else{ const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer, { diff --git a/src/ts/process/memory/hypamemory.ts b/src/ts/process/memory/hypamemory.ts index b6d1e883..07d7f0b8 100644 --- a/src/ts/process/memory/hypamemory.ts +++ b/src/ts/process/memory/hypamemory.ts @@ -172,8 +172,7 @@ export class HypaProcesser{ } async similaritySearchScored(query: string) { - const results = await this.similaritySearchVectorWithScore((await this.getEmbeds(query))[0],); - return results + return await this.similaritySearchVectorWithScore((await this.getEmbeds(query))[0],); } private async similaritySearchVectorWithScore( diff --git a/src/ts/process/memory/hypav2.ts b/src/ts/process/memory/hypav2.ts index c186232a..889fcc2e 100644 --- a/src/ts/process/memory/hypav2.ts +++ b/src/ts/process/memory/hypav2.ts @@ -1,4 +1,9 @@ -import { getDatabase, type Chat, type character, type groupChat } from "src/ts/storage/database.svelte"; +import { + getDatabase, + type Chat, + type character, + type groupChat, +} from "src/ts/storage/database.svelte"; import type { OpenAIChat } from "../index.svelte"; import type { ChatTokenizer } from "src/ts/tokenizer"; import { requestChatData } from "../request"; @@ -8,62 +13,71 @@ import { runSummarizer } from "../transformers"; import { parseChatML } from "src/ts/parser.svelte"; export interface HypaV2Data { - chunks: { + lastMainChunkID: number; // could be removed, but kept to make the code easier to follow + mainChunks: { // summary itself + id: number; text: string; - targetId: string; + chatMemos: Set<string>; // UUIDs of the summarized chats + lastChatMemo: string; }[]; - mainChunks: { - text: string; - targetId: string; + chunks: { // mainChunks split into smaller pieces for retrieval; the logic is awkward, so this may be removed later.
+ mainChunkID: number; + text:string; }[]; } -async function summary(stringlizedChat: string): Promise<{ success: boolean; data: string }> { +async function summary( + stringlizedChat: string +): Promise<{ success: boolean; data: string }> { const db = getDatabase(); console.log("Summarizing"); - if (db.supaModelType === 'distilbart') { + if (db.supaModelType === "distilbart") { try { const sum = await runSummarizer(stringlizedChat); return { success: true, data: sum }; } catch (error) { return { success: false, - data: "SupaMemory: Summarizer: " + `${error}` + data: "SupaMemory: Summarizer: " + `${error}`, }; } } - const supaPrompt = db.supaMemoryPrompt === '' ? - "[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output to reduce tokens for gpt3 and other sublanguage models]\n" - : db.supaMemoryPrompt; - let result = ''; + const supaPrompt = + db.supaMemoryPrompt === "" + ? "[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output.]\n" + : db.supaMemoryPrompt; + let result = ""; - if (db.supaModelType !== 'subModel') { - const promptbody = stringlizedChat + '\n\n' + supaPrompt + "\n\nOutput:"; + if (db.supaModelType !== "subModel") { + const promptbody = stringlizedChat + "\n\n" + supaPrompt + "\n\nOutput:"; const da = await globalFetch("https://api.openai.com/v1/completions", { headers: { "Content-Type": "application/json", - "Authorization": "Bearer " + db.supaMemoryKey + Authorization: "Bearer " + db.supaMemoryKey, }, method: "POST", body: { - "model": db.supaModelType === 'curie' ? "text-curie-001" - : db.supaModelType === 'instruct35' ? 'gpt-3.5-turbo-instruct' - : "text-davinci-003", - "prompt": promptbody, - "max_tokens": 600, - "temperature": 0 - } - }) + model: + db.supaModelType === "curie" + ? "text-curie-001" + : db.supaModelType === "instruct35" + ? "gpt-3.5-turbo-instruct" + : "text-davinci-003", + prompt: promptbody, + max_tokens: 600, + temperature: 0, + }, + }); console.log("Using openAI instruct 3.5 for SupaMemory"); try { if (!da.ok) { return { success: false, - data: "SupaMemory: HTTP: " + JSON.stringify(da) + data: "SupaMemory: HTTP: " + JSON.stringify(da), }; } @@ -72,7 +86,7 @@ async function summary(stringlizedChat: string): Promise<{ success: boolean; dat if (!result) { return { success: false, - data: "SupaMemory: HTTP: " + JSON.stringify(da) + data: "SupaMemory: HTTP: " + JSON.stringify(da), }; } @@ -80,17 +94,18 @@ async function summary(stringlizedChat: string): Promise<{ success: boolean; dat } catch (error) { return { success: false, - data: "SupaMemory: HTTP: " + error + data: "SupaMemory: HTTP: " + error, }; } } else { - - let parsedPrompt = parseChatML(supaPrompt.replaceAll('{{slot}}', stringlizedChat)) + let parsedPrompt = parseChatML( + supaPrompt.replaceAll("{{slot}}", stringlizedChat) + ); const promptbody: OpenAIChat[] = (parsedPrompt ?? [ { role: "user", - content: stringlizedChat + content: stringlizedChat, }, { role: "system", @@ -110,207 +125,473 @@ async function summary(stringlizedChat: string): Promise<{ success: boolean; dat if (da.type === 'fail' || da.type === 'streaming' || da.type === 'multiline') { return { success: false, - data: "SupaMemory: HTTP: " + da.result + data: "SupaMemory: HTTP: " + da.result, }; } result = da.result; } return { success: true, data: result }; +} // No, I am not going to touch any http API calls. 
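For reference, a HypaV2Data value in the reshaped format introduced above looks roughly like this. This is a minimal sketch with illustrative values only; none of the names or text below come from the diff itself. Each mainChunk records the summary text plus the set of chat memos it covers, and chunks are retrieval-sized slices pointing back to their mainChunk via mainChunkID.

// Illustrative only: a HypaV2Data value in the new shape.
const exampleMemory: HypaV2Data = {
    lastMainChunkID: 1,
    mainChunks: [
        {
            id: 1,
            text: "Alice met Bob at the harbor and they agreed to travel north.",
            chatMemos: new Set(["memo-uuid-1", "memo-uuid-2", "memo-uuid-3"]), // chats covered by this summary
            lastChatMemo: "memo-uuid-3", // newest chat included in this summary
        },
    ],
    chunks: [
        // retrieval slices of mainChunk 1, keyed back by mainChunkID
        { mainChunkID: 1, text: "Alice met Bob at the harbor." },
        { mainChunkID: 1, text: "They agreed to travel north." },
    ],
};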
+ +// Helper function start +export interface OldHypaV2Data { + chunks: { + text: string; + targetId: string; + }[]; + mainChunks: { + text: string; + targetId: string; + }[]; } +function isSubset(subset: Set, superset: Set): boolean { + for (const item of subset) { + if (!superset.has(item)) { + return false; + } + } + return true; +} +function isOldHypaV2Data(obj:any): obj is OldHypaV2Data { + return ( + typeof obj === 'object' && + obj !== null && + Array.isArray(obj.chunks) && + Array.isArray(obj.mainChunks) && + obj.chunks.every(chunk => + typeof chunk === 'object' && + chunk !== null && + typeof chunk.text === 'string' && + typeof chunk.targetId === 'string' + ) && + obj.mainChunks.every(mainChunk => + typeof mainChunk === 'object' && + mainChunk !== null && + typeof mainChunk.text === 'string' && + typeof mainChunk.targetId === 'string' + ) + ); +} +// Helper function end + +function convertOldToNewHypaV2Data(oldData: OldHypaV2Data, chats: OpenAIChat[]): HypaV2Data { + const oldMainChunks = oldData.mainChunks.slice().reverse(); // Inversed order, old mainchunk is done by unshift instead of push + const oldChunks = oldData.chunks.slice(); + const newData: HypaV2Data = { + lastMainChunkID: 0, + mainChunks: [], + chunks: [], + }; + + const mainChunkTargetIds = new Set(); + for (const mc of oldMainChunks) { + if (mc.targetId) { + mainChunkTargetIds.add(mc.targetId); + } + } + + // map chat memo to index, efficiency issues + const chatMemoToIndex = new Map(); + for (const tid of mainChunkTargetIds) { + const idx = chats.findIndex(c => c.memo === tid); + if (idx !== -1) { + chatMemoToIndex.set(tid, idx); + } else { + chatMemoToIndex.set(tid, -1); + } + } + + for (let i = 0; i < oldMainChunks.length; i++) { + const oldMainChunk = oldMainChunks[i]; + const targetId = oldMainChunk.targetId; + const mainChunkText = oldMainChunk.text; + + const previousMainChunk = i > 0 ? oldMainChunks[i - 1] : null; + const previousMainChunkTarget = previousMainChunk ? previousMainChunk.targetId : null; + + let chatMemos = new Set(); + + if (previousMainChunkTarget && targetId) { + const startIndex = chatMemoToIndex.get(previousMainChunkTarget) ?? -1; + const endIndex = chatMemoToIndex.get(targetId) ?? -1; + + if (startIndex !== -1 && endIndex !== -1) { + const lowerIndex = Math.min(startIndex, endIndex); + const upperIndex = Math.max(startIndex, endIndex); + + for (let j = lowerIndex; j <= upperIndex; j++) { + chatMemos.add(chats[j].memo); + } + } else { + // Can't identify the chats correctly, so discard this main chunk at all + continue; // Technically, if this is the case Previous HypaV2Data is bugged. Discussion opened for changing it to break; + } + } else { + // No previous chunk, so we gather all chats from index 0 up to the targetId's index + if (targetId) { + const targetIndex = chatMemoToIndex.get(targetId) ?? -1; + if (targetIndex !== -1) { + // Include all memos from 0 up to targetIndex + for (let j = 0; j <= targetIndex; j++) { + chatMemos.add(chats[j].memo); + } + } else { + continue; // Invalid MainChunk. 
+ } + } + } + const newMainChunk = { + id: newData.lastMainChunkID, + text: mainChunkText, + chatMemos: chatMemos, + lastChatMemo: targetId, + } + newData.mainChunks.push(newMainChunk); + newData.lastMainChunkID++; + // Adding chunks accordingly, matching MainChunkID by leveraging same targetId + const matchingOldChunks = oldChunks.filter((oldChunk) => oldChunk.targetId === targetId); + for (const oldChunk of matchingOldChunks) { + newData.chunks.push({ + mainChunkID: newMainChunk.id, + text: oldChunk.text, + }); + } + } + + return newData; // updated HypaV2Data +} + +function cleanInvalidChunks( + chats: OpenAIChat[], + data: HypaV2Data, +): void { + const currentChatMemos = new Set(chats.map((chat) => chat.memo)); + + // mainChunks filtering + data.mainChunks = data.mainChunks.filter((mainChunk) => { + return isSubset(mainChunk.chatMemos, currentChatMemos); + }); + + // chunk filtering based on mainChunk's id + const validMainChunkIds = new Set(data.mainChunks.map((mainChunk) => mainChunk.id)); + data.chunks = data.chunks.filter((chunk) => + validMainChunkIds.has(chunk.mainChunkID) + ); + // Update lastMainChunkID + if (data.mainChunks.length > 0) { + data.lastMainChunkID = data.mainChunks[data.mainChunks.length - 1].id; + } else { + data.lastMainChunkID = 0; + } +} + +export async function regenerateSummary( + chats: OpenAIChat[], + data: HypaV2Data, + mainChunkIndex: number +) : Promise { + const targetMainChunk = data.mainChunks[mainChunkIndex]; + +} export async function hypaMemoryV2( chats: OpenAIChat[], currentTokens: number, maxContextTokens: number, room: Chat, char: character | groupChat, - tokenizer: ChatTokenizer, - arg: { asHyper?: boolean, summaryModel?: string, summaryPrompt?: string, hypaModel?: string } = {} -): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?: string; memory?: HypaV2Data; }> { - + tokenizer: ChatTokenizer +): Promise<{ + currentTokens: number; + chats: OpenAIChat[]; + error?: string; + memory?: HypaV2Data; +}> { const db = getDatabase(); - const data: HypaV2Data = room.hypaV2Data ?? { chunks: [], mainChunks: [] }; + + if(room.hypaV2Data && isOldHypaV2Data(room.hypaV2Data)){ + console.log("Old HypaV2 data detected. Converting to new format..."); + room.hypaV2Data = convertOldToNewHypaV2Data(room.hypaV2Data, chats); + } + + const data: HypaV2Data = room.hypaV2Data ?? 
{ + lastMainChunkID: 0, + chunks: [], + mainChunks: [] + }; + + // Clean invalid HypaV2 data + cleanInvalidChunks(chats, data); let allocatedTokens = db.hypaAllocatedTokens; let chunkSize = db.hypaChunkSize; - currentTokens += allocatedTokens + 50; + currentTokens += allocatedTokens + chats.length * 4; // ChatML token counting from official openai documentation let mainPrompt = ""; const lastTwoChats = chats.slice(-2); // Error handling for infinite summarization attempts let summarizationFailures = 0; const maxSummarizationFailures = 3; - let lastMainChunkTargetId = ''; - // Ensure correct targetId matching - const getValidChatIndex = (targetId: string) => { - return chats.findIndex(chat => chat.memo === targetId); - }; - - // Processing mainChunks + // Find the index to start summarizing from + let idx = 2; // first two should not be considered if (data.mainChunks.length > 0) { - const chunk = data.mainChunks[0]; - const ind = getValidChatIndex(chunk.targetId); - if (ind !== -1) { - const removedChats = chats.splice(0, ind + 1); - console.log("removed chats", removedChats); - for (const chat of removedChats) { - currentTokens -= await tokenizer.tokenizeChat(chat); - } - mainPrompt = chunk.text; - const mpToken = await tokenizer.tokenizeChat({ role: 'system', content: mainPrompt }); - allocatedTokens -= mpToken; + const lastMainChunk = data.mainChunks[data.mainChunks.length - 1]; + const lastChatMemo = lastMainChunk.lastChatMemo; + const lastChatIndex = chats.findIndex(chat => chat.memo === lastChatMemo); + if (lastChatIndex !== -1) { + idx = lastChatIndex + 1; } } + // Starting chat index of new mainChunk to be generated - // Token management loop +// Token management loop (If current token usage exceeds allowed amount) while (currentTokens >= maxContextTokens) { - let idx = 0; - let targetId = ''; const halfData: OpenAIChat[] = []; - let halfDataTokens = 0; - while (halfDataTokens < chunkSize && (idx <= chats.length - 4)) { // Ensure latest two chats are not added to summarization. + + const startIdx = idx; + + console.log( + "Starting summarization iteration:", + "\nCurrent Tokens (before):", currentTokens, + "\nMax Context Tokens:", maxContextTokens, + "\nStartIdx:", startIdx, + "\nchunkSize:", chunkSize + ); + + // Accumulate chats to summarize + while ( + halfDataTokens < chunkSize && + idx < chats.length - 2 // keep the last two chats from summarizing(else, the roles will be fucked up) + ) { const chat = chats[idx]; - halfDataTokens += await tokenizer.tokenizeChat(chat); + const chatTokens = await tokenizer.tokenizeChat(chat); + + console.log( + "Evaluating chat for summarization:", + "\nIndex:", idx, + "\nRole:", chat.role, + "\nContent:", chat.content, + "\nchatTokens:", chatTokens, + "\nhalfDataTokens so far:", halfDataTokens, + "\nWould adding this exceed chunkSize?", (halfDataTokens + chatTokens > chunkSize) + ); + + // Check if adding this chat would exceed our chunkSize limit + if (halfDataTokens + chatTokens > chunkSize) { + // Can't add this chat without going over chunkSize + // Break out, and summarize what we have so far. 
+ break; + } + + // Add this chat to the halfData batch halfData.push(chat); + halfDataTokens += chatTokens; idx++; - targetId = chat.memo; - console.log("current target chat: ", chat); } - // Avoid summarizing the last two chats - if (halfData.length < 3) break; + const endIdx = idx - 1; + console.log( + "Summarization batch chosen with this:", + "\nStartIdx:", startIdx, + "\nEndIdx:", endIdx, + "\nNumber of chats in halfData:", halfData.length, + "\nTotal tokens in halfData:", halfDataTokens, + "\nChats selected:", halfData.map(h => ({role: h.role, content: h.content})) + ); - const stringlizedChat = halfData.map(e => `${e.role}: ${e.content}`).join('\n'); + // If no chats were added, break to avoid infinite loop + if (halfData.length === 0) { + console.log("No chats to summarize in this iteration, breaking out."); + break; + } + + const stringlizedChat = halfData + .map((e) => `${e.role}: ${e.content}`) + .join("\n"); + + // Summarize the accumulated chunk const summaryData = await summary(stringlizedChat); if (!summaryData.success) { + console.log("Summarization failed:", summaryData.data); summarizationFailures++; if (summarizationFailures >= maxSummarizationFailures) { + console.error("Summarization failed multiple times. Aborting..."); return { currentTokens: currentTokens, chats: chats, - error: "Summarization failed multiple times. Aborting to prevent infinite loop." + error: "Summarization failed multiple times. Aborting to prevent infinite loop.", }; } + // If summarization fails, try again in next iteration continue; } - summarizationFailures = 0; // Reset failure counter on success + summarizationFailures = 0; // Reset on success - const summaryDataToken = await tokenizer.tokenizeChat({ role: 'system', content: summaryData.data }); - mainPrompt += `\n\n${summaryData.data}`; - currentTokens -= halfDataTokens; - allocatedTokens -= summaryDataToken; - - data.mainChunks.unshift({ - text: summaryData.data, - targetId: targetId + const summaryDataToken = await tokenizer.tokenizeChat({ + role: "system", + content: summaryData.data, }); - // Split the summary into chunks based on double line breaks - const splitted = summaryData.data.split('\n\n').map(e => e.trim()).filter(e => e.length > 0); + console.log( + "Summarization success:", + "\nSummary Data:", summaryData.data, + "\nSummary Token Count:", summaryDataToken + ); - // Update chunks with the new summary - data.chunks.push(...splitted.map(e => ({ - text: e, - targetId: targetId - }))); + // **Token accounting fix:** + // Previous commits, the code likely have missed removing summarized chat's tokens. + // and never actually accounted for adding the summary tokens. + // Now we: + // 1. Remove old chats' tokens (they are replaced by summary) + // 2. 
Add summary tokens instead + currentTokens -= halfDataTokens; // remove original chats' tokens + currentTokens += summaryDataToken; // add the summary's tokens - // Remove summarized chats - chats.splice(0, idx); + console.log( + "After token adjustment:", + "\nRemoved halfDataTokens:", halfDataTokens, + "\nAdded summaryDataToken:", summaryDataToken, + "\nCurrent Tokens (after):", currentTokens + ); + + // Update lastMainChunkID and create a new mainChunk + data.lastMainChunkID++; + const newMainChunkId = data.lastMainChunkID; + + const chatMemos = new Set(halfData.map((chat) => chat.memo)); + const lastChatMemo = halfData[halfData.length - 1].memo; + + data.mainChunks.push({ + id: newMainChunkId, + text: summaryData.data, + chatMemos: chatMemos, + lastChatMemo: lastChatMemo, + }); + + // Split the summary into chunks + const splitted = summaryData.data + .split("\n\n") + .map((e) => e.trim()) + .filter((e) => e.length > 0); + + data.chunks.push( + ...splitted.map((e) => ({ + mainChunkID: newMainChunkId, + text: e, + })) + ); + + console.log( + "Chunks added:", + splitted, + "\nUpdated mainChunks count:", data.mainChunks.length, + "\nUpdated chunks count:", data.chunks.length + ); } - // Construct the mainPrompt from mainChunks until half of the allocatedTokens are used + // Construct the mainPrompt from mainChunks mainPrompt = ""; let mainPromptTokens = 0; for (const chunk of data.mainChunks) { - const chunkTokens = await tokenizer.tokenizeChat({ role: 'system', content: chunk.text }); + const chunkTokens = await tokenizer.tokenizeChat({ + role: "system", + content: chunk.text, + }); if (mainPromptTokens + chunkTokens > allocatedTokens / 2) break; mainPrompt += `\n\n${chunk.text}`; mainPromptTokens += chunkTokens; - lastMainChunkTargetId = chunk.targetId; } // Fetch additional memory from chunks - const searchDocumentPrefix = "search_document: "; const processor = new HypaProcesser(db.hypaModel); processor.oaikey = db.supaMemoryKey; - // Find the smallest index of chunks with the same targetId as lastMainChunkTargetId - const lastMainChunkIndex = data.chunks.reduce((minIndex, chunk, index) => { - if (chunk.targetId === lastMainChunkTargetId) { - return Math.min(minIndex, index); - } - return minIndex; - }, data.chunks.length); + const searchDocumentPrefix = "search_document: "; + const prefixLength = searchDocumentPrefix.length; - // Filter chunks to only include those older than the last mainChunk's targetId - const olderChunks = lastMainChunkIndex !== data.chunks.length - ? data.chunks.slice(0, lastMainChunkIndex) - : data.chunks; - - console.log("Older Chunks:", olderChunks); - - // Add older chunks to processor for similarity search - await processor.addText(olderChunks.filter(v => v.text.trim().length > 0).map(v => searchDocumentPrefix + v.text.trim())); + // Add chunks to processor for similarity search + await processor.addText( + data.chunks + .filter((v) => v.text.trim().length > 0) + .map((v) => searchDocumentPrefix + v.text.trim()) // sometimes this should not be used at all. RisuAI does not support embedding model that this is meaningful, isn't it? 
+ ); let scoredResults: { [key: string]: number } = {}; - for (let i = 0; i < 3; i++) { + for (let i = 0; i < 3; i++) { // Should parameterize this, fixed length 3 is a magic number without explanation const pop = chats[chats.length - i - 1]; if (!pop) break; - const searched = await processor.similaritySearchScored(`search_query: ${pop.content}`); + const searched = await processor.similaritySearchScored( + `search_query: ${pop.content}` + ); for (const result of searched) { const score = result[1] / (i + 1); scoredResults[result[0]] = (scoredResults[result[0]] || 0) + score; } } - const scoredArray = Object.entries(scoredResults).sort((a, b) => b[1] - a[1]); + const scoredArray = Object.entries(scoredResults).sort( + (a, b) => b[1] - a[1] + ); let chunkResultPrompts = ""; let chunkResultTokens = 0; - while (allocatedTokens - mainPromptTokens - chunkResultTokens > 0 && scoredArray.length > 0) { + while ( + allocatedTokens - mainPromptTokens - chunkResultTokens > 0 && + scoredArray.length > 0 + ) { const [text] = scoredArray.shift(); - const tokenized = await tokenizer.tokenizeChat({ role: 'system', content: text.substring(searchDocumentPrefix.length) }); - if (tokenized > allocatedTokens - mainPromptTokens - chunkResultTokens) break; - // Ensure strings are truncated correctly using searchDocumentPrefix.length - chunkResultPrompts += text.substring(searchDocumentPrefix.length) + '\n\n'; + const content = text.substring(prefixLength); + const tokenized = await tokenizer.tokenizeChat({ + role: "system", + content: content, + }); + if ( + tokenized > + allocatedTokens - mainPromptTokens - chunkResultTokens + ) + break; + chunkResultPrompts += content + "\n\n"; chunkResultTokens += tokenized; } const fullResult = `${mainPrompt}\n${chunkResultPrompts}`; - chats.unshift({ + // Filter out summarized chats + const unsummarizedChats = chats.slice(idx); + + // Insert the memory system prompt at the beginning + unsummarizedChats.unshift({ role: "system", content: fullResult, - memo: "supaMemory" + memo: "supaMemory", }); - // Add the remaining chats after the last mainChunk's targetId - const lastTargetId = data.mainChunks.length > 0 ? 
data.mainChunks[0].targetId : null; - if (lastTargetId) { - const lastIndex = getValidChatIndex(lastTargetId); - if (lastIndex !== -1) { - const remainingChats = chats.slice(lastIndex + 1); - chats = [chats[0], ...remainingChats]; - } - } - - // Add last two chats if they exist and are not duplicates - if (lastTwoChats.length === 2) { - const [lastChat1, lastChat2] = lastTwoChats; - if (!chats.some(chat => chat.memo === lastChat1.memo)) { - chats.push(lastChat1); - } - if (!chats.some(chat => chat.memo === lastChat2.memo)) { - chats.push(lastChat2); + for (const chat of lastTwoChats) { + if (!unsummarizedChats.find((c) => c.memo === chat.memo)) { + unsummarizedChats.push(chat); } } - console.log("model being used: ", db.hypaModel, db.supaModelType, "\nCurrent session tokens: ", currentTokens, "\nAll chats, including memory system prompt: ", chats, "\nMemory data, with all the chunks: ", data); + // Recalculate currentTokens + currentTokens = await tokenizer.tokenizeChats(unsummarizedChats); + + console.log( + "Model being used: ", + db.hypaModel, + db.supaModelType, + "\nCurrent session tokens: ", + currentTokens, + "\nAll chats, including memory system prompt: ", + unsummarizedChats, + "\nMemory data, with all the chunks: ", + data + ); + return { currentTokens: currentTokens, - chats: chats, - memory: data + chats: unsummarizedChats, + memory: data, }; -} +} \ No newline at end of file diff --git a/src/ts/storage/database.svelte.ts b/src/ts/storage/database.svelte.ts index 23367856..b425a1e1 100644 --- a/src/ts/storage/database.svelte.ts +++ b/src/ts/storage/database.svelte.ts @@ -229,6 +229,9 @@ export function setDatabase(data:Database){ if(checkNullish(data.supaMemoryKey)){ data.supaMemoryKey = "" } + if(checkNullish(data.hypaMemoryKey)){ + data.hypaMemoryKey = "" + } if(checkNullish(data.supaModelType)){ data.supaModelType = "none" } @@ -630,6 +633,7 @@ export interface Database{ useStreaming:boolean palmAPI:string, supaMemoryKey:string + hypaMemoryKey:string supaModelType:string textScreenColor?:string textBorder?:boolean diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index 442501fd..bbea6bf2 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -283,15 +283,15 @@ export async function tokenizeAccurate(data:string, consistantChar?:boolean) { export class ChatTokenizer { - private chatAdditonalTokens:number + private chatAdditionalTokens:number private useName:'name'|'noName' - constructor(chatAdditonalTokens:number, useName:'name'|'noName'){ - this.chatAdditonalTokens = chatAdditonalTokens + constructor(chatAdditionalTokens:number, useName:'name'|'noName'){ + this.chatAdditionalTokens = chatAdditionalTokens this.useName = useName } async tokenizeChat(data:OpenAIChat) { - let encoded = (await encode(data.content)).length + this.chatAdditonalTokens + let encoded = (await encode(data.content)).length + this.chatAdditionalTokens if(data.name && this.useName ==='name'){ encoded += (await encode(data.name)).length + 1 } @@ -302,17 +302,24 @@ export class ChatTokenizer { } return encoded } + async tokenizeChats(data:OpenAIChat[]){ + let encoded = 0 + for(const chat of data){ + encoded += await this.tokenizeChat(chat) + } + return encoded + } async tokenizeMultiModal(data:MultiModal){ const db = getDatabase() if(!supportsInlayImage()){ - return this.chatAdditonalTokens + return this.chatAdditionalTokens } if(db.gptVisionQuality === 'low'){ return 87 } - let encoded = this.chatAdditonalTokens + let encoded = this.chatAdditionalTokens let height = data.height ?? 
0 let width = data.width ?? 0
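The tokenizeChats helper added to ChatTokenizer simply awaits tokenizeChat for each message and sums the results; hypaMemoryV2 relies on it to recompute currentTokens once the memory system prompt has been inserted. A minimal usage sketch, assuming a per-message overhead of 3 tokens and illustrative messages (neither value comes from the diff):

// Sketch only: counting tokens for a small batch of messages.
const tokenizer = new ChatTokenizer(3, "noName"); // 3 is an assumed per-message overhead
const counted: OpenAIChat[] = [
    { role: "system", content: "Summary of earlier events in the story...", memo: "supaMemory" },
    { role: "user", content: "Where do we go next?", memo: "memo-uuid-4" },
];
// Equivalent to awaiting tokenizer.tokenizeChat(...) for each entry and summing the results.
const total = await tokenizer.tokenizeChats(counted);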