HypaV2 Custom settings enhancement (#511)

# PR Checklist
- [x] Did you check if it works normally in all models? *Ignore this
when it doesn't use models.*
- [x] Did you check if it works normally in all of the web, local, and
node-hosted versions? If it doesn't, did you block it in those versions?
- [x] Did you add a type def?

# Description
~~No bugs; works as intended. However, the actual summary output hasn't been
checked yet. Very unstable; needs confirmation/checking.~~
### **No bugs; works as intended. It summarizes correctly, moving mainChunks
to chunks works well, and there are no re-summarization issues. It works!**


Added a new variable to database.ts that indicates whether HypaV2 Memory is
activated:
`hypav2: boolean`
This was added to prevent the memory types from overlapping when HypaV2 Memory
is selected and the supaMemory summarization model is changed.
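
Below is a minimal, self-contained sketch of how the two settings interact after this change. The field names follow the diffs below; the `activeMemory` helper itself is illustrative and is not code from this PR.

```ts
// Sketch only: field names mirror database.ts, the selection helper is hypothetical.
interface MemorySettings {
    hypav2: boolean        // new flag added in database.ts
    supaModelType: string  // SupaMemory summarization model, e.g. 'none' | 'curie' | 'instruct35' | 'subModel' | 'distilbart'
}

function activeMemory(db: MemorySettings): 'hypaV2' | 'supaMemory' | 'none' {
    if (db.hypav2) return 'hypaV2'                        // HypaV2 is toggled independently of the summarizer model
    if (db.supaModelType !== 'none') return 'supaMemory'  // otherwise the SupaMemory path applies
    return 'none'
}

// Changing the summarization model no longer deselects HypaV2:
console.log(activeMemory({ hypav2: true, supaModelType: 'instruct35' })) // 'hypaV2'
```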

Added supaMemoryType selection, HypaMemoryType selection, and a
supaMemoryPrompt editing section to OtherBotSettings.svelte, and implemented
them in hypav2.ts.
Also added the OpenAI key field for when the summarization model is GPT-3.5
Instruct.
Also proposed a `memoryAlgorithmType: string` variable in database.ts, so that
more memory types can be added later.
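
A rough sketch of how the new fields could sit in the Database interface. Only `hypav2` and `memoryAlgorithmType` appear in the database.ts diff below; the example values for `memoryAlgorithmType` are assumptions, not part of this PR.

```ts
// Sketch only: abbreviated interface, the real database.ts has many more fields.
export interface Database {
    // ...existing fields...
    supaMemoryKey: string
    supaModelType: string        // renamed from supaMemoryType in this PR
    hypaMemory: boolean
    hypav2: boolean              // whether HypaV2 Memory is active
    memoryAlgorithmType: string  // proposed; e.g. 'supa' | 'hypa' | 'hypaV2' (assumed values)
}
```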

![image](https://github.com/kwaroran/RisuAI/assets/73149145/5d167b03-d7e7-41a1-8875-1780561cd3ac)

Fixed minor punctuation and changed the summarize function of
hypav2.ts (same as the one in supaMemory.ts).
Authored by kwaroran on 2024-06-19 03:49:19 +09:00; committed by GitHub.
7 changed files with 305 additions and 178 deletions

View File

@@ -714,7 +714,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
currentTokens += await tokenizer.tokenizeChat(chat)
}
if(nowChatroom.supaMemory && (db.supaMemoryType !== 'none' || db.hanuraiEnable)){
if(nowChatroom.supaMemory && (db.supaModelType !== 'none' || db.hanuraiEnable || db.hypav2)){
chatProcessStage.set(2)
if(db.hanuraiEnable){
const hn = await hanuraiMemory(chats, {
@@ -730,9 +730,11 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
chats = hn.chats
currentTokens = hn.tokens
}
else if(db.supaMemoryType === 'hypaV2'){
else if(db.hypav2){ //HypaV2 support needs to be changed like this.
const sp = await hypaMemoryV2(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer)
console.log("All chats: ", chats)
if(sp.error){
console.log(sp)
alertError(sp.error)
return false
}

View File

@@ -4,209 +4,306 @@ import type { ChatTokenizer } from "src/ts/tokenizer";
import { get } from "svelte/store";
import { requestChatData } from "../request";
import { HypaProcesser } from "./hypamemory";
import { globalFetch } from "src/ts/storage/globalApi";
import { runSummarizer } from "../transformers";
import { last, remove } from "lodash";
export interface HypaV2Data{
export interface HypaV2Data {
chunks: {
text:string
targetId:string
}[]
text: string;
targetId: string;
}[];
mainChunks: {
text:string
targetId:string
}[]
text: string;
targetId: string;
}[];
}
async function summary(stringlizedChat: string): Promise<{ success: boolean; data: string }> {
const db = get(DataBase);
console.log("Summarizing");
async function summary(stringlizedChat:string):Promise<{
success:boolean
data:string
}>{
const promptbody:OpenAIChat[] = [
{
role: "user",
content: stringlizedChat
},
{
role: "system",
content: "Summarize this roleplay scene in a coherent narrative format for future reference. Summarize what happened, focusing on events and interactions between them. If someone or something is new or changed, include a brief characterization of them."
}
]
const da = await requestChatData({
formated: promptbody,
bias: {},
useStreaming: false,
noMultiGen: true
}, 'model')
if(da.type === 'fail' || da.type === 'streaming' || da.type === 'multiline'){
return {
data: "Hypamemory HTTP: " + da.result,
success: false
if (db.supaModelType === 'distilbart') {
try {
const sum = await runSummarizer(stringlizedChat);
return { success: true, data: sum };
} catch (error) {
return {
success: false,
data: "SupaMemory: Summarizer: " + `${error}`
};
}
}
return {
data: da.result,
success: true
const supaPrompt = db.supaMemoryPrompt === '' ?
"[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output to reduce tokens for gpt3 and other sublanguage models]\n"
: db.supaMemoryPrompt;
let result = '';
if (db.supaModelType !== 'subModel') {
const promptbody = stringlizedChat + '\n\n' + supaPrompt + "\n\nOutput:";
const da = await globalFetch("https://api.openai.com/v1/completions", {
headers: {
"Content-Type": "application/json",
"Authorization": "Bearer " + db.supaMemoryKey
},
method: "POST",
body: {
"model": db.supaModelType === 'curie' ? "text-curie-001"
: db.supaModelType === 'instruct35' ? 'gpt-3.5-turbo-instruct'
: "text-davinci-003",
"prompt": promptbody,
"max_tokens": 600,
"temperature": 0
}
})
console.log("Using openAI instruct 3.5 for SupaMemory");
try {
if (!da.ok) {
return {
success: false,
data: "SupaMemory: HTTP: " + JSON.stringify(da)
};
}
result = (await da.data)?.choices[0]?.text?.trim();
if (!result) {
return {
success: false,
data: "SupaMemory: HTTP: " + JSON.stringify(da)
};
}
return { success: true, data: result };
} catch (error) {
return {
success: false,
data: "SupaMemory: HTTP: " + error
};
}
} else {
const promptbody: OpenAIChat[] = [
{
role: "user",
content: stringlizedChat
},
{
role: "system",
content: supaPrompt
}
];
console.log("Using submodel: ", db.subModel, "for supaMemory model");
const da = await requestChatData({
formated: promptbody,
bias: {},
useStreaming: false,
noMultiGen: true
}, 'submodel');
if (da.type === 'fail' || da.type === 'streaming' || da.type === 'multiline') {
return {
success: false,
data: "SupaMemory: HTTP: " + da.result
};
}
result = da.result;
}
return { success: true, data: result };
}
export async function hypaMemoryV2(
chats:OpenAIChat[],
currentTokens:number,
maxContextTokens:number,
room:Chat,
char:character|groupChat,
tokenizer:ChatTokenizer,
arg:{asHyper?:boolean} = {}
): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:HypaV2Data;}>{
chats: OpenAIChat[],
currentTokens: number,
maxContextTokens: number,
room: Chat,
char: character | groupChat,
tokenizer: ChatTokenizer,
arg: { asHyper?: boolean, summaryModel?: string, summaryPrompt?: string, hypaModel?: string } = {}
): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?: string; memory?: HypaV2Data; }> {
const db = get(DataBase)
const db = get(DataBase);
const data: HypaV2Data = room.hypaV2Data ?? { chunks: [], mainChunks: [] };
const data:HypaV2Data = room.hypaV2Data ?? {
chunks:[],
mainChunks:[]
}
//this is for the prompt
let allocatedTokens = db.hypaAllocatedTokens;
let chunkSize = db.hypaChunkSize;
currentTokens += allocatedTokens + 50;
let mainPrompt = "";
const lastTwoChats = chats.slice(-2);
// Error handling for infinite summarization attempts
let summarizationFailures = 0;
const maxSummarizationFailures = 3;
let lastMainChunkTargetId = '';
let allocatedTokens = db.hypaAllocatedTokens
let chunkSize = db.hypaChunkSize
currentTokens += allocatedTokens
currentTokens += 50 //this is for the template prompt
let mainPrompt = ""
// Ensure correct targetId matching
const getValidChatIndex = (targetId: string) => {
return chats.findIndex(chat => chat.memo === targetId);
};
while(data.mainChunks.length > 0){
const chunk = data.mainChunks[0]
const ind = chats.findIndex(e => e.memo === chunk.targetId)
if(ind === -1){
data.mainChunks.shift()
continue
// Processing mainChunks
if (data.mainChunks.length > 0) {
const chunk = data.mainChunks[0];
const ind = getValidChatIndex(chunk.targetId);
if (ind !== -1) {
const removedChats = chats.splice(0, ind + 1);
console.log("removed chats", removedChats);
for (const chat of removedChats) {
currentTokens -= await tokenizer.tokenizeChat(chat);
}
mainPrompt = chunk.text;
const mpToken = await tokenizer.tokenizeChat({ role: 'system', content: mainPrompt });
allocatedTokens -= mpToken;
}
const removedChats = chats.splice(0, ind)
for(const chat of removedChats){
currentTokens -= await tokenizer.tokenizeChat(chat)
}
chats = chats.slice(ind)
mainPrompt = chunk.text
const mpToken = await tokenizer.tokenizeChat({role:'system', content:mainPrompt})
allocatedTokens -= mpToken
break
}
while(currentTokens >= maxContextTokens){
let idx = 0
let targetId = ''
const halfData:OpenAIChat[] = []
// Token management loop
while (currentTokens >= maxContextTokens) {
let idx = 0;
let targetId = '';
const halfData: OpenAIChat[] = [];
let halfDataTokens = 0
while(halfDataTokens < chunkSize){
const chat = chats[idx]
if(!chat){
break
}
halfDataTokens += await tokenizer.tokenizeChat(chat)
halfData.push(chat)
idx++
targetId = chat.memo
let halfDataTokens = 0;
while (halfDataTokens < chunkSize && (idx <= chats.length - 4)) { // Ensure latest two chats are not added to summarization.
const chat = chats[idx];
halfDataTokens += await tokenizer.tokenizeChat(chat);
halfData.push(chat);
idx++;
targetId = chat.memo;
console.log("current target chat: ", chat);
}
const stringlizedChat = halfData.map(e => `${e.role}: ${e.content}`).join('\n')
// Avoid summarizing the last two chats
if (halfData.length < 3) break;
const summaryData = await summary(stringlizedChat)
const stringlizedChat = halfData.map(e => `${e.role}: ${e.content}`).join('\n');
const summaryData = await summary(stringlizedChat);
if(!summaryData.success){
return {
currentTokens: currentTokens,
chats: chats,
error: summaryData.data
if (!summaryData.success) {
summarizationFailures++;
if (summarizationFailures >= maxSummarizationFailures) {
return {
currentTokens: currentTokens,
chats: chats,
error: "Summarization failed multiple times. Aborting to prevent infinite loop."
};
}
continue;
}
const summaryDataToken = await tokenizer.tokenizeChat({role:'system', content:summaryData.data})
mainPrompt += `\n\n${summaryData.data}`
currentTokens -= halfDataTokens
allocatedTokens -= summaryDataToken
summarizationFailures = 0; // Reset failure counter on success
const summaryDataToken = await tokenizer.tokenizeChat({ role: 'system', content: summaryData.data });
mainPrompt += `\n\n${summaryData.data}`;
currentTokens -= halfDataTokens;
allocatedTokens -= summaryDataToken;
data.mainChunks.unshift({
text: mainPrompt,
text: summaryData.data,
targetId: targetId
})
});
if(allocatedTokens < 1500){
const summarizedMp = await summary(mainPrompt)
const mpToken = await tokenizer.tokenizeChat({role:'system', content:mainPrompt})
const summaryToken = await tokenizer.tokenizeChat({role:'system', content:summarizedMp.data})
// Split the summary into chunks based on double line breaks
const splitted = summaryData.data.split('\n\n').map(e => e.trim()).filter(e => e.length > 0);
allocatedTokens -= summaryToken
allocatedTokens += mpToken
// Update chunks with the new summary
data.chunks.push(...splitted.map(e => ({
text: e,
targetId: targetId
})));
const splited = mainPrompt.split('\n\n').map(e => e.trim()).filter(e => e.length > 0)
data.chunks.push(...splited.map(e => ({
text: e,
targetId: targetId
})))
data.mainChunks[0].text = mainPrompt
}
// Remove summarized chats
chats.splice(0, idx);
}
const processer = new HypaProcesser("nomic")
await processer.addText(data.chunks.filter(v => {
return v.text.trim().length > 0
}).map((v) => {
return "search_document: " + v.text.trim()
}))
// Construct the mainPrompt from mainChunks until half of the allocatedTokens are used
mainPrompt = "";
let mainPromptTokens = 0;
for (const chunk of data.mainChunks) {
const chunkTokens = await tokenizer.tokenizeChat({ role: 'system', content: chunk.text });
if (mainPromptTokens + chunkTokens > allocatedTokens / 2) break;
mainPrompt += `\n\n${chunk.text}`;
mainPromptTokens += chunkTokens;
lastMainChunkTargetId = chunk.targetId;
}
let scoredResults:{[key:string]:number} = {}
for(let i=0;i<3;i++){
const pop = chats[chats.length - i - 1]
if(!pop){
break
// Fetch additional memory from chunks
const processor = new HypaProcesser(db.hypaModel);
processor.oaikey = db.supaMemoryKey;
// Find the smallest index of chunks with the same targetId as lastMainChunkTargetId
const lastMainChunkIndex = data.chunks.reduce((minIndex, chunk, index) => {
if (chunk.targetId === lastMainChunkTargetId) {
return Math.min(minIndex, index);
}
const searched = await processer.similaritySearchScored(`search_query: ${pop.content}`)
for(const result of searched){
const score = result[1]/(i+1)
if(scoredResults[result[0]]){
scoredResults[result[0]] += score
}else{
scoredResults[result[0]] = score
}
return minIndex;
}, data.chunks.length);
// Filter chunks to only include those older than the last mainChunk's targetId
const olderChunks = lastMainChunkIndex !== data.chunks.length
? data.chunks.slice(0, lastMainChunkIndex)
: data.chunks;
console.log("Older Chunks:", olderChunks);
// Add older chunks to processor for similarity search
await processor.addText(olderChunks.filter(v => v.text.trim().length > 0).map(v => "search_document: " + v.text.trim()));
let scoredResults: { [key: string]: number } = {};
for (let i = 0; i < 3; i++) {
const pop = chats[chats.length - i - 1];
if (!pop) break;
const searched = await processor.similaritySearchScored(`search_query: ${pop.content}`);
for (const result of searched) {
const score = result[1] / (i + 1);
scoredResults[result[0]] = (scoredResults[result[0]] || 0) + score;
}
}
const scoredArray = Object.entries(scoredResults).sort((a,b) => b[1] - a[1])
let chunkResultPrompts = ""
while(allocatedTokens > 0){
const target = scoredArray.shift()
if(!target){
break
}
const tokenized = await tokenizer.tokenizeChat({
role: 'system',
content: target[0].substring(14)
})
if(tokenized > allocatedTokens){
break
}
chunkResultPrompts += target[0].substring(14) + '\n\n'
allocatedTokens -= tokenized
const scoredArray = Object.entries(scoredResults).sort((a, b) => b[1] - a[1]);
let chunkResultPrompts = "";
let chunkResultTokens = 0;
while (allocatedTokens - mainPromptTokens - chunkResultTokens > 0 && scoredArray.length > 0) {
const [text] = scoredArray.shift();
const tokenized = await tokenizer.tokenizeChat({ role: 'system', content: text.substring(14) });
if (tokenized > allocatedTokens - mainPromptTokens - chunkResultTokens) break;
chunkResultPrompts += text.substring(14) + '\n\n';
chunkResultTokens += tokenized;
}
const fullResult = `<Past Events Summary>${mainPrompt}</Past Events Summary>\n<Past Events Details>${chunkResultPrompts}</Past Events Details>`
const fullResult = `<Past Events Summary>${mainPrompt}</Past Events Summary>\n<Past Events Details>${chunkResultPrompts}</Past Events Details>`;
chats.unshift({
role: "system",
content: fullResult,
memo: "supaMemory"
})
});
// Add the remaining chats after the last mainChunk's targetId
const lastTargetId = data.mainChunks.length > 0 ? data.mainChunks[0].targetId : null;
if (lastTargetId) {
const lastIndex = getValidChatIndex(lastTargetId);
if (lastIndex !== -1) {
const remainingChats = chats.slice(lastIndex + 1);
chats = [chats[0], ...remainingChats];
}
}
// Add last two chats if they exist and are not duplicates
if (lastTwoChats.length === 2) {
const [lastChat1, lastChat2] = lastTwoChats;
if (!chats.some(chat => chat.memo === lastChat1.memo)) {
chats.push(lastChat1);
}
if (!chats.some(chat => chat.memo === lastChat2.memo)) {
chats.push(lastChat2);
}
}
console.log("model being used: ", db.hypaModel, db.supaModelType, "\nCurrent session tokens: ", currentTokens, "\nAll chats, including memory system prompt: ", chats, "\nMemory data, with all the chunks: ", data);
return {
currentTokens: currentTokens,
chats: chats,
memory: data
}
}
};
}

View File

@@ -183,7 +183,7 @@ export async function supaMemory(
async function summarize(stringlizedChat:string){
if(db.supaMemoryType === 'distilbart'){
if(db.supaModelType === 'distilbart'){
try {
const sum = await runSummarizer(stringlizedChat)
return sum
@@ -204,7 +204,7 @@ export async function supaMemory(
let result = ''
if(db.supaMemoryType !== 'subModel'){
if(db.supaModelType !== 'subModel'){
const promptbody = stringlizedChat + '\n\n' + supaPrompt + "\n\nOutput:"
const da = await globalFetch("https://api.openai.com/v1/completions",{
@@ -214,8 +214,8 @@ export async function supaMemory(
},
method: "POST",
body: {
"model": db.supaMemoryType === 'curie' ? "text-curie-001"
: db.supaMemoryType === 'instruct35' ? 'gpt-3.5-turbo-instruct'
"model": db.supaModelType === 'curie' ? "text-curie-001"
: db.supaModelType === 'instruct35' ? 'gpt-3.5-turbo-instruct'
: "text-davinci-003",
"prompt": promptbody,
"max_tokens": 600,

View File

@@ -230,8 +230,8 @@ export function setDatabase(data:Database){
if(checkNullish(data.supaMemoryKey)){
data.supaMemoryKey = ""
}
if(checkNullish(data.supaMemoryType)){
data.supaMemoryType = "none"
if(checkNullish(data.supaModelType)){
data.supaModelType = "none"
}
if(checkNullish(data.askRemoval)){
data.askRemoval = true
@@ -527,7 +527,7 @@ export interface Database{
useStreaming:boolean
palmAPI:string,
supaMemoryKey:string
supaMemoryType:string
supaModelType:string
textScreenColor?:string
textBorder?:boolean
textScreenRounded?:boolean
@@ -569,6 +569,8 @@ export interface Database{
useAdditionalAssetsPreview:boolean,
usePlainFetch:boolean
hypaMemory:boolean
hypav2:boolean
memoryAlgorithmType:string // To enable new memory module/algorithms
proxyRequestModel:string
ooba:OobaSettings
ainconfig: AINsettings