[feat] hypamemory first commit

2023-06-29 00:25:51 +09:00
parent 1026945996
commit 6dc105c69a
5 changed files with 727 additions and 33 deletions
--- a/src/ts/process/index.ts
+++ b/src/ts/process/index.ts
@@ -11,7 +11,7 @@ import { stableDiff } from "./stableDiff";
 import { processScript, processScriptFull } from "./scripts";
 import { exampleMessage } from "./exampleMessages";
 import { sayTTS } from "./tts";
-import { supaMemory } from "./supaMemory";
+import { supaMemory } from "./memory/supaMemory";
 import { v4 } from "uuid";
 import { cloneDeep } from "lodash";
 import { groupOrder } from "./group";
--- a/src/ts/process/memory/hypamemory.ts
+++ b/src/ts/process/memory/hypamemory.ts
@@ -0,0 +1,139 @@
+import localforage from "localforage";
+import { similarity } from "ml-distance";
+import { globalFetch } from "src/ts/storage/globalApi";
+
+
+/**
+ * Small embedding-backed text store.
+ *
+ * Texts are embedded through the OpenAI embeddings endpoint, cached in a
+ * localforage instance keyed by the text itself, and ranked against a query
+ * by cosine similarity.
+ */
+export class HypaProcesser{
+    /**
+     * API key sent as the Bearer token by getEmbeds.
+     * NOTE(review): nothing in this class assigns it — confirm the caller sets it before any request.
+     */
+    oaikey:string
+    /** Vectors currently loaded in memory; this is the set similarity searches run over. */
+    vectors:memoryVector[] = []
+    /** Persistent cache of embedded texts (localforage instance named "hypaVector"). */
+    forage:LocalForage
+
+    constructor(){
+        this.forage = localforage.createInstance({
+            name: "hypaVector"
+        })
+    }
+
+    /**
+     * Embeds every text, sending them to the API in batches of at most 512 inputs.
+     *
+     * @param texts Texts to embed.
+     * @returns One embedding per text, in input order.
+     * @throws Whatever getEmbeds throws when a batch fails.
+     */
+    async embedDocuments(texts: string[]): Promise<number[][]> {
+        const batches = chunkArray(texts, 512);
+        const embeddings: number[][] = [];
+
+        // Batches are requested one after another so results stay in input order.
+        for (const batch of batches) {
+            embeddings.push(...(await this.getEmbeds(batch)));
+        }
+
+        return embeddings;
+    }
+
+    /**
+     * Requests embeddings for one string or one batch of strings.
+     *
+     * @param input A single text or an array of texts.
+     * @returns The `embedding` field of every item in the response, in response order.
+     * @throws The raw response payload (`gf.data`) when the request is not ok.
+     */
+    async getEmbeds(input:string[]|string): Promise<number[][]> {
+        const gf = await globalFetch("https://api.openai.com/v1/embeddings", {
+            headers: {
+                "Authorization": "Bearer " + this.oaikey
+            },
+            body: {
+                "input": input,
+                "model": "text-embedding-ada-002"
+            }
+        })
+
+        // Bail out before touching the payload shape of a failed request.
+        if(!gf.ok){
+            throw gf.data
+        }
+
+        const items:{embedding:number[]}[] = gf.data.data
+        return items.map((item) => item.embedding)
+    }
+
+    /**
+     * Loads the given texts into memory, reusing cached embeddings where the
+     * cache has them and embedding (then caching) the rest.
+     *
+     * Texts that are already in memory are skipped, so calling this repeatedly
+     * with overlapping input does not create duplicate vectors.
+     *
+     * @param texts Texts to make searchable.
+     */
+    async addText(texts:string[]) {
+        // Contents that are already searchable; kept up to date as cache hits are loaded.
+        const known = new Set(this.vectors.map((vector) => vector.content))
+
+        for(const text of texts){
+            if(known.has(text)){
+                continue
+            }
+            const itm:memoryVector = await this.forage.getItem(text)
+            if(itm){
+                itm.alreadySaved = true
+                this.vectors.push(itm)
+                known.add(text)
+            }
+        }
+
+        // Only texts that are neither in memory nor in the cache need an API call.
+        const pending = [...new Set(texts)].filter((text) => !known.has(text))
+        if(pending.length === 0){
+            return
+        }
+
+        const embeddings = await this.embedDocuments(pending)
+        const memoryVectors:memoryVector[] = embeddings.map((embedding, idx) => ({
+            content: pending[idx],
+            embedding
+        }))
+
+        // Freshly embedded vectors are never in the cache yet, so persist all of them.
+        for(const vec of memoryVectors){
+            await this.forage.setItem(vec.content, vec)
+        }
+
+        this.vectors = memoryVectors.concat(this.vectors)
+    }
+
+    /**
+     * Ranks the stored texts by similarity to `query`.
+     *
+     * @param query Text to compare against.
+     * @returns Stored texts, most similar first; empty when nothing is stored.
+     */
+    async similaritySearch(query: string) {
+        // Nothing to rank: skip the embedding request entirely.
+        if(this.vectors.length === 0){
+            return []
+        }
+        const [queryEmbedding] = await this.getEmbeds(query)
+        const results = await this.similaritySearchVectorWithScore(queryEmbedding)
+
+        return results.map((result) => result[0]);
+    }
+
+    /**
+     * Scores every stored vector against `query` with cosine similarity.
+     *
+     * @param query Embedding of the query.
+     * @returns `[content, similarity]` pairs sorted by descending similarity.
+     */
+    async similaritySearchVectorWithScore(
+        query: number[],
+    ): Promise<[string, number][]> {
+        const scored: [string, number][] = this.vectors.map((vector) => [
+            vector.content,
+            similarity.cosine(query, vector.embedding),
+        ])
+
+        // A full three-way comparison keeps the ordering well-defined.
+        return scored.sort((a, b) => b[1] - a[1])
+    }
+}
+
+
+/** A piece of text together with its embedding, as held in memory and stored in the localforage cache. */
+type memoryVector = {
+    /** The text this entry represents; also used as its cache key. */
+    content:string
+    /** Embedding vector compared against the query embedding during search. */
+    embedding:number[]
+    /** Set to true by addText on entries that were loaded from the cache. */
+    alreadySaved?:boolean
+}
+
+/**
+ * Splits `arr` into consecutive groups of at most `chunkSize` elements,
+ * keeping the original order. An empty input produces an empty list.
+ */
+const chunkArray = <T>(arr: T[], chunkSize: number) => {
+    const groups: T[][] = [];
+    arr.forEach((item, position) => {
+        // Element at `position` belongs to group floor(position / chunkSize).
+        const slot = Math.floor(position / chunkSize);
+        groups[slot] = (groups[slot] || []).concat([item]);
+    });
+    return groups;
+};
--- a/src/ts/process/memory/supaMemory.ts
+++ b/src/ts/process/memory/supaMemory.ts
@@ -1,8 +1,11 @@
 import { get } from "svelte/store";
-import type { OpenAIChat } from ".";
-import { DataBase, type Chat, type character, type groupChat } from "../storage/database";
-import { tokenize, type ChatTokenizer } from "../tokenizer";
-import { requestChatData } from "./request";
+import type { OpenAIChat } from "..";
+import { DataBase, type Chat, type character, type groupChat } from "../../storage/database";
+import { tokenize, type ChatTokenizer } from "../../tokenizer";
+import { requestChatData } from "../request";
+import { cloneDeep } from "lodash";
+import { HypaProcesser } from "./hypamemory";
+import { stringlizeChat } from "../stringlize";

 export async function supaMemory(
        chats:OpenAIChat[],
@@ -10,7 +13,8 @@ export async function supaMemory(
        maxContextTokens:number,
        room:Chat,
        char:character|groupChat,
-        tokenizer:ChatTokenizer
+        tokenizer:ChatTokenizer,
+        arg:{asHyper?:boolean} = {}
    ): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{
    const db = get(DataBase)

@@ -32,33 +36,98 @@ export async function supaMemory(
        }

        let supaMemory = ''
+        let hypaChunks:string[] = []
        let lastId = ''
+        let HypaData:HypaData[] = []

        if(room.supaMemoryData && room.supaMemoryData.length > 4){
            const splited = room.supaMemoryData.split('\n')
-            const id = splited.splice(0,1)[0]
+            let id = splited.splice(0,1)[0]
            const data = splited.join('\n')

-            let i =0;
-            while(true){
-                if(chats.length === 0){
-                    return {
-                        currentTokens: currentTokens,
-                        chats: chats,
-                        error: "SupaMemory: chat ID not found"
-                    }
-                }
-                if(chats[0].memo === id){
-                    lastId = id
-                    break
-                }
-                currentTokens -= await tokenizer.tokenizeChat(chats[0])
-                chats.splice(0, 1)
-                i += 1
-            }
+            if(arg.asHyper && (!id.startsWith("hypa:"))){
+                supaMemory = ""

-            supaMemory = data
-            currentTokens += await tokenize(supaMemory)
+            }
+            else{
+                if(id.startsWith("hypa:")){
+                
+                    if((!arg.asHyper)){
+                        return {
+                            currentTokens: currentTokens,
+                            chats: chats,
+                            error: "SupaMemory: Data saved in hypaMemory, loaded as SupaMemory."
+                        }
+                    }
+
+                    HypaData = JSON.parse(data.substring(0,5).trim())
+                    if(!Array.isArray(HypaData)){
+                        return {
+                            currentTokens: currentTokens,
+                            chats: chats,
+                            error: "hypaMemory: hypaMemory isn't Array"
+                        }
+                    }
+
+                    let indexSelected = -1
+                    for(let i=0;i<HypaData.length;i++){
+                        let i =0;
+                        let countTokens  = currentTokens
+                        let countChats = cloneDeep(chats)
+                        while(true){
+                            if(countChats.length === 0){
+                                break
+                            }
+                            if(countChats[0].memo === HypaData[i].id){
+                                lastId = HypaData[i].id
+                                currentTokens = countTokens
+                                chats = countChats
+                                indexSelected = i
+                                break
+                            }
+                            countTokens -= await tokenizer.tokenizeChat(countChats[0])
+                            countChats.splice(0, 1)
+                            i += 1
+                        }
+                        if(indexSelected !== -1){
+                            break
+                        }
+                    }
+                    if(indexSelected === -1){
+                        return {
+                            currentTokens: currentTokens,
+                            chats: chats,
+                            error: "hypaMemory: chat ID not found"
+                        }
+                    }
+
+                    supaMemory = HypaData[indexSelected].supa
+                    hypaChunks = HypaData[indexSelected].hypa
+
+                }
+                else{
+                    let i =0;
+                    while(true){
+                        if(chats.length === 0){
+                            return {
+                                currentTokens: currentTokens,
+                                chats: chats,
+                                error: "SupaMemory: chat ID not found"
+                            }
+                        }
+                        if(chats[0].memo === id){
+                            lastId = id
+                            break
+                        }
+                        currentTokens -= await tokenizer.tokenizeChat(chats[0])
+                        chats.splice(0, 1)
+                        i += 1
+                    }
+        
+                    supaMemory = data
+                    currentTokens += await tokenize(supaMemory)
+                }
+            }
        }


@@ -135,6 +204,20 @@ export async function supaMemory(
            return result
        }

+        let hypaResult = ""
+
+        if(arg.asHyper){
+            const hypa = new HypaProcesser()
+            await hypa.addText(hypaChunks)
+            const filteredChat = chats.filter((r) => r.role !== 'system' && r.role !== 'function')
+            const s = await hypa.similaritySearch(stringlizeChat(filteredChat.slice(0, 4)))
+            hypaResult = s.slice(0,4).join("\n\n")
+            currentTokens += await tokenizer.tokenizeChat({
+                role: "assistant",
+                content: hypaResult
+            })
+        }
+
        while(currentTokens > maxContextTokens){
            const beforeToken = currentTokens
            let maxChunkSize = maxContextTokens > 3500 ? 1200 : Math.floor(maxContextTokens / 3)
@@ -181,7 +264,11 @@ export async function supaMemory(
                const tokens = await tokenizer.tokenizeChat(cont)
                if((chunkSize + tokens) > maxChunkSize){
                    if(stringlizedChat === ''){
-                        stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? '' : char.name : db.username}: ${cont.content}\n\n`
+                        
+
+                        if(cont.role !== 'function' && cont.role !== 'system'){
+                            stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? '' : char.name : db.username}: ${cont.content}\n\n`
+                        }
                    }
                    lastId = cont.memo
                    break
@@ -203,13 +290,67 @@ export async function supaMemory(
                const tokenz = await tokenize(result + '\n\n')
                currentTokens += tokenz
                supaMemory += result.replace(/\n+/g,'\n') + '\n\n'
+
+                let SupaMemoryList = supaMemory.split('\n\n')
+                if(SupaMemoryList.length >= 5){
+                    const oldSupaMemory = supaMemory
+                    let modifies:string[] = []
+                    for(let i=0;i<3;i++){
+                        modifies.push(SupaMemoryList.shift())
+                    }
+                    hypaChunks.push(...modifies)
+
+                    const result = await summarize(supaMemory)
+                    if(typeof(result) !== 'string'){
+                        return result
+                    }
+
+                    modifies.unshift(result.replace(/\n+/g,'\n'))
+                    supaMemory = modifies.join('\n\n') + '\n\n'
+
+                    currentTokens -= await tokenize(oldSupaMemory)
+                    currentTokens += await tokenize(supaMemory)
+                }
            }
        }

        chats.unshift({
            role: "system",
-            content: supaMemory
+            content: supaMemory,
+            name: "supaMemory"
        })
+
+        
+
+        if(arg.asHyper){
+            if(hypaResult !== ''){
+                chats.unshift({
+                    role: "assistant",
+                    content: hypaResult
+                })
+            }
+            
+            if(HypaData[0] && HypaData[0].id === lastId){
+                HypaData[0].hypa = hypaChunks
+                HypaData[0].supa = supaMemory
+            }
+            else{
+                HypaData.push({
+                    id: lastId,
+                    hypa: hypaChunks,
+                    supa: supaMemory
+                })
+            }
+
+            return {
+                currentTokens: currentTokens,
+                chats: chats,
+                memory: JSON.stringify(HypaData, null, 2),
+                lastId: lastId
+            }
+
+        }
+
        return {
            currentTokens: currentTokens,
            chats: chats,
@@ -224,3 +365,5 @@ export async function supaMemory(
    }
 }

+/** One entry of the JSON array that supaMemory serialises as hypaMemory data. */
+type HypaData = {
+    /** Compared against a chat's `memo` to find where this entry applies. */
+    id:string
+    /** Summary text loaded into `supaMemory` when this entry is selected. */
+    supa:string
+    /** Text chunks loaded into `hypaChunks` when this entry is selected. */
+    hypa:string[]
+}
+