[feat] hypamemory first commit
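
Adds two memory processors: hypamemory.ts embeds chat chunks with the OpenAI embeddings API, caches the vectors in localforage, and retrieves them by cosine similarity; supaMemory.ts rolls old messages into a running summary and, when run as hypa memory, archives displaced summary chunks for similarity-based recall.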
src/ts/process/memory/hypamemory.ts (new file, 139 lines added)
@@ -0,0 +1,139 @@
import localforage from "localforage";
import { similarity } from "ml-distance";
import { globalFetch } from "src/ts/storage/globalApi";

export class HypaProcesser{
    oaikey:string
    // Initialized to an empty array so addText can push into it safely.
    vectors:memoryVector[] = []
    forage:LocalForage

    constructor(){
        this.forage = localforage.createInstance({
            name: "hypaVector"
        })
    }

    async embedDocuments(texts: string[]): Promise<number[][]> {
        // Embed in batches of up to 512 texts per request.
        const subPrompts = chunkArray(texts, 512);
        const embeddings: number[][] = [];
        for (let i = 0; i < subPrompts.length; i += 1) {
            const input = subPrompts[i];
            const data = await this.getEmbeds(input)
            embeddings.push(...data);
        }
        return embeddings;
    }

    async getEmbeds(input:string[]|string) {
        const gf = await globalFetch("https://api.openai.com/v1/embeddings", {
            headers: {
                "Authorization": "Bearer " + this.oaikey
            },
            body: {
                "input": input,
                "model": "text-embedding-ada-002"
            }
        })
        if(!gf.ok){
            throw gf.data
        }
        const data = gf.data

        // The API returns { data: [{ embedding: number[] }, ...] }.
        const result:number[][] = []
        for(let i=0;i<data.data.length;i++){
            result.push(data.data[i].embedding)
        }
        return result
    }

    async addText(texts:string[]) {
        // Restore vectors that were embedded and cached in a previous session.
        for(let i=0;i<texts.length;i++){
            const itm:memoryVector = await this.forage.getItem(texts[i])
            if(itm){
                itm.alreadySaved = true
                this.vectors.push(itm)
            }
        }

        // Keep only texts that do not have a vector in memory yet.
        texts = texts.filter((v) => {
            for(let i=0;i<this.vectors.length;i++){
                if(this.vectors[i].content === v){
                    return false
                }
            }
            return true
        })

        if(texts.length === 0){
            return
        }
        const vectors = await this.embedDocuments(texts)

        const memoryVectors:memoryVector[] = vectors.map((embedding, idx) => ({
            content: texts[idx],
            embedding
        }));

        // Persist the freshly embedded vectors so later sessions can reuse them.
        for(let i=0;i<memoryVectors.length;i++){
            const vec = memoryVectors[i]
            if(!vec.alreadySaved){
                await this.forage.setItem(texts[i], vec)
            }
        }

        this.vectors = memoryVectors.concat(this.vectors)
    }

    async similaritySearch(query: string) {
        const results = await this.similaritySearchVectorWithScore((await this.getEmbeds(query))[0]);
        return results.map((result) => result[0]);
    }

    async similaritySearchVectorWithScore(
        query: number[],
    ): Promise<[string, number][]> {
        const memoryVectors = this.vectors
        // Rank every stored vector by cosine similarity to the query, highest first.
        const searches = memoryVectors
            .map((vector, index) => ({
                similarity: similarity.cosine(query, vector.embedding),
                index,
            }))
            .sort((a, b) => (a.similarity > b.similarity ? -1 : 1))

        const result: [string, number][] = searches.map((search) => [
            memoryVectors[search.index].content,
            search.similarity,
        ]);

        return result;
    }
}

type memoryVector = {
    embedding:number[]
    content:string,
    alreadySaved?:boolean
}

const chunkArray = <T>(arr: T[], chunkSize: number) =>
    arr.reduce((chunks, elem, index) => {
        const chunkIndex = Math.floor(index / chunkSize);
        const chunk = chunks[chunkIndex] || [];
        chunks[chunkIndex] = chunk.concat([elem]);
        return chunks;
    }, [] as T[][]);
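
For context, a minimal usage sketch (hypothetical caller, not part of this commit; the constructor takes no arguments, so oaikey must be assigned before any request):

// Hypothetical usage, not part of this commit.
const processer = new HypaProcesser()
processer.oaikey = "sk-..." // assumption: the caller supplies an OpenAI API key
await processer.addText([
    "The hero met the merchant in chapter one.",
    "The merchant betrayed the hero at the gate."
])
// Embeds the query, then returns the stored texts ordered by descending cosine similarity.
const nearest = await processer.similaritySearch("Who betrayed the hero?")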

src/ts/process/memory/supaMemory.ts (new file, 369 lines added)
@@ -0,0 +1,369 @@
import { get } from "svelte/store";
import type { OpenAIChat } from "..";
import { DataBase, type Chat, type character, type groupChat } from "../../storage/database";
import { tokenize, type ChatTokenizer } from "../../tokenizer";
import { requestChatData } from "../request";
import { cloneDeep } from "lodash";
import { HypaProcesser } from "./hypamemory";
import { stringlizeChat } from "../stringlize";

export async function supaMemory(
    chats:OpenAIChat[],
    currentTokens:number,
    maxContextTokens:number,
    room:Chat,
    char:character|groupChat,
    tokenizer:ChatTokenizer,
    arg:{asHyper?:boolean} = {}
): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string; lastId?:string }>{
    const db = get(DataBase)

    currentTokens += 10 // presumably a small buffer for the injected memory prompt

    if(currentTokens > maxContextTokens){
        // Drop everything before the most recent 'NewChat' marker.
        let coIndex = -1
        for(let i=0;i<chats.length;i++){
            if(chats[i].memo === 'NewChat'){
                coIndex = i
                break
            }
        }
        if(coIndex !== -1){
            for(let i=0;i<coIndex;i++){
                currentTokens -= await tokenizer.tokenizeChat(chats[0])
                chats.splice(0, 1)
            }
        }

        let supaMemory = ''
        let hypaChunks:string[] = []
        let lastId = ''
        let HypaData:HypaData[] = []

        if(room.supaMemoryData && room.supaMemoryData.length > 4){
            // Saved format: an id line, then the memory payload.
            const splited = room.supaMemoryData.split('\n')
            let id = splited.splice(0,1)[0]
            const data = splited.join('\n')

            if(arg.asHyper && (!id.startsWith("hypa:"))){
                // Data was saved by plain SupaMemory; start hypa memory fresh.
                supaMemory = ""
            }
            else{
                if(id.startsWith("hypa:")){
                    if((!arg.asHyper)){
                        return {
                            currentTokens: currentTokens,
                            chats: chats,
                            error: "SupaMemory: Data saved in hypaMemory, loaded as SupaMemory."
                        }
                    }

                    // Parse the JSON checkpoint array that follows the id line.
                    HypaData = JSON.parse(data)
                    if(!Array.isArray(HypaData)){
                        return {
                            currentTokens: currentTokens,
                            chats: chats,
                            error: "hypaMemory: hypaMemory isn't an Array"
                        }
                    }

                    // Find the saved checkpoint whose id still exists in the chat log,
                    // trimming everything older than it.
                    let indexSelected = -1
                    for(let i=0;i<HypaData.length;i++){
                        let countTokens = currentTokens
                        let countChats = cloneDeep(chats)
                        while(true){
                            if(countChats.length === 0){
                                break
                            }
                            if(countChats[0].memo === HypaData[i].id){
                                lastId = HypaData[i].id
                                currentTokens = countTokens
                                chats = countChats
                                indexSelected = i
                                break
                            }
                            countTokens -= await tokenizer.tokenizeChat(countChats[0])
                            countChats.splice(0, 1)
                        }
                        if(indexSelected !== -1){
                            break
                        }
                    }
                    if(indexSelected === -1){
                        return {
                            currentTokens: currentTokens,
                            chats: chats,
                            error: "hypaMemory: chat ID not found"
                        }
                    }

                    supaMemory = HypaData[indexSelected].supa
                    hypaChunks = HypaData[indexSelected].hypa
                }
                else{
                    // Plain SupaMemory: trim messages until the saved id is found.
                    while(true){
                        if(chats.length === 0){
                            return {
                                currentTokens: currentTokens,
                                chats: chats,
                                error: "SupaMemory: chat ID not found"
                            }
                        }
                        if(chats[0].memo === id){
                            lastId = id
                            break
                        }
                        currentTokens -= await tokenizer.tokenizeChat(chats[0])
                        chats.splice(0, 1)
                    }

                    supaMemory = data
                    currentTokens += await tokenize(supaMemory)
                }
            }
        }

        if(currentTokens < maxContextTokens){
            // Trimming to the saved checkpoint was enough; inject the memory and stop.
            chats.unshift({
                role: "system",
                content: supaMemory
            })
            return {
                currentTokens: currentTokens,
                chats: chats
            }
        }

        async function summarize(stringlizedChat:string){
            const supaPrompt = db.supaMemoryPrompt === '' ?
                "[Summarize the ongoing role story, It must also remove redundancy and unnecessary text and content from the output to reduce tokens for gpt3 and other sublanguage models]\n"
                : db.supaMemoryPrompt

            let result = ''

            if(db.supaMemoryType !== 'subModel'){
                const promptbody = stringlizedChat + '\n\n' + supaPrompt + "\n\nOutput:"

                const da = await fetch("https://api.openai.com/v1/completions",{
                    headers: {
                        "Content-Type": "application/json",
                        "Authorization": "Bearer " + db.supaMemoryKey
                    },
                    method: "POST",
                    body: JSON.stringify({
                        "model": db.supaMemoryType === 'curie' ? "text-curie-001" : "text-davinci-003",
                        "prompt": promptbody,
                        "max_tokens": 600,
                        "temperature": 0
                    })
                })

                if(da.status < 200 || da.status >= 300){
                    return {
                        currentTokens: currentTokens,
                        chats: chats,
                        error: "SupaMemory: HTTP: " + await da.text()
                    }
                }
                result = (await da.json()).choices[0].text.trim()
            }
            else {
                // Summarize with the configured sub model instead of a completion model.
                const promptbody:OpenAIChat[] = [
                    {
                        role: "user",
                        content: stringlizedChat
                    },
                    {
                        role: "system",
                        content: supaPrompt
                    }
                ]
                const da = await requestChatData({
                    formated: promptbody,
                    bias: {}
                }, 'submodel')
                if(da.type === 'fail' || da.type === 'streaming' || da.type === 'multiline'){
                    return {
                        currentTokens: currentTokens,
                        chats: chats,
                        error: "SupaMemory: HTTP: " + da.result
                    }
                }
                result = da.result
            }
            return result
        }

        let hypaResult = ""

        if(arg.asHyper){
            // Pull the archived chunks most similar to the current conversation.
            const hypa = new HypaProcesser()
            hypa.oaikey = db.supaMemoryKey // assumption: reuse the summarization key for embeddings
            await hypa.addText(hypaChunks)
            const filteredChat = chats.filter((r) => r.role !== 'system' && r.role !== 'function')
            const s = await hypa.similaritySearch(stringlizeChat(filteredChat.slice(0, 4)))
            hypaResult = s.slice(0,4).join("\n\n")
            currentTokens += await tokenizer.tokenizeChat({
                role: "assistant",
                content: hypaResult
            })
        }

        // Summarize from the oldest messages until the context fits.
        while(currentTokens > maxContextTokens){
            const beforeToken = currentTokens
            let maxChunkSize = maxContextTokens > 3500 ? 1200 : Math.floor(maxContextTokens / 3)
            let summarized = false
            let chunkSize = 0
            let stringlizedChat = ''
            let spiceLen = 0
            while(true){
                const cont = chats[spiceLen]
                if(!cont){
                    // Ran out of messages: compress the memory itself once,
                    // then retry with progressively smaller chunks.
                    currentTokens = beforeToken
                    stringlizedChat = ''
                    chunkSize = 0
                    spiceLen = 0
                    if(summarized){
                        if(maxChunkSize < 500){
                            return {
                                currentTokens: currentTokens,
                                chats: chats,
                                error: "Not Enough Tokens"
                            }
                        }
                        maxChunkSize = Math.floor(maxChunkSize * 0.7)
                    }
                    else{
                        const result = await summarize(supaMemory)
                        if(typeof(result) !== 'string'){
                            return result
                        }

                        currentTokens -= await tokenize(supaMemory)
                        currentTokens += await tokenize(result + '\n\n')

                        supaMemory = result + '\n\n'
                        summarized = true
                        if(currentTokens <= maxContextTokens){
                            break
                        }
                    }
                    continue
                }
                const tokens = await tokenizer.tokenizeChat(cont)
                if((chunkSize + tokens) > maxChunkSize){
                    if(stringlizedChat === ''){
                        // A single message alone exceeds the chunk size.
                        if(cont.role !== 'function' && cont.role !== 'system'){
                            stringlizedChat += `${cont.role === 'assistant' ? (char.type === 'group' ? '' : char.name) : db.username}: ${cont.content}\n\n`
                        }
                    }
                    lastId = cont.memo
                    break
                }
                stringlizedChat += `${cont.role === 'assistant' ? (char.type === 'group' ? '' : char.name) : db.username}: ${cont.content}\n\n`
                spiceLen += 1
                currentTokens -= tokens
                chunkSize += tokens
            }
            chats.splice(0, spiceLen)

            if(stringlizedChat !== ''){
                const result = await summarize(stringlizedChat)

                if(typeof(result) !== 'string'){
                    return result
                }

                const tokenz = await tokenize(result + '\n\n')
                currentTokens += tokenz
                supaMemory += result.replace(/\n+/g,'\n') + '\n\n'

                // Once five or more summary chunks accumulate, archive the three
                // oldest into hypaChunks and compress the rest.
                let SupaMemoryList = supaMemory.split('\n\n')
                if(SupaMemoryList.length >= 5){
                    const oldSupaMemory = supaMemory
                    let modifies:string[] = []
                    for(let i=0;i<3;i++){
                        modifies.push(SupaMemoryList.shift())
                    }
                    hypaChunks.push(...modifies)

                    const result = await summarize(oldSupaMemory)
                    if(typeof(result) !== 'string'){
                        return result
                    }

                    // Rebuild the memory as the fresh summary plus the chunks that stayed.
                    SupaMemoryList.unshift(result.replace(/\n+/g,'\n'))
                    supaMemory = SupaMemoryList.join('\n\n') + '\n\n'

                    currentTokens -= await tokenize(oldSupaMemory)
                    currentTokens += await tokenize(supaMemory)
                }
            }
        }

        // Inject the rolling summary as a system message at the top of the context.
        chats.unshift({
            role: "system",
            content: supaMemory,
            name: "supaMemory"
        })

        if(arg.asHyper){
            if(hypaResult !== ''){
                chats.unshift({
                    role: "assistant",
                    content: hypaResult
                })
            }

            // Update the checkpoint matching lastId, or append a new one.
            if(HypaData[0] && HypaData[0].id === lastId){
                HypaData[0].hypa = hypaChunks
                HypaData[0].supa = supaMemory
            }
            else{
                HypaData.push({
                    id: lastId,
                    hypa: hypaChunks,
                    supa: supaMemory
                })
            }

            return {
                currentTokens: currentTokens,
                chats: chats,
                // The "hypa:" id line marks the payload as hypaMemory data for the loader above.
                memory: "hypa:" + lastId + '\n' + JSON.stringify(HypaData),
                lastId: lastId
            }
        }

        return {
            currentTokens: currentTokens,
            chats: chats,
            memory: lastId + '\n' + supaMemory,
            lastId: lastId
        }
    }
    return {
        currentTokens: currentTokens,
        chats: chats
    }
}

type HypaData = {id:string,supa:string,hypa:string[]}
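
For reference, a sketch of the two formats round-tripped through room.supaMemoryData (values are hypothetical; the "hypa:" id-line prefix mirrors the loader and saver above):

// Hypothetical examples, not part of this commit.
// Plain SupaMemory: the memo id of the first unsummarized chat, then the summary text.
const supaSaved = "memo-0042\nThe hero left the village and met the merchant.\n\n"

// HypaMemory: a "hypa:"-prefixed id line, then the serialized checkpoint array.
const checkpoints: HypaData[] = [
    { id: "memo-0042", supa: "The hero left the village.\n\n", hypa: ["Oldest archived chunk"] }
]
const hypaSaved = "hypa:memo-0042\n" + JSON.stringify(checkpoints)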