From 45acd4b6d8b33ec71b274fdf85604634d6ef0f35 Mon Sep 17 00:00:00 2001 From: kwaroran Date: Tue, 23 Apr 2024 22:50:44 +0900 Subject: [PATCH] Refactor chat formatting and fix Harunai Memory --- src/ts/process/index.ts | 3 +++ src/ts/process/memory/hanuraiMemory.ts | 29 +++++++++++++++----------- src/ts/process/memory/hypamemory.ts | 9 ++++---- src/ts/storage/database.ts | 2 +- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/ts/process/index.ts b/src/ts/process/index.ts index a143e88e..cef757ab 100644 --- a/src/ts/process/index.ts +++ b/src/ts/process/index.ts @@ -721,6 +721,9 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n content: '', } as const } + else{ + v.content = `${v.content}` + } return v }).filter((v) => { return v.content !== '' diff --git a/src/ts/process/memory/hanuraiMemory.ts b/src/ts/process/memory/hanuraiMemory.ts index cf14e997..cf10cacd 100644 --- a/src/ts/process/memory/hanuraiMemory.ts +++ b/src/ts/process/memory/hanuraiMemory.ts @@ -13,9 +13,10 @@ export async function hanuraiMemory(chats:OpenAIChat[],arg:{ }){ const db = get(DataBase) const tokenizer = arg.tokenizer - const processer = new HypaProcesser('nomic') + const processer = new HypaProcesser('MiniLM') let addTexts:string[] = [] chats.map((chat) => { + if(!chat?.content?.trim()){ return } @@ -28,12 +29,14 @@ export async function hanuraiMemory(chats:OpenAIChat[],arg:{ addTexts.push(`search_document: ${split.trim()}`) } } - addTexts.push(`search_document: ${chat.content?.trim()}`) + else{ + addTexts.push(`search_document: ${chat.content?.trim()}`) + } }) - processer.addText(addTexts) + await processer.addText(addTexts) let scoredResults:{[key:string]:number} = {} - for(let i=1;i<5;i++){ + for(let i=1;i<4;i++){ const chat = chats[chats.length-i] if(!chat?.content){ continue @@ -49,37 +52,39 @@ export async function hanuraiMemory(chats:OpenAIChat[],arg:{ } } } - const vectorResult = Object.entries(scoredResults).sort((a,b)=>a[1]-b[1]) - + const vectorResult = Object.entries(scoredResults).sort((a,b)=>b[1]-a[1]) let tokens = arg.currentTokens + db.hanuraiTokens - while(tokens < arg.maxContextTokens){ + while(tokens > arg.maxContextTokens){ const poped = chats.pop() if(!poped){ alertError(language.errors.toomuchtoken + "\n\nRequired Tokens: " + tokens) return false } - tokens -= await tokenizer.tokenizeChat(chats[0]) + tokens -= await tokenizer.tokenizeChat(poped) } tokens -= db.hanuraiTokens let resultTexts:string[] = [] for(const vector of vectorResult){ - const chat = chats.find((chat) => chat.content === vector[0].substring(14)) + const chat = chats.find((chat) => chat.content === vector[0].substring(16)) if(chat){ continue } - const tokenized = await tokenizer.tokenizeChat(chat) + 2 + const tokenized = await tokenizer.tokenizeChat({ + role: 'system', + memo: 'supaMemory', + content: vector[0].substring(16) + }) + 2 tokens += tokenized if(tokens >= arg.maxContextTokens){ tokens -= tokenized break } - resultTexts.push(vector[0].substring(14)) + resultTexts.push(vector[0].substring(16)) } - console.log(resultTexts) chats.unshift({ role: "system", memo: "supaMemory", diff --git a/src/ts/process/memory/hypamemory.ts b/src/ts/process/memory/hypamemory.ts index f4a89cc7..57dee944 100644 --- a/src/ts/process/memory/hypamemory.ts +++ b/src/ts/process/memory/hypamemory.ts @@ -1,5 +1,4 @@ import localforage from "localforage"; -import { similarity } from "ml-distance"; import { globalFetch } from "src/ts/storage/globalApi"; import { runEmbedding } from "../transformers"; @@ -144,7 +143,7 @@ export class HypaProcesser{ const memoryVectors = this.vectors const searches = memoryVectors .map((vector, index) => ({ - similarity: similarity.cosine(query, vector.embedding), + similarity: similarity(query, vector.embedding), index, })) .sort((a, b) => (a.similarity > b.similarity ? -1 : 0)) @@ -158,10 +157,12 @@ export class HypaProcesser{ } similarityCheck(query1:number[],query2: number[]) { - return similarity.cosine(query1, query2) + return similarity(query1, query2) } } - +function similarity(a:number[], b:number[]) { + return a.reduce((acc, val, i) => acc + val * b[i], 0); +} type memoryVector = { embedding:number[] diff --git a/src/ts/storage/database.ts b/src/ts/storage/database.ts index 3e1dd30e..b426f512 100644 --- a/src/ts/storage/database.ts +++ b/src/ts/storage/database.ts @@ -395,7 +395,7 @@ export function setDatabase(data:Database){ data.openrouterProvider ??= '' data.useInstructPrompt ??= false data.hanuraiEnable ??= false - data.hanuraiSplit ??= true + data.hanuraiSplit ??= false data.hanuraiTokens ??= 1000 changeLanguage(data.language)