From f98764776ee4bed98558835d474bddadbd9d39ea Mon Sep 17 00:00:00 2001
From: kwaroran
Date: Wed, 31 May 2023 05:26:10 +0900
Subject: [PATCH] [fix] tokenizer problems

---
 src/ts/process/index.ts      | 33 ++++++++++++++++++++++++++-------
 src/ts/process/stringlize.ts | 12 ++++++++++++
 src/ts/process/supaMemory.ts | 22 +++++++++++++++-------
 3 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/src/ts/process/index.ts b/src/ts/process/index.ts
index a2a84b26..0c48d7e4 100644
--- a/src/ts/process/index.ts
+++ b/src/ts/process/index.ts
@@ -15,6 +15,7 @@ import { supaMemory } from "./supaMemory";
 import { v4 } from "uuid";
 import { cloneDeep } from "lodash";
 import { groupOrder } from "./group";
+import { getNameMaxTokens } from "./stringlize";
 
 export interface OpenAIChat{
     role: 'system'|'user'|'assistant'
@@ -25,7 +26,7 @@ export interface OpenAIChat{
 
 export const doingChat = writable(false)
 
-export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
+export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:number} = {}):Promise<boolean> {
 
     let findCharCache:{[key:string]:character} = {}
     function findCharacterbyIdwithCache(id:string){
@@ -57,9 +58,19 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
     let selectedChar = get(selectedCharID)
     const nowChatroom = db.characters[selectedChar]
     let currentChar:character
+    let caculatedChatTokens = 0
+    if(db.aiModel.startsWith('gpt')){
+        caculatedChatTokens += 5
+    }
+    else{
+        caculatedChatTokens += 3
+    }
 
     if(nowChatroom.type === 'group'){
         if(chatProcessIndex === -1){
+            const charNames =nowChatroom.characters.map((v) => findCharacterbyIdwithCache(v).name)
+            caculatedChatTokens += await getNameMaxTokens([...charNames, db.username])
+
             const messages = nowChatroom.chats[nowChatroom.chatPage].message
             const lastMessage = messages[messages.length-1]
             let order = nowChatroom.characters.map((v,i) => {
@@ -78,7 +89,9 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
             })
         }
         for(let i=0;i<order.length;i++){
-            const r = await sendChat(order[i].index)
+            const r = await sendChat(order[i].index, {
+                chatAdditonalTokens: caculatedChatTokens
+            })
             if(!r){
                 return false
             }
@@ -95,6 +108,12 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
     }
     else{
         currentChar = nowChatroom
+        if(!db.aiModel.startsWith('gpt')){
+            caculatedChatTokens += await getNameMaxTokens([currentChar.name, db.username])
+        }
+    }
+
+    let chatAdditonalTokens = arg.chatAdditonalTokens ?? caculatedChatTokens
 
     let selectedChat = nowChatroom.chatPage
     let currentChat = nowChatroom.chats[selectedChat]
@@ -188,13 +207,13 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
             return (unformated[key] as OpenAIChat[]).map((d) => {
                 return d.content
             }).join('\n\n')
-        }).join('\n\n')) + db.maxResponse) + 150
+        }).join('\n\n')) + db.maxResponse) + 100
 
     const examples = exampleMessage(currentChar)
 
     for(const example of examples){
-        currentTokens += await tokenize(example.content) + 5
+        currentTokens += await tokenize(example.content) + chatAdditonalTokens
     }
 
     let chats:OpenAIChat[] = examples
@@ -244,11 +263,11 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
                 memo: msg.chatId,
                 name: name
             })
-            currentTokens += (await tokenize(formedChat) + 5)
+            currentTokens += (await tokenize(formedChat) + chatAdditonalTokens)
         }
 
         if(nowChatroom.supaMemory && db.supaMemoryType !== 'none'){
-            const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom)
+            const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, chatAdditonalTokens)
             if(sp.error){
                 alertError(sp.error)
                 return false
@@ -266,7 +285,7 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
                 return false
             }
 
-            currentTokens -= (await tokenize(chats[0].content) + 5)
+            currentTokens -= (await tokenize(chats[0].content) + chatAdditonalTokens)
             chats.splice(0, 1)
         }
         currentChat.lastMemory = chats[0].memo
diff --git a/src/ts/process/stringlize.ts b/src/ts/process/stringlize.ts
index bc62b518..140389b6 100644
--- a/src/ts/process/stringlize.ts
+++ b/src/ts/process/stringlize.ts
@@ -1,4 +1,5 @@
 import type { OpenAIChat } from ".";
+import { tokenize } from "../tokenizer";
 
 
 export function multiChatReplacer(){
@@ -52,4 +53,15 @@ export function unstringlizeChat(text:string, formated:OpenAIChat[], char:string
     }
 
     return text
+}
+
+export async function getNameMaxTokens(names:string[]){
+    let maxCharNameTokens = 0
+    for(const name of names){
+        const tokens = await tokenize(name + ': ') + 1
+        if(maxCharNameTokens < tokens){
+            maxCharNameTokens = tokens
+        }
+    }
+    return maxCharNameTokens
 }
\ No newline at end of file
diff --git a/src/ts/process/supaMemory.ts b/src/ts/process/supaMemory.ts
index e58e1876..634fd447 100644
--- a/src/ts/process/supaMemory.ts
+++ b/src/ts/process/supaMemory.ts
@@ -5,9 +5,17 @@ import { tokenize } from "../tokenizer";
 import { findCharacterbyId } from "../util";
 import { requestChatData } from "./request";
 
-export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxContextTokens:number,room:Chat,char:character|groupChat): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{
+export async function supaMemory(
+    chats:OpenAIChat[],
+    currentTokens:number,
+    maxContextTokens:number,
+    room:Chat,
+    char:character|groupChat,
+    chatAdditonalTokens:number
+    ): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{
     const db = get(DataBase)
-    console.log("Memory: " + currentTokens)
+
+    currentTokens += 10
 
     if(currentTokens > maxContextTokens){
         let coIndex = -1
@@ -19,7 +27,7 @@ export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxCont
         }
         if(coIndex !== -1){
             for(let i=0;i<coIndex;i++){
-                currentTokens -= (await tokenize(chats[i].content) + 5)
+                currentTokens -= (await tokenize(chats[i].content) + chatAdditonalTokens)
             }
             chats = chats.slice(coIndex)
         }
@@ -47,7 +55,7 @@ export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxCont
         while(true){
             const cont = chats[0]
 
-            const tokens = await tokenize(cont.content) + 5
+            const tokens = await tokenize(cont.content) + chatAdditonalTokens
             if((chunkSize + tokens) > maxChunkSize){
                 if(stringlizedChat === ''){
                     stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? '' : char.name : db.username}: ${cont.content}\n\n`
@@ -193,7 +201,7 @@ export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxCont
                 return result
             }
 
-            const tokenz = await tokenize(result + '\n\n') + 5
+            const tokenz = await tokenize(result + '\n\n') + chatAdditonalTokens
             currentTokens += tokenz
             supaMemory += result.replace(/\n+/g,'\n') + '\n\n'
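
Reviewer note on the accounting above: every message now carries a fixed wrapper overhead (5 tokens for gpt-family chat models, 3 for everything else), and for non-gpt models the widest "Name: " prefix among all participants is added on top, because those models receive the chat flattened into "Name: content" lines. What follows is a minimal, self-contained TypeScript sketch of that arithmetic; tokenize here is a hypothetical stand-in for the repo's async tokenizer in src/ts/tokenizer, and perMessageOverhead is an illustrative name, not a function introduced by this patch.

// Hypothetical stand-in tokenizer: roughly 4 characters per token.
async function tokenize(text: string): Promise<number> {
    return Math.ceil(text.length / 4)
}

// Same logic as getNameMaxTokens() in src/ts/process/stringlize.ts:
// the widest "Name: " prefix, plus 1 for a trailing newline, bounds the
// prefix cost that any single flattened message can incur.
async function getNameMaxTokens(names: string[]): Promise<number> {
    let maxCharNameTokens = 0
    for (const name of names) {
        const tokens = await tokenize(name + ': ') + 1
        if (maxCharNameTokens < tokens) {
            maxCharNameTokens = tokens
        }
    }
    return maxCharNameTokens
}

// Worst-case fixed cost charged per chat message, mirroring the
// caculatedChatTokens computation this patch adds to sendChat().
async function perMessageOverhead(aiModel: string, participants: string[]): Promise<number> {
    let overhead = aiModel.startsWith('gpt') ? 5 : 3
    if (!aiModel.startsWith('gpt')) {
        // Non-chat models see "Name: content" lines, so the longest
        // possible name prefix must be budgeted for every message.
        overhead += await getNameMaxTokens(participants)
    }
    return overhead
}

A message's budgeted size is then "await tokenize(content) + overhead", which is the quantity sendChat() now adds when counting messages in and subtracts when dropping them, so the two sides of the budget can no longer disagree.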
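The same value is threaded into supaMemory() so that trimming releases exactly what the caller charged per message; with the old hard-coded "+ 5", a non-gpt model with long character names would under-release on every dropped message and the running token count would drift. Below is a sketch of just that budget arithmetic, reusing the stand-in tokenize above; trimToBudget is an illustrative reduction, not the real supaMemory(), which also summarizes the trimmed history through a model request.

interface Msg { role: 'system' | 'user' | 'assistant'; content: string }

async function trimToBudget(
    chats: Msg[],
    currentTokens: number,
    maxContextTokens: number,
    chatAdditonalTokens: number,  // spelling kept from the patch
    tokenize: (s: string) => Promise<number>,
): Promise<{ chats: Msg[]; currentTokens: number }> {
    currentTokens += 10  // small safety margin, as in the patch
    while (currentTokens > maxContextTokens && chats.length > 0) {
        // Release the same per-message overhead that was charged on the way in.
        currentTokens -= await tokenize(chats[0].content) + chatAdditonalTokens
        chats = chats.slice(1)
    }
    return { chats, currentTokens }
}

Usage note: sendChat() passes the same chatAdditonalTokens here that it used while accumulating currentTokens, which is the invariant the old five-argument signature could not express.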