[fix] tokenizer problems

Commit: f98764776e
Parent: 7ea768cb5b
Author: kwaroran
Date:   2023-05-31 05:26:10 +09:00

3 changed files with 53 additions and 14 deletions

Changed file 1 of 3

@@ -15,6 +15,7 @@ import { supaMemory } from "./supaMemory";
 import { v4 } from "uuid";
 import { cloneDeep } from "lodash";
 import { groupOrder } from "./group";
+import { getNameMaxTokens } from "./stringlize";
 
 export interface OpenAIChat{
     role: 'system'|'user'|'assistant'
@@ -25,7 +26,7 @@ export interface OpenAIChat{
 export const doingChat = writable(false)
 
-export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
+export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:number} = {}):Promise<boolean> {
     let findCharCache:{[key:string]:character} = {}
     function findCharacterbyIdwithCache(id:string){
@@ -57,9 +58,19 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
     let selectedChar = get(selectedCharID)
     const nowChatroom = db.characters[selectedChar]
     let currentChar:character
+    let caculatedChatTokens = 0
+    if(db.aiModel.startsWith('gpt')){
+        caculatedChatTokens += 5
+    }
+    else{
+        caculatedChatTokens += 3
+    }
 
     if(nowChatroom.type === 'group'){
         if(chatProcessIndex === -1){
+            const charNames =nowChatroom.characters.map((v) => findCharacterbyIdwithCache(v).name)
+            caculatedChatTokens += await getNameMaxTokens([...charNames, db.username])
             const messages = nowChatroom.chats[nowChatroom.chatPage].message
             const lastMessage = messages[messages.length-1]
             let order = nowChatroom.characters.map((v,i) => {
@@ -78,7 +89,9 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
                 })
             }
             for(let i=0;i<order.length;i++){
-                const r = await sendChat(order[i].index)
+                const r = await sendChat(order[i].index, {
+                    chatAdditonalTokens: caculatedChatTokens
+                })
                 if(!r){
                     return false
                 }
@@ -95,8 +108,14 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
     }
     else{
         currentChar = nowChatroom
+        if(!db.aiModel.startsWith('gpt')){
+            caculatedChatTokens += await getNameMaxTokens([currentChar.name, db.username])
         }
+    }
+
+    let chatAdditonalTokens = arg.chatAdditonalTokens ?? caculatedChatTokens
 
     let selectedChat = nowChatroom.chatPage
     let currentChat = nowChatroom.chats[selectedChat]
     let maxContextTokens = db.maxContext
@@ -188,13 +207,13 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
         return (unformated[key] as OpenAIChat[]).map((d) => {
             return d.content
         }).join('\n\n')
-    }).join('\n\n')) + db.maxResponse) + 150
+    }).join('\n\n')) + db.maxResponse) + 100
 
     const examples = exampleMessage(currentChar)
     for(const example of examples){
-        currentTokens += await tokenize(example.content) + 5
+        currentTokens += await tokenize(example.content) + chatAdditonalTokens
     }
 
     let chats:OpenAIChat[] = examples
@@ -244,11 +263,11 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
             memo: msg.chatId,
             name: name
         })
-        currentTokens += (await tokenize(formedChat) + 5)
+        currentTokens += (await tokenize(formedChat) + chatAdditonalTokens)
     }
 
     if(nowChatroom.supaMemory && db.supaMemoryType !== 'none'){
-        const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom)
+        const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, chatAdditonalTokens)
         if(sp.error){
             alertError(sp.error)
             return false
@@ -266,7 +285,7 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
                 return false
             }
-            currentTokens -= (await tokenize(chats[0].content) + 5)
+            currentTokens -= (await tokenize(chats[0].content) + chatAdditonalTokens)
             chats.splice(0, 1)
         }
         currentChat.lastMemory = chats[0].memo
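
Net effect in this file: the hard-coded `+ 5` per-message padding becomes `chatAdditonalTokens`, computed once per send from the model family (5 for gpt-family models, 3 otherwise) plus the worst-case speaker-name prefix, and forwarded into recursive group sends. A minimal sketch of the resulting budgeting rule, not part of the commit; `budgetTokens` is a hypothetical name, and `tokenize` is assumed to be the project's own async token counter:

// Sketch only: charge each message its content tokens plus the flat
// per-message overhead that the diff threads through as chatAdditonalTokens.
async function budgetTokens(chats: OpenAIChat[], chatAdditonalTokens: number): Promise<number> {
    let total = 0
    for (const chat of chats) {
        total += await tokenize(chat.content) + chatAdditonalTokens
    }
    return total
}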

Changed file 2 of 3

@@ -1,4 +1,5 @@
 import type { OpenAIChat } from ".";
+import { tokenize } from "../tokenizer";
 
 export function multiChatReplacer(){
@@ -53,3 +54,14 @@ export function unstringlizeChat(text:string, formated:OpenAIChat[], char:string
     return text
 }
+
+export async function getNameMaxTokens(names:string[]){
+    let maxCharNameTokens = 0
+    for(const name of names){
+        const tokens = await tokenize(name + ': ') + 1
+        if(maxCharNameTokens < tokens){
+            maxCharNameTokens = tokens
+        }
+    }
+    return maxCharNameTokens
+}
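
The new helper returns the largest tokenized "name: " prefix across the candidate speakers, plus one token of slack, so a single overhead value stays safe no matter which character a message is attributed to. A hypothetical usage sketch (the names and token counts below are invented for illustration):

// If tokenize('Alice: ') were 3 and tokenize('Seraphina: ') were 5,
// the call would return 5 + 1 = 6: the worst case plus one token of slack.
const nameOverhead = await getNameMaxTokens(['Alice', 'Seraphina'])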

Changed file 3 of 3

@@ -5,9 +5,17 @@ import { tokenize } from "../tokenizer";
 import { findCharacterbyId } from "../util";
 import { requestChatData } from "./request";
 
-export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxContextTokens:number,room:Chat,char:character|groupChat): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{
+export async function supaMemory(
+    chats:OpenAIChat[],
+    currentTokens:number,
+    maxContextTokens:number,
+    room:Chat,
+    char:character|groupChat,
+    chatAdditonalTokens:number
+): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{
     const db = get(DataBase)
+    console.log("Memory: " + currentTokens)
+    currentTokens += 10
 
     if(currentTokens > maxContextTokens){
         let coIndex = -1
@@ -19,7 +27,7 @@ export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxCont
         }
         if(coIndex !== -1){
             for(let i=0;i<coIndex;i++){
-                currentTokens -= (await tokenize(chats[0].content) + 1)
+                currentTokens -= (await tokenize(chats[0].content) + chatAdditonalTokens)
                 chats.splice(0, 1)
             }
         }
@@ -45,13 +53,13 @@ export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxCont
                     lastId = id
                     break
                 }
-                currentTokens -= (await tokenize(chats[0].content) + 1)
+                currentTokens -= (await tokenize(chats[0].content) + chatAdditonalTokens)
                 chats.splice(0, 1)
                 i += 1
             }
             supaMemory = data
-            currentTokens += await tokenize(supaMemory) + 1
+            currentTokens += await tokenize(supaMemory) + chatAdditonalTokens
         }
@@ -171,7 +179,7 @@ export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxCont
             }
             continue
         }
-        const tokens = await tokenize(cont.content) + 5
+        const tokens = await tokenize(cont.content) + chatAdditonalTokens
        if((chunkSize + tokens) > maxChunkSize){
            if(stringlizedChat === ''){
                stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? '' : char.name : db.username}: ${cont.content}\n\n`
@@ -193,7 +201,7 @@ export async function supaMemory(chats:OpenAIChat[],currentTokens:number,maxCont
             return result
         }
-        const tokenz = await tokenize(result + '\n\n') + 5
+        const tokenz = await tokenize(result + '\n\n') + chatAdditonalTokens
         currentTokens += tokenz
         supaMemory += result.replace(/\n+/g,'\n') + '\n\n'
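
With `chatAdditonalTokens` threaded into supaMemory, the trimming loops now subtract the same per-message cost that sendChat charged when building the context, instead of removing `+ 1` for messages that were added at `+ 5`, which let `currentTokens` drift away from the real context usage. A condensed sketch of that invariant, not from the commit; `trimToFit` is a hypothetical name, and the project's `tokenize` and `OpenAIChat` are assumed:

// Sketch only: messages leave the window at the same per-message cost they
// entered with, so currentTokens keeps mirroring what remains in chats.
async function trimToFit(
    chats: OpenAIChat[],
    currentTokens: number,
    maxContextTokens: number,
    chatAdditonalTokens: number
) {
    while (currentTokens > maxContextTokens && chats.length > 0) {
        currentTokens -= await tokenize(chats[0].content) + chatAdditonalTokens
        chats.splice(0, 1)
    }
    return { chats, currentTokens }
}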