diff --git a/src/ts/process/exampleMessages.ts b/src/ts/process/exampleMessages.ts index 19debb23..0aa86704 100644 --- a/src/ts/process/exampleMessages.ts +++ b/src/ts/process/exampleMessages.ts @@ -2,7 +2,7 @@ import type { OpenAIChat } from "."; import type { character } from "../storage/database"; import { replacePlaceholders } from "../util"; -export function exampleMessage(char:character):OpenAIChat[]{ +export function exampleMessage(char:character, userName:string):OpenAIChat[]{ if(char.exampleMessage === ''){ return [] } @@ -34,14 +34,16 @@ export function exampleMessage(char:character):OpenAIChat[]{ add() currentMessage = { role: "assistant", - content: trimed.split(':', 2)[1] + content: trimed.split(':', 2)[1], + name: 'example_' + char.name } } else if(lowered.startsWith('{{user}}:') || lowered.startsWith(':')){ add() currentMessage = { role: "user", - content: trimed.split(':', 2)[1] + content: trimed.split(':', 2)[1], + name: 'example_' + userName } } else{ diff --git a/src/ts/process/index.ts b/src/ts/process/index.ts index 5df79a20..cee85fea 100644 --- a/src/ts/process/index.ts +++ b/src/ts/process/index.ts @@ -1,7 +1,7 @@ import { get, writable } from "svelte/store"; import { DataBase, setDatabase, type character } from "../storage/database"; import { CharEmotion, selectedCharID } from "../stores"; -import { tokenize, tokenizeNum } from "../tokenizer"; +import { ChatTokenizer, tokenizeNum } from "../tokenizer"; import { language } from "../../lang"; import { alertError } from "../alert"; import { loadLoreBookPrompt } from "./lorebook"; @@ -15,7 +15,6 @@ import { supaMemory } from "./supaMemory"; import { v4 } from "uuid"; import { cloneDeep } from "lodash"; import { groupOrder } from "./group"; -import { getNameMaxTokens } from "./stringlize"; export interface OpenAIChat{ role: 'system'|'user'|'assistant' @@ -69,7 +68,6 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n if(nowChatroom.type === 'group'){ 
if(chatProcessIndex === -1){ const charNames =nowChatroom.characters.map((v) => findCharacterbyIdwithCache(v).name) - caculatedChatTokens += await getNameMaxTokens([...charNames, db.username]) const messages = nowChatroom.chats[nowChatroom.chatPage].message const lastMessage = messages[messages.length-1] @@ -110,14 +108,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n } else{ currentChar = nowChatroom - if(!db.aiModel.startsWith('gpt')){ - caculatedChatTokens += await getNameMaxTokens([currentChar.name, db.username]) - } - } let chatAdditonalTokens = arg.chatAdditonalTokens ?? caculatedChatTokens - + const tokenizer = new ChatTokenizer(chatAdditonalTokens, db.aiModel.startsWith('gpt') ? 'noName' : 'name') let selectedChat = nowChatroom.chatPage let currentChat = nowChatroom.chats[selectedChat] let maxContextTokens = db.maxContext @@ -205,17 +199,17 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n }) //await tokenize currernt - let currentTokens = (await tokenize(Object.keys(unformated).map((key) => { - return (unformated[key] as OpenAIChat[]).map((d) => { - return d.content - }).join('\n\n') - }).join('\n\n')) + db.maxResponse) + 130 + let currentTokens = 0 + + for(const key in unformated){ + currentTokens += await tokenizer.tokenizeChat(unformated[key]) + } - const examples = exampleMessage(currentChar) + const examples = exampleMessage(currentChar, db.username) for(const example of examples){ - currentTokens += await tokenize(example.content) + chatAdditonalTokens + currentTokens += await tokenizer.tokenizeChat(example) } let chats:OpenAIChat[] = examples @@ -230,15 +224,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n if(nowChatroom.type !== 'group'){ const firstMsg = nowChatroom.firstMsgIndex === -1 ? 
nowChatroom.firstMessage : nowChatroom.alternateGreetings[nowChatroom.firstMsgIndex] - chats.push({ + const chat:OpenAIChat = { role: 'assistant', content: processScript(currentChar, replacePlaceholders(firstMsg, currentChar.name), 'editprocess') - }) - currentTokens += await tokenize(processScript(currentChar, - replacePlaceholders(firstMsg, currentChar.name), - 'editprocess')) + } + chats.push(chat) + currentTokens += await tokenizer.tokenizeChat(chat) } const ms = currentChat.message @@ -259,17 +252,18 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n if(!msg.chatId){ msg.chatId = v4() } - chats.push({ + const chat:OpenAIChat = { role: msg.role === 'user' ? 'user' : 'assistant', content: formedChat, memo: msg.chatId, name: name - }) - currentTokens += (await tokenize(formedChat) + chatAdditonalTokens) + } + chats.push(chat) + currentTokens += await tokenizer.tokenizeChat(chat) } if(nowChatroom.supaMemory && db.supaMemoryType !== 'none'){ - const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, chatAdditonalTokens) + const sp = await supaMemory(chats, currentTokens, maxContextTokens, currentChat, nowChatroom, tokenizer) if(sp.error){ alertError(sp.error) return false @@ -287,7 +281,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n return false } - currentTokens -= (await tokenize(chats[0].content) + chatAdditonalTokens) + currentTokens -= await tokenizer.tokenizeChat(chats[0]) chats.splice(0, 1) } currentChat.lastMemory = chats[0].memo diff --git a/src/ts/process/stringlize.ts b/src/ts/process/stringlize.ts index 140389b6..fe608a09 100644 --- a/src/ts/process/stringlize.ts +++ b/src/ts/process/stringlize.ts @@ -53,15 +53,4 @@ export function unstringlizeChat(text:string, formated:OpenAIChat[], char:string } return text -} - -export async function getNameMaxTokens(names:string[]){ - let maxCharNameTokens = 0 - for(const name of names){ - const tokens = 
await tokenize(name + ': ') + 1 - if(maxCharNameTokens < tokens){ - maxCharNameTokens = tokens - } - } - return maxCharNameTokens } \ No newline at end of file diff --git a/src/ts/process/supaMemory.ts b/src/ts/process/supaMemory.ts index d3b5748a..11862ce7 100644 --- a/src/ts/process/supaMemory.ts +++ b/src/ts/process/supaMemory.ts @@ -1,8 +1,7 @@ import { get } from "svelte/store"; import type { OpenAIChat } from "."; import { DataBase, type Chat, type character, type groupChat } from "../storage/database"; -import { tokenize } from "../tokenizer"; -import { findCharacterbyId } from "../util"; +import { tokenize, type ChatTokenizer } from "../tokenizer"; import { requestChatData } from "./request"; export async function supaMemory( @@ -11,7 +10,7 @@ export async function supaMemory( maxContextTokens:number, room:Chat, char:character|groupChat, - chatAdditonalTokens:number + tokenizer:ChatTokenizer ): Promise<{ currentTokens: number; chats: OpenAIChat[]; error?:string; memory?:string;lastId?:string}>{ const db = get(DataBase) @@ -27,7 +26,7 @@ export async function supaMemory( } if(coIndex !== -1){ for(let i=0;i maxChunkSize){ if(stringlizedChat === ''){ stringlizedChat += `${cont.role === 'assistant' ? char.type === 'group' ? 
'' : char.name : db.username}: ${cont.content}\n\n` @@ -201,7 +200,7 @@ export async function supaMemory( return result } - const tokenz = await tokenize(result + '\n\n') + chatAdditonalTokens + const tokenz = await tokenize(result + '\n\n') currentTokens += tokenz supaMemory += result.replace(/\n+/g,'\n') + '\n\n' } diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index 032a9460..1cd1280f 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -2,6 +2,7 @@ import type { Tiktoken } from "@dqbd/tiktoken"; import { DataBase, type character } from "./storage/database"; import { get } from "svelte/store"; import { tokenizeTransformers } from "./transformers/transformer"; +import type { OpenAIChat } from "./process"; async function encode(data:string):Promise<(number[]|Uint32Array)>{ let db = get(DataBase) @@ -37,6 +38,25 @@ export async function tokenize(data:string) { return encoded.length } + +export class ChatTokenizer { + + private chatAdditonalTokens:number + private useName:'name'|'noName' + + constructor(chatAdditonalTokens:number, useName:'name'|'noName'){ + this.chatAdditonalTokens = chatAdditonalTokens + this.useName = useName + } + async tokenizeChat(data:OpenAIChat) { + const contentTokens = (await encode(data.content)).length + const nameTokens = this.useName === 'name' && data.name ? (await encode(data.name)).length : 0 + return contentTokens + nameTokens + this.chatAdditonalTokens + } + + +} + export async function tokenizeNum(data:string) { const encoded = await encode(data) return encoded