From 26f4ce94fa55413538cc49207411892c750d1255 Mon Sep 17 00:00:00 2001 From: kwaroran Date: Fri, 24 Nov 2023 14:50:06 +0900 Subject: [PATCH] [fix] consistent tokenizing --- src/lib/Setting/Pages/BotSettings.svelte | 8 ++++---- src/ts/parser.ts | 14 ++++++++++++++ src/ts/process/proompt.ts | 4 ++-- src/ts/tokenizer.ts | 5 +++-- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/lib/Setting/Pages/BotSettings.svelte b/src/lib/Setting/Pages/BotSettings.svelte index 19036350..5a044f23 100644 --- a/src/lib/Setting/Pages/BotSettings.svelte +++ b/src/lib/Setting/Pages/BotSettings.svelte @@ -40,10 +40,10 @@ export let goPromptTemplate = () => {} async function loadTokenize(){ - tokens.mainPrompt = await tokenizeAccurate($DataBase.mainPrompt) - tokens.jailbreak = await tokenizeAccurate($DataBase.jailbreak) - tokens.globalNote = await tokenizeAccurate($DataBase.globalNote) - tokens.autoSuggest = await tokenizeAccurate($DataBase.autoSuggestPrompt) + tokens.mainPrompt = await tokenizeAccurate($DataBase.mainPrompt, true) + tokens.jailbreak = await tokenizeAccurate($DataBase.jailbreak, true) + tokens.globalNote = await tokenizeAccurate($DataBase.globalNote, true) + tokens.autoSuggest = await tokenizeAccurate($DataBase.autoSuggestPrompt, true) } let advancedBotSettings = false diff --git a/src/ts/parser.ts b/src/ts/parser.ts index f729d269..3d2132e0 100644 --- a/src/ts/parser.ts +++ b/src/ts/parser.ts @@ -375,6 +375,7 @@ type matcherArg = { rmVar:boolean, var?:{[key:string]:string} tokenizeAccurate?:boolean + consistantChar?:boolean } const matcher = (p1:string,matcherArg:matcherArg) => { if(p1.length > 10000){ @@ -417,6 +418,9 @@ const matcher = (p1:string,matcherArg:matcherArg) => { } case 'char': case 'bot':{ + if(matcherArg.consistantChar){ + return 'botname' + } let selectedChar = get(selectedCharID) let currentChar = db.characters[selectedChar] if(currentChar && currentChar.type !== 'group'){ @@ -433,6 +437,9 @@ const matcher = 
(p1:string,matcherArg:matcherArg) => { return currentChar.name } case 'user':{ + if(matcherArg.consistantChar){ + return 'username' + } return db.username } case 'personality': @@ -698,6 +705,9 @@ const smMatcher = (p1:string,matcherArg:matcherArg) => { switch(lowerCased){ case 'char': case 'bot':{ + if(matcherArg.consistantChar){ + return 'botname' + } let selectedChar = get(selectedCharID) let currentChar = db.characters[selectedChar] if(currentChar && currentChar.type !== 'group'){ @@ -714,6 +724,9 @@ const smMatcher = (p1:string,matcherArg:matcherArg) => { return currentChar.name } case 'user':{ + if(matcherArg.consistantChar){ + return 'username' + } return db.username } } @@ -752,6 +765,7 @@ export function risuChatParser(da:string, arg:{ rmVar?:boolean, var?:{[key:string]:string} tokenizeAccurate?:boolean + consistantChar?:boolean } = {}):string{ const chatID = arg.chatID ?? -1 const db = arg.db ?? get(DataBase) diff --git a/src/ts/process/proompt.ts b/src/ts/process/proompt.ts index 057431d0..4040b167 100644 --- a/src/ts/process/proompt.ts +++ b/src/ts/process/proompt.ts @@ -33,7 +33,7 @@ export async function tokenizePreset(proompts:Proompt[]){ switch(proompt.type){ case 'plain': case 'jailbreak':{ - total += await tokenizeAccurate(proompt.text) + total += await tokenizeAccurate(proompt.text, true) break } case 'persona': @@ -43,7 +43,7 @@ export async function tokenizePreset(proompts:Proompt[]){ case 'authornote': case 'memory':{ if(proompt.innerFormat){ - total += await tokenizeAccurate(proompt.innerFormat) + total += await tokenizeAccurate(proompt.innerFormat, true) } break } diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index 34639341..fc24e2b2 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -87,9 +87,10 @@ export async function tokenize(data:string) { return encoded.length } -export async function tokenizeAccurate(data:string) { +export async function tokenizeAccurate(data:string, consistantChar?:boolean) { data = 
risuChatParser(data.replace('{{slot}}',''), { - tokenizeAccurate: true + tokenizeAccurate: true, + consistantChar: consistantChar, }) const encoded = await encode(data) return encoded.length