[fix] consistent tokenizing

This commit is contained in:
kwaroran
2023-11-24 14:50:06 +09:00
parent 4aa6adb637
commit 26f4ce94fa
4 changed files with 23 additions and 8 deletions

View File

@@ -375,6 +375,7 @@ type matcherArg = {
rmVar:boolean,
var?:{[key:string]:string}
tokenizeAccurate?:boolean
consistantChar?:boolean
}
const matcher = (p1:string,matcherArg:matcherArg) => {
if(p1.length > 10000){
@@ -417,6 +418,9 @@ const matcher = (p1:string,matcherArg:matcherArg) => {
}
case 'char':
case 'bot':{
if(matcherArg.consistantChar){
return 'botname'
}
let selectedChar = get(selectedCharID)
let currentChar = db.characters[selectedChar]
if(currentChar && currentChar.type !== 'group'){
@@ -433,6 +437,9 @@ const matcher = (p1:string,matcherArg:matcherArg) => {
return currentChar.name
}
case 'user':{
if(matcherArg.consistantChar){
return 'username'
}
return db.username
}
case 'personality':
@@ -698,6 +705,9 @@ const smMatcher = (p1:string,matcherArg:matcherArg) => {
switch(lowerCased){
case 'char':
case 'bot':{
if(matcherArg.consistantChar){
return 'botname'
}
let selectedChar = get(selectedCharID)
let currentChar = db.characters[selectedChar]
if(currentChar && currentChar.type !== 'group'){
@@ -714,6 +724,9 @@ const smMatcher = (p1:string,matcherArg:matcherArg) => {
return currentChar.name
}
case 'user':{
if(matcherArg.consistantChar){
return 'username'
}
return db.username
}
}
@@ -752,6 +765,7 @@ export function risuChatParser(da:string, arg:{
rmVar?:boolean,
var?:{[key:string]:string}
tokenizeAccurate?:boolean
consistantChar?:boolean
} = {}):string{
const chatID = arg.chatID ?? -1
const db = arg.db ?? get(DataBase)

View File

@@ -33,7 +33,7 @@ export async function tokenizePreset(proompts:Proompt[]){
switch(proompt.type){
case 'plain':
case 'jailbreak':{
total += await tokenizeAccurate(proompt.text)
total += await tokenizeAccurate(proompt.text, true)
break
}
case 'persona':
@@ -43,7 +43,7 @@ export async function tokenizePreset(proompts:Proompt[]){
case 'authornote':
case 'memory':{
if(proompt.innerFormat){
total += await tokenizeAccurate(proompt.innerFormat)
total += await tokenizeAccurate(proompt.innerFormat, true)
}
break
}

View File

@@ -87,9 +87,10 @@ export async function tokenize(data:string) {
return encoded.length
}
export async function tokenizeAccurate(data:string) {
export async function tokenizeAccurate(data:string, consistantChar?:boolean) {
data = risuChatParser(data.replace('{{slot}}',''), {
tokenizeAccurate: true
tokenizeAccurate: true,
consistantChar: consistantChar,
})
const encoded = await encode(data)
return encoded.length