[feat] accurate tokenizing

This commit is contained in:
kwaroran
2023-11-23 17:52:00 +09:00
parent dab121c9c7
commit 634fe418b4
6 changed files with 116 additions and 16 deletions

View File

@@ -1,3 +1,5 @@
import { tokenizeAccurate } from "../tokenizer";
/**
 * A single prompt entry: one of the plain, typed, chat or author-note variants.
 * Code in this file tells the variants apart by their `type` field.
 */
export type Proompt = ProomptPlain|ProomptTyped|ProomptChat|ProomptAuthorNote;
export interface ProomptPlain {
@@ -23,4 +25,29 @@ export interface ProomptChat {
type: 'chat';
rangeStart: number;
rangeEnd: number|'end';
}
/**
 * Adds up the `tokenizeAccurate` results for the text carried by a list of
 * prompt entries.
 *
 * - `plain` and `jailbreak` entries contribute the result for their `text`.
 * - `persona`, `description`, `lorebook`, `postEverything`, `authornote` and
 *   `memory` entries contribute the result for their `innerFormat`, but only
 *   when `innerFormat` is truthy.
 * - Entries of any other type contribute nothing.
 *
 * Entries are processed one at a time, in array order; each tokenizer call is
 * awaited before the next one starts.
 *
 * @param proompts - The prompt entries to measure.
 * @returns The accumulated sum; `0` when nothing was counted.
 */
export async function tokenizePreset(proompts:Proompt[]){
    let tokenSum = 0
    for(const entry of proompts){
        // Entries whose own text is the payload.
        if(entry.type === 'plain' || entry.type === 'jailbreak'){
            const textTokens = await tokenizeAccurate(entry.text)
            tokenSum += textTokens
            continue
        }
        // Entries that only carry an optional wrapping format.
        if(
            entry.type === 'persona' ||
            entry.type === 'description' ||
            entry.type === 'lorebook' ||
            entry.type === 'postEverything' ||
            entry.type === 'authornote' ||
            entry.type === 'memory'
        ){
            if(entry.innerFormat){
                const formatTokens = await tokenizeAccurate(entry.innerFormat)
                tokenSum += formatTokens
            }
        }
        // Every other entry type is intentionally not counted.
    }
    return tokenSum
}