[feat] new tokenizing

This commit is contained in:
kwaroran
2023-06-09 22:27:34 +09:00
parent cfe20c968b
commit dbf5f6b25a
5 changed files with 51 additions and 47 deletions

View File

@@ -2,6 +2,7 @@ import type { Tiktoken } from "@dqbd/tiktoken";
import { DataBase, type character } from "./storage/database";
import { get } from "svelte/store";
import { tokenizeTransformers } from "./transformers/transformer";
import type { OpenAIChat } from "./process";
async function encode(data:string):Promise<(number[]|Uint32Array)>{
let db = get(DataBase)
@@ -37,6 +38,25 @@ export async function tokenize(data:string) {
return encoded.length
}
/**
 * Counts how many tokens a single chat message contributes.
 *
 * The total for a message is the encoded length of its content, plus
 * (when configured with `'name'`) the encoded length of its name, plus a
 * constant number of additional tokens supplied at construction time.
 */
export class ChatTokenizer {

    /** Constant number of tokens added to every message's count. */
    private chatAdditonalTokens:number
    /** `'name'` to include the message's name in the count, `'noName'` to ignore it. */
    private useName:'name'|'noName'

    /**
     * @param chatAdditonalTokens constant token count added to every message
     * @param useName whether the message's `name` field is tokenized as well
     */
    constructor(chatAdditonalTokens:number, useName:'name'|'noName'){
        this.chatAdditonalTokens = chatAdditonalTokens
        this.useName = useName
    }

    /**
     * Computes the token count of one chat message.
     *
     * @param data the chat message; `content` is always tokenized, `name` only in `'name'` mode
     * @returns content tokens + name tokens (0 when not counted) + the constant additional tokens
     */
    async tokenizeChat(data:OpenAIChat):Promise<number> {
        const contentTokens = (await encode(data.content)).length

        // Each term is computed separately on purpose: written as a single
        // `a + cond ? b : c + d` expression, `+` binds tighter than `===` and
        // `?:`, so the sum collapses to just the additional-token constant.
        let nameTokens = 0
        // NOTE(review): `name` is assumed to be possibly absent on a message;
        // skipping it avoids passing undefined to encode() — confirm against OpenAIChat.
        if(this.useName === 'name' && data.name){
            nameTokens = (await encode(data.name)).length
        }

        return contentTokens + nameTokens + this.chatAdditonalTokens
    }

}
export async function tokenizeNum(data:string) {
const encoded = await encode(data)
return encoded