[feat] new tokenizing
This commit is contained in:
@@ -2,6 +2,7 @@ import type { Tiktoken } from "@dqbd/tiktoken";
|
||||
import { DataBase, type character } from "./storage/database";
|
||||
import { get } from "svelte/store";
|
||||
import { tokenizeTransformers } from "./transformers/transformer";
|
||||
import type { OpenAIChat } from "./process";
|
||||
|
||||
async function encode(data:string):Promise<(number[]|Uint32Array)>{
|
||||
let db = get(DataBase)
|
||||
@@ -37,6 +38,25 @@ export async function tokenize(data:string) {
|
||||
return encoded.length
|
||||
}
|
||||
|
||||
|
||||
/**
 * Counts the tokens a single chat message occupies, including a fixed
 * per-message overhead and, optionally, the tokens of the message's name.
 */
export class ChatTokenizer {

    /** Fixed number of tokens added to every message on top of its content. */
    private readonly chatAdditonalTokens:number
    /** 'name' counts the message's name tokens as well; 'noName' ignores the name. */
    private readonly useName:'name'|'noName'

    /**
     * @param chatAdditonalTokens Fixed per-message token overhead added to every count.
     * @param useName Whether the message's `name` field contributes to the count.
     */
    constructor(chatAdditonalTokens:number, useName:'name'|'noName'){
        this.chatAdditonalTokens = chatAdditonalTokens
        this.useName = useName
    }

    /**
     * Computes the token count of one chat message.
     *
     * The result is: tokens(content) + chatAdditonalTokens, plus tokens(name)
     * when this tokenizer was created with 'name' and the message has a name.
     *
     * @param data The chat message to measure.
     * @returns The total number of tokens attributed to the message.
     */
    async tokenizeChat(data:OpenAIChat):Promise<number> {
        // Sum the parts explicitly. A single expression mixing `+`, `===` and `?:`
        // binds as `((len + useName) === 'name') ? ... : ...`, which silently
        // drops the content length from the result.
        let total = (await encode(data.content)).length + this.chatAdditonalTokens

        // Only encode the name when it is requested and actually present.
        if(this.useName === 'name' && data.name){
            total += (await encode(data.name)).length
        }
        return total
    }
}
|
||||
|
||||
export async function tokenizeNum(data:string) {
|
||||
const encoded = await encode(data)
|
||||
return encoded
|
||||
|
||||
Reference in New Issue
Block a user