[refactor] transformer tokenizer: pass the pretrained tokenizer name directly instead of a `tokenizerDict` key

This commit is contained in:
kwaroran
2023-06-07 09:23:29 +09:00
parent b631da7e72
commit 9974ff0df1
2 changed files with 4 additions and 10 deletions

View File

@@ -6,7 +6,7 @@ import { tokenizeTransformers } from "./transformers/transformer";
async function encode(data:string):Promise<(number[]|Uint32Array)>{
let db = get(DataBase)
if(db.aiModel === 'novellist'){
return await tokenizeTransformers('trin',data)
return await tokenizeTransformers('naclbit/trin_tokenizer_v3',data)
}
return await tikJS(data)
}

View File

@@ -3,13 +3,7 @@ type transformerLibType = typeof import("@xenova/transformers");
let tokenizer:PreTrainedTokenizer = null
let transformerLib:transformerLibType
const tokenizerDict = {
'trin': 'naclbit/trin_tokenizer_v3',
} as const
type tokenizerTypes = keyof(typeof tokenizerDict)
let tokenizerType:tokenizerTypes|'' = ''
let tokenizerType:string = ''
async function loadTransformers() {
@@ -18,11 +12,11 @@ async function loadTransformers() {
}
}
export async function tokenizeTransformers(type:tokenizerTypes, text:string) {
export async function tokenizeTransformers(type:string, text:string) {
await loadTransformers()
if(tokenizerType !== type){
const AutoTokenizer = transformerLib.AutoTokenizer
tokenizer = await AutoTokenizer.from_pretrained(tokenizerDict[type])
tokenizer = await AutoTokenizer.from_pretrained(type)
tokenizerType = type
}