[refactor] transformer tokenizer

This commit is contained in:
kwaroran
2023-06-07 09:23:29 +09:00
parent b631da7e72
commit 9974ff0df1
2 changed files with 4 additions and 10 deletions

View File

@@ -6,7 +6,7 @@ import { tokenizeTransformers } from "./transformers/transformer";
async function encode(data:string):Promise<(number[]|Uint32Array)>{ async function encode(data:string):Promise<(number[]|Uint32Array)>{
let db = get(DataBase) let db = get(DataBase)
if(db.aiModel === 'novellist'){ if(db.aiModel === 'novellist'){
return await tokenizeTransformers('trin',data) return await tokenizeTransformers('naclbit/trin_tokenizer_v3',data)
} }
return await tikJS(data) return await tikJS(data)
} }

View File

@@ -3,13 +3,7 @@ type transformerLibType = typeof import("@xenova/transformers");
let tokenizer:PreTrainedTokenizer = null let tokenizer:PreTrainedTokenizer = null
let transformerLib:transformerLibType let transformerLib:transformerLibType
const tokenizerDict = { let tokenizerType:string = ''
'trin': 'naclbit/trin_tokenizer_v3',
} as const
type tokenizerTypes = keyof(typeof tokenizerDict)
let tokenizerType:tokenizerTypes|'' = ''
async function loadTransformers() { async function loadTransformers() {
@@ -18,11 +12,11 @@ async function loadTransformers() {
} }
} }
export async function tokenizeTransformers(type:tokenizerTypes, text:string) { export async function tokenizeTransformers(type:string, text:string) {
await loadTransformers() await loadTransformers()
if(tokenizerType !== type){ if(tokenizerType !== type){
const AutoTokenizer = transformerLib.AutoTokenizer const AutoTokenizer = transformerLib.AutoTokenizer
tokenizer = await AutoTokenizer.from_pretrained(tokenizerDict[type]) tokenizer = await AutoTokenizer.from_pretrained(type)
tokenizerType = type tokenizerType = type
} }