[feat] llama tokenizer

This commit is contained in:
kwaroran
2023-09-09 07:18:20 +09:00
parent 9b757301e4
commit 560f6b56e3
2 changed files with 10 additions and 1 deletions

Binary file not shown.

View File

@@ -17,10 +17,14 @@ async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
if(db.aiModel.startsWith('novelai')){
return await tokenizeWebTokenizers(data, 'novelai')
}
if(db.aiModel.startsWith('local_') || db.aiModel === 'mancer' || db.aiModel === 'textgen_webui'){
return await tokenizeWebTokenizers(data, 'llama')
}
return await tikJS(data)
}
type tokenizerType = 'novellist'|'claude'|'novelai'
type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'
let tikParser:Tiktoken = null
let tokenizersTokenizer:Tokenizer = null
@@ -60,6 +64,11 @@ async function tokenizeWebTokenizers(text:string, type:tokenizerType) {
).arrayBuffer())
break
case 'llama':
tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece(
await (await fetch("/token/llama/llama.model")
).arrayBuffer())
}
tokenizersType = type
}