diff --git a/public/token/llama/llama.model b/public/token/llama/llama.model new file mode 100644 index 00000000..22bccbcb Binary files /dev/null and b/public/token/llama/llama.model differ diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index 5fce431a..4e2665f0 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -17,10 +17,14 @@ async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{ if(db.aiModel.startsWith('novelai')){ return await tokenizeWebTokenizers(data, 'novelai') } + if(db.aiModel.startsWith('local_') || db.aiModel === 'mancer' || db.aiModel === 'textgen_webui'){ + return await tokenizeWebTokenizers(data, 'llama') + + } return await tikJS(data) } -type tokenizerType = 'novellist'|'claude'|'novelai' +type tokenizerType = 'novellist'|'claude'|'novelai'|'llama' let tikParser:Tiktoken = null let tokenizersTokenizer:Tokenizer = null @@ -60,6 +64,11 @@ async function tokenizeWebTokenizers(text:string, type:tokenizerType) { ).arrayBuffer()) break + case 'llama': + tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece( + await (await fetch("/token/llama/llama.model") + ).arrayBuffer()) + } tokenizersType = type }