diff --git a/public/token/nai/nerdstash_v2.model b/public/token/nai/nerdstash_v2.model new file mode 100644 index 00000000..ec245319 Binary files /dev/null and b/public/token/nai/nerdstash_v2.model differ diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index 1cae5505..74320652 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -14,10 +14,13 @@ async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{ if(db.aiModel.startsWith('claude')){ return await tokenizeWebTokenizers(data, 'claude') } + if(db.aiModel.startsWith('novelai')){ + return await tokenizeWebTokenizers(data, 'novelai') + } return await tikJS(data) } -type tokenizerType = 'novellist'|'claude' +type tokenizerType = 'novellist'|'claude'|'novelai' let tikParser:Tiktoken = null let tokenizersTokenizer:Tokenizer = null @@ -51,6 +54,11 @@ async function tokenizeWebTokenizers(text:string, type:tokenizerType) { await (await fetch("/token/claude/claude.json") ).arrayBuffer()) break + case 'novelai': + tokenizersTokenizer = await webTokenizer.Tokenizer.fromSentencePiece( + await (await fetch("/token/nai/nerdstash_v2.model") + ).arrayBuffer()) + break } tokenizersType = type }