From 9016b506d92f9158e35762269dab82d57013f6ef Mon Sep 17 00:00:00 2001 From: kwaroran <116663078+kwaroran@users.noreply.github.com> Date: Mon, 14 Apr 2025 14:24:55 +0900 Subject: [PATCH 1/7] Revert "refactor: regex caching" --- src/ts/process/scripts.ts | 49 +++++++++++++++------------------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/src/ts/process/scripts.ts b/src/ts/process/scripts.ts index 3c95ab81..872779ee 100644 --- a/src/ts/process/scripts.ts +++ b/src/ts/process/scripts.ts @@ -68,29 +68,26 @@ export async function importRegex(o?:customscript[]):Promise{ let bestMatchCache = new Map() let processScriptCache = new Map() -function generateScriptCacheKey(scripts: customscript[], data: string, mode: ScriptMode, chatID = -1, cbsConditions: CbsConditions = {}) { - let hash = data + '|||' + mode + '|||'; - for (const script of scripts) { - if(script.type !== mode){ - continue - } - hash += `${script.flag?.includes('') ? - risuChatParser(script.in, { chatID: chatID, cbsConditions }) : - script.in}|||${risuChatParser(script.out, { chatID: chatID, cbsConditions})}|||${script.flag ?? ''}|||${script.ableFlag ? 1 : 0}`; +function cacheScript(scripts:customscript[], data:string, result:string, mode:ScriptMode){ + let hash = data + '|||' + mode + '|||' + for(const script of scripts){ + hash += `${script.in}|||${script.out}|||${script.flag}|||${script.ableFlag}|||${script.type}` } - return hash; -} -function cacheScript(hash:string, result:string){ processScriptCache.set(hash, result) - if(processScriptCache.size > 1000){ + if(processScriptCache.size > 500){ processScriptCache.delete(processScriptCache.keys().next().value) } } -function getScriptCache(hash:string){ +function getScriptCache(scripts:customscript[], data:string, mode:ScriptMode){ + let hash = data + '|||' + mode + '|||' + for(const script of scripts){ + hash += `${script.in}|||${script.out}|||${script.flag}|||${script.ableFlag}|||${script.type}` + } + return processScriptCache.get(hash) } @@ -101,7 +98,12 @@ export function resetScriptCache(){ export async function processScriptFull(char:character|groupChat|simpleCharacterArgument, data:string, mode:ScriptMode, chatID = -1, cbsConditions:CbsConditions = {}){ let db = getDatabase() const originalData = data + const cached = getScriptCache((db.presetRegex ?? []).concat(char.customscript), originalData, mode) + if(cached){ + return {data: cached, emoChanged: false} + } let emoChanged = false + const scripts = (db.presetRegex ?? []).concat(char.customscript).concat(getModuleRegexScripts()) data = await runLuaEditTrigger(char, mode, data) if(mode === 'editdisplay'){ @@ -115,7 +117,7 @@ export async function processScriptFull(char:character|groupChat|simpleCharacter displayData: data }) - data = d?.displayData ?? data + data = d.displayData ?? data console.log('Trigger time', performance.now() - perf) } catch(e){ @@ -132,26 +134,14 @@ export async function processScriptFull(char:character|groupChat|simpleCharacter } } } - - data = risuChatParser(data, { chatID: chatID, cbsConditions }) - const scripts = (db.presetRegex ?? []).concat(char.customscript).concat(getModuleRegexScripts()) - const hash = generateScriptCacheKey(scripts, data, mode, chatID, cbsConditions) - const cached = getScriptCache(hash) - if(cached){ - return {data: cached, emoChanged: false} - } if(scripts.length === 0){ - cacheScript(hash, data) + cacheScript(scripts, originalData, data, mode) return {data, emoChanged} } function executeScript(pscript:pScript){ const script = pscript.script - if(script.in === ''){ - return - } - if(script.type === mode){ let outScript2 = script.out.replaceAll("$n", "\n") @@ -347,7 +337,6 @@ export async function processScriptFull(char:character|groupChat|simpleCharacter if(db.dynamicAssets && (char.type === 'simple' || char.type === 'character') && char.additionalAssets && char.additionalAssets.length > 0){ if(!db.dynamicAssetsEditDisplay && mode === 'editdisplay'){ - cacheScript(hash, data) return {data, emoChanged} } const assetNames = char.additionalAssets.map((v) => v[0]) @@ -383,7 +372,7 @@ export async function processScriptFull(char:character|groupChat|simpleCharacter } } - cacheScript(hash, data) + cacheScript(scripts, originalData, data, mode) return {data, emoChanged} } From 36e0935bb04c1c454729b8c69406009a22ca9c51 Mon Sep 17 00:00:00 2001 From: kwaroran Date: Wed, 16 Apr 2025 10:57:48 +0900 Subject: [PATCH 2/7] Make tokenizer caching to an option --- src/lang/en.ts | 3 +- src/lib/Setting/Pages/AdvancedSettings.svelte | 4 +++ src/ts/storage/database.svelte.ts | 1 + src/ts/tokenizer.ts | 32 +++++++++++-------- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/lang/en.ts b/src/lang/en.ts index c1cb81df..ead4740f 100644 --- a/src/lang/en.ts +++ b/src/lang/en.ts @@ -1115,5 +1115,6 @@ export const languageEnglish = { fallbackWhenBlankResponse: "Fallback When Blank Response", doNotChangeFallbackModels: "Do Not Change Fallback Models on Preset Change", customModels: "Custom Models", - igpPrompt: "IGP Prompt" + igpPrompt: "IGP Prompt", + useTokenizerCaching: "Tokenizer Caching" } diff --git a/src/lib/Setting/Pages/AdvancedSettings.svelte b/src/lib/Setting/Pages/AdvancedSettings.svelte index 805cd9dc..49a50881 100644 --- a/src/lib/Setting/Pages/AdvancedSettings.svelte +++ b/src/lib/Setting/Pages/AdvancedSettings.svelte @@ -207,6 +207,10 @@ +
+ + +
{#if DBState.db.useExperimental}
diff --git a/src/ts/storage/database.svelte.ts b/src/ts/storage/database.svelte.ts index 12618efe..ec9a2591 100644 --- a/src/ts/storage/database.svelte.ts +++ b/src/ts/storage/database.svelte.ts @@ -1022,6 +1022,7 @@ export interface Database{ flags: LLMFlags[] }[] igpPrompt:string + useTokenizerCaching:boolean } interface SeparateParameters{ diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index 3211c878..b553ba27 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -47,18 +47,21 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr const modelInfo = getModelInfo(db.aiModel); const pluginTokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer ?? "none"; - const cacheKey = getHash( - data, - db.aiModel, - db.customTokenizer, - db.currentPluginProvider, - db.googleClaudeTokenizing, - modelInfo, - pluginTokenizer - ); - const cachedResult = encodeCache.get(cacheKey); - if (cachedResult !== undefined) { - return cachedResult; + let cacheKey = '' + if(db.useTokenizerCaching){ + cacheKey = getHash( + data, + db.aiModel, + db.customTokenizer, + db.currentPluginProvider, + db.googleClaudeTokenizing, + modelInfo, + pluginTokenizer + ); + const cachedResult = encodeCache.get(cacheKey); + if (cachedResult !== undefined) { + return cachedResult; + } } let result: number[] | Uint32Array | Int32Array; @@ -142,7 +145,10 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr } else { result = await tikJS(data); } - encodeCache.set(cacheKey, result); + + if(db.useTokenizerCaching){ + encodeCache.set(cacheKey, result); + } return result; } From ed992407a7c9cd35219d58425e0a19325f5838f7 Mon Sep 17 00:00:00 2001 From: kwaroran Date: Wed, 16 Apr 2025 11:04:54 +0900 Subject: [PATCH 3/7] Fix import var --- src/ts/parser.svelte.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ts/parser.svelte.ts b/src/ts/parser.svelte.ts index 3ad61a3b..54824838 100644 --- a/src/ts/parser.svelte.ts +++ b/src/ts/parser.svelte.ts @@ -650,6 +650,11 @@ function decodeStyleRule(rule:CssAtRuleAST){ rule.rules[i] = decodeStyleRule(rule.rules[i]) } } + if(rule.type === 'import'){ + if(rule.import.startsWith('data:')){ + rule.import = 'data:,' + } + } return rule } From 040652d618b3785e491efd8a69f75756c676613a Mon Sep 17 00:00:00 2001 From: kwaroran Date: Wed, 16 Apr 2025 11:14:13 +0900 Subject: [PATCH 4/7] Add requestTokenParts structure to manage token parts in chat requests --- src/ts/process/index.svelte.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/ts/process/index.svelte.ts b/src/ts/process/index.svelte.ts index 08e02c07..65bb4157 100644 --- a/src/ts/process/index.svelte.ts +++ b/src/ts/process/index.svelte.ts @@ -59,9 +59,15 @@ export interface OpenAIChatFull extends OpenAIChat{ } } +export interface requestTokenPart{ + name:string + tokens:number +} + export const doingChat = writable(false) export const chatProcessStage = writable(0) export const abortChat = writable(false) +export let requestTokenParts:{[key:string]:requestTokenPart[]} = {} export let previewFormated:OpenAIChat[] = [] export let previewBody:string = '' From 5120c141549eae15a473994c653cbaf539ae8aa3 Mon Sep 17 00:00:00 2001 From: kwaroran Date: Mon, 21 Apr 2025 02:02:52 +0900 Subject: [PATCH 5/7] Bump version to 159.0.0 in configuration and related files --- src-tauri/tauri.conf.json | 2 +- src/ts/storage/database.svelte.ts | 2 +- version.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index d4aa0621..3df199a8 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -29,7 +29,7 @@ }, "productName": "RisuAI", "mainBinaryName": "RisuAI", - "version": "158.2.1", + "version": "159.0.0", "identifier": "co.aiclient.risu", "plugins": { "updater": { diff --git a/src/ts/storage/database.svelte.ts b/src/ts/storage/database.svelte.ts index ec9a2591..1c0f6aad 100644 --- a/src/ts/storage/database.svelte.ts +++ b/src/ts/storage/database.svelte.ts @@ -12,7 +12,7 @@ import { defaultColorScheme, type ColorScheme } from '../gui/colorscheme'; import type { PromptItem, PromptSettings } from '../process/prompt'; import type { OobaChatCompletionRequestParams } from '../model/ooba'; -export let appVer = "158.2.1" +export let appVer = "159.0.0" export let webAppSubVer = '' diff --git a/version.json b/version.json index 3f89e9b8..4e3840fb 100644 --- a/version.json +++ b/version.json @@ -1 +1 @@ -{"version":"158.2.1"} \ No newline at end of file +{"version":"159.0.0"} \ No newline at end of file From 8ef2a991fc48f0aa4f77bb991813f0c45f1bf0a6 Mon Sep 17 00:00:00 2001 From: kwaroran <116663078+kwaroran@users.noreply.github.com> Date: Mon, 21 Apr 2025 05:24:46 +0900 Subject: [PATCH 6/7] Revert "Revert "refactor: regex caching"" --- src/ts/process/scripts.ts | 49 ++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/ts/process/scripts.ts b/src/ts/process/scripts.ts index 872779ee..3c95ab81 100644 --- a/src/ts/process/scripts.ts +++ b/src/ts/process/scripts.ts @@ -68,26 +68,29 @@ export async function importRegex(o?:customscript[]):Promise{ let bestMatchCache = new Map() let processScriptCache = new Map() -function cacheScript(scripts:customscript[], data:string, result:string, mode:ScriptMode){ - let hash = data + '|||' + mode + '|||' - for(const script of scripts){ - hash += `${script.in}|||${script.out}|||${script.flag}|||${script.ableFlag}|||${script.type}` +function generateScriptCacheKey(scripts: customscript[], data: string, mode: ScriptMode, chatID = -1, cbsConditions: CbsConditions = {}) { + let hash = data + '|||' + mode + '|||'; + for (const script of scripts) { + if(script.type !== mode){ + continue + } + hash += `${script.flag?.includes('') ? + risuChatParser(script.in, { chatID: chatID, cbsConditions }) : + script.in}|||${risuChatParser(script.out, { chatID: chatID, cbsConditions})}|||${script.flag ?? ''}|||${script.ableFlag ? 1 : 0}`; } + return hash; +} +function cacheScript(hash:string, result:string){ processScriptCache.set(hash, result) - if(processScriptCache.size > 500){ + if(processScriptCache.size > 1000){ processScriptCache.delete(processScriptCache.keys().next().value) } } -function getScriptCache(scripts:customscript[], data:string, mode:ScriptMode){ - let hash = data + '|||' + mode + '|||' - for(const script of scripts){ - hash += `${script.in}|||${script.out}|||${script.flag}|||${script.ableFlag}|||${script.type}` - } - +function getScriptCache(hash:string){ return processScriptCache.get(hash) } @@ -98,12 +101,7 @@ export function resetScriptCache(){ export async function processScriptFull(char:character|groupChat|simpleCharacterArgument, data:string, mode:ScriptMode, chatID = -1, cbsConditions:CbsConditions = {}){ let db = getDatabase() const originalData = data - const cached = getScriptCache((db.presetRegex ?? []).concat(char.customscript), originalData, mode) - if(cached){ - return {data: cached, emoChanged: false} - } let emoChanged = false - const scripts = (db.presetRegex ?? []).concat(char.customscript).concat(getModuleRegexScripts()) data = await runLuaEditTrigger(char, mode, data) if(mode === 'editdisplay'){ @@ -117,7 +115,7 @@ export async function processScriptFull(char:character|groupChat|simpleCharacter displayData: data }) - data = d.displayData ?? data + data = d?.displayData ?? data console.log('Trigger time', performance.now() - perf) } catch(e){ @@ -134,14 +132,26 @@ export async function processScriptFull(char:character|groupChat|simpleCharacter } } } + + data = risuChatParser(data, { chatID: chatID, cbsConditions }) + const scripts = (db.presetRegex ?? []).concat(char.customscript).concat(getModuleRegexScripts()) + const hash = generateScriptCacheKey(scripts, data, mode, chatID, cbsConditions) + const cached = getScriptCache(hash) + if(cached){ + return {data: cached, emoChanged: false} + } if(scripts.length === 0){ - cacheScript(scripts, originalData, data, mode) + cacheScript(hash, data) return {data, emoChanged} } function executeScript(pscript:pScript){ const script = pscript.script + if(script.in === ''){ + return + } + if(script.type === mode){ let outScript2 = script.out.replaceAll("$n", "\n") @@ -337,6 +347,7 @@ export async function processScriptFull(char:character|groupChat|simpleCharacter if(db.dynamicAssets && (char.type === 'simple' || char.type === 'character') && char.additionalAssets && char.additionalAssets.length > 0){ if(!db.dynamicAssetsEditDisplay && mode === 'editdisplay'){ + cacheScript(hash, data) return {data, emoChanged} } const assetNames = char.additionalAssets.map((v) => v[0]) @@ -372,7 +383,7 @@ export async function processScriptFull(char:character|groupChat|simpleCharacter } } - cacheScript(scripts, originalData, data, mode) + cacheScript(hash, data) return {data, emoChanged} } From 22a50904f88f0536863b66d1aba18eed8c812432 Mon Sep 17 00:00:00 2001 From: kwaroran Date: Mon, 21 Apr 2025 05:27:05 +0900 Subject: [PATCH 7/7] Revert #805 due to tokenizing error --- src/ts/tokenizer.ts | 162 ++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 97 deletions(-) diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index b553ba27..e71528a4 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -6,27 +6,9 @@ import { supportsInlayImage } from "./process/files/inlays"; import { risuChatParser } from "./parser.svelte"; import { tokenizeGGUFModel } from "./process/models/local"; import { globalFetch } from "./globalApi.svelte"; -import { getModelInfo, LLMTokenizer, type LLMModel } from "./model/modellist"; +import { getModelInfo, LLMTokenizer } from "./model/modellist"; import { pluginV2 } from "./plugins/plugins"; import type { GemmaTokenizer } from "@huggingface/transformers"; -import { LRUMap } from 'mnemonist'; - -const MAX_CACHE_SIZE = 1500; - -const encodeCache = new LRUMap(MAX_CACHE_SIZE); - -function getHash( - data: string, - aiModel: string, - customTokenizer: string, - currentPluginProvider: string, - googleClaudeTokenizing: boolean, - modelInfo: LLMModel, - pluginTokenizer: string -): string { - const combined = `${data}::${aiModel}::${customTokenizer}::${currentPluginProvider}::${googleClaudeTokenizing ? '1' : '0'}::${modelInfo.tokenizer}::${pluginTokenizer}`; - return combined; -} export const tokenizerList = [ @@ -43,114 +25,100 @@ export const tokenizerList = [ ] as const export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{ - const db = getDatabase(); - const modelInfo = getModelInfo(db.aiModel); - const pluginTokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer ?? "none"; - - let cacheKey = '' - if(db.useTokenizerCaching){ - cacheKey = getHash( - data, - db.aiModel, - db.customTokenizer, - db.currentPluginProvider, - db.googleClaudeTokenizing, - modelInfo, - pluginTokenizer - ); - const cachedResult = encodeCache.get(cacheKey); - if (cachedResult !== undefined) { - return cachedResult; - } - } - - let result: number[] | Uint32Array | Int32Array; - + let db = getDatabase() if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){ switch(db.customTokenizer){ case 'mistral': - result = await tokenizeWebTokenizers(data, 'mistral'); break; + return await tokenizeWebTokenizers(data, 'mistral') case 'llama': - result = await tokenizeWebTokenizers(data, 'llama'); break; + return await tokenizeWebTokenizers(data, 'llama') case 'novelai': - result = await tokenizeWebTokenizers(data, 'novelai'); break; + return await tokenizeWebTokenizers(data, 'novelai') case 'claude': - result = await tokenizeWebTokenizers(data, 'claude'); break; + return await tokenizeWebTokenizers(data, 'claude') case 'novellist': - result = await tokenizeWebTokenizers(data, 'novellist'); break; + return await tokenizeWebTokenizers(data, 'novellist') case 'llama3': - result = await tokenizeWebTokenizers(data, 'llama'); break; + return await tokenizeWebTokenizers(data, 'llama') case 'gemma': - result = await gemmaTokenize(data); break; + return await gemmaTokenize(data) case 'cohere': - result = await tokenizeWebTokenizers(data, 'cohere'); break; + return await tokenizeWebTokenizers(data, 'cohere') case 'deepseek': - result = await tokenizeWebTokenizers(data, 'DeepSeek'); break; + return await tokenizeWebTokenizers(data, 'DeepSeek') default: - result = await tikJS(data, 'o200k_base'); break; + return await tikJS(data, 'o200k_base') } } - - if(db.aiModel === 'custom' && pluginTokenizer){ - switch(pluginTokenizer){ + + const modelInfo = getModelInfo(db.aiModel) + + if(db.aiModel === 'custom' && pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer){ + const tokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer + switch(tokenizer){ case 'mistral': - result = await tokenizeWebTokenizers(data, 'mistral'); break; + return await tokenizeWebTokenizers(data, 'mistral') case 'llama': - result = await tokenizeWebTokenizers(data, 'llama'); break; + return await tokenizeWebTokenizers(data, 'llama') case 'novelai': - result = await tokenizeWebTokenizers(data, 'novelai'); break; + return await tokenizeWebTokenizers(data, 'novelai') case 'claude': - result = await tokenizeWebTokenizers(data, 'claude'); break; + return await tokenizeWebTokenizers(data, 'claude') case 'novellist': - result = await tokenizeWebTokenizers(data, 'novellist'); break; + return await tokenizeWebTokenizers(data, 'novellist') case 'llama3': - result = await tokenizeWebTokenizers(data, 'llama'); break; + return await tokenizeWebTokenizers(data, 'llama') case 'gemma': - result = await gemmaTokenize(data); break; + return await gemmaTokenize(data) case 'cohere': - result = await tokenizeWebTokenizers(data, 'cohere'); break; + return await tokenizeWebTokenizers(data, 'cohere') case 'o200k_base': - result = await tikJS(data, 'o200k_base'); break; + return await tikJS(data, 'o200k_base') case 'cl100k_base': - result = await tikJS(data, 'cl100k_base'); break; + return await tikJS(data, 'cl100k_base') case 'custom': - result = await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]; break; + return await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0] default: - result = await tikJS(data, 'o200k_base'); break; + return await tikJS(data, 'o200k_base') } - } - + } + if(modelInfo.tokenizer === LLMTokenizer.NovelList){ - result = await tokenizeWebTokenizers(data, 'novellist'); - } else if(modelInfo.tokenizer === LLMTokenizer.Claude){ - result = await tokenizeWebTokenizers(data, 'claude'); - } else if(modelInfo.tokenizer === LLMTokenizer.NovelAI){ - result = await tokenizeWebTokenizers(data, 'novelai'); - } else if(modelInfo.tokenizer === LLMTokenizer.Mistral){ - result = await tokenizeWebTokenizers(data, 'mistral'); - } else if(modelInfo.tokenizer === LLMTokenizer.Llama){ - result = await tokenizeWebTokenizers(data, 'llama'); - } else if(modelInfo.tokenizer === LLMTokenizer.Local){ - result = await tokenizeGGUFModel(data); - } else if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){ - result = await tikJS(data, 'o200k_base'); - } else if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){ - result = await tokenizeGoogleCloud(data); - } else if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){ - result = await gemmaTokenize(data); - } else if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){ - result = await tokenizeWebTokenizers(data, 'DeepSeek'); - } else if(modelInfo.tokenizer === LLMTokenizer.Cohere){ - result = await tokenizeWebTokenizers(data, 'cohere'); - } else { - result = await tikJS(data); + const nv= await tokenizeWebTokenizers(data, 'novellist') + return nv + } + if(modelInfo.tokenizer === LLMTokenizer.Claude){ + return await tokenizeWebTokenizers(data, 'claude') + } + if(modelInfo.tokenizer === LLMTokenizer.NovelAI){ + return await tokenizeWebTokenizers(data, 'novelai') + } + if(modelInfo.tokenizer === LLMTokenizer.Mistral){ + return await tokenizeWebTokenizers(data, 'mistral') + } + if(modelInfo.tokenizer === LLMTokenizer.Llama){ + return await tokenizeWebTokenizers(data, 'llama') + } + if(modelInfo.tokenizer === LLMTokenizer.Local){ + return await tokenizeGGUFModel(data) + } + if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){ + return await tikJS(data, 'o200k_base') + } + if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){ + return await tokenizeGoogleCloud(data) + } + if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){ + return await gemmaTokenize(data) + } + if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){ + return await tokenizeWebTokenizers(data, 'DeepSeek') + } + if(modelInfo.tokenizer === LLMTokenizer.Cohere){ + return await tokenizeWebTokenizers(data, 'cohere') } - if(db.useTokenizerCaching){ - encodeCache.set(cacheKey, result); - } - - return result; + return await tikJS(data) } type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'|'gemma'|'cohere'|'googleCloud'|'DeepSeek'