From 36e0935bb04c1c454729b8c69406009a22ca9c51 Mon Sep 17 00:00:00 2001 From: kwaroran Date: Wed, 16 Apr 2025 10:57:48 +0900 Subject: [PATCH] Make tokenizer caching to an option --- src/lang/en.ts | 3 +- src/lib/Setting/Pages/AdvancedSettings.svelte | 4 +++ src/ts/storage/database.svelte.ts | 1 + src/ts/tokenizer.ts | 32 +++++++++++-------- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/lang/en.ts b/src/lang/en.ts index c1cb81df..ead4740f 100644 --- a/src/lang/en.ts +++ b/src/lang/en.ts @@ -1115,5 +1115,6 @@ export const languageEnglish = { fallbackWhenBlankResponse: "Fallback When Blank Response", doNotChangeFallbackModels: "Do Not Change Fallback Models on Preset Change", customModels: "Custom Models", - igpPrompt: "IGP Prompt" + igpPrompt: "IGP Prompt", + useTokenizerCaching: "Tokenizer Caching" } diff --git a/src/lib/Setting/Pages/AdvancedSettings.svelte b/src/lib/Setting/Pages/AdvancedSettings.svelte index 805cd9dc..49a50881 100644 --- a/src/lib/Setting/Pages/AdvancedSettings.svelte +++ b/src/lib/Setting/Pages/AdvancedSettings.svelte @@ -207,6 +207,10 @@ +
+ + +
{#if DBState.db.useExperimental}
diff --git a/src/ts/storage/database.svelte.ts b/src/ts/storage/database.svelte.ts index 12618efe..ec9a2591 100644 --- a/src/ts/storage/database.svelte.ts +++ b/src/ts/storage/database.svelte.ts @@ -1022,6 +1022,7 @@ export interface Database{ flags: LLMFlags[] }[] igpPrompt:string + useTokenizerCaching:boolean } interface SeparateParameters{ diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index 3211c878..b553ba27 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -47,18 +47,21 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr const modelInfo = getModelInfo(db.aiModel); const pluginTokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer ?? "none"; - const cacheKey = getHash( - data, - db.aiModel, - db.customTokenizer, - db.currentPluginProvider, - db.googleClaudeTokenizing, - modelInfo, - pluginTokenizer - ); - const cachedResult = encodeCache.get(cacheKey); - if (cachedResult !== undefined) { - return cachedResult; + let cacheKey = '' + if(db.useTokenizerCaching){ + cacheKey = getHash( + data, + db.aiModel, + db.customTokenizer, + db.currentPluginProvider, + db.googleClaudeTokenizing, + modelInfo, + pluginTokenizer + ); + const cachedResult = encodeCache.get(cacheKey); + if (cachedResult !== undefined) { + return cachedResult; + } } let result: number[] | Uint32Array | Int32Array; @@ -142,7 +145,10 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr } else { result = await tikJS(data); } - encodeCache.set(cacheKey, result); + + if(db.useTokenizerCaching){ + encodeCache.set(cacheKey, result); + } return result; }