Refactor: Caching tokenize result (#805)

# PR Checklist
- [ ] Have you checked if it works normally in all models? *Ignore this
if it doesn't use models.*
- [ ] Have you checked if it works normally in all web, local, and node
hosted versions? If it doesn't, have you blocked it in those versions?
- [ ] Have you added type definitions?

# Improved performance

![image](https://github.com/user-attachments/assets/c7fa773f-afbb-4eb6-ac24-35c8451be33d)
The profile above was captured while sending a chat.
As you can see, the `encode` function accounts for **0.68 seconds** of the 1.8-second send,
which is **36.5%** of the sending time.

![image](https://github.com/user-attachments/assets/cd2c4bde-556e-4151-82a9-030d27f2872a)
This profile was captured after adding the caching function.
Encode time was reduced **from 0.68 seconds to 0.12 seconds**, dropping its share of the
sending time by **27.2 percentage points**.

Since the configuration values are part of the cache key built by the hash
function, **a configuration change is guaranteed to produce a different key**,
so stale tokenizations are never served.
I verified this myself by changing the bot settings.
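
To illustrate the keying idea, here is a minimal sketch (simplified; the real `getHash` in the diff below also folds in the plugin provider and the model's tokenizer):

```ts
// Simplified sketch: every tokenizer-relevant setting is folded into the
// key string, so the same text under different settings maps to
// different cache entries. makeKey is illustrative, not the PR's getHash.
function makeKey(text: string, aiModel: string, customTokenizer: string): string {
    return `${text}::${aiModel}::${customTokenizer}`;
}

const before = makeKey('hello', 'openrouter', 'mistral');
const after  = makeKey('hello', 'openrouter', 'llama');
console.assert(before !== after); // same text, new settings -> cache miss, fresh encode
```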

# Implementation
I added the `mnemonist` library to implement this. I initially used
`lru-cache`, but that library's documentation recommends `mnemonist` when
performance is important, and it did indeed perform better in my tests.
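
For reference, a minimal sketch of the `LRUMap` behavior the cache relies on: it holds a fixed number of entries and evicts the least-recently-used one when full, so memory stays bounded no matter how many chats are sent.

```ts
import { LRUMap } from 'mnemonist';

// A capacity-2 map for demonstration; the PR uses MAX_CACHE_SIZE = 1500.
const cache = new LRUMap<string, number[]>(2);
cache.set('a', [1]);
cache.set('b', [2]);
cache.get('a');      // touching 'a' makes 'b' the least recently used
cache.set('c', [3]); // map is at capacity, so inserting 'c' evicts 'b'
console.log(cache.has('b')); // false
console.log(cache.has('a')); // true
```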

# Integrity Check

![image](https://github.com/user-attachments/assets/ae1272a8-ea95-47ef-85f5-c22dd9f34be7)
Lastly, I confirmed that three values were identical:
the encoding computed without caching when sending the first chat,
the value freshly recomputed when sending the second chat, and
the result cached during the first chat.
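
A rough sketch of how that check can be reproduced in code (illustrative only; the actual verification was done against the profiler screenshots above, and this assumes a module context where `encode` is importable):

```ts
// Illustrative re-check: the cached result must equal a fresh computation.
const prompt = 'Hello, Risu!';
const fresh  = await encode(prompt); // first call: computed, then stored in the cache
const cached = await encode(prompt); // second call: served from the LRU cache
console.assert(
    JSON.stringify(Array.from(fresh)) === JSON.stringify(Array.from(cached)),
    'cached encode must equal freshly computed encode'
);
```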
Authored by kwaroran on 2025-04-14 14:10:24 +09:00, committed by GitHub.
2 changed files with 92 additions and 65 deletions.

**`package.json`**

```diff
@@ -66,6 +66,7 @@
     "lucide-svelte": "^0.292.0",
     "markdown-it": "^14.1.0",
     "ml-distance": "^4.0.1",
+    "mnemonist": "^0.40.3",
     "mobile-drag-drop": "3.0.0-rc.0",
     "msgpackr": "1.10.1",
     "node-fetch": "2",
```

**Tokenizer module**

```diff
@@ -6,9 +6,27 @@ import { supportsInlayImage } from "./process/files/inlays";
 import { risuChatParser } from "./parser.svelte";
 import { tokenizeGGUFModel } from "./process/models/local";
 import { globalFetch } from "./globalApi.svelte";
-import { getModelInfo, LLMTokenizer } from "./model/modellist";
+import { getModelInfo, LLMTokenizer, type LLMModel } from "./model/modellist";
 import { pluginV2 } from "./plugins/plugins";
 import type { GemmaTokenizer } from "@huggingface/transformers";
+import { LRUMap } from 'mnemonist';
+
+const MAX_CACHE_SIZE = 1500;
+const encodeCache = new LRUMap<string, number[] | Uint32Array | Int32Array>(MAX_CACHE_SIZE);
+
+function getHash(
+    data: string,
+    aiModel: string,
+    customTokenizer: string,
+    currentPluginProvider: string,
+    googleClaudeTokenizing: boolean,
+    modelInfo: LLMModel,
+    pluginTokenizer: string
+): string {
+    const combined = `${data}::${aiModel}::${customTokenizer}::${currentPluginProvider}::${googleClaudeTokenizing ? '1' : '0'}::${modelInfo.tokenizer}::${pluginTokenizer}`;
+    return combined;
+}
 
 export const tokenizerList = [
@@ -25,100 +43,108 @@ export const tokenizerList = [
 ] as const
 
 export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
-    let db = getDatabase()
+    const db = getDatabase();
+    const modelInfo = getModelInfo(db.aiModel);
+    const pluginTokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer ?? "none";
+
+    const cacheKey = getHash(
+        data,
+        db.aiModel,
+        db.customTokenizer,
+        db.currentPluginProvider,
+        db.googleClaudeTokenizing,
+        modelInfo,
+        pluginTokenizer
+    );
+
+    const cachedResult = encodeCache.get(cacheKey);
+    if (cachedResult !== undefined) {
+        return cachedResult;
+    }
+
+    let result: number[] | Uint32Array | Int32Array;
+
     if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){
         switch(db.customTokenizer){
             case 'mistral':
-                return await tokenizeWebTokenizers(data, 'mistral')
+                result = await tokenizeWebTokenizers(data, 'mistral'); break;
             case 'llama':
-                return await tokenizeWebTokenizers(data, 'llama')
+                result = await tokenizeWebTokenizers(data, 'llama'); break;
             case 'novelai':
-                return await tokenizeWebTokenizers(data, 'novelai')
+                result = await tokenizeWebTokenizers(data, 'novelai'); break;
             case 'claude':
-                return await tokenizeWebTokenizers(data, 'claude')
+                result = await tokenizeWebTokenizers(data, 'claude'); break;
             case 'novellist':
-                return await tokenizeWebTokenizers(data, 'novellist')
+                result = await tokenizeWebTokenizers(data, 'novellist'); break;
             case 'llama3':
-                return await tokenizeWebTokenizers(data, 'llama')
+                result = await tokenizeWebTokenizers(data, 'llama'); break;
             case 'gemma':
-                return await gemmaTokenize(data)
+                result = await gemmaTokenize(data); break;
             case 'cohere':
-                return await tokenizeWebTokenizers(data, 'cohere')
+                result = await tokenizeWebTokenizers(data, 'cohere'); break;
             case 'deepseek':
-                return await tokenizeWebTokenizers(data, 'DeepSeek')
+                result = await tokenizeWebTokenizers(data, 'DeepSeek'); break;
             default:
-                return await tikJS(data, 'o200k_base')
+                result = await tikJS(data, 'o200k_base'); break;
         }
     }
-    const modelInfo = getModelInfo(db.aiModel)
-
-    if(db.aiModel === 'custom' && pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer){
-        const tokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer
-        switch(tokenizer){
+    if(db.aiModel === 'custom' && pluginTokenizer){
+        switch(pluginTokenizer){
             case 'mistral':
-                return await tokenizeWebTokenizers(data, 'mistral')
+                result = await tokenizeWebTokenizers(data, 'mistral'); break;
             case 'llama':
-                return await tokenizeWebTokenizers(data, 'llama')
+                result = await tokenizeWebTokenizers(data, 'llama'); break;
             case 'novelai':
-                return await tokenizeWebTokenizers(data, 'novelai')
+                result = await tokenizeWebTokenizers(data, 'novelai'); break;
             case 'claude':
-                return await tokenizeWebTokenizers(data, 'claude')
+                result = await tokenizeWebTokenizers(data, 'claude'); break;
             case 'novellist':
-                return await tokenizeWebTokenizers(data, 'novellist')
+                result = await tokenizeWebTokenizers(data, 'novellist'); break;
            case 'llama3':
-                return await tokenizeWebTokenizers(data, 'llama')
+                result = await tokenizeWebTokenizers(data, 'llama'); break;
             case 'gemma':
-                return await gemmaTokenize(data)
+                result = await gemmaTokenize(data); break;
             case 'cohere':
-                return await tokenizeWebTokenizers(data, 'cohere')
+                result = await tokenizeWebTokenizers(data, 'cohere'); break;
             case 'o200k_base':
-                return await tikJS(data, 'o200k_base')
+                result = await tikJS(data, 'o200k_base'); break;
             case 'cl100k_base':
-                return await tikJS(data, 'cl100k_base')
+                result = await tikJS(data, 'cl100k_base'); break;
             case 'custom':
-                return await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]
+                result = await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]; break;
             default:
-                return await tikJS(data, 'o200k_base')
+                result = await tikJS(data, 'o200k_base'); break;
         }
     }
     if(modelInfo.tokenizer === LLMTokenizer.NovelList){
-        const nv= await tokenizeWebTokenizers(data, 'novellist')
-        return nv
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Claude){
-        return await tokenizeWebTokenizers(data, 'claude')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.NovelAI){
-        return await tokenizeWebTokenizers(data, 'novelai')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Mistral){
-        return await tokenizeWebTokenizers(data, 'mistral')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Llama){
-        return await tokenizeWebTokenizers(data, 'llama')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Local){
-        return await tokenizeGGUFModel(data)
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){
-        return await tikJS(data, 'o200k_base')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){
-        return await tokenizeGoogleCloud(data)
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
-        return await gemmaTokenize(data)
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){
-        return await tokenizeWebTokenizers(data, 'DeepSeek')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Cohere){
-        return await tokenizeWebTokenizers(data, 'cohere')
+        result = await tokenizeWebTokenizers(data, 'novellist');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Claude){
+        result = await tokenizeWebTokenizers(data, 'claude');
+    } else if(modelInfo.tokenizer === LLMTokenizer.NovelAI){
+        result = await tokenizeWebTokenizers(data, 'novelai');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Mistral){
+        result = await tokenizeWebTokenizers(data, 'mistral');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Llama){
+        result = await tokenizeWebTokenizers(data, 'llama');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Local){
+        result = await tokenizeGGUFModel(data);
+    } else if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){
+        result = await tikJS(data, 'o200k_base');
+    } else if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){
+        result = await tokenizeGoogleCloud(data);
+    } else if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
+        result = await gemmaTokenize(data);
+    } else if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){
+        result = await tokenizeWebTokenizers(data, 'DeepSeek');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Cohere){
+        result = await tokenizeWebTokenizers(data, 'cohere');
+    } else {
+        result = await tikJS(data);
     }
-    return await tikJS(data)
+    encodeCache.set(cacheKey, result);
+    return result;
 }
 
 type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'|'gemma'|'cohere'|'googleCloud'|'DeepSeek'
```