Refactor: Caching tokenize result (#805)
# PR Checklist

- [ ] Have you checked that it works normally with all models? *Ignore this if it doesn't use models.*
- [ ] Have you checked that it works normally in the web, local, and node hosted versions? If it doesn't, have you blocked it in those versions?
- [ ] Have you added type definitions?

# Improved performance

![profiler screenshot: before caching]

The screenshot above was taken while sending a chat. As you can see, the `encode` function takes **0.68 seconds** of the 1.8-second send, which is **36.5%** of the sending time.

![profiler screenshot: after caching]

This screenshot was taken after adding the caching function: encode time dropped **from 0.68 seconds to 0.12 seconds**, cutting encode's share of sending time by about **27.2 percentage points**.

Because the configuration values are also part of the cache key produced by the hash function, it is guaranteed that **a configuration change produces a freshly computed result** rather than a stale cached one. I verified this myself by changing the bot settings.

# Implementation

I added the `mnemonist` library to implement this. I initially used lru-cache, but that library's documentation recommends mnemonist when performance is important, and in my testing it did perform better.

# Integrity Check

![profiler screenshot: integrity check]

Lastly, the encoding result computed without caching when sending the first chat, the value freshly computed when sending the second chat, and the value cached from the first chat were all identical. (A test sketch illustrating this check appears after the diff below.)
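For reference, here is a minimal sketch of the caching pattern this PR adopts, using mnemonist's `LRUMap`. The capacity, key shape, and `encodeUncached` helper here are illustrative only; the real implementation (see the diff below) keys on the full tokenizer configuration, not just the model name.

```ts
import { LRUMap } from 'mnemonist';

// Hypothetical standalone sketch of the LRU caching pattern.
const cache = new LRUMap<string, number[]>(1500);

// Placeholder for the real tokenizer call.
declare function encodeUncached(text: string, model: string): Promise<number[]>;

async function cachedEncode(text: string, model: string): Promise<number[]> {
    const key = `${text}::${model}`;
    const hit = cache.get(key);   // O(1) lookup; also marks the entry as most recently used
    if (hit !== undefined) {
        return hit;
    }
    const tokens = await encodeUncached(text, model);
    cache.set(key, tokens);       // evicts the least-recently-used entry once at capacity
    return tokens;
}
```

Unlike a plain `Map`, the `LRUMap` is bounded, so repeated chats with long histories cannot grow the cache without limit.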
```diff
@@ -66,6 +66,7 @@
     "lucide-svelte": "^0.292.0",
     "markdown-it": "^14.1.0",
     "ml-distance": "^4.0.1",
+    "mnemonist": "^0.40.3",
     "mobile-drag-drop": "3.0.0-rc.0",
     "msgpackr": "1.10.1",
     "node-fetch": "2",
```
```diff
@@ -6,9 +6,27 @@ import { supportsInlayImage } from "./process/files/inlays";
 import { risuChatParser } from "./parser.svelte";
 import { tokenizeGGUFModel } from "./process/models/local";
 import { globalFetch } from "./globalApi.svelte";
-import { getModelInfo, LLMTokenizer } from "./model/modellist";
+import { getModelInfo, LLMTokenizer, type LLMModel } from "./model/modellist";
 import { pluginV2 } from "./plugins/plugins";
 import type { GemmaTokenizer } from "@huggingface/transformers";
+import { LRUMap } from 'mnemonist';
+
+const MAX_CACHE_SIZE = 1500;
+
+const encodeCache = new LRUMap<string, number[] | Uint32Array | Int32Array>(MAX_CACHE_SIZE);
+
+function getHash(
+    data: string,
+    aiModel: string,
+    customTokenizer: string,
+    currentPluginProvider: string,
+    googleClaudeTokenizing: boolean,
+    modelInfo: LLMModel,
+    pluginTokenizer: string
+): string {
+    const combined = `${data}::${aiModel}::${customTokenizer}::${currentPluginProvider}::${googleClaudeTokenizing ? '1' : '0'}::${modelInfo.tokenizer}::${pluginTokenizer}`;
+    return combined;
+}
 
 
 export const tokenizerList = [
```
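Because every configuration value is baked into the key string, a settings change can never serve stale tokens: the lookup simply misses and the result is recomputed. A hypothetical usage of `getHash` from the hunk above (the model names and field values are invented for illustration):

```ts
import { LLMTokenizer, type LLMModel } from "./model/modellist";

// Hypothetical values, shown only to illustrate that config is part of the key.
const info = { tokenizer: LLMTokenizer.Claude } as LLMModel;
const before = getHash("same prompt", "modelA", "", "provider", false, info, "none");
const after  = getHash("same prompt", "modelB", "", "provider", false, info, "none");
console.assert(before !== after); // different aiModel, so a different cache entry
```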
```diff
@@ -25,100 +43,108 @@ export const tokenizerList = [
 ] as const
 
 export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
-    let db = getDatabase()
+    const db = getDatabase();
+    const modelInfo = getModelInfo(db.aiModel);
+    const pluginTokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer ?? "none";
+
+    const cacheKey = getHash(
+        data,
+        db.aiModel,
+        db.customTokenizer,
+        db.currentPluginProvider,
+        db.googleClaudeTokenizing,
+        modelInfo,
+        pluginTokenizer
+    );
+    const cachedResult = encodeCache.get(cacheKey);
+    if (cachedResult !== undefined) {
+        return cachedResult;
+    }
+
+    let result: number[] | Uint32Array | Int32Array;
+
     if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){
         switch(db.customTokenizer){
             case 'mistral':
-                return await tokenizeWebTokenizers(data, 'mistral')
+                result = await tokenizeWebTokenizers(data, 'mistral'); break;
             case 'llama':
-                return await tokenizeWebTokenizers(data, 'llama')
+                result = await tokenizeWebTokenizers(data, 'llama'); break;
             case 'novelai':
-                return await tokenizeWebTokenizers(data, 'novelai')
+                result = await tokenizeWebTokenizers(data, 'novelai'); break;
             case 'claude':
-                return await tokenizeWebTokenizers(data, 'claude')
+                result = await tokenizeWebTokenizers(data, 'claude'); break;
             case 'novellist':
-                return await tokenizeWebTokenizers(data, 'novellist')
+                result = await tokenizeWebTokenizers(data, 'novellist'); break;
             case 'llama3':
-                return await tokenizeWebTokenizers(data, 'llama')
+                result = await tokenizeWebTokenizers(data, 'llama'); break;
            case 'gemma':
-                return await gemmaTokenize(data)
+                result = await gemmaTokenize(data); break;
            case 'cohere':
-                return await tokenizeWebTokenizers(data, 'cohere')
+                result = await tokenizeWebTokenizers(data, 'cohere'); break;
            case 'deepseek':
-                return await tokenizeWebTokenizers(data, 'DeepSeek')
+                result = await tokenizeWebTokenizers(data, 'DeepSeek'); break;
            default:
-                return await tikJS(data, 'o200k_base')
+                result = await tikJS(data, 'o200k_base'); break;
        }
    }
 
-    const modelInfo = getModelInfo(db.aiModel)
-    if(db.aiModel === 'custom' && pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer){
-        const tokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer
-        switch(tokenizer){
+    if(db.aiModel === 'custom' && pluginTokenizer){
+        switch(pluginTokenizer){
            case 'mistral':
-                return await tokenizeWebTokenizers(data, 'mistral')
+                result = await tokenizeWebTokenizers(data, 'mistral'); break;
            case 'llama':
-                return await tokenizeWebTokenizers(data, 'llama')
+                result = await tokenizeWebTokenizers(data, 'llama'); break;
            case 'novelai':
-                return await tokenizeWebTokenizers(data, 'novelai')
+                result = await tokenizeWebTokenizers(data, 'novelai'); break;
            case 'claude':
-                return await tokenizeWebTokenizers(data, 'claude')
+                result = await tokenizeWebTokenizers(data, 'claude'); break;
            case 'novellist':
-                return await tokenizeWebTokenizers(data, 'novellist')
+                result = await tokenizeWebTokenizers(data, 'novellist'); break;
            case 'llama3':
-                return await tokenizeWebTokenizers(data, 'llama')
+                result = await tokenizeWebTokenizers(data, 'llama'); break;
            case 'gemma':
-                return await gemmaTokenize(data)
+                result = await gemmaTokenize(data); break;
            case 'cohere':
-                return await tokenizeWebTokenizers(data, 'cohere')
+                result = await tokenizeWebTokenizers(data, 'cohere'); break;
            case 'o200k_base':
-                return await tikJS(data, 'o200k_base')
+                result = await tikJS(data, 'o200k_base'); break;
            case 'cl100k_base':
-                return await tikJS(data, 'cl100k_base')
+                result = await tikJS(data, 'cl100k_base'); break;
            case 'custom':
-                return await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]
+                result = await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]; break;
            default:
-                return await tikJS(data, 'o200k_base')
+                result = await tikJS(data, 'o200k_base'); break;
        }
    }
 
     if(modelInfo.tokenizer === LLMTokenizer.NovelList){
-        const nv= await tokenizeWebTokenizers(data, 'novellist')
-        return nv
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Claude){
-        return await tokenizeWebTokenizers(data, 'claude')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.NovelAI){
-        return await tokenizeWebTokenizers(data, 'novelai')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Mistral){
-        return await tokenizeWebTokenizers(data, 'mistral')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Llama){
-        return await tokenizeWebTokenizers(data, 'llama')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Local){
-        return await tokenizeGGUFModel(data)
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){
-        return await tikJS(data, 'o200k_base')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){
-        return await tokenizeGoogleCloud(data)
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
-        return await gemmaTokenize(data)
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){
-        return await tokenizeWebTokenizers(data, 'DeepSeek')
-    }
-    if(modelInfo.tokenizer === LLMTokenizer.Cohere){
-        return await tokenizeWebTokenizers(data, 'cohere')
+        result = await tokenizeWebTokenizers(data, 'novellist');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Claude){
+        result = await tokenizeWebTokenizers(data, 'claude');
+    } else if(modelInfo.tokenizer === LLMTokenizer.NovelAI){
+        result = await tokenizeWebTokenizers(data, 'novelai');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Mistral){
+        result = await tokenizeWebTokenizers(data, 'mistral');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Llama){
+        result = await tokenizeWebTokenizers(data, 'llama');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Local){
+        result = await tokenizeGGUFModel(data);
+    } else if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){
+        result = await tikJS(data, 'o200k_base');
+    } else if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){
+        result = await tokenizeGoogleCloud(data);
+    } else if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
+        result = await gemmaTokenize(data);
+    } else if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){
+        result = await tokenizeWebTokenizers(data, 'DeepSeek');
+    } else if(modelInfo.tokenizer === LLMTokenizer.Cohere){
+        result = await tokenizeWebTokenizers(data, 'cohere');
+    } else {
+        result = await tikJS(data);
     }
+    encodeCache.set(cacheKey, result);
 
-    return await tikJS(data)
+    return result;
 }
 
 type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'|'gemma'|'cohere'|'googleCloud'|'DeepSeek'
```
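The integrity check described in the PR description could be scripted roughly like this. This is a sketch only: `encode` is the function from the diff above, and everything else is invented test scaffolding.

```ts
// First call computes and caches; second call must return identical tokens
// from encodeCache. Mirrors the manual first-chat / second-chat comparison.
async function verifyCacheIntegrity(sample: string): Promise<void> {
    const fresh = await encode(sample);    // computed, then stored in the cache
    const cached = await encode(sample);   // served from the cache
    const same = fresh.length === cached.length &&
        Array.from(fresh).every((token, i) => token === cached[i]);
    if (!same) {
        throw new Error("cached tokenization diverged from fresh result");
    }
}
```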