Refactor: cache the tokenize result

This commit is contained in:
sub-hub
2025-04-02 22:01:01 +09:00
parent 9e00c9b17a
commit c553478a78
2 changed files with 94 additions and 65 deletions

View File

@@ -66,6 +66,7 @@
"lucide-svelte": "^0.292.0", "lucide-svelte": "^0.292.0",
"markdown-it": "^14.1.0", "markdown-it": "^14.1.0",
"ml-distance": "^4.0.1", "ml-distance": "^4.0.1",
"mnemonist": "^0.40.3",
"mobile-drag-drop": "3.0.0-rc.0", "mobile-drag-drop": "3.0.0-rc.0",
"msgpackr": "1.10.1", "msgpackr": "1.10.1",
"node-html-parser": "^6.1.12", "node-html-parser": "^6.1.12",

View File

@@ -6,9 +6,28 @@ import { supportsInlayImage } from "./process/files/inlays";
import { risuChatParser } from "./parser.svelte"; import { risuChatParser } from "./parser.svelte";
import { tokenizeGGUFModel } from "./process/models/local"; import { tokenizeGGUFModel } from "./process/models/local";
import { globalFetch } from "./globalApi.svelte"; import { globalFetch } from "./globalApi.svelte";
import { getModelInfo, LLMTokenizer } from "./model/modellist"; import { getModelInfo, LLMTokenizer, type LLMModel } from "./model/modellist";
import { pluginV2 } from "./plugins/plugins"; import { pluginV2 } from "./plugins/plugins";
import type { GemmaTokenizer } from "@huggingface/transformers"; import type { GemmaTokenizer } from "@huggingface/transformers";
import md5 from 'blueimp-md5';
import { LRUMap } from 'mnemonist';
// Upper bound on memoized tokenizations; least-recently-used entries are evicted first.
const MAX_CACHE_SIZE = 3000;
// Maps getHash() cache keys to tokenizer output so repeated encode() calls with
// the same text and configuration skip re-tokenization.
const encodeCache = new LRUMap<string, number[] | Uint32Array | Int32Array>(MAX_CACHE_SIZE);
/**
 * Builds the cache key for a tokenize request.
 *
 * Every input that can change the tokenizer's output is folded into the key,
 * so a cache hit is only possible when the same text is tokenized under the
 * same model / tokenizer configuration.
 */
function getHash(
    data: string,
    aiModel: string,
    customTokenizer: string,
    currentPluginProvider: string,
    googleClaudeTokenizing: boolean,
    modelInfo: LLMModel,
    pluginTokenizer: string
): string {
    // JSON.stringify yields an unambiguous encoding of the tuple. The previous
    // '::'-joined string let a literal '::' inside `data` (or any other field)
    // shift the field boundaries and collide with a different configuration.
    const combined = JSON.stringify([
        data,
        aiModel,
        customTokenizer,
        currentPluginProvider,
        googleClaudeTokenizing,
        modelInfo.tokenizer,
        pluginTokenizer,
    ]);
    return md5(combined);
}
export const tokenizerList = [ export const tokenizerList = [
@@ -25,100 +44,109 @@ export const tokenizerList = [
] as const ] as const
export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
let db = getDatabase() const db = getDatabase();
const modelInfo = getModelInfo(db.aiModel);
const pluginTokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer ?? "none";
const cacheKey = getHash(
data,
db.aiModel,
db.customTokenizer,
db.currentPluginProvider,
db.googleClaudeTokenizing,
modelInfo,
pluginTokenizer
);
const cachedResult = encodeCache.get(cacheKey);
console.log(encodeCache.size)
if (cachedResult !== undefined) {
return cachedResult;
}
let result: number[] | Uint32Array | Int32Array;
if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){ if(db.aiModel === 'openrouter' || db.aiModel === 'reverse_proxy'){
switch(db.customTokenizer){ switch(db.customTokenizer){
case 'mistral': case 'mistral':
return await tokenizeWebTokenizers(data, 'mistral') result = await tokenizeWebTokenizers(data, 'mistral'); break;
case 'llama': case 'llama':
return await tokenizeWebTokenizers(data, 'llama') result = await tokenizeWebTokenizers(data, 'llama'); break;
case 'novelai': case 'novelai':
return await tokenizeWebTokenizers(data, 'novelai') result = await tokenizeWebTokenizers(data, 'novelai'); break;
case 'claude': case 'claude':
return await tokenizeWebTokenizers(data, 'claude') result = await tokenizeWebTokenizers(data, 'claude'); break;
case 'novellist': case 'novellist':
return await tokenizeWebTokenizers(data, 'novellist') result = await tokenizeWebTokenizers(data, 'novellist'); break;
case 'llama3': case 'llama3':
return await tokenizeWebTokenizers(data, 'llama') result = await tokenizeWebTokenizers(data, 'llama'); break;
case 'gemma': case 'gemma':
return await gemmaTokenize(data) result = await gemmaTokenize(data); break;
case 'cohere': case 'cohere':
return await tokenizeWebTokenizers(data, 'cohere') result = await tokenizeWebTokenizers(data, 'cohere'); break;
case 'deepseek': case 'deepseek':
return await tokenizeWebTokenizers(data, 'DeepSeek') result = await tokenizeWebTokenizers(data, 'DeepSeek'); break;
default: default:
return await tikJS(data, 'o200k_base') result = await tikJS(data, 'o200k_base'); break;
} }
} }
const modelInfo = getModelInfo(db.aiModel) if(db.aiModel === 'custom' && pluginTokenizer){
switch(pluginTokenizer){
if(db.aiModel === 'custom' && pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer){
const tokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer
switch(tokenizer){
case 'mistral': case 'mistral':
return await tokenizeWebTokenizers(data, 'mistral') result = await tokenizeWebTokenizers(data, 'mistral'); break;
case 'llama': case 'llama':
return await tokenizeWebTokenizers(data, 'llama') result = await tokenizeWebTokenizers(data, 'llama'); break;
case 'novelai': case 'novelai':
return await tokenizeWebTokenizers(data, 'novelai') result = await tokenizeWebTokenizers(data, 'novelai'); break;
case 'claude': case 'claude':
return await tokenizeWebTokenizers(data, 'claude') result = await tokenizeWebTokenizers(data, 'claude'); break;
case 'novellist': case 'novellist':
return await tokenizeWebTokenizers(data, 'novellist') result = await tokenizeWebTokenizers(data, 'novellist'); break;
case 'llama3': case 'llama3':
return await tokenizeWebTokenizers(data, 'llama') result = await tokenizeWebTokenizers(data, 'llama'); break;
case 'gemma': case 'gemma':
return await gemmaTokenize(data) result = await gemmaTokenize(data); break;
case 'cohere': case 'cohere':
return await tokenizeWebTokenizers(data, 'cohere') result = await tokenizeWebTokenizers(data, 'cohere'); break;
case 'o200k_base': case 'o200k_base':
return await tikJS(data, 'o200k_base') result = await tikJS(data, 'o200k_base'); break;
case 'cl100k_base': case 'cl100k_base':
return await tikJS(data, 'cl100k_base') result = await tikJS(data, 'cl100k_base'); break;
case 'custom': case 'custom':
return await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0] result = await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]; break;
default: default:
return await tikJS(data, 'o200k_base') result = await tikJS(data, 'o200k_base'); break;
} }
} }
if(modelInfo.tokenizer === LLMTokenizer.NovelList){ if(modelInfo.tokenizer === LLMTokenizer.NovelList){
const nv= await tokenizeWebTokenizers(data, 'novellist') result = await tokenizeWebTokenizers(data, 'novellist');
return nv } else if(modelInfo.tokenizer === LLMTokenizer.Claude){
} result = await tokenizeWebTokenizers(data, 'claude');
if(modelInfo.tokenizer === LLMTokenizer.Claude){ } else if(modelInfo.tokenizer === LLMTokenizer.NovelAI){
return await tokenizeWebTokenizers(data, 'claude') result = await tokenizeWebTokenizers(data, 'novelai');
} } else if(modelInfo.tokenizer === LLMTokenizer.Mistral){
if(modelInfo.tokenizer === LLMTokenizer.NovelAI){ result = await tokenizeWebTokenizers(data, 'mistral');
return await tokenizeWebTokenizers(data, 'novelai') } else if(modelInfo.tokenizer === LLMTokenizer.Llama){
} result = await tokenizeWebTokenizers(data, 'llama');
if(modelInfo.tokenizer === LLMTokenizer.Mistral){ } else if(modelInfo.tokenizer === LLMTokenizer.Local){
return await tokenizeWebTokenizers(data, 'mistral') result = await tokenizeGGUFModel(data);
} } else if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){
if(modelInfo.tokenizer === LLMTokenizer.Llama){ result = await tikJS(data, 'o200k_base');
return await tokenizeWebTokenizers(data, 'llama') } else if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){
} result = await tokenizeGoogleCloud(data);
if(modelInfo.tokenizer === LLMTokenizer.Local){ } else if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
return await tokenizeGGUFModel(data) result = await gemmaTokenize(data);
} } else if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){
if(modelInfo.tokenizer === LLMTokenizer.tiktokenO200Base){ result = await tokenizeWebTokenizers(data, 'DeepSeek');
return await tikJS(data, 'o200k_base') } else if(modelInfo.tokenizer === LLMTokenizer.Cohere){
} result = await tokenizeWebTokenizers(data, 'cohere');
if(modelInfo.tokenizer === LLMTokenizer.GoogleCloud && db.googleClaudeTokenizing){ } else {
return await tokenizeGoogleCloud(data) result = await tikJS(data);
}
if(modelInfo.tokenizer === LLMTokenizer.Gemma || modelInfo.tokenizer === LLMTokenizer.GoogleCloud){
return await gemmaTokenize(data)
}
if(modelInfo.tokenizer === LLMTokenizer.DeepSeek){
return await tokenizeWebTokenizers(data, 'DeepSeek')
}
if(modelInfo.tokenizer === LLMTokenizer.Cohere){
return await tokenizeWebTokenizers(data, 'cohere')
} }
encodeCache.set(cacheKey, result);
return await tikJS(data) return result;
} }
type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'|'gemma'|'cohere'|'googleCloud'|'DeepSeek' type tokenizerType = 'novellist'|'claude'|'novelai'|'llama'|'mistral'|'llama3'|'gemma'|'cohere'|'googleCloud'|'DeepSeek'