From 1c51afc626d8c3916f7d8fac8e9c3e8073191e2d Mon Sep 17 00:00:00 2001 From: Kwaroran Date: Wed, 25 Dec 2024 04:55:05 +0900 Subject: [PATCH] Enhance plugin functionality by adding optional provider parameters and improving thoughts extraction regex --- plugins.md | 5 ++++- src/ts/parser.svelte.ts | 2 +- src/ts/plugins/plugins.ts | 9 ++++++++- src/ts/process/index.svelte.ts | 2 +- src/ts/tokenizer.ts | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 46 insertions(+), 4 deletions(-) diff --git a/plugins.md b/plugins.md index ab5f0fe6..115db9ef 100644 --- a/plugins.md +++ b/plugins.md @@ -89,7 +89,7 @@ Gets the current character. Sets the current character. -### `addProvider(type: string, func: (arg:PluginV2ProviderArgument) => Promise<{success:boolean,content:string}>): void` +### `addProvider(type: string, func: (arg:PluginV2ProviderArgument, options?:PluginV2ProviderOptions) => Promise<{success:boolean,content:string}>): void` Adds a provider to the plugin. @@ -111,6 +111,9 @@ Adds a provider to the plugin. - `Promise<{success:boolean,content:string|ReadableStream}>` - The provider result. - `success: boolean` - If the provider was successful. - `content: string|ReadableStream` - The provider content. if it's a ReadableStream, it will be streamed to the chat. +- `options?: PluginV2ProviderOptions` - The provider options. + - `tokenizer?: string` - The tokenizer name. must be one of `"mistral"`, `"llama"`, `"novelai"`, `"claude"`, `"novellist"`, `"llama3"`, `"gemma"`, `"cohere"`, `"tiktoken"` or `"custom"`. if it's `"custom"`, you have to provide `tokenizerFunc`. + - `tokenizerFunc?: (content: string) => number[]|Promise` - The tokenizer function. ### `addRisuScriptHandler(type: string, func: (content:string) => string|null|undefined|Promise): void` diff --git a/src/ts/parser.svelte.ts b/src/ts/parser.svelte.ts index 90dd5d6d..304a2720 100644 --- a/src/ts/parser.svelte.ts +++ b/src/ts/parser.svelte.ts @@ -2337,7 +2337,7 @@ export function parseChatML(data:string):OpenAIChat[]|null{ let thoughts:string[] = [] - v = v.replace(/(.*?)<\/Thoughts>/g, (match, p1) => { + v = v.replace(/(.+)<\/Thoughts>/gms, (match, p1) => { thoughts.push(p1) return '' }) diff --git a/src/ts/plugins/plugins.ts b/src/ts/plugins/plugins.ts index ae0eaa6a..ada46156 100644 --- a/src/ts/plugins/plugins.ts +++ b/src/ts/plugins/plugins.ts @@ -131,11 +131,17 @@ type PluginV2ProviderArgument = { mode: string } +type PluginV2ProviderOptions = { + tokenizer?: string + tokenizerFunc?: (content:string) => number[]|Promise +} + type EditFunction = (content:string) => string|null|undefined|Promise type ReplacerFunction = (content:OpenAIChat[], type:string) => OpenAIChat[]|Promise export const pluginV2 = { providers: new Map Promise<{success:boolean,content:string|ReadableStream}> >(), + providerOptions: new Map(), editdisplay: new Set(), editoutput: new Set(), editprocess: new Set(), @@ -183,10 +189,11 @@ export async function loadV2Plugin(plugins:RisuPlugin[]){ db.characters[charid] = char setDatabaseLite(db) }, - addProvider: (name:string, func:(arg:PluginV2ProviderArgument) => Promise<{success:boolean,content:string}>) => { + addProvider: (name:string, func:(arg:PluginV2ProviderArgument) => Promise<{success:boolean,content:string}>, options?:PluginV2ProviderOptions) => { let provs = get(customProviderStore) provs.push(name) pluginV2.providers.set(name, func) + pluginV2.providerOptions.set(name, options ?? {}) customProviderStore.set(provs) }, addRisuScriptHandler: (name:ScriptMode, func:EditFunction) => { diff --git a/src/ts/process/index.svelte.ts b/src/ts/process/index.svelte.ts index b2783cf0..62bf88f7 100644 --- a/src/ts/process/index.svelte.ts +++ b/src/ts/process/index.svelte.ts @@ -753,7 +753,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{ } } let thoughts:string[] = [] - formatedChat = formatedChat.replace(/([\s\S]+?)<\/Thoughts>/g, (match, p1) => { + formatedChat = formatedChat.replace(/(.+)<\/Thoughts>/gms, (match, p1) => { thoughts.push(p1) return '' }) diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index 4565454f..442501fd 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -7,6 +7,7 @@ import { risuChatParser } from "./parser.svelte"; import { tokenizeGGUFModel } from "./process/models/local"; import { globalFetch } from "./globalApi.svelte"; import { getModelInfo, LLMTokenizer } from "./model/modellist"; +import { pluginV2 } from "./plugins/plugins"; export const tokenizerList = [ @@ -45,8 +46,39 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr return await tikJS(data, 'o200k_base') } } + const modelInfo = getModelInfo(db.aiModel) + if(db.aiModel === 'custom' && pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer){ + const tokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer + switch(tokenizer){ + case 'mistral': + return await tokenizeWebTokenizers(data, 'mistral') + case 'llama': + return await tokenizeWebTokenizers(data, 'llama') + case 'novelai': + return await tokenizeWebTokenizers(data, 'novelai') + case 'claude': + return await tokenizeWebTokenizers(data, 'claude') + case 'novellist': + return await tokenizeWebTokenizers(data, 'novellist') + case 'llama3': + return await tokenizeWebTokenizers(data, 'llama') + case 'gemma': + return await tokenizeWebTokenizers(data, 'gemma') + case 'cohere': + return await tokenizeWebTokenizers(data, 'cohere') + case 'o200k_base': + return await tikJS(data, 'o200k_base') + case 'cl100k_base': + return await tikJS(data, 'cl100k_base') + case 'custom': + return await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0] + default: + return await tikJS(data, 'o200k_base') + } + } + if(modelInfo.tokenizer === LLMTokenizer.NovelList){ const nv= await tokenizeWebTokenizers(data, 'novellist') return nv