Enhance plugin functionality by adding optional provider parameters and improving thoughts extraction regex

Kwaroran
2024-12-25 04:55:05 +09:00
parent 8b6fc5bc8b
commit 1c51afc626
5 changed files with 46 additions and 4 deletions

View File

@@ -89,7 +89,7 @@ Gets the current character.
Sets the current character.
-### `addProvider(type: string, func: (arg:PluginV2ProviderArgument) => Promise<{success:boolean,content:string}>): void`
+### `addProvider(type: string, func: (arg:PluginV2ProviderArgument, options?:PluginV2ProviderOptions) => Promise<{success:boolean,content:string}>): void`
Adds a provider to the plugin.
@@ -111,6 +111,9 @@ Adds a provider to the plugin.
- `Promise<{success:boolean,content:string|ReadableStream<string>}>` - The provider result.
  - `success: boolean` - If the provider was successful.
  - `content: string|ReadableStream<string>` - The provider content. If it's a ReadableStream, it will be streamed to the chat.
+- `options?: PluginV2ProviderOptions` - The provider options.
+  - `tokenizer?: string` - The tokenizer name. Must be one of `"mistral"`, `"llama"`, `"novelai"`, `"claude"`, `"novellist"`, `"llama3"`, `"gemma"`, `"cohere"`, `"tiktoken"`, or `"custom"`. If it's `"custom"`, you have to provide `tokenizerFunc`.
+  - `tokenizerFunc?: (content: string) => number[]|Promise<number[]>` - The tokenizer function.
### `addRisuScriptHandler(type: string, func: (content:string) => string|null|undefined|Promise<string|null|undefined>): void`
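As a rough illustration of the `addProvider` change documented above, a V2 plugin could now pass the options object as a third argument. This is only a sketch: the provider name and reply are invented, and it assumes `addProvider` is callable directly from the plugin's scope as the docs describe.

```ts
// Hypothetical provider registration using the new optional third argument.
addProvider(
    'my-echo-provider',
    async (arg) => {
        // arg is the PluginV2ProviderArgument described above.
        return { success: true, content: 'Hello from my-echo-provider' }
    },
    {
        // Count tokens with the built-in Claude tokenizer for this provider.
        tokenizer: 'claude'
    }
)
```

If `options` is omitted, the host stores an empty options object, so the usual tokenizer selection for the active model still applies.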

View File

@@ -2337,7 +2337,7 @@ export function parseChatML(data:string):OpenAIChat[]|null{
let thoughts:string[] = []
-v = v.replace(/<Thoughts>(.*?)<\/Thoughts>/g, (match, p1) => {
+v = v.replace(/<Thoughts>(.+)<\/Thoughts>/gms, (match, p1) => {
    thoughts.push(p1)
    return ''
})
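A small sketch of what the flag change above affects, assuming a `<Thoughts>` block that spans multiple lines (the sample string is made up):

```ts
const text = 'Hello\n<Thoughts>\nstep one\nstep two\n</Thoughts>\nBye'

// Old pattern: without the `s` flag, `.` never matches a newline,
// so a multi-line <Thoughts> block is not detected or stripped.
console.log(/<Thoughts>(.*?)<\/Thoughts>/g.test(text)) // false

// New pattern: `s` lets `.` cross newlines, so the block is captured and removed.
console.log(/<Thoughts>(.+)<\/Thoughts>/gms.test(text)) // true

// Note that `(.+)` is greedy: if one string contains several <Thoughts> blocks,
// everything from the first opening tag to the last closing tag is captured as one span.
```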

View File

@@ -131,11 +131,17 @@ type PluginV2ProviderArgument = {
    mode: string
}
+type PluginV2ProviderOptions = {
+   tokenizer?: string
+   tokenizerFunc?: (content:string) => number[]|Promise<number[]>
+}
type EditFunction = (content:string) => string|null|undefined|Promise<string|null|undefined>
type ReplacerFunction = (content:OpenAIChat[], type:string) => OpenAIChat[]|Promise<OpenAIChat[]>
export const pluginV2 = {
    providers: new Map<string, (arg:PluginV2ProviderArgument) => Promise<{success:boolean,content:string|ReadableStream<string>}> >(),
+   providerOptions: new Map<string, PluginV2ProviderOptions>(),
    editdisplay: new Set<EditFunction>(),
    editoutput: new Set<EditFunction>(),
    editprocess: new Set<EditFunction>(),
@@ -183,10 +189,11 @@ export async function loadV2Plugin(plugins:RisuPlugin[]){
        db.characters[charid] = char
        setDatabaseLite(db)
    },
-   addProvider: (name:string, func:(arg:PluginV2ProviderArgument) => Promise<{success:boolean,content:string}>) => {
+   addProvider: (name:string, func:(arg:PluginV2ProviderArgument) => Promise<{success:boolean,content:string}>, options?:PluginV2ProviderOptions) => {
        let provs = get(customProviderStore)
        provs.push(name)
        pluginV2.providers.set(name, func)
+       pluginV2.providerOptions.set(name, options ?? {})
        customProviderStore.set(provs)
    },
    addRisuScriptHandler: (name:ScriptMode, func:EditFunction) => {

View File

@@ -753,7 +753,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{
}
}
let thoughts:string[] = []
-formatedChat = formatedChat.replace(/<Thoughts>([\s\S]+?)<\/Thoughts>/g, (match, p1) => {
+formatedChat = formatedChat.replace(/<Thoughts>(.+)<\/Thoughts>/gms, (match, p1) => {
    thoughts.push(p1)
    return ''
})

View File

@@ -7,6 +7,7 @@ import { risuChatParser } from "./parser.svelte";
import { tokenizeGGUFModel } from "./process/models/local";
import { globalFetch } from "./globalApi.svelte";
import { getModelInfo, LLMTokenizer } from "./model/modellist";
+import { pluginV2 } from "./plugins/plugins"
export const tokenizerList = [
@@ -45,8 +46,39 @@ export async function encode(data:string):Promise<(number[]|Uint32Array|Int32Arr
            return await tikJS(data, 'o200k_base')
        }
    }
    const modelInfo = getModelInfo(db.aiModel)
+   if(db.aiModel === 'custom' && pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer){
+       const tokenizer = pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizer
+       switch(tokenizer){
+           case 'mistral':
+               return await tokenizeWebTokenizers(data, 'mistral')
+           case 'llama':
+               return await tokenizeWebTokenizers(data, 'llama')
+           case 'novelai':
+               return await tokenizeWebTokenizers(data, 'novelai')
+           case 'claude':
+               return await tokenizeWebTokenizers(data, 'claude')
+           case 'novellist':
+               return await tokenizeWebTokenizers(data, 'novellist')
+           case 'llama3':
+               return await tokenizeWebTokenizers(data, 'llama')
+           case 'gemma':
+               return await tokenizeWebTokenizers(data, 'gemma')
+           case 'cohere':
+               return await tokenizeWebTokenizers(data, 'cohere')
+           case 'o200k_base':
+               return await tikJS(data, 'o200k_base')
+           case 'cl100k_base':
+               return await tikJS(data, 'cl100k_base')
+           case 'custom':
+               return await pluginV2.providerOptions.get(db.currentPluginProvider)?.tokenizerFunc?.(data) ?? [0]
+           default:
+               return await tikJS(data, 'o200k_base')
+       }
+   }
    if(modelInfo.tokenizer === LLMTokenizer.NovelList){
        const nv= await tokenizeWebTokenizers(data, 'novellist')
        return nv
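For the `'custom'` branch above, the provider has to bring its own `tokenizerFunc`. A minimal sketch of what a plugin might register, assuming the `PluginV2ProviderOptions` type from the plugin module; the whitespace-based token ids are purely a stand-in, not a real tokenizer:

```ts
// Hypothetical options object for a provider that supplies its own tokenizer.
// encode() above calls tokenizerFunc(data) and falls back to [0] if the function is missing.
const options: PluginV2ProviderOptions = {
    tokenizer: 'custom',
    tokenizerFunc: (content: string) => {
        // Stand-in tokenizer: one fake token id per whitespace-separated chunk.
        return content.split(/\s+/).filter(Boolean).map((_, i) => i)
    }
}
```

Any tokenizer name not handled by the switch falls through to the `o200k_base` default.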