diff --git a/src/ts/process/index.svelte.ts b/src/ts/process/index.svelte.ts index 9fac0a04..d8b622e7 100644 --- a/src/ts/process/index.svelte.ts +++ b/src/ts/process/index.svelte.ts @@ -41,7 +41,7 @@ export interface OpenAIChat{ } export interface MultiModal{ - type:'image'|'video' + type:'image'|'video'|'audio' base64:string, height?:number, width?:number diff --git a/src/ts/process/request.ts b/src/ts/process/request.ts index df15704b..eccdb823 100644 --- a/src/ts/process/request.ts +++ b/src/ts/process/request.ts @@ -1406,7 +1406,11 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise }); for (const modal of chat.multimodals) { - if (modal.type === "image") { + if ( + (modal.type === "image" && arg.modelInfo.flags.includes(LLMFlags.hasImageInput)) || + (modal.type === "audio" && arg.modelInfo.flags.includes(LLMFlags.hasAudioInput)) || + (modal.type === "video" && arg.modelInfo.flags.includes(LLMFlags.hasVideoInput)) + ) { const dataurl = modal.base64; const base64 = dataurl.split(",")[1]; const mediaType = dataurl.split(";")[0].split(":")[1]; @@ -1486,8 +1490,11 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise contents: reformatedChat, generation_config: applyParameters({ "maxOutputTokens": maxTokens, - }, ['temperature', 'top_p'], { - 'top_p': "topP" + }, ['temperature', 'top_p', 'top_k', 'presence_penalty', 'frequency_penalty'], { + 'top_p': "topP", + 'top_k': "topK", + 'presence_penalty': "presencePenalty", + 'frequency_penalty': "frequencyPenalty" }, arg.mode), safetySettings: uncensoredCatagory, systemInstruction: {