diff --git a/src/ts/process/files/image.ts b/src/ts/process/files/image.ts
index 8cbdbc80..d4f76fd3 100644
--- a/src/ts/process/files/image.ts
+++ b/src/ts/process/files/image.ts
@@ -87,6 +87,7 @@ export async function getInlayImage(id: string){
 export function supportsInlayImage(){
     const db = get(DataBase)
     return db.aiModel.startsWith('gptv') || (db.aiModel === 'reverse_proxy' && db.proxyRequestModel?.startsWith('gptv')) || db.aiModel === 'gemini-pro-vision'
+            || db.aiModel.startsWith('claude-3') || (db.aiModel === 'reverse_proxy' && db.proxyRequestModel?.startsWith('claude-3')) // proxyRequestModel only counts while the reverse proxy is the selected backend, same as the gptv check above
 }
 
 export async function reencodeImage(img:Uint8Array){
diff --git a/src/ts/process/index.ts b/src/ts/process/index.ts
index 7d46ce71..4108f730 100644
--- a/src/ts/process/index.ts
+++ b/src/ts/process/index.ts
@@ -33,6 +33,14 @@ export interface OpenAIChat{
     name?:string
     removable?:boolean
     attr?:string[]
+    multimodals?: MultiModal[]
+}
+
+export interface MultiModal{ // one non-text attachment carried on a chat message (OpenAIChat.multimodals)
+    type:'image'|'video'
+    base64:string, // consumed in request.ts as a data URL: split on ';' / ':' for the media type and on ',' for the payload
+    height?:number, // optional dimensions; the tokenizer substitutes 0 when either one is missing
+    width?:number
 }
 
 export interface OpenAIChatFull extends OpenAIChat{
@@ -568,19 +576,19 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
             }
         }
 
+        let multimodal:MultiModal[] = []
         if(inlays.length > 0){
             for(const inlay of inlays){
                 const inlayName = inlay.replace('{{inlay::', '').replace('}}', '')
                 const inlayData = await getInlayImage(inlayName)
                 if(inlayData){
                     if(supportsInlayImage()){
-                        const imgchat = {
-                            role: msg.role === 'user' ? 'user' : 'assistant',
-                            content: inlayData.data,
-                            memo: `inlayImage-${inlayData.height}-${inlayData.width}`,
-                        } as const
-                        chats.push(imgchat)
-                        currentTokens += await tokenizer.tokenizeChat(imgchat)
+                        multimodal.push({
+                            type: 'image',
+                            base64: inlayData.data,
+                            width: inlayData.width,
+                            height: inlayData.height
+                        })
                     }
                 }
                 formatedChat = formatedChat.replace(inlay, '')
@@ -604,7 +612,11 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
             role: msg.role === 'user' ? 'user' : 'assistant',
             content: formatedChat,
             memo: msg.chatId,
-            attr: attr
+            attr: attr,
+            multimodals: multimodal
+        }
+        if(chat.multimodals.length === 0){
+            delete chat.multimodals
         }
         chats.push(chat)
         currentTokens += await tokenizer.tokenizeChat(chat)
diff --git a/src/ts/process/request.ts b/src/ts/process/request.ts
index 55598c00..270b0257 100644
--- a/src/ts/process/request.ts
+++ b/src/ts/process/request.ts
@@ -1,5 +1,5 @@
 import { get } from "svelte/store";
-import type { OpenAIChat, OpenAIChatFull } from ".";
+import type { MultiModal, OpenAIChat, OpenAIChatFull } from ".";
 import { DataBase, setDatabase, type character } from "../storage/database";
 import { pluginProcess } from "../plugins/plugins";
 import { language } from "../../lang";
@@ -117,6 +117,7 @@ export interface OpenAIChatExtra {
     name?:string
     removable?:boolean
     attr?:string[]
+    multimodals?:MultiModal[]
 }
 
 
@@ -172,33 +173,29 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
         case 'reverse_proxy':{
             let formatedChat:OpenAIChatExtra[] = []
             if(db.inlayImage){
-                let pendingImages:OpenAIImageContents[] = []
                 for(let i=0;i<formated.length;i++){
                     const m = formated[i]
-                    if(m.memo && m.memo.startsWith('inlayImage')){
-                        pendingImages.push({
-                            "type": "image",
-                            "image_url": {
-                                "url": m.content,
-                                "detail": db.gptVisionQuality
-                            }
+                    if(m.multimodals && m.multimodals.length > 0 && m.role === 'user'){
+                        let v:OpenAIChatExtra = cloneDeep(m)
+                        let contents:OpenAIContents[] = []
+                        for(let j=0;j<m.multimodals.length;j++){
+                            contents.push({
+                                "type": "image",
+                                "image_url": {
+                                    "url": m.multimodals[j].base64,
+                                    "detail": db.gptVisionQuality
+                                }
+                            })
+                        }
+                        contents.push({
+                            "type": "text",
+                            "text": m.content
                         })
+                        v.content = contents
+                        formatedChat.push(v)
                     }
                     else{
-                        if(pendingImages.length > 0 && m.role === 'user'){
-                            let v:OpenAIChatExtra = cloneDeep(m)
-                            let contents:OpenAIContents[] = pendingImages
-                            contents.push({
-                                "type": "text",
-                                "text": m.content
-                            })
-                            v.content = contents
-                            formatedChat.push(v)
-                            pendingImages = []
-                        }
-                        else{
-                            formatedChat.push(m)
-                        }
+                        formatedChat.push(m)
                     }
                 }
             }
@@ -218,6 +215,7 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
                     delete formatedChat[i].memo
                     delete formatedChat[i].removable
                     delete formatedChat[i].attr
+                    delete formatedChat[i].multimodals
                 }
                 if(aiModel === 'reverse_proxy' && db.reverseProxyOobaMode && formatedChat[i].role === 'system'){
                     const cont = formatedChat[i].content
@@ -301,6 +299,7 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
                 openrouterRequestModel = await getFreeOpenRouterModel()
             }
 
+            console.log(formatedChat)
             if(aiModel.startsWith('mistral')){
                 requestModel = aiModel
 
@@ -1414,20 +1413,106 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
                     }
                 }
 
+                interface Claude3TextBlock { // text entry of a block-form message content
+                    type: 'text',
+                    text: string
+                }
+
+                interface Claude3ImageBlock { // image entry of a block-form message content
+                    type: 'image',
+                    source: {
+                        type: 'base64'
+                        media_type: string, // filled from the text between "data:" and the first ";" of the attachment's data URL
+                        data: string // filled from the text after the first "," of the attachment's data URL
+                    }
+                }
+
+                type Claude3ContentBlock = Claude3TextBlock|Claude3ImageBlock // any entry allowed in a block-form content array
+
                 interface Claude3Chat {
                     role: 'user'|'assistant'
-                    content: string
+                    content: string|Claude3ContentBlock[] // plain text, or a block array when the message carries multimodal attachments
                 }
 
                 let claudeChat: Claude3Chat[] = []
                 let systemPrompt:string = ''
 
-                const addClaudeChat = (chat:Claude3Chat) => {
+                // Appends one message to claudeChat. A message whose role matches the previous entry is
+                // merged into that entry (texts joined by a blank line) instead of being pushed.
+                // Image attachments become base64 image blocks placed ahead of the text block, in the
+                // order they were supplied; attachments of any other type are ignored here.
+                const addClaudeChat = (chat:{
+                    role: 'user'|'assistant'
+                    content: string
+                }, multimodals?:MultiModal[]) => {
+                    // Convert the image attachments up front so both branches below share one code path.
+                    const images:Claude3ImageBlock[] = []
+                    for(const modal of multimodals ?? []){
+                        if(modal.type !== 'image'){
+                            continue
+                        }
+                        // modal.base64 is parsed as a data URL: "data:<media type>;base64,<payload>"
+                        const dataurl = modal.base64
+                        const base64 = dataurl.split(',')[1]
+                        const mediaType = dataurl.split(';')[0].split(':')[1]
+                        if(!base64 || !mediaType){
+                            // Not a data URL: skip it rather than send a block with undefined fields.
+                            continue
+                        }
+                        // push (not unshift) keeps the images in the order they appear in the message
+                        images.push({
+                            type: 'image',
+                            source: {
+                                type: 'base64',
+                                media_type: mediaType,
+                                data: base64
+                            }
+                        })
+                    }
+
                     if(claudeChat.length > 0 && claudeChat[claudeChat.length-1].role === chat.role){
-                        claudeChat[claudeChat.length-1].content += "\n\n" + chat.content
+                        // Same role as the previous entry: merge into it.
+                        let content = claudeChat[claudeChat.length-1].content
+                        if(images.length > 0 && !Array.isArray(content)){
+                            // Promote plain-string content to block form so images can sit beside it.
+                            content = [{
+                                type: 'text',
+                                text: content
+                            }]
+                        }
+
+                        if(Array.isArray(content)){
+                            // The new text joins the trailing text block when there is one.
+                            const lastContent = content[content.length-1]
+                            if(lastContent?.type === 'text'){
+                                lastContent.text += "\n\n" + chat.content
+                            }
+                            else{
+                                content.push({
+                                    type: 'text',
+                                    text: chat.content
+                                })
+                            }
+
+                            // Insert the new images after any images already present and before the
+                            // first text block, so images stay in message order ahead of the text.
+                            const firstText = content.findIndex((block) => block.type === 'text')
+                            content.splice(firstText, 0, ...images)
+                        }
+                        else{
+                            // No images involved: keep the plain-string form.
+                            content += "\n\n" + chat.content
+                        }
+                        claudeChat[claudeChat.length-1].content = content
                     }
                     else{
-                        claudeChat.push(chat)
+                        // Build a fresh entry instead of mutating the caller's object.
+                        const textBlock:Claude3TextBlock = { type: 'text', text: chat.content }
+                        claudeChat.push({
+                            role: chat.role,
+                            // block form only when there are images; plain string otherwise
+                            content: images.length > 0 ? [...images, textBlock] : chat.content
+                        })
                     }
                 }
                 for(const chat of formated){
@@ -1436,14 +1521,14 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
                             addClaudeChat({
                                 role: 'user',
                                 content: chat.content
-                            })
+                            }, chat.multimodals)
                             break
                         }
                         case 'assistant':{
                             addClaudeChat({
                                 role: 'assistant',
                                 content: chat.content
-                            })
+                            }, chat.multimodals)
                             break
                         }
                         case 'system':{
@@ -1464,7 +1549,7 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
                         }
                     }
                 }
-
+                console.log(claudeChat)
                 if(claudeChat.length === 0 && systemPrompt === ''){
                     return {
                         type: 'fail',
diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts
index 6665e0af..9995c411 100644
--- a/src/ts/tokenizer.ts
+++ b/src/ts/tokenizer.ts
@@ -2,7 +2,7 @@ import type { Tiktoken } from "@dqbd/tiktoken";
 import type { Tokenizer } from "@mlc-ai/web-tokenizers";
 import { DataBase, type character } from "./storage/database";
 import { get } from "svelte/store";
-import type { OpenAIChat } from "./process";
+import type { MultiModal, OpenAIChat } from "./process";
 import { supportsInlayImage } from "./process/files/image";
 import { risuChatParser } from "./parser";
 import { tokenizeGGUFModel } from "./process/models/local";
@@ -132,53 +132,56 @@ export class ChatTokenizer {
         this.useName = useName
     }
     async tokenizeChat(data:OpenAIChat) {
-        if(data.memo && data.memo.startsWith('inlayImage')){
-            const db = get(DataBase)
-            if(!supportsInlayImage()){
-                return this.chatAdditonalTokens
-            }
-            if(db.gptVisionQuality === 'low'){
-                return 87
-            }
-
-            let encoded = this.chatAdditonalTokens
-            const memo = data.memo.split('-')
-            let height = parseInt(memo[1])
-            let width = parseInt(memo[2])
-
-            if(height === width){
-                if(height > 768){
-                    height = 768
-                    width = 768
-                }
-            }
-            else if(height > width){
-                if(width > 768){
-                    width = 768
-                    height = height * (768 / width)
-                }
-            }
-            else{
-                if(height > 768){
-                    height = 768
-                    width = width * (768 / height)
-                }
-            }
-
-            const chunkSize = Math.ceil(width / 512) * Math.ceil(height / 512)
-            encoded += chunkSize * 2
-            encoded += 85
-
-            return encoded
-        }
-
         let encoded = (await encode(data.content)).length + this.chatAdditonalTokens
         if(data.name && this.useName ==='name'){
             encoded += (await encode(data.name)).length + 1
         }
+        // Each attachment is costed separately and added on top of the text/name tokens.
+        const attachments = data.multimodals ?? []
+        for(const attachment of attachments){
+            encoded += await this.tokenizeMultiModal(attachment)
+        }
         return encoded
     }
 
+    /**
+     * Estimates the token cost of one multimodal attachment.
+     *
+     * Returns `chatAdditonalTokens` alone when `supportsInlayImage()` is false, a flat 87
+     * when `gptVisionQuality` is 'low', and otherwise `chatAdditonalTokens + 85 + 2 * tiles`,
+     * where `tiles` is the number of 512x512 squares covering the image after its shorter
+     * side has been capped at 768. Missing dimensions are treated as 0.
+     */
+    async tokenizeMultiModal(data:MultiModal){
+        const db = get(DataBase)
+        if(!supportsInlayImage()){
+            return this.chatAdditonalTokens
+        }
+        if(db.gptVisionQuality === 'low'){
+            return 87
+        }
+
+        let height = data.height ?? 0
+        let width = data.width ?? 0
+
+        // Cap the shorter side at 768 while preserving the aspect ratio. Both sides are
+        // scaled from the untouched `shorter` value: overwriting one side with 768 before
+        // computing the ratio would turn the ratio into 1 and leave the other side unscaled.
+        const shorter = Math.min(height, width)
+        if(shorter > 768){
+            // Multiply before dividing so whole-number results stay exact.
+            height = height * 768 / shorter
+            width = width * 768 / shorter
+        }
+
+        // NOTE(review): the per-tile weight of 2 and the base of 85 are carried over
+        // unchanged from the previous implementation; confirm them against provider docs.
+        const chunkSize = Math.ceil(width / 512) * Math.ceil(height / 512)
+        let encoded = this.chatAdditonalTokens
+        encoded += chunkSize * 2
+        encoded += 85
+        return encoded
+    }
     
 }