diff --git a/src/ts/parser.ts b/src/ts/parser.ts
index 36e893d7..05f00a8f 100644
--- a/src/ts/parser.ts
+++ b/src/ts/parser.ts
@@ -154,16 +154,13 @@ async function parseAdditionalAssets(data:string, char:simpleCharacterArgument|c
}
async function parseInlayImages(data:string){
- const db = get(DataBase)
- if(db.inlayImage){
- const inlayMatch = data.match(/{{inlay::(.+?)}}/g)
- if(inlayMatch){
- for(const inlay of inlayMatch){
- const id = inlay.substring(9, inlay.length - 2)
- const img = await getInlayImage(id)
- if(img){
- data = data.replace(inlay, `
`)
- }
+ const inlayMatch = data.match(/{{inlay::(.+?)}}/g)
+ if(inlayMatch){
+ for(const inlay of inlayMatch){
+ const id = inlay.substring(9, inlay.length - 2)
+ const img = await getInlayImage(id)
+ if(img){
+ data = data.replace(inlay, `
`)
}
}
}
diff --git a/src/ts/process/index.ts b/src/ts/process/index.ts
index 4108f730..07d1cf89 100644
--- a/src/ts/process/index.ts
+++ b/src/ts/process/index.ts
@@ -25,6 +25,7 @@ import { sendPeerChar } from "../sync/multiuser";
import { runInlayScreen } from "./inlayScreen";
import { runCharacterJS } from "../plugins/embedscript";
import { addRerolls } from "./prereroll";
+import { runImageEmbedding } from "./transformers";
export interface OpenAIChat{
role: 'system'|'user'|'assistant'|'function'
@@ -562,16 +563,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
msg.chatId = v4()
}
let inlays:string[] = []
- if(db.inlayImage){
- if(msg.role === 'char'){
- formatedChat = formatedChat.replace(/{{inlay::(.+?)}}/g, '')
- }
- else{
- const inlayMatch = formatedChat.match(/{{inlay::(.+?)}}/g)
- if(inlayMatch){
- for(const inlay of inlayMatch){
- inlays.push(inlay)
- }
+ if(msg.role === 'char'){
+ formatedChat = formatedChat.replace(/{{inlay::(.+?)}}/g, '')
+ }
+ else{
+ const inlayMatch = formatedChat.match(/{{inlay::(.+?)}}/g)
+ if(inlayMatch){
+ for(const inlay of inlayMatch){
+ inlays.push(inlay)
}
}
}
@@ -590,6 +589,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
height: inlayData.height
})
}
+ else{
+ const captionResult = await runImageEmbedding(inlayData.data)
+ formatedChat += `[${captionResult[0].generated_text}]`
+ }
}
formatedChat = formatedChat.replace(inlay, '')
}
diff --git a/src/ts/process/request.ts b/src/ts/process/request.ts
index 768e6971..ec8bbcfc 100644
--- a/src/ts/process/request.ts
+++ b/src/ts/process/request.ts
@@ -172,35 +172,30 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
case 'mistral-large-latest':
case 'reverse_proxy':{
let formatedChat:OpenAIChatExtra[] = []
- if(db.inlayImage){
- for(let i=0;i 0 && m.role === 'user'){
- let v:OpenAIChatExtra = cloneDeep(m)
- let contents:OpenAIContents[] = []
- for(let j=0;j 0 && m.role === 'user'){
+ let v:OpenAIChatExtra = cloneDeep(m)
+ let contents:OpenAIContents[] = []
+ for(let j=0;j => {
return (result?.data as Float32Array) ?? null;
}
+export const runImageEmbedding = async (dataurl:string) => { // Runs an 'image-to-text' pipeline on `dataurl` and returns its output; despite the "Embedding" name, the result is typed as ImageToTextOutput (caption text), not a vector.
+ await initTransformers() // awaited before the pipeline is requested; initTransformers is defined outside this block
+ const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning'); // NOTE(review): pipeline() is called on every invocation — confirm whether the library caches the loaded model, otherwise consider reusing the captioner
+ const output = await captioner(dataurl) // `dataurl` is handed to the captioner unchanged — presumably an image data URL; verify against the caller
+ return output as ImageToTextOutput // type assertion only: the shape of `output` is not checked at runtime
+}
+
let synthesizer:TextToAudioPipeline = null
let lastSynth:string = null