From 6e5465356a288e1459fa73dd9797a88459509be9 Mon Sep 17 00:00:00 2001 From: kwaroran Date: Mon, 18 Mar 2024 00:05:03 +0900 Subject: [PATCH] Refactor inlay image parsing and add image embedding functionality --- src/ts/parser.ts | 17 ++++++------- src/ts/process/index.ts | 23 +++++++++-------- src/ts/process/request.ts | 45 +++++++++++++++------------------- src/ts/process/transformers.ts | 9 ++++++- 4 files changed, 48 insertions(+), 46 deletions(-) diff --git a/src/ts/parser.ts b/src/ts/parser.ts index 36e893d7..05f00a8f 100644 --- a/src/ts/parser.ts +++ b/src/ts/parser.ts @@ -154,16 +154,13 @@ async function parseAdditionalAssets(data:string, char:simpleCharacterArgument|c } async function parseInlayImages(data:string){ - const db = get(DataBase) - if(db.inlayImage){ - const inlayMatch = data.match(/{{inlay::(.+?)}}/g) - if(inlayMatch){ - for(const inlay of inlayMatch){ - const id = inlay.substring(9, inlay.length - 2) - const img = await getInlayImage(id) - if(img){ - data = data.replace(inlay, ``) - } + const inlayMatch = data.match(/{{inlay::(.+?)}}/g) + if(inlayMatch){ + for(const inlay of inlayMatch){ + const id = inlay.substring(9, inlay.length - 2) + const img = await getInlayImage(id) + if(img){ + data = data.replace(inlay, ``) } } } diff --git a/src/ts/process/index.ts b/src/ts/process/index.ts index 4108f730..07d1cf89 100644 --- a/src/ts/process/index.ts +++ b/src/ts/process/index.ts @@ -25,6 +25,7 @@ import { sendPeerChar } from "../sync/multiuser"; import { runInlayScreen } from "./inlayScreen"; import { runCharacterJS } from "../plugins/embedscript"; import { addRerolls } from "./prereroll"; +import { runImageEmbedding } from "./transformers"; export interface OpenAIChat{ role: 'system'|'user'|'assistant'|'function' @@ -562,16 +563,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n msg.chatId = v4() } let inlays:string[] = [] - if(db.inlayImage){ - if(msg.role === 'char'){ - formatedChat = formatedChat.replace(/{{inlay::(.+?)}}/g, '') - } - else{ - const inlayMatch = formatedChat.match(/{{inlay::(.+?)}}/g) - if(inlayMatch){ - for(const inlay of inlayMatch){ - inlays.push(inlay) - } + if(msg.role === 'char'){ + formatedChat = formatedChat.replace(/{{inlay::(.+?)}}/g, '') + } + else{ + const inlayMatch = formatedChat.match(/{{inlay::(.+?)}}/g) + if(inlayMatch){ + for(const inlay of inlayMatch){ + inlays.push(inlay) } } } @@ -590,6 +589,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n height: inlayData.height }) } + else{ + const captionResult = await runImageEmbedding(inlayData.data) + formatedChat += `[${captionResult[0].generated_text}]` + } } formatedChat = formatedChat.replace(inlay, '') } diff --git a/src/ts/process/request.ts b/src/ts/process/request.ts index 768e6971..ec8bbcfc 100644 --- a/src/ts/process/request.ts +++ b/src/ts/process/request.ts @@ -172,35 +172,30 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model' case 'mistral-large-latest': case 'reverse_proxy':{ let formatedChat:OpenAIChatExtra[] = [] - if(db.inlayImage){ - for(let i=0;i 0 && m.role === 'user'){ - let v:OpenAIChatExtra = cloneDeep(m) - let contents:OpenAIContents[] = [] - for(let j=0;j 0 && m.role === 'user'){ + let v:OpenAIChatExtra = cloneDeep(m) + let contents:OpenAIContents[] = [] + for(let j=0;j => { return (result?.data as Float32Array) ?? null; } +export const runImageEmbedding = async (dataurl:string) => { + await initTransformers() + const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning'); + const output = await captioner(dataurl) + return output as ImageToTextOutput +} + let synthesizer:TextToAudioPipeline = null let lastSynth:string = null