Refactor inlay image parsing and add image embedding functionality
@@ -154,16 +154,13 @@ async function parseAdditionalAssets(data:string, char:simpleCharacterArgument|c
 }
 
 async function parseInlayImages(data:string){
-    const db = get(DataBase)
-    if(db.inlayImage){
-        const inlayMatch = data.match(/{{inlay::(.+?)}}/g)
-        if(inlayMatch){
-            for(const inlay of inlayMatch){
-                const id = inlay.substring(9, inlay.length - 2)
-                const img = await getInlayImage(id)
-                if(img){
-                    data = data.replace(inlay, `<img src="${img.data}"/>`)
-                }
-            }
-        }
-    }
+    const inlayMatch = data.match(/{{inlay::(.+?)}}/g)
+    if(inlayMatch){
+        for(const inlay of inlayMatch){
+            const id = inlay.substring(9, inlay.length - 2)
+            const img = await getInlayImage(id)
+            if(img){
+                data = data.replace(inlay, `<img src="${img.data}"/>`)
+            }
+        }
+    }
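Note on the marker format: an inlay placeholder looks like {{inlay::<id>}}, so the substring(9, inlay.length - 2) call above strips the 9-character "{{inlay::" prefix and the trailing "}}" to recover the asset id. A minimal sketch of that extraction (hypothetical helper name, not part of the commit):

    // "{{inlay::" is 9 characters long; "}}" is the 2-character suffix.
    function extractInlayId(marker: string): string {
        return marker.substring(9, marker.length - 2)
    }
    // extractInlayId('{{inlay::abc123}}') === 'abc123'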
@@ -25,6 +25,7 @@ import { sendPeerChar } from "../sync/multiuser";
 import { runInlayScreen } from "./inlayScreen";
 import { runCharacterJS } from "../plugins/embedscript";
 import { addRerolls } from "./prereroll";
+import { runImageEmbedding } from "./transformers";
 
 export interface OpenAIChat{
     role: 'system'|'user'|'assistant'|'function'
@@ -562,16 +563,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
             msg.chatId = v4()
         }
         let inlays:string[] = []
-        if(db.inlayImage){
-            if(msg.role === 'char'){
-                formatedChat = formatedChat.replace(/{{inlay::(.+?)}}/g, '')
-            }
-            else{
-                const inlayMatch = formatedChat.match(/{{inlay::(.+?)}}/g)
-                if(inlayMatch){
-                    for(const inlay of inlayMatch){
-                        inlays.push(inlay)
-                    }
-                }
-            }
-        }
+        if(msg.role === 'char'){
+            formatedChat = formatedChat.replace(/{{inlay::(.+?)}}/g, '')
+        }
+        else{
+            const inlayMatch = formatedChat.match(/{{inlay::(.+?)}}/g)
+            if(inlayMatch){
+                for(const inlay of inlayMatch){
+                    inlays.push(inlay)
+                }
+            }
+        }
@@ -590,6 +589,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
                     height: inlayData.height
                 })
             }
+            else{
+                const captionResult = await runImageEmbedding(inlayData.data)
+                formatedChat += `[${captionResult[0].generated_text}]`
+            }
         }
         formatedChat = formatedChat.replace(inlay, '')
     }
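The added else branch is the new image-embedding fallback: when an inlay image is not attached as a multimodal part, it is captioned locally via runImageEmbedding and the caption is spliced into the prompt as bracketed text. A rough sketch of what the prompt gains, assuming the caption output shape the diff itself indexes ([{ generated_text: string }]):

    // Hypothetical values for illustration only.
    const captionResult = await runImageEmbedding(inlayData.data)  // e.g. [{ generated_text: 'a cat sitting on a sofa' }]
    formatedChat += `[${captionResult[0].generated_text}]`         // prompt now ends with "[a cat sitting on a sofa]"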
@@ -172,35 +172,30 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
         case 'mistral-large-latest':
         case 'reverse_proxy':{
             let formatedChat:OpenAIChatExtra[] = []
-            if(db.inlayImage){
-                for(let i=0;i<formated.length;i++){
-                    const m = formated[i]
-                    if(m.multimodals && m.multimodals.length > 0 && m.role === 'user'){
-                        let v:OpenAIChatExtra = cloneDeep(m)
-                        let contents:OpenAIContents[] = []
-                        for(let j=0;j<m.multimodals.length;j++){
-                            contents.push({
-                                "type": "image",
-                                "image_url": {
-                                    "url": m.multimodals[j].base64,
-                                    "detail": db.gptVisionQuality
-                                }
-                            })
-                        }
-                        contents.push({
-                            "type": "text",
-                            "text": m.content
-                        })
-                        v.content = contents
-                        formatedChat.push(v)
-                    }
-                    else{
-                        formatedChat.push(m)
-                    }
-                }
-            }
-            else{
-                formatedChat = formated
-            }
+            for(let i=0;i<formated.length;i++){
+                const m = formated[i]
+                if(m.multimodals && m.multimodals.length > 0 && m.role === 'user'){
+                    let v:OpenAIChatExtra = cloneDeep(m)
+                    let contents:OpenAIContents[] = []
+                    for(let j=0;j<m.multimodals.length;j++){
+                        contents.push({
+                            "type": "image",
+                            "image_url": {
+                                "url": m.multimodals[j].base64,
+                                "detail": db.gptVisionQuality
+                            }
+                        })
+                    }
+                    contents.push({
+                        "type": "text",
+                        "text": m.content
+                    })
+                    v.content = contents
+                    formatedChat.push(v)
+                }
+                else{
+                    formatedChat.push(m)
+                }
+            }
 
             let oobaSystemPrompts:string[] = []
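For context, the contents array assembled above follows the OpenAI-style vision message layout, where a user message's content becomes a list of image and text parts instead of a single string. A hedged sketch of one resulting message (placeholder values; field names copied from the diff, typing omitted):

    const visionMessage = {
        role: 'user',
        content: [
            { type: 'image', image_url: { url: 'data:image/png;base64,....', detail: 'low' } },
            { type: 'text', text: 'Describe this picture.' },
        ],
    }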
@@ -1,4 +1,4 @@
-import {env, AutoTokenizer, pipeline, type SummarizationOutput, type TextGenerationConfig, type TextGenerationOutput, FeatureExtractionPipeline, TextToAudioPipeline } from '@xenova/transformers';
+import {env, AutoTokenizer, pipeline, type SummarizationOutput, type TextGenerationConfig, type TextGenerationOutput, FeatureExtractionPipeline, TextToAudioPipeline, type ImageToTextOutput } from '@xenova/transformers';
 import { unzip } from 'fflate';
 import { globalFetch, loadAsset, saveAsset } from 'src/ts/storage/globalApi';
 import { selectSingleFile } from 'src/ts/util';
@@ -93,6 +93,13 @@ export const runEmbedding = async (text: string):Promise<Float32Array> => {
     return (result?.data as Float32Array) ?? null;
 }
 
+export const runImageEmbedding = async (dataurl:string) => {
+    await initTransformers()
+    const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
+    const output = await captioner(dataurl)
+    return output as ImageToTextOutput
+}
+
 let synthesizer:TextToAudioPipeline = null
 let lastSynth:string = null
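A hedged usage sketch of the new helper: the first call downloads the Xenova/vit-gpt2-image-captioning model, and the result is read as an array of { generated_text } objects (which is why callers above use captionResult[0].generated_text). The data URL is a placeholder:

    const caption = await runImageEmbedding('data:image/png;base64,iVBORw0KGgo...')
    console.log(caption[0].generated_text)   // e.g. "a dog laying on a couch"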