[feat] inlay images

This commit is contained in:
kwaroran
2023-11-10 19:49:16 +09:00
parent 501167dd57
commit da49cf05f6
11 changed files with 308 additions and 20 deletions

89
src/ts/image.ts Normal file
View File

@@ -0,0 +1,89 @@
import localforage from "localforage";
import { selectSingleFile } from "./util";
import { v4 } from "uuid";
import { DataBase } from "./storage/database";
import { get } from "svelte/store";
// Dedicated localforage store that persists inlay images keyed by uuid.
const inlayStorage = localforage.createInstance({ name: 'inlay', storeName: 'inlay' })
/**
 * Prompt the user for an image, downscale it to at most 1024px per side,
 * re-encode it as PNG, and persist it in the inlay store.
 *
 * @returns the inline placeholder `{{inlay::<id>}}` on success, or null when
 *          the picker was cancelled or the image could not be decoded.
 */
export async function postInlayImage(){
    const img = await selectSingleFile([
        //image format
        'jpg',
        'jpeg',
        'png',
        'webp'
    ])
    if(!img){
        return null
    }
    const extension = img.name.split('.').at(-1)
    //draw in canvas to convert to png
    const canvas = document.createElement('canvas')
    const ctx = canvas.getContext('2d')
    if(!ctx){
        //2d context can be unavailable (e.g. out of memory); fail gracefully
        return null
    }
    const imgObj = new Image()
    //BUGFIX: `let drawHeight, drawWidth = 0` only initialized drawWidth;
    //initialize both so a failed load never stores `undefined` dimensions
    let drawHeight = 0
    let drawWidth = 0
    const objectUrl = URL.createObjectURL(new Blob([img.data], {type: `image/${extension}`}))
    imgObj.src = objectUrl
    const loaded = await new Promise<boolean>((resolve) => {
        imgObj.onload = () => {
            drawHeight = imgObj.height
            drawWidth = imgObj.width

            //resize image to fit inlay, if it's too big (max 1024px)
            if(drawHeight > 1024){
                drawWidth = drawWidth * (1024 / drawHeight)
                drawHeight = 1024
            }
            if(drawWidth > 1024){
                drawHeight = drawHeight * (1024 / drawWidth)
                drawWidth = 1024
            }
            drawHeight = Math.floor(drawHeight)
            drawWidth = Math.floor(drawWidth)

            canvas.width = drawWidth
            canvas.height = drawHeight
            ctx.drawImage(imgObj, 0, 0, drawWidth, drawHeight)
            resolve(true)
        }
        //BUGFIX: without onerror, a corrupt/undecodable file left this
        //promise pending forever and the caller hung
        imgObj.onerror = () => {
            resolve(false)
        }
    })
    //BUGFIX: revoke the blob URL — it was previously leaked on every upload
    URL.revokeObjectURL(objectUrl)
    if(!loaded){
        return null
    }

    const dataURI = canvas.toDataURL('image/png')
    const imgid = v4()
    await inlayStorage.setItem(imgid, {
        name: img.name,
        data: dataURI,
        ext: extension,
        height: drawHeight,
        width: drawWidth
    })
    return `{{inlay::${imgid}}}`
}
/**
 * Fetch a stored inlay image record by its id.
 * Resolves to the stored record, or null when nothing is stored under `id`.
 */
export async function getInlayImage(id: string){
    type InlayRecord = {
        name: string,
        data: string
        ext: string
        height: number
        width: number
    }
    const stored = await inlayStorage.getItem(id) as InlayRecord | null
    if(stored === null){
        return null
    }
    return stored
}
/**
 * Whether the currently selected model accepts inline images.
 * True for the GPT-vision model family (ids prefixed with 'gptv').
 */
export function supportsInlayImage(){
    const currentModel = get(DataBase).aiModel
    return currentModel.startsWith('gptv')
}

View File

@@ -10,6 +10,7 @@ import css from '@adobe/css-tools'
import { selectedCharID } from './stores';
import { calcString } from './process/infunctions';
import { findCharacterbyId } from './util';
import { getInlayImage } from './image';
const convertora = new showdown.Converter({
simpleLineBreaks: true,
@@ -93,11 +94,25 @@ async function parseAdditionalAssets(data:string, char:simpleCharacterArgument|c
if(mode === 'back'){
return `<div style="width:100%;height:100%;background: linear-gradient(rgba(0, 0, 0, 0.8), rgba(0, 0, 0, 0.8)),url(${path}); background-size: cover;"></div>`
}
break
}
return ''
})
}
if(db.inlayImage){
const inlayMatch = data.match(/{{inlay::(.+?)}}/g)
if(inlayMatch){
for(const inlay of inlayMatch){
const id = inlay.substring(9, inlay.length - 2)
const img = await getInlayImage(id)
if(img){
data = data.replace(inlay, `<img src="${img.data}"/>`)
}
}
}
}
return data
}

View File

@@ -19,6 +19,7 @@ import { runTrigger, type additonalSysPrompt } from "./triggers";
import { HypaProcesser } from "./memory/hypamemory";
import { additionalInformations } from "./embedding/addinfo";
import { cipherChat, decipherChat } from "./cipherChat";
import { getInlayImage, supportsInlayImage } from "../image";
export interface OpenAIChat{
role: 'system'|'user'|'assistant'|'function'
@@ -33,7 +34,6 @@ export interface OpenAIChatFull extends OpenAIChat{
name: string
arguments:string
}
}
export const doingChat = writable(false)
@@ -464,6 +464,35 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
if(!msg.chatId){
msg.chatId = v4()
}
let inlays:string[] = []
if(db.inlayImage){
const inlayMatch = formedChat.match(/{{inlay::(.+?)}}/g)
if(inlayMatch){
for(const inlay of inlayMatch){
inlays.push(inlay)
}
}
}
if(inlays.length > 0){
for(const inlay of inlays){
const inlayName = inlay.replace('{{inlay::', '').replace('}}', '')
const inlayData = await getInlayImage(inlayName)
if(inlayData){
if(supportsInlayImage()){
const imgchat = {
role: msg.role === 'user' ? 'user' : 'assistant',
content: inlayData.data,
memo: `inlayImage-${inlayData.height}-${inlayData.width}`,
} as const
chats.push(imgchat)
currentTokens += await tokenizer.tokenizeChat(imgchat)
}
}
formedChat = formedChat.replace(inlay, '')
}
}
const chat:OpenAIChat = {
role: msg.role === 'user' ? 'user' : 'assistant',
content: formedChat,
@@ -786,7 +815,6 @@ export async function sendChat(chatProcessIndex = -1,arg:{chatAdditonalTokens?:n
}
}
const req = await requestChatData({
formated: formated,
biasString: biases,

View File

@@ -16,6 +16,8 @@ import { SignatureV4 } from "@smithy/signature-v4";
import { HttpRequest } from "@smithy/protocol-http";
import { Sha256 } from "@aws-crypto/sha256-js";
import { v4 } from "uuid";
import { cloneDeep } from "lodash";
import { supportsInlayImage } from "../image";
@@ -88,13 +90,34 @@ export async function requestChatData(arg:requestDataArgument, model:'model'|'su
}
}
// Text part of a multi-part OpenAI chat message.
interface OpenAITextContents {
type: 'text'
text: string
}

// Image part of a multi-part OpenAI chat message.
// NOTE(review): the OpenAI vision API spells this content-part type
// 'image_url', not 'image' — confirm this literal against the live API;
// as written the request body may be rejected by OpenAI.
interface OpenAIImageContents {
type: 'image'
image_url: {
url: string
// 'low' | 'high' per the vision API; sourced from db.gptVisionQuality
detail: string
}
}

// One content part of a chat message: plain text or an image reference.
type OpenAIContents = OpenAITextContents|OpenAIImageContents

// OpenAIChat extended with multi-part content plus request-side
// bookkeeping fields that are stripped before the request is sent.
export interface OpenAIChatExtra {
role: 'system'|'user'|'assistant'|'function'
content: string|OpenAIContents[]
// internal marker, e.g. 'inlayImage-<height>-<width>'; deleted pre-send
memo?:string
name?:string
removable?:boolean
}
export async function requestChatDataMain(arg:requestDataArgument, model:'model'|'submodel', abortSignal:AbortSignal=null):Promise<requestDataResponse> {
const db = get(DataBase)
let result = ''
let formated = arg.formated
let formated = cloneDeep(arg.formated)
let maxTokens = arg.maxTokens ??db.maxResponse
let temperature = arg.temperature ?? (db.temperature / 100)
let bias = arg.bias
@@ -125,27 +148,66 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
case 'gpt35_1106':
case 'gpt35_0301':
case 'gpt4_0301':
case 'gptvi4_1106':
case 'openrouter':
case 'reverse_proxy':{
for(let i=0;i<formated.length;i++){
if(formated[i].role !== 'function'){
if(arg.isGroupChat && formated[i].name){
formated[i].content = formated[i].name + ": " + formated[i].content
formated[i].name = undefined
let formatedChat:OpenAIChatExtra[] = []
if(db.inlayImage){
let pendingImages:OpenAIImageContents[] = []
for(let i=0;i<formated.length;i++){
const m = formated[i]
if(m.memo && m.memo.startsWith('inlayImage')){
pendingImages.push({
"type": "image",
"image_url": {
"url": m.content,
"detail": db.gptVisionQuality
}
})
}
if(!(formated[i].name && formated[i].name.startsWith('example_') && db.newOAIHandle)){
formated[i].name = undefined
else{
if(pendingImages.length > 0 && m.role === 'user'){
let v:OpenAIChatExtra = cloneDeep(m)
let contents:OpenAIContents[] = pendingImages
contents.push({
"type": "text",
"text": m.content
})
v.content = contents
formatedChat.push(v)
pendingImages = []
}
else{
formatedChat.push(m)
}
}
if(db.newOAIHandle && formated[i].memo && formated[i].memo.startsWith('NewChat')){
formated[i].content === ''
}
}
else{
formatedChat = formated
}
for(let i=0;i<formatedChat.length;i++){
if(formatedChat[i].role !== 'function'){
if(arg.isGroupChat && formatedChat[i].name){
formatedChat[i].content = formatedChat[i].name + ": " + formatedChat[i].content
formatedChat[i].name = undefined
}
delete formated[i].memo
delete formated[i].removable
if(!(formatedChat[i].name && formatedChat[i].name.startsWith('example_') && db.newOAIHandle)){
formatedChat[i].name = undefined
}
if(db.newOAIHandle && formatedChat[i].memo && formatedChat[i].memo.startsWith('NewChat')){
formatedChat[i].content = ''
}
delete formatedChat[i].memo
delete formatedChat[i].removable
}
}
if(db.newOAIHandle){
formated = formated.filter(m => {
formatedChat = formatedChat.filter(m => {
return m.content !== ''
})
}
@@ -195,6 +257,7 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
}
console.log(bias)
db.cipherChat = false
let body = ({
model: aiModel === 'openrouter' ? db.openrouterRequestModel :
@@ -207,12 +270,13 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
: requestModel === "gpt4_0613" ? 'gpt-4-0613'
: requestModel === "gpt4_32k_0613" ? 'gpt-4-32k-0613'
: requestModel === "gpt4_1106" ? 'gpt-4-1106-preview'
: requestModel === "gptvi4_1106" ? 'gpt-4-vision-preview'
: requestModel === "gpt35_1106" ? 'gpt-3.5-turbo-1106'
: requestModel === 'gpt35_0301' ? 'gpt-3.5-turbo-0301'
: requestModel === 'gpt4_0301' ? 'gpt-4-0301'
: (!requestModel) ? 'gpt-3.5-turbo'
: requestModel,
messages: formated,
messages: formatedChat,
temperature: temperature,
max_tokens: maxTokens,
presence_penalty: arg.PresensePenalty || (db.PresensePenalty / 100),
@@ -226,11 +290,17 @@ export async function requestChatDataMain(arg:requestDataArgument, model:'model'
body.seed = db.generationSeed
}
if(db.newOAIHandle){
if(db.putUserOpen){
// @ts-ignore
body.user = getOpenUserString()
}
if(supportsInlayImage()){
// inlay models doesn't support logit_bias
// @ts-ignore
delete body.logit_bias
}
let replacerURL = aiModel === 'openrouter' ? "https://openrouter.ai/api/v1/chat/completions" :
(aiModel === 'reverse_proxy') ? (db.forceReplaceUrl) : ('https://api.openai.com/v1/chat/completions')

View File

@@ -319,6 +319,7 @@ export function setDatabase(data:Database){
data.customProxyRequestModel ??= ''
data.generationSeed ??= -1
data.newOAIHandle ??= true
data.gptVisionQuality ??= 'low'
changeLanguage(data.language)
DataBase.set(data)
}
@@ -494,6 +495,9 @@ export interface Database{
customProxyRequestModel:string
generationSeed:number
newOAIHandle:boolean
putUserOpen: boolean
inlayImage:boolean
gptVisionQuality:string
}
export interface customscript{

View File

@@ -3,6 +3,7 @@ import type { Tokenizer } from "@mlc-ai/web-tokenizers";
import { DataBase, type character } from "./storage/database";
import { get } from "svelte/store";
import type { OpenAIChat } from "./process";
import { supportsInlayImage } from "./image";
async function encode(data:string):Promise<(number[]|Uint32Array|Int32Array)>{
let db = get(DataBase)
@@ -94,6 +95,46 @@ export class ChatTokenizer {
this.useName = useName
}
async tokenizeChat(data:OpenAIChat) {
if(data.memo && data.memo.startsWith('inlayImage')){
const db = get(DataBase)
if(!supportsInlayImage()){
return this.chatAdditonalTokens
}
if(db.gptVisionQuality === 'low'){
return 87
}
let encoded = this.chatAdditonalTokens
const memo = data.memo.split('-')
let height = parseInt(memo[1])
let width = parseInt(memo[2])
if(height === width){
if(height > 768){
height = 768
width = 768
}
}
else if(height > width){
if(width > 768){
width = 768
height = height * (768 / width)
}
}
else{
if(height > 768){
height = 768
width = width * (768 / height)
}
}
const chunkSize = Math.ceil(width / 512) * Math.ceil(height / 512)
encoded += chunkSize * 2
encoded += 85
return encoded
}
let encoded = (await encode(data.content)).length + this.chatAdditonalTokens
if(data.name && this.useName ==='name'){
encoded += (await encode(data.name)).length + 1