Add Gemini image response
@@ -1072,4 +1072,5 @@ export const languageEnglish = {
     automaticCachePoint: "Automatic Cache Point",
     experimentalChatCompression: "Experimental Chat Data Handling",
     loadingChatData: "Loading Chat Data",
+    outputImageModal: "Output Image Modal",
 }
@@ -126,6 +126,8 @@
 <Check bind:check={DBState.db.promptSettings.sendName} name={language.formatGroupInSingle} className="mt-4"/>
 <Check bind:check={DBState.db.promptSettings.utilOverride} name={language.utilOverride} className="mt-4"/>
 <Check bind:check={DBState.db.jsonSchemaEnabled} name={language.enableJsonSchema} className="mt-4"/>
+<Check bind:check={DBState.db.outputImageModal} name={language.outputImageModal} className="mt-4"/>
+
 <Check bind:check={DBState.db.strictJsonSchema} name={language.strictJsonSchema} className="mt-4"/>

 {#if DBState.db.showUnrecommended}
@@ -81,6 +81,14 @@
         md += `> ${modals.length} non-text content(s) included\n`
     }

+    if(formated[i].thoughts && formated[i].thoughts.length > 0){
+        md += `> ${formated[i].thoughts.length} thought(s) included\n`
+    }
+
+    if(formated[i].cachePoint){
+        md += `> Cache point\n`
+    }
+
     md += '```\n' + formated[i].content.replaceAll('```', '\\`\\`\\`') + '\n```\n'
 }
 $doingChat = false
@@ -969,7 +969,7 @@ export const LLMModels: LLMModel[] = [
         id: 'gemini-2.0-flash-exp',
         provider: LLMProvider.GoogleCloud,
         format: LLMFormat.GoogleCloud,
-        flags: [LLMFlags.geminiBlockOff,LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported, LLMFlags.hasAudioInput, LLMFlags.hasVideoInput, LLMFlags.hasStreaming, LLMFlags.requiresAlternateRole],
+        flags: [LLMFlags.geminiBlockOff,LLMFlags.hasImageInput, LLMFlags.hasImageOutput, LLMFlags.poolSupported, LLMFlags.hasAudioInput, LLMFlags.hasVideoInput, LLMFlags.hasStreaming, LLMFlags.requiresAlternateRole],
         parameters: ['temperature', 'top_k', 'top_p', 'presence_penalty', 'frequency_penalty'],
         tokenizer: LLMTokenizer.GoogleCloud,
     },
@@ -495,14 +495,14 @@ function trimmer(str:string){
 }

 async function parseInlayAssets(data:string){
-    const inlayMatch = data.match(/{{(inlay|inlayed)::(.+?)}}/g)
+    const inlayMatch = data.match(/{{(inlay|inlayed|inlayeddata)::(.+?)}}/g)
     if(inlayMatch){
         for(const inlay of inlayMatch){
             const inlayType = inlay.startsWith('{{inlayed') ? 'inlayed' : 'inlay'
             const id = inlay.substring(inlay.indexOf('::') + 2, inlay.length - 2)
             const asset = await getInlayAsset(id)
-            let prefix = inlayType === 'inlayed' ? `<div class="risu-inlay-image">` : ''
-            let postfix = inlayType === 'inlayed' ? `</div>\n\n` : ''
+            let prefix = inlayType !== 'inlay' ? `<div class="risu-inlay-image">` : ''
+            let postfix = inlayType !== 'inlay' ? `</div>\n\n` : ''
             switch(asset?.type){
                 case 'image':
                     data = data.replace(inlay, `${prefix}<img src="${asset.data}"/>${postfix}`)
@@ -71,7 +71,7 @@ export async function postInlayAsset(img:{
     return null
 }

-export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string, ext?:string} = {}) {
+export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string, ext?:string, id?:string} = {}) {

     let drawHeight = 0
     let drawWidth = 0
@@ -103,7 +103,7 @@ export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string
     const dataURI = canvas.toDataURL('image/png')


-    const imgid = v4()
+    const imgid = arg.id ?? v4()

     await inlayStorage.setItem(imgid, {
         name: arg.name ?? imgid,
@@ -132,6 +132,17 @@ export async function getInlayAsset(id: string){
     return img
 }

+export async function setInlayAsset(id: string, img:{
+    name: string,
+    data: string,
+    ext: string,
+    height: number,
+    width: number,
+    type: 'image'|'video'|'audio'
+}){
+    await inlayStorage.setItem(id, img)
+}
+
 export function supportsInlayImage(){
     const db = getDatabase()
     return getModelInfo(db.aiModel).flags.includes(LLMFlags.hasImageInput)
@@ -724,10 +724,19 @@ export async function sendChat(chatProcessIndex = -1,arg:{
             }
             let inlays:string[] = []
             if(msg.role === 'char'){
-                formatedChat = formatedChat.replace(/{{(inlay|inlayed)::(.+?)}}/g, '')
+                formatedChat = formatedChat.replace(/{{(inlay|inlayed|inlayeddata)::(.+?)}}/g, (
+                    match: string,
+                    p1: string,
+                    p2: string
+                ) => {
+                    if(p2 && p1 === 'inlayeddata'){
+                        inlays.push(p2)
+                    }
+                    return ''
+                })
             }
             else{
-                const inlayMatch = formatedChat.match(/{{(inlay|inlayed)::(.+?)}}/g)
+                const inlayMatch = formatedChat.match(/{{(inlay|inlayed|inlayeddata)::(.+?)}}/g)
                 if(inlayMatch){
                     for(const inlay of inlayMatch){
                         inlays.push(inlay)
@@ -1293,7 +1302,8 @@ export async function sendChat(chatProcessIndex = -1,arg:{
         isGroupChat: nowChatroom.type === 'group',
         bias: {},
         continue: arg.continue,
-        chatId: generationId
+        chatId: generationId,
+        imageResponse: DBState.db.outputImageModal
     }, 'model', abortSignal)

     let result = ''
@@ -11,7 +11,7 @@ import { risuChatParser } from "../parser.svelte";
 import { SignatureV4 } from "@smithy/signature-v4";
 import { HttpRequest } from "@smithy/protocol-http";
 import { Sha256 } from "@aws-crypto/sha256-js";
-import { supportsInlayImage } from "./files/inlays";
+import { supportsInlayImage, writeInlayImage } from "./files/inlays";
 import { Capacitor } from "@capacitor/core";
 import { getFreeOpenRouterModel } from "../model/openrouter";
 import { runTransformers } from "./transformers";
@@ -42,6 +42,7 @@ interface requestDataArgument{
     noMultiGen?:boolean
     schema?:string
     extractJson?:string
+    imageResponse?:boolean
 }

 interface RequestDataArgumentExtended extends requestDataArgument{
@@ -374,13 +375,15 @@ export interface OpenAIChatExtra {
     cachePoint?:boolean
 }

-function reformater(formated:OpenAIChat[],modelInfo:LLMModel){
+export function reformater(formated:OpenAIChat[],modelInfo:LLMModel|LLMFlags[]){

+    const flags = Array.isArray(modelInfo) ? modelInfo : modelInfo.flags
+
     const db = getDatabase()
     let systemPrompt:OpenAIChat|null = null

-    if(!modelInfo.flags.includes(LLMFlags.hasFullSystemPrompt)){
-        if(modelInfo.flags.includes(LLMFlags.hasFirstSystemPrompt)){
+    if(!flags.includes(LLMFlags.hasFullSystemPrompt)){
+        if(flags.includes(LLMFlags.hasFirstSystemPrompt)){
             while(formated[0].role === 'system'){
                 if(systemPrompt){
                     systemPrompt.content += '\n\n' + formated[0].content
@@ -400,7 +403,7 @@ function reformater(formated:OpenAIChat[],modelInfo:LLMModel){
         }
     }

-    if(modelInfo.flags.includes(LLMFlags.requiresAlternateRole)){
+    if(flags.includes(LLMFlags.requiresAlternateRole)){
         let newFormated:OpenAIChat[] = []
         for(let i=0;i<formated.length;i++){
             const m = formated[i]
@@ -427,6 +430,12 @@ function reformater(formated:OpenAIChat[],modelInfo:LLMModel){
                     newFormated[newFormated.length-1].thoughts.push(...m.thoughts)
                 }

+                if(m.cachePoint){
+                    if(!newFormated[newFormated.length-1].cachePoint){
+                        newFormated[newFormated.length-1].cachePoint = true
+                    }
+                }
+
                 continue
             }
             else{
@@ -436,7 +445,7 @@ function reformater(formated:OpenAIChat[],modelInfo:LLMModel){
         formated = newFormated
     }

-    if(modelInfo.flags.includes(LLMFlags.mustStartWithUserInput)){
+    if(flags.includes(LLMFlags.mustStartWithUserInput)){
         if(formated.length === 0 || formated[0].role !== 'user'){
             formated.unshift({
                 role: 'user',
@@ -1804,7 +1813,7 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
     const body = {
         contents: reformatedChat,
         generation_config: applyParameters({
-            "maxOutputTokens": maxTokens,
+            "maxOutputTokens": maxTokens
         }, para, {
             'top_p': "topP",
             'top_k': "topK",
@@ -1823,6 +1832,16 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
         },
     }

+    if(systemPrompt === ''){
+        delete body.systemInstruction
+    }
+
+    if(!arg.imageResponse){
+        body.generation_config.responseModalities = [
+            'TEXT', 'IMAGE'
+        ]
+    }
+
     let headers:{[key:string]:string} = {}

     const PROJECT_ID=db.google.projectId
@@ -2060,7 +2079,14 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
                     rDatas.push('')
                 }

-                rDatas[rDatas.length-1] += part.text
+                rDatas[rDatas.length-1] += part.text ?? ''
+                if(part.inlineData){
+                    const imgHTML = new Image()
+                    const id = crypto.randomUUID()
+                    imgHTML.src = `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`
+                    writeInlayImage(imgHTML)
+                    rDatas[rDatas.length-1] += (`\n{{inlayeddata::${id}}}\n`)
+                }
             }
         }
@@ -2072,9 +2098,15 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
         }

         if(rDatas.length > 1){
-            const thought = rDatas.splice(rDatas.length-2, 1)[0]
-            rDatas[rDatas.length-1] = `<Thoughts>${thought}</Thoughts>\n\n${rDatas.join('\n\n')}`
+            if(arg.modelInfo.flags.includes(LLMFlags.geminiThinking)){
+                const thought = rDatas.splice(rDatas.length-2, 1)[0]
+                rDatas[rDatas.length-1] = `<Thoughts>${thought}</Thoughts>\n\n${rDatas.join('\n\n')}`
+            }
+            else{
+                rDatas[rDatas.length-1] = rDatas.join('\n\n')
+            }
         }

         control.enqueue({
             '0': rDatas[rDatas.length-1],
         })
@@ -2110,7 +2142,7 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
     }

     let rDatas:string[] = ['']
-    const processDataItem = (data:any) => {
+    const processDataItem = async (data:any) => {
         const parts = data?.candidates?.[0]?.content?.parts
         if(parts){

@@ -2120,7 +2152,21 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
                 rDatas.push('')
             }

-            rDatas[rDatas.length-1] += part.text
+            rDatas[rDatas.length-1] += part.text ?? ''
+            if(part.inlineData){
+                const imgHTML = new Image()
+                const id = crypto.randomUUID()
+                imgHTML.src = `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`
+                console.log('decoding', part.inlineData.mimeType, part.inlineData.data, id)
+                console.log('writing')
+                await writeInlayImage(imgHTML, {
+                    id: id
+                })
+                console.log(JSON.stringify(rDatas))
+                rDatas[rDatas.length-1] += (`\n{{inlayeddata::${id}}}\n`)
+                console.log(JSON.stringify(rDatas))
+                console.log('done', id)
+            }
         }
     }
@@ -2141,10 +2187,10 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
     // traverse responded data if it contains multipart contents
     if (typeof (res.data)[Symbol.iterator] === 'function') {
         for(const data of res.data){
-            processDataItem(data)
+            await processDataItem(data)
         }
     } else {
-        processDataItem(res.data)
+        await processDataItem(res.data)
     }

     if(arg.extractJson && (db.jsonSchemaEnabled || arg.schema)){
@@ -2154,10 +2200,13 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
         }
     }

-    if(rDatas.length > 1){
+    if(rDatas.length > 1 && arg.modelInfo.flags.includes(LLMFlags.geminiThinking)){
         const thought = rDatas.splice(rDatas.length-2, 1)[0]
         rDatas[rDatas.length-1] = `<Thoughts>${thought}</Thoughts>\n\n${rDatas.join('\n\n')}`
     }
+    else if(rDatas.length > 1){
+        rDatas[rDatas.length-1] = rDatas.join('\n\n')
+    }

     return {
         type: 'success',
@@ -3,7 +3,7 @@ import { getDatabase, type character } from "../storage/database.svelte"
 import { requestChatData } from "./request"
 import { alertError } from "../alert"
 import { fetchNative, globalFetch, readImage } from "../globalApi.svelte"
-import { CharEmotion } from "../stores.svelte"
+import { CharEmotion, DBState } from "../stores.svelte"
 import type { OpenAIChat } from "./index.svelte"
 import { processZip } from "./processzip"
 import { keiServerURL } from "../kei/kei"
@@ -928,6 +928,8 @@ export interface Database{
     automaticCachePoint: boolean
     chatCompression: boolean
     claudeRetrivalCaching: boolean
+    outputImageModal: boolean
+
 }

 interface SeparateParameters{
@@ -941,8 +943,11 @@ interface SeparateParameters{
     presence_penalty?:number
     reasoning_effort?:number
     thinking_tokens?:number
+    outputImageModal?:boolean
 }

+type OutputModal = 'image'|'audio'|'video'
+
 export interface customscript{
     comment: string;
     in:string
@@ -1258,6 +1263,7 @@ export interface botPreset{
     regex?:customscript[]
     reasonEffort?:number
     thinkingTokens?:number
+    outputImageModal?:boolean
 }

@@ -1574,6 +1580,7 @@ export function saveCurrentPreset(){
         image: pres?.[db.botPresetsId]?.image ?? '',
         reasonEffort: db.reasoningEffort ?? 0,
         thinkingTokens: db.thinkingTokens ?? null,
+        outputImageModal: db.outputImageModal ?? false
     }
     db.botPresets = pres
     setDatabase(db)
@@ -1685,6 +1692,7 @@ export function setPreset(db:Database, newPres: botPreset){
     db.presetRegex = newPres.regex ?? []
     db.reasoningEffort = newPres.reasonEffort ?? 0
     db.thinkingTokens = newPres.thinkingTokens ?? null
+    db.outputImageModal = newPres.outputImageModal ?? false
     return db
 }