From 51740827962b3bbe416177d815327f687965dc7d Mon Sep 17 00:00:00 2001 From: Kwaroran Date: Thu, 12 Dec 2024 08:38:33 +0900 Subject: [PATCH] Add Gemini related features --- src/lib/ChatScreens/DefaultChatScreen.svelte | 33 ++++++-- src/ts/characterCards.ts | 2 +- src/ts/model/modellist.ts | 5 +- src/ts/parser.svelte.ts | 19 +++-- src/ts/persona.ts | 2 +- src/ts/process/files/{image.ts => inlays.ts} | 82 ++++++++++++++------ src/ts/process/files/multisend.ts | 49 ++++++++++-- src/ts/process/index.svelte.ts | 18 ++++- src/ts/process/inlayScreen.ts | 2 +- src/ts/process/lua.ts | 2 +- src/ts/process/request.ts | 17 +++- src/ts/process/triggers.ts | 2 +- src/ts/tokenizer.ts | 2 +- 13 files changed, 180 insertions(+), 55 deletions(-) rename src/ts/process/files/{image.ts => inlays.ts} (61%) diff --git a/src/lib/ChatScreens/DefaultChatScreen.svelte b/src/lib/ChatScreens/DefaultChatScreen.svelte index 7bdf0b87..ecba3cbc 100644 --- a/src/lib/ChatScreens/DefaultChatScreen.svelte +++ b/src/lib/ChatScreens/DefaultChatScreen.svelte @@ -2,7 +2,7 @@ import Suggestion from './Suggestion.svelte'; import AdvancedChatEditor from './AdvancedChatEditor.svelte'; - import { CameraIcon, DatabaseIcon, DicesIcon, GlobeIcon, ImagePlusIcon, LanguagesIcon, Laugh, MenuIcon, MicOffIcon, PackageIcon, Plus, RefreshCcwIcon, ReplyIcon, Send, StepForwardIcon } from "lucide-svelte"; + import { CameraIcon, DatabaseIcon, DicesIcon, GlobeIcon, ImagePlusIcon, LanguagesIcon, Laugh, MenuIcon, MicOffIcon, PackageIcon, Plus, RefreshCcwIcon, ReplyIcon, Send, StepForwardIcon, XIcon } from "lucide-svelte"; import { selectedCharID, PlaygroundStore, createSimpleCharacter } from "../../ts/stores.svelte"; import Chat from "./Chat.svelte"; import { type Message, type character, type groupChat } from "../../ts/storage/database.svelte"; @@ -25,7 +25,7 @@ import { PreUnreroll, Prereroll } from 'src/ts/process/prereroll'; import { processMultiCommand } from 'src/ts/process/command'; import { postChatFile } from 'src/ts/process/files/multisend'; - import { getInlayImage } from 'src/ts/process/files/image'; + import { getInlayAsset } from 'src/ts/process/files/inlays'; import PlaygroundMenu from '../Playground/PlaygroundMenu.svelte'; import { ConnectionOpenStore } from 'src/ts/sync/multiuser'; @@ -546,8 +546,31 @@ {#if fileInput.length > 0}
{#each fileInput as file, i} - {#await getInlayImage(file) then inlayImage} - Inlay + {#await getInlayAsset(file) then inlayAsset} +
+ {#if inlayAsset.type === 'image'} + Inlay + {:else if inlayAsset.type === 'video'} + + {:else if inlayAsset.type === 'audio'} + + {:else} +
{file}
+ {/if} + +
{/await} {/each}
@@ -741,7 +764,7 @@
{ const res = await postChatFile(messageInput) - if(res?.type === 'image'){ + if(res?.type === 'asset'){ fileInput.push(res.data) updateInputSizeAll() } diff --git a/src/ts/characterCards.ts b/src/ts/characterCards.ts index 2d5b9910..ff1e2892 100644 --- a/src/ts/characterCards.ts +++ b/src/ts/characterCards.ts @@ -9,7 +9,7 @@ import { AppendableBuffer, BlankWriter, checkCharOrder, downloadFile, isNodeServ import { SettingsMenuIndex, ShowRealmFrameStore, selectedCharID, settingsOpen } from "./stores.svelte" import { convertImage, hasher } from "./parser.svelte" import { CCardLib, type CharacterCardV3, type LorebookEntry } from '@risuai/ccardlib' -import { reencodeImage } from "./process/files/image" +import { reencodeImage } from "./process/files/inlays" import { PngChunk } from "./pngChunk" import type { OnnxModelFiles } from "./process/transformers" import { CharXReader, CharXWriter } from "./process/processzip" diff --git a/src/ts/model/modellist.ts b/src/ts/model/modellist.ts index 49711994..d319f127 100644 --- a/src/ts/model/modellist.ts +++ b/src/ts/model/modellist.ts @@ -782,13 +782,14 @@ export const LLMModels: LLMModel[] = [ tokenizer: LLMTokenizer.GoogleCloud }, { - name: "gemini-2.0-flash-exp", + name: "Gemini Flash 2.0 Exp", id: 'gemini-2.0-flash-exp', provider: LLMProvider.GoogleCloud, format: LLMFormat.GoogleCloud, flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported, LLMFlags.hasAudioInput, LLMFlags.hasVideoInput], parameters: ['temperature', 'top_k', 'top_p', 'presence_penalty', 'frequency_penalty'], - tokenizer: LLMTokenizer.GoogleCloud + tokenizer: LLMTokenizer.GoogleCloud, + recommended: true }, { name: "Gemini Pro 1.5", diff --git a/src/ts/parser.svelte.ts b/src/ts/parser.svelte.ts index 563737fc..3e19cff7 100644 --- a/src/ts/parser.svelte.ts +++ b/src/ts/parser.svelte.ts @@ -9,7 +9,7 @@ import css, { type CssAtRuleAST } from '@adobe/css-tools' import { SizeStore, selectedCharID } from './stores.svelte'; import { calcString } from './process/infunctions'; import { findCharacterbyId, getPersonaPrompt, getUserIcon, getUserName, parseKeyValue, sfc32, sleep, uuidtoNumber } from './util'; -import { getInlayImage } from './process/files/image'; +import { getInlayAsset } from './process/files/inlays'; import { getModuleAssets, getModuleLorebooks } from './process/modules'; import type { OpenAIChat } from './process/index.svelte'; import hljs from 'highlight.js/lib/core' @@ -428,15 +428,22 @@ function getClosestMatch(name:string, assetPaths:{[key:string]:{path:string, ext return assetPaths[closest] } -async function parseInlayImages(data:string){ +async function parseInlayAssets(data:string){ const inlayMatch = data.match(/{{inlay::(.+?)}}/g) if(inlayMatch){ for(const inlay of inlayMatch){ const id = inlay.substring(9, inlay.length - 2) - const img = await getInlayImage(id) - if(img){ - data = data.replace(inlay, ``) + const asset = await getInlayAsset(id) + if(asset?.type === 'image'){ + data = data.replace(inlay, ``) } + if(asset?.type === 'video'){ + data = data.replace(inlay, ``) + } + if(asset?.type === 'audio'){ + data = data.replace(inlay, ``) + } + } } return data @@ -473,7 +480,7 @@ export async function ParseMarkdown( if(firstParsed !== data && char && char.type !== 'group'){ data = await parseAdditionalAssets(data, char, additionalAssetMode, 'post') } - data = await parseInlayImages(data ?? '') + data = await parseInlayAssets(data ?? '') data = encodeStyle(data) if(mode === 'normal'){ diff --git a/src/ts/persona.ts b/src/ts/persona.ts index 4b5478bc..52771947 100644 --- a/src/ts/persona.ts +++ b/src/ts/persona.ts @@ -4,7 +4,7 @@ import { getUserName, selectSingleFile, sleep } from "./util" import { alertError, alertNormal, alertStore } from "./alert" import { downloadFile, readImage } from "./globalApi.svelte" import { language } from "src/lang" -import { reencodeImage } from "./process/files/image" +import { reencodeImage } from "./process/files/inlays" import { PngChunk } from "./pngChunk" import { v4 } from "uuid" diff --git a/src/ts/process/files/image.ts b/src/ts/process/files/inlays.ts similarity index 61% rename from src/ts/process/files/image.ts rename to src/ts/process/files/inlays.ts index 1441c628..0d08b9cf 100644 --- a/src/ts/process/files/image.ts +++ b/src/ts/process/files/inlays.ts @@ -2,25 +2,73 @@ import localforage from "localforage"; import { v4 } from "uuid"; import { getDatabase } from "../../storage/database.svelte"; import { checkImageType } from "../../parser.svelte"; +import { getModelInfo, LLMFlags } from "src/ts/model/modellist"; + +const inlayImageExts = [ + 'jpg', 'jpeg', 'png', 'gif', 'webp', 'avif' +] + +const inlayAudioExts = [ + 'wav', 'mp3', 'ogg', 'flac' +] + +const inlayVideoExts = [ + 'webm', 'mp4', 'mkv' +] const inlayStorage = localforage.createInstance({ name: 'inlay', storeName: 'inlay' }) -export async function postInlayImage(img:{ +export async function postInlayAsset(img:{ name:string, data:Uint8Array }){ const extention = img.name.split('.').at(-1) const imgObj = new Image() - imgObj.src = URL.createObjectURL(new Blob([img.data], {type: `image/${extention}`})) - return await writeInlayImage(imgObj, { - name: img.name, - ext: extention - }) + if(inlayImageExts.includes(extention)){ + imgObj.src = URL.createObjectURL(new Blob([img.data], {type: `image/${extention}`})) + + return await writeInlayImage(imgObj, { + name: img.name, + ext: extention + }) + } + + if(inlayAudioExts.includes(extention)){ + const b64 = Buffer.from(img.data).toString('base64') + const dataURI = `data:audio/${extention};base64,${b64}` + const imgid = v4() + + await inlayStorage.setItem(imgid, { + name: img.name, + data: dataURI, + ext: extention, + type: 'audio' + }) + + return `${imgid}` + } + + if(inlayVideoExts.includes(extention)){ + const b64 = Buffer.from(img.data).toString('base64') + const dataURI = `data:video/${extention};base64,${b64}` + const imgid = v4() + + await inlayStorage.setItem(imgid, { + name: img.name, + data: dataURI, + ext: extention, + type: 'video' + }) + + return `${imgid}` + } + + return null } export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string, ext?:string} = {}) { @@ -60,21 +108,23 @@ export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string await inlayStorage.setItem(imgid, { name: arg.name ?? imgid, data: dataURI, - ext: arg.ext ?? 'png', + ext: 'png', height: drawHeight, - width: drawWidth + width: drawWidth, + type: 'image' }) return `${imgid}` } -export async function getInlayImage(id: string){ +export async function getInlayAsset(id: string){ const img:{ name: string, data: string ext: string height: number width: number + type: 'image'|'video'|'audio' } = await inlayStorage.getItem(id) if(img === null){ return null @@ -84,19 +134,7 @@ export async function getInlayImage(id: string){ export function supportsInlayImage(){ const db = getDatabase() - return db.aiModel.startsWith('gptv') || db.aiModel === 'gemini-pro-vision' || db.aiModel.startsWith('gemini-exp') || db.aiModel.startsWith('claude-3') || db.aiModel.startsWith('gpt4_turbo') || db.aiModel.startsWith('gpt5') || db.aiModel.startsWith('gpt4o') || - (db.aiModel === 'reverse_proxy' && ( - db.proxyRequestModel?.startsWith('gptv') || db.proxyRequestModel === 'gemini-pro-vision' || db.proxyRequestModel?.startsWith('claude-3') || db.proxyRequestModel.startsWith('gpt4_turbo') || - db.proxyRequestModel?.startsWith('gpt5') || db.proxyRequestModel?.startsWith('gpt4o') || - db.proxyRequestModel === 'custom' && ( - db.customProxyRequestModel?.startsWith('gptv') || - db.customProxyRequestModel === 'gemini-pro-vision' || - db.customProxyRequestModel?.startsWith('claude-3') || - db.customProxyRequestModel.startsWith('gpt-4-turbo') || - db.customProxyRequestModel?.startsWith('gpt5') || - db.customProxyRequestModel?.startsWith('gpt4o') - ) - )) + return getModelInfo(db.aiModel).flags.includes(LLMFlags.hasImageInput) } export async function reencodeImage(img:Uint8Array){ diff --git a/src/ts/process/files/multisend.ts b/src/ts/process/files/multisend.ts index 65bd95ba..e79d5033 100644 --- a/src/ts/process/files/multisend.ts +++ b/src/ts/process/files/multisend.ts @@ -5,7 +5,7 @@ import { doingChat, sendChat } from '../index.svelte'; import { downloadFile, isTauri } from 'src/ts/globalApi.svelte'; import { HypaProcesser } from '../memory/hypamemory'; import { BufferToText as BufferToText, selectSingleFile, sleep } from 'src/ts/util'; -import { postInlayImage } from './image'; +import { postInlayAsset } from './inlays'; type sendFileArg = { file:string @@ -178,11 +178,11 @@ async function sendXMLFile(arg:sendFileArg) { return Buffer.from(`\n${message}\n\n`).toString('base64') } -type postFileResult = postFileResultImage | postFileResultVoid | postFileResultText +type postFileResult = postFileResultAsset | postFileResultVoid | postFileResultText -type postFileResultImage = { +type postFileResultAsset = { data: string, - type: 'image', + type: 'asset', } type postFileResultVoid = { @@ -201,6 +201,22 @@ export async function postChatFile(query:string):Promise{ 'jpeg', 'png', 'webp', + 'gif', + 'avif', + + //audio format + 'wav', + 'mp3', + 'ogg', + 'flac', + + //video format + 'mp4', + 'webm', + 'mpeg', + 'avi', + + //other format 'po', // 'pdf', 'txt' @@ -243,14 +259,33 @@ export async function postChatFile(query:string):Promise{ name: file.name } } + + //image format case 'jpg': case 'jpeg': case 'png': - case 'webp':{ - const postData = await postInlayImage(file) + case 'webp': + case 'gif': + case 'avif': + + //audio format + case 'wav': + case 'mp3': + case 'ogg': + case 'flac': + + //video format + case 'mp4': + case 'webm': + case 'mpeg': + case 'avi':{ + const postData = await postInlayAsset(file) + if(!postData){ + return null + } return { data: postData, - type: 'image' + type: 'asset' } } case 'txt':{ diff --git a/src/ts/process/index.svelte.ts b/src/ts/process/index.svelte.ts index d8b622e7..b3e33813 100644 --- a/src/ts/process/index.svelte.ts +++ b/src/ts/process/index.svelte.ts @@ -18,7 +18,7 @@ import { groupOrder } from "./group"; import { runTrigger } from "./triggers"; import { HypaProcesser } from "./memory/hypamemory"; import { additionalInformations } from "./embedding/addinfo"; -import { getInlayImage, supportsInlayImage } from "./files/image"; +import { getInlayAsset, supportsInlayImage } from "./files/inlays"; import { getGenerationModelString } from "./models/modelString"; import { connectionOpen, peerRevertChat, peerSafeCheck, peerSync } from "../sync/multiuser"; import { runInlayScreen } from "./inlayScreen"; @@ -29,6 +29,7 @@ import { hanuraiMemory } from "./memory/hanuraiMemory"; import { hypaMemoryV2 } from "./memory/hypav2"; import { runLuaEditTrigger } from "./lua"; import { parseChatML } from "../parser.svelte"; +import { getModelInfo, LLMFlags } from "../model/modellist"; export interface OpenAIChat{ role: 'system'|'user'|'assistant'|'function' @@ -699,12 +700,13 @@ export async function sendChat(chatProcessIndex = -1,arg:{ } let multimodal:MultiModal[] = [] + const modelinfo = getModelInfo(DBState.db.aiModel) if(inlays.length > 0){ for(const inlay of inlays){ const inlayName = inlay.replace('{{inlay::', '').replace('}}', '') - const inlayData = await getInlayImage(inlayName) - if(inlayData){ - if(supportsInlayImage()){ + const inlayData = await getInlayAsset(inlayName) + if(inlayData?.type === 'image'){ + if(modelinfo.flags.includes(LLMFlags.hasImageInput)){ multimodal.push({ type: 'image', base64: inlayData.data, @@ -717,6 +719,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{ formatedChat += `[${captionResult[0].generated_text}]` } } + if(inlayData?.type === 'video' || inlayData?.type === 'audio'){ + if(multimodal.length === 0){ + multimodal.push({ + type: inlayData.type, + base64: inlayData.data + }) + } + } formatedChat = formatedChat.replace(inlay, '') } } diff --git a/src/ts/process/inlayScreen.ts b/src/ts/process/inlayScreen.ts index 5e7cbaaf..9e945171 100644 --- a/src/ts/process/inlayScreen.ts +++ b/src/ts/process/inlayScreen.ts @@ -1,4 +1,4 @@ -import { writeInlayImage } from "./files/image"; +import { writeInlayImage } from "./files/inlays"; import type { character } from "../storage/database.svelte"; import { generateAIImage } from "./stableDiff"; diff --git a/src/ts/process/lua.ts b/src/ts/process/lua.ts index bdd4e0fb..3571860e 100644 --- a/src/ts/process/lua.ts +++ b/src/ts/process/lua.ts @@ -6,7 +6,7 @@ import { ReloadGUIPointer, selectedCharID } from "../stores.svelte"; import { alertError, alertInput, alertNormal } from "../alert"; import { HypaProcesser } from "./memory/hypamemory"; import { generateAIImage } from "./stableDiff"; -import { writeInlayImage } from "./files/image"; +import { writeInlayImage } from "./files/inlays"; import type { OpenAIChat } from "./index.svelte"; import { requestChatData } from "./request"; import { v4 } from "uuid"; diff --git a/src/ts/process/request.ts b/src/ts/process/request.ts index eccdb823..f577f406 100644 --- a/src/ts/process/request.ts +++ b/src/ts/process/request.ts @@ -11,7 +11,7 @@ import { risuChatParser } from "../parser.svelte"; import { SignatureV4 } from "@smithy/signature-v4"; import { HttpRequest } from "@smithy/protocol-http"; import { Sha256 } from "@aws-crypto/sha256-js"; -import { supportsInlayImage } from "./files/image"; +import { supportsInlayImage } from "./files/inlays"; import { Capacitor } from "@capacitor/core"; import { getFreeOpenRouterModel } from "../model/openrouter"; import { runTransformers } from "./transformers"; @@ -95,7 +95,9 @@ type ParameterMap = { [key in Parameter]?: string; }; -function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended): { [key: string]: any } { +function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended, arg:{ + ignoreTopKIfOne?:boolean +} = {}): { [key: string]: any } { const db = getDatabase() if(db.seperateParametersEnabled && ModelMode !== 'model'){ if(ModelMode === 'submodel'){ @@ -103,6 +105,10 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[], } for(const parameter of parameters){ + if(parameter === 'top_k' && arg.ignoreTopKIfOne && db.seperateParameters[ModelMode][parameter] === 1){ + continue + } + let value = db.seperateParameters[ModelMode][parameter] if(value === -1000 || value === undefined){ @@ -117,6 +123,9 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[], for(const parameter of parameters){ let value = 0 + if(parameter === 'top_k' && arg.ignoreTopKIfOne && db.top_k === 1){ + value = 0 + } switch(parameter){ case 'temperature':{ value = db.temperature === -1000 ? -1000 : (db.temperature / 100) @@ -1495,7 +1504,9 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise 'top_k': "topK", 'presence_penalty': "presencePenalty", 'frequency_penalty': "frequencyPenalty" - }, arg.mode), + }, arg.mode, { + ignoreTopKIfOne: true + }), safetySettings: uncensoredCatagory, systemInstruction: { parts: [ diff --git a/src/ts/process/triggers.ts b/src/ts/process/triggers.ts index e7833d10..ac4d24a8 100644 --- a/src/ts/process/triggers.ts +++ b/src/ts/process/triggers.ts @@ -11,7 +11,7 @@ import type { OpenAIChat } from "./index.svelte"; import { HypaProcesser } from "./memory/hypamemory"; import { requestChatData } from "./request"; import { generateAIImage } from "./stableDiff"; -import { writeInlayImage } from "./files/image"; +import { writeInlayImage } from "./files/inlays"; import { runLua } from "./lua"; diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts index a3e8f4b8..4565454f 100644 --- a/src/ts/tokenizer.ts +++ b/src/ts/tokenizer.ts @@ -2,7 +2,7 @@ import type { Tiktoken } from "@dqbd/tiktoken"; import type { Tokenizer } from "@mlc-ai/web-tokenizers"; import { type groupChat, type character, type Chat, getCurrentCharacter, getDatabase } from "./storage/database.svelte"; import type { MultiModal, OpenAIChat } from "./process/index.svelte"; -import { supportsInlayImage } from "./process/files/image"; +import { supportsInlayImage } from "./process/files/inlays"; import { risuChatParser } from "./parser.svelte"; import { tokenizeGGUFModel } from "./process/models/local"; import { globalFetch } from "./globalApi.svelte";