diff --git a/src/lib/ChatScreens/DefaultChatScreen.svelte b/src/lib/ChatScreens/DefaultChatScreen.svelte
index 7bdf0b87..ecba3cbc 100644
--- a/src/lib/ChatScreens/DefaultChatScreen.svelte
+++ b/src/lib/ChatScreens/DefaultChatScreen.svelte
@@ -2,7 +2,7 @@
import Suggestion from './Suggestion.svelte';
import AdvancedChatEditor from './AdvancedChatEditor.svelte';
- import { CameraIcon, DatabaseIcon, DicesIcon, GlobeIcon, ImagePlusIcon, LanguagesIcon, Laugh, MenuIcon, MicOffIcon, PackageIcon, Plus, RefreshCcwIcon, ReplyIcon, Send, StepForwardIcon } from "lucide-svelte";
+ import { CameraIcon, DatabaseIcon, DicesIcon, GlobeIcon, ImagePlusIcon, LanguagesIcon, Laugh, MenuIcon, MicOffIcon, PackageIcon, Plus, RefreshCcwIcon, ReplyIcon, Send, StepForwardIcon, XIcon } from "lucide-svelte";
import { selectedCharID, PlaygroundStore, createSimpleCharacter } from "../../ts/stores.svelte";
import Chat from "./Chat.svelte";
import { type Message, type character, type groupChat } from "../../ts/storage/database.svelte";
@@ -25,7 +25,7 @@
import { PreUnreroll, Prereroll } from 'src/ts/process/prereroll';
import { processMultiCommand } from 'src/ts/process/command';
import { postChatFile } from 'src/ts/process/files/multisend';
- import { getInlayImage } from 'src/ts/process/files/image';
+ import { getInlayAsset } from 'src/ts/process/files/inlays';
import PlaygroundMenu from '../Playground/PlaygroundMenu.svelte';
import { ConnectionOpenStore } from 'src/ts/sync/multiuser';
@@ -546,8 +546,31 @@
{#if fileInput.length > 0}
{
const res = await postChatFile(messageInput)
- if(res?.type === 'image'){
+ if(res?.type === 'asset'){
fileInput.push(res.data)
updateInputSizeAll()
}
diff --git a/src/ts/characterCards.ts b/src/ts/characterCards.ts
index 2d5b9910..ff1e2892 100644
--- a/src/ts/characterCards.ts
+++ b/src/ts/characterCards.ts
@@ -9,7 +9,7 @@ import { AppendableBuffer, BlankWriter, checkCharOrder, downloadFile, isNodeServ
import { SettingsMenuIndex, ShowRealmFrameStore, selectedCharID, settingsOpen } from "./stores.svelte"
import { convertImage, hasher } from "./parser.svelte"
import { CCardLib, type CharacterCardV3, type LorebookEntry } from '@risuai/ccardlib'
-import { reencodeImage } from "./process/files/image"
+import { reencodeImage } from "./process/files/inlays"
import { PngChunk } from "./pngChunk"
import type { OnnxModelFiles } from "./process/transformers"
import { CharXReader, CharXWriter } from "./process/processzip"
diff --git a/src/ts/model/modellist.ts b/src/ts/model/modellist.ts
index 49711994..d319f127 100644
--- a/src/ts/model/modellist.ts
+++ b/src/ts/model/modellist.ts
@@ -782,13 +782,14 @@ export const LLMModels: LLMModel[] = [
tokenizer: LLMTokenizer.GoogleCloud
},
{
- name: "gemini-2.0-flash-exp",
+ name: "Gemini Flash 2.0 Exp",
id: 'gemini-2.0-flash-exp',
provider: LLMProvider.GoogleCloud,
format: LLMFormat.GoogleCloud,
flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported, LLMFlags.hasAudioInput, LLMFlags.hasVideoInput],
parameters: ['temperature', 'top_k', 'top_p', 'presence_penalty', 'frequency_penalty'],
- tokenizer: LLMTokenizer.GoogleCloud
+ tokenizer: LLMTokenizer.GoogleCloud,
+ recommended: true
},
{
name: "Gemini Pro 1.5",
diff --git a/src/ts/parser.svelte.ts b/src/ts/parser.svelte.ts
index 563737fc..3e19cff7 100644
--- a/src/ts/parser.svelte.ts
+++ b/src/ts/parser.svelte.ts
@@ -9,7 +9,7 @@ import css, { type CssAtRuleAST } from '@adobe/css-tools'
import { SizeStore, selectedCharID } from './stores.svelte';
import { calcString } from './process/infunctions';
import { findCharacterbyId, getPersonaPrompt, getUserIcon, getUserName, parseKeyValue, sfc32, sleep, uuidtoNumber } from './util';
-import { getInlayImage } from './process/files/image';
+import { getInlayAsset } from './process/files/inlays';
import { getModuleAssets, getModuleLorebooks } from './process/modules';
import type { OpenAIChat } from './process/index.svelte';
import hljs from 'highlight.js/lib/core'
@@ -428,15 +428,22 @@ function getClosestMatch(name:string, assetPaths:{[key:string]:{path:string, ext
return assetPaths[closest]
}
-async function parseInlayImages(data:string){
+async function parseInlayAssets(data:string){
const inlayMatch = data.match(/{{inlay::(.+?)}}/g)
if(inlayMatch){
for(const inlay of inlayMatch){
const id = inlay.substring(9, inlay.length - 2)
- const img = await getInlayImage(id)
- if(img){
- data = data.replace(inlay, `

`)
+ const asset = await getInlayAsset(id)
+ if(asset?.type === 'image'){
+ data = data.replace(inlay, `

`)
}
+ if(asset?.type === 'video'){
+ data = data.replace(inlay, `
`)
+ }
+ if(asset?.type === 'audio'){
+ data = data.replace(inlay, `
`)
+ }
+
}
}
return data
@@ -473,7 +480,7 @@ export async function ParseMarkdown(
if(firstParsed !== data && char && char.type !== 'group'){
data = await parseAdditionalAssets(data, char, additionalAssetMode, 'post')
}
- data = await parseInlayImages(data ?? '')
+ data = await parseInlayAssets(data ?? '')
data = encodeStyle(data)
if(mode === 'normal'){
diff --git a/src/ts/persona.ts b/src/ts/persona.ts
index 4b5478bc..52771947 100644
--- a/src/ts/persona.ts
+++ b/src/ts/persona.ts
@@ -4,7 +4,7 @@ import { getUserName, selectSingleFile, sleep } from "./util"
import { alertError, alertNormal, alertStore } from "./alert"
import { downloadFile, readImage } from "./globalApi.svelte"
import { language } from "src/lang"
-import { reencodeImage } from "./process/files/image"
+import { reencodeImage } from "./process/files/inlays"
import { PngChunk } from "./pngChunk"
import { v4 } from "uuid"
diff --git a/src/ts/process/files/image.ts b/src/ts/process/files/inlays.ts
similarity index 61%
rename from src/ts/process/files/image.ts
rename to src/ts/process/files/inlays.ts
index 1441c628..0d08b9cf 100644
--- a/src/ts/process/files/image.ts
+++ b/src/ts/process/files/inlays.ts
@@ -2,25 +2,73 @@ import localforage from "localforage";
import { v4 } from "uuid";
import { getDatabase } from "../../storage/database.svelte";
import { checkImageType } from "../../parser.svelte";
+import { getModelInfo, LLMFlags } from "src/ts/model/modellist";
+
+const inlayImageExts = [
+ 'jpg', 'jpeg', 'png', 'gif', 'webp', 'avif'
+]
+
+const inlayAudioExts = [
+ 'wav', 'mp3', 'ogg', 'flac'
+]
+
+const inlayVideoExts = [
+ 'webm', 'mp4', 'mkv'
+]
const inlayStorage = localforage.createInstance({
name: 'inlay',
storeName: 'inlay'
})
-export async function postInlayImage(img:{
+export async function postInlayAsset(img:{
name:string,
data:Uint8Array
}){
const extention = img.name.split('.').at(-1)
const imgObj = new Image()
- imgObj.src = URL.createObjectURL(new Blob([img.data], {type: `image/${extention}`}))
- return await writeInlayImage(imgObj, {
- name: img.name,
- ext: extention
- })
+ if(inlayImageExts.includes(extention)){
+ imgObj.src = URL.createObjectURL(new Blob([img.data], {type: `image/${extention}`}))
+
+ return await writeInlayImage(imgObj, {
+ name: img.name,
+ ext: extention
+ })
+ }
+
+ if(inlayAudioExts.includes(extention)){
+ const b64 = Buffer.from(img.data).toString('base64')
+ const dataURI = `data:audio/${extention};base64,${b64}`
+ const imgid = v4()
+
+ await inlayStorage.setItem(imgid, {
+ name: img.name,
+ data: dataURI,
+ ext: extention,
+ type: 'audio'
+ })
+
+ return `${imgid}`
+ }
+
+ if(inlayVideoExts.includes(extention)){
+ const b64 = Buffer.from(img.data).toString('base64')
+ const dataURI = `data:video/${extention};base64,${b64}`
+ const imgid = v4()
+
+ await inlayStorage.setItem(imgid, {
+ name: img.name,
+ data: dataURI,
+ ext: extention,
+ type: 'video'
+ })
+
+ return `${imgid}`
+ }
+
+ return null
}
export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string, ext?:string} = {}) {
@@ -60,21 +108,23 @@ export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string
await inlayStorage.setItem(imgid, {
name: arg.name ?? imgid,
data: dataURI,
- ext: arg.ext ?? 'png',
+ ext: 'png',
height: drawHeight,
- width: drawWidth
+ width: drawWidth,
+ type: 'image'
})
return `${imgid}`
}
-export async function getInlayImage(id: string){
+export async function getInlayAsset(id: string){
const img:{
name: string,
data: string
ext: string
height: number
width: number
+ type: 'image'|'video'|'audio'
} = await inlayStorage.getItem(id)
if(img === null){
return null
@@ -84,19 +134,7 @@ export async function getInlayImage(id: string){
export function supportsInlayImage(){
const db = getDatabase()
- return db.aiModel.startsWith('gptv') || db.aiModel === 'gemini-pro-vision' || db.aiModel.startsWith('gemini-exp') || db.aiModel.startsWith('claude-3') || db.aiModel.startsWith('gpt4_turbo') || db.aiModel.startsWith('gpt5') || db.aiModel.startsWith('gpt4o') ||
- (db.aiModel === 'reverse_proxy' && (
- db.proxyRequestModel?.startsWith('gptv') || db.proxyRequestModel === 'gemini-pro-vision' || db.proxyRequestModel?.startsWith('claude-3') || db.proxyRequestModel.startsWith('gpt4_turbo') ||
- db.proxyRequestModel?.startsWith('gpt5') || db.proxyRequestModel?.startsWith('gpt4o') ||
- db.proxyRequestModel === 'custom' && (
- db.customProxyRequestModel?.startsWith('gptv') ||
- db.customProxyRequestModel === 'gemini-pro-vision' ||
- db.customProxyRequestModel?.startsWith('claude-3') ||
- db.customProxyRequestModel.startsWith('gpt-4-turbo') ||
- db.customProxyRequestModel?.startsWith('gpt5') ||
- db.customProxyRequestModel?.startsWith('gpt4o')
- )
- ))
+ return getModelInfo(db.aiModel).flags.includes(LLMFlags.hasImageInput)
}
export async function reencodeImage(img:Uint8Array){
diff --git a/src/ts/process/files/multisend.ts b/src/ts/process/files/multisend.ts
index 65bd95ba..e79d5033 100644
--- a/src/ts/process/files/multisend.ts
+++ b/src/ts/process/files/multisend.ts
@@ -5,7 +5,7 @@ import { doingChat, sendChat } from '../index.svelte';
import { downloadFile, isTauri } from 'src/ts/globalApi.svelte';
import { HypaProcesser } from '../memory/hypamemory';
import { BufferToText as BufferToText, selectSingleFile, sleep } from 'src/ts/util';
-import { postInlayImage } from './image';
+import { postInlayAsset } from './inlays';
type sendFileArg = {
file:string
@@ -178,11 +178,11 @@ async function sendXMLFile(arg:sendFileArg) {
return Buffer.from(`
\n${message}\n\n`).toString('base64')
}
-type postFileResult = postFileResultImage | postFileResultVoid | postFileResultText
+type postFileResult = postFileResultAsset | postFileResultVoid | postFileResultText
-type postFileResultImage = {
+type postFileResultAsset = {
data: string,
- type: 'image',
+ type: 'asset',
}
type postFileResultVoid = {
@@ -201,6 +201,22 @@ export async function postChatFile(query:string):Promise
{
'jpeg',
'png',
'webp',
+ 'gif',
+ 'avif',
+
+ //audio format
+ 'wav',
+ 'mp3',
+ 'ogg',
+ 'flac',
+
+ //video format
+ 'mp4',
+ 'webm',
+ 'mpeg',
+ 'avi',
+
+ //other format
'po',
// 'pdf',
'txt'
@@ -243,14 +259,33 @@ export async function postChatFile(query:string):Promise{
name: file.name
}
}
+
+ //image format
case 'jpg':
case 'jpeg':
case 'png':
- case 'webp':{
- const postData = await postInlayImage(file)
+ case 'webp':
+ case 'gif':
+ case 'avif':
+
+ //audio format
+ case 'wav':
+ case 'mp3':
+ case 'ogg':
+ case 'flac':
+
+ //video format
+ case 'mp4':
+ case 'webm':
+ case 'mpeg':
+ case 'avi':{
+ const postData = await postInlayAsset(file)
+ if(!postData){
+ return null
+ }
return {
data: postData,
- type: 'image'
+ type: 'asset'
}
}
case 'txt':{
diff --git a/src/ts/process/index.svelte.ts b/src/ts/process/index.svelte.ts
index d8b622e7..b3e33813 100644
--- a/src/ts/process/index.svelte.ts
+++ b/src/ts/process/index.svelte.ts
@@ -18,7 +18,7 @@ import { groupOrder } from "./group";
import { runTrigger } from "./triggers";
import { HypaProcesser } from "./memory/hypamemory";
import { additionalInformations } from "./embedding/addinfo";
-import { getInlayImage, supportsInlayImage } from "./files/image";
+import { getInlayAsset, supportsInlayImage } from "./files/inlays";
import { getGenerationModelString } from "./models/modelString";
import { connectionOpen, peerRevertChat, peerSafeCheck, peerSync } from "../sync/multiuser";
import { runInlayScreen } from "./inlayScreen";
@@ -29,6 +29,7 @@ import { hanuraiMemory } from "./memory/hanuraiMemory";
import { hypaMemoryV2 } from "./memory/hypav2";
import { runLuaEditTrigger } from "./lua";
import { parseChatML } from "../parser.svelte";
+import { getModelInfo, LLMFlags } from "../model/modellist";
export interface OpenAIChat{
role: 'system'|'user'|'assistant'|'function'
@@ -699,12 +700,13 @@ export async function sendChat(chatProcessIndex = -1,arg:{
}
let multimodal:MultiModal[] = []
+ const modelinfo = getModelInfo(DBState.db.aiModel)
if(inlays.length > 0){
for(const inlay of inlays){
const inlayName = inlay.replace('{{inlay::', '').replace('}}', '')
- const inlayData = await getInlayImage(inlayName)
- if(inlayData){
- if(supportsInlayImage()){
+ const inlayData = await getInlayAsset(inlayName)
+ if(inlayData?.type === 'image'){
+ if(modelinfo.flags.includes(LLMFlags.hasImageInput)){
multimodal.push({
type: 'image',
base64: inlayData.data,
@@ -717,6 +719,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{
formatedChat += `[${captionResult[0].generated_text}]`
}
}
+ if(inlayData?.type === 'video' || inlayData?.type === 'audio'){
+ if(multimodal.length === 0){
+ multimodal.push({
+ type: inlayData.type,
+ base64: inlayData.data
+ })
+ }
+ }
formatedChat = formatedChat.replace(inlay, '')
}
}
diff --git a/src/ts/process/inlayScreen.ts b/src/ts/process/inlayScreen.ts
index 5e7cbaaf..9e945171 100644
--- a/src/ts/process/inlayScreen.ts
+++ b/src/ts/process/inlayScreen.ts
@@ -1,4 +1,4 @@
-import { writeInlayImage } from "./files/image";
+import { writeInlayImage } from "./files/inlays";
import type { character } from "../storage/database.svelte";
import { generateAIImage } from "./stableDiff";
diff --git a/src/ts/process/lua.ts b/src/ts/process/lua.ts
index bdd4e0fb..3571860e 100644
--- a/src/ts/process/lua.ts
+++ b/src/ts/process/lua.ts
@@ -6,7 +6,7 @@ import { ReloadGUIPointer, selectedCharID } from "../stores.svelte";
import { alertError, alertInput, alertNormal } from "../alert";
import { HypaProcesser } from "./memory/hypamemory";
import { generateAIImage } from "./stableDiff";
-import { writeInlayImage } from "./files/image";
+import { writeInlayImage } from "./files/inlays";
import type { OpenAIChat } from "./index.svelte";
import { requestChatData } from "./request";
import { v4 } from "uuid";
diff --git a/src/ts/process/request.ts b/src/ts/process/request.ts
index eccdb823..f577f406 100644
--- a/src/ts/process/request.ts
+++ b/src/ts/process/request.ts
@@ -11,7 +11,7 @@ import { risuChatParser } from "../parser.svelte";
import { SignatureV4 } from "@smithy/signature-v4";
import { HttpRequest } from "@smithy/protocol-http";
import { Sha256 } from "@aws-crypto/sha256-js";
-import { supportsInlayImage } from "./files/image";
+import { supportsInlayImage } from "./files/inlays";
import { Capacitor } from "@capacitor/core";
import { getFreeOpenRouterModel } from "../model/openrouter";
import { runTransformers } from "./transformers";
@@ -95,7 +95,9 @@ type ParameterMap = {
[key in Parameter]?: string;
};
-function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended): { [key: string]: any } {
+function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended, arg:{
+ ignoreTopKIfOne?:boolean
+} = {}): { [key: string]: any } {
const db = getDatabase()
if(db.seperateParametersEnabled && ModelMode !== 'model'){
if(ModelMode === 'submodel'){
@@ -103,6 +105,10 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],
}
for(const parameter of parameters){
+ if(parameter === 'top_k' && arg.ignoreTopKIfOne && db.seperateParameters[ModelMode][parameter] === 1){
+ continue
+ }
+
let value = db.seperateParameters[ModelMode][parameter]
if(value === -1000 || value === undefined){
@@ -117,6 +123,9 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],
for(const parameter of parameters){
let value = 0
+ if(parameter === 'top_k' && arg.ignoreTopKIfOne && db.top_k === 1){
+ value = 0
+ }
switch(parameter){
case 'temperature':{
value = db.temperature === -1000 ? -1000 : (db.temperature / 100)
@@ -1495,7 +1504,9 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
'top_k': "topK",
'presence_penalty': "presencePenalty",
'frequency_penalty': "frequencyPenalty"
- }, arg.mode),
+ }, arg.mode, {
+ ignoreTopKIfOne: true
+ }),
safetySettings: uncensoredCatagory,
systemInstruction: {
parts: [
diff --git a/src/ts/process/triggers.ts b/src/ts/process/triggers.ts
index e7833d10..ac4d24a8 100644
--- a/src/ts/process/triggers.ts
+++ b/src/ts/process/triggers.ts
@@ -11,7 +11,7 @@ import type { OpenAIChat } from "./index.svelte";
import { HypaProcesser } from "./memory/hypamemory";
import { requestChatData } from "./request";
import { generateAIImage } from "./stableDiff";
-import { writeInlayImage } from "./files/image";
+import { writeInlayImage } from "./files/inlays";
import { runLua } from "./lua";
diff --git a/src/ts/tokenizer.ts b/src/ts/tokenizer.ts
index a3e8f4b8..4565454f 100644
--- a/src/ts/tokenizer.ts
+++ b/src/ts/tokenizer.ts
@@ -2,7 +2,7 @@ import type { Tiktoken } from "@dqbd/tiktoken";
import type { Tokenizer } from "@mlc-ai/web-tokenizers";
import { type groupChat, type character, type Chat, getCurrentCharacter, getDatabase } from "./storage/database.svelte";
import type { MultiModal, OpenAIChat } from "./process/index.svelte";
-import { supportsInlayImage } from "./process/files/image";
+import { supportsInlayImage } from "./process/files/inlays";
import { risuChatParser } from "./parser.svelte";
import { tokenizeGGUFModel } from "./process/models/local";
import { globalFetch } from "./globalApi.svelte";