Add Gemini related features

Kwaroran
2024-12-12 08:38:33 +09:00
parent bffa3d73d4
commit 5174082796
13 changed files with 180 additions and 55 deletions

View File

@@ -2,7 +2,7 @@
import Suggestion from './Suggestion.svelte';
import AdvancedChatEditor from './AdvancedChatEditor.svelte';
import { CameraIcon, DatabaseIcon, DicesIcon, GlobeIcon, ImagePlusIcon, LanguagesIcon, Laugh, MenuIcon, MicOffIcon, PackageIcon, Plus, RefreshCcwIcon, ReplyIcon, Send, StepForwardIcon } from "lucide-svelte";
import { CameraIcon, DatabaseIcon, DicesIcon, GlobeIcon, ImagePlusIcon, LanguagesIcon, Laugh, MenuIcon, MicOffIcon, PackageIcon, Plus, RefreshCcwIcon, ReplyIcon, Send, StepForwardIcon, XIcon } from "lucide-svelte";
import { selectedCharID, PlaygroundStore, createSimpleCharacter } from "../../ts/stores.svelte";
import Chat from "./Chat.svelte";
import { type Message, type character, type groupChat } from "../../ts/storage/database.svelte";
@@ -25,7 +25,7 @@
import { PreUnreroll, Prereroll } from 'src/ts/process/prereroll';
import { processMultiCommand } from 'src/ts/process/command';
import { postChatFile } from 'src/ts/process/files/multisend';
import { getInlayImage } from 'src/ts/process/files/image';
import { getInlayAsset } from 'src/ts/process/files/inlays';
import PlaygroundMenu from '../Playground/PlaygroundMenu.svelte';
import { ConnectionOpenStore } from 'src/ts/sync/multiuser';
@@ -546,8 +546,31 @@
{#if fileInput.length > 0}
<div class="flex items-center ml-4 flex-wrap p-2 m-2 border-darkborderc border rounded-md">
{#each fileInput as file, i}
{#await getInlayImage(file) then inlayImage}
<img src={inlayImage.data} alt="Inlay" class="max-w-24 max-h-24">
{#await getInlayAsset(file) then inlayAsset}
<div class="relative">
{#if inlayAsset.type === 'image'}
<img src={inlayAsset.data} alt="Inlay" class="max-w-48 max-h-48 border border-darkborderc">
{:else if inlayAsset.type === 'video'}
<video controls class="max-w-48 max-h-48 border border-darkborderc">
<source src={inlayAsset.data} type="video/mp4" />
<track kind="captions" />
Your browser does not support the video tag.
</video>
{:else if inlayAsset.type === 'audio'}
<audio controls class="max-w-48 max-h-24 border border-darkborderc">
<source src={inlayAsset.data} type="audio/mpeg" />
Your browser does not support the audio tag.
</audio>
{:else}
<div class="max-w-24 max-h-24">{file}</div>
{/if}
<button class="absolute -right-1 -top-1 p-1 bg-darkbg text-textcolor rounded-md transition-colors hover:text-draculared focus:text-draculared" onclick={() => {
fileInput.splice(i, 1)
updateInputSizeAll()
}}>
<XIcon size={18} />
</button>
</div>
{/await}
{/each}
</div>
@@ -741,7 +764,7 @@
<div class="flex items-center cursor-pointer hover:text-green-500 transition-colors" onclick={async () => {
const res = await postChatFile(messageInput)
if(res?.type === 'image'){
if(res?.type === 'asset'){
fileInput.push(res.data)
updateInputSizeAll()
}

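For reference, a minimal sketch of the value this template awaits from getInlayAsset; the field names are inferred from what the markup reads, not copied from the module's declaration:

type InlayAssetPreview = {
    data: string                       // data URI fed to the <img>, <video> or <audio> source
    type: 'image' | 'video' | 'audio'  // picks which element is rendered; anything else falls back to showing the raw file id
}

The X button only splices the entry out of fileInput and recomputes the input size; the handler shown here does not delete anything from inlay storage.
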
View File

@@ -9,7 +9,7 @@ import { AppendableBuffer, BlankWriter, checkCharOrder, downloadFile, isNodeServ
import { SettingsMenuIndex, ShowRealmFrameStore, selectedCharID, settingsOpen } from "./stores.svelte"
import { convertImage, hasher } from "./parser.svelte"
import { CCardLib, type CharacterCardV3, type LorebookEntry } from '@risuai/ccardlib'
import { reencodeImage } from "./process/files/image"
import { reencodeImage } from "./process/files/inlays"
import { PngChunk } from "./pngChunk"
import type { OnnxModelFiles } from "./process/transformers"
import { CharXReader, CharXWriter } from "./process/processzip"

View File

@@ -782,13 +782,14 @@ export const LLMModels: LLMModel[] = [
tokenizer: LLMTokenizer.GoogleCloud
},
{
name: "gemini-2.0-flash-exp",
name: "Gemini Flash 2.0 Exp",
id: 'gemini-2.0-flash-exp',
provider: LLMProvider.GoogleCloud,
format: LLMFormat.GoogleCloud,
flags: [LLMFlags.hasImageInput, LLMFlags.hasFirstSystemPrompt, LLMFlags.poolSupported, LLMFlags.hasAudioInput, LLMFlags.hasVideoInput],
parameters: ['temperature', 'top_k', 'top_p', 'presence_penalty', 'frequency_penalty'],
tokenizer: LLMTokenizer.GoogleCloud
tokenizer: LLMTokenizer.GoogleCloud,
recommended: true
},
{
name: "Gemini Pro 1.5",

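The flags on this entry are what the rest of the commit keys off. A hedged sketch of how they are consumed (getModelInfo and LLMFlags are the helpers imported elsewhere in this diff; lookup-by-id is assumed from those call sites):

import { getModelInfo, LLMFlags } from 'src/ts/model/modellist'

const info = getModelInfo('gemini-2.0-flash-exp')
const hasImage = info.flags.includes(LLMFlags.hasImageInput)   // what supportsInlayImage() now returns for this model
const hasVideo = info.flags.includes(LLMFlags.hasVideoInput)   // declared above alongside audio input
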
View File

@@ -9,7 +9,7 @@ import css, { type CssAtRuleAST } from '@adobe/css-tools'
import { SizeStore, selectedCharID } from './stores.svelte';
import { calcString } from './process/infunctions';
import { findCharacterbyId, getPersonaPrompt, getUserIcon, getUserName, parseKeyValue, sfc32, sleep, uuidtoNumber } from './util';
import { getInlayImage } from './process/files/image';
import { getInlayAsset } from './process/files/inlays';
import { getModuleAssets, getModuleLorebooks } from './process/modules';
import type { OpenAIChat } from './process/index.svelte';
import hljs from 'highlight.js/lib/core'
@@ -428,15 +428,22 @@ function getClosestMatch(name:string, assetPaths:{[key:string]:{path:string, ext
return assetPaths[closest]
}
async function parseInlayImages(data:string){
async function parseInlayAssets(data:string){
const inlayMatch = data.match(/{{inlay::(.+?)}}/g)
if(inlayMatch){
for(const inlay of inlayMatch){
const id = inlay.substring(9, inlay.length - 2)
const img = await getInlayImage(id)
if(img){
data = data.replace(inlay, `<img src="${img.data}"/>`)
const asset = await getInlayAsset(id)
if(asset?.type === 'image'){
data = data.replace(inlay, `<img src="${asset.data}"/>`)
}
if(asset?.type === 'video'){
data = data.replace(inlay, `<video controls><source src="${asset.data}" type="video/mp4"></video>`)
}
if(asset?.type === 'audio'){
data = data.replace(inlay, `<audio controls><source src="${asset.data}" type="audio/mpeg"></audio>`)
}
}
}
return data
@@ -473,7 +480,7 @@ export async function ParseMarkdown(
if(firstParsed !== data && char && char.type !== 'group'){
data = await parseAdditionalAssets(data, char, additionalAssetMode, 'post')
}
data = await parseInlayImages(data ?? '')
data = await parseInlayAssets(data ?? '')
data = encodeStyle(data)
if(mode === 'normal'){

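A compact restatement of the substitution rule parseInlayAssets applies, written synchronously with a stubbed lookup so it stands alone; the real function awaits getInlayAsset per id, and the MIME types are hard-coded to mp4/mpeg exactly as above:

type Asset = { type: 'image' | 'video' | 'audio'; data: string }

function replaceInlays(text: string, lookup: (id: string) => Asset | null): string {
    return text.replace(/{{inlay::(.+?)}}/g, (placeholder, id) => {
        const asset = lookup(id)
        if (!asset) return placeholder   // unknown id: leave the token untouched
        if (asset.type === 'image') return `<img src="${asset.data}"/>`
        if (asset.type === 'video') return `<video controls><source src="${asset.data}" type="video/mp4"></video>`
        return `<audio controls><source src="${asset.data}" type="audio/mpeg"></audio>`
    })
}
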
View File

@@ -4,7 +4,7 @@ import { getUserName, selectSingleFile, sleep } from "./util"
import { alertError, alertNormal, alertStore } from "./alert"
import { downloadFile, readImage } from "./globalApi.svelte"
import { language } from "src/lang"
import { reencodeImage } from "./process/files/image"
import { reencodeImage } from "./process/files/inlays"
import { PngChunk } from "./pngChunk"
import { v4 } from "uuid"

View File

@@ -2,25 +2,73 @@ import localforage from "localforage";
import { v4 } from "uuid";
import { getDatabase } from "../../storage/database.svelte";
import { checkImageType } from "../../parser.svelte";
import { getModelInfo, LLMFlags } from "src/ts/model/modellist";
const inlayImageExts = [
'jpg', 'jpeg', 'png', 'gif', 'webp', 'avif'
]
const inlayAudioExts = [
'wav', 'mp3', 'ogg', 'flac'
]
const inlayVideoExts = [
'webm', 'mp4', 'mkv'
]
const inlayStorage = localforage.createInstance({
name: 'inlay',
storeName: 'inlay'
})
export async function postInlayImage(img:{
export async function postInlayAsset(img:{
name:string,
data:Uint8Array
}){
const extention = img.name.split('.').at(-1)
const imgObj = new Image()
imgObj.src = URL.createObjectURL(new Blob([img.data], {type: `image/${extention}`}))
return await writeInlayImage(imgObj, {
name: img.name,
ext: extention
})
if(inlayImageExts.includes(extention)){
imgObj.src = URL.createObjectURL(new Blob([img.data], {type: `image/${extention}`}))
return await writeInlayImage(imgObj, {
name: img.name,
ext: extention
})
}
if(inlayAudioExts.includes(extention)){
const b64 = Buffer.from(img.data).toString('base64')
const dataURI = `data:audio/${extention};base64,${b64}`
const imgid = v4()
await inlayStorage.setItem(imgid, {
name: img.name,
data: dataURI,
ext: extention,
type: 'audio'
})
return `${imgid}`
}
if(inlayVideoExts.includes(extention)){
const b64 = Buffer.from(img.data).toString('base64')
const dataURI = `data:video/${extention};base64,${b64}`
const imgid = v4()
await inlayStorage.setItem(imgid, {
name: img.name,
data: dataURI,
ext: extention,
type: 'video'
})
return `${imgid}`
}
return null
}
export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string, ext?:string} = {}) {
@@ -60,21 +108,23 @@ export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string
await inlayStorage.setItem(imgid, {
name: arg.name ?? imgid,
data: dataURI,
ext: arg.ext ?? 'png',
ext: 'png',
height: drawHeight,
width: drawWidth
width: drawWidth,
type: 'image'
})
return `${imgid}`
}
export async function getInlayImage(id: string){
export async function getInlayAsset(id: string){
const img:{
name: string,
data: string
ext: string
height: number
width: number
type: 'image'|'video'|'audio'
} = await inlayStorage.getItem(id)
if(img === null){
return null
@@ -84,19 +134,7 @@ export async function getInlayImage(id: string){
export function supportsInlayImage(){
const db = getDatabase()
return db.aiModel.startsWith('gptv') || db.aiModel === 'gemini-pro-vision' || db.aiModel.startsWith('gemini-exp') || db.aiModel.startsWith('claude-3') || db.aiModel.startsWith('gpt4_turbo') || db.aiModel.startsWith('gpt5') || db.aiModel.startsWith('gpt4o') ||
(db.aiModel === 'reverse_proxy' && (
db.proxyRequestModel?.startsWith('gptv') || db.proxyRequestModel === 'gemini-pro-vision' || db.proxyRequestModel?.startsWith('claude-3') || db.proxyRequestModel.startsWith('gpt4_turbo') ||
db.proxyRequestModel?.startsWith('gpt5') || db.proxyRequestModel?.startsWith('gpt4o') ||
db.proxyRequestModel === 'custom' && (
db.customProxyRequestModel?.startsWith('gptv') ||
db.customProxyRequestModel === 'gemini-pro-vision' ||
db.customProxyRequestModel?.startsWith('claude-3') ||
db.customProxyRequestModel.startsWith('gpt-4-turbo') ||
db.customProxyRequestModel?.startsWith('gpt5') ||
db.customProxyRequestModel?.startsWith('gpt4o')
)
))
return getModelInfo(db.aiModel).flags.includes(LLMFlags.hasImageInput)
}
export async function reencodeImage(img:Uint8Array){

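A rough usage sketch of the new helpers (the exports and stored fields are those shown above; the file name and bytes are placeholders):

import { postInlayAsset, getInlayAsset } from 'src/ts/process/files/inlays'

async function storeAndPreview(bytes: Uint8Array) {
    // The extension picks the branch: images go through writeInlayImage and are
    // stored as png, while audio and video are stored directly as base64 data URIs.
    const id = await postInlayAsset({ name: 'reply.mp4', data: bytes })
    if (!id) return null   // unrecognised extension

    const asset = await getInlayAsset(id)
    // asset?.type is 'image' | 'video' | 'audio'; asset?.data is a data URI that
    // can be dropped straight into an <img>, <video> or <audio> source.
    return asset
}
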
View File

@@ -5,7 +5,7 @@ import { doingChat, sendChat } from '../index.svelte';
import { downloadFile, isTauri } from 'src/ts/globalApi.svelte';
import { HypaProcesser } from '../memory/hypamemory';
import { BufferToText as BufferToText, selectSingleFile, sleep } from 'src/ts/util';
import { postInlayImage } from './image';
import { postInlayAsset } from './inlays';
type sendFileArg = {
file:string
@@ -178,11 +178,11 @@ async function sendXMLFile(arg:sendFileArg) {
return Buffer.from(`<File>\n${message}\n</File>\n`).toString('base64')
}
type postFileResult = postFileResultImage | postFileResultVoid | postFileResultText
type postFileResult = postFileResultAsset | postFileResultVoid | postFileResultText
type postFileResultImage = {
type postFileResultAsset = {
data: string,
type: 'image',
type: 'asset',
}
type postFileResultVoid = {
@@ -201,6 +201,22 @@ export async function postChatFile(query:string):Promise<postFileResult>{
'jpeg',
'png',
'webp',
'gif',
'avif',
//audio format
'wav',
'mp3',
'ogg',
'flac',
//video format
'mp4',
'webm',
'mpeg',
'avi',
//other format
'po',
// 'pdf',
'txt'
@@ -243,14 +259,33 @@ export async function postChatFile(query:string):Promise<postFileResult>{
name: file.name
}
}
//image format
case 'jpg':
case 'jpeg':
case 'png':
case 'webp':{
const postData = await postInlayImage(file)
case 'webp':
case 'gif':
case 'avif':
//audio format
case 'wav':
case 'mp3':
case 'ogg':
case 'flac':
//video format
case 'mp4':
case 'webm':
case 'mpeg':
case 'avi':{
const postData = await postInlayAsset(file)
if(!postData){
return null
}
return {
data: postData,
type: 'image'
type: 'asset'
}
}
case 'txt':{

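For context, the caller side of the widened postChatFile, mirroring the ChatInput change earlier in this commit (messageInput, fileInput and updateInputSizeAll are that component's state and helpers):

const res = await postChatFile(messageInput)
if (res?.type === 'asset') {
    // res.data is the inlay id returned by postInlayAsset, not the file bytes;
    // the preview UI later resolves it with getInlayAsset.
    fileInput.push(res.data)
    updateInputSizeAll()
}
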
View File

@@ -18,7 +18,7 @@ import { groupOrder } from "./group";
import { runTrigger } from "./triggers";
import { HypaProcesser } from "./memory/hypamemory";
import { additionalInformations } from "./embedding/addinfo";
import { getInlayImage, supportsInlayImage } from "./files/image";
import { getInlayAsset, supportsInlayImage } from "./files/inlays";
import { getGenerationModelString } from "./models/modelString";
import { connectionOpen, peerRevertChat, peerSafeCheck, peerSync } from "../sync/multiuser";
import { runInlayScreen } from "./inlayScreen";
@@ -29,6 +29,7 @@ import { hanuraiMemory } from "./memory/hanuraiMemory";
import { hypaMemoryV2 } from "./memory/hypav2";
import { runLuaEditTrigger } from "./lua";
import { parseChatML } from "../parser.svelte";
import { getModelInfo, LLMFlags } from "../model/modellist";
export interface OpenAIChat{
role: 'system'|'user'|'assistant'|'function'
@@ -699,12 +700,13 @@ export async function sendChat(chatProcessIndex = -1,arg:{
}
let multimodal:MultiModal[] = []
const modelinfo = getModelInfo(DBState.db.aiModel)
if(inlays.length > 0){
for(const inlay of inlays){
const inlayName = inlay.replace('{{inlay::', '').replace('}}', '')
const inlayData = await getInlayImage(inlayName)
if(inlayData){
if(supportsInlayImage()){
const inlayData = await getInlayAsset(inlayName)
if(inlayData?.type === 'image'){
if(modelinfo.flags.includes(LLMFlags.hasImageInput)){
multimodal.push({
type: 'image',
base64: inlayData.data,
@@ -717,6 +719,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{
formatedChat += `[${captionResult[0].generated_text}]`
}
}
if(inlayData?.type === 'video' || inlayData?.type === 'audio'){
if(multimodal.length === 0){
multimodal.push({
type: inlayData.type,
base64: inlayData.data
})
}
}
formatedChat = formatedChat.replace(inlay, '')
}
}

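The gating logic above, restated as a standalone helper for readability. The types are assumed from the fields the loop sets; the real code also strips the {{inlay::...}} token from the text and falls back to captioning an image when the model lacks image input:

import { LLMFlags } from '../model/modellist'

type InlayAsset = { type: 'image' | 'video' | 'audio'; data: string }
type MultiModalPart = { type: string; base64: string }

function attachInlay(multimodal: MultiModalPart[], asset: InlayAsset, flags: LLMFlags[]) {
    if (asset.type === 'image' && flags.includes(LLMFlags.hasImageInput)) {
        multimodal.push({ type: 'image', base64: asset.data })
    } else if ((asset.type === 'video' || asset.type === 'audio') && multimodal.length === 0) {
        // only the first audio/video inlay is attached (the multimodal.length === 0 check above)
        multimodal.push({ type: asset.type, base64: asset.data })
    }
}
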
View File

@@ -1,4 +1,4 @@
import { writeInlayImage } from "./files/image";
import { writeInlayImage } from "./files/inlays";
import type { character } from "../storage/database.svelte";
import { generateAIImage } from "./stableDiff";

View File

@@ -6,7 +6,7 @@ import { ReloadGUIPointer, selectedCharID } from "../stores.svelte";
import { alertError, alertInput, alertNormal } from "../alert";
import { HypaProcesser } from "./memory/hypamemory";
import { generateAIImage } from "./stableDiff";
import { writeInlayImage } from "./files/image";
import { writeInlayImage } from "./files/inlays";
import type { OpenAIChat } from "./index.svelte";
import { requestChatData } from "./request";
import { v4 } from "uuid";

View File

@@ -11,7 +11,7 @@ import { risuChatParser } from "../parser.svelte";
import { SignatureV4 } from "@smithy/signature-v4";
import { HttpRequest } from "@smithy/protocol-http";
import { Sha256 } from "@aws-crypto/sha256-js";
import { supportsInlayImage } from "./files/image";
import { supportsInlayImage } from "./files/inlays";
import { Capacitor } from "@capacitor/core";
import { getFreeOpenRouterModel } from "../model/openrouter";
import { runTransformers } from "./transformers";
@@ -95,7 +95,9 @@ type ParameterMap = {
[key in Parameter]?: string;
};
function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended): { [key: string]: any } {
function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended, arg:{
ignoreTopKIfOne?:boolean
} = {}): { [key: string]: any } {
const db = getDatabase()
if(db.seperateParametersEnabled && ModelMode !== 'model'){
if(ModelMode === 'submodel'){
@@ -103,6 +105,10 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],
}
for(const parameter of parameters){
if(parameter === 'top_k' && arg.ignoreTopKIfOne && db.seperateParameters[ModelMode][parameter] === 1){
continue
}
let value = db.seperateParameters[ModelMode][parameter]
if(value === -1000 || value === undefined){
@@ -117,6 +123,9 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],
for(const parameter of parameters){
let value = 0
if(parameter === 'top_k' && arg.ignoreTopKIfOne && db.top_k === 1){
value = 0
}
switch(parameter){
case 'temperature':{
value = db.temperature === -1000 ? -1000 : (db.temperature / 100)
@@ -1495,7 +1504,9 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
'top_k': "topK",
'presence_penalty': "presencePenalty",
'frequency_penalty': "frequencyPenalty"
}, arg.mode),
}, arg.mode, {
ignoreTopKIfOne: true
}),
safetySettings: uncensoredCatagory,
systemInstruction: {
parts: [

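The call pattern above, pulled out for clarity (rename map excerpted from the hunk, arg.mode is the surrounding request function's mode argument; this is a sketch of the intent, not new code in the commit):

const generationConfig = applyParameters(
    {},
    ['temperature', 'top_k', 'top_p', 'presence_penalty', 'frequency_penalty'],
    { top_k: 'topK', presence_penalty: 'presencePenalty', frequency_penalty: 'frequencyPenalty' },
    arg.mode,
    { ignoreTopKIfOne: true },   // intent: a top_k of exactly 1 is dropped instead of being sent as topK: 1
)
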
View File

@@ -11,7 +11,7 @@ import type { OpenAIChat } from "./index.svelte";
import { HypaProcesser } from "./memory/hypamemory";
import { requestChatData } from "./request";
import { generateAIImage } from "./stableDiff";
import { writeInlayImage } from "./files/image";
import { writeInlayImage } from "./files/inlays";
import { runLua } from "./lua";

View File

@@ -2,7 +2,7 @@ import type { Tiktoken } from "@dqbd/tiktoken";
import type { Tokenizer } from "@mlc-ai/web-tokenizers";
import { type groupChat, type character, type Chat, getCurrentCharacter, getDatabase } from "./storage/database.svelte";
import type { MultiModal, OpenAIChat } from "./process/index.svelte";
import { supportsInlayImage } from "./process/files/image";
import { supportsInlayImage } from "./process/files/inlays";
import { risuChatParser } from "./parser.svelte";
import { tokenizeGGUFModel } from "./process/models/local";
import { globalFetch } from "./globalApi.svelte";