Merge branch 'kwaroran:main' into main

2024-12-15 14:13:02 -08:00
parent 69f44c03c6 556e1ea46a
commit cd3294d529
23 changed files with 395 additions and 200 deletions
--- a/src/ts/process/files/inlays.ts
+++ b/src/ts/process/files/inlays.ts
@@ -2,25 +2,73 @@ import localforage from "localforage";
 import { v4 } from "uuid";
 import { getDatabase } from "../../storage/database.svelte";
 import { checkImageType } from "../../parser.svelte";
+import { getModelInfo, LLMFlags } from "src/ts/model/modellist";
+
+// File extensions postInlayAsset accepts, grouped by the asset type it stores.
+const inlayImageExts = ['jpg', 'jpeg', 'png', 'gif', 'webp', 'avif']
+
+const inlayAudioExts = ['wav', 'mp3', 'ogg', 'flac']
+
+// 'mpeg' and 'avi' are listed because postChatFile (multisend.ts) forwards those
+// extensions to postInlayAsset; without them such uploads fall through to `return null`.
+const inlayVideoExts = [
+    'webm', 'mp4', 'mkv',
+    'mpeg', 'avi'
+]

 const inlayStorage = localforage.createInstance({
    name: 'inlay',
    storeName: 'inlay'
 })

-export async function postInlayImage(img:{
+export async function postInlayAsset(img:{
    name:string,
    data:Uint8Array
 }){

    const extention = img.name.split('.').at(-1)
    const imgObj = new Image()
-    imgObj.src = URL.createObjectURL(new Blob([img.data], {type: `image/${extention}`}))

-    return await writeInlayImage(imgObj, {
-        name: img.name,
-        ext: extention
-    })
+    // Images: wrap the bytes in a Blob URL on the Image element created above and let
+    // writeInlayImage store the result.
+    if(inlayImageExts.includes(extention)){
+        const imageBlob = new Blob([img.data], {type: `image/${extention}`})
+        imgObj.src = URL.createObjectURL(imageBlob)
+
+        const storedImageId = await writeInlayImage(imgObj, {
+            name: img.name,
+            ext: extention
+        })
+        return storedImageId
+    }
+
+    // Audio and video take the same path; only the type tag and the data-URI prefix differ.
+    let mediaType:'audio'|'video'|null = null
+    if(inlayAudioExts.includes(extention)){
+        mediaType = 'audio'
+    }
+    else if(inlayVideoExts.includes(extention)){
+        mediaType = 'video'
+    }
+
+    // Extensions outside all three lists are not stored; callers receive null.
+    if(mediaType === null){
+        return null
+    }
+
+    // The bytes are kept in inlayStorage as a base64 data URI under a fresh UUID,
+    // and that UUID is what the caller gets back.
+    const encoded = Buffer.from(img.data).toString('base64')
+    const assetId = v4()
+
+    await inlayStorage.setItem(assetId, {
+        name: img.name,
+        data: `data:${mediaType}/${extention};base64,${encoded}`,
+        ext: extention,
+        type: mediaType
+    })
+
+    return `${assetId}`
 }

 export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string, ext?:string} = {}) {
@@ -60,21 +108,23 @@ export async function writeInlayImage(imgObj:HTMLImageElement, arg:{name?:string
    await inlayStorage.setItem(imgid, {
        name: arg.name ?? imgid,
        data: dataURI,
-        ext: arg.ext ?? 'png',
+        ext: 'png',
        height: drawHeight,
-        width: drawWidth
+        width: drawWidth,
+        type: 'image'
    })

    return `${imgid}`
 }

-export async function getInlayImage(id: string){
+export async function getInlayAsset(id: string){
    const img:{
        name: string,
        data: string
        ext: string
        height: number
        width: number
+        type: 'image'|'video'|'audio'
    } = await inlayStorage.getItem(id)
    if(img === null){
        return null
@@ -84,19 +134,7 @@ export async function getInlayImage(id: string){

 export function supportsInlayImage(){
    const db = getDatabase()
-    return db.aiModel.startsWith('gptv') || db.aiModel === 'gemini-pro-vision' || db.aiModel.startsWith('gemini-exp') || db.aiModel.startsWith('claude-3') || db.aiModel.startsWith('gpt4_turbo') || db.aiModel.startsWith('gpt5') || db.aiModel.startsWith('gpt4o') ||
-        (db.aiModel === 'reverse_proxy' && (
-            db.proxyRequestModel?.startsWith('gptv') || db.proxyRequestModel === 'gemini-pro-vision' || db.proxyRequestModel?.startsWith('claude-3') || db.proxyRequestModel.startsWith('gpt4_turbo') ||
-            db.proxyRequestModel?.startsWith('gpt5') || db.proxyRequestModel?.startsWith('gpt4o') ||
-            db.proxyRequestModel === 'custom'  && (
-                db.customProxyRequestModel?.startsWith('gptv') ||
-                db.customProxyRequestModel === 'gemini-pro-vision' ||
-                db.customProxyRequestModel?.startsWith('claude-3') ||
-                db.customProxyRequestModel.startsWith('gpt-4-turbo') ||
-                db.customProxyRequestModel?.startsWith('gpt5') ||
-                db.customProxyRequestModel?.startsWith('gpt4o')
-            )
-        ))
+    return getModelInfo(db.aiModel).flags.includes(LLMFlags.hasImageInput)
 }

 export async function reencodeImage(img:Uint8Array){
--- a/src/ts/process/files/multisend.ts
+++ b/src/ts/process/files/multisend.ts
@@ -5,7 +5,7 @@ import { doingChat, sendChat } from '../index.svelte';
 import { downloadFile, isTauri } from 'src/ts/globalApi.svelte';
 import { HypaProcesser } from '../memory/hypamemory';
 import { BufferToText as BufferToText, selectSingleFile, sleep } from 'src/ts/util';
-import { postInlayImage } from './image';
+import { postInlayAsset } from './inlays';

 type sendFileArg = {
    file:string
@@ -178,11 +178,11 @@ async function sendXMLFile(arg:sendFileArg) {
    return Buffer.from(`<File>\n${message}\n</File>\n`).toString('base64')    
 }

-type postFileResult = postFileResultImage | postFileResultVoid | postFileResultText
+type postFileResult = postFileResultAsset | postFileResultVoid | postFileResultText

-type postFileResultImage = {
+type postFileResultAsset = {
    data: string,
-    type: 'image',
+    type: 'asset',
 }

 type postFileResultVoid = {
@@ -201,6 +201,22 @@ export async function postChatFile(query:string):Promise<postFileResult>{
        'jpeg',
        'png',
        'webp',
+        'gif',
+        'avif',
+
+        //audio format
+        'wav',
+        'mp3',
+        'ogg',
+        'flac',
+
+        //video format
+        'mp4',
+        'webm',
+        'mpeg',
+        'avi',
+
+        //other format
        'po',
        // 'pdf',
        'txt'
@@ -243,14 +259,33 @@ export async function postChatFile(query:string):Promise<postFileResult>{
                name: file.name
            }
        }
+
+        //image format
        case 'jpg':
        case 'jpeg':
        case 'png':
-        case 'webp':{
-            const postData = await postInlayImage(file)
+        case 'webp':
+        case 'gif':
+        case 'avif':
+            
+        //audio format
+        case 'wav':
+        case 'mp3':
+        case 'ogg':
+        case 'flac':
+            
+        //video format
+        case 'mp4':
+        case 'webm':
+        case 'mpeg':
+        case 'avi':{
+            const postData = await postInlayAsset(file)
+            if(!postData){
+                return null
+            }
            return {
                data: postData,
-                type: 'image'
+                type: 'asset'
            }
        }
        case 'txt':{
--- a/src/ts/process/index.svelte.ts
+++ b/src/ts/process/index.svelte.ts
@@ -18,7 +18,7 @@ import { groupOrder } from "./group";
 import { runTrigger } from "./triggers";
 import { HypaProcesser } from "./memory/hypamemory";
 import { additionalInformations } from "./embedding/addinfo";
-import { getInlayImage, supportsInlayImage } from "./files/image";
+import { getInlayAsset, supportsInlayImage } from "./files/inlays";
 import { getGenerationModelString } from "./models/modelString";
 import { connectionOpen, peerRevertChat, peerSafeCheck, peerSync } from "../sync/multiuser";
 import { runInlayScreen } from "./inlayScreen";
@@ -29,6 +29,7 @@ import { hanuraiMemory } from "./memory/hanuraiMemory";
 import { hypaMemoryV2 } from "./memory/hypav2";
 import { runLuaEditTrigger } from "./lua";
 import { parseChatML } from "../parser.svelte";
+import { getModelInfo, LLMFlags } from "../model/modellist";

 export interface OpenAIChat{
    role: 'system'|'user'|'assistant'|'function'
@@ -41,7 +42,7 @@ export interface OpenAIChat{
 }

 export interface MultiModal{
-    type:'image'|'video'
+    type:'image'|'video'|'audio'
    base64:string,
    height?:number,
    width?:number
@@ -687,10 +688,10 @@ export async function sendChat(chatProcessIndex = -1,arg:{
        }
        let inlays:string[] = []
        if(msg.role === 'char'){
-            formatedChat = formatedChat.replace(/{{inlay::(.+?)}}/g, '')
+            formatedChat = formatedChat.replace(/{{(inlay|inlayed)::(.+?)}}/g, '')
        }
        else{
-            const inlayMatch = formatedChat.match(/{{inlay::(.+?)}}/g)
+            const inlayMatch = formatedChat.match(/{{(inlay|inlayed)::(.+?)}}/g)
            if(inlayMatch){
                for(const inlay of inlayMatch){
                    inlays.push(inlay)
@@ -699,12 +700,13 @@ export async function sendChat(chatProcessIndex = -1,arg:{
        }

        let multimodal:MultiModal[] = []
+        const modelinfo = getModelInfo(DBState.db.aiModel)
        if(inlays.length > 0){
            for(const inlay of inlays){
-                const inlayName = inlay.replace('{{inlay::', '').replace('}}', '')
-                const inlayData = await getInlayImage(inlayName)
-                if(inlayData){
-                    if(supportsInlayImage()){
+                const inlayName = inlay.replace('{{inlayed::', '').replace('{{inlay::', '').replace('}}', '')
+                const inlayData = await getInlayAsset(inlayName)
+                if(inlayData?.type === 'image'){
+                    if(modelinfo.flags.includes(LLMFlags.hasImageInput)){
                        multimodal.push({
                            type: 'image',
                            base64: inlayData.data,
@@ -717,6 +719,14 @@ export async function sendChat(chatProcessIndex = -1,arg:{
                        formatedChat += `[${captionResult[0].generated_text}]`
                    }
                }
+                if(inlayData?.type === 'video' || inlayData?.type === 'audio'){
+                    if(multimodal.length === 0){
+                        multimodal.push({
+                            type: inlayData.type,
+                            base64: inlayData.data
+                        })
+                    }
+                }
                formatedChat = formatedChat.replace(inlay, '')
            }
        }
@@ -1133,7 +1143,7 @@ export async function sendChat(chatProcessIndex = -1,arg:{
            pointer++
        }
        formated = formated.filter((v) => {
-            return v.content !== ''
+            return v.content !== ''  || (v.multimodals && v.multimodals.length > 0)
        })
    }

--- a/src/ts/process/inlayScreen.ts
+++ b/src/ts/process/inlayScreen.ts
@@ -1,4 +1,4 @@
-import { writeInlayImage } from "./files/image";
+import { writeInlayImage } from "./files/inlays";
 import type { character } from "../storage/database.svelte";
 import { generateAIImage } from "./stableDiff";

--- a/src/ts/process/lua.ts
+++ b/src/ts/process/lua.ts
@@ -6,7 +6,7 @@ import { ReloadGUIPointer, selectedCharID } from "../stores.svelte";
 import { alertError, alertInput, alertNormal } from "../alert";
 import { HypaProcesser } from "./memory/hypamemory";
 import { generateAIImage } from "./stableDiff";
-import { writeInlayImage } from "./files/image";
+import { writeInlayImage } from "./files/inlays";
 import type { OpenAIChat } from "./index.svelte";
 import { requestChatData } from "./request";
 import { v4 } from "uuid";
--- a/src/ts/process/request.ts
+++ b/src/ts/process/request.ts
@@ -11,7 +11,7 @@ import { risuChatParser } from "../parser.svelte";
 import { SignatureV4 } from "@smithy/signature-v4";
 import { HttpRequest } from "@smithy/protocol-http";
 import { Sha256 } from "@aws-crypto/sha256-js";
-import { supportsInlayImage } from "./files/image";
+import { supportsInlayImage } from "./files/inlays";
 import { Capacitor } from "@capacitor/core";
 import { getFreeOpenRouterModel } from "../model/openrouter";
 import { runTransformers } from "./transformers";
@@ -95,7 +95,9 @@ type ParameterMap = {
    [key in Parameter]?: string;
 };

-function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended): { [key: string]: any } {
+function applyParameters(data: { [key: string]: any }, parameters: Parameter[], rename: ParameterMap, ModelMode:ModelModeExtended, arg:{
+    ignoreTopKIfZero?:boolean
+} = {}): { [key: string]: any } {
    const db = getDatabase()
    if(db.seperateParametersEnabled && ModelMode !== 'model'){
        if(ModelMode === 'submodel'){
@@ -103,6 +105,10 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],
        }

        for(const parameter of parameters){
+            if(parameter === 'top_k' && arg.ignoreTopKIfZero && db.seperateParameters[ModelMode][parameter] === 0){
+                continue
+            }
+
            let value = db.seperateParameters[ModelMode][parameter]

            if(value === -1000 || value === undefined){
@@ -117,6 +123,9 @@ function applyParameters(data: { [key: string]: any }, parameters: Parameter[],

    for(const parameter of parameters){
        let value = 0
+        if(parameter === 'top_k' && arg.ignoreTopKIfZero && db.top_k === 0){
+            continue
+        }
        switch(parameter){
            case 'temperature':{
                value = db.temperature === -1000 ? -1000 : (db.temperature / 100)
@@ -209,8 +218,13 @@ function reformater(formated:OpenAIChat[],modelInfo:LLMModel){

    if(!modelInfo.flags.includes(LLMFlags.hasFullSystemPrompt)){
        if(modelInfo.flags.includes(LLMFlags.hasFirstSystemPrompt)){
-            if(formated[0].role === 'system'){
-                systemPrompt = formated[0]
+            // Merge every leading system message into a single prompt. `?.` ends the loop when the
+            // list runs out (all messages were system), and the first message is copied so that
+            // appending below never rewrites an object still owned by the caller.
+            while(formated[0]?.role === 'system'){
+                systemPrompt = systemPrompt
+                    ? {...systemPrompt, content: systemPrompt.content + '\n\n' + formated[0].content}
+                    : {...formated[0]}
                formated = formated.slice(1)
            }
        }
@@ -391,7 +405,7 @@ async function requestOpenAI(arg:RequestDataArgumentExtended):Promise<requestDat

    if(db.newOAIHandle){
        formatedChat = formatedChat.filter(m => {
-            return m.content !== ''
+            return m.content !== '' || (m.multimodals && m.multimodals.length > 0)
        })
    }

@@ -520,10 +534,9 @@ async function requestOpenAI(arg:RequestDataArgumentExtended):Promise<requestDat
            body: applyParameters({
                model: requestModel,
                messages: reformatedChat,
-                top_p: db.top_p,
                safe_prompt: false,
                max_tokens: arg.maxTokens,
-            }, ['temperature', 'presence_penalty', 'frequency_penalty'], {}, arg.mode ),
+            }, ['temperature', 'presence_penalty', 'frequency_penalty', 'top_p'], {}, arg.mode ),
            headers: {
                "Authorization": "Bearer " + db.mistralKey,
            },
@@ -1407,7 +1420,11 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
                });
                
                for (const modal of chat.multimodals) {
-                    if (modal.type === "image") {
+                    if (
+                        (modal.type === "image" && arg.modelInfo.flags.includes(LLMFlags.hasImageInput)) ||
+                        (modal.type === "audio" && arg.modelInfo.flags.includes(LLMFlags.hasAudioInput)) ||
+                        (modal.type === "video" && arg.modelInfo.flags.includes(LLMFlags.hasVideoInput))
+                    ) {
                        const dataurl = modal.base64;
                        const base64 = dataurl.split(",")[1];
                        const mediaType = dataurl.split(";")[0].split(":")[1];
@@ -1482,14 +1499,24 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
        },
    ]

+    let para:Parameter[] = ['temperature', 'top_p', 'top_k', 'presence_penalty', 'frequency_penalty']
+
+    para = para.filter((v) => {
+        return arg.modelInfo.parameters.includes(v)
+    })

    const body = {
        contents: reformatedChat,
        generation_config: applyParameters({
            "maxOutputTokens": maxTokens,
-        }, ['temperature', 'top_p'], {
-            'top_p': "topP"
-        }, arg.mode),
+        }, para, {
+            'top_p': "topP",
+            'top_k': "topK",
+            'presence_penalty': "presencePenalty",
+            'frequency_penalty': "frequencyPenalty"
+        }, arg.mode, {
+            ignoreTopKIfZero: true
+        }),
        safetySettings: uncensoredCatagory,
        systemInstruction: {
            parts: [
@@ -1582,9 +1609,65 @@ async function requestGoogleCloudVertex(arg:RequestDataArgumentExtended):Promise
    else if(arg.modelInfo.format === LLMFormat.VertexAIGemini){
        url =`https://${REGION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${arg.modelInfo.internalID}:streamGenerateContent`
    }
+    else if(arg.modelInfo.format === LLMFormat.GoogleCloud && arg.useStreaming){
+        url = `https://generativelanguage.googleapis.com/v1beta/models/${arg.modelInfo.internalID}:streamGenerateContent?key=${db.google.accessToken}`
+    }
    else{
        url = `https://generativelanguage.googleapis.com/v1beta/models/${arg.modelInfo.internalID}:generateContent?key=${db.google.accessToken}`
    }
+
+    // Streaming variant: POST to the `:streamGenerateContent` URL chosen above and parse the growing JSON array.
+    if(arg.modelInfo.format === LLMFormat.GoogleCloud && arg.useStreaming){
+        headers['Content-Type'] = 'application/json'
+        const f = await fetchNative(url, {
+            headers: headers,
+            body: JSON.stringify(body),
+            method: 'POST',
+            chatId: arg.chatId,
+        })
+        if(f.status !== 200){
+            return {
+                type: 'fail',
+                result: await textifyReadableStream(f.body)
+            }
+        }
+
+        let fullResult:string = ''
+        // A single decoder in streaming mode keeps the leading bytes of a multi-byte UTF-8
+        // character until the next chunk arrives, so split characters are not turned into U+FFFD.
+        const decoder = new TextDecoder()
+
+        const stream = new TransformStream<Uint8Array, StreamResponseChunk>(  {
+            async transform(chunk, control) {
+                fullResult += decoder.decode(chunk, {stream: true})
+                try {
+                    // Close the still-open JSON array so everything received so far can be parsed.
+                    let reformatted = fullResult
+                    if(reformatted.endsWith(',')){
+                        reformatted = fullResult.slice(0, -1) + ']'
+                    }
+                    else if(!reformatted.endsWith(']')){
+                        reformatted = fullResult + ']'
+                    }
+                    const data = JSON.parse(reformatted)
+                    let r = ''
+                    for(const d of data){
+                        // Entries without a text part add nothing instead of throwing or appending "undefined".
+                        r += d?.candidates?.[0]?.content?.parts?.[0]?.text ?? ''
+                    }
+                    control.enqueue({ '0': r })
+                } catch (error) {
+                    console.log(error)
+                }
+            }
+        },)
+
+        return {
+            type: 'streaming',
+            result: f.body.pipeThrough(stream)
+        }
+    }
+
    const res = await globalFetch(url, {
        headers: headers,
        body: body,
--- a/src/ts/process/triggers.ts
+++ b/src/ts/process/triggers.ts
@@ -11,7 +11,7 @@ import type { OpenAIChat } from "./index.svelte";
 import { HypaProcesser } from "./memory/hypamemory";
 import { requestChatData } from "./request";
 import { generateAIImage } from "./stableDiff";
-import { writeInlayImage } from "./files/image";
+import { writeInlayImage } from "./files/inlays";
 import { runLua } from "./lua";