[feat] add tts

2023-05-13 21:24:54 +09:00
parent 801a3d5f70
commit f7d7eb3ab9
6 changed files with 133 additions and 15 deletions
--- a/src/lang/en.ts
+++ b/src/lang/en.ts
@@ -241,5 +241,6 @@ export const languageEnglish = {
    useGlobalSettings: "Use Global Settings",
    recursiveScanning: "Recursive Scanning",
    creator: "Creator",
-    CharVersion: "Character Version"
+    CharVersion: "Character Version",
+    Speech: "Speech"
 }
--- a/src/lib/SideBars/CharConfig.svelte
+++ b/src/lib/SideBars/CharConfig.svelte
@@ -3,7 +3,7 @@
    import { tokenize } from "../../ts/tokenizer";
    import { DataBase, type Database, type character, type groupChat } from "../../ts/database";
    import { selectedCharID } from "../../ts/stores";
-    import { PlusIcon, SmileIcon, TrashIcon, UserIcon, ActivityIcon, BookIcon, LoaderIcon, User, DnaIcon, CurlyBracesIcon } from 'lucide-svelte'
+    import { PlusIcon, SmileIcon, TrashIcon, UserIcon, ActivityIcon, BookIcon, LoaderIcon, User, DnaIcon, CurlyBracesIcon, Volume2Icon } from 'lucide-svelte'
    import Check from "../Others/Check.svelte";
    import { addCharEmotion, addingEmotion, getCharImage, rmCharEmotion, selectCharImg, makeGroupImage } from "../../ts/characters";
    import LoreBook from "./LoreBookSetting.svelte";
@@ -15,6 +15,7 @@
    import Help from "../Others/Help.svelte";
    import RegexData from "./RegexData.svelte";
    import { exportChar } from "src/ts/characterCards";
+    import { getElevenTTSVoices, getWebSpeechTTSVoices } from "src/ts/process/tts";

    let subMenu = 0
    let subberMenu = 0
@@ -157,6 +158,9 @@
        <BookIcon />
    </button>
    {#if currentChar.type === 'character'}
+        <button class={subMenu === 5 ? 'text-gray-200' : 'text-gray-500'} on:click={() => {subMenu = 5}}>
+            <Volume2Icon />
+        </button>
        <button class={subMenu === 4 ? 'text-gray-200' : 'text-gray-500'} on:click={() => {subMenu = 4}}>
            <CurlyBracesIcon />
        </button>
@@ -445,6 +449,49 @@
            }
        }}><PlusIcon /></button>
    {/if}
+{:else if subMenu === 5}
+    {#if currentChar.type === 'character'}
+        <h2 class="mb-2 text-2xl font-bold mt-2">TTS</h2>
+        <span class="text-neutral-200">{language.provider}</span>
+        <select class="bg-transparent input-text mt-2 mb-4 text-gray-200 appearance-none text-sm" bind:value={currentChar.data.ttsMode} on:change={() => {
+            if(currentChar.type === 'character'){
+                currentChar.data.ttsSpeech = ''
+            }
+        }}>
+            <option value="" class="bg-darkbg appearance-none">{language.disabled}</option>
+            <option value="elevenlab" class="bg-darkbg appearance-none">ElevenLabs</option>
+            <option value="webspeech" class="bg-darkbg appearance-none">Web Speech</option>
+        </select>
+        
+
+        {#if currentChar.data.ttsMode === 'webspeech'}
+            {#if !speechSynthesis}
+                <span class="text-neutral-200">Web Speech isn't supported in your browser or OS</span>
+            {:else}
+                <span class="text-neutral-200">{language.Speech}</span>
+                <select class="bg-transparent input-text mt-2 mb-4 text-gray-200 appearance-none text-sm" bind:value={currentChar.data.ttsSpeech}>
+                    <option value="" class="bg-darkbg appearance-none">Auto</option>
+                    {#each getWebSpeechTTSVoices() as voice}
+                        <option value={voice} class="bg-darkbg appearance-none">{voice}</option>
+                    {/each}
+                </select>
+                {#if currentChar.data.ttsSpeech !== ''}
+                    <span class="text-red-400 text-sm">If you do not set it to Auto, it may not work properly when importing from another OS or browser.</span>
+                {/if}
+            {/if}
+        {:else if currentChar.data.ttsMode === 'elevenlab'}
+            <span class="text-sm mb-2 text-gray-400">Please set the ElevenLabs API key in "global Settings → Bot Settings → Others → ElevenLabs API key"</span>
+            {#await getElevenTTSVoices() then voices}
+                <span class="text-neutral-200">{language.Speech}</span>
+                <select class="bg-transparent input-text mt-2 mb-4 text-gray-200 appearance-none text-sm" bind:value={currentChar.data.ttsSpeech}>
+                    <option value="" class="bg-darkbg appearance-none">Unset</option>
+                        {#each voices as voice}
+                            <option value={voice.voice_id} class="bg-darkbg appearance-none">{voice.name}</option>
+                        {/each}
+                </select>
+            {/await}
+        {/if}
+    {/if}
 {:else if subMenu === 2}
    <h2 class="mb-2 text-2xl font-bold mt-2">{language.advancedSettings}</h2>
    {#if currentChar.type !== 'group'}
--- a/src/lib/SideBars/Settings.svelte
+++ b/src/lib/SideBars/Settings.svelte
@@ -277,8 +277,10 @@
    {/if}


-    
-    
+    <span class="text-neutral-200 mt-4 text-lg font-bold">TTS</span>
+    <span class="text-neutral-200 mt-2">ElevenLabs API key</span>
+    <input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.elevenLabKey}>
+
    
 {:else if subMenu == 3}
    <h2 class="mb-2 text-2xl font-bold mt-2">{language.display}</h2>
--- a/src/ts/database.ts
+++ b/src/ts/database.ts
@@ -181,6 +181,9 @@ export function setDatabase(data:Database){
    if(checkNullish(data.showUnrecommended)){
        data.showUnrecommended = false
    }
+    if(checkNullish(data.elevenLabKey)){
+        data.elevenLabKey = ''
+    }
    if(checkNullish(data.sdConfig)){
        data.sdConfig = {
            width:512,
@@ -262,6 +265,8 @@ export interface character{
        creator?:string
        character_version?:number
    }
+    ttsMode?:string
+    ttsSpeech?:string
 }


@@ -386,6 +391,7 @@ export interface Database{
    requestmet: string
    requestproxy: string
    showUnrecommended:boolean
+    elevenLabKey:string
 }


--- a/src/ts/process/index.ts
+++ b/src/ts/process/index.ts
@@ -10,6 +10,7 @@ import { requestChatData } from "./request";
 import { stableDiff } from "./stableDiff";
 import { processScript, processScriptFull } from "./scripts";
 import { exampleMessage } from "./exampleMessages";
+import { sayTTS } from "./tts";

 export interface OpenAIChat{
    role: 'system'|'user'|'assistant'
@@ -165,7 +166,7 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
    }).join('\n\n')) + db.maxResponse) + 150

    let chats:OpenAIChat[] = exampleMessage(currentChar)
-    
+
    chats.push({
        role: 'system',
        content: '[Start a new chat]'
@@ -214,9 +215,6 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
        currentTokens += (await tokenize(systemMsg) + 1)
    }

-    console.log(currentTokens)
-    console.log(maxContextTokens)
-
    while(currentTokens > maxContextTokens){
        if(chats.length <= 1){
            alertError(language.errors.toomuchtoken)
@@ -228,8 +226,6 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
        chats.splice(0, 1)
    }

-    console.log(currentTokens)
-
    let bias:{[key:number]:number} = {}

    for(let i=0;i<currentChar.bias.length;i++){
@@ -318,6 +314,7 @@ export async function sendChat(chatProcessIndex = -1):Promise<boolean> {
            data: result,
            saying: currentChar.chaId
        })
+        await sayTTS(currentChar, result)
        setDatabase(db)
    }

--- a/src/ts/process/tts.ts
+++ b/src/ts/process/tts.ts
@@ -1,6 +1,71 @@
-export async function sayTTS(text:string) {
-    const utterThis = new SpeechSynthesisUtterance(text);
-    const voices = speechSynthesis.getVoices();
-    utterThis.voice = voices[0]
-    speechSynthesis.speak(utterThis)
+import { get } from "svelte/store";
+import { alertError } from "../alert";
+import { DataBase, type character } from "../database";
+
+/**
+ * Reads `text` aloud using the TTS provider selected in `character.ttsMode`.
+ *
+ * "webspeech" uses the browser's speech synthesis; "elevenlab" posts the text to the
+ * ElevenLabs text-to-speech endpoint and plays the returned audio. Any other mode does nothing.
+ * Failures are reported through `alertError` instead of being thrown, so a TTS problem
+ * cannot abort the caller.
+ *
+ * @param character Character whose `ttsMode` / `ttsSpeech` settings pick the provider and voice.
+ * @param text Text to speak.
+ */
+export async function sayTTS(character:character,text:string) {
+    try {
+        switch(character.ttsMode){
+            case "webspeech":{
+                // Use `typeof`: a bare reference to a missing global throws a ReferenceError.
+                if(typeof speechSynthesis === 'undefined' || typeof SpeechSynthesisUtterance === 'undefined'){
+                    break
+                }
+                const utterThis = new SpeechSynthesisUtterance(text);
+                const voices = speechSynthesis.getVoices();
+                // Fall back to the first available voice when the saved name is empty or not installed here.
+                utterThis.voice = voices.find(v => v.name === character.ttsSpeech) || voices[0] || null
+                speechSynthesis.speak(utterThis)
+                break
+            }
+            case "elevenlab": {
+                if(!character.ttsSpeech){
+                    // No voice selected, so there is no voice id to put in the request URL.
+                    break
+                }
+                const db = get(DataBase)
+                const headers:{[key:string]:string} = {
+                    "Content-Type": "application/json"
+                }
+                if(db.elevenLabKey){
+                    // Omit the header without a key; an `undefined` value would be sent as the text "undefined".
+                    headers['xi-api-key'] = db.elevenLabKey
+                }
+                const da = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${character.ttsSpeech}`, {
+                    body: JSON.stringify({
+                        text: text
+                    }),
+                    method: "POST",
+                    headers: headers
+                })
+                if(!da.ok){
+                    alertError(await da.text())
+                    break
+                }
+                const encoded = await da.arrayBuffer()
+                const audioContext = new AudioContext();
+                try {
+                    const sourceNode = audioContext.createBufferSource();
+                    sourceNode.buffer = await audioContext.decodeAudioData(encoded)
+                    sourceNode.connect(audioContext.destination);
+                    // Close the context once playback ends so contexts do not pile up across messages.
+                    sourceNode.onended = () => { void audioContext.close() }
+                    sourceNode.start();
+                } catch (error) {
+                    // Playback never started, so release the context here before reporting.
+                    void audioContext.close()
+                    throw error
+                }
+                break
+            }
+        }
+    } catch (error) {
+        alertError(`${error}`)
+    }
+}
+
+
+/**
+ * Lists the names of the voices the browser's speech synthesis currently reports.
+ *
+ * @returns Voice names, or an empty array when speech synthesis is unavailable.
+ */
+export function getWebSpeechTTSVoices():string[] {
+    // Use `typeof`: a bare reference to a missing global throws a ReferenceError.
+    if(typeof speechSynthesis === 'undefined'){
+        return []
+    }
+    return speechSynthesis.getVoices().map(v => v.name)
+}
+
+/**
+ * Fetches the list of voices from the ElevenLabs `/v1/voices` endpoint.
+ *
+ * The stored `elevenLabKey` is sent as the `xi-api-key` header when it is set.
+ *
+ * @returns The `voices` array from the response, or an empty array when the request
+ * fails or the response carries no such array, so callers can always iterate the result.
+ */
+export async function getElevenTTSVoices() {
+    const db = get(DataBase)
+    const headers:{[key:string]:string} = {}
+    if(db.elevenLabKey){
+        // Omit the header without a key; an `undefined` value would be sent as the text "undefined".
+        headers['xi-api-key'] = db.elevenLabKey
+    }
+
+    try {
+        const data = await fetch('https://api.elevenlabs.io/v1/voices', {
+            headers: headers
+        })
+        const res = await data.json()
+        if(res && Array.isArray(res.voices)){
+            return res.voices
+        }
+        console.error('Failed to load ElevenLabs voices', res)
+    } catch (error) {
+        console.error('Failed to load ElevenLabs voices', error)
+    }
+    return []
 }