added VOICEVOX TTS

2023-05-25 19:47:47 +09:00
parent 45fab25bbf
commit aef1d84755
7 changed files with 3733 additions and 3 deletions
--- a/package-lock.json
+++ b/package-lock.json
--- a/src/lib/Setting/Pages/OtherBotSettings.svelte
+++ b/src/lib/Setting/Pages/OtherBotSettings.svelte
@@ -58,6 +58,8 @@
 <span class="text-neutral-200 mt-2">ElevenLabs API key</span>
 <input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.elevenLabKey}>
 <span class="text-neutral-200 mt-2">VOICEVOX URL</span>
 <input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.voicevoxUrl}>
 <span class="text-neutral-200 mt-4 text-lg font-bold">{language.SuperMemory} <Help key="superMemory" /></span>
 <span class="text-neutral-200 mt-4">{language.SuperMemory} {language.model}</span>
--- a/src/lib/SideBars/CharConfig.svelte
+++ b/src/lib/SideBars/CharConfig.svelte
@@ -15,7 +15,7 @@
    import Help from "../Others/Help.svelte";
    import RegexData from "./RegexData.svelte";
    import { exportChar } from "src/ts/characterCards";
-    import { getElevenTTSVoices, getWebSpeechTTSVoices } from "src/ts/process/tts";
+    import { getElevenTTSVoices, getWebSpeechTTSVoices, getVOICEVOXVoices } from "src/ts/process/tts";
    import { checkCharOrder } from "src/ts/globalApi";
    let subMenu = 0
@@ -460,6 +460,7 @@
            <option value="" class="bg-darkbg appearance-none">{language.disabled}</option>
            <option value="elevenlab" class="bg-darkbg appearance-none">ElevenLabs</option>
            <option value="webspeech" class="bg-darkbg appearance-none">Web Speech</option>
            <option value="VOICEVOX" class="bg-darkbg appearance-none">VOICEVOX</option>
        </select>
@@ -489,8 +490,28 @@
                        {/each}
                </select>
            {/await}
         {:else if currentChar.data.ttsMode === 'VOICEVOX'}
                <span class="text-neutral-200">Voice</span>
                <select class="bg-transparent input-text mt-2 mb-4 text-gray-200 appearance-none text-sm" bind:value={currentChar.data.ttsSpeech}>
                    {#await getVOICEVOXVoices() then voices}
                        {#each voices as voice}
                            <option value={voice.id} class="bg-darkbg appearance-none">{voice.name}</option>
                        {/each}
                    {/await}
                </select>
                <span class="text-neutral-200">Speed scale</span>
                <input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.SPEED_SCALE}/>
                <span class="text-neutral-200">Pitch scale</span>
                <input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.PITCH_SCALE}/>
                <span class="text-neutral-200">Volume scale</span>
                <input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.VOLUME_SCALE}/>
                <span class="text-neutral-200">Intonation scale</span>
                <input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.INTONATION_SCALE}/>
        {/if}
-        {#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab'}
+        {#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab' || currentChar.data.ttsMode === 'VOICEVOX'}
            <div class="flex items-center mt-2">
                <Check bind:check={currentChar.data.ttsReadOnlyQuoted}/>
                <span>{language.ttsReadOnlyQuoted}</span>
--- a/src/ts/characters.ts
+++ b/src/ts/characters.ts
@@ -286,7 +286,12 @@ export function characterFormatUpdate(index:number|character){
            creator: '',
            character_version: 0
        }
-
+        cha.voicevoxConfig = cha.voicevoxConfig ?? {
            SPEED_SCALE: 1,
            PITCH_SCALE: 0,
            INTONATION_SCALE: 1,
            VOLUME_SCALE: 1
        }
        if(cha.postHistoryInstructions){
            cha.chats[cha.chatPage].note += "\n" + cha.postHistoryInstructions
            cha.chats[cha.chatPage].note = cha.chats[cha.chatPage].note.trim()
--- a/src/ts/database.ts
+++ b/src/ts/database.ts
@@ -187,6 +187,9 @@ export function setDatabase(data:Database){
    if(checkNullish(data.elevenLabKey)){
        data.elevenLabKey = ''
    }
    if(checkNullish(data.voicevoxUrl)){
        data.voicevoxUrl = ''
    }
    if(checkNullish(data.supaMemoryPrompt)){
        data.supaMemoryPrompt = ''
    }
@@ -309,6 +312,12 @@ export interface character{
    }
    ttsMode?:string
    ttsSpeech?:string
    // Per-character VOICEVOX tuning. sayTTS sends these to the engine's /synthesis
    // endpoint as speedScale / pitchScale / intonationScale / volumeScale.
    // characterFormatUpdate fills in the whole object when it is missing.
    voicevoxConfig?:{
        // Sent as `speedScale`; characterFormatUpdate defaults it to 1.
        SPEED_SCALE?: number
        // Sent as `pitchScale`; characterFormatUpdate defaults it to 0.
        PITCH_SCALE?: number
        // Sent as `intonationScale`; characterFormatUpdate defaults it to 1.
        INTONATION_SCALE?: number
        // Sent as `volumeScale`; characterFormatUpdate defaults it to 1.
        VOLUME_SCALE?: number
    }
    supaMemory?:boolean
    additionalAssets?:[string, string][]
    ttsReadOnlyQuoted?:boolean
@@ -447,6 +456,7 @@ export interface Database{
    requestproxy: string
    showUnrecommended:boolean
    elevenLabKey:string
    voicevoxUrl:string
    useExperimental:boolean
    showMemoryLimit:boolean
    roundIcons:boolean
--- a/src/ts/process/tts.ts
+++ b/src/ts/process/tts.ts
@@ -1,6 +1,7 @@
 import { get } from "svelte/store";
 import { alertError } from "../alert";
 import { DataBase, type character } from "../database";
 import { translateVox } from "../translator/translator";
 let sourceNode:AudioBufferSourceNode = null
@@ -58,6 +59,44 @@ export async function sayTTS(character:character,text:string) {
                alertError(await da.text())
            }
        }
        case "VOICEVOX": {
            // VOICEVOX is driven in two HTTP steps against the user-configured engine URL:
            // POST /audio_query builds a synthesis query for the text, then POST /synthesis
            // renders that query (with this character's scale overrides) to WAV audio.
            const jpText = await translateVox(text)
            // Text and speaker are interpolated into a query string, so both must be
            // percent-encoded; a raw '&', '#', '+' or '%' in the text would corrupt the request.
            const speaker = encodeURIComponent(character.ttsSpeech ?? '')
            const query = await fetch(`${db.voicevoxUrl}/audio_query?text=${encodeURIComponent(jpText)}&speaker=${speaker}`, {
                method: 'POST',
                headers: { "Content-Type": "application/json"},
            })
            if (query.status === 200){
                const queryJson = await query.json();
                // voicevoxConfig is optional on the character type, and the settings UI binds
                // these values through plain <input> fields, so they may be missing, empty or
                // strings. Coerce to numbers and fall back to the defaults that
                // characterFormatUpdate assigns (speed 1, pitch 0, volume 1, intonation 1).
                const config = character.voicevoxConfig ?? {}
                const toScale = (value: unknown, fallback: number) => {
                    if (value === undefined || value === null || value === ''){
                        return fallback
                    }
                    const parsed = Number(value)
                    return Number.isFinite(parsed) ? parsed : fallback
                }
                const bodyData = {
                    accent_phrases: queryJson.accent_phrases,
                    speedScale: toScale(config.SPEED_SCALE, 1),
                    pitchScale: toScale(config.PITCH_SCALE, 0),
                    volumeScale: toScale(config.VOLUME_SCALE, 1),
                    intonationScale: toScale(config.INTONATION_SCALE, 1),
                    prePhonemeLength: queryJson.prePhonemeLength,
                    postPhonemeLength: queryJson.postPhonemeLength,
                    outputSamplingRate: queryJson.outputSamplingRate,
                    outputStereo: queryJson.outputStereo,
                    kana: queryJson.kana,
                }
                const getVoice = await fetch(`${db.voicevoxUrl}/synthesis?speaker=${speaker}`, {
                    method: 'POST',
                    headers: { "Content-Type": "application/json"},
                    body: JSON.stringify(bodyData),
                })
                if (getVoice.status !== 200){
                    // Surface engine errors the same way the ElevenLabs branch does.
                    alertError(await getVoice.text())
                }
                else if (getVoice.headers.get('content-type')?.startsWith('audio/wav')){
                    // Create the AudioContext only once there is audio to play, so failed
                    // requests do not leave an unused context behind.
                    const audioContext = new AudioContext();
                    const audioBuffer = await audioContext.decodeAudioData(await getVoice.arrayBuffer())
                    sourceNode = audioContext.createBufferSource();
                    sourceNode.buffer = audioBuffer;
                    sourceNode.connect(audioContext.destination);
                    sourceNode.start();
                }
            }
            else{
                alertError(await query.text())
            }
        }
    }
 }
@@ -90,4 +129,16 @@ export async function getElevenTTSVoices() {
    console.log(res)
    return res.voices
 }
 /**
  * Fetches the speaker list from the configured VOICEVOX engine (`GET {voicevoxUrl}/speakers`)
  * and reduces each speaker to a `{ name, id }` pair for the voice selector.
  *
  * The `id` is the style id of the speaker's default style: the first style named
  * 'ノーマル', 'ふつう' or '人間ver.'. If none of those names is present, the speaker's
  * first style is used. Speakers that expose no styles at all are omitted.
  *
  * @returns an array of `{ name, id }` objects, one per usable speaker.
  */
 export async function getVOICEVOXVoices() {
    const db = get(DataBase);
    const speakerData = await fetch(`${db.voicevoxUrl}/speakers`)
    const speakerList = await speakerData.json()
    // Style names treated as a speaker's "normal" voice.
    const defaultStyleNames = ['ノーマル', 'ふつう', '人間ver.']
    const speakersInfo = speakerList
      .filter((speaker) => Array.isArray(speaker.styles) && speaker.styles.length > 0)
      .map((speaker) => {
        // Compare the name against each candidate explicitly: `a === x || y || z` is
        // always truthy and would simply select the first style every time.
        const normalStyle = speaker.styles.find((style) => defaultStyleNames.includes(style.name)) ?? speaker.styles[0]
        return {'name': speaker.name, 'id': normalStyle.id}
      })
    return speakersInfo;
 }
--- a/src/ts/translator/translator.ts
+++ b/src/ts/translator/translator.ts
@@ -70,4 +70,45 @@ async function googleTrans(text:string, reverse:boolean) {
    return result
 }
 /**
  * Translates text for VOICEVOX speech synthesis.
  *
  * The text is offered to `translatorPlugin` first with source 'en' and target 'jp';
  * when that yields a truthy result its `content` is returned. Otherwise the text is
  * translated through `jpTrans`.
  *
  * NOTE(review): the plugin is given 'jp' while `jpTrans` requests `tl=ja`; confirm
  * which code translator plugins expect.
  *
  * @param text text to translate
  * @returns the translated text
  */
 export async function translateVox(text:string) {
    const pluginResult = await translatorPlugin(text, 'en', 'jp')
    return pluginResult ? pluginResult.content : jpTrans(text)
 }
 /**
  * Translates text to Japanese through the translate.googleapis.com `translate_a/single`
  * endpoint, with the source language auto-detected (`sl=auto`, `tl=ja`).
  *
  * If the JSON response is a bare string it is returned as-is. Otherwise the first
  * element of every entry in `response[0]` is collected, falsy pieces are dropped,
  * and the rest are concatenated.
  *
  * @param text text to translate; it is URI-encoded before being appended to the query
  * @returns the concatenated translated text
  */
 async function jpTrans(text:string) {
    const host = 'translate.googleapis.com'
    const query = encodeURIComponent(text)
    const response = await fetch(`https://${host}/translate_a/single?client=gtx&sl=auto&tl=ja&dt=t&q=` + query, {
        method: "GET",
    })
    const payload = await response.json()
    if(typeof payload === 'string'){
        return payload
    }
    // Each entry of payload[0] carries its translated piece at index 0; skip empty ones.
    return payload[0].reduce((joined, segment) => {
        const piece = segment[0]
        return piece ? joined + piece : joined
    }, '')
 }