added VOICEVOX TTS (#104)
Add VOICEVOX (https://voicevox.hiroshiba.jp/) to the TTS options. It translates the character's text into Japanese and reads it aloud in a choice of voices. You can run VOICEVOX in Google Colab and use it by entering the localtunnel URL. https://colab.research.google.com/drive/1tyeXJSklNfjW-aZJAib1JfgOMFarAwze Example: https://github.com/kwaroran/RisuAI/assets/61553001/5121ce6d-75a1-4ad4-ad27-4e214a2c5b4d
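For context, the VOICEVOX engine exposes a two-step HTTP API: POST /audio_query builds a synthesis query from the text, and POST /synthesis renders that query to a WAV. The sketch below shows that flow in isolation, in TypeScript; the engine URL and speaker id are placeholder assumptions (use the localtunnel URL printed by the Colab notebook and an id returned by /speakers), not values taken from this commit. The actual integration is in the diff that follows.

// Minimal sketch of the VOICEVOX engine flow, assuming the engine is reachable
// at a localtunnel URL such as the one printed by the Colab notebook.
const VOICEVOX_URL = 'https://example.loca.lt' // placeholder, not a real endpoint
const SPEAKER_ID = 1                           // placeholder speaker id

async function voicevoxWav(text: string): Promise<ArrayBuffer> {
    // Step 1: build a synthesis query from the (Japanese) text.
    const query = await fetch(
        `${VOICEVOX_URL}/audio_query?text=${encodeURIComponent(text)}&speaker=${SPEAKER_ID}`,
        { method: 'POST' }
    )
    if (!query.ok) throw new Error(`audio_query failed: ${query.status}`)
    const queryJson = await query.json()

    // Step 2: synthesize the query into WAV audio.
    const voice = await fetch(`${VOICEVOX_URL}/synthesis?speaker=${SPEAKER_ID}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(queryJson),
    })
    if (!voice.ok) throw new Error(`synthesis failed: ${voice.status}`)
    return voice.arrayBuffer()
}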
@@ -286,7 +286,12 @@ export function characterFormatUpdate(index:number|character){
        creator: '',
        character_version: 0
    }

    cha.voicevoxConfig = cha.voicevoxConfig ?? {
        SPEED_SCALE: 1,
        PITCH_SCALE: 0,
        INTONATION_SCALE: 1,
        VOLUME_SCALE: 1
    }
    if(cha.postHistoryInstructions){
        cha.chats[cha.chatPage].note += "\n" + cha.postHistoryInstructions
        cha.chats[cha.chatPage].note = cha.chats[cha.chatPage].note.trim()

@@ -187,6 +187,9 @@ export function setDatabase(data:Database){
    if(checkNullish(data.elevenLabKey)){
        data.elevenLabKey = ''
    }
    if(checkNullish(data.voicevoxUrl)){
        data.voicevoxUrl = ''
    }
    if(checkNullish(data.supaMemoryPrompt)){
        data.supaMemoryPrompt = ''
    }

@@ -309,6 +312,12 @@ export interface character{
    }
    ttsMode?:string
    ttsSpeech?:string
    voicevoxConfig?:{
        SPEED_SCALE?: number
        PITCH_SCALE?: number
        INTONATION_SCALE?: number
        VOLUME_SCALE?: number
    }
    supaMemory?:boolean
    additionalAssets?:[string, string][]
    ttsReadOnlyQuoted?:boolean

@@ -447,6 +456,7 @@ export interface Database{
    requestproxy: string
    showUnrecommended:boolean
    elevenLabKey:string
    voicevoxUrl:string
    useExperimental:boolean
    showMemoryLimit:boolean
    roundIcons:boolean

@@ -1,6 +1,7 @@
import { get } from "svelte/store";
import { alertError } from "../alert";
import { DataBase, type character } from "../database";
import { translateVox } from "../translator/translator";

let sourceNode:AudioBufferSourceNode = null

@@ -58,6 +59,44 @@ export async function sayTTS(character:character,text:string) {
                alertError(await da.text())
            }
        }
        case "VOICEVOX": {
            // VOICEVOX only speaks Japanese, so translate the text first.
            const jpText = await translateVox(text)
            console.log(jpText);
            const audioContext = new AudioContext();
            // Ask the engine to build a synthesis query from the text.
            const query = await fetch(`${db.voicevoxUrl}/audio_query?text=${jpText}&speaker=${character.ttsSpeech}`, {
                method: 'POST',
                headers: { "Content-Type": "application/json"},
            })
            if (query.status == 200){
                const queryJson = await query.json();
                // Rebuild the query, overriding the scales with the character's voicevoxConfig.
                const bodyData = {
                    accent_phrases: queryJson.accent_phrases,
                    speedScale: character.voicevoxConfig.SPEED_SCALE,
                    pitchScale: character.voicevoxConfig.PITCH_SCALE,
                    volumeScale: character.voicevoxConfig.VOLUME_SCALE,
                    intonationScale: character.voicevoxConfig.INTONATION_SCALE,
                    prePhonemeLength: queryJson.prePhonemeLength,
                    postPhonemeLength: queryJson.postPhonemeLength,
                    outputSamplingRate: queryJson.outputSamplingRate,
                    outputStereo: queryJson.outputStereo,
                    kana: queryJson.kana,
                }
                console.log(JSON.stringify(bodyData))
                console.log(bodyData)
                // Synthesize the query into a WAV and play it through the Web Audio API.
                const getVoice = await fetch(`${db.voicevoxUrl}/synthesis?speaker=${character.ttsSpeech}`, {
                    method: 'POST',
                    headers: { "Content-Type": "application/json"},
                    body: JSON.stringify(bodyData),
                })
                if (getVoice.status == 200 && getVoice.headers.get('content-type') === 'audio/wav'){
                    const audioBuffer = await audioContext.decodeAudioData(await getVoice.arrayBuffer())
                    sourceNode = audioContext.createBufferSource();
                    sourceNode.buffer = audioBuffer;
                    sourceNode.connect(audioContext.destination);
                    sourceNode.start();
                }
            }
        }
    }

}

@@ -90,4 +129,16 @@ export async function getElevenTTSVoices() {

    console.log(res)
    return res.voices
}

export async function getVOICEVOXVoices() {
    const db = get(DataBase);
    const speakerData = await fetch(`${db.voicevoxUrl}/speakers`)
    const speakerList = await speakerData.json()
    const speakersInfo = speakerList.map((speaker) => {
        // Prefer the speaker's normal/default style; fall back to the first style if none matches.
        const normalStyle = speaker.styles.find((style) => ['ノーマル', 'ふつう', '人間ver.'].includes(style.name)) ?? speaker.styles[0]
        return {'name': speaker.name, 'id': normalStyle.id}
    })

    return speakersInfo;
}

@@ -70,4 +70,45 @@ async function googleTrans(text:string, reverse:boolean) {

    return result

}

export async function translateVox(text:string) {
    const plug = await translatorPlugin(text, 'en', 'jp')
    if(plug){
        return plug.content
    }

    return jpTrans(text)
}

// Translate to Japanese via the public Google Translate web endpoint.
async function jpTrans(text:string) {

    const host = 'translate.googleapis.com'

    const url = `https://${host}/translate_a/single?client=gtx&sl=auto&tl=ja&dt=t&q=` + encodeURIComponent(text)

    const f = await fetch(url, {
        method: "GET",
    })
    const res = await f.json()

    if(typeof(res) === 'string'){
        return res as unknown as string
    }

    const result = res[0].map((s) => s[0]).filter(Boolean).join('');
    return result
}