From 410d0ecb8054f18bae54d0010aa63eb1c508e63d Mon Sep 17 00:00:00 2001 From: Junha Heo Date: Sat, 24 Aug 2024 12:56:01 +0900 Subject: [PATCH 1/4] feat: Add gptSoVitsConfig to character interface --- src/ts/storage/database.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ts/storage/database.ts b/src/ts/storage/database.ts index f0c80c32..b4f80b2b 100644 --- a/src/ts/storage/database.ts +++ b/src/ts/storage/database.ts @@ -802,6 +802,18 @@ export interface character{ voice?: string version?: string } + gptSoVitsConfig?:{ + ref_audio_data?:string + text_lang?: "auto" | "auto_yue" | "en" | "zh" | "ja" | "yue" | "ko" | "all_zh" | "all_ja" | "all_yue" | "all_ko" + text?:string + prompt?:string | null + prompt_lang?:string + top_p?:number + temperature?:number + speed?:number + top_k?:number + text_split_method?:string + } supaMemory?:boolean additionalAssets?:[string, string, string][] ttsReadOnlyQuoted?:boolean From 14fb2267c316efaddf8ad716c4c25db372e39856 Mon Sep 17 00:00:00 2001 From: Junha Heo Date: Sat, 24 Aug 2024 17:37:27 +0900 Subject: [PATCH 2/4] feat: Add support for gptSoVitsConfig in TTS processing --- src/lib/SideBars/CharConfig.svelte | 137 +++++++++++++++++++++++++++-- src/ts/process/tts.ts | 63 ++++++++++++- src/ts/storage/database.ts | 13 ++- 3 files changed, 199 insertions(+), 14 deletions(-) diff --git a/src/lib/SideBars/CharConfig.svelte b/src/lib/SideBars/CharConfig.svelte index 554beb21..914cc8b4 100644 --- a/src/lib/SideBars/CharConfig.svelte +++ b/src/lib/SideBars/CharConfig.svelte @@ -9,7 +9,7 @@ import LoreBook from "./LoreBook/LoreBookSetting.svelte"; import { alertConfirm, alertMd, alertNormal, alertSelectChar, alertTOS, showHypaV2Alert } from "../../ts/alert"; import BarIcon from "./BarIcon.svelte"; - import { findCharacterbyId, getAuthorNoteDefaultText, parseKeyValue, selectMultipleFile } from "../../ts/util"; + import { findCharacterbyId, getAuthorNoteDefaultText, parseKeyValue, selectMultipleFile, selectSingleFile } from "../../ts/util"; import { onDestroy } from "svelte"; import {isEqual} from 'lodash' import Help from "../Others/Help.svelte"; @@ -29,7 +29,8 @@ import { updateInlayScreen } from "src/ts/process/inlayScreen"; import { registerOnnxModel } from "src/ts/process/transformers"; import MultiLangInput from "../UI/GUI/MultiLangInput.svelte"; - import { applyModule } from "src/ts/process/modules"; + import { applyModule } from "src/ts/process/modules"; + import SliderInput from "../UI/GUI/SliderInput.svelte"; let subMenu = 0 @@ -103,6 +104,12 @@ } emos = currentChar.data.emotionImages currentChar = currentChar + + if (currentChar.data.ttsMode === 'gptsovits' && (currentChar.data as character).gptSoVitsConfig) { + if (!(currentChar.data as character).gptSoVitsConfig.use_prompt) { + (currentChar.data as character).gptSoVitsConfig.prompt = undefined + } + } }) let assetFileExtensions:string[] = [] @@ -147,6 +154,27 @@ version: 'v2' }; } + $: if (currentChar.data.ttsMode === 'gptsovits' && (currentChar.data as character).gptSoVitsConfig === undefined) { + (currentChar.data as character).gptSoVitsConfig = { + url: '', + ref_audio_path: 'C:/Users/user/Downloads/GPT-SoVITS-v2-240821', + ref_audio_data: { + fileName: '', + assetId: '' + }, + volume: 1.0, + text_lang: 'auto', + text: 'en', + use_prompt: false, + prompt_lang: 'en', + top_p: 1, + temperature: 0.7, + speed: 1, + top_k: 5, + text_split_method: 'cut0', + }; + } + {#if licensed !== 'private'} @@ -668,7 +696,7 @@ {#if currentChar.type === 'character'}

TTS

{language.provider} - { + { if(currentChar.type === 'character'){ currentChar.data.ttsSpeech = '' } @@ -681,6 +709,7 @@ NovelAI Huggingface VITS + GPT-SoVITS @@ -764,23 +793,20 @@ v1 v2 - {/if} - {#if currentChar.data.ttsMode === 'openai'} + {:else if currentChar.data.ttsMode === 'openai'} Unset {#each oaiVoices as voice} {voice} {/each} - {/if} - {#if currentChar.data.ttsMode === 'huggingface'} + {:else if currentChar.data.ttsMode === 'huggingface'} Model Language - {/if} - {#if currentChar.data.ttsMode === 'vits'} + {:else if currentChar.data.ttsMode === 'vits'} {#if currentChar.data.vits} {currentChar.data.vits.name ?? 'Unnamed VitsModel'} {:else} @@ -792,6 +818,99 @@ currentChar.data.vits = model } }}>{language.selectModel} + {:else if currentChar.data.ttsMode === 'gptsovits'} + Volume + + URL + + + Reference Audio Path (e.g. C:/Users/user/Downloads/GPT-SoVITS-v2-240821) + + + Reference Audio Data (3~10s audio file) + + Text Language + + Auto + Auto (Cantonese) + English + Chinese + Japanese + Cantonese + Korean + All Chinese + All Japanese + All Cantonese + All Korean + + + Use Reference Audio Script + + + {#if currentChar.data.gptSoVitsConfig.use_prompt} + Reference Audio Script + + {/if} + + Reference Audio Language + + Auto + Auto (Cantonese) + English + Chinese + Japanese + Cantonese + Korean + English And Chinese + English And Japanese + English And Cantonese + English And Korean + + Top P + + + Temperature + + + Speed + + + Top K + + + Text Split Method + + Cut 0 (No splitting) + Cut 1 (Split every 4 sentences) + Cut 2 (Split every 50 characters) + Cut 3 (Split by Chinese periods) + Cut 4 (Split by English periods) + Cut 5 (Split by various punctuation marks) + {/if} {#if currentChar.data.ttsMode}
diff --git a/src/ts/process/tts.ts b/src/ts/process/tts.ts index db3e32c2..6ecbb755 100644 --- a/src/ts/process/tts.ts +++ b/src/ts/process/tts.ts @@ -2,7 +2,7 @@ import { get } from "svelte/store"; import { alertError } from "../alert"; import { DataBase, type character } from "../storage/database"; import { runTranslator, translateVox } from "../translator/translator"; -import { globalFetch } from "../storage/globalApi"; +import { globalFetch, loadAsset } from "../storage/globalApi"; import { language } from "src/lang"; import { getCurrentCharacter, sleep } from "../util"; import { registerOnnxModel, runVITS } from "./transformers"; @@ -27,7 +27,7 @@ export async function sayTTS(character:character,text:string) { text = text.replace(/\*/g,'') if(character.ttsReadOnlyQuoted){ - const matches = text.match(/"(.*?)"/g) + const matches = text.match(/["「](.*?)["」]/g) if(matches && matches.length > 0){ text = matches.map(match => match.slice(1, -1)).join(""); } @@ -231,12 +231,71 @@ export async function sayTTS(character:character,text:string) { case 'vits':{ await runVITS(text, character.vits) } + case 'gptsovits':{ + const audioContext = new AudioContext(); + + const audio: Uint8Array = await loadAsset(character.gptSoVitsConfig.ref_audio_data.assetId); + const base64Audio = btoa(new Uint8Array(audio).reduce((data, byte) => data + String.fromCharCode(byte), '')); + + const body = { + text: text, + text_lang: character.gptSoVitsConfig.text_lang, + ref_audio_path: character.gptSoVitsConfig.ref_audio_path + '/public/audio/' + character.gptSoVitsConfig.ref_audio_data.fileName, + ref_audio_name: character.gptSoVitsConfig.ref_audio_data.fileName, + ref_audio_data: base64Audio, + prompt_text: undefined, + prompt_lang: character.gptSoVitsConfig.prompt_lang, + top_p: character.gptSoVitsConfig.top_p, + temperature: character.gptSoVitsConfig.temperature, + speed_factor: character.gptSoVitsConfig.speed, + top_k: character.gptSoVitsConfig.top_k, + text_split_method: character.gptSoVitsConfig.text_split_method, + parallel_infer: false, + } + + if (character.gptSoVitsConfig.use_prompt){ + body.prompt_text = character.gptSoVitsConfig.prompt + } + console.log(body) + + const response = await globalFetch(`${character.gptSoVitsConfig.url}/tts`, { + method: 'POST', + headers: { + "Content-Type": "application/json", + }, + body: body, + rawResponse: true, + }) + console.log(response) + + if (response.ok) { + const audioBuffer = response.data.buffer; + audioContext.decodeAudioData(audioBuffer, (decodedData) => { + const sourceNode = audioContext.createBufferSource(); + sourceNode.buffer = decodedData; + + const gainNode = audioContext.createGain(); + gainNode.gain.value = character.gptSoVitsConfig.volume || 1.0; + + sourceNode.connect(gainNode); + gainNode.connect(audioContext.destination); + + sourceNode.start(); + }); + } else { + const textBuffer: Uint8Array = response.data.buffer + const text = Buffer.from(textBuffer).toString('utf-8') + throw new Error(text); + } + } } } catch (error) { alertError(`TTS Error: ${error}`) } } + + export const oaiVoices = [ 'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer' ] diff --git a/src/ts/storage/database.ts b/src/ts/storage/database.ts index b4f80b2b..1f5f8b26 100644 --- a/src/ts/storage/database.ts +++ b/src/ts/storage/database.ts @@ -803,16 +803,23 @@ export interface character{ version?: string } gptSoVitsConfig?:{ - ref_audio_data?:string + url?:string + ref_audio_path?:string + ref_audio_data?: { + fileName:string + assetId:string + } + volume?:number text_lang?: "auto" | "auto_yue" | "en" | "zh" | "ja" | "yue" | "ko" | "all_zh" | "all_ja" | "all_yue" | "all_ko" text?:string + use_prompt?:boolean prompt?:string | null - prompt_lang?:string + prompt_lang?: "auto" | "auto_yue" | "en" | "zh" | "ja" | "yue" | "ko" | "all_zh" | "all_ja" | "all_yue" | "all_ko" top_p?:number temperature?:number speed?:number top_k?:number - text_split_method?:string + text_split_method?: "cut0" | "cut1" | "cut2" | "cut3" | "cut4" | "cut5" } supaMemory?:boolean additionalAssets?:[string, string, string][] From c14503625f04c1b2012a3accc5bcabfc33672d9d Mon Sep 17 00:00:00 2001 From: Junha Heo Date: Sat, 24 Aug 2024 17:57:34 +0900 Subject: [PATCH 3/4] refactor: Update translator.ts to include auto language detection --- src/ts/translator/translator.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ts/translator/translator.ts b/src/ts/translator/translator.ts index 7640249b..f7f447ec 100644 --- a/src/ts/translator/translator.ts +++ b/src/ts/translator/translator.ts @@ -165,7 +165,7 @@ async function translateMain(text:string, arg:{from:string, to:string, host:stri } - const url = `https://${arg.host}/translate_a/single?client=gtx&dt=t&sl=${arg.from}&tl=${arg.to}&q=` + encodeURIComponent(text) + const url = `https://${arg.host}/translate_a/single?client=gtx&dt=t&sl=auto&tl=${arg.to}&q=` + encodeURIComponent(text) From 9d82b1dbd6343c3be846c5db63962db5cfd6ff44 Mon Sep 17 00:00:00 2001 From: Junha Heo Date: Sat, 24 Aug 2024 19:01:52 +0900 Subject: [PATCH 4/4] feat: Add support for AAC audio files in CharConfig.svelte --- src/lib/SideBars/CharConfig.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/SideBars/CharConfig.svelte b/src/lib/SideBars/CharConfig.svelte index 914cc8b4..00441f7f 100644 --- a/src/lib/SideBars/CharConfig.svelte +++ b/src/lib/SideBars/CharConfig.svelte @@ -830,9 +830,9 @@ Reference Audio Data (3~10s audio file)