refactor: Update TTS function to handle auto audio path
This commit is contained in:
@@ -112,6 +112,13 @@
|
|||||||
if (!(currentChar.data as character).gptSoVitsConfig.use_prompt) {
|
if (!(currentChar.data as character).gptSoVitsConfig.use_prompt) {
|
||||||
(currentChar.data as character).gptSoVitsConfig.prompt = undefined
|
(currentChar.data as character).gptSoVitsConfig.prompt = undefined
|
||||||
}
|
}
|
||||||
|
if((currentChar.data as character).gptSoVitsConfig.use_auto_path){
|
||||||
|
(currentChar.data as character).gptSoVitsConfig.ref_audio_path = undefined;
|
||||||
|
|
||||||
|
(currentChar.data as character).gptSoVitsConfig.use_prompt = false;
|
||||||
|
(currentChar.data as character).gptSoVitsConfig.prompt = undefined;
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -160,7 +167,9 @@
|
|||||||
$: if (currentChar.data.ttsMode === 'gptsovits' && (currentChar.data as character).gptSoVitsConfig === undefined) {
|
$: if (currentChar.data.ttsMode === 'gptsovits' && (currentChar.data as character).gptSoVitsConfig === undefined) {
|
||||||
(currentChar.data as character).gptSoVitsConfig = {
|
(currentChar.data as character).gptSoVitsConfig = {
|
||||||
url: '',
|
url: '',
|
||||||
ref_audio_path: 'C:/Users/user/Downloads/GPT-SoVITS-v2-240821',
|
use_auto_path: false,
|
||||||
|
ref_audio_path: '',
|
||||||
|
use_long_audio: false,
|
||||||
ref_audio_data: {
|
ref_audio_data: {
|
||||||
fileName: '',
|
fileName: '',
|
||||||
assetId: ''
|
assetId: ''
|
||||||
@@ -835,15 +844,24 @@
|
|||||||
<span class="text-textcolor">URL</span>
|
<span class="text-textcolor">URL</span>
|
||||||
<TextInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.url}/>
|
<TextInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.url}/>
|
||||||
|
|
||||||
<span class="text-textcolor">Reference Audio Path (e.g. C:/Users/user/Downloads/GPT-SoVITS-v2-240821)</span>
|
<span class="text-textcolor">Use Auto Path</span>
|
||||||
<TextInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.ref_audio_path}/>
|
<Check bind:check={currentChar.data.gptSoVitsConfig.use_auto_path}/>
|
||||||
|
|
||||||
|
{#if !currentChar.data.gptSoVitsConfig.use_auto_path}
|
||||||
|
<span class="text-textcolor">Reference Audio Path (e.g. C:/Users/user/Downloads/GPT-SoVITS-v2-240821)</span>
|
||||||
|
<TextInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.ref_audio_path}/>
|
||||||
|
{/if}
|
||||||
|
|
||||||
|
<span class="text-textcolor">Use Long Audio</span>
|
||||||
|
<Check bind:check={currentChar.data.gptSoVitsConfig.use_long_audio}/>
|
||||||
|
|
||||||
<span class="text-textcolor">Reference Audio Data (3~10s audio file)</span>
|
<span class="text-textcolor">Reference Audio Data (3~10s audio file)</span>
|
||||||
<Button on:click={async () => {
|
<Button on:click={async () => {
|
||||||
const audio = await selectSingleFile([
|
const audio = await selectSingleFile([
|
||||||
'wav',
|
'wav',
|
||||||
'ogg',
|
'ogg',
|
||||||
'aac'
|
'aac',
|
||||||
|
'mp3'
|
||||||
])
|
])
|
||||||
if(!audio){
|
if(!audio){
|
||||||
return null
|
return null
|
||||||
@@ -866,40 +884,42 @@
|
|||||||
</Button>
|
</Button>
|
||||||
<span class="text-textcolor">Text Language</span>
|
<span class="text-textcolor">Text Language</span>
|
||||||
<SelectInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.text_lang}>
|
<SelectInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.text_lang}>
|
||||||
<OptionInput value="auto">Auto</OptionInput>
|
<OptionInput value="auto">Multi-language Mixed</OptionInput>
|
||||||
<OptionInput value="auto_yue">Auto (Cantonese)</OptionInput>
|
<OptionInput value="auto_yue">Multi-language Mixed (Cantonese)</OptionInput>
|
||||||
<OptionInput value="en">English</OptionInput>
|
<OptionInput value="en">English</OptionInput>
|
||||||
<OptionInput value="zh">Chinese</OptionInput>
|
<OptionInput value="zh">Chinese-English Mixed</OptionInput>
|
||||||
<OptionInput value="ja">Japanese</OptionInput>
|
<OptionInput value="ja">Japanese-English Mixed</OptionInput>
|
||||||
<OptionInput value="yue">Cantonese</OptionInput>
|
<OptionInput value="yue">Cantonese-English Mixed</OptionInput>
|
||||||
<OptionInput value="ko">Korean</OptionInput>
|
<OptionInput value="ko">Korean-English Mixed</OptionInput>
|
||||||
<OptionInput value="all_zh">All Chinese</OptionInput>
|
<OptionInput value="all_zh">Chinese</OptionInput>
|
||||||
<OptionInput value="all_ja">All Japanese</OptionInput>
|
<OptionInput value="all_ja">Japanese</OptionInput>
|
||||||
<OptionInput value="all_yue">All Cantonese</OptionInput>
|
<OptionInput value="all_yue">Cantonese</OptionInput>
|
||||||
<OptionInput value="all_ko">All Korean</OptionInput>
|
<OptionInput value="all_ko">Korean</OptionInput>
|
||||||
</SelectInput>
|
</SelectInput>
|
||||||
|
|
||||||
<span class="text-textcolor">Use Reference Audio Script</span>
|
{#if !currentChar.data.gptSoVitsConfig.use_long_audio}
|
||||||
<Check bind:check={currentChar.data.gptSoVitsConfig.use_prompt}/>
|
<span class="text-textcolor">Use Reference Audio Script</span>
|
||||||
|
<Check bind:check={currentChar.data.gptSoVitsConfig.use_prompt}/>
|
||||||
|
{/if}
|
||||||
|
|
||||||
{#if currentChar.data.gptSoVitsConfig.use_prompt}
|
{#if currentChar.data.gptSoVitsConfig.use_prompt && !currentChar.data.gptSoVitsConfig.use_long_audio}
|
||||||
<span class="text-textcolor">Reference Audio Script</span>
|
<span class="text-textcolor">Reference Audio Script</span>
|
||||||
<TextAreaInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.prompt}/>
|
<TextAreaInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.prompt}/>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
<span class="text-textcolor">Reference Audio Language</span>
|
<span class="text-textcolor">Reference Audio Language</span>
|
||||||
<SelectInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.prompt_lang}>
|
<SelectInput className="mb-4 mt-2" bind:value={currentChar.data.gptSoVitsConfig.prompt_lang}>
|
||||||
<OptionInput value="auto">Auto</OptionInput>
|
<OptionInput value="auto">Multi-language Mixed</OptionInput>
|
||||||
<OptionInput value="auto_yue">Auto (Cantonese)</OptionInput>
|
<OptionInput value="auto_yue">Multi-language Mixed (Cantonese)</OptionInput>
|
||||||
<OptionInput value="en">English</OptionInput>
|
<OptionInput value="en">English</OptionInput>
|
||||||
<OptionInput value="zh">Chinese</OptionInput>
|
<OptionInput value="zh">Chinese-English Mixed</OptionInput>
|
||||||
<OptionInput value="ja">Japanese</OptionInput>
|
<OptionInput value="ja">Japanese-English Mixed</OptionInput>
|
||||||
<OptionInput value="yue">Cantonese</OptionInput>
|
<OptionInput value="yue">Cantonese-English Mixed</OptionInput>
|
||||||
<OptionInput value="ko">Korean</OptionInput>
|
<OptionInput value="ko">Korean-English Mixed</OptionInput>
|
||||||
<OptionInput value="all_zh">English And Chinese</OptionInput>
|
<OptionInput value="all_zh">Chinese</OptionInput>
|
||||||
<OptionInput value="all_ja">English And Japanese</OptionInput>
|
<OptionInput value="all_ja">Japanese</OptionInput>
|
||||||
<OptionInput value="all_yue">English And Cantonese</OptionInput>
|
<OptionInput value="all_yue">Cantonese</OptionInput>
|
||||||
<OptionInput value="all_ko">English And Korean</OptionInput>
|
<OptionInput value="all_ko">Korean</OptionInput>
|
||||||
</SelectInput>
|
</SelectInput>
|
||||||
<span class="text-textcolor">Top P</span>
|
<span class="text-textcolor">Top P</span>
|
||||||
<SliderInput min={0.0} max={1.0} step={0.05} fixed={2} bind:value={currentChar.data.gptSoVitsConfig.top_p}/>
|
<SliderInput min={0.0} max={1.0} step={0.05} fixed={2} bind:value={currentChar.data.gptSoVitsConfig.top_p}/>
|
||||||
|
|||||||
@@ -235,12 +235,12 @@ export async function sayTTS(character:character,text:string) {
|
|||||||
const audioContext = new AudioContext();
|
const audioContext = new AudioContext();
|
||||||
|
|
||||||
const audio: Uint8Array = await loadAsset(character.gptSoVitsConfig.ref_audio_data.assetId);
|
const audio: Uint8Array = await loadAsset(character.gptSoVitsConfig.ref_audio_data.assetId);
|
||||||
const base64Audio = btoa(new Uint8Array(audio).reduce((data, byte) => data + String.fromCharCode(byte), ''));
|
const base64Audio = btoa(new Uint8Array(audio).reduce((data, byte) => data + String.fromCharCode(byte), ''));
|
||||||
|
|
||||||
const body = {
|
const body = {
|
||||||
text: text,
|
text: text,
|
||||||
text_lang: character.gptSoVitsConfig.text_lang,
|
text_lang: character.gptSoVitsConfig.text_lang,
|
||||||
ref_audio_path: character.gptSoVitsConfig.ref_audio_path + '/public/audio/' + character.gptSoVitsConfig.ref_audio_data.fileName,
|
ref_audio_path: undefined,
|
||||||
ref_audio_name: character.gptSoVitsConfig.ref_audio_data.fileName,
|
ref_audio_name: character.gptSoVitsConfig.ref_audio_data.fileName,
|
||||||
ref_audio_data: base64Audio,
|
ref_audio_data: base64Audio,
|
||||||
prompt_text: undefined,
|
prompt_text: undefined,
|
||||||
@@ -250,18 +250,41 @@ export async function sayTTS(character:character,text:string) {
|
|||||||
speed_factor: character.gptSoVitsConfig.speed,
|
speed_factor: character.gptSoVitsConfig.speed,
|
||||||
top_k: character.gptSoVitsConfig.top_k,
|
top_k: character.gptSoVitsConfig.top_k,
|
||||||
text_split_method: character.gptSoVitsConfig.text_split_method,
|
text_split_method: character.gptSoVitsConfig.text_split_method,
|
||||||
parallel_infer: false,
|
parallel_infer: true,
|
||||||
|
// media_type: character.gptSoVitsConfig.ref_audio_data.fileName.split('.')[1],
|
||||||
|
ref_free: character.gptSoVitsConfig.use_long_audio || !character.gptSoVitsConfig.use_prompt,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (character.gptSoVitsConfig.use_prompt){
|
if (character.gptSoVitsConfig.use_prompt){
|
||||||
body.prompt_text = character.gptSoVitsConfig.prompt
|
body.prompt_text = character.gptSoVitsConfig.prompt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (character.gptSoVitsConfig.use_auto_path){
|
||||||
|
console.log('auto')
|
||||||
|
const path = await globalFetch(`${character.gptSoVitsConfig.url}/get_path`, {
|
||||||
|
method: 'GET',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
},
|
||||||
|
rawResponse: false,
|
||||||
|
|
||||||
|
})
|
||||||
|
console.log(path)
|
||||||
|
if(path.ok){
|
||||||
|
body.ref_audio_path = path.data.message + '/public/audio/' + character.gptSoVitsConfig.ref_audio_data.fileName
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
throw new Error('Failed to Auto get path')
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
body.ref_audio_path = character.gptSoVitsConfig.ref_audio_path + '/public/audio/' + character.gptSoVitsConfig.ref_audio_data.fileName
|
||||||
|
}
|
||||||
console.log(body)
|
console.log(body)
|
||||||
|
|
||||||
const response = await globalFetch(`${character.gptSoVitsConfig.url}/tts`, {
|
const response = await globalFetch(`${character.gptSoVitsConfig.url}/tts`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "application/json",
|
'Content-Type': 'application/json'
|
||||||
},
|
},
|
||||||
body: body,
|
body: body,
|
||||||
rawResponse: true,
|
rawResponse: true,
|
||||||
|
|||||||
Reference in New Issue
Block a user