add fish speech tts

This commit is contained in:
Junha Heo
2024-10-12 01:21:16 +09:00
parent 648f4a8597
commit 826e59dfc5
4 changed files with 132 additions and 1 deletions

View File

@@ -347,6 +347,9 @@
<span class="text-textcolor">Huggingface Key</span> <span class="text-textcolor">Huggingface Key</span>
<TextInput size="sm" marginBottom bind:value={$DataBase.huggingfaceKey} placeholder="hf_..."/> <TextInput size="sm" marginBottom bind:value={$DataBase.huggingfaceKey} placeholder="hf_..."/>
<span class="text-textcolor">fish-speech API Key</span>
<TextInput size="sm" marginBottom bind:value={$DataBase.fishSpeechKey}/>
</Arcodion> </Arcodion>
{/if} {/if}

View File

@@ -82,7 +82,7 @@
} }
const unsub = DataBase.subscribe((v) => { const unsub = DataBase.subscribe(async (v) => {
database = v database = v
const cha = (v.characters[$selectedCharID]) const cha = (v.characters[$selectedCharID])
if(!cha){ if(!cha){
@@ -117,6 +117,7 @@
} }
} }
}) })
let assetFileExtensions:string[] = [] let assetFileExtensions:string[] = []
@@ -184,10 +185,56 @@
}; };
} }
// Entries rendered as options in the fish-speech model selector; empty until a fetch fills it.
let fishSpeechModels: Array<{ _id: string; title: string; description: string }> = []
// Reactive statement: when the character's TTS mode is 'fishspeech' and it has no
// fishSpeechConfig yet, seed one so the fish-speech settings inputs have an object to bind to.
// NOTE(review): the assignment is written against `currentChar` directly rather than a local
// alias — presumably so Svelte registers the mutation; confirm before refactoring.
$: if (currentChar.data.ttsMode === 'fishspeech' && (currentChar.data as character).fishSpeechConfig === undefined) {
(currentChar.data as character).fishSpeechConfig = {
model: {
// An empty _id corresponds to the "Not selected" option in the model selector.
_id: '',
title: '',
description: ''
},
chunk_length: 200,
normalize: false,
};
}
$: { $: {
if(currentChar.type === 'group' && ($CharConfigSubMenu === 4 || $CharConfigSubMenu === 5)){ if(currentChar.type === 'group' && ($CharConfigSubMenu === 4 || $CharConfigSubMenu === 5)){
$CharConfigSubMenu = 0 $CharConfigSubMenu = 0
} }
}
/**
 * Fetches the model list from the fish.audio `/model` endpoint (queried with `self=true`)
 * using the stored fish-speech API key, and stores the result in `fishSpeechModels`.
 *
 * On any failure (network error, non-2xx response, or a payload without an `items`
 * array) the list is reset to empty and the error is rethrown, so an `{#await}` block
 * waiting on this call reaches its `{:catch}` branch instead of silently rendering an
 * empty selector.
 *
 * @throws Error when the request fails or the response does not contain an `items` array.
 */
async function getFishSpeechModels(): Promise<void> {
    try {
        const res = await fetch(`https://api.fish.audio/model?self=true`, {
            headers: {
                'Authorization': `Bearer ${$DataBase.fishSpeechKey}`
            }
        });
        // A rejected key or server error still returns a body; treat it as a failure
        // rather than as "no models".
        if (!res.ok) {
            throw new Error(`fish-speech model request failed with status ${res.status}`);
        }
        const data = await res.json();
        if (!Array.isArray(data?.items)) {
            throw new Error('Expected an array of items in the fish-speech model response');
        }
        // Keep only the fields the selector needs, defaulting missing ones to ''.
        fishSpeechModels = data.items.map((item) => ({
            _id: item?._id || '',
            title: item?.title || '',
            description: item?.description || ''
        }));
    } catch (error) {
        console.error('Error fetching fish speech models:', error);
        fishSpeechModels = [];
        // Propagate so the caller can show its error state.
        throw error;
    }
}
</script> </script>
@@ -665,6 +712,7 @@
<OptionInput value="huggingface">Huggingface</OptionInput> <OptionInput value="huggingface">Huggingface</OptionInput>
<OptionInput value="vits">VITS</OptionInput> <OptionInput value="vits">VITS</OptionInput>
<OptionInput value="gptsovits">GPT-SoVITS</OptionInput> <OptionInput value="gptsovits">GPT-SoVITS</OptionInput>
<OptionInput value="fishspeech">fish-speech</OptionInput>
</SelectInput> </SelectInput>
@@ -877,6 +925,31 @@
<OptionInput value="cut4">Cut 4 (Split by English periods)</OptionInput> <OptionInput value="cut4">Cut 4 (Split by English periods)</OptionInput>
<OptionInput value="cut5">Cut 5 (Split by various punctuation marks)</OptionInput> <OptionInput value="cut5">Cut 5 (Split by various punctuation marks)</OptionInput>
</SelectInput> </SelectInput>
{:else if currentChar.data.ttsMode === 'fishspeech'}
{#await getFishSpeechModels()}
<span class="text-textcolor">Loading...</span>
{:then}
<span class="text-textcolor">Model</span>
<SelectInput className="mb-4 mt-2" bind:value={currentChar.data.fishSpeechConfig.model._id}>
<OptionInput value="">Not selected</OptionInput>
{#each fishSpeechModels as model}
<OptionInput value={model._id}>
<div class="flex items-center">
<span>{model.title}</span>
<span class="text-sm text-textcolor2">{model.description}</span>
</div>
</OptionInput>
{/each}
</SelectInput>
{:catch}
<span class="text-textcolor">An error occurred while fetching the models.</span>
{/await}
<span class="text-textcolor">Chunk Length</span>
<NumberInput className="mb-4 mt-2" bind:value={currentChar.data.fishSpeechConfig.chunk_length}/>
<span class="mt-2 text-textcolor">Normalize</span>
<Check className="mb-4 mt-2" bind:check={currentChar.data.fishSpeechConfig.normalize}/>
{/if} {/if}
{#if currentChar.data.ttsMode} {#if currentChar.data.ttsMode}
<div class="flex items-center mt-2"> <div class="flex items-center mt-2">

View File

@@ -311,6 +311,49 @@ export async function sayTTS(character:character,text:string) {
throw new Error(text); throw new Error(text);
} }
} }
case 'fishspeech':{
    // fishSpeechConfig and its model are both optional on the character type, so a
    // character that never had fish-speech configured must hit the friendly error
    // below rather than a TypeError on property access.
    const fishSpeechConfig = character.fishSpeechConfig
    const modelId = fishSpeechConfig?.model?._id
    if (!fishSpeechConfig || !modelId){
        throw new Error('FishSpeech Model is not selected')
    }

    const body = {
        text: text,
        reference_id: modelId,
        chunk_length: fishSpeechConfig.chunk_length,
        normalize: fishSpeechConfig.normalize,
        format: 'mp3',
        mp3_bitrate: 192,
    }

    const response = await globalFetch(`https://api.fish.audio/v1/tts`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${db.fishSpeechKey}`
        },
        body: body,
        rawResponse: true,
    })

    if (!response.ok) {
        // The error payload arrives as raw bytes; decode it so the message is readable.
        const textBuffer: Uint8Array = response.data.buffer
        const errorText = Buffer.from(textBuffer).toString('utf-8')
        throw new Error(errorText);
    }

    // Only create the audio context once there is audio to play.
    const audioContext = new AudioContext();
    const audioBuffer = response.data.buffer;
    // Await the promise form so a decode failure propagates to the enclosing
    // try/catch instead of being dropped by a callback with no error handler.
    const decodedData = await audioContext.decodeAudioData(audioBuffer);
    const sourceNode = audioContext.createBufferSource();
    sourceNode.buffer = decodedData;
    sourceNode.connect(audioContext.destination);
    // Release the context when playback ends; otherwise every call leaks one.
    sourceNode.onended = () => { void audioContext.close() }
    sourceNode.start();
    break
}
} }
} catch (error) { } catch (error) {
alertError(`TTS Error: ${error}`) alertError(`TTS Error: ${error}`)

View File

@@ -351,6 +351,7 @@ export function setDatabase(data:Database){
data.newOAIHandle ??= true data.newOAIHandle ??= true
data.gptVisionQuality ??= 'low' data.gptVisionQuality ??= 'low'
data.huggingfaceKey ??= '' data.huggingfaceKey ??= ''
data.fishSpeechKey ??= ''
data.statistics ??= {} data.statistics ??= {}
data.reverseProxyOobaArgs ??= { data.reverseProxyOobaArgs ??= {
mode: 'instruct' mode: 'instruct'
@@ -651,6 +652,7 @@ export interface Database{
tpo?:boolean tpo?:boolean
automark?:boolean automark?:boolean
huggingfaceKey:string huggingfaceKey:string
fishSpeechKey:string
allowAllExtentionFiles?:boolean allowAllExtentionFiles?:boolean
translatorPrompt:string translatorPrompt:string
translatorMaxResponse:number translatorMaxResponse:number
@@ -863,6 +865,16 @@ export interface character{
top_k?:number top_k?:number
text_split_method?: "cut0" | "cut1" | "cut2" | "cut3" | "cut4" | "cut5" text_split_method?: "cut0" | "cut1" | "cut2" | "cut3" | "cut4" | "cut5"
} }
/** Per-character settings used when the TTS mode is 'fishspeech'. */
fishSpeechConfig?:{
/**
 * Selected voice model. Its `_id` is sent as `reference_id` in the TTS request;
 * an empty `_id` is treated as "no model selected".
 */
model?: {
_id:string
title:string
description:string
},
/** Sent as `chunk_length` in the TTS request; the character editor seeds it with 200. */
chunk_length:number,
/** Sent as `normalize` in the TTS request; the character editor seeds it with false. */
normalize:boolean,
}
supaMemory?:boolean supaMemory?:boolean
additionalAssets?:[string, string, string][] additionalAssets?:[string, string, string][]
ttsReadOnlyQuoted?:boolean ttsReadOnlyQuoted?:boolean