[feat] add hf tts

This commit is contained in:
kwaroran
2023-11-28 02:48:47 +09:00
parent cecc1731bc
commit a6b11c22de
8 changed files with 668 additions and 564 deletions

View File

@@ -133,10 +133,15 @@
<span class="text-textcolor mt-2">VOICEVOX URL</span>
<TextInput size="sm" marginBottom bind:value={$DataBase.voicevoxUrl}/>
<span class="text-textcolor">OpenAI Key</span>
<TextInput size="sm" marginBottom bind:value={$DataBase.openAIKey}/>
<span class="text-textcolor mt-2">NovelAI API key</span>
<TextInput size="sm" marginBottom placeholder="pst-..." bind:value={$DataBase.NAIApiKey}/>
<span class="text-textcolor">Huggingface Key</span>
<TextInput size="sm" marginBottom bind:value={$DataBase.huggingfaceKey} placeholder="hf_..."/>
</Arcodion>
<Arcodion name={language.emotionImage} styled>

View File

@@ -519,6 +519,7 @@
<OptionInput value="VOICEVOX">VOICEVOX</OptionInput>
<OptionInput value="openai">OpenAI</OptionInput>
<OptionInput value="novelai">NovelAI</OptionInput>
<OptionInput value="huggingface">Huggingface</OptionInput>
</SelectInput>
@@ -604,7 +605,6 @@
</SelectInput>
{/if}
{#if currentChar.data.ttsMode === 'openai'}
<span class="text-textcolor">OpenAI TTS uses your OpenAI key on the chat model section</span>
<SelectInput className="mb-4 mt-2" bind:value={currentChar.data.oaiVoice}>
<OptionInput value="">Unset</OptionInput>
{#each oaiVoices as voice}
@@ -612,7 +612,19 @@
{/each}
</SelectInput>
{/if}
{#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab' || currentChar.data.ttsMode === 'VOICEVOX' || currentChar.data.ttsMode === 'novelai'}
{#if currentChar.data.ttsMode === 'huggingface'}
<span class="text-textcolor">Model</span>
<TextInput additionalClass="mb-4 mt-2" bind:value={currentChar.data.hfTTS.model} />
<span class="text-textcolor">Language</span>
<TextInput additionalClass="mb-4 mt-2" bind:value={currentChar.data.hfTTS.language} placeholder="en" />
{/if}
{#if currentChar.data.ttsMode === 'webspeech' ||
currentChar.data.ttsMode === 'elevenlab' ||
currentChar.data.ttsMode === 'VOICEVOX' ||
currentChar.data.ttsMode === 'huggingface' ||
currentChar.data.ttsMode === 'openai' ||
currentChar.data.ttsMode === 'novelai'}
<div class="flex items-center mt-2">
<Check bind:check={currentChar.data.ttsReadOnlyQuoted} name={language.ttsReadOnlyQuoted}/>
</div>

View File

@@ -301,6 +301,10 @@ export function characterFormatUpdate(index:number|character){
depth: 0,
prompt: ''
}
// Fill in Hugging Face TTS settings only when the character has none yet.
// A plain assignment here would reset a previously saved model/language
// every time the character format is updated.
cha.hfTTS ??= {
    model: '',
    language: 'en'
}
if(!cha.newGenData){
cha = updateInlayScreen(cha)
}

View File

@@ -52,4 +52,4 @@ export const runEmbedding = async (text: string):Promise<Float32Array> => {
let extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
let result = await extractor(text, { pooling: 'mean', normalize: true });
return result?.data ?? null;
}
}

View File

@@ -4,6 +4,7 @@ import { DataBase, type character } from "../storage/database";
import { translateVox } from "../translator/translator";
import { globalFetch } from "../storage/globalApi";
import { language } from "src/lang";
import { sleep } from "../util";
let sourceNode:AudioBufferSourceNode = null
@@ -161,6 +162,45 @@ export async function sayTTS(character:character,text:string) {
}
break;
}
case 'huggingface': {
    // Hugging Face TTS: POST the text to the character's configured model
    // on the hosted inference endpoint and play the audio that comes back.
    const hfModel = character.hfTTS?.model
    if(!hfModel){
        // `hfTTS` is optional on `character`; without a model name there is no endpoint to call.
        alertError("Huggingface TTS model is not set")
        return
    }
    // Bound the "model is still loading" retries so a model that never
    // becomes available cannot keep this call looping forever.
    const maxAttempts = 10
    for(let attempt = 0; attempt < maxAttempts; attempt++){
        const response = await fetch(`https://api-inference.huggingface.co/models/${hfModel}`, {
            method: 'POST',
            headers: {
                "Authorization": "Bearer " + db.huggingfaceKey,
                "Content-Type": "application/json",
            },
            body: JSON.stringify({
                inputs: text,
            })
        });
        // Substring match: the header may carry a charset suffix such as
        // "application/json; charset=utf-8".
        const contentType = response.headers.get('content-type') ?? ''
        if(response.status === 503 && contentType.includes('application/json')){
            // A 503 JSON body with `estimated_time` (seconds) is treated as "retry after waiting".
            const json = await response.json()
            const waitSeconds = Number(json?.estimated_time)
            if(Number.isFinite(waitSeconds) && waitSeconds > 0){
                await sleep(waitSeconds * 1000)
                continue
            }
            // The body has already been consumed as JSON, so report it
            // re-serialized instead of returning without any feedback.
            alertError(language.errors.httpError + JSON.stringify(json))
            return
        }
        if(response.status >= 400){
            alertError(language.errors.httpError + `${await response.text()}`)
            return
        }
        if(response.status !== 200){
            alertError("Error fetching or decoding audio data");
            return
        }
        const audioData = await response.arrayBuffer();
        // Create the AudioContext only once audio is actually available, so
        // retries do not pile up unused contexts.
        const audioContext = new AudioContext();
        try {
            const decodedData = await audioContext.decodeAudioData(audioData);
            // NOTE(review): kept local (as in the original, which shadowed the
            // module-level `sourceNode`); confirm whether playback should be
            // stored there so it can be stopped from elsewhere in this module.
            const hfSourceNode = audioContext.createBufferSource();
            hfSourceNode.buffer = decodedData;
            hfSourceNode.connect(audioContext.destination);
            // Release the context once playback has finished.
            hfSourceNode.onended = () => {
                void audioContext.close()
            }
            hfSourceNode.start();
        } catch (error) {
            console.error(error)
            void audioContext.close()
            alertError("Error fetching or decoding audio data");
        }
        return
    }
    // Every attempt reported that the model was still loading.
    alertError("Huggingface TTS model is still loading, try again later")
    return
}
}
}

View File

@@ -347,6 +347,7 @@ export function setDatabase(data:Database){
data.generationSeed ??= -1
data.newOAIHandle ??= true
data.gptVisionQuality ??= 'low'
data.huggingfaceKey ??= ''
data.reverseProxyOobaArgs ??= {
mode: 'instruct'
}
@@ -538,7 +539,7 @@ export interface Database{
reverseProxyOobaArgs: OobaChatCompletionRequestParams
tpo?:boolean
automark?:boolean
huggingfaceKey:string
allowAllExtentionFiles?:boolean
}
@@ -648,6 +649,10 @@ export interface character{
largePortrait?:boolean
lorePlus?:boolean
inlayViewScreen?:boolean
hfTTS?: {
model: string
language: string
}
}