added VOICEVOX TTS

This commit is contained in:
drPpZero
2023-05-25 19:47:47 +09:00
parent 45fab25bbf
commit aef1d84755
7 changed files with 3733 additions and 3 deletions

3600
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -58,6 +58,8 @@
<span class="text-neutral-200 mt-2">ElevenLabs API key</span> <span class="text-neutral-200 mt-2">ElevenLabs API key</span>
<input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.elevenLabKey}> <input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.elevenLabKey}>
<span class="text-neutral-200 mt-2">VOICEVOX URL</span>
<input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.voicevoxUrl}>
<span class="text-neutral-200 mt-4 text-lg font-bold">{language.SuperMemory} <Help key="superMemory" /></span> <span class="text-neutral-200 mt-4 text-lg font-bold">{language.SuperMemory} <Help key="superMemory" /></span>
<span class="text-neutral-200 mt-4">{language.SuperMemory} {language.model}</span> <span class="text-neutral-200 mt-4">{language.SuperMemory} {language.model}</span>

View File

@@ -15,7 +15,7 @@
import Help from "../Others/Help.svelte"; import Help from "../Others/Help.svelte";
import RegexData from "./RegexData.svelte"; import RegexData from "./RegexData.svelte";
import { exportChar } from "src/ts/characterCards"; import { exportChar } from "src/ts/characterCards";
import { getElevenTTSVoices, getWebSpeechTTSVoices } from "src/ts/process/tts"; import { getElevenTTSVoices, getWebSpeechTTSVoices, getVOICEVOXVoices } from "src/ts/process/tts";
import { checkCharOrder } from "src/ts/globalApi"; import { checkCharOrder } from "src/ts/globalApi";
let subMenu = 0 let subMenu = 0
@@ -460,6 +460,7 @@
<option value="" class="bg-darkbg appearance-none">{language.disabled}</option> <option value="" class="bg-darkbg appearance-none">{language.disabled}</option>
<option value="elevenlab" class="bg-darkbg appearance-none">ElevenLabs</option> <option value="elevenlab" class="bg-darkbg appearance-none">ElevenLabs</option>
<option value="webspeech" class="bg-darkbg appearance-none">Web Speech</option> <option value="webspeech" class="bg-darkbg appearance-none">Web Speech</option>
<option value="VOICEVOX" class="bg-darkbg appearance-none">VOICEVOX</option>
</select> </select>
@@ -489,8 +490,28 @@
{/each} {/each}
</select> </select>
{/await} {/await}
{:else if currentChar.data.ttsMode === 'VOICEVOX'}
<span class="text-neutral-200">Voice</span>
<select class="bg-transparent input-text mt-2 mb-4 text-gray-200 appearance-none text-sm" bind:value={currentChar.data.ttsSpeech}>
{#await getVOICEVOXVoices() then voices}
{#each voices as voice}
<option value={voice.id} class="bg-darkbg appearance-none">{voice.name}</option>
{/each}
{/await}
</select>
<span class="text-neutral-200">Speed scale</span>
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.SPEED_SCALE}/>
<span class="text-neutral-200">Pitch scale</span>
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.PITCH_SCALE}/>
<span class="text-neutral-200">Volume scale</span>
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.VOLUME_SCALE}/>
<span class="text-neutral-200">Intonation scale</span>
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.INTONATION_SCALE}/>
{/if} {/if}
{#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab'} {#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab' || currentChar.data.ttsMode === 'VOICEVOX'}
<div class="flex items-center mt-2"> <div class="flex items-center mt-2">
<Check bind:check={currentChar.data.ttsReadOnlyQuoted}/> <Check bind:check={currentChar.data.ttsReadOnlyQuoted}/>
<span>{language.ttsReadOnlyQuoted}</span> <span>{language.ttsReadOnlyQuoted}</span>

View File

@@ -286,7 +286,12 @@ export function characterFormatUpdate(index:number|character){
creator: '', creator: '',
character_version: 0 character_version: 0
} }
cha.voicevoxConfig = cha.voicevoxConfig ?? {
SPEED_SCALE: 1,
PITCH_SCALE: 0,
INTONATION_SCALE: 1,
VOLUME_SCALE: 1
}
if(cha.postHistoryInstructions){ if(cha.postHistoryInstructions){
cha.chats[cha.chatPage].note += "\n" + cha.postHistoryInstructions cha.chats[cha.chatPage].note += "\n" + cha.postHistoryInstructions
cha.chats[cha.chatPage].note = cha.chats[cha.chatPage].note.trim() cha.chats[cha.chatPage].note = cha.chats[cha.chatPage].note.trim()

View File

@@ -187,6 +187,9 @@ export function setDatabase(data:Database){
if(checkNullish(data.elevenLabKey)){ if(checkNullish(data.elevenLabKey)){
data.elevenLabKey = '' data.elevenLabKey = ''
} }
if(checkNullish(data.voicevoxUrl)){
data.voicevoxUrl = ''
}
if(checkNullish(data.supaMemoryPrompt)){ if(checkNullish(data.supaMemoryPrompt)){
data.supaMemoryPrompt = '' data.supaMemoryPrompt = ''
} }
@@ -309,6 +312,12 @@ export interface character{
} }
ttsMode?:string ttsMode?:string
ttsSpeech?:string ttsSpeech?:string
voicevoxConfig?:{
SPEED_SCALE?: number
PITCH_SCALE?: number
INTONATION_SCALE?: number
VOLUME_SCALE?: number
}
supaMemory?:boolean supaMemory?:boolean
additionalAssets?:[string, string][] additionalAssets?:[string, string][]
ttsReadOnlyQuoted?:boolean ttsReadOnlyQuoted?:boolean
@@ -447,6 +456,7 @@ export interface Database{
requestproxy: string requestproxy: string
showUnrecommended:boolean showUnrecommended:boolean
elevenLabKey:string elevenLabKey:string
voicevoxUrl:string
useExperimental:boolean useExperimental:boolean
showMemoryLimit:boolean showMemoryLimit:boolean
roundIcons:boolean roundIcons:boolean

View File

@@ -1,6 +1,7 @@
import { get } from "svelte/store"; import { get } from "svelte/store";
import { alertError } from "../alert"; import { alertError } from "../alert";
import { DataBase, type character } from "../database"; import { DataBase, type character } from "../database";
import { translateVox } from "../translator/translator";
let sourceNode:AudioBufferSourceNode = null let sourceNode:AudioBufferSourceNode = null
@@ -58,6 +59,44 @@ export async function sayTTS(character:character,text:string) {
alertError(await da.text()) alertError(await da.text())
} }
} }
case "VOICEVOX": {
const jpText = await translateVox(text)
console.log(jpText);
const audioContext = new AudioContext();
const query = await fetch(`${db.voicevoxUrl}/audio_query?text=${jpText}&speaker=${character.ttsSpeech}`, {
method: 'POST',
headers: { "Content-Type": "application/json"},
})
if (query.status == 200){
const queryJson = await query.json();
const bodyData = {
accent_phrases: queryJson.accent_phrases,
speedScale: character.voicevoxConfig.SPEED_SCALE,
pitchScale: character.voicevoxConfig.PITCH_SCALE,
volumeScale: character.voicevoxConfig.VOLUME_SCALE,
intonationScale: character.voicevoxConfig.INTONATION_SCALE,
prePhonemeLength: queryJson.prePhonemeLength,
postPhonemeLength: queryJson.postPhonemeLength,
outputSamplingRate: queryJson.outputSamplingRate,
outputStereo: queryJson.outputStereo,
kana: queryJson.kana,
}
console.log(JSON.stringify(bodyData))
console.log (bodyData)
const getVoice = await fetch(`${db.voicevoxUrl}/synthesis?speaker=${character.ttsSpeech}`, {
method: 'POST',
headers: { "Content-Type": "application/json"},
body: JSON.stringify(bodyData),
})
if (getVoice.status == 200 && getVoice.headers.get('content-type') === 'audio/wav'){
const audioBuffer = await audioContext.decodeAudioData(await getVoice.arrayBuffer())
sourceNode = audioContext.createBufferSource();
sourceNode.buffer = audioBuffer;
sourceNode.connect(audioContext.destination);
sourceNode.start();
}
}
}
} }
} }
@@ -90,4 +129,16 @@ export async function getElevenTTSVoices() {
console.log(res) console.log(res)
return res.voices return res.voices
}
/**
 * Loads the speaker list from the configured VOICEVOX engine
 * (`${db.voicevoxUrl}/speakers`) and reduces it to one selectable voice per speaker.
 *
 * For each speaker, the style whose name is one of 'ノーマル', 'ふつう' or '人間ver.'
 * is preferred; when no style carries one of those names, the speaker's first
 * style is used instead. Speakers that expose no styles at all are left out,
 * because there is no style id to select for them.
 *
 * @returns an array of `{ name, id }` objects: the speaker's name and the id of
 *          the chosen style.
 */
export async function getVOICEVOXVoices() {
    const db = get(DataBase);
    const speakerData = await fetch(`${db.voicevoxUrl}/speakers`)
    const speakerList = await speakerData.json()
    // Style names to prefer. The previous check (`style.name === 'ノーマル' || 'ふつう' || ...`)
    // was always truthy because a non-empty string literal is truthy, so these
    // names were never actually compared against.
    const preferredStyleNames = ['ノーマル', 'ふつう', '人間ver.']
    const speakersInfo = speakerList
        // A speaker without styles would otherwise crash the whole listing on `.id`.
        .filter((speaker) => Array.isArray(speaker.styles) && speaker.styles.length > 0)
        .map((speaker) => {
            const normalStyle = speaker.styles.find((style) => preferredStyleNames.includes(style.name))
                ?? speaker.styles[0]
            return {'name': speaker.name, 'id': normalStyle.id}
        })
    return speakersInfo;
}

View File

@@ -70,4 +70,45 @@ async function googleTrans(text:string, reverse:boolean) {
return result return result
}
/**
 * Produces the text that is handed to the VOICEVOX speech path.
 *
 * The translator plugin is asked first (with the 'en' and 'jp' arguments); if it
 * yields a truthy result, that result's `content` is returned. Otherwise the
 * text goes through `jpTrans`.
 *
 * @param text the text to translate
 */
export async function translateVox(text:string) {
    const pluginResult = await translatorPlugin(text, 'en', 'jp')
    return pluginResult ? pluginResult.content : jpTrans(text)
}
/**
 * Sends `text` to the translate.googleapis.com `translate_a/single` endpoint with
 * `sl=auto` and `tl=ja`, and returns the translated string.
 *
 * When the decoded JSON body is itself a string it is returned unchanged.
 * Otherwise the first element of every entry in `body[0]` is collected, falsy
 * pieces are dropped, and the remainder is concatenated.
 *
 * @param text the text to translate; it is URI-encoded before being appended to the query
 */
async function jpTrans(text:string) {
    const host = 'translate.googleapis.com'
    const query = encodeURIComponent(text)
    const response = await fetch(
        `https://${host}/translate_a/single?client=gtx&sl=auto&tl=ja&dt=t&q=` + query,
        { method: "GET" }
    )
    const body = await response.json()
    if(typeof body === 'string'){
        return body as unknown as string
    }
    // body[0] holds the translated segments; the text of each segment is its first element.
    const pieces = body[0].map((segment) => segment[0])
    const translated = pieces.filter((piece) => Boolean(piece)).join('')
    return translated
}