added VOICEVOX TTS
This commit is contained in:
3600
package-lock.json
generated
Normal file
3600
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -58,6 +58,8 @@
|
|||||||
<span class="text-neutral-200 mt-2">ElevenLabs API key</span>
|
<span class="text-neutral-200 mt-2">ElevenLabs API key</span>
|
||||||
<input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.elevenLabKey}>
|
<input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.elevenLabKey}>
|
||||||
|
|
||||||
|
<span class="text-neutral-200 mt-2">VOICEVOX URL</span>
|
||||||
|
<input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.voicevoxUrl}>
|
||||||
|
|
||||||
<span class="text-neutral-200 mt-4 text-lg font-bold">{language.SuperMemory} <Help key="superMemory" /></span>
|
<span class="text-neutral-200 mt-4 text-lg font-bold">{language.SuperMemory} <Help key="superMemory" /></span>
|
||||||
<span class="text-neutral-200 mt-4">{language.SuperMemory} {language.model}</span>
|
<span class="text-neutral-200 mt-4">{language.SuperMemory} {language.model}</span>
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
import Help from "../Others/Help.svelte";
|
import Help from "../Others/Help.svelte";
|
||||||
import RegexData from "./RegexData.svelte";
|
import RegexData from "./RegexData.svelte";
|
||||||
import { exportChar } from "src/ts/characterCards";
|
import { exportChar } from "src/ts/characterCards";
|
||||||
import { getElevenTTSVoices, getWebSpeechTTSVoices } from "src/ts/process/tts";
|
import { getElevenTTSVoices, getWebSpeechTTSVoices, getVOICEVOXVoices } from "src/ts/process/tts";
|
||||||
import { checkCharOrder } from "src/ts/globalApi";
|
import { checkCharOrder } from "src/ts/globalApi";
|
||||||
|
|
||||||
let subMenu = 0
|
let subMenu = 0
|
||||||
@@ -460,6 +460,7 @@
|
|||||||
<option value="" class="bg-darkbg appearance-none">{language.disabled}</option>
|
<option value="" class="bg-darkbg appearance-none">{language.disabled}</option>
|
||||||
<option value="elevenlab" class="bg-darkbg appearance-none">ElevenLabs</option>
|
<option value="elevenlab" class="bg-darkbg appearance-none">ElevenLabs</option>
|
||||||
<option value="webspeech" class="bg-darkbg appearance-none">Web Speech</option>
|
<option value="webspeech" class="bg-darkbg appearance-none">Web Speech</option>
|
||||||
|
<option value="VOICEVOX" class="bg-darkbg appearance-none">VOICEVOX</option>
|
||||||
</select>
|
</select>
|
||||||
|
|
||||||
|
|
||||||
@@ -489,8 +490,28 @@
|
|||||||
{/each}
|
{/each}
|
||||||
</select>
|
</select>
|
||||||
{/await}
|
{/await}
|
||||||
|
{:else if currentChar.data.ttsMode === 'VOICEVOX'}
|
||||||
|
<span class="text-neutral-200">Voice</span>
|
||||||
|
<select class="bg-transparent input-text mt-2 mb-4 text-gray-200 appearance-none text-sm" bind:value={currentChar.data.ttsSpeech}>
|
||||||
|
{#await getVOICEVOXVoices() then voices}
|
||||||
|
{#each voices as voice}
|
||||||
|
<option value={voice.id} class="bg-darkbg appearance-none">{voice.name}</option>
|
||||||
|
{/each}
|
||||||
|
{/await}
|
||||||
|
</select>
|
||||||
|
<span class="text-neutral-200">Speed scale</span>
|
||||||
|
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.SPEED_SCALE}/>
|
||||||
|
|
||||||
|
<span class="text-neutral-200">Pitch scale</span>
|
||||||
|
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.PITCH_SCALE}/>
|
||||||
|
|
||||||
|
<span class="text-neutral-200">Volume scale</span>
|
||||||
|
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.VOLUME_SCALE}/>
|
||||||
|
|
||||||
|
<span class="text-neutral-200">Intonation scale</span>
|
||||||
|
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.INTONATION_SCALE}/>
|
||||||
{/if}
|
{/if}
|
||||||
{#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab'}
|
{#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab' || currentChar.data.ttsMode === 'VOICEVOX'}
|
||||||
<div class="flex items-center mt-2">
|
<div class="flex items-center mt-2">
|
||||||
<Check bind:check={currentChar.data.ttsReadOnlyQuoted}/>
|
<Check bind:check={currentChar.data.ttsReadOnlyQuoted}/>
|
||||||
<span>{language.ttsReadOnlyQuoted}</span>
|
<span>{language.ttsReadOnlyQuoted}</span>
|
||||||
|
|||||||
@@ -286,7 +286,12 @@ export function characterFormatUpdate(index:number|character){
|
|||||||
creator: '',
|
creator: '',
|
||||||
character_version: 0
|
character_version: 0
|
||||||
}
|
}
|
||||||
|
cha.voicevoxConfig = cha.voicevoxConfig ?? {
|
||||||
|
SPEED_SCALE: 1,
|
||||||
|
PITCH_SCALE: 0,
|
||||||
|
INTONATION_SCALE: 1,
|
||||||
|
VOLUME_SCALE: 1
|
||||||
|
}
|
||||||
if(cha.postHistoryInstructions){
|
if(cha.postHistoryInstructions){
|
||||||
cha.chats[cha.chatPage].note += "\n" + cha.postHistoryInstructions
|
cha.chats[cha.chatPage].note += "\n" + cha.postHistoryInstructions
|
||||||
cha.chats[cha.chatPage].note = cha.chats[cha.chatPage].note.trim()
|
cha.chats[cha.chatPage].note = cha.chats[cha.chatPage].note.trim()
|
||||||
|
|||||||
@@ -187,6 +187,9 @@ export function setDatabase(data:Database){
|
|||||||
if(checkNullish(data.elevenLabKey)){
|
if(checkNullish(data.elevenLabKey)){
|
||||||
data.elevenLabKey = ''
|
data.elevenLabKey = ''
|
||||||
}
|
}
|
||||||
|
if(checkNullish(data.voicevoxUrl)){
|
||||||
|
data.voicevoxUrl = ''
|
||||||
|
}
|
||||||
if(checkNullish(data.supaMemoryPrompt)){
|
if(checkNullish(data.supaMemoryPrompt)){
|
||||||
data.supaMemoryPrompt = ''
|
data.supaMemoryPrompt = ''
|
||||||
}
|
}
|
||||||
@@ -309,6 +312,12 @@ export interface character{
|
|||||||
}
|
}
|
||||||
ttsMode?:string
|
ttsMode?:string
|
||||||
ttsSpeech?:string
|
ttsSpeech?:string
|
||||||
|
voicevoxConfig?:{
|
||||||
|
SPEED_SCALE?: number
|
||||||
|
PITCH_SCALE?: number
|
||||||
|
INTONATION_SCALE?: number
|
||||||
|
VOLUME_SCALE?: number
|
||||||
|
}
|
||||||
supaMemory?:boolean
|
supaMemory?:boolean
|
||||||
additionalAssets?:[string, string][]
|
additionalAssets?:[string, string][]
|
||||||
ttsReadOnlyQuoted?:boolean
|
ttsReadOnlyQuoted?:boolean
|
||||||
@@ -447,6 +456,7 @@ export interface Database{
|
|||||||
requestproxy: string
|
requestproxy: string
|
||||||
showUnrecommended:boolean
|
showUnrecommended:boolean
|
||||||
elevenLabKey:string
|
elevenLabKey:string
|
||||||
|
voicevoxUrl:string
|
||||||
useExperimental:boolean
|
useExperimental:boolean
|
||||||
showMemoryLimit:boolean
|
showMemoryLimit:boolean
|
||||||
roundIcons:boolean
|
roundIcons:boolean
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { get } from "svelte/store";
|
import { get } from "svelte/store";
|
||||||
import { alertError } from "../alert";
|
import { alertError } from "../alert";
|
||||||
import { DataBase, type character } from "../database";
|
import { DataBase, type character } from "../database";
|
||||||
|
import { translateVox } from "../translator/translator";
|
||||||
|
|
||||||
let sourceNode:AudioBufferSourceNode = null
|
let sourceNode:AudioBufferSourceNode = null
|
||||||
|
|
||||||
@@ -58,6 +59,44 @@ export async function sayTTS(character:character,text:string) {
|
|||||||
alertError(await da.text())
|
alertError(await da.text())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case "VOICEVOX": {
|
||||||
|
const jpText = await translateVox(text)
|
||||||
|
console.log(jpText);
|
||||||
|
const audioContext = new AudioContext();
|
||||||
|
const query = await fetch(`${db.voicevoxUrl}/audio_query?text=${jpText}&speaker=${character.ttsSpeech}`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { "Content-Type": "application/json"},
|
||||||
|
})
|
||||||
|
if (query.status == 200){
|
||||||
|
const queryJson = await query.json();
|
||||||
|
const bodyData = {
|
||||||
|
accent_phrases: queryJson.accent_phrases,
|
||||||
|
speedScale: character.voicevoxConfig.SPEED_SCALE,
|
||||||
|
pitchScale: character.voicevoxConfig.PITCH_SCALE,
|
||||||
|
volumeScale: character.voicevoxConfig.VOLUME_SCALE,
|
||||||
|
intonationScale: character.voicevoxConfig.INTONATION_SCALE,
|
||||||
|
prePhonemeLength: queryJson.prePhonemeLength,
|
||||||
|
postPhonemeLength: queryJson.postPhonemeLength,
|
||||||
|
outputSamplingRate: queryJson.outputSamplingRate,
|
||||||
|
outputStereo: queryJson.outputStereo,
|
||||||
|
kana: queryJson.kana,
|
||||||
|
}
|
||||||
|
console.log(JSON.stringify(bodyData))
|
||||||
|
console.log (bodyData)
|
||||||
|
const getVoice = await fetch(`${db.voicevoxUrl}/synthesis?speaker=${character.ttsSpeech}`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { "Content-Type": "application/json"},
|
||||||
|
body: JSON.stringify(bodyData),
|
||||||
|
})
|
||||||
|
if (getVoice.status == 200 && getVoice.headers.get('content-type') === 'audio/wav'){
|
||||||
|
const audioBuffer = await audioContext.decodeAudioData(await getVoice.arrayBuffer())
|
||||||
|
sourceNode = audioContext.createBufferSource();
|
||||||
|
sourceNode.buffer = audioBuffer;
|
||||||
|
sourceNode.connect(audioContext.destination);
|
||||||
|
sourceNode.start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -90,4 +129,16 @@ export async function getElevenTTSVoices() {
|
|||||||
|
|
||||||
console.log(res)
|
console.log(res)
|
||||||
return res.voices
|
return res.voices
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Loads the speaker list from the configured VOICEVOX server and reduces it
 * to one selectable voice per speaker.
 *
 * Sends a request to `${db.voicevoxUrl}/speakers` and, for every speaker in
 * the response, picks the style whose name is one of the known "default"
 * style names ('ノーマル', 'ふつう', '人間ver.'). If a speaker has none of
 * those, its first style is used so the speaker is still selectable.
 * Speakers without any style are skipped.
 *
 * NOTE(review): assumes the response body is an array of objects shaped like
 * `{ name, styles: [{ name, id }, ...] }` — confirm against the VOICEVOX
 * engine version in use.
 *
 * @returns A list of `{ name, id }` entries, where `id` is the chosen style id.
 * @throws Error when the server answers with a non-2xx status.
 */
export async function getVOICEVOXVoices() {
    const db = get(DataBase);
    const speakerData = await fetch(`${db.voicevoxUrl}/speakers`)
    if(!speakerData.ok){
        throw new Error(`VOICEVOX speaker list request failed with HTTP status ${speakerData.status}`)
    }
    const speakerList = await speakerData.json()

    // Style names treated as a speaker's default voice.
    const defaultStyleNames = ['ノーマル', 'ふつう', '人間ver.']

    const speakersInfo = []
    for(const speaker of speakerList){
        const styles = speaker.styles ?? []
        // Compare the name against each candidate explicitly: writing
        // `name === 'a' || 'b'` is always truthy because 'b' is a non-empty string.
        const normalStyle = styles.find((style) => defaultStyleNames.includes(style.name)) ?? styles[0]
        if(!normalStyle){
            // A speaker without styles has no id that could be selected.
            continue
        }
        speakersInfo.push({'name': speaker.name, 'id': normalStyle.id})
    }

    return speakersInfo;
}
|
||||||
@@ -70,4 +70,45 @@ async function googleTrans(text:string, reverse:boolean) {
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Produces the text that is handed to VOICEVOX for speech synthesis.
 *
 * First asks `translatorPlugin` to translate `text` (called with the
 * arguments 'en' and 'jp'). When that call yields a truthy result, its
 * `content` is returned; otherwise the text is passed to `jpTrans`.
 *
 * NOTE(review): the plugin is given 'jp' while `jpTrans` requests `tl=ja`;
 * verify which code `translatorPlugin` expects for Japanese.
 *
 * @param text Text to translate.
 * @returns The plugin's `content`, or whatever `jpTrans(text)` resolves to.
 */
export async function translateVox(text:string) {
    const pluginResult = await translatorPlugin(text, 'en', 'jp')
    return pluginResult ? pluginResult.content : jpTrans(text)
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Translates `text` by calling the `translate_a/single` endpoint on
 * translate.googleapis.com with `sl=auto` and `tl=ja`.
 *
 * The response is expected to be a JSON array whose first element is a list
 * of segments, each segment being an array with the translated piece at
 * index 0. The pieces are concatenated in order; empty pieces are dropped.
 *
 * @param text Text to translate. An empty string is returned unchanged
 *             without issuing a request.
 * @returns The concatenated translation, the raw response when it is already
 *          a string, or '' when the response contains no segment list.
 * @throws Error when the endpoint answers with a non-2xx status.
 */
async function jpTrans(text:string) {
    // Nothing to translate: avoid a request whose response has no segment list.
    if(!text){
        return ''
    }

    const host = 'translate.googleapis.com'
    const url = `https://${host}/translate_a/single?client=gtx&sl=auto&tl=ja&dt=t&q=` + encodeURIComponent(text)

    const f = await fetch(url, {
        method: "GET",
    })
    if(!f.ok){
        // Fail with the status instead of an opaque JSON parse error.
        throw new Error(`Japanese translation request failed with HTTP status ${f.status}`)
    }

    const res: unknown = await f.json()
    if(typeof res === 'string'){
        return res
    }

    // The segment list can be missing (e.g. null) when nothing was translated.
    const segments: unknown = Array.isArray(res) ? res[0] : undefined
    if(!Array.isArray(segments)){
        return ''
    }

    return segments
        .map((segment: unknown) => Array.isArray(segment) ? segment[0] : undefined)
        .filter(Boolean)
        .join('')
}
|
||||||
Reference in New Issue
Block a user