Add VOICEVOX TTS (#104)
Add VOICEVOX (https://voicevox.hiroshiba.jp/) to the TTS options. It translates the character's text into Japanese and reads it aloud in a choice of voices. You can run VOICEVOX in Google Colab and use it by entering the localtunnel URL as the VOICEVOX URL. Colab notebook: https://colab.research.google.com/drive/1tyeXJSklNfjW-aZJAib1JfgOMFarAwze Example: https://github.com/kwaroran/RisuAI/assets/61553001/5121ce6d-75a1-4ad4-ad27-4e214a2c5b4d
This commit is contained in:
@@ -27,3 +27,6 @@ Installer version
|
|||||||
- Choose it from 3 themes
|
- Choose it from 3 themes
|
||||||
- **Customizable, Friendly UI**
|
- **Customizable, Friendly UI**
|
||||||
- Great Accessibility and mobile friendly
|
- Great Accessibility and mobile friendly
|
||||||
|
|
||||||
|
## Discord
|
||||||
|
- https://discord.gg/JzP8tB9ZK8
|
||||||
|
|||||||
3600
package-lock.json
generated
Normal file
3600
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -58,6 +58,8 @@
|
|||||||
<span class="text-neutral-200 mt-2">ElevenLabs API key</span>
|
<span class="text-neutral-200 mt-2">ElevenLabs API key</span>
|
||||||
<input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.elevenLabKey}>
|
<input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.elevenLabKey}>
|
||||||
|
|
||||||
|
<span class="text-neutral-200 mt-2">VOICEVOX URL</span>
|
||||||
|
<input class="text-neutral-200 mb-4 p-2 bg-transparent input-text focus:bg-selected text-sm" bind:value={$DataBase.voicevoxUrl}>
|
||||||
|
|
||||||
<span class="text-neutral-200 mt-4 text-lg font-bold">{language.SuperMemory} <Help key="superMemory" /></span>
|
<span class="text-neutral-200 mt-4 text-lg font-bold">{language.SuperMemory} <Help key="superMemory" /></span>
|
||||||
<span class="text-neutral-200 mt-4">{language.SuperMemory} {language.model}</span>
|
<span class="text-neutral-200 mt-4">{language.SuperMemory} {language.model}</span>
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
import Help from "../Others/Help.svelte";
|
import Help from "../Others/Help.svelte";
|
||||||
import RegexData from "./RegexData.svelte";
|
import RegexData from "./RegexData.svelte";
|
||||||
import { exportChar } from "src/ts/characterCards";
|
import { exportChar } from "src/ts/characterCards";
|
||||||
import { getElevenTTSVoices, getWebSpeechTTSVoices } from "src/ts/process/tts";
|
import { getElevenTTSVoices, getWebSpeechTTSVoices, getVOICEVOXVoices } from "src/ts/process/tts";
|
||||||
import { checkCharOrder } from "src/ts/globalApi";
|
import { checkCharOrder } from "src/ts/globalApi";
|
||||||
|
|
||||||
let subMenu = 0
|
let subMenu = 0
|
||||||
@@ -460,6 +460,7 @@
|
|||||||
<option value="" class="bg-darkbg appearance-none">{language.disabled}</option>
|
<option value="" class="bg-darkbg appearance-none">{language.disabled}</option>
|
||||||
<option value="elevenlab" class="bg-darkbg appearance-none">ElevenLabs</option>
|
<option value="elevenlab" class="bg-darkbg appearance-none">ElevenLabs</option>
|
||||||
<option value="webspeech" class="bg-darkbg appearance-none">Web Speech</option>
|
<option value="webspeech" class="bg-darkbg appearance-none">Web Speech</option>
|
||||||
|
<option value="VOICEVOX" class="bg-darkbg appearance-none">VOICEVOX</option>
|
||||||
</select>
|
</select>
|
||||||
|
|
||||||
|
|
||||||
@@ -489,8 +490,28 @@
|
|||||||
{/each}
|
{/each}
|
||||||
</select>
|
</select>
|
||||||
{/await}
|
{/await}
|
||||||
|
{:else if currentChar.data.ttsMode === 'VOICEVOX'}
|
||||||
|
<span class="text-neutral-200">Voice</span>
|
||||||
|
<select class="bg-transparent input-text mt-2 mb-4 text-gray-200 appearance-none text-sm" bind:value={currentChar.data.ttsSpeech}>
|
||||||
|
{#await getVOICEVOXVoices() then voices}
|
||||||
|
{#each voices as voice}
|
||||||
|
<option value={voice.id} class="bg-darkbg appearance-none">{voice.name}</option>
|
||||||
|
{/each}
|
||||||
|
{/await}
|
||||||
|
</select>
|
||||||
|
<span class="text-neutral-200">Speed scale</span>
|
||||||
|
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.SPEED_SCALE}/>
|
||||||
|
|
||||||
|
<span class="text-neutral-200">Pitch scale</span>
|
||||||
|
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.PITCH_SCALE}/>
|
||||||
|
|
||||||
|
<span class="text-neutral-200">Volume scale</span>
|
||||||
|
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.VOLUME_SCALE}/>
|
||||||
|
|
||||||
|
<span class="text-neutral-200">Intonation scale</span>
|
||||||
|
<input class="bg-transparent input-text mt-2 mb-2 text-gray-200 text-xs resize-none h-5 focus:bg-selected" autocomplete="off" bind:value={currentChar.data.voicevoxConfig.INTONATION_SCALE}/>
|
||||||
{/if}
|
{/if}
|
||||||
{#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab'}
|
{#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab' || currentChar.data.ttsMode === 'VOICEVOX'}
|
||||||
<div class="flex items-center mt-2">
|
<div class="flex items-center mt-2">
|
||||||
<Check bind:check={currentChar.data.ttsReadOnlyQuoted}/>
|
<Check bind:check={currentChar.data.ttsReadOnlyQuoted}/>
|
||||||
<span>{language.ttsReadOnlyQuoted}</span>
|
<span>{language.ttsReadOnlyQuoted}</span>
|
||||||
|
|||||||
@@ -286,7 +286,12 @@ export function characterFormatUpdate(index:number|character){
|
|||||||
creator: '',
|
creator: '',
|
||||||
character_version: 0
|
character_version: 0
|
||||||
}
|
}
|
||||||
|
cha.voicevoxConfig = cha.voicevoxConfig ?? {
|
||||||
|
SPEED_SCALE: 1,
|
||||||
|
PITCH_SCALE: 0,
|
||||||
|
INTONATION_SCALE: 1,
|
||||||
|
VOLUME_SCALE: 1
|
||||||
|
}
|
||||||
if(cha.postHistoryInstructions){
|
if(cha.postHistoryInstructions){
|
||||||
cha.chats[cha.chatPage].note += "\n" + cha.postHistoryInstructions
|
cha.chats[cha.chatPage].note += "\n" + cha.postHistoryInstructions
|
||||||
cha.chats[cha.chatPage].note = cha.chats[cha.chatPage].note.trim()
|
cha.chats[cha.chatPage].note = cha.chats[cha.chatPage].note.trim()
|
||||||
|
|||||||
@@ -187,6 +187,9 @@ export function setDatabase(data:Database){
|
|||||||
if(checkNullish(data.elevenLabKey)){
|
if(checkNullish(data.elevenLabKey)){
|
||||||
data.elevenLabKey = ''
|
data.elevenLabKey = ''
|
||||||
}
|
}
|
||||||
|
if(checkNullish(data.voicevoxUrl)){
|
||||||
|
data.voicevoxUrl = ''
|
||||||
|
}
|
||||||
if(checkNullish(data.supaMemoryPrompt)){
|
if(checkNullish(data.supaMemoryPrompt)){
|
||||||
data.supaMemoryPrompt = ''
|
data.supaMemoryPrompt = ''
|
||||||
}
|
}
|
||||||
@@ -309,6 +312,12 @@ export interface character{
|
|||||||
}
|
}
|
||||||
ttsMode?:string
|
ttsMode?:string
|
||||||
ttsSpeech?:string
|
ttsSpeech?:string
|
||||||
|
voicevoxConfig?:{
|
||||||
|
SPEED_SCALE?: number
|
||||||
|
PITCH_SCALE?: number
|
||||||
|
INTONATION_SCALE?: number
|
||||||
|
VOLUME_SCALE?: number
|
||||||
|
}
|
||||||
supaMemory?:boolean
|
supaMemory?:boolean
|
||||||
additionalAssets?:[string, string][]
|
additionalAssets?:[string, string][]
|
||||||
ttsReadOnlyQuoted?:boolean
|
ttsReadOnlyQuoted?:boolean
|
||||||
@@ -447,6 +456,7 @@ export interface Database{
|
|||||||
requestproxy: string
|
requestproxy: string
|
||||||
showUnrecommended:boolean
|
showUnrecommended:boolean
|
||||||
elevenLabKey:string
|
elevenLabKey:string
|
||||||
|
voicevoxUrl:string
|
||||||
useExperimental:boolean
|
useExperimental:boolean
|
||||||
showMemoryLimit:boolean
|
showMemoryLimit:boolean
|
||||||
roundIcons:boolean
|
roundIcons:boolean
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { get } from "svelte/store";
|
import { get } from "svelte/store";
|
||||||
import { alertError } from "../alert";
|
import { alertError } from "../alert";
|
||||||
import { DataBase, type character } from "../database";
|
import { DataBase, type character } from "../database";
|
||||||
|
import { translateVox } from "../translator/translator";
|
||||||
|
|
||||||
let sourceNode:AudioBufferSourceNode = null
|
let sourceNode:AudioBufferSourceNode = null
|
||||||
|
|
||||||
@@ -58,6 +59,44 @@ export async function sayTTS(character:character,text:string) {
|
|||||||
alertError(await da.text())
|
alertError(await da.text())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case "VOICEVOX": {
|
||||||
|
const jpText = await translateVox(text)
|
||||||
|
console.log(jpText);
|
||||||
|
const audioContext = new AudioContext();
|
||||||
|
const query = await fetch(`${db.voicevoxUrl}/audio_query?text=${jpText}&speaker=${character.ttsSpeech}`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { "Content-Type": "application/json"},
|
||||||
|
})
|
||||||
|
if (query.status == 200){
|
||||||
|
const queryJson = await query.json();
|
||||||
|
const bodyData = {
|
||||||
|
accent_phrases: queryJson.accent_phrases,
|
||||||
|
speedScale: character.voicevoxConfig.SPEED_SCALE,
|
||||||
|
pitchScale: character.voicevoxConfig.PITCH_SCALE,
|
||||||
|
volumeScale: character.voicevoxConfig.VOLUME_SCALE,
|
||||||
|
intonationScale: character.voicevoxConfig.INTONATION_SCALE,
|
||||||
|
prePhonemeLength: queryJson.prePhonemeLength,
|
||||||
|
postPhonemeLength: queryJson.postPhonemeLength,
|
||||||
|
outputSamplingRate: queryJson.outputSamplingRate,
|
||||||
|
outputStereo: queryJson.outputStereo,
|
||||||
|
kana: queryJson.kana,
|
||||||
|
}
|
||||||
|
console.log(JSON.stringify(bodyData))
|
||||||
|
console.log (bodyData)
|
||||||
|
const getVoice = await fetch(`${db.voicevoxUrl}/synthesis?speaker=${character.ttsSpeech}`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { "Content-Type": "application/json"},
|
||||||
|
body: JSON.stringify(bodyData),
|
||||||
|
})
|
||||||
|
if (getVoice.status == 200 && getVoice.headers.get('content-type') === 'audio/wav'){
|
||||||
|
const audioBuffer = await audioContext.decodeAudioData(await getVoice.arrayBuffer())
|
||||||
|
sourceNode = audioContext.createBufferSource();
|
||||||
|
sourceNode.buffer = audioBuffer;
|
||||||
|
sourceNode.connect(audioContext.destination);
|
||||||
|
sourceNode.start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -90,4 +129,16 @@ export async function getElevenTTSVoices() {
|
|||||||
|
|
||||||
console.log(res)
|
console.log(res)
|
||||||
return res.voices
|
return res.voices
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Fetches the speaker list from the VOICEVOX server configured in
 * `voicevoxUrl` and returns one selectable voice per speaker.
 *
 * For each speaker the style whose name is one of the known "normal" style
 * names is preferred; when a speaker has none of them, its first style is used.
 * Speakers that expose no styles at all are left out of the result.
 *
 * @returns An array of `{ name, id }` objects, where `id` is the chosen style id.
 */
export async function getVOICEVOXVoices() {
    const db = get(DataBase);
    const speakerData = await fetch(`${db.voicevoxUrl}/speakers`)
    const speakerList = await speakerData.json()

    // Style names treated as a speaker's default voice.
    const defaultStyleNames = ['ノーマル', 'ふつう', '人間ver.']

    const speakersInfo = []
    for (const speaker of speakerList) {
        const styles = speaker.styles ?? []
        // The previous predicate (`name === a || b || c`) was always truthy, so it
        // silently picked the first style; compare against every candidate name instead.
        const normalStyle = styles.find((style) => defaultStyleNames.includes(style.name)) ?? styles[0]
        if (normalStyle) {
            speakersInfo.push({'name': speaker.name, 'id': normalStyle.id})
        }
    }

    return speakersInfo;
}
|
||||||
@@ -70,4 +70,45 @@ async function googleTrans(text:string, reverse:boolean) {
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Translates text for VOICEVOX playback.
 *
 * The translator plugin is consulted first; when it yields a truthy result its
 * `content` is returned, otherwise the text is translated with `jpTrans`.
 *
 * NOTE(review): the plugin is called with target code 'jp' while `jpTrans`
 * requests 'ja' — confirm which code translator plugins expect.
 *
 * @param text Text to translate.
 * @returns The translated text.
 */
export async function translateVox(text:string) {
    const pluginResult = await translatorPlugin(text, 'en', 'jp')
    return pluginResult ? pluginResult.content : jpTrans(text)
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Translates `text` into Japanese via the `translate.googleapis.com`
 * endpoint (source language `auto`, target language `ja`).
 *
 * @param text Text to translate; it is URL-encoded before being sent.
 * @returns The first element of every response segment joined together, or
 *          the response itself when it is already a string.
 * @throws Error when the request returns a non-OK status or the response does
 *         not contain a segment list.
 */
async function jpTrans(text:string): Promise<string> {
    const host = 'translate.googleapis.com'
    const url = `https://${host}/translate_a/single?client=gtx&sl=auto&tl=ja&dt=t&q=` + encodeURIComponent(text)

    const f = await fetch(url, {
        method: "GET",
    })
    // Fail with a clear message instead of trying to parse an error page as JSON.
    if(!f.ok){
        throw new Error(`Japanese translation request failed with status ${f.status}`)
    }

    const res = await f.json()
    if(typeof(res) === 'string'){
        return res
    }

    // The translated segments are read from the first element of the response.
    const segments = Array.isArray(res) ? res[0] : null
    if(!Array.isArray(segments)){
        throw new Error('Japanese translation response did not contain any translated segments')
    }

    return segments.map((s) => s?.[0]).filter(Boolean).join('')
}
|
||||||
Reference in New Issue
Block a user