Add VITS support
This commit is contained in:
@@ -64,6 +64,7 @@
|
|||||||
"tippy.js": "^6.3.7",
|
"tippy.js": "^6.3.7",
|
||||||
"uuid": "^9.0.1",
|
"uuid": "^9.0.1",
|
||||||
"wasmoon": "^1.15.1",
|
"wasmoon": "^1.15.1",
|
||||||
|
"wavefile": "^11.0.0",
|
||||||
"web-streams-polyfill": "^3.2.1"
|
"web-streams-polyfill": "^3.2.1"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|||||||
9
pnpm-lock.yaml
generated
9
pnpm-lock.yaml
generated
@@ -155,6 +155,9 @@ dependencies:
|
|||||||
wasmoon:
|
wasmoon:
|
||||||
specifier: ^1.15.1
|
specifier: ^1.15.1
|
||||||
version: 1.15.1
|
version: 1.15.1
|
||||||
|
wavefile:
|
||||||
|
specifier: ^11.0.0
|
||||||
|
version: 11.0.0
|
||||||
web-streams-polyfill:
|
web-streams-polyfill:
|
||||||
specifier: ^3.2.1
|
specifier: ^3.2.1
|
||||||
version: 3.2.1
|
version: 3.2.1
|
||||||
@@ -5675,6 +5678,12 @@ packages:
|
|||||||
'@types/emscripten': 1.39.9
|
'@types/emscripten': 1.39.9
|
||||||
dev: false
|
dev: false
|
||||||
|
|
||||||
|
/wavefile@11.0.0:
|
||||||
|
resolution: {integrity: sha512-/OBiAALgWU24IG7sC84cDO/KfFuvajWc5Uec0oV2zrpOOZZDgGdOwHwgEzOrwh8jkubBk7PtZfQBIcI1OaE5Ng==}
|
||||||
|
engines: {node: '>=8'}
|
||||||
|
hasBin: true
|
||||||
|
dev: false
|
||||||
|
|
||||||
/web-streams-polyfill@3.2.1:
|
/web-streams-polyfill@3.2.1:
|
||||||
resolution: {integrity: sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==}
|
resolution: {integrity: sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==}
|
||||||
engines: {node: '>= 8'}
|
engines: {node: '>= 8'}
|
||||||
|
|||||||
@@ -526,6 +526,7 @@
|
|||||||
<OptionInput value="openai">OpenAI</OptionInput>
|
<OptionInput value="openai">OpenAI</OptionInput>
|
||||||
<OptionInput value="novelai">NovelAI</OptionInput>
|
<OptionInput value="novelai">NovelAI</OptionInput>
|
||||||
<OptionInput value="huggingface">Huggingface</OptionInput>
|
<OptionInput value="huggingface">Huggingface</OptionInput>
|
||||||
|
<OptionInput value="vits">VITS</OptionInput>
|
||||||
</SelectInput>
|
</SelectInput>
|
||||||
|
|
||||||
|
|
||||||
@@ -625,12 +626,7 @@
|
|||||||
<span class="text-textcolor">Language</span>
|
<span class="text-textcolor">Language</span>
|
||||||
<TextInput additionalClass="mb-4 mt-2" bind:value={currentChar.data.hfTTS.language} placeholder="en" />
|
<TextInput additionalClass="mb-4 mt-2" bind:value={currentChar.data.hfTTS.language} placeholder="en" />
|
||||||
{/if}
|
{/if}
|
||||||
{#if currentChar.data.ttsMode === 'webspeech' ||
|
{#if currentChar.data.ttsMode}
|
||||||
currentChar.data.ttsMode === 'elevenlab' ||
|
|
||||||
currentChar.data.ttsMode === 'VOICEVOX' ||
|
|
||||||
currentChar.data.ttsMode === 'huggingface' ||
|
|
||||||
currentChar.data.ttsMode === 'openai' ||
|
|
||||||
currentChar.data.ttsMode === 'novelai'}
|
|
||||||
<div class="flex items-center mt-2">
|
<div class="flex items-center mt-2">
|
||||||
<Check bind:check={currentChar.data.ttsReadOnlyQuoted} name={language.ttsReadOnlyQuoted}/>
|
<Check bind:check={currentChar.data.ttsReadOnlyQuoted} name={language.ttsReadOnlyQuoted}/>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
import transformers, { AutoTokenizer, pipeline, type SummarizationOutput } from '@xenova/transformers';
|
import {env, AutoTokenizer, pipeline, VitsModel, type SummarizationOutput, type TextGenerationConfig, type TextGenerationOutput, FeatureExtractionPipeline, TextToAudioPipeline } from '@xenova/transformers';
|
||||||
|
|
||||||
transformers.env.localModelPath = "https://sv.risuai.xyz/transformers/"
|
env.localModelPath = "https://sv.risuai.xyz/transformers/"
|
||||||
|
|
||||||
export const runTransformers = async (baseText:string, model:string,config:transformers.TextGenerationConfig = {}) => {
|
export const runTransformers = async (baseText:string, model:string,config:TextGenerationConfig = {}) => {
|
||||||
let text = baseText
|
let text = baseText
|
||||||
let generator = await pipeline('text-generation', model);
|
let generator = await pipeline('text-generation', model);
|
||||||
let output = await generator(text, config) as transformers.TextGenerationOutput
|
let output = await generator(text, config) as TextGenerationOutput
|
||||||
const outputOne = output[0]
|
const outputOne = output[0]
|
||||||
return outputOne
|
return outputOne
|
||||||
}
|
}
|
||||||
@@ -16,7 +16,7 @@ export const runSummarizer = async (text: string) => {
|
|||||||
return v[0].summary_text
|
return v[0].summary_text
|
||||||
}
|
}
|
||||||
|
|
||||||
let extractor:transformers.FeatureExtractionPipeline = null
|
let extractor:FeatureExtractionPipeline = null
|
||||||
export const runEmbedding = async (text: string):Promise<Float32Array> => {
|
export const runEmbedding = async (text: string):Promise<Float32Array> => {
|
||||||
if(!extractor){
|
if(!extractor){
|
||||||
extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
||||||
@@ -59,9 +59,22 @@ export const runEmbedding = async (text: string):Promise<Float32Array> => {
|
|||||||
return (result?.data as Float32Array) ?? null;
|
return (result?.data as Float32Array) ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const runTTS = async (text: string) => {
|
let synthesizer:TextToAudioPipeline = null
|
||||||
let speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
|
let lastSynth:string = null
|
||||||
let synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { local_files_only: true });
|
export const runVITS = async (text: string, model:string = 'Xenova/mms-tts-eng') => {
|
||||||
let out = await synthesizer(text, { speaker_embeddings });
|
const {WaveFile} = await import('wavefile')
|
||||||
return out
|
if((!synthesizer) || (lastSynth !== model)){
|
||||||
|
lastSynth = model
|
||||||
|
synthesizer = await pipeline('text-to-speech', model);
|
||||||
|
}
|
||||||
|
let out = await synthesizer(text, {});
|
||||||
|
const wav = new WaveFile();
|
||||||
|
wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
|
||||||
|
const audioContext = new AudioContext();
|
||||||
|
audioContext.decodeAudioData(wav.toBuffer().buffer, (decodedData) => {
|
||||||
|
const sourceNode = audioContext.createBufferSource();
|
||||||
|
sourceNode.buffer = decodedData;
|
||||||
|
sourceNode.connect(audioContext.destination);
|
||||||
|
sourceNode.start();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
@@ -5,6 +5,7 @@ import { runTranslator, translateVox } from "../translator/translator";
|
|||||||
import { globalFetch } from "../storage/globalApi";
|
import { globalFetch } from "../storage/globalApi";
|
||||||
import { language } from "src/lang";
|
import { language } from "src/lang";
|
||||||
import { getCurrentCharacter, sleep } from "../util";
|
import { getCurrentCharacter, sleep } from "../util";
|
||||||
|
import { runVITS } from "./embedding/transformers";
|
||||||
|
|
||||||
let sourceNode:AudioBufferSourceNode = null
|
let sourceNode:AudioBufferSourceNode = null
|
||||||
|
|
||||||
@@ -221,6 +222,9 @@ export async function sayTTS(character:character,text:string) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case 'vits':{
|
||||||
|
await runVITS(text)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user