[feat] add hf tts

This commit is contained in:
kwaroran
2023-11-28 02:48:47 +09:00
parent cecc1731bc
commit a6b11c22de
8 changed files with 668 additions and 564 deletions

View File

@@ -15,80 +15,80 @@
}, },
"dependencies": { "dependencies": {
"@adobe/css-tools": "4.3.1", "@adobe/css-tools": "4.3.1",
"@aws-crypto/sha256-js": "^5.1.0", "@aws-crypto/sha256-js": "^5.2.0",
"@dqbd/tiktoken": "^1.0.7", "@dqbd/tiktoken": "^1.0.7",
"@mlc-ai/web-tokenizers": "^0.1.0", "@mlc-ai/web-tokenizers": "^0.1.2",
"@smithy/protocol-http": "^3.0.7", "@smithy/protocol-http": "^3.0.10",
"@smithy/signature-v4": "^2.0.11", "@smithy/signature-v4": "^2.0.16",
"@tauri-apps/api": "1.4.0", "@tauri-apps/api": "1.4.0",
"@types/marked": "^5.0.1", "@types/marked": "^5.0.2",
"@xenova/transformers": "^2.5.0", "@xenova/transformers": "^2.9.0",
"blueimp-md5": "^2.19.0", "blueimp-md5": "^2.19.0",
"body-parser": "^1.20.2", "body-parser": "^1.20.2",
"buffer": "^6.0.3", "buffer": "^6.0.3",
"core-js": "^3.31.1", "core-js": "^3.33.3",
"cors": "^2.8.5", "cors": "^2.8.5",
"dompurify": "^3.0.5", "dompurify": "^3.0.6",
"exifr": "^7.1.3", "exifr": "^7.1.3",
"express": "^4.18.2", "express": "^4.18.2",
"fflate": "^0.8.0", "fflate": "^0.8.1",
"gpt-3-encoder": "^1.1.4", "gpt-3-encoder": "^1.1.4",
"gpt3-tokenizer": "^1.1.5", "gpt3-tokenizer": "^1.1.5",
"html-to-image": "^1.11.11", "html-to-image": "^1.11.11",
"isomorphic-dompurify": "^1.8.0", "isomorphic-dompurify": "^1.9.0",
"jszip": "^3.10.1", "jszip": "^3.10.1",
"libsodium-wrappers-sumo": "^0.7.11", "libsodium-wrappers-sumo": "^0.7.13",
"localforage": "^1.10.0", "localforage": "^1.10.0",
"lodash": "^4.17.21", "lodash": "^4.17.21",
"lucide-svelte": "^0.292.0", "lucide-svelte": "^0.292.0",
"marked": "^5.1.1", "marked": "^5.1.2",
"ml-distance": "^4.0.1", "ml-distance": "^4.0.1",
"mobile-drag-drop": "3.0.0-rc.0", "mobile-drag-drop": "3.0.0-rc.0",
"msgpackr": "^1.9.5", "msgpackr": "^1.9.9",
"node-html-parser": "^6.1.5", "node-html-parser": "^6.1.11",
"peerjs": "^1.5.1", "peerjs": "^1.5.1",
"png-chunk-text": "^1.0.0", "png-chunk-text": "^1.0.0",
"png-chunks-encode": "^1.0.0", "png-chunks-encode": "^1.0.0",
"png-chunks-extract": "^1.0.0", "png-chunks-extract": "^1.0.0",
"pngjs": "^7.0.0", "pngjs": "^7.0.0",
"rollup": "^3.26.3", "rollup": "^3.29.4",
"showdown": "^2.1.0", "showdown": "^2.1.0",
"sortablejs": "^1.15.0", "sortablejs": "^1.15.0",
"three": "^0.154.0", "three": "^0.154.0",
"tippy.js": "^6.3.7", "tippy.js": "^6.3.7",
"uuid": "^9.0.0", "uuid": "^9.0.1",
"wasmoon": "^1.15.0", "wasmoon": "^1.15.1",
"web-streams-polyfill": "^3.2.1", "web-streams-polyfill": "^3.2.1",
"yuso": "^0.1.3" "yuso": "^0.1.3"
}, },
"devDependencies": { "devDependencies": {
"@sveltejs/vite-plugin-svelte": "^2.4.2", "@sveltejs/vite-plugin-svelte": "^2.5.3",
"@tailwindcss/typography": "^0.5.9", "@tailwindcss/typography": "^0.5.10",
"@tauri-apps/cli": "1.4.0", "@tauri-apps/cli": "1.4.0",
"@tsconfig/svelte": "^3.0.0", "@tsconfig/svelte": "^3.0.0",
"@types/blueimp-md5": "^2.18.0", "@types/blueimp-md5": "^2.18.2",
"@types/dompurify": "^3.0.2", "@types/dompurify": "^3.0.5",
"@types/libsodium-wrappers-sumo": "^0.7.5", "@types/libsodium-wrappers-sumo": "^0.7.8",
"@types/lodash": "^4.14.195", "@types/lodash": "^4.14.202",
"@types/lodash.clonedeep": "^4.5.7", "@types/lodash.clonedeep": "^4.5.9",
"@types/lodash.isequal": "^4.5.6", "@types/lodash.isequal": "^4.5.8",
"@types/node": "^18.16.19", "@types/node": "^18.18.13",
"@types/pngjs": "^6.0.1", "@types/pngjs": "^6.0.4",
"@types/showdown": "^2.0.1", "@types/showdown": "^2.0.6",
"@types/sortablejs": "^1.15.1", "@types/sortablejs": "^1.15.7",
"@types/three": "^0.154.0", "@types/three": "^0.154.0",
"@types/uuid": "^9.0.2", "@types/uuid": "^9.0.7",
"@types/wicg-file-system-access": "^2020.9.6", "@types/wicg-file-system-access": "^2020.9.8",
"autoprefixer": "^10.4.14", "autoprefixer": "^10.4.16",
"internal-ip": "^7.0.0", "internal-ip": "^7.0.0",
"postcss": "^8.4.26", "postcss": "^8.4.31",
"svelte": "^4.1.0", "svelte": "^4.2.7",
"svelte-check": "^3.4.6", "svelte-check": "^3.6.2",
"svelte-preprocess": "^5.0.4", "svelte-preprocess": "^5.1.1",
"tailwindcss": "^3.3.3", "tailwindcss": "^3.3.5",
"tslib": "^2.6.0", "tslib": "^2.6.2",
"typescript": "^5.1.6", "typescript": "^5.3.2",
"vite": "^4.4.5", "vite": "^4.5.0",
"vite-plugin-top-level-await": "^1.3.1", "vite-plugin-top-level-await": "^1.3.1",
"vite-plugin-wasm": "^3.2.2" "vite-plugin-wasm": "^3.2.2"
} }

1076
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -133,10 +133,15 @@
<span class="text-textcolor mt-2">VOICEVOX URL</span> <span class="text-textcolor mt-2">VOICEVOX URL</span>
<TextInput size="sm" marginBottom bind:value={$DataBase.voicevoxUrl}/> <TextInput size="sm" marginBottom bind:value={$DataBase.voicevoxUrl}/>
<span class="text-textcolor">OpenAI Key</span>
<TextInput size="sm" marginBottom bind:value={$DataBase.openAIKey}/>
<span class="text-textcolor mt-2">NovelAI API key</span> <span class="text-textcolor mt-2">NovelAI API key</span>
<TextInput size="sm" marginBottom placeholder="pst-..." bind:value={$DataBase.NAIApiKey}/> <TextInput size="sm" marginBottom placeholder="pst-..." bind:value={$DataBase.NAIApiKey}/>
<span class="text-textcolor">Huggingface Key</span>
<TextInput size="sm" marginBottom bind:value={$DataBase.huggingfaceKey} placeholder="hf_..."/>
</Arcodion> </Arcodion>
<Arcodion name={language.emotionImage} styled> <Arcodion name={language.emotionImage} styled>

View File

@@ -519,6 +519,7 @@
<OptionInput value="VOICEVOX">VOICEVOX</OptionInput> <OptionInput value="VOICEVOX">VOICEVOX</OptionInput>
<OptionInput value="openai">OpenAI</OptionInput> <OptionInput value="openai">OpenAI</OptionInput>
<OptionInput value="novelai">NovelAI</OptionInput> <OptionInput value="novelai">NovelAI</OptionInput>
<OptionInput value="huggingface">Huggingface</OptionInput>
</SelectInput> </SelectInput>
@@ -604,7 +605,6 @@
</SelectInput> </SelectInput>
{/if} {/if}
{#if currentChar.data.ttsMode === 'openai'} {#if currentChar.data.ttsMode === 'openai'}
<span class="text-textcolor">OpenAI TTS uses your OpenAI key on the chat model section</span>
<SelectInput className="mb-4 mt-2" bind:value={currentChar.data.oaiVoice}> <SelectInput className="mb-4 mt-2" bind:value={currentChar.data.oaiVoice}>
<OptionInput value="">Unset</OptionInput> <OptionInput value="">Unset</OptionInput>
{#each oaiVoices as voice} {#each oaiVoices as voice}
@@ -612,7 +612,19 @@
{/each} {/each}
</SelectInput> </SelectInput>
{/if} {/if}
{#if currentChar.data.ttsMode === 'webspeech' || currentChar.data.ttsMode === 'elevenlab' || currentChar.data.ttsMode === 'VOICEVOX' || currentChar.data.ttsMode === 'novelai'} {#if currentChar.data.ttsMode === 'huggingface'}
<span class="text-textcolor">Model</span>
<TextInput additionalClass="mb-4 mt-2" bind:value={currentChar.data.hfTTS.model} />
<span class="text-textcolor">Language</span>
<TextInput additionalClass="mb-4 mt-2" bind:value={currentChar.data.hfTTS.language} placeholder="en" />
{/if}
{#if currentChar.data.ttsMode === 'webspeech' ||
currentChar.data.ttsMode === 'elevenlab' ||
currentChar.data.ttsMode === 'VOICEVOX' ||
currentChar.data.ttsMode === 'huggingface' ||
currentChar.data.ttsMode === 'openai' ||
currentChar.data.ttsMode === 'novelai'}
<div class="flex items-center mt-2"> <div class="flex items-center mt-2">
<Check bind:check={currentChar.data.ttsReadOnlyQuoted} name={language.ttsReadOnlyQuoted}/> <Check bind:check={currentChar.data.ttsReadOnlyQuoted} name={language.ttsReadOnlyQuoted}/>
</div> </div>

View File

@@ -301,6 +301,10 @@ export function characterFormatUpdate(index:number|character){
depth: 0, depth: 0,
prompt: '' prompt: ''
} }
// Fill in Huggingface TTS defaults only when the character has none yet.
// A plain assignment here would reset the saved model/language every time
// the character format update runs.
cha.hfTTS ??= {
    model: '',
    language: 'en'
}
if(!cha.newGenData){ if(!cha.newGenData){
cha = updateInlayScreen(cha) cha = updateInlayScreen(cha)
} }

View File

@@ -52,4 +52,4 @@ export const runEmbedding = async (text: string):Promise<Float32Array> => {
let extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2'); let extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
let result = await extractor(text, { pooling: 'mean', normalize: true }); let result = await extractor(text, { pooling: 'mean', normalize: true });
return result?.data ?? null; return result?.data ?? null;
} }

View File

@@ -4,6 +4,7 @@ import { DataBase, type character } from "../storage/database";
import { translateVox } from "../translator/translator"; import { translateVox } from "../translator/translator";
import { globalFetch } from "../storage/globalApi"; import { globalFetch } from "../storage/globalApi";
import { language } from "src/lang"; import { language } from "src/lang";
import { sleep } from "../util";
let sourceNode:AudioBufferSourceNode = null let sourceNode:AudioBufferSourceNode = null
@@ -161,6 +162,45 @@ export async function sayTTS(character:character,text:string) {
} }
break; break;
} }
case 'huggingface': {
    // Huggingface Inference API TTS: POST the text to the configured model and
    // play the audio it returns. While the model is still loading the API answers
    // 503 with a JSON body carrying `estimated_time` (seconds); in that case we
    // wait for that long and retry.
    const hfModel = character.hfTTS?.model
    if(!hfModel){
        // `hfTTS` is optional on the character type and its model defaults to '',
        // so report a clear error instead of throwing or requesting ".../models/".
        alertError("Huggingface TTS model is not set")
        return
    }
    while(true){
        const response = await fetch(`https://api-inference.huggingface.co/models/${hfModel}`, {
            method: 'POST',
            headers: {
                "Authorization": "Bearer " + db.huggingfaceKey,
                "Content-Type": "application/json",
            },
            body: JSON.stringify({
                inputs: text,
            })
        });
        // The header may carry parameters (e.g. "application/json; charset=utf-8"),
        // so match on the media type prefix rather than the whole value.
        const isJson = response.headers.get('content-type')?.startsWith('application/json') ?? false
        if(response.status === 503 && isJson){
            const json = await response.json()
            if(json.estimated_time){
                await sleep(json.estimated_time * 1000)
                continue
            }
            // 503 without a wait hint: the body has already been consumed by
            // response.json(), so surface the parsed payload instead of
            // returning silently.
            alertError(language.errors.httpError + JSON.stringify(json))
            return
        }
        if(response.status >= 400){
            alertError(language.errors.httpError + `${await response.text()}`)
            return
        }
        if(response.status !== 200){
            alertError("Error fetching or decoding audio data");
            return
        }
        const audioBuffer = await response.arrayBuffer();
        // Create the AudioContext only once audio is actually available, so
        // retries and error paths do not leave unused contexts behind.
        const audioContext = new AudioContext();
        audioContext.decodeAudioData(audioBuffer, (decodedData) => {
            // NOTE(review): as before, this node is local and does not update the
            // module-level `sourceNode` — confirm whether stop/cancel logic
            // elsewhere is expected to reach this playback.
            const hfSourceNode = audioContext.createBufferSource();
            hfSourceNode.buffer = decodedData;
            hfSourceNode.connect(audioContext.destination);
            // Release the context when playback finishes.
            hfSourceNode.onended = () => { void audioContext.close() }
            hfSourceNode.start();
        }, () => {
            // Decoding failed: release the context and tell the user.
            void audioContext.close()
            alertError("Error fetching or decoding audio data");
        });
        return
    }
}
} }
} }

View File

@@ -347,6 +347,7 @@ export function setDatabase(data:Database){
data.generationSeed ??= -1 data.generationSeed ??= -1
data.newOAIHandle ??= true data.newOAIHandle ??= true
data.gptVisionQuality ??= 'low' data.gptVisionQuality ??= 'low'
data.huggingfaceKey ??= ''
data.reverseProxyOobaArgs ??= { data.reverseProxyOobaArgs ??= {
mode: 'instruct' mode: 'instruct'
} }
@@ -538,7 +539,7 @@ export interface Database{
reverseProxyOobaArgs: OobaChatCompletionRequestParams reverseProxyOobaArgs: OobaChatCompletionRequestParams
tpo?:boolean tpo?:boolean
automark?:boolean automark?:boolean
huggingfaceKey:string
allowAllExtentionFiles?:boolean allowAllExtentionFiles?:boolean
} }
@@ -648,6 +649,10 @@ export interface character{
largePortrait?:boolean largePortrait?:boolean
lorePlus?:boolean lorePlus?:boolean
inlayViewScreen?:boolean inlayViewScreen?:boolean
/** Huggingface TTS settings, edited in the character config when ttsMode is 'huggingface'. */
hfTTS?: {
/** Model id appended to the Huggingface inference API URL when speaking; initialised to ''. */
model: string
/** Language code entered in the character settings; initialised to 'en'. */
language: string
}
} }