Add subtitle
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
import PlaygroundParser from "./PlaygroundParser.svelte";
|
||||
import ToolConvertion from "./ToolConvertion.svelte";
|
||||
import { joinMultiuserRoom } from "src/ts/sync/multiuser";
|
||||
import PlaygroundSubtitle from "./PlaygroundSubtitle.svelte";
|
||||
|
||||
let easterEggTouch = $state(0)
|
||||
|
||||
@@ -83,6 +84,11 @@
|
||||
}}>
|
||||
<h1 class="text-2xl font-bold text-start">Parser</h1>
|
||||
</button>
|
||||
<button class="bg-darkbg rounded-md p-6 flex flex-col transition-shadow hover:ring-1" onclick={() => {
|
||||
PlaygroundStore.set(9)
|
||||
}}>
|
||||
<h1 class="text-2xl font-bold text-start">{language.subtitles}</h1>
|
||||
</button>
|
||||
<button class="bg-darkbg rounded-md p-6 flex flex-col transition-shadow hover:ring-1" onclick={() => {
|
||||
PlaygroundStore.set(101)
|
||||
}}>
|
||||
@@ -139,6 +145,9 @@
|
||||
{#if $PlaygroundStore === 8}
|
||||
<PlaygroundParser/>
|
||||
{/if}
|
||||
{#if $PlaygroundStore === 9}
|
||||
<PlaygroundSubtitle/>
|
||||
{/if}
|
||||
{#if $PlaygroundStore === 101}
|
||||
<ToolConvertion/>
|
||||
{/if}
|
||||
|
||||
443
src/lib/Playground/PlaygroundSubtitle.svelte
Normal file
443
src/lib/Playground/PlaygroundSubtitle.svelte
Normal file
@@ -0,0 +1,443 @@
|
||||
|
||||
<script lang="ts">
|
||||
import { language } from "src/lang";
|
||||
import TextInput from "../UI/GUI/TextInput.svelte";
|
||||
import TextAreaInput from "../UI/GUI/TextAreaInput.svelte";
|
||||
import Button from "../UI/GUI/Button.svelte";
|
||||
import { DBState } from "src/ts/stores.svelte";
|
||||
import { getModelInfo, LLMFlags } from "src/ts/model/modellist";
|
||||
import { requestChatData } from "src/ts/process/request";
|
||||
import { selectFileByDom, selectSingleFile, sleep } from "src/ts/util";
|
||||
import { alertError, alertSelect } from "src/ts/alert";
|
||||
import { risuChatParser } from "src/ts/parser.svelte";
|
||||
import { AppendableBuffer, downloadFile, globalFetch } from "src/ts/globalApi.svelte";
|
||||
import SliderInput from "../UI/GUI/SliderInput.svelte";
|
||||
import SelectInput from "../UI/GUI/SelectInput.svelte";
|
||||
import OptionInput from "../UI/GUI/OptionInput.svelte";
|
||||
|
||||
|
||||
|
||||
// Default prompt templates, one per mode. Placeholders are substituted right
// before the request is sent: {{slot}} -> target language (selLang),
// {{slot::time}} -> media length hint, {{slot::data}} -> raw transcription.
const LLMModePrompt = "Transcribe and create a caption and timestamp of it, according to the user's audio or video input. inside a markdown code block. (prefix ```webvtt / postfix ```)\n\nFormat\n```\n[TIME] CONTENT\n```\n\nExample\n```\n[00:00] Hildy!\n[00:01] How are you?\n[00:03] Tell me, is the lord of the universe in?\n[00:07] Somebody must've stolen the crown jewels\n```\n\nStep 2. Generate another subtitle, this time, as a translation to {{slot}}, with same format with Step 1., using step 1 as ref.\n\n The translation must be in natural {{slot}}.\n\n Now, start (Hint: media length is {{slot::time}})"
const WhisperModePrompt = "```\n{{slot::data}}\n``` Translate the following WEBVTT to natural {{slot}}, with keeping the timestamp and header, inside a markdown code block. (prefix ``` / postfix ```)"

// --- User-editable inputs -------------------------------------------------
// Active mode: 'llm' or 'whisper' (the two options offered by the selector).
let mode = $state('llm')
// Target language inserted into the prompt; starts as the app language setting.
let selLang = $state(DBState.db.language)
// Editable prompt; swapped to the matching template when the mode changes.
let prompt = $state(LLMModePrompt)

// --- Derived / result state -----------------------------------------------
// Capabilities of the configured model, used for the warnings in the markup.
let modelInfo = $derived(getModelInfo(DBState.db.aiModel))
// Status text while running, then the generated subtitle text.
let outputText = $state('')
// data: URL of the chosen media, used as the preview player's source.
let fileB64 = $state('')
// data: URL of the generated WebVTT, used as the preview player's track.
let vttB64 = $state('')
// Parsed cues of the most recent result.
let vobj:TranscribeObj[] = $state([])
|
||||
|
||||
|
||||
|
||||
/**
 * Loads only the metadata of a media URL and formats its length as `m:ss`.
 *
 * Metadata loading is used instead of `play()` because playback can be
 * rejected (unsupported container, autoplay policy) even though the duration
 * is only an optional hint for the prompt.
 *
 * @param src URL (here: a data URL) of the audio/video file.
 * @returns The formatted length, or 'unknown' when it cannot be determined
 *          (load error, non-finite duration, or no metadata within 10 s).
 */
function probeDurationLabel(src: string): Promise<string> {
    return new Promise((resolve) => {
        const probe = document.createElement('video')
        let settled = false

        const finish = (label: string) => {
            if(settled){
                return
            }
            settled = true
            clearTimeout(timer)
            probe.onloadedmetadata = null
            probe.onerror = null
            // Drop the source so the browser can release the media buffer.
            probe.removeAttribute('src')
            probe.load()
            resolve(label)
        }

        // Some browsers never load metadata without user interaction; don't hang.
        const timer = setTimeout(() => finish('unknown'), 10000)

        probe.preload = 'metadata'
        probe.muted = true
        probe.onloadedmetadata = () => {
            const d = probe.duration
            if(!Number.isFinite(d)){
                finish('unknown')
                return
            }
            const minutes = Math.floor(d / 60)
            const seconds = String(Math.floor(d % 60)).padStart(2, '0')
            finish(`${minutes}:${seconds}`)
        }
        probe.onerror = () => finish('unknown')
        probe.src = src
    })
}

/**
 * LLM mode: asks the user for an audio/video file, sends it with the prompt to
 * the configured model as a multimodal message, streams the answer into
 * `outputText`, then converts the last fenced block of the answer into WebVTT.
 *
 * On success `fileB64`, `vttB64`, `vobj` and `outputText` hold the result.
 * On cancel or any failure the state is cleared again so the Run button
 * (rendered only while `outputText` is empty) becomes available.
 */
async function runLLMMode() {
    const videoExts = ['mp4', 'webm', 'mkv', 'avi', 'mov']

    // Shows an optional error and returns the UI to its idle state.
    const abort = (message?: string) => {
        if(message){
            alertError(message)
        }
        outputText = ''
        fileB64 = ''
        vttB64 = ''
    }

    outputText = 'Loading...\n\n'

    try {
        const file = await selectSingleFile([
            'mp3', 'ogg', 'wav', 'flac',
            'mp4', 'webm', 'mkv', 'avi', 'mov'
        ])

        if(!file){
            abort()
            return
        }

        // Lower-case so that e.g. "CLIP.MP4" is still recognised as video.
        const ext = (file.name.split('.').pop() ?? '').toLowerCase()
        const mediaType: 'video' | 'audio' = videoExts.includes(ext) ? 'video' : 'audio'

        fileB64 = `data:${mediaType}/${ext};base64,${Buffer.from(file.data).toString('base64')}`

        const media = {
            type: mediaType,
            base64: fileB64,
        } as const

        // The length hint is only computed when the prompt actually asks for it.
        let time = ''
        if(prompt.includes('{{slot::time}}')){
            time = await probeDurationLabel(fileB64)
        }

        const v = await requestChatData({
            formated: [{
                role: "user",
                // Function replacers keep "$" sequences in the values literal.
                content: risuChatParser(prompt)
                    .replace(/{{slot}}/g, () => selLang)
                    .replace(/{{slot::time}}/g, () => time),
                multimodals: [media]
            }],
            bias: {},
            useStreaming: true
        }, 'model')

        if(v.type === 'multiline'){
            abort(v.result[0][1])
            return
        }

        if(v.type !== 'streaming'){
            abort(v.result)
            return
        }

        const reader = v.result.getReader()

        while(true){
            const { done, value } = await reader.read()
            if(done){
                break
            }
            // Each chunk is an object; the value under its first key is shown
            // as the response text so far.
            const firstKey = Object.keys(value)[0]
            outputText = value[firstKey]
        }

        // A response that was cut off leaves its last fence open; close it so the
        // final block can still be extracted (same recovery as Whisper mode).
        const fenceCount = (outputText.match(/```/g) ?? []).length
        if(fenceCount % 2 === 1){
            outputText = outputText.trim() + '\n```'
        }

        // The prompt asks for two blocks (transcription, then translation);
        // the last fenced block is the one that is kept.
        let latest = ''
        for(const match of outputText.matchAll(/```(web)?(vtt)?\n(.*?)\n```/gs)){
            latest = match[3].trim()
        }

        if(!latest){
            abort('No subtitle block was found in the model response')
            return
        }

        vobj = convertTransToObj(latest)
        outputText = makeWebVtt(vobj)
        vttB64 = `data:text/vtt;base64,${Buffer.from(outputText).toString('base64')}`
    } catch (error) {
        // Without this, a thrown error would leave the UI stuck on "Loading...".
        abort(`${error}`)
    }
}
|
||||
|
||||
/**
 * Loads only the metadata of a local video file to find out whether the
 * browser can read a duration from it.
 *
 * @param file The user-selected file.
 * @returns The duration in seconds, or NaN when the metadata could not be
 *          loaded (load error or no metadata within 10 s).
 */
function probeVideoDuration(file: File): Promise<number> {
    return new Promise((resolve) => {
        const url = URL.createObjectURL(file)
        const probe = document.createElement('video')
        let settled = false

        const finish = (seconds: number) => {
            if(settled){
                return
            }
            settled = true
            clearTimeout(timer)
            probe.onloadedmetadata = null
            probe.onerror = null
            probe.removeAttribute('src')
            probe.load()
            // The object URL keeps the file alive until it is revoked.
            URL.revokeObjectURL(url)
            resolve(seconds)
        }

        const timer = setTimeout(() => finish(NaN), 10000)

        probe.preload = 'metadata'
        probe.muted = true
        probe.onloadedmetadata = () => finish(probe.duration)
        probe.onerror = () => finish(NaN)
        probe.src = url
    })
}

/**
 * Whisper mode: asks the user for an audio/video file, transcribes it to
 * WebVTT through the OpenAI transcription endpoint, then asks the configured
 * model to translate that WebVTT using the prompt.
 *
 * Video files are first decoded and re-encoded to MP3 in the browser so only
 * the audio is uploaded.
 *
 * On success `fileB64`, `vttB64`, `vobj` and `outputText` hold the result.
 * On cancel or any failure the state is cleared again so the Run button
 * (rendered only while `outputText` is empty) becomes available.
 */
async function runWhisperMode() {
    const videoExts = ['mp4', 'webm', 'mkv', 'avi', 'mov']
    // Rate the MP3 encoder is configured with; decoding is forced to the same rate.
    const MP3_SAMPLE_RATE = 44100
    // Number of samples handed to the encoder per call.
    const MP3_FRAME = 1152

    // Shows an optional error and returns the UI to its idle state.
    const abort = (message?: string) => {
        if(message){
            alertError(message)
        }
        outputText = ''
        fileB64 = ''
        vttB64 = ''
    }

    // Float samples in [-1, 1] -> 16-bit PCM. Clamping prevents out-of-range
    // samples from wrapping around when stored in the Int16Array.
    const toInt16 = (samples: Float32Array) => {
        const pcm = new Int16Array(samples.length)
        for(let i = 0; i < samples.length; i++){
            pcm[i] = Math.max(-1, Math.min(1, samples[i])) * 0x7FFF
        }
        return pcm
    }

    outputText = 'Loading...\n\n'

    try {
        const files = await selectFileByDom([
            'mp3', 'ogg', 'wav', 'flac',
            'mp4', 'webm', 'mkv', 'avi', 'mov'
        ])

        const file = files?.[0]

        if(!file){
            abort()
            return
        }

        const formData = new FormData()
        const ext = (file.name.split('.').pop() ?? '').toLowerCase()

        if(videoExts.includes(ext)){
            if(Number.isNaN(await probeVideoDuration(file))){
                abort('This video does not have a duration')
                return
            }

            outputText = 'Converting video to audio...\n\n'

            // decodeAudioData resamples to the context's rate, so pinning the
            // context to the encoder's rate keeps speed and timestamps correct.
            const audioContext = new AudioContext({ sampleRate: MP3_SAMPLE_RATE })
            let audioBuffer: AudioBuffer
            try {
                audioBuffer = await audioContext.decodeAudioData(await file.arrayBuffer())
            } finally {
                await audioContext.close()
            }

            const leftInt16 = toInt16(audioBuffer.getChannelData(0))
            // Mono sources have no channel 1; feed the same samples to both sides.
            const rightInt16 = audioBuffer.numberOfChannels > 1
                ? toInt16(audioBuffer.getChannelData(1))
                : leftInt16

            const lamejs = await import('@breezystack/lamejs')
            const mp3encoder = new lamejs.Mp3Encoder(2, MP3_SAMPLE_RATE, 128);
            const enc = new AppendableBuffer()

            for(let pointer = 0; pointer < leftInt16.length; pointer += MP3_FRAME){
                enc.append(mp3encoder.encodeBuffer(
                    leftInt16.subarray(pointer, pointer + MP3_FRAME),
                    rightInt16.subarray(pointer, pointer + MP3_FRAME)
                ))
                // Every 100 frames: report progress and yield so the UI can repaint.
                if(pointer % 115200 === 0){
                    outputText = `Converting video to audio... ${(pointer / leftInt16.length * 100).toFixed(2)}%\n`
                    await sleep(1)
                }
            }
            enc.append(mp3encoder.flush())

            const file2 = new File([enc.buffer], 'audio.mp3', {
                type: 'audio/mp3'
            })

            outputText = 'Transcribing audio...\n\n'
            formData.append('file', file2)
        }
        else{
            formData.append('file', file)
        }

        formData.append('model', 'whisper-1')
        formData.append('response_format', 'vtt')

        const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${DBState.db.openAIKey}`
            },
            body: formData
        })

        const transcript = await response.text()

        // A failed request returns an error body, not WebVTT; do not translate it.
        if(!response.ok){
            abort(transcript)
            return
        }

        const fileBuffer = await file.arrayBuffer()

        outputText = transcript

        const v = await requestChatData({
            formated: [{
                role: "user",
                // Function replacers keep "$" sequences in the transcript literal.
                content: risuChatParser(prompt)
                    .replace(/{{slot}}/g, () => selLang)
                    .replace(/{{slot::data}}/g, () => transcript),
            }],
            bias: {},
            useStreaming: true
        }, 'model')

        if(v.type === 'multiline'){
            abort(v.result[0][1])
            return
        }

        if(v.type !== 'streaming'){
            abort(v.result)
            return
        }

        const reader = v.result.getReader()

        while(true){
            const { done, value } = await reader.read()
            if(done){
                break
            }
            // Each chunk is an object; the value under its first key is shown
            // as the response text so far.
            const firstKey = Object.keys(value)[0]
            outputText = value[firstKey]
        }

        // A response that was cut off leaves its fence open; close it so the
        // block can still be extracted.
        if(!outputText.trim().endsWith('```')){
            outputText = outputText.trim() + '\n```'
        }

        let latest = ''
        for(const match of outputText.matchAll(/```(web)?(vtt)?\n(.*?)\n```/gs)){
            latest = match[3].trim()
        }

        if(!latest){
            abort('No subtitle block was found in the model response')
            return
        }

        outputText = latest
        vttB64 = `data:text/vtt;base64,${Buffer.from(outputText).toString('base64')}`
        // Label the preview with the file's real MIME type when the browser knows it.
        fileB64 = `data:${file.type || 'audio/wav'};base64,${Buffer.from(fileBuffer).toString('base64')}`
        vobj = convertWebVTTtoObj(outputText)
    } catch (error) {
        // Without this, a thrown error would leave the UI stuck on a status message.
        abort(`${error}`)
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/** One subtitle cue as produced by the converters in this file. */
type TranscribeObj = {
    /** Timestamp string at which the cue appears, e.g. "00:01.000". */
    start: string;
    /** Timestamp string at which the cue disappears. */
    end: string;
    /** Caption text; multiple lines are separated by "\n". */
    text: string;
};
|
||||
|
||||
|
||||
/**
 * Parses the `[TIME] CONTENT` transcript format requested by the LLM prompt
 * into subtitle cues.
 *
 * Each cue ends where the next one starts; the final cue gets a far-future end
 * time because the format carries no end information.
 *
 * @param r Transcript text, one `[TIME] CONTENT` entry per line. Lines that do
 *          not start with `[` or have no closing `]` are ignored.
 * @returns The cues in input order; an empty array when no entry was found.
 */
function convertTransToObj(r:string){
    // Must be a syntactically valid WebVTT timestamp (minutes/seconds <= 59),
    // otherwise players discard the last cue.
    const FINAL_CUE_END = '99:59:59.000'
    const pad2 = (n: number) => String(n).padStart(2, '0')

    // Normalises "m:ss", "mm:ss", "h:mm:ss" (optionally with fraction) into a
    // WebVTT timestamp; unrecognised input keeps the legacy `raw + '.000'` form.
    const toCueTime = (raw: string) => {
        const m = /^(?:(\d+):)?(\d+):(\d{1,2})(?:[.,](\d{1,3}))?$/.exec(raw.trim())
        if(!m){
            return raw + '.000'
        }
        // Going through total seconds carries overflow (e.g. "75:30" -> 01:15:30).
        const total = Number(m[1] ?? 0) * 3600 + Number(m[2]) * 60 + Number(m[3])
        const hours = Math.floor(total / 3600)
        const clock = `${pad2(Math.floor(total / 60) % 60)}:${pad2(total % 60)}`
        const millis = (m[4] ?? '').padEnd(3, '0')
        return `${hours > 0 ? `${pad2(hours)}:` : ''}${clock}.${millis}`
    }

    const lines = r.split('\n').map(v => v.trim()).filter(v => v)
    const obj:TranscribeObj[] = []

    for(const line of lines){
        if(!line.startsWith('[')){
            continue
        }
        // Split at the first "]" only, so brackets inside the caption survive.
        const close = line.indexOf(']')
        if(close === -1){
            continue
        }
        const start = toCueTime(line.slice(1, close))
        if(obj.length > 0){
            obj[obj.length - 1].end = start
        }
        obj.push({
            start: start,
            end: '',
            text: line.slice(close + 1).trim()
        })
    }

    if(obj.length > 0){
        obj[obj.length - 1].end = FINAL_CUE_END
    }
    return obj
}
|
||||
|
||||
/**
 * Parses WebVTT text into subtitle cues.
 *
 * Blocks are separated by blank lines. Within a block the timing line is the
 * first line containing `-->`; any line before it (a cue identifier) is
 * skipped and the lines after it form the caption. Blocks without a timing
 * line (the `WEBVTT` header, NOTE/STYLE blocks) are ignored.
 *
 * @param r WebVTT text; LF and CRLF line endings are both accepted.
 * @returns The cues in input order. `end` holds only the timestamp; cue
 *          settings that follow it on the timing line are dropped.
 */
function convertWebVTTtoObj(r:string){
    const obj:TranscribeObj[] = []
    const chunks = r
        .replace(/\r\n?/g, '\n')
        .split(/\n\s*\n/)
        .map(v => v.trim())
        .filter(v => v)

    for(const chunk of chunks){
        if(chunk.startsWith('WEBVTT')){
            continue
        }
        const lines = chunk.split('\n')
        const timingAt = lines.findIndex(v => v.includes('-->'))
        if(timingAt === -1){
            continue
        }
        const [start, rest] = lines[timingAt].split('-->')
        obj.push({
            start: start.trim(),
            // First token only: anything after it is cue settings (e.g. "align:start").
            end: rest.trim().split(/\s+/)[0],
            text: lines.slice(timingAt + 1).join('\n')
        })
    }
    return obj
}
|
||||
|
||||
/**
 * Serialises subtitle cues as WebVTT text.
 *
 * @param obj Cues to write, in display order.
 * @returns The `WEBVTT` header followed by one `start --> end` block per cue,
 *          each block terminated by a blank line.
 */
function makeWebVtt(obj: TranscribeObj[]){
    const header = 'WEBVTT\n\n'
    const blocks = obj.map(({ start, end, text }) => `${start} --> ${end}\n${text}\n\n`)
    return [header, ...blocks].join('')
}
|
||||
|
||||
/**
 * Converts the WebVTT currently held in `outputText` into SRT entries.
 *
 * SRT timestamps must be `hh:mm:ss,mmm`, while WebVTT also allows the short
 * `mm:ss.ttt` form (which LLM mode produces), so every timestamp is rebuilt
 * with an explicit hour field and a comma before the milliseconds. Blocks
 * without a timing line (the `WEBVTT` header, NOTE blocks) are skipped, and
 * lines before the timing line (cue identifiers) are dropped.
 *
 * @returns One string per cue (`index`, timing line, caption); the caller
 *          joins them with blank lines. Empty when there are no cues.
 */
function webVttToSrt(){
    // Optional hours, minutes, seconds, fraction; trailing cue settings are consumed.
    const stampPattern = /^(?:(\d+):)?(\d+):(\d{1,2})[.,](\d{1,3}).*$/

    const cues = outputText
        .replace(/\r\n?/g, '\n')
        .split(/\n\s*\n/)
        .map((block) => block.trim().split('\n'))
        .filter((lines) => lines.some((line) => line.includes('-->')))

    return cues.map((lines, i) => {
        const timingAt = lines.findIndex((line) => line.includes('-->'))
        const [start, end] = lines[timingAt].split('-->').map((side) =>
            side.trim().replace(stampPattern, (_whole, h, m, s, ms) => {
                // Going through total seconds carries any overflow into the hours.
                const total = Number(h ?? 0) * 3600 + Number(m) * 60 + Number(s)
                const hh = String(Math.floor(total / 3600)).padStart(2, '0')
                const mm = String(Math.floor(total / 60) % 60).padStart(2, '0')
                const ss = String(total % 60).padStart(2, '0')
                return `${hh}:${mm}:${ss},${String(ms).padEnd(3, '0')}`
            })
        )
        return `${i + 1}\n${start} --> ${end}\n${lines.slice(timingAt + 1).join('\n')}`
    })
}
|
||||
|
||||
/**
 * Options describing a block of PCM audio.
 * NOTE(review): not referenced anywhere in the visible part of this file —
 * confirm whether it is still needed.
 */
type WaveOptions = {
    /** Whether the samples are floating point. */
    isFloat: boolean;
    /** Number of audio channels. */
    numChannels: number;
    /** Samples per second. */
    sampleRate: number;
};
|
||||
</script>
|
||||
|
||||
<!-- Subtitle playground: inputs, capability warnings, run/status area, preview and download. -->
<h2 class="text-4xl text-textcolor my-6 font-black relative">{language.subtitles}</h2>

<span class="text-textcolor text-lg mt-4">{language.language}</span>
<TextInput bind:value={selLang} />

<span class="text-textcolor text-lg mt-4">{language.prompt}</span>
<TextAreaInput bind:value={prompt} />

<span class="text-textcolor text-lg mt-4">{language.type}</span>
<!-- Switching the mode replaces the prompt with that mode's default template. -->
<SelectInput bind:value={mode} onchange={() => {
    if(mode === 'llm'){
        prompt = LLMModePrompt
    }
    if(mode === 'whisper'){
        prompt = WhisperModePrompt
    }
}}>
    <OptionInput value="llm">LLM</OptionInput>
    <OptionInput value="whisper">Whisper</OptionInput>
</SelectInput>

<!-- Warn unless the model accepts both audio and video input. -->
{#if !(modelInfo.flags.includes(LLMFlags.hasAudioInput) && modelInfo.flags.includes(LLMFlags.hasVideoInput))}
    <span class="text-draculared text-lg mt-4">{language.subtitlesWarning1}</span>
{/if}
<!-- Warn unless streaming is both supported by the model and enabled in settings. -->
{#if !(modelInfo.flags.includes(LLMFlags.hasStreaming) && DBState.db.useStreaming)}
    <span class="text-draculared text-lg mt-4">{language.subtitlesWarning2}</span>
{/if}

<!-- Idle: Run button. Finished: collapsible result text. Otherwise: live status/stream. -->
{#if !outputText}
    <Button className="mt-4" onclick={() => {
        if(mode === 'llm'){
            runLLMMode()
        }
        if(mode === 'whisper'){
            runWhisperMode()
        }
    }}>
        {language.run}
    </Button>
{:else if vttB64 && fileB64}
    <details class="mt-4">
        <pre>{outputText}</pre>
    </details>
{:else}
    <pre>{outputText}</pre>
{/if}

{#if vttB64 && fileB64}
    <div class="mt-4">
        <!-- Re-create the player whenever the subtitle track changes. -->
        {#key vttB64}
            <video controls src={fileB64} class="w-full">
                <track default kind="captions" src={vttB64} srclang="en" />
            </video>
        {/key}
    </div>

    <span class="text-textcolor text-lg mt-4">{language.download}</span>

    <Button className="mt-4" onclick={() => {
        // Back to the idle state, including the parsed cues of the old result.
        outputText = ''
        fileB64 = ''
        vttB64 = ''
        vobj = []
    }}>
        {language.reset}
    </Button>

    <Button className="mt-4" onclick={async () => {
        // alertSelect's result is parsed as the index of the chosen entry.
        const sel = parseInt(await alertSelect([
            'WebVTT',
            'SRT'
        ]), 10)

        // WebVTT
        if(sel === 0){
            downloadFile('subtitle.vtt', outputText)
            return
        }

        // SRT
        if(sel === 1){
            downloadFile('subtitle.srt', webVttToSrt().join('\n\n'))
            return
        }
    }}>
        {language.download}
    </Button>
{/if}
|
||||
Reference in New Issue
Block a user