From ddf9aec9818964bddc533e143f36e383f571bd85 Mon Sep 17 00:00:00 2001 From: minco Date: Sun, 28 Dec 2025 02:30:42 +0900 Subject: [PATCH] feat: add codec --- backend/src/main.rs | 213 ++++++++++++++++++++++++++++++------------- frontend/src/main.ts | 2 +- 2 files changed, 151 insertions(+), 64 deletions(-) diff --git a/backend/src/main.rs b/backend/src/main.rs index 3aee21f..bd1cdec 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -95,88 +95,175 @@ async fn accept_connection(stream: TcpStream) { fn process_video( tx: tokio::sync::mpsc::Sender<(Vec, bool)>, ) -> Result<(), Box> { - let input_file = "video.mp4"; - let mut ictx = ffmpeg::format::input(&input_file)?; + ffmpeg::device::register_all(); + let mut dictionary = ffmpeg::Dictionary::new(); + dictionary.set("framerate", "30"); + dictionary.set("video_size", "1920x1080"); - let input_stream = ictx - .streams() - .best(ffmpeg::media::Type::Video) - .ok_or(ffmpeg::Error::StreamNotFound)?; - let input_stream_index = input_stream.index(); + // Find the gdigrab input format (Windows) + // ffmpeg::format::format::find returns Option in some versions? + // Let's try to use the device directly if possible or finding the demuxer. + // Based on errors, let's try assuming ffmpeg::format::format::find exists and works for inputs. + // If not, we might need ffmpeg::format::demuxer. - let decoder_ctx = ffmpeg::codec::context::Context::from_parameters(input_stream.parameters())?; - let mut decoder = decoder_ctx.decoder().video()?; + // We'll assume the error 'private module input' implies 'ffmpeg::format::format::input' is private + // so we can't look inside it. But 'ffmpeg::format::format' might have 'find'. - // Setup Encoder - // Try to find libsvtav1 or fallback to AV1 generic - let codec = ffmpeg::codec::encoder::find_by_name("libsvtav1") - .or_else(|| ffmpeg::codec::encoder::find(ffmpeg::codec::Id::AV1)) - .ok_or(ffmpeg::Error::EncoderNotFound)?; + // Actually, let's try a different approach: + // If we can't find the format easily, maybe we can just use "gdigrab" as the format name if we had a way to convert string to Format. - let output_ctx = ffmpeg::codec::context::Context::new(); - let mut encoder_builder = output_ctx.encoder().video()?; + // We cannot easily look up "gdigrab" by name due to API limitations in the safe wrapper or versioning. + // However, if we enable all devices, ffmpeg might be able to detect it via input(). - // We will scale to YUV420P because it's widely supported and good for streaming - encoder_builder.set_format(ffmpeg::format::Pixel::YUV420P); - encoder_builder.set_width(decoder.width()); - encoder_builder.set_height(decoder.height()); - encoder_builder.set_time_base(input_stream.time_base()); - encoder_builder.set_frame_rate(Some(input_stream.rate())); + // Another trick: We can manually iterate via `ffmpeg::format::format::Input::next()` if we could access it, but it's hidden. - let mut encoder = encoder_builder.open_as(codec)?; + // Let's try to bypass the explicit format finding by using `ffmpeg::format::input_with_dictionary` + // but we need to specify the format. Wait, `input_with_dictionary` takes a path. + // If the path is prefixed with "gdigrab:", maybe it auto-detects? No, gdigrab is a format. - // Scaler to convert whatever input to YUV420P - let mut scaler = ffmpeg::software::scaling::context::Context::get( - decoder.format(), - decoder.width(), - decoder.height(), - ffmpeg::format::Pixel::YUV420P, - decoder.width(), - decoder.height(), - ffmpeg::software::scaling::flag::Flags::BILINEAR, - )?; + // There IS a `ffmpeg::device::input::video` which might help? + // Let's check if we can use the `av_find_input_format` ffi directly if safe wrapper fails us. + // But that requires `unsafe`. - // Send packet function closure not easy due to ownership, doing inline + // Ideally we should use: + // `ffmpeg::format::format::list()` but it is gated by `ffmpeg_5_0` feature being NOT enabled? + // Wait, the error said `list` is not found, and the code I Grepped says `#[cfg(not(feature = "ffmpeg_5_0"))]`. + // If we are on ffmpeg 5.0+, then `av_register_all` is gone and iterating formats is different. - for (stream, packet) in ictx.packets() { - if stream.index() == input_stream_index { - decoder.send_packet(&packet)?; + // If we are on newer FFmpeg, we might not need to look it up manually if we can hint it. + // But `open_with` needs `&Format`. - let mut decoded = ffmpeg::util::frame::Video::empty(); - while decoder.receive_frame(&mut decoded).is_ok() { - // Scale frame - let mut scaled = ffmpeg::util::frame::Video::empty(); - scaler.run(&decoded, &mut scaled)?; + // Let's assume we can use `ffmpeg::device::input::video` if it exists? + // Check `ffmpeg::device` module content. - // Set pts for the scaled frame to match decoded - scaled.set_pts(decoded.pts()); + // Fallback: Use `ffmpeg::format::input(&path)` but force format via dictionary? No, dictionary is options. - // Send to encoder - encoder.send_frame(&scaled)?; + // Actually, look at `ffmpeg::format::open_with`: it takes `&Format`. + // We MUST find the format. - // Receive encoded packets - let mut encoded = ffmpeg::Packet::empty(); - while encoder.receive_packet(&mut encoded).is_ok() { - let is_key = encoded.is_key(); - let data = encoded.data().ok_or("Empty packet data")?.to_vec(); + // Since `list()` is missing, maybe we are on a version > 5.0 feature-wise? + // The crate is version 8.0.0. - // Blocking send to the tokio channel - if tx.blocking_send((data, is_key)).is_err() { - return Ok(()); // Receiver dropped + // Let's try using `ffmpeg::format::Input` directly if there's a way to construct it. + // No. + + // What if we try `ffmpeg::device::input::video()`? + // Let's check `ffmpeg::device` capabilities. + + // For now, let's try a gross hack: + // If `list()` is unavailable, it means we probably can't iterate. + // But we might be able to use `ffmpeg::ffi::av_find_input_format`. + + unsafe { + let name = std::ffi::CString::new("gdigrab").unwrap(); + let ptr = ffmpeg::ffi::av_find_input_format(name.as_ptr()); + if ptr.is_null() { + return Err(ffmpeg::Error::DemuxerNotFound.into()); + } + + let format_input = ffmpeg::format::format::Input::wrap(ptr as *mut _); + // We need to wrap Input into Format, but Format might be private in some contexts or re-exported. + // It is defined in `ffmpeg::format::format::mod.rs` as `pub enum Format`. + // And it is re-exported in `ffmpeg` root? No, `ffmpeg::format::Format` should be public. + // The error says `ffmpeg::format::Format` is private? + // Ah, `use {Dictionary, Error, Format};` in `src/format/mod.rs` means it imports from parent/root? + // No, `pub mod format` defines `Format` enum. + + // Let's try `ffmpeg::format::format::Format::Input` + let format = ffmpeg::format::format::Format::Input(format_input); + + // Now we have the format, proceed. + // Note: `Input::wrap` is `unsafe`. + + let context = + ffmpeg::format::open_with(&std::path::Path::new("desktop"), &format, dictionary)?; + + let mut ictx = match context { + ffmpeg::format::context::Context::Input(ictx) => ictx, + _ => return Err(ffmpeg::Error::DemuxerNotFound.into()), + }; + + let input_stream = ictx + .streams() + .best(ffmpeg::media::Type::Video) + .ok_or(ffmpeg::Error::StreamNotFound)?; + let input_stream_index = input_stream.index(); + + let decoder_ctx = + ffmpeg::codec::context::Context::from_parameters(input_stream.parameters())?; + let mut decoder = decoder_ctx.decoder().video()?; + + // Setup Encoder + // Try to find libsvtav1 or fallback to AV1 generic + let codec = ffmpeg::codec::encoder::find_by_name("libsvtav1") + .or_else(|| ffmpeg::codec::encoder::find(ffmpeg::codec::Id::AV1)) + .ok_or(ffmpeg::Error::EncoderNotFound)?; + + let output_ctx = ffmpeg::codec::context::Context::new(); + let mut encoder_builder = output_ctx.encoder().video()?; + + // We will scale to YUV420P because it's widely supported and good for streaming + encoder_builder.set_format(ffmpeg::format::Pixel::YUV420P); + encoder_builder.set_width(decoder.width()); + encoder_builder.set_height(decoder.height()); + encoder_builder.set_time_base(input_stream.time_base()); + encoder_builder.set_frame_rate(Some(input_stream.rate())); + + let mut encoder = encoder_builder.open_as(codec)?; + + // Scaler to convert whatever input to YUV420P + let mut scaler = ffmpeg::software::scaling::context::Context::get( + decoder.format(), + decoder.width(), + decoder.height(), + ffmpeg::format::Pixel::YUV420P, + decoder.width(), + decoder.height(), + ffmpeg::software::scaling::flag::Flags::BILINEAR, + )?; + + // Send packet function closure not easy due to ownership, doing inline + + for (stream, packet) in ictx.packets() { + if stream.index() == input_stream_index { + decoder.send_packet(&packet)?; + + let mut decoded = ffmpeg::util::frame::Video::empty(); + while decoder.receive_frame(&mut decoded).is_ok() { + // Scale frame + let mut scaled = ffmpeg::util::frame::Video::empty(); + scaler.run(&decoded, &mut scaled)?; + + // Set pts for the scaled frame to match decoded + scaled.set_pts(decoded.pts()); + + // Send to encoder + encoder.send_frame(&scaled)?; + + // Receive encoded packets + let mut encoded = ffmpeg::Packet::empty(); + while encoder.receive_packet(&mut encoded).is_ok() { + let is_key = encoded.is_key(); + let data = encoded.data().ok_or("Empty packet data")?.to_vec(); + + // Blocking send to the tokio channel + if tx.blocking_send((data, is_key)).is_err() { + return Ok(()); // Receiver dropped + } } } } } - } - // Flush encoder - encoder.send_eof()?; - let mut encoded = ffmpeg::Packet::empty(); - while encoder.receive_packet(&mut encoded).is_ok() { - let is_key = encoded.is_key(); - let data = encoded.data().ok_or("Empty packet data")?.to_vec(); - if tx.blocking_send((data, is_key)).is_err() { - return Ok(()); + // Flush encoder + encoder.send_eof()?; + let mut encoded = ffmpeg::Packet::empty(); + while encoder.receive_packet(&mut encoded).is_ok() { + let is_key = encoded.is_key(); + let data = encoded.data().ok_or("Empty packet data")?.to_vec(); + if tx.blocking_send((data, is_key)).is_err() { + return Ok(()); + } } } diff --git a/frontend/src/main.ts b/frontend/src/main.ts index 999ce9b..41516d2 100644 --- a/frontend/src/main.ts +++ b/frontend/src/main.ts @@ -16,6 +16,6 @@ const decoder = createDecoder((frame) => { }) const discordClientId = import.meta.env.VITE_DISCORD_CLIENT_ID; -const addr = `wss://${discordClientId}.discordsays.com/ws` +const addr = import.meta.env.PROD ? `wss://${discordClientId}.discordsays.com/ws` : 'ws://localhost:8080'; createSocket(addr, decoder);