From c668a2bcd8ca8b43fc8cc8531e97e197986e0de5 Mon Sep 17 00:00:00 2001 From: minco Date: Mon, 29 Dec 2025 00:13:37 +0900 Subject: [PATCH] feat: poc --- backend/.gitignore | 1 + backend/Cargo.toml | 10 +- backend/flake.nix | 4 +- backend/src/main.rs | 250 +++++++++++++------------------------- backend/src/pixelutil.rs | 72 +++++++++++ backend/src/probe.rs | 7 ++ frontend/src/transport.ts | 3 +- 7 files changed, 177 insertions(+), 170 deletions(-) create mode 100644 backend/src/pixelutil.rs create mode 100644 backend/src/probe.rs diff --git a/backend/.gitignore b/backend/.gitignore index ea8c4bf..c221188 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -1 +1,2 @@ /target +video.mp4 diff --git a/backend/Cargo.toml b/backend/Cargo.toml index 616d9bc..f96621d 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -4,7 +4,15 @@ version = "0.1.0" edition = "2024" [dependencies] -ffmpeg-next = "8.0.0" +crossbeam = { version = "0.8.4", features = ["crossbeam-channel"] } futures-util = "0.3.31" +parking_lot = "0.12.5" +rav1e = "0.8.1" +scap = "0.0.8" +scopeguard = "1.2.0" tokio = { version = "1.48.0", features = ["full"] } tokio-tungstenite = "0.28.0" +v_frame = "0.3.9" +vpx-rs = "0.2.1" +yuv = "0.8.9" +yuvutils-rs = "0.8.3" diff --git a/backend/flake.nix b/backend/flake.nix index 873c4f4..c5e6ef0 100644 --- a/backend/flake.nix +++ b/backend/flake.nix @@ -16,7 +16,9 @@ ]; buildInputs = with pkgs; [ - ffmpeg.dev + pipewire + dbus + libvpx ]; shellHook = '' diff --git a/backend/src/main.rs b/backend/src/main.rs index bd1cdec..1aed546 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -1,13 +1,23 @@ -use ffmpeg_next as ffmpeg; +use std::borrow::Borrow; +use std::num::NonZero; +use std::thread; +use std::time::Instant; + use futures_util::{SinkExt, StreamExt}; +use rav1e::config::SpeedSettings; +use rav1e::data::Rational; +use scap::capturer::{Area, Capturer, Options, Point}; +use scap::frame::FrameType; use tokio::net::{TcpListener, TcpStream}; use tokio_tungstenite::accept_async; use tokio_tungstenite::tungstenite::Message; +use vpx_rs::enc::{CodecId, CompressedFrameFlags, EncoderProfile}; +use vpx_rs::{EncoderFlags, RateControl, Timebase, YUVImageData}; -#[tokio::main] +mod pixelutil; + +#[tokio::main(flavor = "multi_thread")] async fn main() -> Result<(), Box> { - ffmpeg::init()?; - let addr = "0.0.0.0:8080"; let listener = TcpListener::bind(&addr).await?; println!("Listening on: {}", addr); @@ -37,14 +47,14 @@ async fn accept_connection(stream: TcpStream) { let (mut write, mut read) = ws_stream.split(); - // Channel to communicate between the ffmpeg thread and the websocket task + // Channel to communicate between the video source thread and the websocket task // We send (data, is_key) let (tx, mut rx) = tokio::sync::mpsc::channel::<(Vec, bool)>(100); - // Spawn a blocking thread for FFmpeg processing + // Spawn a blocking thread for video processing (currently just a placeholder loop) std::thread::spawn(move || { if let Err(e) = process_video(tx) { - eprintln!("FFmpeg processing error: {}", e); + eprintln!("Video processing error: {}", e); } }); @@ -95,177 +105,85 @@ async fn accept_connection(stream: TcpStream) { fn process_video( tx: tokio::sync::mpsc::Sender<(Vec, bool)>, ) -> Result<(), Box> { - ffmpeg::device::register_all(); - let mut dictionary = ffmpeg::Dictionary::new(); - dictionary.set("framerate", "30"); - dictionary.set("video_size", "1920x1080"); + let options = Options { + fps: 30, + target: None, + show_cursor: true, + show_highlight: true, + excluded_targets: None, + output_type: FrameType::YUVFrame, + output_resolution: scap::capturer::Resolution::_1080p, + crop_area: Some(Area { + origin: Point { x: 0.0, y: 0.0 }, + size: scap::capturer::Size { + width: 1920.0, + height: 1080.0, + }, + }), + ..Default::default() + }; - // Find the gdigrab input format (Windows) - // ffmpeg::format::format::find returns Option in some versions? - // Let's try to use the device directly if possible or finding the demuxer. - // Based on errors, let's try assuming ffmpeg::format::format::find exists and works for inputs. - // If not, we might need ffmpeg::format::demuxer. + let mut config: vpx_rs::EncoderConfig = vpx_rs::EncoderConfig::new( + CodecId::VP9, + 1920, + 1080, + Timebase { + num: NonZero::new(1).unwrap(), + den: NonZero::new(30).unwrap(), + }, + RateControl::ConstantBitRate(500), + )?; + config.threads = 32; + config.lag_in_frames = 0; - // We'll assume the error 'private module input' implies 'ffmpeg::format::format::input' is private - // so we can't look inside it. But 'ffmpeg::format::format' might have 'find'. + let mut encoder = vpx_rs::Encoder::new(config)?; - // Actually, let's try a different approach: - // If we can't find the format easily, maybe we can just use "gdigrab" as the format name if we had a way to convert string to Format. + let mut capturer = Capturer::build(options)?; + capturer.start_capture(); - // We cannot easily look up "gdigrab" by name due to API limitations in the safe wrapper or versioning. - // However, if we enable all devices, ffmpeg might be able to detect it via input(). + let (frame_tx, frame_rx) = crossbeam::channel::bounded::>(1); - // Another trick: We can manually iterate via `ffmpeg::format::format::Input::next()` if we could access it, but it's hidden. + thread::spawn(move || { + loop { + let instant = Instant::now(); + if let Ok(captured_frame) = capturer.get_next_frame() { + let output_frame = pixelutil::frame_to_yuv(captured_frame); - // Let's try to bypass the explicit format finding by using `ffmpeg::format::input_with_dictionary` - // but we need to specify the format. Wait, `input_with_dictionary` takes a path. - // If the path is prefixed with "gdigrab:", maybe it auto-detects? No, gdigrab is a format. + let _ = frame_tx.try_send(output_frame); - // There IS a `ffmpeg::device::input::video` which might help? - // Let's check if we can use the `av_find_input_format` ffi directly if safe wrapper fails us. - // But that requires `unsafe`. - - // Ideally we should use: - // `ffmpeg::format::format::list()` but it is gated by `ffmpeg_5_0` feature being NOT enabled? - // Wait, the error said `list` is not found, and the code I Grepped says `#[cfg(not(feature = "ffmpeg_5_0"))]`. - // If we are on ffmpeg 5.0+, then `av_register_all` is gone and iterating formats is different. - - // If we are on newer FFmpeg, we might not need to look it up manually if we can hint it. - // But `open_with` needs `&Format`. - - // Let's assume we can use `ffmpeg::device::input::video` if it exists? - // Check `ffmpeg::device` module content. - - // Fallback: Use `ffmpeg::format::input(&path)` but force format via dictionary? No, dictionary is options. - - // Actually, look at `ffmpeg::format::open_with`: it takes `&Format`. - // We MUST find the format. - - // Since `list()` is missing, maybe we are on a version > 5.0 feature-wise? - // The crate is version 8.0.0. - - // Let's try using `ffmpeg::format::Input` directly if there's a way to construct it. - // No. - - // What if we try `ffmpeg::device::input::video()`? - // Let's check `ffmpeg::device` capabilities. - - // For now, let's try a gross hack: - // If `list()` is unavailable, it means we probably can't iterate. - // But we might be able to use `ffmpeg::ffi::av_find_input_format`. - - unsafe { - let name = std::ffi::CString::new("gdigrab").unwrap(); - let ptr = ffmpeg::ffi::av_find_input_format(name.as_ptr()); - if ptr.is_null() { - return Err(ffmpeg::Error::DemuxerNotFound.into()); - } - - let format_input = ffmpeg::format::format::Input::wrap(ptr as *mut _); - // We need to wrap Input into Format, but Format might be private in some contexts or re-exported. - // It is defined in `ffmpeg::format::format::mod.rs` as `pub enum Format`. - // And it is re-exported in `ffmpeg` root? No, `ffmpeg::format::Format` should be public. - // The error says `ffmpeg::format::Format` is private? - // Ah, `use {Dictionary, Error, Format};` in `src/format/mod.rs` means it imports from parent/root? - // No, `pub mod format` defines `Format` enum. - - // Let's try `ffmpeg::format::format::Format::Input` - let format = ffmpeg::format::format::Format::Input(format_input); - - // Now we have the format, proceed. - // Note: `Input::wrap` is `unsafe`. - - let context = - ffmpeg::format::open_with(&std::path::Path::new("desktop"), &format, dictionary)?; - - let mut ictx = match context { - ffmpeg::format::context::Context::Input(ictx) => ictx, - _ => return Err(ffmpeg::Error::DemuxerNotFound.into()), - }; - - let input_stream = ictx - .streams() - .best(ffmpeg::media::Type::Video) - .ok_or(ffmpeg::Error::StreamNotFound)?; - let input_stream_index = input_stream.index(); - - let decoder_ctx = - ffmpeg::codec::context::Context::from_parameters(input_stream.parameters())?; - let mut decoder = decoder_ctx.decoder().video()?; - - // Setup Encoder - // Try to find libsvtav1 or fallback to AV1 generic - let codec = ffmpeg::codec::encoder::find_by_name("libsvtav1") - .or_else(|| ffmpeg::codec::encoder::find(ffmpeg::codec::Id::AV1)) - .ok_or(ffmpeg::Error::EncoderNotFound)?; - - let output_ctx = ffmpeg::codec::context::Context::new(); - let mut encoder_builder = output_ctx.encoder().video()?; - - // We will scale to YUV420P because it's widely supported and good for streaming - encoder_builder.set_format(ffmpeg::format::Pixel::YUV420P); - encoder_builder.set_width(decoder.width()); - encoder_builder.set_height(decoder.height()); - encoder_builder.set_time_base(input_stream.time_base()); - encoder_builder.set_frame_rate(Some(input_stream.rate())); - - let mut encoder = encoder_builder.open_as(codec)?; - - // Scaler to convert whatever input to YUV420P - let mut scaler = ffmpeg::software::scaling::context::Context::get( - decoder.format(), - decoder.width(), - decoder.height(), - ffmpeg::format::Pixel::YUV420P, - decoder.width(), - decoder.height(), - ffmpeg::software::scaling::flag::Flags::BILINEAR, - )?; - - // Send packet function closure not easy due to ownership, doing inline - - for (stream, packet) in ictx.packets() { - if stream.index() == input_stream_index { - decoder.send_packet(&packet)?; - - let mut decoded = ffmpeg::util::frame::Video::empty(); - while decoder.receive_frame(&mut decoded).is_ok() { - // Scale frame - let mut scaled = ffmpeg::util::frame::Video::empty(); - scaler.run(&decoded, &mut scaled)?; - - // Set pts for the scaled frame to match decoded - scaled.set_pts(decoded.pts()); - - // Send to encoder - encoder.send_frame(&scaled)?; - - // Receive encoded packets - let mut encoded = ffmpeg::Packet::empty(); - while encoder.receive_packet(&mut encoded).is_ok() { - let is_key = encoded.is_key(); - let data = encoded.data().ok_or("Empty packet data")?.to_vec(); - - // Blocking send to the tokio channel - if tx.blocking_send((data, is_key)).is_err() { - return Ok(()); // Receiver dropped - } - } + let elapsed = instant.elapsed(); + let frame_duration = std::time::Duration::from_millis(33); // ~30 FPS + if elapsed < frame_duration { + std::thread::sleep(frame_duration - elapsed); } } } + }); - // Flush encoder - encoder.send_eof()?; - let mut encoded = ffmpeg::Packet::empty(); - while encoder.receive_packet(&mut encoded).is_ok() { - let is_key = encoded.is_key(); - let data = encoded.data().ok_or("Empty packet data")?.to_vec(); - if tx.blocking_send((data, is_key)).is_err() { - return Ok(()); + loop { + let yuv_frame_raw = frame_rx.recv()?; + let yuv_frame = pixelutil::apply_frame(&yuv_frame_raw, 1920, 1080); + + let pts = 0; + let duration = 1; + let deadline = vpx_rs::EncodingDeadline::Realtime; + let flags = vpx_rs::EncoderFrameFlags::empty(); + + let packets = encoder.encode(pts, duration, yuv_frame, deadline, flags)?; + + for packet in packets { + match packet { + vpx_rs::Packet::CompressedFrame(frame) => { + let is_key = frame.flags.is_key; + let data = frame.data.to_vec(); + println!("encoded frame: size={}, is_key={}", data.len(), is_key); + if let Err(e) = tx.blocking_send((data, is_key)) { + eprintln!("Error sending encoded frame: {}", e); + return Err(Box::new(e)); + } + } + _ => {} } } } - - Ok(()) } diff --git a/backend/src/pixelutil.rs b/backend/src/pixelutil.rs new file mode 100644 index 0000000..67025f6 --- /dev/null +++ b/backend/src/pixelutil.rs @@ -0,0 +1,72 @@ +use std::time::Instant; + +use scap::frame::Frame as ScapFrame; +use vpx_rs::{ + ImageFormat, YUVImageData, YUVImageDataOwned, + image::{ + UVImagePlanes, UVImagePlanesInterleaved, UVImagePlanesInterleavedMut, UVImagePlanesMut, + }, +}; +use yuv::{YuvBiPlanarImageMut, YuvPlanarImageMut}; + +pub fn frame_to_yuv<'a>(captured_frame: ScapFrame) -> Vec { + match captured_frame { + ScapFrame::BGRx(bgrx_image) => { + let width = bgrx_image.width as u32; + let height = bgrx_image.height as u32; + // let mut planar_image = + // YuvBiPlanarImageMut::alloc(width, height, yuv::YuvChromaSubsampling::Yuv420); + + let mut buf = Vec::new(); + + //println!("Converting BGRx frame: width={}, height={}", width, height); + let start = Instant::now(); + + let r = yuv::bgra_to_yuv420( + &mut wrap_yuv420_buf(width as usize, height as usize, &mut buf), + &bgrx_image.data, + width * 4, + yuv::YuvRange::Full, + yuv::YuvStandardMatrix::Bt601, + yuv::YuvConversionMode::Balanced, + ); + + //println!("BGRx to YUV420 conversion took {:?}", start.elapsed()); + + //println!("Copy took {:?}", start.elapsed()); + + buf + } + _ => { + panic!("Unsupported frame format"); + } + } +} + +pub fn apply_frame<'a>(data: &'a [u8], width: usize, height: usize) -> YUVImageData<'a, u8> { + YUVImageData::from_raw_data(ImageFormat::I420, width as usize, height as usize, data).unwrap() +} + +fn wrap_yuv420_buf( + width: usize, + height: usize, + outbuf: &mut Vec, +) -> yuv::YuvPlanarImageMut<'_, u8> { + let bufsz = ImageFormat::I420 + .buffer_len(width, height) + .unwrap_or_else(|_| panic!("Invalid {width} or {height}")); + outbuf.resize(bufsz, 0); + // Split the output buffer into Y, U and V at the plane boundaries + let (y_plane, uv_plane) = outbuf.split_at_mut(width * height); + let (u_plane, v_plane) = uv_plane.split_at_mut((width / 2) * (height / 2)); + yuv::YuvPlanarImageMut { + y_plane: yuv::BufferStoreMut::Borrowed(y_plane), + y_stride: width as u32, + u_plane: yuv::BufferStoreMut::Borrowed(u_plane), + u_stride: (width / 2) as u32, + v_plane: yuv::BufferStoreMut::Borrowed(v_plane), + v_stride: (width / 2) as u32, + width: width as u32, + height: height as u32, + } +} diff --git a/backend/src/probe.rs b/backend/src/probe.rs new file mode 100644 index 0000000..1aa331e --- /dev/null +++ b/backend/src/probe.rs @@ -0,0 +1,7 @@ +use yuvutils_rs::{YuvBiPlanarImageMut, rgb_to_yuv_nv12, YuvRange, YuvStandardMatrix, YuvConversionMode}; + +fn main() { + let _ = YuvRange::Limited; + let _ = YuvStandardMatrix::Bt709; + // Let's guess simple names first, compiler will correct me +} diff --git a/frontend/src/transport.ts b/frontend/src/transport.ts index e027cab..66b6e7a 100644 --- a/frontend/src/transport.ts +++ b/frontend/src/transport.ts @@ -5,8 +5,7 @@ export function createDecoder(f: (frame: VideoFrame) => void): VideoDecoder { }) decoder.configure({ - codec: 'av01.0.04M.08', - optimizeForLatency: true + codec: 'vp09.00.10.08', }); return decoder;