feat: poc

2025-12-29 00:13:37 +09:00
parent ddf9aec981
commit c668a2bcd8
7 changed files with 177 additions and 170 deletions
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -1 +1,2 @@
 /target
+video.mp4
--- a/backend/Cargo.toml
+++ b/backend/Cargo.toml
@@ -4,7 +4,15 @@ version = "0.1.0"
 edition = "2024"

 [dependencies]
-ffmpeg-next = "8.0.0"
+crossbeam = { version = "0.8.4", features = ["crossbeam-channel"] }
 futures-util = "0.3.31"
+parking_lot = "0.12.5"
+rav1e = "0.8.1"
+scap = "0.0.8"
+scopeguard = "1.2.0"
 tokio = { version = "1.48.0", features = ["full"] }
 tokio-tungstenite = "0.28.0"
+v_frame = "0.3.9"
+vpx-rs = "0.2.1"
+yuv = "0.8.9"
+yuvutils-rs = "0.8.3"
--- a/backend/flake.nix
+++ b/backend/flake.nix
@@ -16,7 +16,9 @@
        ];
        
        buildInputs = with pkgs; [
-            ffmpeg.dev
+            pipewire
+            dbus
+            libvpx
        ];

          shellHook = ''
--- a/backend/src/main.rs
+++ b/backend/src/main.rs
@@ -1,13 +1,23 @@
-use ffmpeg_next as ffmpeg;
+use std::borrow::Borrow;
+use std::num::NonZero;
+use std::thread;
+use std::time::Instant;
+
 use futures_util::{SinkExt, StreamExt};
+use rav1e::config::SpeedSettings;
+use rav1e::data::Rational;
+use scap::capturer::{Area, Capturer, Options, Point};
+use scap::frame::FrameType;
 use tokio::net::{TcpListener, TcpStream};
 use tokio_tungstenite::accept_async;
 use tokio_tungstenite::tungstenite::Message;
+use vpx_rs::enc::{CodecId, CompressedFrameFlags, EncoderProfile};
+use vpx_rs::{EncoderFlags, RateControl, Timebase, YUVImageData};

-#[tokio::main]
+mod pixelutil;
+
+#[tokio::main(flavor = "multi_thread")]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
-    ffmpeg::init()?;
-
    let addr = "0.0.0.0:8080";
    let listener = TcpListener::bind(&addr).await?;
    println!("Listening on: {}", addr);
@@ -37,14 +47,14 @@ async fn accept_connection(stream: TcpStream) {

    let (mut write, mut read) = ws_stream.split();

-    // Channel to communicate between the ffmpeg thread and the websocket task
+    // Channel to communicate between the video source thread and the websocket task
    // We send (data, is_key)
    let (tx, mut rx) = tokio::sync::mpsc::channel::<(Vec<u8>, bool)>(100);

-    // Spawn a blocking thread for FFmpeg processing
+    // Spawn a blocking thread for video processing (currently just a placeholder loop)
    std::thread::spawn(move || {
        if let Err(e) = process_video(tx) {
-            eprintln!("FFmpeg processing error: {}", e);
+            eprintln!("Video processing error: {}", e);
        }
    });

@@ -95,177 +105,85 @@ async fn accept_connection(stream: TcpStream) {
 fn process_video(
    tx: tokio::sync::mpsc::Sender<(Vec<u8>, bool)>,
 ) -> Result<(), Box<dyn std::error::Error>> {
-    ffmpeg::device::register_all();
-    let mut dictionary = ffmpeg::Dictionary::new();
-    dictionary.set("framerate", "30");
-    dictionary.set("video_size", "1920x1080");
+    let options = Options {
+        fps: 30,
+        target: None,
+        show_cursor: true,
+        show_highlight: true,
+        excluded_targets: None,
+        output_type: FrameType::YUVFrame,
+        output_resolution: scap::capturer::Resolution::_1080p,
+        crop_area: Some(Area {
+            origin: Point { x: 0.0, y: 0.0 },
+            size: scap::capturer::Size {
+                width: 1920.0,
+                height: 1080.0,
+            },
+        }),
+        ..Default::default()
+    };

-    // Find the gdigrab input format (Windows)
-    // ffmpeg::format::format::find returns Option<ffmpeg::format::format::Input> in some versions?
-    // Let's try to use the device directly if possible or finding the demuxer.
-    // Based on errors, let's try assuming ffmpeg::format::format::find exists and works for inputs.
-    // If not, we might need ffmpeg::format::demuxer.
+    let mut config: vpx_rs::EncoderConfig<u8> = vpx_rs::EncoderConfig::new(
+        CodecId::VP9,
+        1920,
+        1080,
+        Timebase {
+            num: NonZero::new(1).unwrap(),
+            den: NonZero::new(30).unwrap(),
+        },
+        RateControl::ConstantBitRate(500),
+    )?;
+    config.threads = 32;
+    config.lag_in_frames = 0;

-    // We'll assume the error 'private module input' implies 'ffmpeg::format::format::input' is private
-    // so we can't look inside it. But 'ffmpeg::format::format' might have 'find'.
+    let mut encoder = vpx_rs::Encoder::new(config)?;

-    // Actually, let's try a different approach:
-    // If we can't find the format easily, maybe we can just use "gdigrab" as the format name if we had a way to convert string to Format.
+    let mut capturer = Capturer::build(options)?;
+    capturer.start_capture();

-    // We cannot easily look up "gdigrab" by name due to API limitations in the safe wrapper or versioning.
-    // However, if we enable all devices, ffmpeg might be able to detect it via input().
+    let (frame_tx, frame_rx) = crossbeam::channel::bounded::<Vec<u8>>(1);

-    // Another trick: We can manually iterate via `ffmpeg::format::format::Input::next()` if we could access it, but it's hidden.
+    thread::spawn(move || {
+        loop {
+            let instant = Instant::now();
+            if let Ok(captured_frame) = capturer.get_next_frame() {
+                let output_frame = pixelutil::frame_to_yuv(captured_frame);

-    // Let's try to bypass the explicit format finding by using `ffmpeg::format::input_with_dictionary`
-    // but we need to specify the format. Wait, `input_with_dictionary` takes a path.
-    // If the path is prefixed with "gdigrab:", maybe it auto-detects? No, gdigrab is a format.
+                let _ = frame_tx.try_send(output_frame);

-    // There IS a `ffmpeg::device::input::video` which might help?
-    // Let's check if we can use the `av_find_input_format` ffi directly if safe wrapper fails us.
-    // But that requires `unsafe`.
-
-    // Ideally we should use:
-    // `ffmpeg::format::format::list()` but it is gated by `ffmpeg_5_0` feature being NOT enabled?
-    // Wait, the error said `list` is not found, and the code I Grepped says `#[cfg(not(feature = "ffmpeg_5_0"))]`.
-    // If we are on ffmpeg 5.0+, then `av_register_all` is gone and iterating formats is different.
-
-    // If we are on newer FFmpeg, we might not need to look it up manually if we can hint it.
-    // But `open_with` needs `&Format`.
-
-    // Let's assume we can use `ffmpeg::device::input::video` if it exists?
-    // Check `ffmpeg::device` module content.
-
-    // Fallback: Use `ffmpeg::format::input(&path)` but force format via dictionary? No, dictionary is options.
-
-    // Actually, look at `ffmpeg::format::open_with`: it takes `&Format`.
-    // We MUST find the format.
-
-    // Since `list()` is missing, maybe we are on a version > 5.0 feature-wise?
-    // The crate is version 8.0.0.
-
-    // Let's try using `ffmpeg::format::Input` directly if there's a way to construct it.
-    // No.
-
-    // What if we try `ffmpeg::device::input::video()`?
-    // Let's check `ffmpeg::device` capabilities.
-
-    // For now, let's try a gross hack:
-    // If `list()` is unavailable, it means we probably can't iterate.
-    // But we might be able to use `ffmpeg::ffi::av_find_input_format`.
-
-    unsafe {
-        let name = std::ffi::CString::new("gdigrab").unwrap();
-        let ptr = ffmpeg::ffi::av_find_input_format(name.as_ptr());
-        if ptr.is_null() {
-            return Err(ffmpeg::Error::DemuxerNotFound.into());
-        }
-
-        let format_input = ffmpeg::format::format::Input::wrap(ptr as *mut _);
-        // We need to wrap Input into Format, but Format might be private in some contexts or re-exported.
-        // It is defined in `ffmpeg::format::format::mod.rs` as `pub enum Format`.
-        // And it is re-exported in `ffmpeg` root? No, `ffmpeg::format::Format` should be public.
-        // The error says `ffmpeg::format::Format` is private?
-        // Ah, `use {Dictionary, Error, Format};` in `src/format/mod.rs` means it imports from parent/root?
-        // No, `pub mod format` defines `Format` enum.
-
-        // Let's try `ffmpeg::format::format::Format::Input`
-        let format = ffmpeg::format::format::Format::Input(format_input);
-
-        // Now we have the format, proceed.
-        // Note: `Input::wrap` is `unsafe`.
-
-        let context =
-            ffmpeg::format::open_with(&std::path::Path::new("desktop"), &format, dictionary)?;
-
-        let mut ictx = match context {
-            ffmpeg::format::context::Context::Input(ictx) => ictx,
-            _ => return Err(ffmpeg::Error::DemuxerNotFound.into()),
-        };
-
-        let input_stream = ictx
-            .streams()
-            .best(ffmpeg::media::Type::Video)
-            .ok_or(ffmpeg::Error::StreamNotFound)?;
-        let input_stream_index = input_stream.index();
-
-        let decoder_ctx =
-            ffmpeg::codec::context::Context::from_parameters(input_stream.parameters())?;
-        let mut decoder = decoder_ctx.decoder().video()?;
-
-        // Setup Encoder
-        // Try to find libsvtav1 or fallback to AV1 generic
-        let codec = ffmpeg::codec::encoder::find_by_name("libsvtav1")
-            .or_else(|| ffmpeg::codec::encoder::find(ffmpeg::codec::Id::AV1))
-            .ok_or(ffmpeg::Error::EncoderNotFound)?;
-
-        let output_ctx = ffmpeg::codec::context::Context::new();
-        let mut encoder_builder = output_ctx.encoder().video()?;
-
-        // We will scale to YUV420P because it's widely supported and good for streaming
-        encoder_builder.set_format(ffmpeg::format::Pixel::YUV420P);
-        encoder_builder.set_width(decoder.width());
-        encoder_builder.set_height(decoder.height());
-        encoder_builder.set_time_base(input_stream.time_base());
-        encoder_builder.set_frame_rate(Some(input_stream.rate()));
-
-        let mut encoder = encoder_builder.open_as(codec)?;
-
-        // Scaler to convert whatever input to YUV420P
-        let mut scaler = ffmpeg::software::scaling::context::Context::get(
-            decoder.format(),
-            decoder.width(),
-            decoder.height(),
-            ffmpeg::format::Pixel::YUV420P,
-            decoder.width(),
-            decoder.height(),
-            ffmpeg::software::scaling::flag::Flags::BILINEAR,
-        )?;
-
-        // Send packet function closure not easy due to ownership, doing inline
-
-        for (stream, packet) in ictx.packets() {
-            if stream.index() == input_stream_index {
-                decoder.send_packet(&packet)?;
-
-                let mut decoded = ffmpeg::util::frame::Video::empty();
-                while decoder.receive_frame(&mut decoded).is_ok() {
-                    // Scale frame
-                    let mut scaled = ffmpeg::util::frame::Video::empty();
-                    scaler.run(&decoded, &mut scaled)?;
-
-                    // Set pts for the scaled frame to match decoded
-                    scaled.set_pts(decoded.pts());
-
-                    // Send to encoder
-                    encoder.send_frame(&scaled)?;
-
-                    // Receive encoded packets
-                    let mut encoded = ffmpeg::Packet::empty();
-                    while encoder.receive_packet(&mut encoded).is_ok() {
-                        let is_key = encoded.is_key();
-                        let data = encoded.data().ok_or("Empty packet data")?.to_vec();
-
-                        // Blocking send to the tokio channel
-                        if tx.blocking_send((data, is_key)).is_err() {
-                            return Ok(()); // Receiver dropped
-                        }
-                    }
+                let elapsed = instant.elapsed();
+                let frame_duration = std::time::Duration::from_millis(33); // ~30 FPS
+                if elapsed < frame_duration {
+                    std::thread::sleep(frame_duration - elapsed);
                }
            }
        }
+    });

-        // Flush encoder
-        encoder.send_eof()?;
-        let mut encoded = ffmpeg::Packet::empty();
-        while encoder.receive_packet(&mut encoded).is_ok() {
-            let is_key = encoded.is_key();
-            let data = encoded.data().ok_or("Empty packet data")?.to_vec();
-            if tx.blocking_send((data, is_key)).is_err() {
-                return Ok(());
+    loop {
+        let yuv_frame_raw = frame_rx.recv()?;
+        let yuv_frame = pixelutil::apply_frame(&yuv_frame_raw, 1920, 1080);
+
+        let pts = 0;
+        let duration = 1;
+        let deadline = vpx_rs::EncodingDeadline::Realtime;
+        let flags = vpx_rs::EncoderFrameFlags::empty();
+
+        let packets = encoder.encode(pts, duration, yuv_frame, deadline, flags)?;
+
+        for packet in packets {
+            match packet {
+                vpx_rs::Packet::CompressedFrame(frame) => {
+                    let is_key = frame.flags.is_key;
+                    let data = frame.data.to_vec();
+                    println!("encoded frame: size={}, is_key={}", data.len(), is_key);
+                    if let Err(e) = tx.blocking_send((data, is_key)) {
+                        eprintln!("Error sending encoded frame: {}", e);
+                        return Err(Box::new(e));
+                    }
+                }
+                _ => {}
            }
        }
    }
-
-    Ok(())
 }
--- a/backend/src/pixelutil.rs
+++ b/backend/src/pixelutil.rs
@@ -0,0 +1,72 @@
+use std::time::Instant;
+
+use scap::frame::Frame as ScapFrame;
+use vpx_rs::{
+    ImageFormat, YUVImageData, YUVImageDataOwned,
+    image::{
+        UVImagePlanes, UVImagePlanesInterleaved, UVImagePlanesInterleavedMut, UVImagePlanesMut,
+    },
+};
+use yuv::{YuvBiPlanarImageMut, YuvPlanarImageMut};
+
+pub fn frame_to_yuv<'a>(captured_frame: ScapFrame) -> Vec<u8> {
+    match captured_frame {
+        ScapFrame::BGRx(bgrx_image) => {
+            let width = bgrx_image.width as u32;
+            let height = bgrx_image.height as u32;
+            // let mut planar_image =
+            //     YuvBiPlanarImageMut::alloc(width, height, yuv::YuvChromaSubsampling::Yuv420);
+
+            let mut buf = Vec::new();
+
+            //println!("Converting BGRx frame: width={}, height={}", width, height);
+            let start = Instant::now();
+
+            let r = yuv::bgra_to_yuv420(
+                &mut wrap_yuv420_buf(width as usize, height as usize, &mut buf),
+                &bgrx_image.data,
+                width * 4,
+                yuv::YuvRange::Full,
+                yuv::YuvStandardMatrix::Bt601,
+                yuv::YuvConversionMode::Balanced,
+            );
+
+            //println!("BGRx to YUV420 conversion took {:?}", start.elapsed());
+
+            //println!("Copy took {:?}", start.elapsed());
+
+            buf
+        }
+        _ => {
+            panic!("Unsupported frame format");
+        }
+    }
+}
+
+pub fn apply_frame<'a>(data: &'a [u8], width: usize, height: usize) -> YUVImageData<'a, u8> {
+    YUVImageData::from_raw_data(ImageFormat::I420, width as usize, height as usize, data).unwrap()
+}
+
+fn wrap_yuv420_buf(
+    width: usize,
+    height: usize,
+    outbuf: &mut Vec<u8>,
+) -> yuv::YuvPlanarImageMut<'_, u8> {
+    let bufsz = ImageFormat::I420
+        .buffer_len(width, height)
+        .unwrap_or_else(|_| panic!("Invalid {width} or {height}"));
+    outbuf.resize(bufsz, 0);
+    // Split the output buffer into Y, U and V at the plane boundaries
+    let (y_plane, uv_plane) = outbuf.split_at_mut(width * height);
+    let (u_plane, v_plane) = uv_plane.split_at_mut((width / 2) * (height / 2));
+    yuv::YuvPlanarImageMut {
+        y_plane: yuv::BufferStoreMut::Borrowed(y_plane),
+        y_stride: width as u32,
+        u_plane: yuv::BufferStoreMut::Borrowed(u_plane),
+        u_stride: (width / 2) as u32,
+        v_plane: yuv::BufferStoreMut::Borrowed(v_plane),
+        v_stride: (width / 2) as u32,
+        width: width as u32,
+        height: height as u32,
+    }
+}
--- a/backend/src/probe.rs
+++ b/backend/src/probe.rs
@@ -0,0 +1,7 @@
+use yuvutils_rs::{YuvBiPlanarImageMut, rgb_to_yuv_nv12, YuvRange, YuvStandardMatrix, YuvConversionMode};
+
+fn main() {
+   let _ = YuvRange::Limited;
+   let _ = YuvStandardMatrix::Bt709;
+   // Let's guess simple names first, compiler will correct me
+}