From 3cf8eac16380f3409746c9b3dc947b5f6a593016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Wed, 8 Apr 2026 18:11:06 +0200 Subject: [PATCH 1/4] feat: add comprehensive benchmark infrastructure for remote viewing pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Criterion.rs benchmarks (via codspeed-criterion-compat) covering the full remote viewing hot path: color space conversion, VP8/VP9/AV1 encode/decode, protobuf serialization, video queue, mutex contention patterns, and end-to-end encode/decode pipelines. All benchmarks call the real implementation (encode_to_message, Decoder::handle_video_frame, etc.) with synthetic inputs — zero logic duplication, zero maintenance when the implementation changes. Also adds: - CI integration via reusable workflow (_bench.yml) with CodSpeed - Shared composite action for Linux build environment setup - Path-based filtering to run benchmarks only when libs/scrap or libs/hbb_common change Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/actions/setup-linux/action.yml | 81 +++++++ .github/workflows/_bench.yml | 30 +++ .github/workflows/ci.yml | 260 ++++---------------- Cargo.lock | 249 +++++++++++++++++-- libs/scrap/Cargo.toml | 38 +++ libs/scrap/benches/codec_decode.rs | 238 ++++++++++++++++++ libs/scrap/benches/codec_encode.rs | 230 ++++++++++++++++++ libs/scrap/benches/common/mod.rs | 319 +++++++++++++++++++++++++ libs/scrap/benches/convert.rs | 210 ++++++++++++++++ libs/scrap/benches/mutex_contention.rs | 182 ++++++++++++++ libs/scrap/benches/pipeline_decode.rs | 185 ++++++++++++++ libs/scrap/benches/pipeline_encode.rs | 215 +++++++++++++++++ libs/scrap/benches/protobuf.rs | 127 ++++++++++ libs/scrap/benches/video_queue.rs | 151 ++++++++++++ libs/scrap/benches/yuv_to_rgb.rs | 193 +++++++++++++++ 15 files changed, 2478 insertions(+), 230 deletions(-) create mode 100644 .github/actions/setup-linux/action.yml create mode 100644 
.github/workflows/_bench.yml create mode 100644 libs/scrap/benches/codec_decode.rs create mode 100644 libs/scrap/benches/codec_encode.rs create mode 100644 libs/scrap/benches/common/mod.rs create mode 100644 libs/scrap/benches/convert.rs create mode 100644 libs/scrap/benches/mutex_contention.rs create mode 100644 libs/scrap/benches/pipeline_decode.rs create mode 100644 libs/scrap/benches/pipeline_encode.rs create mode 100644 libs/scrap/benches/protobuf.rs create mode 100644 libs/scrap/benches/video_queue.rs create mode 100644 libs/scrap/benches/yuv_to_rgb.rs diff --git a/.github/actions/setup-linux/action.yml b/.github/actions/setup-linux/action.yml new file mode 100644 index 000000000..98d59df00 --- /dev/null +++ b/.github/actions/setup-linux/action.yml @@ -0,0 +1,81 @@ +name: Setup Linux build environment +description: Shared setup for CI and benchmarks — installs system deps, vcpkg, Rust toolchain, and caches. + +inputs: + vcpkg-commit-id: + description: vcpkg Git commit to pin + default: "120deac3062162151622ca4860575a33844ba10b" + rust-toolchain: + description: Rust toolchain channel + default: stable + +runs: + using: composite + steps: + - name: Free Disk Space + uses: jlumbroso/free-disk-space@v1.3.1 + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: false + docker-images: true + swap-storage: false + + - name: Export GitHub Actions cache environment variables + uses: actions/github-script@v6 + with: + script: | + core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); + core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + + - name: Checkout source code + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install prerequisites + shell: bash + run: | + sudo apt-get -y update + sudo apt-get install -y \ + clang \ + cmake \ + curl \ + gcc \ + git \ + g++ \ + libpam0g-dev \ + libasound2-dev \ + libunwind-dev \ + libgstreamer1.0-dev \ + 
libgstreamer-plugins-base1.0-dev \ + libgtk-3-dev \ + libpulse-dev \ + libva-dev \ + libvdpau-dev \ + libxcb-randr0-dev \ + libxcb-shape0-dev \ + libxcb-xfixes0-dev \ + libxdo-dev \ + libxfixes-dev \ + nasm \ + wget + + - name: Setup vcpkg with Github Actions binary cache + uses: lukka/run-vcpkg@v11 + with: + vcpkgDirectory: /opt/artifacts/vcpkg + vcpkgGitCommitId: ${{ inputs.vcpkg-commit-id }} + + - name: Install vcpkg dependencies + shell: bash + run: $VCPKG_ROOT/vcpkg install --x-install-root="$VCPKG_ROOT/installed" + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@v1 + with: + toolchain: ${{ inputs.rust-toolchain }} + + - uses: Swatinem/rust-cache@v2 diff --git a/.github/workflows/_bench.yml b/.github/workflows/_bench.yml new file mode 100644 index 000000000..231aeb1d7 --- /dev/null +++ b/.github/workflows/_bench.yml @@ -0,0 +1,30 @@ +name: Benchmarks + +on: + workflow_call: + workflow_dispatch: + +permissions: + contents: read + id-token: write # CodSpeed OIDC authentication + +env: + VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" + +jobs: + benchmarks: + name: Performance benchmarks + runs-on: ubuntu-latest + steps: + - uses: ./.github/actions/setup-linux + + - name: Install cargo-codspeed + run: cargo install cargo-codspeed + + - name: Build benchmarks + run: cargo codspeed build --package scrap + + - name: Run benchmarks + uses: CodSpeedHQ/action@v4 + with: + run: cargo codspeed run --package scrap diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a7d21d7e..67310ec1e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,10 +1,7 @@ name: CI env: -# MIN_SUPPORTED_RUST_VERSION: "1.46.0" -# CICD_INTERMEDIATES_DIR: "_cicd-intermediates" VCPKG_BINARY_SOURCES: "clear;x-gha,readwrite" - # for multiarch gcc compatibility VCPKG_COMMIT_ID: "120deac3062162151622ca4860575a33844ba10b" on: @@ -25,46 +22,6 @@ on: - "flatpak/**" jobs: - # ensure_cargo_fmt: - # name: Ensure 'cargo fmt' has been run - # runs-on: 
ubuntu-20.04 - # steps: - # - uses: actions-rs/toolchain@v1 - # with: - # toolchain: stable - # default: true - # profile: minimal - # components: rustfmt - # - uses: actions/checkout@v3 - # - run: cargo fmt -- --check - - # min_version: - # name: Minimum supported rust version - # runs-on: ubuntu-20.04 - # steps: - # - name: Checkout source code - # uses: actions/checkout@v3 - # with: - # submodules: recursive - - # - name: Install rust toolchain (v${{ env.MIN_SUPPORTED_RUST_VERSION }}) - # uses: actions-rs/toolchain@v1 - # with: - # toolchain: ${{ env.MIN_SUPPORTED_RUST_VERSION }} - # default: true - # profile: minimal # minimal component installation (ie, no documentation) - # components: clippy - # - name: Run clippy (on minimum supported rust version to prevent warnings we can't fix) - # uses: actions-rs/cargo@v1 - # with: - # command: clippy - # args: --locked --all-targets --all-features -- --allow clippy::unknown_clippy_lints - # - name: Run tests - # uses: actions-rs/cargo@v1 - # with: - # command: test - # args: --locked - build: name: ${{ matrix.job.target }} (${{ matrix.job.os }}) runs-on: ${{ matrix.job.os }} @@ -72,179 +29,60 @@ jobs: fail-fast: false matrix: job: - # - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04, use-cross: true } - # - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04, use-cross: true } - # - { target: arm-unknown-linux-musleabihf, os: ubuntu-20.04, use-cross: true } - # - { target: i686-pc-windows-msvc , os: windows-2022 } - # - { target: i686-unknown-linux-gnu , os: ubuntu-20.04, use-cross: true } - # - { target: i686-unknown-linux-musl , os: ubuntu-20.04, use-cross: true } - # - { target: x86_64-apple-darwin , os: macos-10.15 } - # - { target: x86_64-pc-windows-gnu , os: windows-2022 } - # - { target: x86_64-pc-windows-msvc , os: windows-2022 } - - { target: x86_64-unknown-linux-gnu , os: ubuntu-24.04 } - # - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04, use-cross: true } + - { target: 
x86_64-unknown-linux-gnu, os: ubuntu-24.04 } steps: - - name: Free Disk Space (Ubuntu) - if: runner.os == 'Linux' - # jlumbroso/free-disk-space@main is used in .github\workflows\flutter-build.yml - # But pinning to a specific version to avoid unexpected issues is preferred. - uses: jlumbroso/free-disk-space@v1.3.1 - with: - tool-cache: false - android: true - dotnet: true - haskell: true - large-packages: false - docker-images: true - swap-storage: false + - uses: ./.github/actions/setup-linux - - name: Export GitHub Actions cache environment variables - uses: actions/github-script@v6 - with: - script: | - core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); - core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + - name: Show version information (Rust, cargo, GCC) + shell: bash + run: | + gcc --version || true + rustup -V + rustup toolchain list + rustup default + cargo -V + rustc -V - - name: Checkout source code - uses: actions/checkout@v4 - with: - submodules: recursive + - name: Build + run: cargo build --locked --target=${{ matrix.job.target }} - - name: Install prerequisites - shell: bash - run: | - case ${{ matrix.job.target }} in - x86_64-unknown-linux-gnu) - sudo apt-get -y update - sudo apt-get install -y \ - clang \ - cmake \ - curl \ - gcc \ - git \ - g++ \ - libpam0g-dev \ - libasound2-dev \ - libunwind-dev \ - libgstreamer1.0-dev \ - libgstreamer-plugins-base1.0-dev \ - libgtk-3-dev \ - libpulse-dev \ - libva-dev \ - libvdpau-dev \ - libxcb-randr0-dev \ - libxcb-shape0-dev \ - libxcb-xfixes0-dev \ - libxdo-dev \ - libxfixes-dev \ - nasm \ - wget - ;; - # arm-unknown-linux-*) sudo apt-get -y update ; sudo apt-get -y install gcc-arm-linux-gnueabihf ;; - # aarch64-unknown-linux-gnu) sudo apt-get -y update ; sudo apt-get -y install gcc-aarch64-linux-gnu ;; - esac + - name: Clean + run: cargo clean - - name: Setup vcpkg with Github Actions binary cache - uses: lukka/run-vcpkg@v11 - with: - 
vcpkgDirectory: /opt/artifacts/vcpkg - vcpkgGitCommitId: ${{ env.VCPKG_COMMIT_ID }} + - name: Set testing options + id: test-options + shell: bash + run: | + unset CARGO_TEST_OPTIONS + case ${{ matrix.job.target }} in + arm-* | aarch64-*) + CARGO_TEST_OPTIONS="--lib --bin ${PROJECT_NAME}" + ;; + *) + CARGO_TEST_OPTIONS="--workspace --no-fail-fast -- --skip test_get_cursor_pos --skip test_get_key_state" + ;; + esac; + echo "CARGO_TEST_OPTIONS=${CARGO_TEST_OPTIONS}" >> "$GITHUB_OUTPUT" - - name: Install vcpkg dependencies - run: | - $VCPKG_ROOT/vcpkg install --x-install-root="$VCPKG_ROOT/installed" - shell: bash + - name: Run tests + run: cargo test --locked --target=${{ matrix.job.target }} ${{ steps.test-options.outputs.CARGO_TEST_OPTIONS }} - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@v1 - with: - toolchain: stable - targets: ${{ matrix.job.target }} - components: '' + check-bench-paths: + name: Check benchmark-relevant changes + runs-on: ubuntu-latest + outputs: + should-bench: ${{ steps.check.outputs.run }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 2 + - id: check + run: | + CHANGED=$(git diff --name-only HEAD~1 HEAD -- libs/scrap/ libs/hbb_common/ 2>/dev/null || echo "") + echo "run=$( [ -n "$CHANGED" ] && echo true || echo false )" >> "$GITHUB_OUTPUT" - - name: Show version information (Rust, cargo, GCC) - shell: bash - run: | - gcc --version || true - rustup -V - rustup toolchain list - rustup default - cargo -V - rustc -V - - - uses: Swatinem/rust-cache@v2 - - - name: Build - uses: actions-rs/cargo@v1 - with: - use-cross: ${{ matrix.job.use-cross }} - command: build - args: --locked --target=${{ matrix.job.target }} - - - name: clean - shell: bash - run: | - cargo clean - - # - name: Strip debug information from executable - # id: strip - # shell: bash - # run: | - # # Figure out suffix of binary - # EXE_suffix="" - # case ${{ matrix.job.target }} in - # *-pc-windows-*) EXE_suffix=".exe" ;; - # esac; - - # # Figure out 
what strip tool to use if any - # STRIP="strip" - # case ${{ matrix.job.target }} in - # arm-unknown-linux-*) STRIP="arm-linux-gnueabihf-strip" ;; - # aarch64-unknown-linux-gnu) STRIP="aarch64-linux-gnu-strip" ;; - # *-pc-windows-msvc) STRIP="" ;; - # esac; - - # # Setup paths - # BIN_DIR="${{ env.CICD_INTERMEDIATES_DIR }}/stripped-release-bin/" - # mkdir -p "${BIN_DIR}" - # BIN_NAME="${{ env.PROJECT_NAME }}${EXE_suffix}" - # BIN_PATH="${BIN_DIR}/${BIN_NAME}" - - # # Copy the release build binary to the result location - # cp "target/${{ matrix.job.target }}/release/${BIN_NAME}" "${BIN_DIR}" - - # # Also strip if possible - # if [ -n "${STRIP}" ]; then - # "${STRIP}" "${BIN_PATH}" - # fi - - # # Let subsequent steps know where to find the (stripped) bin - # echo ::set-output name=BIN_PATH::${BIN_PATH} - # echo ::set-output name=BIN_NAME::${BIN_NAME} - - - name: Set testing options - id: test-options - shell: bash - run: | - # test only library unit tests and binary for arm-type targets - unset CARGO_TEST_OPTIONS - - case ${{ matrix.job.target }} in - arm-* | aarch64-*) - CARGO_TEST_OPTIONS="--lib --bin ${PROJECT_NAME}" - ;; - *) - CARGO_TEST_OPTIONS="--workspace --no-fail-fast -- --skip test_get_cursor_pos --skip test_get_key_state" - ;; - esac; - - #deprecated echo ::set-output name=CARGO_TEST_OPTIONS::${CARGO_TEST_OPTIONS} - echo "CARGO_TEST_OPTIONS=${CARGO_TEST_OPTIONS}" >> $GITHUB_ENV - echo "CARGO_TEST_OPTIONS=${CARGO_TEST_OPTIONS}" >> $GITHUB_OUTPUT - - - name: Run tests - uses: actions-rs/cargo@v1 - with: - use-cross: ${{ matrix.job.use-cross }} - command: test - args: --locked --target=${{ matrix.job.target }} ${{ steps.test-options.outputs.CARGO_TEST_OPTIONS}} + benchmarks: + name: Benchmarks + needs: [build, check-bench-paths] + if: needs.check-bench-paths.outputs.should-bench == 'true' + uses: ./.github/workflows/_bench.yml diff --git a/Cargo.lock b/Cargo.lock index febfd6b17..886c5e7fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -225,6 +225,12 @@ 
dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "ansi_term" version = "0.12.1" @@ -289,6 +295,15 @@ version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits 0.2.19", +] + [[package]] name = "arboard" version = "3.4.0" @@ -1081,6 +1096,12 @@ dependencies = [ "wayland-client", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cbc" version = "0.1.2" @@ -1200,6 +1221,33 @@ dependencies = [ "windows-link 0.1.1", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde 1.0.228", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cidr-utils" version = "0.5.11" @@ -1426,6 +1474,64 @@ dependencies = [ "objc", ] +[[package]] +name = "codspeed" +version = "4.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b684e94583e85a5ca7e1a6454a89d76a5121240f2fb67eb564129d9bafdb9db0" +dependencies = [ + "anyhow", + "cc", + "colored", + "getrandom 0.2.15", + "glob", + "libc", + "nix 0.31.2", + "serde 1.0.228", + "serde_json 1.0.149", + "statrs", +] + +[[package]] +name = "codspeed-criterion-compat" +version = "4.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e65444156eb73ad7f57618188f8d4a281726d133ef55b96d1dcff89528609ab" +dependencies = [ + "clap 4.5.53", + "codspeed", + "codspeed-criterion-compat-walltime", + "colored", + "regex", +] + +[[package]] +name = "codspeed-criterion-compat-walltime" +version = "4.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96389aaa4bbb872ea4924dc0335b2bb181bcf28d6eedbe8fea29afcc5bde36a6" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap 4.5.53", + "codspeed", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits 0.2.19", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde 1.0.228", + "serde_derive", + "serde_json 1.0.149", + "tinytemplate", + "walkdir", +] + [[package]] name = "color_quant" version = "1.1.0" @@ -1438,6 +1544,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" +[[package]] +name = "colored" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +dependencies = [ + "lazy_static", + "windows-sys 0.52.0", +] + [[package]] name = "combine" version = "4.6.7" @@ -1793,6 +1909,16 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies 
= [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam-channel" version = "0.5.13" @@ -2694,7 +2820,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3490,9 +3616,9 @@ dependencies = [ [[package]] name = "glob" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "gobject-sys" @@ -3781,7 +3907,7 @@ dependencies = [ "rustls-platform-verifier", "serde 1.0.228", "serde_derive", - "serde_json 1.0.118", + "serde_json 1.0.149", "sha2", "smithay-client-toolkit 0.20.0", "socket2 0.3.19", @@ -3959,7 +4085,7 @@ dependencies = [ "log", "serde 1.0.228", "serde_derive", - "serde_json 1.0.118", + "serde_json 1.0.149", ] [[package]] @@ -4270,6 +4396,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.12.1" @@ -4452,9 +4587,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.171" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "libdbus-sys" @@ -5134,6 +5269,18 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" 
+dependencies = [ + "bitflags 2.9.1", + "cfg-if 1.0.0", + "cfg_aliases 0.2.1", + "libc", +] + [[package]] name = "nokhwa" version = "0.10.7" @@ -5750,6 +5897,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "opaque-debug" version = "0.3.1" @@ -5886,7 +6039,7 @@ checksum = "38731fa859ef679f1aec66ca9562165926b442f298467f76f5990f431efe87dc" dependencies = [ "serde 1.0.228", "serde_derive", - "serde_json 1.0.118", + "serde_json 1.0.149", ] [[package]] @@ -6280,6 +6433,34 @@ dependencies = [ "time 0.3.36", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits 0.2.19", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "png" version = "0.17.13" @@ -6673,7 +6854,7 @@ dependencies = [ "once_cell", "socket2 0.5.10", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] @@ -7076,7 +7257,7 @@ dependencies = [ "rustls-native-certs", "rustls-pki-types", "serde 1.0.228", - "serde_json 1.0.118", + "serde_json 1.0.149", "serde_urlencoded", "sync_wrapper", "tokio", @@ 
-7350,7 +7531,7 @@ dependencies = [ "scrap", "serde 1.0.228", "serde_derive", - "serde_json 1.0.118", + "serde_json 1.0.149", "serde_repr", "sha2", "shared_memory", @@ -7457,7 +7638,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -7514,7 +7695,7 @@ dependencies = [ "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -7604,6 +7785,8 @@ dependencies = [ "bindgen 0.65.1", "block", "cfg-if 1.0.0", + "codspeed-criterion-compat", + "crossbeam-queue", "dbus", "docopt", "gstreamer", @@ -7622,7 +7805,7 @@ dependencies = [ "quest", "repng", "serde 1.0.228", - "serde_json 1.0.118", + "serde_json 1.0.149", "target_build_utils", "tracing", "webm", @@ -7761,13 +7944,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.118" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa 1.0.11", - "ryu", + "memchr", "serde 1.0.228", + "serde_core", + "zmij", ] [[package]] @@ -8158,6 +8343,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "statrs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" +dependencies = [ + "approx", + "num-traits 0.2.19", +] + [[package]] name = "strength_reduce" version = "0.2.4" @@ -8730,6 +8925,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde 1.0.228", + "serde_json 1.0.149", +] + [[package]] name = "tinyvec" version = "1.6.1" @@ -9853,7 +10058,7 @@ dependencies = [ "rtp", "sdp", "serde 1.0.228", - "serde_json 1.0.118", + "serde_json 1.0.149", "sha2", "smol_str", "stun", @@ -9900,7 +10105,7 @@ dependencies = [ "portable-atomic", "rand 0.9.2", "serde 1.0.228", - "serde_json 1.0.118", + "serde_json 1.0.149", "stun", "thiserror 1.0.61", "tokio", @@ -11193,6 +11398,12 @@ dependencies = [ "zstd 0.11.2+zstd.1.5.2", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zstd" version = "0.11.2+zstd.1.5.2" diff --git a/libs/scrap/Cargo.toml b/libs/scrap/Cargo.toml index 505eca2de..f31de259d 100644 --- a/libs/scrap/Cargo.toml +++ b/libs/scrap/Cargo.toml @@ -45,6 +45,44 @@ ndk-context = "0.1" repng = "0.2" docopt = "1.1" quest = "0.3" +criterion = { package = "codspeed-criterion-compat", version = "4" } +crossbeam-queue = "0.3" + +[[bench]] +name = "convert" +harness = false + +[[bench]] +name = "yuv_to_rgb" +harness = false + +[[bench]] +name = "codec_encode" +harness = false + +[[bench]] +name = "codec_decode" +harness = false + +[[bench]] +name = "protobuf" +harness = false + +[[bench]] +name = "mutex_contention" +harness = false + +[[bench]] +name = "video_queue" +harness = false + +[[bench]] +name = "pipeline_encode" +harness = false + +[[bench]] +name = "pipeline_decode" +harness = false [build-dependencies] target_build_utils = "0.3" diff --git a/libs/scrap/benches/codec_decode.rs b/libs/scrap/benches/codec_decode.rs new file mode 100644 index 000000000..7e0155830 --- /dev/null +++ b/libs/scrap/benches/codec_decode.rs @@ -0,0 +1,238 @@ +mod common; + +use common::{pre_encode_av1, pre_encode_vpx}; +use criterion::{ + black_box, criterion_group, 
criterion_main, BenchmarkId, Criterion, Throughput, +}; +use hbb_common::{ + bytes::Bytes, + message_proto::{video_frame, Chroma, EncodedVideoFrame, EncodedVideoFrames}, +}; +use scrap::{ + codec::Decoder, CodecFormat, ImageFormat, ImageRgb, ImageTexture, VpxVideoCodecId, +}; +use std::time::Duration; + +/// C. Video decode benchmarks. +/// +/// Calls the real `Decoder::handle_video_frame()` — the exact function used +/// by the client-side VideoHandler. This includes codec dispatch, the +/// "keep only last frame" pattern, and YUV→RGB conversion. +/// See libs/scrap/src/common/codec.rs:631. + +const W: usize = 1920; +const H: usize = 1080; + +/// Build a `video_frame::Union` from pre-encoded data, ready for handle_video_frame. +fn make_union_vp9(frames: &[common::EncodedFrame]) -> Vec<video_frame::Union> { + frames + .iter() + .map(|f| { + let mut evf = EncodedVideoFrame::new(); + evf.data = Bytes::from(f.data.clone()); + evf.key = f.key; + evf.pts = f.pts; + let mut evfs = EncodedVideoFrames::new(); + evfs.frames.push(evf); + video_frame::Union::Vp9s(evfs) + }) + .collect() +} + +fn make_union_vp8(frames: &[common::EncodedFrame]) -> Vec<video_frame::Union> { + frames + .iter() + .map(|f| { + let mut evf = EncodedVideoFrame::new(); + evf.data = Bytes::from(f.data.clone()); + evf.key = f.key; + evf.pts = f.pts; + let mut evfs = EncodedVideoFrames::new(); + evfs.frames.push(evf); + video_frame::Union::Vp8s(evfs) + }) + .collect() +} + +fn make_union_av1(frames: &[common::EncodedFrame]) -> Vec<video_frame::Union> { + frames + .iter() + .map(|f| { + let mut evf = EncodedVideoFrame::new(); + evf.data = Bytes::from(f.data.clone()); + evf.key = f.key; + evf.pts = f.pts; + let mut evfs = EncodedVideoFrames::new(); + evfs.frames.push(evf); + video_frame::Union::Av1s(evfs) + }) + .collect() +} + +// --------------------------------------------------------------------------- +// Single-frame decode (VP8, VP9, AV1) — via Decoder::handle_video_frame +// --------------------------------------------------------------------------- + +fn 
bench_decode_single(c: &mut Criterion) { + let mut group = c.benchmark_group("decode_single"); + + // VP8 + { + let encoded = pre_encode_vpx(VpxVideoCodecId::VP8, W, H, 1.0, 30); + let unions = make_union_vp8(&encoded); + let mut decoder = Decoder::new(CodecFormat::VP8, None); + let mut rgb = ImageRgb::new(ImageFormat::ARGB, 1); + let mut texture = ImageTexture::default(); + let mut pixelbuffer = true; + let mut chroma: Option<Chroma> = None; + + group.throughput(Throughput::Elements(1)); + group.bench_with_input(BenchmarkId::from_parameter("vp8_1080p"), &(), |b, _| { + let mut idx = 0; + b.iter(|| { + let union = &unions[idx % unions.len()]; + let _ = decoder.handle_video_frame( + black_box(union), + &mut rgb, + &mut texture, + &mut pixelbuffer, + &mut chroma, + ); + idx += 1; + }); + }); + } + + // VP9 + { + let encoded = pre_encode_vpx(VpxVideoCodecId::VP9, W, H, 1.0, 30); + let unions = make_union_vp9(&encoded); + let mut decoder = Decoder::new(CodecFormat::VP9, None); + let mut rgb = ImageRgb::new(ImageFormat::ARGB, 1); + let mut texture = ImageTexture::default(); + let mut pixelbuffer = true; + let mut chroma: Option<Chroma> = None; + + group.throughput(Throughput::Elements(1)); + group.bench_with_input(BenchmarkId::from_parameter("vp9_1080p"), &(), |b, _| { + let mut idx = 0; + b.iter(|| { + let union = &unions[idx % unions.len()]; + let _ = decoder.handle_video_frame( + black_box(union), + &mut rgb, + &mut texture, + &mut pixelbuffer, + &mut chroma, + ); + idx += 1; + }); + }); + } + + // AV1 + { + let encoded = pre_encode_av1(W, H, 1.0, 30); + let unions = make_union_av1(&encoded); + let mut decoder = Decoder::new(CodecFormat::AV1, None); + let mut rgb = ImageRgb::new(ImageFormat::ARGB, 1); + let mut texture = ImageTexture::default(); + let mut pixelbuffer = true; + let mut chroma: Option<Chroma> = None; + + group.throughput(Throughput::Elements(1)); + group.bench_with_input(BenchmarkId::from_parameter("av1_1080p"), &(), |b, _| { + let mut idx = 0; + b.iter(|| { + let union = 
&unions[idx % unions.len()]; + let _ = decoder.handle_video_frame( + black_box(union), + &mut rgb, + &mut texture, + &mut pixelbuffer, + &mut chroma, + ); + idx += 1; + }); + }); + } + + group.finish(); +} + +// --------------------------------------------------------------------------- +// Sequence decode: 100 frames (VP9) +// --------------------------------------------------------------------------- + +fn bench_vp9_decode_sequence(c: &mut Criterion) { + let mut group = c.benchmark_group("vp9_decode_sequence"); + group.sample_size(10); + group.measurement_time(Duration::from_secs(20)); + + let encoded = pre_encode_vpx(VpxVideoCodecId::VP9, W, H, 1.0, 100); + let unions = make_union_vp9(&encoded); + let mut decoder = Decoder::new(CodecFormat::VP9, None); + let mut rgb = ImageRgb::new(ImageFormat::ARGB, 1); + let mut texture = ImageTexture::default(); + let mut pixelbuffer = true; + let mut chroma: Option<Chroma> = None; + + group.throughput(Throughput::Elements(100)); + group.bench_function(BenchmarkId::from_parameter("100frames_1080p"), |b| { + b.iter(|| { + for union in &unions { + let _ = decoder.handle_video_frame( + black_box(union), + &mut rgb, + &mut texture, + &mut pixelbuffer, + &mut chroma, + ); + } + }); + }); + group.finish(); +} + +// --------------------------------------------------------------------------- +// 4K decode (VP9) +// --------------------------------------------------------------------------- + +fn bench_vp9_decode_4k(c: &mut Criterion) { + let mut group = c.benchmark_group("decode_4k"); + group.measurement_time(Duration::from_secs(15)); + + let (w4k, h4k) = (3840, 2160); + let encoded = pre_encode_vpx(VpxVideoCodecId::VP9, w4k, h4k, 1.0, 10); + let unions = make_union_vp9(&encoded); + let mut decoder = Decoder::new(CodecFormat::VP9, None); + let mut rgb = ImageRgb::new(ImageFormat::ARGB, 1); + let mut texture = ImageTexture::default(); + let mut pixelbuffer = true; + let mut chroma: Option<Chroma> = None; + + group.throughput(Throughput::Elements(1)); 
+ group.bench_function(BenchmarkId::from_parameter("vp9"), |b| { + let mut idx = 0; + b.iter(|| { + let union = &unions[idx % unions.len()]; + let _ = decoder.handle_video_frame( + black_box(union), + &mut rgb, + &mut texture, + &mut pixelbuffer, + &mut chroma, + ); + idx += 1; + }); + }); + group.finish(); +} + +criterion_group!( + benches, + bench_decode_single, + bench_vp9_decode_sequence, + bench_vp9_decode_4k, +); +criterion_main!(benches); diff --git a/libs/scrap/benches/codec_encode.rs b/libs/scrap/benches/codec_encode.rs new file mode 100644 index 000000000..e551c150e --- /dev/null +++ b/libs/scrap/benches/codec_encode.rs @@ -0,0 +1,230 @@ +mod common; + +use common::make_i420; +use criterion::{ + black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput, +}; +use scrap::{ + aom::{AomEncoder, AomEncoderConfig}, + codec::{EncoderApi, EncoderCfg}, + EncodeInput, VpxEncoder, VpxEncoderConfig, VpxVideoCodecId, +}; +use std::time::Duration; + +/// B. Video encode benchmarks. +/// +/// Calls the real `EncoderApi::encode_to_message()` — the same function used +/// by video_service.rs handle_one_frame(). This ensures any change to the +/// encode path (flush behavior, frame creation, etc.) is reflected here. +/// +/// Includes single-frame, sequence (static + movement), and quality variations. 
+
+const W: usize = 1920;
+const H: usize = 1080;
+
+/// Number of frames encoded per iteration by the sequence benchmarks.
+const SEQ_FRAMES: usize = 100;
+
+/// Builds a VPX encoder with the default keyframe interval.
+///
+/// Panics if the encoder cannot be created: no benchmark in this file can
+/// run without one.
+fn new_vpx_encoder(codec: VpxVideoCodecId, w: usize, h: usize, quality: f32) -> VpxEncoder {
+    let cfg = EncoderCfg::VPX(VpxEncoderConfig {
+        width: w as _,
+        height: h as _,
+        quality,
+        codec,
+        keyframe_interval: None,
+    });
+    VpxEncoder::new(cfg, false).expect("failed to create VPX encoder")
+}
+
+// ---------------------------------------------------------------------------
+// Single-frame encode (VP8, VP9, AV1)
+// ---------------------------------------------------------------------------
+
+/// Encodes the same 1080p frame once per iteration with VP8, VP9 and AV1.
+fn bench_vpx_encode_single(c: &mut Criterion) {
+    let mut group = c.benchmark_group("encode_single");
+    group.throughput(Throughput::Elements(1));
+    let (yuv, _) = make_i420(W, H, 0);
+
+    for (codec, label) in [
+        (VpxVideoCodecId::VP8, "vp8_1080p"),
+        (VpxVideoCodecId::VP9, "vp9_1080p"),
+    ] {
+        let mut encoder = new_vpx_encoder(codec, W, H, 1.0);
+        group.bench_function(BenchmarkId::from_parameter(label), |b| {
+            let mut pts = 0i64;
+            b.iter(|| {
+                let input = EncodeInput::YUV(black_box(&yuv));
+                let frame = encoder.encode_to_message(input, pts);
+                pts += 1;
+                // Returned so Criterion treats the result as observed.
+                frame
+            });
+        });
+    }
+
+    // AV1
+    {
+        let cfg = EncoderCfg::AOM(AomEncoderConfig {
+            width: W as _,
+            height: H as _,
+            quality: 1.0,
+            keyframe_interval: None,
+        });
+        let mut encoder = AomEncoder::new(cfg, false).expect("failed to create AV1 encoder");
+
+        group.bench_function(BenchmarkId::from_parameter("av1_1080p"), |b| {
+            let mut pts = 0i64;
+            b.iter(|| {
+                let input = EncodeInput::YUV(black_box(&yuv));
+                let frame = encoder.encode_to_message(input, pts);
+                pts += 1;
+                frame
+            });
+        });
+    }
+
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// 4K encode
+// ---------------------------------------------------------------------------
+
+/// Encodes one 3840x2160 frame per iteration with VP8 and VP9.
+fn bench_encode_4k(c: &mut Criterion) {
+    let mut group = c.benchmark_group("encode_4k");
+    group.measurement_time(Duration::from_secs(15));
+    group.throughput(Throughput::Elements(1));
+    let (w4k, h4k) = (3840, 2160);
+    let (yuv, _) = make_i420(w4k, h4k, 0);
+
+    for (codec, label) in [(VpxVideoCodecId::VP8, "vp8"), (VpxVideoCodecId::VP9, "vp9")] {
+        let mut encoder = new_vpx_encoder(codec, w4k, h4k, 1.0);
+        group.bench_function(BenchmarkId::from_parameter(label), |b| {
+            let mut pts = 0i64;
+            b.iter(|| {
+                let input = EncodeInput::YUV(black_box(&yuv));
+                let frame = encoder.encode_to_message(input, pts);
+                pts += 1;
+                frame
+            });
+        });
+    }
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Sequence encode: 100 static frames (simulates idle screen)
+// ---------------------------------------------------------------------------
+
+/// Encodes the same 1080p frame `SEQ_FRAMES` times per iteration with VP9.
+fn bench_vp9_encode_sequence_static(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vp9_encode_seq_static");
+    group.sample_size(10);
+    group.measurement_time(Duration::from_secs(30));
+
+    let mut encoder = new_vpx_encoder(VpxVideoCodecId::VP9, W, H, 1.0);
+    let (yuv, _) = make_i420(W, H, 0);
+
+    group.throughput(Throughput::Elements(SEQ_FRAMES as u64));
+    group.bench_function(BenchmarkId::from_parameter("100frames_1080p"), |b| {
+        // The encoder outlives every iteration, so the timestamp must keep
+        // increasing across iterations instead of restarting at 0.
+        let mut pts = 0i64;
+        b.iter(|| {
+            for _ in 0..SEQ_FRAMES {
+                let input = EncodeInput::YUV(black_box(&yuv));
+                let _ = encoder.encode_to_message(input, pts);
+                pts += 1;
+            }
+        });
+    });
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Sequence encode: 100 varied frames (simulates scroll / movement)
+// ---------------------------------------------------------------------------
+
+/// Encodes `SEQ_FRAMES` progressively shifted 1080p frames per iteration with VP9.
+fn bench_vp9_encode_sequence_movement(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vp9_encode_seq_movement");
+    group.sample_size(10);
+    group.measurement_time(Duration::from_secs(30));
+
+    let mut encoder = new_vpx_encoder(VpxVideoCodecId::VP9, W, H, 1.0);
+
+    // Pre-generate the frames with a progressive shift so that generation
+    // cost stays out of the measurement.
+    let frames: Vec<Vec<u8>> = (0..SEQ_FRAMES)
+        .map(|i| make_i420(W, H, i * 5).0)
+        .collect();
+
+    group.throughput(Throughput::Elements(SEQ_FRAMES as u64));
+    group.bench_function(BenchmarkId::from_parameter("100frames_1080p"), |b| {
+        // See bench_vp9_encode_sequence_static: timestamps stay monotonic.
+        let mut pts = 0i64;
+        b.iter(|| {
+            for yuv in &frames {
+                let input = EncodeInput::YUV(black_box(yuv));
+                let _ = encoder.encode_to_message(input, pts);
+                pts += 1;
+            }
+        });
+    });
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Quality ratio impact (VP9 1080p)
+// ---------------------------------------------------------------------------
+
+/// Encodes one 1080p VP9 frame per iteration at three quality ratios.
+fn bench_vp9_encode_quality(c: &mut Criterion) {
+    let mut group = c.benchmark_group("vp9_encode_quality");
+    group.measurement_time(Duration::from_secs(10));
+    group.throughput(Throughput::Elements(1));
+
+    let qualities: &[(&str, f32)] = &[
+        ("q0.5_speed", 0.5),
+        ("q1.0_balanced", 1.0),
+        ("q2.0_best", 2.0),
+    ];
+    let (yuv, _) = make_i420(W, H, 0);
+
+    for &(label, quality) in qualities {
+        let mut encoder = new_vpx_encoder(VpxVideoCodecId::VP9, W, H, quality);
+        group.bench_function(BenchmarkId::from_parameter(label), |b| {
+            let mut pts = 0i64;
+            b.iter(|| {
+                let input = EncodeInput::YUV(black_box(&yuv));
+                let frame = encoder.encode_to_message(input, pts);
+                pts += 1;
+                frame
+            });
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_vpx_encode_single,
+    bench_encode_4k,
+    bench_vp9_encode_sequence_static,
+    bench_vp9_encode_sequence_movement,
+    bench_vp9_encode_quality,
+);
+criterion_main!(benches);
diff --git a/libs/scrap/benches/common/mod.rs b/libs/scrap/benches/common/mod.rs
new file mode 100644
index 000000000..3984169bb
--- /dev/null
+++ b/libs/scrap/benches/common/mod.rs
@@ -0,0 +1,319 @@
+// Every bench target includes this module but uses only part of it.
+#![allow(dead_code)]
+
+pub use scrap::STRIDE_ALIGN;
+use scrap::{
+    aom::{AomEncoder, AomEncoderConfig},
+    codec::{EncoderApi, EncoderCfg},
+    EncodeYuvFormat, Pixfmt, VpxEncoder, VpxEncoderConfig, VpxVideoCodecId,
+};
+
+// ---------------------------------------------------------------------------
+// Resolutions
+// ---------------------------------------------------------------------------
+
+/// `(width, height, label)` triples shared by the per-resolution benchmarks.
+pub const RESOLUTIONS: &[(usize, usize, &str)] = &[
+    (1280, 720, "720p"),
+    (1920, 1080, "1080p"),
+    (3840, 2160, "4K"),
+];
+
+// ---------------------------------------------------------------------------
+// Alignment
+// ---------------------------------------------------------------------------
+
+/// Rounds `x` up to the next multiple of `align`.
+///
+/// # Panics
+///
+/// Panics if `align` is zero.
+#[inline]
+pub fn align_up(x: usize, align: usize) -> usize {
+    (x + align - 1) / align * align
+}
+
+// ---------------------------------------------------------------------------
+// BGRA buffer generation
+// ---------------------------------------------------------------------------
+
+/// Content used to fill a synthetic BGRA frame.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Pattern {
+    /// Solid fill — compresses very well, potential fast-path.
+    Solid(u8),
+    /// Horizontal gradient — varied but predictable.
+    Gradient,
+    /// Pseudo-random (seeded LCG) — worst case, incompressible.
+    Random(u64),
+}
+
+/// Allocates a tightly packed `w` x `h` BGRA frame filled with `pattern`.
+///
+/// Returns the buffer together with its stride in bytes (`w * 4`).
+pub fn make_bgra(w: usize, h: usize, pattern: &Pattern) -> (Vec<u8>, usize) {
+    let stride = w * 4;
+    let mut buf = vec![0u8; stride * h];
+    fill_bgra(&mut buf, w, h, stride, pattern);
+    (buf, stride)
+}
+
+/// BGRA buffer with extra stride padding (simulates non-aligned capture).
+pub fn make_bgra_strided(w: usize, h: usize, stride: usize, pattern: &Pattern) -> Vec<u8> {
+    assert!(stride >= w * 4);
+    let mut buf = vec![0u8; stride * h];
+    fill_bgra(&mut buf, w, h, stride, pattern);
+    buf
+}
+
+/// Fills the `w` x `h` pixel area of `buf` with `pattern`.
+///
+/// Rows are `stride` bytes apart. Bytes past the first `w * 4` of each row
+/// are padding and are left untouched.
+fn fill_bgra(buf: &mut [u8], w: usize, h: usize, stride: usize, pattern: &Pattern) {
+    if w == 0 || h == 0 {
+        return;
+    }
+    let rows = buf
+        .chunks_mut(stride)
+        .take(h)
+        .map(|row| &mut row[..w * 4]);
+    match pattern {
+        Pattern::Solid(v) => {
+            for row in rows {
+                for px in row.chunks_exact_mut(4) {
+                    px.copy_from_slice(&[*v, *v, *v, 255]);
+                }
+            }
+        }
+        Pattern::Gradient => {
+            for (y, row) in rows.enumerate() {
+                for (x, px) in row.chunks_exact_mut(4).enumerate() {
+                    let v = ((y + x) % 256) as u8;
+                    px.copy_from_slice(&[v, v, v, 255]);
+                }
+            }
+        }
+        Pattern::Random(seed) => {
+            // 64-bit LCG stepped once per byte (alpha included), row-major.
+            let mut s = *seed;
+            for byte in rows.flatten() {
+                s = s
+                    .wrapping_mul(6364136223846793005)
+                    .wrapping_add(1442695040888963407);
+                *byte = (s >> 33) as u8;
+            }
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// YUV layouts
+// ---------------------------------------------------------------------------
+
+/// Plane strides and sizes (in bytes) of a contiguous I420 buffer.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct I420Layout {
+    pub stride_y: usize,
+    pub stride_uv: usize,
+    pub y_size: usize,
+    pub uv_size: usize,
+    pub total: usize,
+}
+
+/// Computes the I420 layout for a `w` x `h` frame, with every stride aligned
+/// to `STRIDE_ALIGN`.
+pub fn i420_layout(w: usize, h: usize) -> I420Layout {
+    let stride_y = align_up(w, STRIDE_ALIGN);
+    // Chroma is subsampled 2x2; round up so odd dimensions keep their last
+    // chroma column/row. Even dimensions are unaffected.
+    let stride_uv = align_up((w + 1) / 2, STRIDE_ALIGN);
+    let y_size = stride_y * h;
+    let uv_size = stride_uv * ((h + 1) / 2);
+    I420Layout {
+        stride_y,
+        stride_uv,
+        y_size,
+        uv_size,
+        total: y_size + 2 * uv_size,
+    }
+}
+
+/// Writes the diagonal gradient `(row + col + shift) % 256` into the first
+/// `w` bytes of each of the first `h` rows of `plane`.
+fn fill_luma_gradient(plane: &mut [u8], w: usize, h: usize, stride: usize, shift: usize) {
+    if w == 0 {
+        return;
+    }
+    for (row, line) in plane.chunks_mut(stride).take(h).enumerate() {
+        for (col, px) in line[..w].iter_mut().enumerate() {
+            *px = ((row + col + shift) % 256) as u8;
+        }
+    }
+}
+
+/// Builds an I420 frame: gradient luma offset by `shift`, neutral (128) chroma.
+pub fn make_i420(w: usize, h: usize, shift: usize) -> (Vec<u8>, I420Layout) {
+    let layout = i420_layout(w, h);
+    let mut data = vec![0u8; layout.total];
+    let (luma, chroma) = data.split_at_mut(layout.y_size);
+    fill_luma_gradient(luma, w, h, layout.stride_y, shift);
+    chroma.fill(128);
+    (data, layout)
+}
+
+/// Plane strides and sizes (in bytes) of a contiguous NV12 buffer.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct NV12Layout {
+    pub stride_y: usize,
+    pub stride_uv: usize,
+    pub y_size: usize,
+    pub uv_size: usize,
+    pub total: usize,
+}
+
+/// Computes the NV12 layout (Y plane followed by one interleaved UV plane).
+pub fn nv12_layout(w: usize, h: usize) -> NV12Layout {
+    let stride_y = align_up(w, STRIDE_ALIGN);
+    // One U and one V byte per 2x2 block; rounded up for odd dimensions.
+    let stride_uv = align_up(2 * ((w + 1) / 2), STRIDE_ALIGN);
+    let y_size = stride_y * h;
+    let uv_size = stride_uv * ((h + 1) / 2);
+    NV12Layout {
+        stride_y,
+        stride_uv,
+        y_size,
+        uv_size,
+        total: y_size + uv_size,
+    }
+}
+
+/// Builds an NV12 frame: gradient luma, neutral (128) chroma.
+pub fn make_nv12(w: usize, h: usize) -> (Vec<u8>, NV12Layout) {
+    let layout = nv12_layout(w, h);
+    let mut data = vec![0u8; layout.total];
+    let (luma, chroma) = data.split_at_mut(layout.y_size);
+    fill_luma_gradient(luma, w, h, layout.stride_y, 0);
+    chroma.fill(128);
+    (data, layout)
+}
+
+/// Stride and plane size (in bytes) of a contiguous I444 buffer.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct I444Layout {
+    pub stride: usize,
+    pub plane_size: usize,
+    pub total: usize,
+}
+
+/// Computes the I444 layout (three full-resolution planes).
+pub fn i444_layout(w: usize, h: usize) -> I444Layout {
+    let stride = align_up(w, STRIDE_ALIGN);
+    let plane_size = stride * h;
+    I444Layout {
+        stride,
+        plane_size,
+        total: 3 * plane_size,
+    }
+}
+
+/// Builds an I444 frame: gradient luma, neutral (128) chroma.
+pub fn make_i444(w: usize, h: usize) -> (Vec<u8>, I444Layout) {
+    let layout = i444_layout(w, h);
+    let mut data = vec![0u8; layout.total];
+    let (luma, chroma) = data.split_at_mut(layout.plane_size);
+    fill_luma_gradient(luma, w, h, layout.stride, 0);
+    chroma.fill(128);
+    (data, layout)
+}
+
+// ---------------------------------------------------------------------------
+// EncodeYuvFormat helpers
+// ---------------------------------------------------------------------------
+
+/// `EncodeYuvFormat` describing the buffer produced by [`make_i420`].
+pub fn yuv_format_i420(w: usize, h: usize) -> EncodeYuvFormat {
+    let layout = i420_layout(w, h);
+    EncodeYuvFormat {
+        pixfmt: Pixfmt::I420,
+        w,
+        h,
+        stride: vec![layout.stride_y, layout.stride_uv, layout.stride_uv, 0],
+        u: layout.y_size,
+        v: layout.y_size + layout.uv_size,
+    }
+}
+
+/// `EncodeYuvFormat` describing the buffer produced by [`make_nv12`].
+pub fn yuv_format_nv12(w: usize, h: usize) -> EncodeYuvFormat {
+    let layout = nv12_layout(w, h);
+    EncodeYuvFormat {
+        pixfmt: Pixfmt::NV12,
+        w,
+        h,
+        stride: vec![layout.stride_y, layout.stride_uv, 0, 0],
+        u: layout.y_size,
+        v: 0,
+    }
+}
+
+/// `EncodeYuvFormat` describing the buffer produced by [`make_i444`].
+pub fn yuv_format_i444(w: usize, h: usize) -> EncodeYuvFormat {
+    let layout = i444_layout(w, h);
+    EncodeYuvFormat {
+        pixfmt: Pixfmt::I444,
+        w,
+        h,
+        stride: vec![layout.stride, layout.stride, layout.stride, 0],
+        u: layout.plane_size,
+        v: 2 * layout.plane_size,
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Pre-encoding helpers (for decode benchmarks)
+// ---------------------------------------------------------------------------
+
+/// Owned copy of one encoder output packet.
+#[derive(Debug, Clone)]
+pub struct EncodedFrame {
+    pub data: Vec<u8>,
+    pub key: bool,
+    pub pts: i64,
+}
+
+/// Encodes shifted gradient frames with VP8/VP9 until `n` packets exist.
+///
+/// Each input frame is encoded and then flushed. The loop keeps feeding
+/// frames until `n` packets have been collected, so it does not terminate if
+/// the encoder never produces output.
+pub fn pre_encode_vpx(
+    codec: VpxVideoCodecId,
+    w: usize,
+    h: usize,
+    quality: f32,
+    n: usize,
+) -> Vec<EncodedFrame> {
+    if n == 0 {
+        return Vec::new();
+    }
+    let cfg = EncoderCfg::VPX(VpxEncoderConfig {
+        width: w as _,
+        height: h as _,
+        quality,
+        codec,
+        keyframe_interval: None,
+    });
+    let mut enc = VpxEncoder::new(cfg, false).unwrap();
+    let mut out = Vec::with_capacity(n);
+    for i in 0.. {
+        let (yuv, _) = make_i420(w, h, i * 3);
+        for frame in enc.encode(i as i64, &yuv, STRIDE_ALIGN).unwrap() {
+            out.push(EncodedFrame {
+                data: frame.data.to_vec(),
+                key: frame.key,
+                pts: frame.pts,
+            });
+        }
+        for frame in enc.flush().unwrap() {
+            out.push(EncodedFrame {
+                data: frame.data.to_vec(),
+                key: frame.key,
+                pts: frame.pts,
+            });
+        }
+        if out.len() >= n {
+            break;
+        }
+    }
+    out.truncate(n);
+    out
+}
+
+/// Encodes shifted gradient frames with AV1 until `n` packets exist.
+///
+/// Same termination caveat as [`pre_encode_vpx`].
+pub fn pre_encode_av1(w: usize, h: usize, quality: f32, n: usize) -> Vec<EncodedFrame> {
+    if n == 0 {
+        return Vec::new();
+    }
+    let cfg = EncoderCfg::AOM(AomEncoderConfig {
+        width: w as _,
+        height: h as _,
+        quality,
+        keyframe_interval: None,
+    });
+    let mut enc = AomEncoder::new(cfg, false).unwrap();
+    let mut out = Vec::with_capacity(n);
+    for i in 0.. {
+        let (yuv, _) = make_i420(w, h, i * 3);
+        for frame in enc.encode(i as i64, &yuv, STRIDE_ALIGN).unwrap() {
+            out.push(EncodedFrame {
+                data: frame.data.to_vec(),
+                key: frame.key,
+                pts: frame.pts,
+            });
+        }
+        if out.len() >= n {
+            break;
+        }
+    }
+    out.truncate(n);
+    out
+}
diff --git a/libs/scrap/benches/convert.rs b/libs/scrap/benches/convert.rs
new file mode 100644
index 000000000..53245e1de
--- /dev/null
+++ b/libs/scrap/benches/convert.rs
@@ -0,0 +1,210 @@
+//! A. Color space conversion benchmarks (server-side hot path).
+//!
+//! Measures BGRA → I420/NV12/I444 via libyuv FFI at multiple resolutions
+//! and with different input patterns (solid, gradient, random).
+
+mod common;
+
+use common::{
+    i420_layout, i444_layout, make_bgra, make_bgra_strided, nv12_layout, Pattern, RESOLUTIONS,
+};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+
+/// Converts one `w` x `h` BGRA frame into the I420 buffer `dst` with
+/// `scrap::ARGBToI420`.
+///
+/// # Panics
+///
+/// Panics if `src` or `dst` is too small for the requested geometry.
+#[inline]
+fn bgra_to_i420(src: &[u8], src_stride: usize, dst: &mut [u8], w: usize, h: usize) {
+    let layout = i420_layout(w, h);
+    assert!(src_stride >= w * 4 && src.len() >= src_stride * h);
+    assert!(dst.len() >= layout.total);
+    // SAFETY: the asserts above guarantee that `h` source rows of
+    // `src_stride` bytes lie inside `src`, and that the Y, U and V planes
+    // described by `layout` lie inside `dst`.
+    unsafe {
+        let dst_y = dst.as_mut_ptr();
+        let dst_u = dst_y.add(layout.y_size);
+        let dst_v = dst_u.add(layout.uv_size);
+        scrap::ARGBToI420(
+            src.as_ptr(),
+            src_stride as _,
+            dst_y,
+            layout.stride_y as _,
+            dst_u,
+            layout.stride_uv as _,
+            dst_v,
+            layout.stride_uv as _,
+            w as _,
+            h as _,
+        );
+    }
+}
+
+// ---------------------------------------------------------------------------
+// BGRA → I420
+// ---------------------------------------------------------------------------
+
+/// BGRA → I420 at every entry of `RESOLUTIONS`.
+fn bench_bgra_to_i420(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bgra_to_i420");
+
+    for &(w, h, label) in RESOLUTIONS {
+        let (src, src_stride) = make_bgra(w, h, &Pattern::Gradient);
+
+        group.throughput(Throughput::Bytes((w * h * 4) as u64));
+        group.bench_function(BenchmarkId::from_parameter(label), |b| {
+            let mut dst = vec![0u8; i420_layout(w, h).total];
+            b.iter(|| bgra_to_i420(black_box(&src), src_stride, &mut dst, w, h));
+        });
+    }
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// BGRA → NV12
+// ---------------------------------------------------------------------------
+
+/// BGRA → NV12 at every entry of `RESOLUTIONS`.
+fn bench_bgra_to_nv12(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bgra_to_nv12");
+
+    for &(w, h, label) in RESOLUTIONS {
+        let (src, src_stride) = make_bgra(w, h, &Pattern::Gradient);
+        let layout = nv12_layout(w, h);
+
+        group.throughput(Throughput::Bytes((w * h * 4) as u64));
+        group.bench_function(BenchmarkId::from_parameter(label), |b| {
+            let mut dst = vec![0u8; layout.total];
+            // SAFETY: `src` holds `h` rows of `src_stride` bytes and `dst`
+            // holds `layout.total` bytes: the Y plane followed by the
+            // interleaved UV plane that `nv12_layout(w, h)` describes.
+            b.iter(|| unsafe {
+                let dst_y = dst.as_mut_ptr();
+                let dst_uv = dst_y.add(layout.y_size);
+                scrap::ARGBToNV12(
+                    black_box(src.as_ptr()),
+                    src_stride as _,
+                    dst_y,
+                    layout.stride_y as _,
+                    dst_uv,
+                    layout.stride_uv as _,
+                    w as _,
+                    h as _,
+                );
+            });
+        });
+    }
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// BGRA → I444
+// ---------------------------------------------------------------------------
+
+/// BGRA → I444 at every entry of `RESOLUTIONS`.
+fn bench_bgra_to_i444(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bgra_to_i444");
+
+    for &(w, h, label) in RESOLUTIONS {
+        let (src, src_stride) = make_bgra(w, h, &Pattern::Gradient);
+        let layout = i444_layout(w, h);
+
+        group.throughput(Throughput::Bytes((w * h * 4) as u64));
+        group.bench_function(BenchmarkId::from_parameter(label), |b| {
+            let mut dst = vec![0u8; layout.total];
+            // SAFETY: `src` holds `h` rows of `src_stride` bytes and `dst`
+            // holds the three `layout.plane_size` planes written below.
+            b.iter(|| unsafe {
+                let dst_y = dst.as_mut_ptr();
+                let dst_u = dst_y.add(layout.plane_size);
+                let dst_v = dst_u.add(layout.plane_size);
+                scrap::ARGBToI444(
+                    black_box(src.as_ptr()),
+                    src_stride as _,
+                    dst_y,
+                    layout.stride as _,
+                    dst_u,
+                    layout.stride as _,
+                    dst_v,
+                    layout.stride as _,
+                    w as _,
+                    h as _,
+                );
+            });
+        });
+    }
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Input pattern impact (1080p BGRA → I420, solid vs gradient vs random)
+// ---------------------------------------------------------------------------
+
+/// 1080p BGRA → I420 with solid, gradient and random source content.
+fn bench_bgra_to_i420_patterns(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bgra_to_i420_patterns");
+    let (w, h) = (1920, 1080);
+
+    let patterns: &[(&str, Pattern)] = &[
+        ("solid", Pattern::Solid(128)),
+        ("gradient", Pattern::Gradient),
+        ("random", Pattern::Random(0xDEAD_BEEF)),
+    ];
+
+    group.throughput(Throughput::Bytes((w * h * 4) as u64));
+    for (name, pat) in patterns {
+        let (src, src_stride) = make_bgra(w, h, pat);
+        group.bench_function(BenchmarkId::from_parameter(*name), |b| {
+            let mut dst = vec![0u8; i420_layout(w, h).total];
+            b.iter(|| bgra_to_i420(black_box(&src), src_stride, &mut dst, w, h));
+        });
+    }
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Stride alignment impact (1080p BGRA → I420, aligned vs +64 padding)
+// ---------------------------------------------------------------------------
+
+/// 1080p BGRA → I420 with a tight source stride and with 64/128 bytes of
+/// per-row padding.
+fn bench_bgra_to_i420_stride(c: &mut Criterion) {
+    let mut group = c.benchmark_group("bgra_to_i420_stride");
+    let (w, h) = (1920, 1080);
+
+    let strides: &[(&str, usize)] = &[
+        ("aligned", w * 4),
+        ("padded_64", w * 4 + 64),
+        ("padded_128", w * 4 + 128),
+    ];
+
+    group.throughput(Throughput::Bytes((w * h * 4) as u64));
+    for &(name, stride) in strides {
+        let src = make_bgra_strided(w, h, stride, &Pattern::Gradient);
+        group.bench_function(BenchmarkId::from_parameter(name), |b| {
+            let mut dst = vec![0u8; i420_layout(w, h).total];
+            b.iter(|| bgra_to_i420(black_box(&src), stride, &mut dst, w, h));
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_bgra_to_i420,
+    bench_bgra_to_nv12,
+    bench_bgra_to_i444,
+    bench_bgra_to_i420_patterns,
+    bench_bgra_to_i420_stride,
+);
+criterion_main!(benches);
diff --git a/libs/scrap/benches/mutex_contention.rs b/libs/scrap/benches/mutex_contention.rs
new file mode 100644
index 000000000..240b86866
--- /dev/null
+++ b/libs/scrap/benches/mutex_contention.rs
@@ -0,0 +1,182 @@
+//! H. Mutex contention benchmarks (simulated patterns).
+//!
+//! Reproduces the locking patterns from video_service.rs hot loop:
+//! - HashMap behind Mutex (current pattern for subscribers/connections)
+//! - Comparison with RwLock and Atomic alternatives
+//!
+//! No RustDesk-specific types needed — pure synchronization primitives.
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use std::collections::HashMap;
+use std::sync::atomic::{AtomicU32, Ordering};
+use std::sync::{Arc, Mutex, RwLock};
+use std::thread;
+
+const ENTRIES: usize = 5;
+
+/// Threads spawned per iteration by the contended benchmarks.
+const THREADS: usize = 4;
+
+/// Lock operations each spawned thread performs per iteration.
+const OPS_PER_THREAD: usize = 1000;
+
+/// Builds the small map (`ENTRIES` keys) that the lock benchmarks protect.
+fn make_map() -> HashMap<i32, u64> {
+    (0..ENTRIES as i32).map(|i| (i, i as u64 * 100)).collect()
+}
+
+// ---------------------------------------------------------------------------
+// Mutex: single-thread lock/read/unlock
+// ---------------------------------------------------------------------------
+
+/// Uncontended baseline: lock, read one key, unlock.
+fn bench_mutex_single_thread(c: &mut Criterion) {
+    let mut group = c.benchmark_group("mutex_1thread");
+    let m = Mutex::new(make_map());
+
+    group.throughput(Throughput::Elements(1));
+    group.bench_function("lock_read_unlock", |b| {
+        b.iter(|| {
+            let guard = m.lock().unwrap();
+            let val = guard.get(&0).copied();
+            drop(guard);
+            black_box(val)
+        });
+    });
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Mutex vs RwLock: 4 reader threads concurrent
+// ---------------------------------------------------------------------------
+
+/// `THREADS` concurrent readers behind a `Mutex` versus an `RwLock`.
+///
+/// Thread spawn and join happen inside the measured region, so each sample
+/// includes that cost as well as the lock traffic.
+fn bench_mutex_vs_rwlock_readers(c: &mut Criterion) {
+    let mut group = c.benchmark_group("lock_4readers");
+    group.measurement_time(std::time::Duration::from_secs(10));
+    // One iteration performs THREADS * OPS_PER_THREAD lock operations.
+    group.throughput(Throughput::Elements((THREADS * OPS_PER_THREAD) as u64));
+
+    // Mutex
+    {
+        let m = Arc::new(Mutex::new(make_map()));
+        group.bench_function(BenchmarkId::from_parameter("mutex"), |b| {
+            b.iter(|| {
+                let handles: Vec<_> = (0..THREADS)
+                    .map(|_| {
+                        let m = Arc::clone(&m);
+                        thread::spawn(move || {
+                            for _ in 0..OPS_PER_THREAD {
+                                let guard = m.lock().unwrap();
+                                black_box(guard.get(&0).copied());
+                            }
+                        })
+                    })
+                    .collect();
+                for h in handles {
+                    h.join().unwrap();
+                }
+            });
+        });
+    }
+
+    // RwLock
+    {
+        let m = Arc::new(RwLock::new(make_map()));
+        group.bench_function(BenchmarkId::from_parameter("rwlock"), |b| {
+            b.iter(|| {
+                let handles: Vec<_> = (0..THREADS)
+                    .map(|_| {
+                        let m = Arc::clone(&m);
+                        thread::spawn(move || {
+                            for _ in 0..OPS_PER_THREAD {
+                                let guard = m.read().unwrap();
+                                black_box(guard.get(&0).copied());
+                            }
+                        })
+                    })
+                    .collect();
+                for h in handles {
+                    h.join().unwrap();
+                }
+            });
+        });
+    }
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Mutex: 4 writer threads concurrent
+// ---------------------------------------------------------------------------
+
+/// `THREADS` concurrent writers overwriting the same key behind a `Mutex`.
+///
+/// Thread spawn and join are part of the measured region here too.
+fn bench_mutex_writers(c: &mut Criterion) {
+    let mut group = c.benchmark_group("mutex_4writers");
+    group.measurement_time(std::time::Duration::from_secs(10));
+
+    let m = Arc::new(Mutex::new(make_map()));
+    // One iteration performs THREADS * OPS_PER_THREAD lock operations.
+    group.throughput(Throughput::Elements((THREADS * OPS_PER_THREAD) as u64));
+    group.bench_function("lock_write_unlock", |b| {
+        b.iter(|| {
+            let handles: Vec<_> = (0..THREADS as u64)
+                .map(|t| {
+                    let m = Arc::clone(&m);
+                    thread::spawn(move || {
+                        for i in 0..OPS_PER_THREAD as u64 {
+                            let mut guard = m.lock().unwrap();
+                            guard.insert(0, t * 1000 + i);
+                        }
+                    })
+                })
+                .collect();
+            for h in handles {
+                h.join().unwrap();
+            }
+        });
+    });
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Atomic vs Mutex for single u32 (like VIDEO_QOS.fps)
+// ---------------------------------------------------------------------------
+
+/// Single-threaded read and write cost of an `AtomicU32` versus a `Mutex`
+/// around a `u32`.
+fn bench_atomic_vs_mutex_u32(c: &mut Criterion) {
+    let mut group = c.benchmark_group("atomic_vs_mutex_u32");
+    group.throughput(Throughput::Elements(1));
+
+    // AtomicU32
+    {
+        let v = Arc::new(AtomicU32::new(30));
+        group.bench_function(BenchmarkId::from_parameter("atomic_read"), |b| {
+            b.iter(|| black_box(v.load(Ordering::Relaxed)));
+        });
+
+        group.bench_function(BenchmarkId::from_parameter("atomic_write"), |b| {
+            let mut val = 0u32;
+            b.iter(|| {
+                v.store(black_box(val), Ordering::Relaxed);
+                val = val.wrapping_add(1);
+            });
+        });
+    }
+
+    // Mutex
+    {
+        let v = Arc::new(Mutex::new(30u32));
+        group.bench_function(BenchmarkId::from_parameter("mutex_read"), |b| {
+            b.iter(|| {
+                let guard = v.lock().unwrap();
+                black_box(*guard)
+            });
+        });
+
+        group.bench_function(BenchmarkId::from_parameter("mutex_write"), |b| {
+            let mut val = 0u32;
+            b.iter(|| {
+                let mut guard = v.lock().unwrap();
+                *guard = black_box(val);
+                val = val.wrapping_add(1);
+            });
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_mutex_single_thread,
+    bench_mutex_vs_rwlock_readers,
+    bench_mutex_writers,
+    bench_atomic_vs_mutex_u32,
+);
+criterion_main!(benches);
diff --git a/libs/scrap/benches/pipeline_decode.rs b/libs/scrap/benches/pipeline_decode.rs
new file mode 100644
index 000000000..989390726
--- /dev/null
+++ b/libs/scrap/benches/pipeline_decode.rs
@@ -0,0 +1,185 @@
+//! K. Full decode pipeline benchmarks.
+//!
+//! Protobuf deserialize → Decoder::handle_video_frame().
+//! Uses the real Decoder::handle_video_frame() which includes codec dispatch,
+//! the "keep only last frame" pattern, and YUV→RGB conversion.
+//! This is the exact client-side path (see codec.rs:631).
+
+mod common;
+
+use common::pre_encode_vpx;
+use criterion::{
+    black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
+};
+use hbb_common::{
+    bytes::Bytes,
+    message_proto::{video_frame, Chroma, EncodedVideoFrame, EncodedVideoFrames, Message, VideoFrame},
+    protobuf::Message as ProtoMessage,
+};
+use scrap::{
+    codec::Decoder, CodecFormat, ImageFormat, ImageRgb, ImageTexture, VpxVideoCodecId,
+};
+use std::time::Duration;
+
+/// Pre-encodes `n` frames with `codec` and serializes each one as a
+/// standalone `Message` carrying a single-frame `VideoFrame`.
+fn make_serialized_messages(
+    codec: VpxVideoCodecId,
+    w: usize,
+    h: usize,
+    n: usize,
+) -> Vec<Vec<u8>> {
+    let frames = pre_encode_vpx(codec, w, h, 1.0, n);
+    frames
+        .iter()
+        .map(|f| {
+            let mut evf = EncodedVideoFrame::new();
+            evf.data = Bytes::from(f.data.clone());
+            evf.key = f.key;
+            evf.pts = f.pts;
+
+            let mut evfs = EncodedVideoFrames::new();
+            evfs.frames.push(evf);
+
+            let mut vf = VideoFrame::new();
+            match codec {
+                VpxVideoCodecId::VP8 => vf.set_vp8s(evfs),
+                VpxVideoCodecId::VP9 => vf.set_vp9s(evfs),
+            }
+
+            let mut msg = Message::new();
+            msg.set_video_frame(vf);
+            msg.write_to_bytes().unwrap()
+        })
+        .collect()
+}
+
+/// Extract the video_frame::Union from a serialized Message.
+fn extract_union(msg_bytes: &[u8]) -> Option<video_frame::Union> {
+    use hbb_common::message_proto::message;
+
+    let msg = Message::parse_from_bytes(msg_bytes).ok()?;
+    // Move the payload out of the parsed message instead of cloning it, so
+    // the measured path does not pay for an extra copy.
+    match msg.union? {
+        message::Union::VideoFrame(vf) => vf.union,
+        _ => None,
+    }
+}
+
+/// Client-side decode state reused across benchmark iterations.
+struct DecodeTarget {
+    decoder: Decoder,
+    rgb: ImageRgb,
+    texture: ImageTexture,
+    pixelbuffer: bool,
+    chroma: Option<Chroma>,
+}
+
+impl DecodeTarget {
+    /// Creates a VP9 decoder with an ARGB output image.
+    fn vp9() -> Self {
+        Self {
+            decoder: Decoder::new(CodecFormat::VP9, None),
+            rgb: ImageRgb::new(ImageFormat::ARGB, 1),
+            texture: ImageTexture::default(),
+            pixelbuffer: true,
+            chroma: None,
+        }
+    }
+
+    /// Deserializes one message and feeds its video frame to the real
+    /// `Decoder::handle_video_frame`. Messages without a video frame are
+    /// skipped; decode errors are ignored, as in the original benchmark.
+    fn feed(&mut self, msg_bytes: &[u8]) {
+        if let Some(union) = extract_union(msg_bytes) {
+            let _ = self.decoder.handle_video_frame(
+                &union,
+                &mut self.rgb,
+                &mut self.texture,
+                &mut self.pixelbuffer,
+                &mut self.chroma,
+            );
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Single frame pipeline: VP9 1080p
+// ---------------------------------------------------------------------------
+
+/// Deserialize + decode of one 1080p VP9 frame per iteration.
+fn bench_pipeline_decode_1080p(c: &mut Criterion) {
+    let mut group = c.benchmark_group("pipeline_decode");
+    let (w, h) = (1920, 1080);
+
+    let messages = make_serialized_messages(VpxVideoCodecId::VP9, w, h, 30);
+    let mut target = DecodeTarget::vp9();
+
+    group.throughput(Throughput::Elements(1));
+    group.bench_function(BenchmarkId::from_parameter("vp9_1080p"), |b| {
+        let mut idx = 0;
+        b.iter(|| {
+            target.feed(black_box(&messages[idx % messages.len()]));
+            idx += 1;
+            black_box(target.rgb.raw.len())
+        });
+    });
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Single frame pipeline: VP9 4K
+// ---------------------------------------------------------------------------
+
+/// Deserialize + decode of one 4K VP9 frame per iteration.
+fn bench_pipeline_decode_4k(c: &mut Criterion) {
+    let mut group = c.benchmark_group("pipeline_decode");
+    group.measurement_time(Duration::from_secs(15));
+    let (w, h) = (3840, 2160);
+
+    let messages = make_serialized_messages(VpxVideoCodecId::VP9, w, h, 10);
+    let mut target = DecodeTarget::vp9();
+
+    group.throughput(Throughput::Elements(1));
+    group.bench_function(BenchmarkId::from_parameter("vp9_4k"), |b| {
+        let mut idx = 0;
+        b.iter(|| {
+            target.feed(black_box(&messages[idx % messages.len()]));
+            idx += 1;
+            black_box(target.rgb.raw.len())
+        });
+    });
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// 100-frame sequence pipeline: VP9 1080p
+// ---------------------------------------------------------------------------
+
+/// Deserialize + decode of a whole 100-frame 1080p VP9 sequence per iteration.
+fn bench_pipeline_decode_sequence(c: &mut Criterion) {
+    let mut group = c.benchmark_group("pipeline_decode_sequence");
+    group.sample_size(10);
+    group.measurement_time(Duration::from_secs(20));
+    let (w, h) = (1920, 1080);
+
+    let messages = make_serialized_messages(VpxVideoCodecId::VP9, w, h, 100);
+    let mut target = DecodeTarget::vp9();
+
+    group.throughput(Throughput::Elements(messages.len() as u64));
+    group.bench_function(
+        BenchmarkId::from_parameter("vp9_1080p_100frames"),
+        |b| {
+            b.iter(|| {
+                for msg_bytes in &messages {
+                    target.feed(msg_bytes);
+                }
+                black_box(target.rgb.raw.len())
+            });
+        },
+    );
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_pipeline_decode_1080p,
+    bench_pipeline_decode_4k,
+    bench_pipeline_decode_sequence,
+);
+criterion_main!(benches);
diff --git a/libs/scrap/benches/pipeline_encode.rs b/libs/scrap/benches/pipeline_encode.rs
new file mode 100644
index 000000000..ea0247f5e
--- /dev/null
+++ b/libs/scrap/benches/pipeline_encode.rs
@@ -0,0 +1,215 @@
+//! J. Full encode pipeline benchmarks.
+//!
+//! BGRA capture → YUV conversion → encode_to_message → protobuf serialize.
+//! Uses the real encode_to_message() API (see vpxcodec.rs EncoderApi impl)
+//! which is the same path as video_service.rs handle_one_frame().
+
+mod common;
+
+use common::{i420_layout, make_bgra, Pattern};
+use criterion::{
+    black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput,
+};
+use hbb_common::{
+    message_proto::Message,
+    protobuf::Message as ProtoMessage,
+};
+use scrap::{
+    codec::{EncoderApi, EncoderCfg},
+    EncodeInput, VpxEncoder, VpxEncoderConfig, VpxVideoCodecId,
+};
+use std::time::Duration;
+
+/// Builds a VP9 encoder at quality 1.0 with the default keyframe interval.
+fn new_vp9_encoder(w: usize, h: usize) -> VpxEncoder {
+    let cfg = EncoderCfg::VPX(VpxEncoderConfig {
+        width: w as _,
+        height: h as _,
+        quality: 1.0,
+        codec: VpxVideoCodecId::VP9,
+        keyframe_interval: None,
+    });
+    VpxEncoder::new(cfg, false).expect("failed to create VP9 encoder")
+}
+
+/// Step 1 of the pipeline: BGRA → I420 (same as convert_to_yuv).
+///
+/// # Panics
+///
+/// Panics if `bgra` or `yuv` is too small for a `w` x `h` frame.
+#[inline]
+fn bgra_to_i420(bgra: &[u8], bgra_stride: usize, yuv: &mut [u8], w: usize, h: usize) {
+    let layout = i420_layout(w, h);
+    assert!(bgra_stride >= w * 4 && bgra.len() >= bgra_stride * h);
+    assert!(yuv.len() >= layout.total);
+    // SAFETY: the asserts above guarantee that `h` source rows of
+    // `bgra_stride` bytes lie inside `bgra`, and that the Y, U and V planes
+    // described by `layout` lie inside `yuv`.
+    unsafe {
+        let dst_y = yuv.as_mut_ptr();
+        let dst_u = dst_y.add(layout.y_size);
+        let dst_v = dst_u.add(layout.uv_size);
+        scrap::ARGBToI420(
+            bgra.as_ptr(),
+            bgra_stride as _,
+            dst_y,
+            layout.stride_y as _,
+            dst_u,
+            layout.stride_uv as _,
+            dst_v,
+            layout.stride_uv as _,
+            w as _,
+            h as _,
+        );
+    }
+}
+
+/// Steps 2-4: encode_to_message (real API from EncoderApi trait), wrap in a
+/// `Message` and serialize (real send path).
+///
+/// Returns an empty buffer when the encoder yields no frame or fails.
+fn encode_and_serialize(encoder: &mut VpxEncoder, yuv: &[u8], pts: i64) -> Vec<u8> {
+    match encoder.encode_to_message(EncodeInput::YUV(yuv), pts) {
+        Ok(vf) => {
+            let mut msg = Message::new();
+            msg.set_video_frame(vf);
+            msg.write_to_bytes().unwrap()
+        }
+        Err(_) => Vec::new(),
+    }
+}
+
+/// Shared body of the single-frame pipeline benchmarks.
+fn bench_pipeline_encode_frame(
+    c: &mut Criterion,
+    label: &str,
+    w: usize,
+    h: usize,
+    measurement_time: Option<Duration>,
+) {
+    let mut group = c.benchmark_group("pipeline_encode");
+    if let Some(t) = measurement_time {
+        group.measurement_time(t);
+    }
+
+    let mut encoder = new_vp9_encoder(w, h);
+    let (bgra, bgra_stride) = make_bgra(w, h, &Pattern::Gradient);
+
+    group.throughput(Throughput::Elements(1));
+    group.bench_function(BenchmarkId::from_parameter(label), |b| {
+        let mut yuv = vec![0u8; i420_layout(w, h).total];
+        let mut pts = 0i64;
+
+        b.iter(|| {
+            bgra_to_i420(black_box(&bgra), bgra_stride, &mut yuv, w, h);
+            let bytes = encode_and_serialize(&mut encoder, &yuv, pts);
+            pts += 1;
+            // Returned so Criterion treats the serialized bytes as observed.
+            bytes
+        });
+    });
+    group.finish();
+}
+
+// ---------------------------------------------------------------------------
+// Single frame pipeline: VP9 1080p
+// ---------------------------------------------------------------------------
+
+fn bench_pipeline_encode_1080p(c: &mut Criterion) {
+    bench_pipeline_encode_frame(c, "vp9_1080p", 1920, 1080, None);
+}
+
+// ---------------------------------------------------------------------------
+// Single frame pipeline: VP9 4K
+// ---------------------------------------------------------------------------
+
+fn bench_pipeline_encode_4k(c: &mut Criterion) {
+    bench_pipeline_encode_frame(c, "vp9_4k", 3840, 2160, Some(Duration::from_secs(15)));
+}
+
+// ---------------------------------------------------------------------------
+// 100-frame sequence pipeline: VP9 1080p with movement
+// ---------------------------------------------------------------------------
+
+/// Converts and encodes 100 distinct 1080p frames per iteration.
+///
+/// The source frames are independent `Pattern::Random` buffers, and all 100
+/// are held in memory for the duration of the benchmark.
+fn bench_pipeline_encode_sequence(c: &mut Criterion) {
+    let mut group = c.benchmark_group("pipeline_encode_sequence");
+    group.sample_size(10);
+    group.measurement_time(Duration::from_secs(30));
+    let (w, h) = (1920, 1080);
+
+    let mut encoder = new_vp9_encoder(w, h);
+
+    // Pre-generate 100 BGRA frames with movement
+    let bgra_frames: Vec<(Vec<u8>, usize)> = (0..100u64)
+        .map(|i| make_bgra(w, h, &Pattern::Random(i * 12345)))
+        .collect();
+
+    group.throughput(Throughput::Elements(bgra_frames.len() as u64));
+    group.bench_function(
+        BenchmarkId::from_parameter("vp9_1080p_100frames"),
+        |b| {
+            let mut yuv = vec![0u8; i420_layout(w, h).total];
+            // The encoder outlives every iteration, so the timestamp must
+            // keep increasing across iterations instead of restarting at 0.
+            let mut pts = 0i64;
+
+            b.iter(|| {
+                let mut total_output_bytes = 0usize;
+                for (bgra, bgra_stride) in &bgra_frames {
+                    bgra_to_i420(bgra, *bgra_stride, &mut yuv, w, h);
+
+                    let input = EncodeInput::YUV(&yuv);
+                    if let Ok(vf) = encoder.encode_to_message(input, pts) {
+                        total_output_bytes += vf.compute_size() as usize;
+                    }
+                    pts += 1;
+                }
+                black_box(total_output_bytes)
+            });
+        },
+    );
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_pipeline_encode_1080p,
+    bench_pipeline_encode_4k,
+    bench_pipeline_encode_sequence,
+);
+criterion_main!(benches);
diff --git a/libs/scrap/benches/protobuf.rs b/libs/scrap/benches/protobuf.rs
new file mode 100644
index 000000000..95d646fcd
--- /dev/null
+++ b/libs/scrap/benches/protobuf.rs
@@ -0,0 +1,127 @@
+mod common;
+
+use common::pre_encode_vpx;
+use
criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use hbb_common::{ + bytes::Bytes, + message_proto::{EncodedVideoFrame, EncodedVideoFrames, Message, VideoFrame}, + protobuf::Message as ProtoMessage, +}; + +/// E. Protobuf serialization/deserialization benchmarks. +/// +/// Measures Message wrapping VideoFrame → write_to_bytes → parse_from_bytes. +/// Tests both typical (30 KB VP9) and large (200 KB, simulating 4K best quality) payloads. + +fn make_video_frame(payload_size: usize) -> Message { + let mut evf = EncodedVideoFrame::new(); + evf.data = Bytes::from(vec![0xABu8; payload_size]); + evf.key = true; + evf.pts = 1234; + + let mut evfs = EncodedVideoFrames::new(); + evfs.frames.push(evf); + + let mut vf = VideoFrame::new(); + vf.set_vp9s(evfs); + vf.display = 0; + + let mut msg = Message::new(); + msg.set_video_frame(vf); + msg +} + +fn make_video_frame_from_real_encode() -> Message { + let frames = pre_encode_vpx(scrap::VpxVideoCodecId::VP9, 1920, 1080, 1.0, 1); + let mut evf = EncodedVideoFrame::new(); + evf.data = Bytes::from(frames[0].data.clone()); + evf.key = frames[0].key; + evf.pts = frames[0].pts; + + let mut evfs = EncodedVideoFrames::new(); + evfs.frames.push(evf); + + let mut vf = VideoFrame::new(); + vf.set_vp9s(evfs); + vf.display = 0; + + let mut msg = Message::new(); + msg.set_video_frame(vf); + msg +} + +// --------------------------------------------------------------------------- +// Serialize VideoFrame +// --------------------------------------------------------------------------- + +fn bench_serialize(c: &mut Criterion) { + let mut group = c.benchmark_group("protobuf_serialize"); + + let cases: &[(&str, usize)] = &[ + ("30KB_typical", 30_000), + ("100KB_hq", 100_000), + ("200KB_4k_best", 200_000), + ]; + + for (label, size) in cases { + let msg = make_video_frame(*size); + + group.throughput(Throughput::Bytes(*size as u64)); + group.bench_with_input(BenchmarkId::from_parameter(*label), &(), |b, 
_| { + b.iter(|| { + black_box(msg.write_to_bytes().unwrap()) + }); + }); + } + group.finish(); +} + +// --------------------------------------------------------------------------- +// Deserialize VideoFrame +// --------------------------------------------------------------------------- + +fn bench_deserialize(c: &mut Criterion) { + let mut group = c.benchmark_group("protobuf_deserialize"); + + let cases: &[(&str, usize)] = &[ + ("30KB_typical", 30_000), + ("100KB_hq", 100_000), + ("200KB_4k_best", 200_000), + ]; + + for (label, size) in cases { + let msg = make_video_frame(*size); + let bytes = msg.write_to_bytes().unwrap(); + + group.throughput(Throughput::Bytes(bytes.len() as u64)); + group.bench_with_input(BenchmarkId::from_parameter(*label), &(), |b, _| { + b.iter(|| { + black_box(Message::parse_from_bytes(black_box(&bytes)).unwrap()) + }); + }); + } + group.finish(); +} + +// --------------------------------------------------------------------------- +// Roundtrip: serialize + deserialize +// --------------------------------------------------------------------------- + +fn bench_roundtrip(c: &mut Criterion) { + let mut group = c.benchmark_group("protobuf_roundtrip"); + + let msg = make_video_frame_from_real_encode(); + let serialized_size = msg.compute_size() as u64; + + group.throughput(Throughput::Bytes(serialized_size)); + group.bench_function(BenchmarkId::from_parameter("real_vp9_1080p"), |b| { + b.iter(|| { + let bytes = msg.write_to_bytes().unwrap(); + black_box(Message::parse_from_bytes(&bytes).unwrap()) + }); + }); + group.finish(); +} + +criterion_group!(benches, bench_serialize, bench_deserialize, bench_roundtrip); +criterion_main!(benches); diff --git a/libs/scrap/benches/video_queue.rs b/libs/scrap/benches/video_queue.rs new file mode 100644 index 000000000..29d4599c0 --- /dev/null +++ b/libs/scrap/benches/video_queue.rs @@ -0,0 +1,151 @@ +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use 
crossbeam_queue::ArrayQueue; +use hbb_common::{ + bytes::Bytes, + message_proto::{EncodedVideoFrame, EncodedVideoFrames, VideoFrame}, +}; +use std::sync::Arc; +use std::thread; + +/// I. ArrayQueue (video queue client-side) benchmarks. +/// +/// Simulates the client video queue from io_loop.rs:2318. +/// ArrayQueue with capacity 120 — the ring buffer between +/// network reception and the decoder thread. + +const QUEUE_CAP: usize = 120; +const PAYLOAD_SIZE: usize = 30_000; // typical VP9 frame + +fn make_video_frame(pts: i64) -> VideoFrame { + let mut evf = EncodedVideoFrame::new(); + evf.data = Bytes::from(vec![0u8; PAYLOAD_SIZE]); + evf.key = pts == 0; + evf.pts = pts; + + let mut evfs = EncodedVideoFrames::new(); + evfs.frames.push(evf); + + let mut vf = VideoFrame::new(); + vf.set_vp9s(evfs); + vf.display = 0; + vf +} + +// --------------------------------------------------------------------------- +// Push 120 VideoFrames +// --------------------------------------------------------------------------- + +fn bench_push(c: &mut Criterion) { + let mut group = c.benchmark_group("video_queue_push"); + let frames: Vec = (0..QUEUE_CAP as i64).map(make_video_frame).collect(); + + group.throughput(Throughput::Elements(QUEUE_CAP as u64)); + group.bench_function(BenchmarkId::from_parameter("120_frames"), |b| { + b.iter(|| { + let q = ArrayQueue::new(QUEUE_CAP); + for f in &frames { + let _ = q.push(black_box(f.clone())); + } + }); + }); + group.finish(); +} + +// --------------------------------------------------------------------------- +// Pop 120 VideoFrames +// --------------------------------------------------------------------------- + +fn bench_pop(c: &mut Criterion) { + let mut group = c.benchmark_group("video_queue_pop"); + + group.throughput(Throughput::Elements(QUEUE_CAP as u64)); + group.bench_function(BenchmarkId::from_parameter("120_frames"), |b| { + b.iter_with_setup( + || { + let q = ArrayQueue::new(QUEUE_CAP); + for i in 0..QUEUE_CAP as i64 { + let _ = 
q.push(make_video_frame(i)); + } + q + }, + |q| { + while let Some(f) = q.pop() { + black_box(f); + } + }, + ); + }); + group.finish(); +} + +// --------------------------------------------------------------------------- +// force_push when full (drop oldest + push) +// --------------------------------------------------------------------------- + +fn bench_force_push(c: &mut Criterion) { + let mut group = c.benchmark_group("video_queue_force_push"); + + group.throughput(Throughput::Elements(1)); + group.bench_function(BenchmarkId::from_parameter("full_queue"), |b| { + let q = ArrayQueue::new(QUEUE_CAP); + for i in 0..QUEUE_CAP as i64 { + let _ = q.push(make_video_frame(i)); + } + b.iter(|| { + // Real code: io_loop.rs:1310 uses video_queue.force_push(vf) + black_box(q.force_push(black_box(make_video_frame(999)))) + }); + }); + group.finish(); +} + +// --------------------------------------------------------------------------- +// Producer-consumer: 1 producer thread, 1 consumer thread, 1000 frames +// --------------------------------------------------------------------------- + +fn bench_producer_consumer(c: &mut Criterion) { + let mut group = c.benchmark_group("video_queue_producer_consumer"); + group.measurement_time(std::time::Duration::from_secs(10)); + + let n_frames = 1000; + group.throughput(Throughput::Elements(n_frames)); + group.bench_function(BenchmarkId::from_parameter("1000_frames"), |b| { + b.iter(|| { + let q = Arc::new(ArrayQueue::new(QUEUE_CAP)); + + let q_prod = q.clone(); + let producer = thread::spawn(move || { + for i in 0..n_frames as i64 { + // Real code uses force_push (drops oldest if full) + q_prod.force_push(make_video_frame(i)); + } + }); + + let q_cons = q.clone(); + let consumer = thread::spawn(move || { + let mut consumed = 0u64; + while consumed < n_frames { + if let Some(f) = q_cons.pop() { + black_box(f); + consumed += 1; + } else { + thread::yield_now(); + } + } + }); + + producer.join().unwrap(); + consumer.join().unwrap(); + }); 
+ }); + group.finish(); +} + +criterion_group!( + benches, + bench_push, + bench_pop, + bench_force_push, + bench_producer_consumer, +); +criterion_main!(benches); diff --git a/libs/scrap/benches/yuv_to_rgb.rs b/libs/scrap/benches/yuv_to_rgb.rs new file mode 100644 index 000000000..60a092265 --- /dev/null +++ b/libs/scrap/benches/yuv_to_rgb.rs @@ -0,0 +1,193 @@ +mod common; + +use common::{make_i420, make_i444, make_nv12, RESOLUTIONS}; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; + +/// D. YUV → RGB conversion benchmarks (client-side decode output path). +/// +/// Measures I420/NV12/I444 → ARGB/ABGR via libyuv FFI. +/// Corresponds to GoogleImage::to() in the client decoder. + +// --------------------------------------------------------------------------- +// I420 → ARGB +// --------------------------------------------------------------------------- + +fn bench_i420_to_argb(c: &mut Criterion) { + let mut group = c.benchmark_group("i420_to_argb"); + + for &(w, h, label) in RESOLUTIONS { + let (frame, layout) = make_i420(w, h, 0); + let dst_stride = w * 4; + + group.throughput(Throughput::Bytes((w * h * 4) as u64)); + group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { + let mut dst = vec![0u8; dst_stride * h]; + b.iter(|| unsafe { + let y = frame.as_ptr(); + let u = y.add(layout.y_size); + let v = u.add(layout.uv_size); + scrap::I420ToARGB( + black_box(y), + layout.stride_y as _, + u, + layout.stride_uv as _, + v, + layout.stride_uv as _, + dst.as_mut_ptr(), + dst_stride as _, + w as _, + h as _, + ); + }); + }); + } + group.finish(); +} + +// --------------------------------------------------------------------------- +// I420 → ABGR +// --------------------------------------------------------------------------- + +fn bench_i420_to_abgr(c: &mut Criterion) { + let mut group = c.benchmark_group("i420_to_abgr"); + + for &(w, h, label) in RESOLUTIONS { + let (frame, layout) = make_i420(w, h, 0); + 
let dst_stride = w * 4; + + group.throughput(Throughput::Bytes((w * h * 4) as u64)); + group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { + let mut dst = vec![0u8; dst_stride * h]; + b.iter(|| unsafe { + let y = frame.as_ptr(); + let u = y.add(layout.y_size); + let v = u.add(layout.uv_size); + scrap::I420ToABGR( + black_box(y), + layout.stride_y as _, + u, + layout.stride_uv as _, + v, + layout.stride_uv as _, + dst.as_mut_ptr(), + dst_stride as _, + w as _, + h as _, + ); + }); + }); + } + group.finish(); +} + +// --------------------------------------------------------------------------- +// I444 → ARGB +// --------------------------------------------------------------------------- + +fn bench_i444_to_argb(c: &mut Criterion) { + let mut group = c.benchmark_group("i444_to_argb"); + + for &(w, h, label) in RESOLUTIONS { + let (frame, layout) = make_i444(w, h); + let dst_stride = w * 4; + + group.throughput(Throughput::Bytes((w * h * 4) as u64)); + group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { + let mut dst = vec![0u8; dst_stride * h]; + b.iter(|| unsafe { + let y = frame.as_ptr(); + let u = y.add(layout.plane_size); + let v = u.add(layout.plane_size); + scrap::I444ToARGB( + black_box(y), + layout.stride as _, + u, + layout.stride as _, + v, + layout.stride as _, + dst.as_mut_ptr(), + dst_stride as _, + w as _, + h as _, + ); + }); + }); + } + group.finish(); +} + +// --------------------------------------------------------------------------- +// NV12 → ARGB +// --------------------------------------------------------------------------- + +fn bench_nv12_to_argb(c: &mut Criterion) { + let mut group = c.benchmark_group("nv12_to_argb"); + + for &(w, h, label) in RESOLUTIONS { + let (frame, layout) = make_nv12(w, h); + let dst_stride = w * 4; + + group.throughput(Throughput::Bytes((w * h * 4) as u64)); + group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { + let mut dst = vec![0u8; dst_stride * h]; + 
b.iter(|| unsafe { + let y = frame.as_ptr(); + let uv = y.add(layout.y_size); + scrap::NV12ToARGB( + black_box(y), + layout.stride_y as _, + uv, + layout.stride_uv as _, + dst.as_mut_ptr(), + dst_stride as _, + w as _, + h as _, + ); + }); + }); + } + group.finish(); +} + +// --------------------------------------------------------------------------- +// NV12 → ABGR +// --------------------------------------------------------------------------- + +fn bench_nv12_to_abgr(c: &mut Criterion) { + let mut group = c.benchmark_group("nv12_to_abgr"); + + for &(w, h, label) in RESOLUTIONS { + let (frame, layout) = make_nv12(w, h); + let dst_stride = w * 4; + + group.throughput(Throughput::Bytes((w * h * 4) as u64)); + group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { + let mut dst = vec![0u8; dst_stride * h]; + b.iter(|| unsafe { + let y = frame.as_ptr(); + let uv = y.add(layout.y_size); + scrap::NV12ToABGR( + black_box(y), + layout.stride_y as _, + uv, + layout.stride_uv as _, + dst.as_mut_ptr(), + dst_stride as _, + w as _, + h as _, + ); + }); + }); + } + group.finish(); +} + +criterion_group!( + benches, + bench_i420_to_argb, + bench_i420_to_abgr, + bench_i444_to_argb, + bench_nv12_to_argb, + bench_nv12_to_abgr, +); +criterion_main!(benches); From cfb41c13585fe91d2dd7dfde88bad6150d368849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Wed, 8 Apr 2026 18:58:00 +0200 Subject: [PATCH 2/4] fix: address CodeRabbit review findings - Add checkout step before local composite action (GitHub Actions requires repo on disk to read action.yml) - Use PR base SHA for path detection in multi-commit PRs - Fail benchmarks loudly on decode errors (.expect instead of let _ =) - Document intentional encode error handling (codec may drop frames) Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/actions/setup-linux/action.yml | 5 ----- .github/workflows/_bench.yml | 4 ++++ .github/workflows/ci.yml | 13 +++++++++++-- 
libs/scrap/benches/codec_decode.rs | 18 +++++++++--------- libs/scrap/benches/codec_encode.rs | 16 ++++++++++------ libs/scrap/benches/pipeline_decode.rs | 10 +++++----- 6 files changed, 39 insertions(+), 27 deletions(-) diff --git a/.github/actions/setup-linux/action.yml b/.github/actions/setup-linux/action.yml index 98d59df00..befbb8abb 100644 --- a/.github/actions/setup-linux/action.yml +++ b/.github/actions/setup-linux/action.yml @@ -30,11 +30,6 @@ runs: core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); - - name: Checkout source code - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Install prerequisites shell: bash run: | diff --git a/.github/workflows/_bench.yml b/.github/workflows/_bench.yml index 231aeb1d7..375f5d93c 100644 --- a/.github/workflows/_bench.yml +++ b/.github/workflows/_bench.yml @@ -16,6 +16,10 @@ jobs: name: Performance benchmarks runs-on: ubuntu-latest steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: ./.github/actions/setup-linux - name: Install cargo-codspeed diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 67310ec1e..19119eeff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,10 @@ jobs: job: - { target: x86_64-unknown-linux-gnu, os: ubuntu-24.04 } steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: ./.github/actions/setup-linux - name: Show version information (Rust, cargo, GCC) @@ -75,10 +79,15 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 2 + fetch-depth: 0 - id: check run: | - CHANGED=$(git diff --name-only HEAD~1 HEAD -- libs/scrap/ libs/hbb_common/ 2>/dev/null || echo "") + if [ "${{ github.event_name }}" = "pull_request" ]; then + BASE="${{ github.event.pull_request.base.sha }}" + else + BASE="HEAD~1" + fi + CHANGED=$(git diff --name-only "$BASE" HEAD -- libs/scrap/ 
libs/hbb_common/ 2>/dev/null || echo "") echo "run=$( [ -n "$CHANGED" ] && echo true || echo false )" >> "$GITHUB_OUTPUT" benchmarks: diff --git a/libs/scrap/benches/codec_decode.rs b/libs/scrap/benches/codec_decode.rs index 7e0155830..959bbe4fe 100644 --- a/libs/scrap/benches/codec_decode.rs +++ b/libs/scrap/benches/codec_decode.rs @@ -91,13 +91,13 @@ fn bench_decode_single(c: &mut Criterion) { let mut idx = 0; b.iter(|| { let union = &unions[idx % unions.len()]; - let _ = decoder.handle_video_frame( + decoder.handle_video_frame( black_box(union), &mut rgb, &mut texture, &mut pixelbuffer, &mut chroma, - ); + ).expect("decode failed"); idx += 1; }); }); @@ -118,13 +118,13 @@ fn bench_decode_single(c: &mut Criterion) { let mut idx = 0; b.iter(|| { let union = &unions[idx % unions.len()]; - let _ = decoder.handle_video_frame( + decoder.handle_video_frame( black_box(union), &mut rgb, &mut texture, &mut pixelbuffer, &mut chroma, - ); + ).expect("decode failed"); idx += 1; }); }); @@ -145,13 +145,13 @@ fn bench_decode_single(c: &mut Criterion) { let mut idx = 0; b.iter(|| { let union = &unions[idx % unions.len()]; - let _ = decoder.handle_video_frame( + decoder.handle_video_frame( black_box(union), &mut rgb, &mut texture, &mut pixelbuffer, &mut chroma, - ); + ).expect("decode failed"); idx += 1; }); }); @@ -181,13 +181,13 @@ fn bench_vp9_decode_sequence(c: &mut Criterion) { group.bench_function(BenchmarkId::from_parameter("100frames_1080p"), |b| { b.iter(|| { for union in &unions { - let _ = decoder.handle_video_frame( + decoder.handle_video_frame( black_box(union), &mut rgb, &mut texture, &mut pixelbuffer, &mut chroma, - ); + ).expect("decode failed"); } }); }); @@ -216,7 +216,7 @@ fn bench_vp9_decode_4k(c: &mut Criterion) { let mut idx = 0; b.iter(|| { let union = &unions[idx % unions.len()]; - let _ = decoder.handle_video_frame( + decoder.handle_video_frame( black_box(union), &mut rgb, &mut texture, diff --git a/libs/scrap/benches/codec_encode.rs 
b/libs/scrap/benches/codec_encode.rs index e551c150e..a02bd2721 100644 --- a/libs/scrap/benches/codec_encode.rs +++ b/libs/scrap/benches/codec_encode.rs @@ -49,7 +49,8 @@ fn bench_vpx_encode_single(c: &mut Criterion) { let mut pts = 0i64; b.iter(|| { let input = EncodeInput::YUV(&yuv); - let _ = encoder.encode_to_message(input, pts); + // encode_to_message may return Err("no valid frame") when the codec drops a frame — this is normal +drop(encoder.encode_to_message(input, pts)); pts += 1; }); }); @@ -71,7 +72,8 @@ fn bench_vpx_encode_single(c: &mut Criterion) { let mut pts = 0i64; b.iter(|| { let input = EncodeInput::YUV(&yuv); - let _ = encoder.encode_to_message(input, pts); + // encode_to_message may return Err("no valid frame") when the codec drops a frame — this is normal +drop(encoder.encode_to_message(input, pts)); pts += 1; }); }); @@ -109,7 +111,8 @@ fn bench_encode_4k(c: &mut Criterion) { let mut pts = 0i64; b.iter(|| { let input = EncodeInput::YUV(black_box(&yuv)); - let _ = encoder.encode_to_message(input, pts); + // encode_to_message may return Err("no valid frame") when the codec drops a frame — this is normal +drop(encoder.encode_to_message(input, pts)); pts += 1; }); }); @@ -141,7 +144,7 @@ fn bench_vp9_encode_sequence_static(c: &mut Criterion) { b.iter(|| { for i in 0..100 { let input = EncodeInput::YUV(black_box(&yuv)); - let _ = encoder.encode_to_message(input, i); + drop(encoder.encode_to_message(input, i)); } }); }); @@ -174,7 +177,7 @@ fn bench_vp9_encode_sequence_movement(c: &mut Criterion) { b.iter(|| { for (i, yuv) in frames.iter().enumerate() { let input = EncodeInput::YUV(black_box(yuv)); - let _ = encoder.encode_to_message(input, i as i64); + drop(encoder.encode_to_message(input, i as i64)); } }); }); @@ -211,7 +214,8 @@ fn bench_vp9_encode_quality(c: &mut Criterion) { let mut pts = 0i64; b.iter(|| { let input = EncodeInput::YUV(black_box(&yuv)); - let _ = encoder.encode_to_message(input, pts); + // encode_to_message may return Err("no 
valid frame") when the codec drops a frame — this is normal +drop(encoder.encode_to_message(input, pts)); pts += 1; }); }); diff --git a/libs/scrap/benches/pipeline_decode.rs b/libs/scrap/benches/pipeline_decode.rs index 989390726..1f60e2678 100644 --- a/libs/scrap/benches/pipeline_decode.rs +++ b/libs/scrap/benches/pipeline_decode.rs @@ -83,13 +83,13 @@ fn bench_pipeline_decode_1080p(c: &mut Criterion) { // Step 1: Protobuf deserialize // Step 2+3: Decode + YUV→RGB via real Decoder::handle_video_frame if let Some(union) = extract_union(black_box(msg_bytes)) { - let _ = decoder.handle_video_frame( + decoder.handle_video_frame( &union, &mut rgb, &mut texture, &mut pixelbuffer, &mut chroma, - ); + ).expect("decode failed"); } idx += 1; @@ -121,13 +121,13 @@ fn bench_pipeline_decode_4k(c: &mut Criterion) { b.iter(|| { let msg_bytes = &messages[idx % messages.len()]; if let Some(union) = extract_union(black_box(msg_bytes)) { - let _ = decoder.handle_video_frame( + decoder.handle_video_frame( &union, &mut rgb, &mut texture, &mut pixelbuffer, &mut chroma, - ); + ).expect("decode failed"); } idx += 1; black_box(rgb.raw.len()) @@ -160,7 +160,7 @@ fn bench_pipeline_decode_sequence(c: &mut Criterion) { b.iter(|| { for msg_bytes in &messages { if let Some(union) = extract_union(msg_bytes) { - let _ = decoder.handle_video_frame( + decoder.handle_video_frame( &union, &mut rgb, &mut texture, From 3427d2f34f1995e99e2ae6b7f0c0e56f8728098a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Wed, 8 Apr 2026 19:35:00 +0200 Subject: [PATCH 3/4] fix: address all benchmark accuracy issues from peer review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical fixes: - Encoder single-frame benchmarks now alternate 8 distinct frames to avoid rc_dropframe_thresh=25 dropping frames on identical input (previous results measured Err("no valid frame"), not actual encoding) - Added keyframe-only benchmark (keyframe_interval: 
Some(1)) for worst-case encode cost measurement - pipeline_encode: wrap write_to_bytes() in black_box to prevent LLVM dead-code elimination of the serialization step - pipeline_encode sequence: use write_to_bytes() instead of compute_size() for consistency with single-frame benchmarks Measurement fixes: - mutex_contention: persistent threads + Barrier instead of thread::spawn inside b.iter() (spawn overhead was dominating lock cost) - mutex_contention: Throughput::Elements(4000) for 4×1000 ops - video_queue: pre-allocate ArrayQueue and VideoFrame outside hot loop - pipeline_decode: use .take() instead of .clone() on Union (matches real code path which moves the VideoFrame) New benchmarks: - decode_raw: VP9 decode without YUV→RGB conversion (isolates codec cost) - decode_alignment: align=1 vs align=64 (macOS texture rendering) - encoder_cold_start: Encoder::new() cost (VP8/VP9/AV1) - decoder_cold_start: Decoder::new() cost (VP8/VP9/AV1) Co-Authored-By: Claude Opus 4.6 (1M context) --- libs/scrap/benches/codec_decode.rs | 94 +++++++++++++- libs/scrap/benches/codec_encode.rs | 134 ++++++++++++++++---- libs/scrap/benches/mutex_contention.rs | 168 ++++++++++++++++--------- libs/scrap/benches/pipeline_decode.rs | 8 +- libs/scrap/benches/pipeline_encode.rs | 19 ++- libs/scrap/benches/video_queue.rs | 10 +- 6 files changed, 335 insertions(+), 98 deletions(-) diff --git a/libs/scrap/benches/codec_decode.rs b/libs/scrap/benches/codec_decode.rs index 959bbe4fe..683761a29 100644 --- a/libs/scrap/benches/codec_decode.rs +++ b/libs/scrap/benches/codec_decode.rs @@ -9,7 +9,8 @@ use hbb_common::{ message_proto::{video_frame, Chroma, EncodedVideoFrame, EncodedVideoFrames}, }; use scrap::{ - codec::Decoder, CodecFormat, ImageFormat, ImageRgb, ImageTexture, VpxVideoCodecId, + codec::Decoder, CodecFormat, ImageFormat, ImageRgb, ImageTexture, VpxDecoder, + VpxDecoderConfig, VpxVideoCodecId, }; use std::time::Duration; @@ -160,6 +161,44 @@ fn bench_decode_single(c: &mut Criterion) { 
group.finish(); } +// --------------------------------------------------------------------------- +// Decode with align=1 vs align=64 (macOS texture rendering uses 64) +// --------------------------------------------------------------------------- + +fn bench_vp9_decode_alignment(c: &mut Criterion) { + let mut group = c.benchmark_group("decode_alignment"); + + let encoded = pre_encode_vpx(VpxVideoCodecId::VP9, W, H, 1.0, 30); + let unions = make_union_vp9(&encoded); + + for (label, align) in [("align_1", 1usize), ("align_64", 64)] { + let mut decoder = Decoder::new(CodecFormat::VP9, None); + let mut rgb = ImageRgb::new(ImageFormat::ARGB, align); + let mut texture = ImageTexture::default(); + let mut pixelbuffer = true; + let mut chroma: Option = None; + + group.throughput(Throughput::Elements(1)); + group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { + let mut idx = 0; + b.iter(|| { + let union = &unions[idx % unions.len()]; + decoder + .handle_video_frame( + black_box(union), + &mut rgb, + &mut texture, + &mut pixelbuffer, + &mut chroma, + ) + .expect("decode failed"); + idx += 1; + }); + }); + } + group.finish(); +} + // --------------------------------------------------------------------------- // Sequence decode: 100 frames (VP9) // --------------------------------------------------------------------------- @@ -229,9 +268,62 @@ fn bench_vp9_decode_4k(c: &mut Criterion) { group.finish(); } +// --------------------------------------------------------------------------- +// Cold-start: decoder creation cost (reconnection / codec switch) +// --------------------------------------------------------------------------- + +fn bench_decoder_cold_start(c: &mut Criterion) { + let mut group = c.benchmark_group("decoder_cold_start"); + + for (label, format) in [ + ("vp8_1080p", CodecFormat::VP8), + ("vp9_1080p", CodecFormat::VP9), + ("av1_1080p", CodecFormat::AV1), + ] { + group.throughput(Throughput::Elements(1)); + 
group.bench_function(BenchmarkId::from_parameter(label), |b| { + b.iter(|| { + black_box(Decoder::new(format, None)); + }); + }); + } + group.finish(); +} + +// --------------------------------------------------------------------------- +// Decode-only: VP9 without YUV→RGB conversion (isolates codec cost) +// --------------------------------------------------------------------------- + +fn bench_vp9_decode_raw(c: &mut Criterion) { + let mut group = c.benchmark_group("decode_raw"); + + let encoded = pre_encode_vpx(VpxVideoCodecId::VP9, W, H, 1.0, 30); + let mut decoder = + VpxDecoder::new(VpxDecoderConfig { codec: VpxVideoCodecId::VP9 }).unwrap(); + + group.throughput(Throughput::Elements(1)); + group.bench_function(BenchmarkId::from_parameter("vp9_1080p"), |b| { + let mut idx = 0; + b.iter(|| { + let frame = &encoded[idx % encoded.len()]; + for img in decoder.decode(black_box(&frame.data)).unwrap() { + black_box(&img); + } + for img in decoder.flush().unwrap() { + black_box(&img); + } + idx += 1; + }); + }); + group.finish(); +} + criterion_group!( benches, bench_decode_single, + bench_vp9_decode_alignment, + bench_decoder_cold_start, + bench_vp9_decode_raw, bench_vp9_decode_sequence, bench_vp9_decode_4k, ); diff --git a/libs/scrap/benches/codec_encode.rs b/libs/scrap/benches/codec_encode.rs index a02bd2721..107fa7265 100644 --- a/libs/scrap/benches/codec_encode.rs +++ b/libs/scrap/benches/codec_encode.rs @@ -14,16 +14,23 @@ use std::time::Duration; /// B. Video encode benchmarks. /// /// Calls the real `EncoderApi::encode_to_message()` — the same function used -/// by video_service.rs handle_one_frame(). This ensures any change to the -/// encode path (flush behavior, frame creation, etc.) is reflected here. +/// by video_service.rs handle_one_frame(). /// -/// Includes single-frame, sequence (static + movement), and quality variations. 
+/// Single-frame benchmarks alternate between multiple distinct frames to avoid +/// the encoder's rate controller dropping frames on identical input +/// (rc_dropframe_thresh=25 causes Err("no valid frame") on static content). const W: usize = 1920; const H: usize = 1080; +const NUM_FRAMES: usize = 8; + +/// Pre-generate a pool of distinct YUV frames for realistic encode benchmarks. +fn make_frame_pool(w: usize, h: usize, n: usize) -> Vec> { + (0..n).map(|i| make_i420(w, h, i * 37).0).collect() +} // --------------------------------------------------------------------------- -// Single-frame encode (VP8, VP9, AV1) +// Single-frame encode with varied input (VP8, VP9, AV1) // --------------------------------------------------------------------------- fn bench_vpx_encode_single(c: &mut Criterion) { @@ -42,15 +49,15 @@ fn bench_vpx_encode_single(c: &mut Criterion) { keyframe_interval: None, }); let mut encoder = VpxEncoder::new(cfg, false).unwrap(); - let (yuv, _) = make_i420(W, H, 0); + let pool = make_frame_pool(W, H, NUM_FRAMES); group.throughput(Throughput::Elements(1)); group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { let mut pts = 0i64; b.iter(|| { - let input = EncodeInput::YUV(&yuv); - // encode_to_message may return Err("no valid frame") when the codec drops a frame — this is normal -drop(encoder.encode_to_message(input, pts)); + let yuv = &pool[pts as usize % pool.len()]; + let input = EncodeInput::YUV(black_box(yuv)); + drop(encoder.encode_to_message(input, pts)); pts += 1; }); }); @@ -65,15 +72,15 @@ drop(encoder.encode_to_message(input, pts)); keyframe_interval: None, }); let mut encoder = AomEncoder::new(cfg, false).unwrap(); - let (yuv, _) = make_i420(W, H, 0); + let pool = make_frame_pool(W, H, NUM_FRAMES); group.throughput(Throughput::Elements(1)); group.bench_with_input(BenchmarkId::from_parameter("av1_1080p"), &(), |b, _| { let mut pts = 0i64; b.iter(|| { - let input = EncodeInput::YUV(&yuv); - // encode_to_message may 
return Err("no valid frame") when the codec drops a frame — this is normal -drop(encoder.encode_to_message(input, pts)); + let yuv = &pool[pts as usize % pool.len()]; + let input = EncodeInput::YUV(black_box(yuv)); + drop(encoder.encode_to_message(input, pts)); pts += 1; }); }); @@ -83,7 +90,43 @@ drop(encoder.encode_to_message(input, pts)); } // --------------------------------------------------------------------------- -// 4K encode +// Keyframe-only encode (worst case — every frame is a keyframe) +// --------------------------------------------------------------------------- + +fn bench_vpx_encode_keyframe(c: &mut Criterion) { + let mut group = c.benchmark_group("encode_keyframe"); + + for codec in [VpxVideoCodecId::VP8, VpxVideoCodecId::VP9] { + let label = match codec { + VpxVideoCodecId::VP8 => "vp8_1080p", + VpxVideoCodecId::VP9 => "vp9_1080p", + }; + let cfg = EncoderCfg::VPX(VpxEncoderConfig { + width: W as _, + height: H as _, + quality: 1.0, + codec, + keyframe_interval: Some(1), // force keyframe every frame + }); + let mut encoder = VpxEncoder::new(cfg, false).unwrap(); + let pool = make_frame_pool(W, H, NUM_FRAMES); + + group.throughput(Throughput::Elements(1)); + group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { + let mut pts = 0i64; + b.iter(|| { + let yuv = &pool[pts as usize % pool.len()]; + let input = EncodeInput::YUV(black_box(yuv)); + drop(encoder.encode_to_message(input, pts)); + pts += 1; + }); + }); + } + group.finish(); +} + +// --------------------------------------------------------------------------- +// 4K encode with varied input // --------------------------------------------------------------------------- fn bench_encode_4k(c: &mut Criterion) { @@ -104,15 +147,15 @@ fn bench_encode_4k(c: &mut Criterion) { keyframe_interval: None, }); let mut encoder = VpxEncoder::new(cfg, false).unwrap(); - let (yuv, _) = make_i420(w4k, h4k, 0); + let pool = make_frame_pool(w4k, h4k, 4); // fewer frames for 4K (memory) 
group.throughput(Throughput::Elements(1)); group.bench_with_input(BenchmarkId::from_parameter(label), &(), |b, _| { let mut pts = 0i64; b.iter(|| { - let input = EncodeInput::YUV(black_box(&yuv)); - // encode_to_message may return Err("no valid frame") when the codec drops a frame — this is normal -drop(encoder.encode_to_message(input, pts)); + let yuv = &pool[pts as usize % pool.len()]; + let input = EncodeInput::YUV(black_box(yuv)); + drop(encoder.encode_to_message(input, pts)); pts += 1; }); }); @@ -168,8 +211,6 @@ fn bench_vp9_encode_sequence_movement(c: &mut Criterion) { keyframe_interval: None, }); let mut encoder = VpxEncoder::new(cfg, false).unwrap(); - - // Pre-generate 100 frames with progressive shift let frames: Vec<Vec<u8>> = (0..100).map(|i| make_i420(W, H, i * 5).0).collect(); group.throughput(Throughput::Elements(100)); @@ -185,7 +226,7 @@ fn bench_vp9_encode_sequence_movement(c: &mut Criterion) { } // --------------------------------------------------------------------------- -// Quality ratio impact (VP9 1080p) +// Quality ratio impact (VP9 1080p, varied input) // --------------------------------------------------------------------------- fn bench_vp9_encode_quality(c: &mut Criterion) { @@ -197,7 +238,7 @@ fn bench_vp9_encode_quality(c: &mut Criterion) { ("q1.0_balanced", 1.0), ("q2.0_best", 2.0), ]; - let (yuv, _) = make_i420(W, H, 0); + let pool = make_frame_pool(W, H, NUM_FRAMES); for (label, quality) in qualities { let cfg = EncoderCfg::VPX(VpxEncoderConfig { @@ -213,9 +254,9 @@ fn bench_vp9_encode_quality(c: &mut Criterion) { group.bench_with_input(BenchmarkId::from_parameter(*label), &(), |b, _| { let mut pts = 0i64; b.iter(|| { - let input = EncodeInput::YUV(black_box(&yuv)); - // encode_to_message may return Err("no valid frame") when the codec drops a frame — this is normal -drop(encoder.encode_to_message(input, pts)); + let yuv = &pool[pts as usize % pool.len()]; + let input = EncodeInput::YUV(black_box(yuv)); +
drop(encoder.encode_to_message(input, pts)); pts += 1; }); }); @@ -223,10 +264,55 @@ drop(encoder.encode_to_message(input, pts)); group.finish(); } +// --------------------------------------------------------------------------- +// Cold-start: encoder creation cost (reconnection / codec switch) +// --------------------------------------------------------------------------- + +fn bench_encoder_cold_start(c: &mut Criterion) { + let mut group = c.benchmark_group("encoder_cold_start"); + + for codec in [VpxVideoCodecId::VP8, VpxVideoCodecId::VP9] { + let label = match codec { + VpxVideoCodecId::VP8 => "vp8_1080p", + VpxVideoCodecId::VP9 => "vp9_1080p", + }; + group.throughput(Throughput::Elements(1)); + group.bench_function(BenchmarkId::from_parameter(label), |b| { + b.iter(|| { + let cfg = EncoderCfg::VPX(VpxEncoderConfig { + width: W as _, + height: H as _, + quality: 1.0, + codec, + keyframe_interval: None, + }); + black_box(VpxEncoder::new(cfg, false).unwrap()); + }); + }); + } + + { + group.bench_function(BenchmarkId::from_parameter("av1_1080p"), |b| { + b.iter(|| { + let cfg = EncoderCfg::AOM(AomEncoderConfig { + width: W as _, + height: H as _, + quality: 1.0, + keyframe_interval: None, + }); + black_box(AomEncoder::new(cfg, false).unwrap()); + }); + }); + } + group.finish(); +} + criterion_group!( benches, bench_vpx_encode_single, + bench_vpx_encode_keyframe, bench_encode_4k, + bench_encoder_cold_start, bench_vp9_encode_sequence_static, bench_vp9_encode_sequence_movement, bench_vp9_encode_quality, diff --git a/libs/scrap/benches/mutex_contention.rs b/libs/scrap/benches/mutex_contention.rs index 240b86866..d0681dce4 100644 --- a/libs/scrap/benches/mutex_contention.rs +++ b/libs/scrap/benches/mutex_contention.rs @@ -1,18 +1,18 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; use std::collections::HashMap; -use std::sync::{Arc, Mutex, RwLock}; -use std::sync::atomic::{AtomicU32, Ordering}; +use 
std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::{Arc, Barrier, Mutex, RwLock}; use std::thread; /// H. Mutex contention benchmarks (simulated patterns). /// -/// Reproduces the locking patterns from video_service.rs hot loop: -/// - HashMap behind Mutex (current pattern for subscribers/connections) -/// - Comparison with RwLock and Atomic alternatives -/// -/// No RustDesk-specific types needed — pure synchronization primitives. +/// Reproduces the locking patterns from video_service.rs hot loop. +/// Multi-threaded benchmarks use persistent threads + Barrier to avoid +/// measuring thread::spawn/join overhead (~40-200µs per iteration). const ENTRIES: usize = 5; +const OPS_PER_THREAD: usize = 1000; +const NUM_THREADS: usize = 4; fn make_map() -> HashMap<i32, u64> { (0..ENTRIES as i32).map(|i| (i, i as u64 * 100)).collect() @@ -39,88 +39,146 @@ fn bench_mutex_single_thread(c: &mut Criterion) { } // --------------------------------------------------------------------------- -// Mutex vs RwLock: 4 reader threads concurrent +// Mutex vs RwLock: 4 reader threads concurrent (persistent threads) // --------------------------------------------------------------------------- fn bench_mutex_vs_rwlock_readers(c: &mut Criterion) { let mut group = c.benchmark_group("lock_4readers"); group.measurement_time(std::time::Duration::from_secs(10)); + let total_ops = NUM_THREADS * OPS_PER_THREAD; + group.throughput(Throughput::Elements(total_ops as u64)); + // Mutex { let m = Arc::new(Mutex::new(make_map())); - group.throughput(Throughput::Elements(1)); + let barrier = Arc::new(Barrier::new(NUM_THREADS + 1)); + let stop = Arc::new(AtomicBool::new(false)); + + let handles: Vec<_> = (0..NUM_THREADS) + .map(|_| { + let m = m.clone(); + let barrier = barrier.clone(); + let stop = stop.clone(); + thread::spawn(move || loop { + barrier.wait(); + if stop.load(Ordering::Relaxed) { + return; + } + for _ in 0..OPS_PER_THREAD { + let guard = m.lock().unwrap(); +
black_box(guard.get(&0).copied()); + } + barrier.wait(); + }) + }) + .collect(); + group.bench_function(BenchmarkId::from_parameter("mutex"), |b| { b.iter(|| { - let mut handles = Vec::new(); - for _ in 0..4 { - let m = m.clone(); - handles.push(thread::spawn(move || { - for _ in 0..1000 { - let guard = m.lock().unwrap(); - black_box(guard.get(&0).copied()); - } - })); - } - for h in handles { - h.join().unwrap(); - } + barrier.wait(); // start workers + barrier.wait(); // wait for completion }); }); + + stop.store(true, Ordering::Relaxed); + barrier.wait(); + for h in handles { + h.join().unwrap(); + } } // RwLock { let m = Arc::new(RwLock::new(make_map())); - group.throughput(Throughput::Elements(1)); + let barrier = Arc::new(Barrier::new(NUM_THREADS + 1)); + let stop = Arc::new(AtomicBool::new(false)); + + let handles: Vec<_> = (0..NUM_THREADS) + .map(|_| { + let m = m.clone(); + let barrier = barrier.clone(); + let stop = stop.clone(); + thread::spawn(move || loop { + barrier.wait(); + if stop.load(Ordering::Relaxed) { + return; + } + for _ in 0..OPS_PER_THREAD { + let guard = m.read().unwrap(); + black_box(guard.get(&0).copied()); + } + barrier.wait(); + }) + }) + .collect(); + group.bench_function(BenchmarkId::from_parameter("rwlock"), |b| { b.iter(|| { - let mut handles = Vec::new(); - for _ in 0..4 { - let m = m.clone(); - handles.push(thread::spawn(move || { - for _ in 0..1000 { - let guard = m.read().unwrap(); - black_box(guard.get(&0).copied()); - } - })); - } - for h in handles { - h.join().unwrap(); - } + barrier.wait(); + barrier.wait(); }); }); + + stop.store(true, Ordering::Relaxed); + barrier.wait(); + for h in handles { + h.join().unwrap(); + } } group.finish(); } // --------------------------------------------------------------------------- -// Mutex: 4 writer threads concurrent +// Mutex: 4 writer threads concurrent (persistent threads) // --------------------------------------------------------------------------- fn bench_mutex_writers(c: &mut 
Criterion) { let mut group = c.benchmark_group("mutex_4writers"); group.measurement_time(std::time::Duration::from_secs(10)); + let total_ops = NUM_THREADS * OPS_PER_THREAD; + group.throughput(Throughput::Elements(total_ops as u64)); + let m = Arc::new(Mutex::new(make_map())); - group.throughput(Throughput::Elements(1)); + let barrier = Arc::new(Barrier::new(NUM_THREADS + 1)); + let stop = Arc::new(AtomicBool::new(false)); + + let handles: Vec<_> = (0..NUM_THREADS) + .map(|t| { + let m = m.clone(); + let barrier = barrier.clone(); + let stop = stop.clone(); + thread::spawn(move || { + let mut i = 0u64; + loop { + barrier.wait(); + if stop.load(Ordering::Relaxed) { + return; + } + for _ in 0..OPS_PER_THREAD { + let mut guard = m.lock().unwrap(); + guard.insert(0, t as u64 * 1000 + i); + i += 1; + } + barrier.wait(); + } + }) + }) + .collect(); + group.bench_function("lock_write_unlock", |b| { b.iter(|| { - let mut handles = Vec::new(); - for t in 0..4u64 { - let m = m.clone(); - handles.push(thread::spawn(move || { - for i in 0..1000 { - let mut guard = m.lock().unwrap(); - guard.insert(0, t * 1000 + i); - } - })); - } - for h in handles { - h.join().unwrap(); - } + barrier.wait(); + barrier.wait(); }); }); + + stop.store(true, Ordering::Relaxed); + barrier.wait(); + for h in handles { + h.join().unwrap(); + } group.finish(); } @@ -133,12 +191,10 @@ fn bench_atomic_vs_mutex_u32(c: &mut Criterion) { // AtomicU32 { - let v = Arc::new(AtomicU32::new(30)); + let v = AtomicU32::new(30); group.throughput(Throughput::Elements(1)); group.bench_function(BenchmarkId::from_parameter("atomic_read"), |b| { - b.iter(|| { - black_box(v.load(Ordering::Relaxed)) - }); + b.iter(|| black_box(v.load(Ordering::Relaxed))); }); group.bench_function(BenchmarkId::from_parameter("atomic_write"), |b| { @@ -152,7 +208,7 @@ fn bench_atomic_vs_mutex_u32(c: &mut Criterion) { // Mutex { - let v = Arc::new(Mutex::new(30u32)); + let v = Mutex::new(30u32); 
group.bench_function(BenchmarkId::from_parameter("mutex_read"), |b| { b.iter(|| { let guard = v.lock().unwrap(); diff --git a/libs/scrap/benches/pipeline_decode.rs b/libs/scrap/benches/pipeline_decode.rs index 1f60e2678..1b3e2258b 100644 --- a/libs/scrap/benches/pipeline_decode.rs +++ b/libs/scrap/benches/pipeline_decode.rs @@ -52,11 +52,11 @@ fn make_serialized_messages( .collect() } -/// Extract the video_frame::Union from a serialized Message. +/// Extract the video_frame::Union from a serialized Message by move (no clone). +/// Matches the real code path in io_loop.rs where the VideoFrame is consumed. fn extract_union(msg_bytes: &[u8]) -> Option<video_frame::Union> { - let msg = Message::parse_from_bytes(msg_bytes).ok()?; - let vf = msg.video_frame(); - vf.union.clone() + let mut msg = Message::parse_from_bytes(msg_bytes).ok()?; + msg.take_video_frame().union.take() } // --------------------------------------------------------------------------- diff --git a/libs/scrap/benches/pipeline_encode.rs b/libs/scrap/benches/pipeline_encode.rs index ea0247f5e..06ec3d777 100644 --- a/libs/scrap/benches/pipeline_encode.rs +++ b/libs/scrap/benches/pipeline_encode.rs @@ -72,10 +72,8 @@ fn bench_pipeline_encode_1080p(c: &mut Criterion) { if let Ok(vf) = vf { let mut msg = Message::new(); msg.set_video_frame(vf); - msg.write_to_bytes().unwrap() - } else { - Vec::new() - }; + black_box(msg.write_to_bytes().unwrap()); + } pts += 1; }); }); @@ -128,14 +126,11 @@ fn bench_pipeline_encode_4k(c: &mut Criterion) { } let input = EncodeInput::YUV(&yuv); - let vf = encoder.encode_to_message(input, pts); - if let Ok(vf) = vf { + if let Ok(vf) = encoder.encode_to_message(input, pts) { let mut msg = Message::new(); msg.set_video_frame(vf); - msg.write_to_bytes().unwrap() - } else { - Vec::new() - }; + black_box(msg.write_to_bytes().unwrap()); + } pts += 1; }); }); @@ -196,7 +191,9 @@ fn bench_pipeline_encode_sequence(c: &mut Criterion) { let input = EncodeInput::YUV(&yuv); if let Ok(vf) =
encoder.encode_to_message(input, pts as i64) { - total_output_bytes += vf.compute_size() as usize; + let mut msg = Message::new(); + msg.set_video_frame(vf); + total_output_bytes += black_box(msg.write_to_bytes().unwrap()).len(); } } black_box(total_output_bytes) diff --git a/libs/scrap/benches/video_queue.rs b/libs/scrap/benches/video_queue.rs index 29d4599c0..e7a6a1d53 100644 --- a/libs/scrap/benches/video_queue.rs +++ b/libs/scrap/benches/video_queue.rs @@ -41,11 +41,14 @@ fn bench_push(c: &mut Criterion) { group.throughput(Throughput::Elements(QUEUE_CAP as u64)); group.bench_function(BenchmarkId::from_parameter("120_frames"), |b| { + // Pre-allocate queue once, drain+refill each iteration + let q = ArrayQueue::new(QUEUE_CAP); b.iter(|| { - let q = ArrayQueue::new(QUEUE_CAP); for f in &frames { let _ = q.push(black_box(f.clone())); } + // Drain for next iteration + while q.pop().is_some() {} }); }); group.finish(); @@ -85,6 +88,9 @@ fn bench_pop(c: &mut Criterion) { fn bench_force_push(c: &mut Criterion) { let mut group = c.benchmark_group("video_queue_force_push"); + // Pre-allocate the frame outside the hot loop to isolate queue cost + let frame = make_video_frame(999); + group.throughput(Throughput::Elements(1)); group.bench_function(BenchmarkId::from_parameter("full_queue"), |b| { let q = ArrayQueue::new(QUEUE_CAP); @@ -93,7 +99,7 @@ fn bench_force_push(c: &mut Criterion) { } b.iter(|| { // Real code: io_loop.rs:1310 uses video_queue.force_push(vf) - black_box(q.force_push(black_box(make_video_frame(999)))) + black_box(q.force_push(black_box(frame.clone()))) }); }); group.finish(); From 98acd06c7f332dd10973e398ef1143d647da783c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Wed, 8 Apr 2026 19:46:37 +0200 Subject: [PATCH 4/4] fix(bench): use force_push instead of push in video_queue bench_push Production code (io_loop.rs:1310) always uses force_push which drops the oldest frame when the queue is full. 
The benchmark was using push() which silently fails on a full queue. Co-Authored-By: Claude Opus 4.6 (1M context) --- libs/scrap/benches/video_queue.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/scrap/benches/video_queue.rs b/libs/scrap/benches/video_queue.rs index e7a6a1d53..2367df53c 100644 --- a/libs/scrap/benches/video_queue.rs +++ b/libs/scrap/benches/video_queue.rs @@ -45,7 +45,8 @@ fn bench_push(c: &mut Criterion) { let q = ArrayQueue::new(QUEUE_CAP); b.iter(|| { for f in &frames { - let _ = q.push(black_box(f.clone())); + // Real code uses force_push (io_loop.rs:1310) + q.force_push(black_box(f.clone())); } // Drain for next iteration while q.pop().is_some() {}