diff --git a/.github/workflows/paddle_wheel.yaml b/.github/workflows/paddle_wheel.yaml
index b7423f263..7b330bf8f 100644
--- a/.github/workflows/paddle_wheel.yaml
+++ b/.github/workflows/paddle_wheel.yaml
@@ -20,15 +20,61 @@ defaults:
   run:
     shell: bash -l -eo pipefail {0}
 
+env:
+  PADDLECODEC_TEST_VIDEO_URL: https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_video/example_video.mp4
+  PADDLECODEC_TEST_VIDEO_CACHE_KEY: paddlecodec-test-video-v1-example-video
+  PADDLECODEC_TEST_VIDEO_PATH: .github/test-assets/example_video.mp4
+
 jobs:
-  build-paddlecodec-wheel:
+  prepare-test-video:
     runs-on: ubuntu-latest
+    name: Prepare cached Paddle test video
+    steps:
+      - name: Restore cached test video
+        id: cache-test-video
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.PADDLECODEC_TEST_VIDEO_PATH }}
+          key: ${{ env.PADDLECODEC_TEST_VIDEO_CACHE_KEY }}
+
+      - name: Download test video
+        if: steps.cache-test-video.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p "$(dirname "${PADDLECODEC_TEST_VIDEO_PATH}")"
+          curl --fail --location --retry 5 --retry-all-errors \
+            --output "${PADDLECODEC_TEST_VIDEO_PATH}" \
+            "${PADDLECODEC_TEST_VIDEO_URL}"
+
+      - name: Upload cached test video artifact
+        uses: actions/upload-artifact@v5
+        with:
+          name: paddlecodec-test-video
+          path: ${{ env.PADDLECODEC_TEST_VIDEO_PATH }}
+          if-no-files-found: error
+
+  build-paddlecodec-wheel:
+    name: Build and upload Paddle wheel (${{ matrix.arch-name }}, py${{ matrix.python-version }})
+    runs-on: ${{ matrix.runner }}
     container:
-      image: pytorch/manylinux2_28-builder:cpu
+      image: ${{ matrix.container-image }}
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.10", "3.11", "3.12", "3.13"]
+        arch: ["x86_64", "arm64"]
+        include:
+          - arch: x86_64
+            arch-name: x86_64
+            runner: ubuntu-latest
+            container-image: pytorch/manylinux2_28-builder:cpu
+            artifact-prefix: paddlecodec-wheel-linux
+            wheel-platform: manylinux_2_28_x86_64
+          - arch: arm64
+            arch-name: arm64
+            runner: ubuntu-24.04-arm
+            container-image: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64
+            artifact-prefix: paddlecodec-wheel-linux-arm64
+            wheel-platform: manylinux_2_28_aarch64
     permissions:
       id-token: write
       contents: read
@@ -59,8 +105,8 @@ jobs:
 
       - name: Build wheel
         run: |
-          # Use pre-built FFmpeg from PyTorch S3
           export BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1
+          export I_CONFIRM_THIS_IS_NOT_A_LICENSE_VIOLATION=1
           export TORCHCODEC_CMAKE_BUILD_DIR=$(pwd)/build_cmake
           python -m build --wheel -vvv --no-isolation
 
@@ -68,29 +114,18 @@ jobs:
         run: |
           pip install auditwheel
 
-          # 1. Extract internal libraries from the wheel to a temporary directory
-          # This allows auditwheel to find them when checking dependencies
           mkdir -p temp_libs
           unzip -j dist/*.whl "torchcodec/*.so" -d temp_libs || true
 
-          # 2. Prepare LD_LIBRARY_PATH
-          # FFmpeg libraries
-          FFMPEG_LIB_PATHS=$(find $(pwd)/build_cmake/_deps -type d -name "lib" | tr '\n' ':')
-          # PaddlePaddle libraries
+          FFMPEG_LIB_PATHS=$(find "$(pwd)/build_cmake/_deps" -type d -name "lib" -print | paste -sd: -)
           PADDLE_PATH=$(python -c "import paddle; print(paddle.__path__[0])")
           PADDLE_LIB_PATHS="$PADDLE_PATH/base:$PADDLE_PATH/libs"
-          # Wheel internal libraries
           INTERNAL_LIB_PATH=$(pwd)/temp_libs
 
-          export LD_LIBRARY_PATH=${FFMPEG_LIB_PATHS}${PADDLE_LIB_PATHS}:${INTERNAL_LIB_PATH}:${LD_LIBRARY_PATH}
+          export LD_LIBRARY_PATH=${FFMPEG_LIB_PATHS}:${PADDLE_LIB_PATHS}:${INTERNAL_LIB_PATH}:${LD_LIBRARY_PATH}
 
-          # 3. Repair wheel with auditwheel
-          # We exclude all external libraries because we want to rely on system libraries (like FFmpeg)
-          # or libraries provided by other packages (like PaddlePaddle).
-          # auditwheel 6.1.0+ supports wildcards in --exclude.
-          auditwheel repair dist/*.whl --plat manylinux_2_28_x86_64 -w wheelhouse/ --exclude "*"
+          auditwheel repair dist/*.whl --plat ${{ matrix.wheel-platform }} -w wheelhouse/ --exclude "*"
 
-          # Cleanup
           rm -rf temp_libs
           rm dist/*.whl
           mv wheelhouse/*.whl dist/
@@ -99,7 +134,7 @@ jobs:
       - name: Upload wheel artifact
         uses: actions/upload-artifact@v5
         with:
-          name: paddlecodec-wheel-linux-py${{ matrix.python-version }}
+          name: ${{ matrix.artifact-prefix }}-py${{ matrix.python-version }}
           path: dist/*.whl
 
       - name: Run post-build script
@@ -113,31 +148,49 @@ jobs:
           unzip -l $wheel_path
 
   test-paddlecodec-wheel:
-    needs: build-paddlecodec-wheel
-    runs-on: ubuntu-latest
+    name: Install and test Paddle wheel (${{ matrix.arch-name }}, py${{ matrix.python-version }}, ffmpeg ${{ matrix.ffmpeg-version }})
+    needs: [prepare-test-video, build-paddlecodec-wheel]
+    runs-on: ${{ matrix.runner }}
+    container:
+      image: ${{ matrix.container-image }}
+    env:
+      PADDLECODEC_TEST_VIDEO: .github/test-assets/example_video.mp4
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.10", "3.11", "3.12", "3.13"]
+        arch: ["x86_64", "arm64"]
         # FFmpeg 8.0 depends on libopenvino.so.2520, PaddlePaddle CPU depends on libopenvino.so.2500
         # There has some conflict causing test failures, but it works with PaddlePaddle GPU.
         # We skip FFmpeg 8.0 tests for PaddlePaddle CPU builds for now.
         ffmpeg-version: ["4.4.2", "5.1.2", "6.1.1", "7.0.1"]
+        include:
+          - arch: x86_64
+            arch-name: x86_64
+            runner: ubuntu-latest
+            container-image: pytorch/manylinux2_28-builder:cpu
+            artifact-prefix: paddlecodec-wheel-linux
+          - arch: arm64
+            arch-name: arm64
+            runner: ubuntu-24.04-arm
+            container-image: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64
+            artifact-prefix: paddlecodec-wheel-linux-arm64
     steps:
       - name: Checkout repository
         uses: actions/checkout@v6
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
       - name: Download wheel artifact
         uses: actions/download-artifact@v4
         with:
-          name: paddlecodec-wheel-linux-py${{ matrix.python-version }}
+          name: ${{ matrix.artifact-prefix }}-py${{ matrix.python-version }}
           path: dist/
 
+      - name: Download cached test video artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: paddlecodec-test-video
+          path: .github/test-assets/
+
       - name: Install FFmpeg via conda
         uses: conda-incubator/setup-miniconda@v3
         with:
@@ -167,7 +220,6 @@ jobs:
 
       - name: Delete src folder
         run: |
-          # Delete src/ to ensure we're testing the installed wheel, not source code
           rm -rf src/
           ls -la
 
@@ -177,18 +229,17 @@ jobs:
 
   publish-pypi:
     runs-on: ubuntu-latest
-    name: Publish to PyPI
+    name: Publish Paddle wheels to PyPI
     if: "startsWith(github.ref, 'refs/tags/')"
     needs:
       - test-paddlecodec-wheel
     permissions:
       id-token: write
-
     steps:
       - name: Retrieve release distributions
         uses: actions/download-artifact@v6
         with:
-          pattern: paddlecodec-wheel-linux-*
+          pattern: paddlecodec-wheel-linux*
           path: dist/
           merge-multiple: true
 
@@ -197,7 +248,7 @@ jobs:
 
   publish-release:
     runs-on: ubuntu-latest
-    name: Publish to GitHub
+    name: Publish Paddle wheels to GitHub
     if: "startsWith(github.ref, 'refs/tags/')"
     needs:
       - test-paddlecodec-wheel
@@ -206,7 +257,7 @@ jobs:
     steps:
       - uses: actions/download-artifact@v6
         with:
-          pattern: paddlecodec-wheel-linux-*
+          pattern: paddlecodec-wheel-linux*
           path: dist/
           merge-multiple: true
       - name: Get tag name
diff --git a/src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake b/src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
index 07abd2e87..781fce5a0 100644
--- a/src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
+++ b/src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
@@ -23,31 +23,59 @@ set(
 
 if (LINUX)
     set(lib_dir "lib")
-    set(
-        platform_url
-        ${base_url}/linux_x86_64
-    )
+    if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
+        set(
+            platform_url
+            ${base_url}/linux_aarch64
+        )
 
-    set(
-        f4_sha256
-        1a083f1922443bedb5243d04896383b8c606778a7ddb9d886c8303e55339fe0c
-    )
-    set(
-        f5_sha256
-        65d6ad54082d94dcb3f801d73df2265e0e1bb303c7afbce7723e3b77ccd0e207
-    )
-    set(
-        f6_sha256
-        8bd5939c2f4a4b072e837e7870c13fe7d13824e5ff087ab534e4db4e90b7be9c
-    )
-    set(
-        f7_sha256
-        1cb946d8b7c6393c2c3ebe1f900b8de7a2885fe614c45d4ec32c9833084f2f26
-    )
-    set(
-        f8_sha256
-        c55b3c1a4b5e4d5fdd7c632bea3ab6f45b4e37cc8e0999dda3f84a8ed8defad8
-    )
+        set(
+            f4_sha256
+            a310a2ed9ffe555fd3278dae15065541098dd35e124564671dcda6a6620ac842
+        )
+        set(
+            f5_sha256
+            89ca7996bccbc2db49adaa401d20fdbabffe0e1b4e07a0f81d6b143e858b7c8d
+        )
+        set(
+            f6_sha256
+            ae44c67b4587d061b8e9cc8990ca891ee013fe52ad79e5016ba29871562621da
+        )
+        set(
+            f7_sha256
+            948e2cac66ca6f68ff526d5e84138e94bce0f1a7c83f502d15d85d0bd3ddc112
+        )
+        set(
+            f8_sha256
+            b9cfd99ae75a14e58300854967d4dc49de0b3daa551df51ea1f52a3f08d2c8af
+        )
+    else()
+        set(
+            platform_url
+            ${base_url}/linux_x86_64
+        )
+
+        set(
+            f4_sha256
+            1a083f1922443bedb5243d04896383b8c606778a7ddb9d886c8303e55339fe0c
+        )
+        set(
+            f5_sha256
+            65d6ad54082d94dcb3f801d73df2265e0e1bb303c7afbce7723e3b77ccd0e207
+        )
+        set(
+            f6_sha256
+            8bd5939c2f4a4b072e837e7870c13fe7d13824e5ff087ab534e4db4e90b7be9c
+        )
+        set(
+            f7_sha256
+            1cb946d8b7c6393c2c3ebe1f900b8de7a2885fe614c45d4ec32c9833084f2f26
+        )
+        set(
+            f8_sha256
+            c55b3c1a4b5e4d5fdd7c632bea3ab6f45b4e37cc8e0999dda3f84a8ed8defad8
+        )
+    endif()
     set(
         f4_library_file_names
         libavutil.so.56
diff --git a/test_paddle/test_video_decode.py b/test_paddle/test_video_decode.py
index 4e41cb19e..240944cc2 100644
--- a/test_paddle/test_video_decode.py
+++ b/test_paddle/test_video_decode.py
@@ -2,6 +2,8 @@
 paddle.enable_compat(scope={"torchcodec"})
 
 import pytest
+import subprocess
+import tempfile
 from dataclasses import dataclass, fields
 from io import BytesIO
 from typing import Callable, Mapping, Optional, Union
@@ -11,6 +13,61 @@
 import numpy as np
 
 
+def ffmpeg_rgb_sum(video_path_or_url: str) -> int:
+    """Return the sum of every RGB24 byte that the ``ffmpeg`` CLI decodes.
+
+    The local FFmpeg build is the oracle because YUV->RGB conversion is
+    architecture- and FFmpeg-build-dependent.
+
+    Args:
+        video_path_or_url: Local path or URL handed to ``ffmpeg -i``.
+
+    Returns:
+        Sum of all decoded ``rgb24`` byte values across the whole video.
+
+    Raises:
+        RuntimeError: If ``ffmpeg`` exits with a non-zero status.
+    """
+    command = [
+        "ffmpeg",
+        "-v",
+        "error",
+        "-vsync",
+        "0",
+        "-i",
+        video_path_or_url,
+        "-f",
+        "rawvideo",
+        "-pix_fmt",
+        "rgb24",
+        "-",
+    ]
+    # Spool stderr to a temporary file instead of a pipe: stdout is drained
+    # to EOF before stderr is read, so a full stderr pipe would block ffmpeg
+    # and deadlock the test.
+    with tempfile.TemporaryFile() as stderr_file:
+        with subprocess.Popen(
+            command,
+            # Keep ffmpeg from treating stdin as an interactive console.
+            stdin=subprocess.DEVNULL,
+            stdout=subprocess.PIPE,
+            stderr=stderr_file,
+        ) as proc:
+            assert proc.stdout is not None
+            total = 0
+            while chunk := proc.stdout.read(16 * 1024 * 1024):
+                # Vectorized sum: a pure-Python sum() over the raw frames
+                # is far slower.
+                pixels = np.frombuffer(chunk, dtype=np.uint8)
+                total += int(pixels.sum(dtype=np.int64))
+        # Leaving the Popen context has waited for ffmpeg to exit.
+        if proc.returncode != 0:
+            stderr_file.seek(0)
+            stderr = stderr_file.read().decode("utf-8", "ignore")
+            raise RuntimeError(f"ffmpeg failed to decode video: {stderr}")
+    return total
+
+
 @dataclass
 class VideoMetadata(Mapping):
     total_num_frames: int
@@ -169,9 +226,12 @@ def sample_indices_fn_func(metadata, **fn_kwargs):
 
 
 def test_video_decode():
-    url = "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_video/example_video.mp4"
-    video, metadata = load_video(url, backend="torchcodec")
-    assert video.to(paddle.int64).sum().item() == 247759890390
+    video_path = os.getenv(
+        "PADDLECODEC_TEST_VIDEO",
+        "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_video/example_video.mp4",
+    )
+    video, metadata = load_video(video_path, backend="torchcodec")
+    assert video.to(paddle.int64).sum().item() == ffmpeg_rgb_sum(video_path)
     assert metadata.total_num_frames == 263
     assert metadata.fps == pytest.approx(29.99418249715141)
     assert metadata.width == 1920