diff --git a/.github/workflows/build_ffmpeg.yaml b/.github/workflows/build_ffmpeg.yaml
index 9b9317e3b..5494bf0b1 100644
--- a/.github/workflows/build_ffmpeg.yaml
+++ b/.github/workflows/build_ffmpeg.yaml
@@ -48,6 +48,33 @@ jobs:
mkdir -p "${artifact_dir}"
mv ffmpeg.tar.gz "${artifact_dir}/${FFMPEG_VERSION}.tar.gz"
+ LGPL-Linux-aarch64:
+ strategy:
+ fail-fast: false
+ matrix:
+ ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"]
+ uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+ permissions:
+ id-token: write
+ contents: read
+ with:
+ job-name: Build
+ upload-artifact: ffmpeg-lgpl-linux_aarch64-${{ matrix.ffmpeg-version }}
+ repository: meta-pytorch/torchcodec
+ runner: linux.arm64.2xlarge
+ docker-image: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64
+ script: |
+ export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}"
+ export FFMPEG_ROOT="${PWD}/ffmpeg"
+
+ packaging/build_ffmpeg.sh
+
+ tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/lib
+
+ artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/linux_aarch64"
+ mkdir -p "${artifact_dir}"
+ mv ffmpeg.tar.gz "${artifact_dir}/${FFMPEG_VERSION}.tar.gz"
+
LGPL-macOS:
strategy:
fail-fast: false
diff --git a/.github/workflows/cpp_tests.yaml b/.github/workflows/cpp_tests.yaml
index e08d90754..9ea4f0591 100644
--- a/.github/workflows/cpp_tests.yaml
+++ b/.github/workflows/cpp_tests.yaml
@@ -22,7 +22,7 @@ jobs:
ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1']
steps:
- name: Check out repo
- uses: actions/checkout@v3
+ uses: actions/checkout@v6
- name: Setup conda env
uses: conda-incubator/setup-miniconda@v3
with:
@@ -37,8 +37,7 @@ jobs:
- name: Update pip
run: python -m pip install --upgrade pip
- name: Install torch dependencies
- run: |
- python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
+ run: bash packaging/install_pytorch.sh cpu "torch"
- name: Install ffmpeg, pkg-config and pybind11
run: |
conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" pkg-config pybind11 -c conda-forge
diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
deleted file mode 100644
index 0829246e0..000000000
--- a/.github/workflows/docs.yaml
+++ /dev/null
@@ -1,116 +0,0 @@
-name: Docs
-
-on:
- push:
- branches: [ main ]
- pull_request:
-
-permissions:
- id-token: write
- contents: write
-
-defaults:
- run:
- shell: bash -l -eo pipefail {0}
-
-jobs:
- generate-matrix:
- uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
- with:
- package-type: wheel
- os: linux
- test-infra-repository: pytorch/test-infra
- test-infra-ref: main
- with-cpu: disable
- with-xpu: disable
- with-rocm: disable
- with-cuda: enable
- build-python-only: "disable"
- build:
- needs: generate-matrix
- strategy:
- fail-fast: false
- name: Build and Upload wheel
- uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
- with:
- repository: meta-pytorch/torchcodec
- ref: ""
- test-infra-repository: pytorch/test-infra
- test-infra-ref: main
- build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
- pre-script: packaging/pre_build_script.sh
- post-script: packaging/post_build_script.sh
- smoke-test-script: packaging/fake_smoke_test.py
- package-name: torchcodec
- trigger-event: ${{ github.event_name }}
- build-platform: "python-build-package"
- build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 ENABLE_CUDA=1 python -m build --wheel -vvv --no-isolation"
-
- build-docs:
- runs-on: linux.4xlarge.nvidia.gpu
- strategy:
- fail-fast: false
- matrix:
- # 3.10 corresponds to the minimum python version for which we build
- # the wheel unless the label cliflow/binaries/all is present in the
- # PR.
- python-version: ['3.10']
- cuda-version: ['12.6']
- ffmpeg-version-for-tests: ['7']
- container:
- image: "pytorch/manylinux2_28-builder:cuda${{ matrix.cuda-version }}"
- options: "--gpus all -e NVIDIA_DRIVER_CAPABILITIES=video,compute,utility"
- needs: build
- steps:
- - name: Setup env vars
- run: |
- cuda_version_without_periods=$(echo "${{ matrix.cuda-version }}" | sed 's/\.//g')
- echo cuda_version_without_periods=${cuda_version_without_periods} >> $GITHUB_ENV
- python_version_without_periods=$(echo "${{ matrix.python-version }}" | sed 's/\.//g')
- echo python_version_without_periods=${python_version_without_periods} >> $GITHUB_ENV
- - uses: actions/download-artifact@v4
- with:
- name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cu${{ env.cuda_version_without_periods }}_x86_64
- path: pytorch/torchcodec/dist/
- - name: Setup miniconda using test-infra
- uses: pytorch/test-infra/.github/actions/setup-miniconda@main
- with:
- python-version: ${{ matrix.python-version }}
- # We install conda packages at the start because otherwise conda may have conflicts with dependencies.
- default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }}"
- - name: Check env
- run: |
- ${CONDA_RUN} env
- ${CONDA_RUN} conda info
- ${CONDA_RUN} nvidia-smi
- ${CONDA_RUN} conda list
- - name: Assert ffmpeg exists
- run: |
- ${CONDA_RUN} ffmpeg -buildconf
- - name: Update pip
- run: ${CONDA_RUN} python -m pip install --upgrade pip
- - name: Install PyTorch
- run: |
- ${CONDA_RUN} python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu${{ env.cuda_version_without_periods }}
- ${CONDA_RUN} python -c 'import torch; print(f"{torch.__version__}"); print(f"{torch.__file__}"); print(f"{torch.cuda.is_available()=}")'
- - name: Install torchcodec from the wheel
- run: |
- wheel_path=`find pytorch/torchcodec/dist -type f -name "*cu${{ env.cuda_version_without_periods }}-cp${{ env.python_version_without_periods }}*.whl"`
- echo Installing $wheel_path
- ${CONDA_RUN} python -m pip install $wheel_path -vvv
-
- - name: Check out repo
- uses: actions/checkout@v3
-
- - name: Install doc dependencies
- run: |
- cd docs
- ${CONDA_RUN} python -m pip install -r requirements.txt
- - name: Build docs
- run: |
- cd docs
- ${CONDA_RUN} make html
- - uses: actions/upload-artifact@v4
- with:
- name: Built-Docs
- path: docs/build/html/
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index c156a833c..84dc126f5 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -22,12 +22,12 @@ jobs:
python-version: ['3.12']
steps:
- name: Check out repo
- uses: actions/checkout@v3
+ uses: actions/checkout@v6
- name: Setup conda env
- uses: conda-incubator/setup-miniconda@v2
+ uses: conda-incubator/setup-miniconda@v3
with:
auto-update-conda: true
- miniconda-version: "latest"
+ miniforge-version: latest
activate-environment: test
python-version: ${{ matrix.python-version }}
- name: Update pip
@@ -50,19 +50,19 @@ jobs:
python-version: ['3.12']
steps:
- name: Check out repo
- uses: actions/checkout@v3
+ uses: actions/checkout@v6
- name: Setup conda env
- uses: conda-incubator/setup-miniconda@v2
+ uses: conda-incubator/setup-miniconda@v3
with:
auto-update-conda: true
- miniconda-version: "latest"
+ miniforge-version: latest
activate-environment: test
python-version: ${{ matrix.python-version }}
- name: Update pip
run: python -m pip install --upgrade pip
- name: Install dependencies and FFmpeg
run: |
- python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
+ bash packaging/install_pytorch.sh cpu "torch torchvision"
conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge
ffmpeg -version
- name: Build and install torchcodec
diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml
index 17f18fe8b..d8fd5ade9 100644
--- a/.github/workflows/linux_cuda_wheel.yaml
+++ b/.github/workflows/linux_cuda_wheel.yaml
@@ -1,4 +1,4 @@
-name: Build and test Linux CUDA wheels
+name: Build and test Linux CUDA wheels and docs
on:
pull_request:
@@ -84,10 +84,13 @@ jobs:
echo cuda_version_without_periods=${cuda_version_without_periods} >> $GITHUB_ENV
python_version_without_periods=$(echo "${{ matrix.python-version }}" | sed 's/\.//g')
echo python_version_without_periods=${python_version_without_periods} >> $GITHUB_ENV
- - uses: actions/download-artifact@v4
- with:
- name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cu${{ env.cuda_version_without_periods }}_x86_64
- path: pytorch/torchcodec/dist/
+
+ - name: Check out repo
+ uses: actions/checkout@v6
+
+ - name: Remove src/ folder
+ run: bash packaging/remove_src.sh
+
- name: Setup miniconda using test-infra
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
@@ -95,12 +98,13 @@ jobs:
# We install conda packages at the start because otherwise conda may have conflicts with dependencies.
# Note: xorg-libxau was addded to fix a problem with ffmpeg 4. We should consider removing it.
default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau"
- - name: Check env
+ - name: Check env, set LD_LIBRARY_PATH
run: |
${CONDA_RUN} env
${CONDA_RUN} conda info
${CONDA_RUN} nvidia-smi
${CONDA_RUN} conda list
+ echo LD_LIBRARY_PATH=$CONDA_PREFIX/lib:/usr/local/cuda/lib64/:${LD_LIBRARY_PATH} >> $GITHUB_ENV
- name: Assert ffmpeg exists
run: |
${CONDA_RUN} ffmpeg -buildconf
@@ -108,39 +112,168 @@ jobs:
run: ${CONDA_RUN} python -m pip install --upgrade pip
- name: Install PyTorch
run: |
- ${CONDA_RUN} python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu${{ env.cuda_version_without_periods }}
+ ${CONDA_RUN} bash packaging/install_pytorch.sh cu${{ env.cuda_version_without_periods }} "torch torchvision"
${CONDA_RUN} python -c 'import torch; print(f"{torch.__version__}"); print(f"{torch.__file__}"); print(f"{torch.cuda.is_available()=}")'
+
+ - uses: actions/download-artifact@v4
+ with:
+ name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cu${{ env.cuda_version_without_periods }}_x86_64
+ path: dist/
+
- name: Install torchcodec from the wheel
+ run: ${CONDA_RUN} bash packaging/install_torchcodec_wheel.sh "*cu${{ env.cuda_version_without_periods }}-cp${{ env.python_version_without_periods }}*.whl"
+
+ - name: Install test dependencies
+ run: ${CONDA_RUN} bash packaging/install_test_dependencies.sh
+ - name: Run Python tests
run: |
- wheel_path=`find pytorch/torchcodec/dist -type f -name "*cu${{ env.cuda_version_without_periods }}-cp${{ env.python_version_without_periods }}*.whl"`
- echo Installing $wheel_path
- ${CONDA_RUN} python -m pip install $wheel_path -vvv
+ ${CONDA_RUN} FAIL_WITHOUT_CUDA=1 pytest --override-ini="addopts=-v" test --tb=short
+ - name: Run Python benchmark
+ run: |
+ ${CONDA_RUN} time python benchmarks/decoders/gpu_benchmark.py --devices=cuda:0,cpu --resize_devices=none
+
+ build-docs:
+ runs-on: linux.g5.4xlarge.nvidia.gpu
+ env:
+ PYTHON_VERSION: '3.10'
+ CUDA_VERSION: '12.6'
+ FFMPEG_VERSION: '7'
+ container:
+ image: "pytorch/manylinux2_28-builder:cuda12.6" # must be same as env!!
+ options: "--gpus all -e NVIDIA_DRIVER_CAPABILITIES=video,compute,utility"
+ needs: build
+ steps:
+ - name: Setup env vars
+ run: |
+ cuda_version_without_periods=$(echo "${{ env.CUDA_VERSION }}" | sed 's/\.//g')
+ echo cuda_version_without_periods=${cuda_version_without_periods} >> $GITHUB_ENV
+ python_version_without_periods=$(echo "${{ env.PYTHON_VERSION }}" | sed 's/\.//g')
+ echo python_version_without_periods=${python_version_without_periods} >> $GITHUB_ENV
- name: Check out repo
- uses: actions/checkout@v3
+ uses: actions/checkout@v6
- - name: Install test dependencies
+ - name: Remove src/ folder
+ run: bash packaging/remove_src.sh
+
+ - name: Setup miniconda using test-infra
+ uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+ # We install conda packages at the start because otherwise conda may have conflicts with dependencies.
+ default-packages: "nvidia/label/cuda-${{ env.CUDA_VERSION }}.0::libnpp nvidia::cuda-nvrtc=${{ env.CUDA_VERSION }} nvidia::cuda-toolkit=${{ env.CUDA_VERSION }} nvidia::cuda-cudart=${{ env.CUDA_VERSION }} nvidia::cuda-driver-dev=${{ env.CUDA_VERSION }} conda-forge::ffmpeg=${{ env.FFMPEG_VERSION }}"
+ - name: Check env, set LD_LIBRARY_PATH
run: |
- # Ideally we would find a way to get those dependencies from pyproject.toml
- ${CONDA_RUN} python -m pip install numpy pytest pillow
+ ${CONDA_RUN} env
+ ${CONDA_RUN} conda info
+ ${CONDA_RUN} nvidia-smi
+ ${CONDA_RUN} conda list
+ echo LD_LIBRARY_PATH=$CONDA_PREFIX/lib:/usr/local/cuda/lib64/:${LD_LIBRARY_PATH} >> $GITHUB_ENV
+ - name: Assert ffmpeg exists
+ run: |
+ ${CONDA_RUN} ffmpeg -buildconf
+ - name: Update pip
+ run: ${CONDA_RUN} python -m pip install --upgrade pip
+ - name: Install PyTorch
+ run: |
+ ${CONDA_RUN} bash packaging/install_pytorch.sh cu${{ env.cuda_version_without_periods }} "torch torchvision"
+ ${CONDA_RUN} python -c 'import torch; print(f"{torch.__version__}"); print(f"{torch.__file__}"); print(f"{torch.cuda.is_available()=}")'
- - name: Delete the src/ folder just for fun
+ - uses: actions/download-artifact@v4
+ with:
+ name: meta-pytorch_torchcodec__${{ env.PYTHON_VERSION }}_cu${{ env.cuda_version_without_periods }}_x86_64
+ path: dist/
+
+ - name: Install torchcodec from the wheel
+ run: ${CONDA_RUN} bash packaging/install_torchcodec_wheel.sh "*cu${{ env.cuda_version_without_periods }}-cp${{ env.python_version_without_periods }}*.whl"
+
+ - name: Install doc dependencies
run: |
- # The only reason we checked-out the repo is to get access to the
- # tests. We don't care about the rest. Out of precaution, we delete
- # the src/ folder to be extra sure that we're running the code from
- # the installed wheel rather than from the source.
- # This is just to be extra cautious and very overkill because a)
- # there's no way the `torchcodec` package from src/ can be found from
- # the PythonPath: the main point of `src/` is precisely to protect
- # against that and b) if we ever were to execute code from
- # `src/torchcodec`, it would fail loudly because the built .so files
- # aren't present there.
- rm -r src/
- ls
- - name: Run Python tests
+ cd docs
+ ${CONDA_RUN} python -m pip install -r requirements.txt
+ - name: Build docs
run: |
- ${CONDA_RUN} FAIL_WITHOUT_CUDA=1 pytest --override-ini="addopts=-v" test --tb=short
- - name: Run Python benchmark
+ cd docs
+ ${CONDA_RUN} make html
+ - uses: actions/upload-artifact@v4
+ with:
+ name: Built-Docs
+ path: docs/build/html/
+
+ doc-preview:
+ runs-on: [self-hosted, linux.2xlarge]
+ needs: build-docs
+ if: github.repository == 'meta-pytorch/torchcodec' && github.event_name == 'pull_request'
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ name: Built-Docs
+ path: docs
+
+ # Update HTML to add the no-index tag so that search engines do not index these ephemeral docs
+ - name: Add no-index tag
run: |
- ${CONDA_RUN} time python benchmarks/decoders/gpu_benchmark.py --devices=cuda:0,cpu --resize_devices=none
+        find docs -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">';
+
+ - name: Upload docs preview
+ uses: seemethere/upload-artifact-s3@v5
+ with:
+ retention-days: 14
+ s3-bucket: doc-previews
+ if-no-files-found: error
+ path: docs
+ s3-prefix: meta-pytorch/torchcodec/${{ github.event.pull_request.number }}
+
+ upload-docs:
+ # This job uploads built docs:
+ # - to the `main` folder of the gh-pages branch (https://meta-pytorch.org/torchcodec/main) on every commit to the `main` branch
+ # - to the (e.g.) `0.10` folder in the gh-pages branch whenever a corresponding tag is pushed, like `v0.10.0` (https://meta-pytorch.org/torchcodec/0.10).
+
+ needs: build-docs
+ if: github.repository == 'meta-pytorch/torchcodec' && github.event_name == 'push' &&
+ ((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out gh-pages branch
+ uses: actions/checkout@v6
+ with:
+ ref: gh-pages
+
+ - uses: actions/download-artifact@v4
+ with:
+ name: Built-Docs
+ path: docs-artifact/
+
+ - name: Update docs and push
+ run: |
+ set -euo pipefail
+
+ REF_TYPE=${{ github.ref_type }}
+ REF_NAME=${{ github.ref_name }}
+
+ if [[ "${REF_TYPE}" == branch ]]; then
+ TARGET_FOLDER="${REF_NAME}"
+ elif [[ "${REF_TYPE}" == tag ]]; then
+ case "${REF_NAME}" in
+ *-rc*)
+ echo "Aborting upload since this is an RC tag: ${REF_NAME}"
+ exit 0
+ ;;
+ *)
+ # Strip the leading "v" as well as the trailing patch version. For example:
+ # 'v0.10.2' -> '0.10'
+ TARGET_FOLDER=$(echo "${REF_NAME}" | sed 's/v\([0-9]\+\)\.\([0-9]\+\)\.[0-9]\+/\1.\2/')
+ ;;
+ esac
+ fi
+ echo "Target Folder: ${TARGET_FOLDER}"
+
+ mkdir -p "${TARGET_FOLDER}"
+ rm -rf "${TARGET_FOLDER}"/*
+ cp -r docs-artifact/* "${TARGET_FOLDER}/"
+ git add "${TARGET_FOLDER}"
+
+ git config user.name 'pytorchbot'
+ git config user.email 'soumith+bot@pytorch.org'
+ git commit -m "auto-generating sphinx docs for ${TARGET_FOLDER}" || echo "No changes to commit"
+ git push
diff --git a/.github/workflows/linux_wheel.yaml b/.github/workflows/linux_wheel.yaml
index cccbedc25..14d702a91 100644
--- a/.github/workflows/linux_wheel.yaml
+++ b/.github/workflows/linux_wheel.yaml
@@ -66,10 +66,12 @@ jobs:
ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1', '8.0']
needs: build
steps:
- - uses: actions/download-artifact@v4
- with:
- name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64
- path: pytorch/torchcodec/dist/
+ - name: Check out repo
+ uses: actions/checkout@v6
+
+ - name: Remove src/ folder
+ run: bash packaging/remove_src.sh
+
- name: Setup conda env
uses: conda-incubator/setup-miniconda@v3
with:
@@ -81,50 +83,82 @@ jobs:
miniforge-version: latest
activate-environment: test
python-version: ${{ matrix.python-version }}
+
- name: Update pip
run: python -m pip install --upgrade pip
+
- name: Install PyTorch
- run: |
- python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
+ run: bash packaging/install_pytorch.sh cpu "torch torchvision"
+
+ - uses: actions/download-artifact@v4
+ with:
+ name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64
+ path: dist/
+
- name: Install torchcodec from the wheel
+ run: bash packaging/install_torchcodec_wheel.sh
+
+ - name: Install ffmpeg, post build
+ run: bash packaging/install_ffmpeg.sh ${{ matrix.ffmpeg-version-for-tests }}
+
+ - name: Install test dependencies
+ run: bash packaging/install_test_dependencies.sh
+
+ - name: Run Python tests
run: |
- wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
- echo Installing $wheel_path
- python -m pip install $wheel_path -vvv
+ pytest --override-ini="addopts=-v" test
+ install-and-test-third-party-interface:
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ['3.10']
+ ffmpeg-version-for-tests: ['8.0']
+ needs: build
+ steps:
- name: Check out repo
uses: actions/checkout@v3
+
+ - name: Remove src/ folder
+ run: bash packaging/remove_src.sh
+
+ - name: Setup conda env
+ uses: conda-incubator/setup-miniconda@v3
+ with:
+ auto-update-conda: true
+ # Using miniforge instead of miniconda ensures that the default
+ # conda channel is conda-forge instead of main/default. This ensures
+ # ABI consistency between dependencies:
+ # https://conda-forge.org/docs/user/transitioning_from_defaults/
+ miniforge-version: latest
+ activate-environment: test
+ python-version: ${{ matrix.python-version }}
+
+ - name: Update pip
+ run: python -m pip install --upgrade pip
+
+ - name: Install PyTorch
+ run: bash packaging/install_pytorch.sh cpu "torch torchvision"
+
+ - uses: actions/download-artifact@v4
+ with:
+ name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64
+ path: dist/
+
+ - name: Install torchcodec from the wheel
+ run: bash packaging/install_torchcodec_wheel.sh
+
- name: Install ffmpeg, post build
- run: |
- # Ideally we would have checked for that before installing the wheel,
- # but we need to checkout the repo to access this file, and we don't
- # want to checkout the repo before installing the wheel to avoid any
- # side-effect. It's OK.
- source packaging/helpers.sh
- assert_ffmpeg_not_installed
+ run: bash packaging/install_ffmpeg.sh ${{ matrix.ffmpeg-version-for-tests }}
- conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge
- ffmpeg -version
+ - name: Install pkg-config
+ run: |
+ conda install pkg-config -c conda-forge
- name: Install test dependencies
- run: |
- # Ideally we would find a way to get those dependencies from pyproject.toml
- python -m pip install numpy pytest pillow
+ run: bash packaging/install_test_dependencies.sh
- - name: Delete the src/ folder just for fun
- run: |
- # The only reason we checked-out the repo is to get access to the
- # tests. We don't care about the rest. Out of precaution, we delete
- # the src/ folder to be extra sure that we're running the code from
- # the installed wheel rather than from the source.
- # This is just to be extra cautious and very overkill because a)
- # there's no way the `torchcodec` package from src/ can be found from
- # the PythonPath: the main point of `src/` is precisely to protect
- # against that and b) if we ever were to execute code from
- # `src/torchcodec`, it would fail loudly because the built .so files
- # aren't present there.
- rm -r src/
- ls
- name: Run Python tests
run: |
- pytest --override-ini="addopts=-v" test
+ pytest --override-ini="addopts=-v" test/third-party-interface
diff --git a/.github/workflows/macos_wheel.yaml b/.github/workflows/macos_wheel.yaml
index ead45784d..b183b80cd 100644
--- a/.github/workflows/macos_wheel.yaml
+++ b/.github/workflows/macos_wheel.yaml
@@ -68,58 +68,43 @@ jobs:
ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1', '8.0']
needs: build
steps:
- - name: Download wheel
- uses: actions/download-artifact@v4
- with:
- name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_
- path: pytorch/torchcodec/dist/
+ - name: Check out torchcodec repo
+ uses: actions/checkout@v6
+
+ - name: Remove src/ folder
+ run: bash packaging/remove_src.sh
- name: Setup conda env
uses: conda-incubator/setup-miniconda@v3
with:
auto-update-conda: true
- miniconda-version: "latest"
+ # Using miniforge instead of miniconda ensures that the default
+ # conda channel is conda-forge instead of main/default. This ensures
+ # ABI consistency between dependencies:
+ # https://conda-forge.org/docs/user/transitioning_from_defaults/
+ miniforge-version: latest
activate-environment: test
python-version: ${{ matrix.python-version }}
- name: Update pip
run: python -m pip install --upgrade pip
- name: Install PyTorch
- run: |
- python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
+ run: bash packaging/install_pytorch.sh cpu "torch torchvision"
- - name: Install torchcodec from the wheel
- run: |
- wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
- echo Installing $wheel_path
- python -m pip install $wheel_path -vvv
+ - name: Download wheel
+ uses: actions/download-artifact@v4
+ with:
+ name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_
+ path: dist/
- - name: Check out torchcodec repo
- uses: actions/checkout@v3
+ - name: Install torchcodec from the wheel
+ run: bash packaging/install_torchcodec_wheel.sh
- name: Install ffmpeg
- run: |
- conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge
- ffmpeg -version
+ run: bash packaging/install_ffmpeg.sh ${{ matrix.ffmpeg-version-for-tests }}
- name: Install test dependencies
- run: |
- python -m pip install numpy pytest pillow
-
- - name: Delete the src/ folder just for fun
- run: |
- # The only reason we checked-out the repo is to get access to the
- # tests. We don't care about the rest. Out of precaution, we delete
- # the src/ folder to be extra sure that we're running the code from
- # the installed wheel rather than from the source.
- # This is just to be extra cautious and very overkill because a)
- # there's no way the `torchcodec` package from src/ can be found from
- # the PythonPath: the main point of `src/` is precisely to protect
- # against that and b) if we ever were to execute code from
- # `src/torchcodec`, it would fail loudly because the built .so files
- # aren't present there.
- rm -r src/
- ls -lh
+ run: bash packaging/install_test_dependencies.sh
- name: Run Python tests
run: |
diff --git a/.github/workflows/paddle_wheel.yaml b/.github/workflows/paddle_wheel.yaml
index 4976e20c9..3ce2964c6 100644
--- a/.github/workflows/paddle_wheel.yaml
+++ b/.github/workflows/paddle_wheel.yaml
@@ -20,15 +20,61 @@ defaults:
run:
shell: bash -l -eo pipefail {0}
+env:
+ PADDLECODEC_TEST_VIDEO_URL: https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_video/example_video.mp4
+ PADDLECODEC_TEST_VIDEO_CACHE_KEY: paddlecodec-test-video-v1-example-video
+ PADDLECODEC_TEST_VIDEO_PATH: .github/test-assets/example_video.mp4
+
jobs:
- build-paddlecodec-wheel:
+ prepare-test-video:
runs-on: ubuntu-latest
+ name: Prepare cached Paddle test video
+ steps:
+ - name: Restore cached test video
+ id: cache-test-video
+ uses: actions/cache@v4
+ with:
+ path: ${{ env.PADDLECODEC_TEST_VIDEO_PATH }}
+ key: ${{ env.PADDLECODEC_TEST_VIDEO_CACHE_KEY }}
+
+ - name: Download test video
+ if: steps.cache-test-video.outputs.cache-hit != 'true'
+ run: |
+ mkdir -p "$(dirname "${PADDLECODEC_TEST_VIDEO_PATH}")"
+ curl --fail --location --retry 5 --retry-all-errors \
+ --output "${PADDLECODEC_TEST_VIDEO_PATH}" \
+ "${PADDLECODEC_TEST_VIDEO_URL}"
+
+ - name: Upload cached test video artifact
+ uses: actions/upload-artifact@v5
+ with:
+ name: paddlecodec-test-video
+ path: ${{ env.PADDLECODEC_TEST_VIDEO_PATH }}
+ if-no-files-found: error
+
+ build-paddlecodec-wheel:
+ name: Build and upload Paddle wheel (${{ matrix.arch-name }}, py${{ matrix.python-version }})
+ runs-on: ${{ matrix.runner }}
container:
- image: pytorch/manylinux2_28-builder:cpu
+ image: ${{ matrix.container-image }}
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+ arch: ["x86_64", "arm64"]
+ include:
+ - arch: x86_64
+ arch-name: x86_64
+ runner: ubuntu-latest
+ container-image: pytorch/manylinux2_28-builder:cpu
+ artifact-prefix: paddlecodec-wheel-linux
+ wheel-platform: manylinux_2_28_x86_64
+ - arch: arm64
+ arch-name: arm64
+ runner: ubuntu-24.04-arm
+ container-image: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64
+ artifact-prefix: paddlecodec-wheel-linux-arm64
+ wheel-platform: manylinux_2_28_aarch64
permissions:
id-token: write
contents: read
@@ -59,8 +105,8 @@ jobs:
- name: Build wheel
run: |
- # Use pre-built FFmpeg from PyTorch S3
export BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1
+ export I_CONFIRM_THIS_IS_NOT_A_LICENSE_VIOLATION=1
export TORCHCODEC_CMAKE_BUILD_DIR=$(pwd)/build_cmake
python -m build --wheel -vvv --no-isolation
@@ -68,29 +114,18 @@ jobs:
run: |
pip install auditwheel
- # 1. Extract internal libraries from the wheel to a temporary directory
- # This allows auditwheel to find them when checking dependencies
mkdir -p temp_libs
unzip -j dist/*.whl "torchcodec/*.so" -d temp_libs || true
- # 2. Prepare LD_LIBRARY_PATH
- # FFmpeg libraries
- FFMPEG_LIB_PATHS=$(find $(pwd)/build_cmake/_deps -type d -name "lib" | tr '\n' ':')
- # PaddlePaddle libraries
+ FFMPEG_LIB_PATHS=$(find "$(pwd)/build_cmake/_deps" -type d -name "lib" -print | paste -sd: -)
PADDLE_PATH=$(python -c "import paddle; print(paddle.__path__[0])")
PADDLE_LIB_PATHS="$PADDLE_PATH/base:$PADDLE_PATH/libs"
- # Wheel internal libraries
INTERNAL_LIB_PATH=$(pwd)/temp_libs
- export LD_LIBRARY_PATH=${FFMPEG_LIB_PATHS}${PADDLE_LIB_PATHS}:${INTERNAL_LIB_PATH}:${LD_LIBRARY_PATH}
+ export LD_LIBRARY_PATH=${FFMPEG_LIB_PATHS}:${PADDLE_LIB_PATHS}:${INTERNAL_LIB_PATH}:${LD_LIBRARY_PATH}
- # 3. Repair wheel with auditwheel
- # We exclude all external libraries because we want to rely on system libraries (like FFmpeg)
- # or libraries provided by other packages (like PaddlePaddle).
- # auditwheel 6.1.0+ supports wildcards in --exclude.
- auditwheel repair dist/*.whl --plat manylinux_2_28_x86_64 -w wheelhouse/ --exclude "*"
+ auditwheel repair dist/*.whl --plat ${{ matrix.wheel-platform }} -w wheelhouse/ --exclude "*"
- # Cleanup
rm -rf temp_libs
rm dist/*.whl
mv wheelhouse/*.whl dist/
@@ -99,7 +134,7 @@ jobs:
- name: Upload wheel artifact
uses: actions/upload-artifact@v5
with:
- name: paddlecodec-wheel-linux-py${{ matrix.python-version }}
+ name: ${{ matrix.artifact-prefix }}-py${{ matrix.python-version }}
path: dist/*.whl
- name: Run post-build script
@@ -113,31 +148,49 @@ jobs:
unzip -l $wheel_path
test-paddlecodec-wheel:
- needs: build-paddlecodec-wheel
- runs-on: ubuntu-latest
+ name: Install and test Paddle wheel (${{ matrix.arch-name }}, py${{ matrix.python-version }}, ffmpeg ${{ matrix.ffmpeg-version }})
+ needs: [prepare-test-video, build-paddlecodec-wheel]
+ runs-on: ${{ matrix.runner }}
+ container:
+ image: ${{ matrix.container-image }}
+ env:
+ PADDLECODEC_TEST_VIDEO: .github/test-assets/example_video.mp4
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+ arch: ["x86_64", "arm64"]
# FFmpeg 8.0 depends on libopenvino.so.2520, PaddlePaddle CPU depends on libopenvino.so.2500
# There has some conflict causing test failures, but it works with PaddlePaddle GPU.
# We skip FFmpeg 8.0 tests for PaddlePaddle CPU builds for now.
ffmpeg-version: ["4.4.2", "5.1.2", "6.1.1", "7.0.1"]
+ include:
+ - arch: x86_64
+ arch-name: x86_64
+ runner: ubuntu-latest
+ container-image: pytorch/manylinux2_28-builder:cpu
+ artifact-prefix: paddlecodec-wheel-linux
+ - arch: arm64
+ arch-name: arm64
+ runner: ubuntu-24.04-arm
+ container-image: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64
+ artifact-prefix: paddlecodec-wheel-linux-arm64
steps:
- name: Checkout repository
uses: actions/checkout@v6
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v5
- with:
- python-version: ${{ matrix.python-version }}
-
- name: Download wheel artifact
uses: actions/download-artifact@v4
with:
- name: paddlecodec-wheel-linux-py${{ matrix.python-version }}
+ name: ${{ matrix.artifact-prefix }}-py${{ matrix.python-version }}
path: dist/
+ - name: Download cached test video artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: paddlecodec-test-video
+ path: .github/test-assets/
+
- name: Install FFmpeg via conda
uses: conda-incubator/setup-miniconda@v3
with:
@@ -167,7 +220,6 @@ jobs:
- name: Delete src folder
run: |
- # Delete src/ to ensure we're testing the installed wheel, not source code
rm -rf src/
ls -la
@@ -177,18 +229,17 @@ jobs:
publish-pypi:
runs-on: ubuntu-latest
- name: Publish to PyPI
+ name: Publish Paddle wheels to PyPI
if: "startsWith(github.ref, 'refs/tags/')"
needs:
- test-paddlecodec-wheel
permissions:
id-token: write
-
steps:
- name: Retrieve release distributions
uses: actions/download-artifact@v6
with:
- pattern: paddlecodec-wheel-linux-*
+ pattern: paddlecodec-wheel-linux*
path: dist/
merge-multiple: true
@@ -197,7 +248,7 @@ jobs:
publish-release:
runs-on: ubuntu-latest
- name: Publish to GitHub
+ name: Publish Paddle wheels to GitHub
if: "startsWith(github.ref, 'refs/tags/')"
needs:
- test-paddlecodec-wheel
@@ -206,7 +257,7 @@ jobs:
steps:
- uses: actions/download-artifact@v6
with:
- pattern: paddlecodec-wheel-linux-*
+ pattern: paddlecodec-wheel-linux*
path: dist/
merge-multiple: true
- name: Get tag name
diff --git a/.github/workflows/reference_resources.yaml b/.github/workflows/reference_resources.yaml
index 8f97378f1..135799731 100644
--- a/.github/workflows/reference_resources.yaml
+++ b/.github/workflows/reference_resources.yaml
@@ -53,42 +53,41 @@ jobs:
fail-fast: false
matrix:
python-version: ['3.10']
- ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1']
+ # Traditionally we generate the resources locally on FFmpeg 4 or 6.
+        # The exact version shouldn't matter as long as the unit tests pass
+        # across all versions for a given generated resource.
+ ffmpeg-version-for-tests: ['6.1.1']
steps:
- - uses: actions/download-artifact@v4
- with:
- name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64
- path: pytorch/torchcodec/dist/
+ - name: Check out repo
+ uses: actions/checkout@v3
+
- name: Setup conda env
- uses: conda-incubator/setup-miniconda@v2
+ uses: conda-incubator/setup-miniconda@v3
with:
auto-update-conda: true
- miniconda-version: "latest"
+ miniforge-version: latest
activate-environment: test
python-version: ${{ matrix.python-version }}
- - name: Install ffmpeg
- run: |
- conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge
- ffmpeg -version
-
- name: Update pip
run: python -m pip install --upgrade pip
- - name: Install generation dependencies
- run: |
- # Note that we're installing stable - this is for running a script where we're a normal PyTorch
- # user, not for building TorhCodec.
- python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
- python -m pip install numpy pillow pytest
+ - name: Install PyTorch
+ run: bash packaging/install_pytorch.sh cpu "torch"
+
+ - uses: actions/download-artifact@v4
+ with:
+ name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64
+ path: dist/
- name: Install torchcodec from the wheel
- run: |
- wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
- echo Installing $wheel_path
- python -m pip install $wheel_path -vvv
- - name: Check out repo
- uses: actions/checkout@v3
+ run: bash packaging/install_torchcodec_wheel.sh
+
+ - name: Install ffmpeg, post build
+ run: bash packaging/install_ffmpeg.sh ${{ matrix.ffmpeg-version-for-tests }}
+
+ - name: Install test dependencies
+ run: bash packaging/install_test_dependencies.sh
- name: Run generation reference resources
run: |
diff --git a/.github/workflows/windows_wheel.yaml b/.github/workflows/windows_wheel.yaml
index 8a9b5b740..72df7c9c0 100644
--- a/.github/workflows/windows_wheel.yaml
+++ b/.github/workflows/windows_wheel.yaml
@@ -74,12 +74,14 @@ jobs:
ffmpeg-version-for-tests: ['4.4.2', '6.1.1', '7.0.1', '8.0']
needs: build
steps:
- - uses: actions/download-artifact@v4
- with:
- name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x64
- path: pytorch/torchcodec/dist/
+ - name: Check out repo
+ uses: actions/checkout@v6
+
+ - name: Remove src/ folder
+ run: bash packaging/remove_src.sh
+
- name: Setup conda env
- uses: conda-incubator/setup-miniconda@v2
+ uses: conda-incubator/setup-miniconda@v3
with:
auto-update-conda: true
# Using miniforge instead of miniconda ensures that the default
@@ -91,48 +93,24 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Update pip
run: python -m pip install --upgrade pip
+
- name: Install PyTorch
- run: |
- python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
+ run: bash packaging/install_pytorch.sh cpu "torch torchvision"
+
+ - uses: actions/download-artifact@v4
+ with:
+ name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x64
+ path: dist/
+
- name: Install torchcodec from the wheel
- run: |
- wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
- echo Installing $wheel_path
- python -m pip install $wheel_path -vvv
- - name: Check out repo
- uses: actions/checkout@v3
+ run: bash packaging/install_torchcodec_wheel.sh
+
- name: Install ffmpeg, post build
- run: |
- # Ideally we would have checked for that before installing the wheel,
- # but we need to checkout the repo to access this file, and we don't
- # want to checkout the repo before installing the wheel to avoid any
- # side-effect. It's OK.
- source packaging/helpers.sh
- assert_ffmpeg_not_installed
- conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge
- ffmpeg -version
- - name: Test torchcodec import after FFmpeg installation
- run: |
- echo "Testing torchcodec import after FFmpeg is installed and PATH is updated..."
- python -c "import torchcodec; print('TorchCodec import successful!')"
+ # need -l for conda to be exposed
+ run: bash -l packaging/install_ffmpeg.sh ${{ matrix.ffmpeg-version-for-tests }}
+
- name: Install test dependencies
- run: |
- # Ideally we would find a way to get those dependencies from pyproject.toml
- python -m pip install numpy pytest pillow
- - name: Delete the src/ folder just for fun
- run: |
- # The only reason we checked-out the repo is to get access to the
- # tests. We don't care about the rest. Out of precaution, we delete
- # the src/ folder to be extra sure that we're running the code from
- # the installed wheel rather than from the source.
- # This is just to be extra cautious and very overkill because a)
- # there's no way the `torchcodec` package from src/ can be found from
- # the PythonPath: the main point of `src/` is precisely to protect
- # against that and b) if we ever were to execute code from
- # `src/torchcodec`, it would fail loudly because the built .so files
- # aren't present there.
- rm -r src/
- ls
+ run: bash packaging/install_test_dependencies.sh
+
- name: Run Python tests
- run: |
- pytest test -vvv
+ run: pytest --override-ini="addopts=-v" test
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 84e404a3e..775c100d0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,6 +14,14 @@ repos:
- id: check-added-large-files
args: ['--maxkb=1000']
+ - repo: https://github.com/asottile/pyupgrade
+ rev: v3.21.2
+ hooks:
+ - id: pyupgrade
+ args: [--py310-plus]
+ files: ^(test|src)/
+ exclude: ^examples/
+
- repo: https://github.com/omnilib/ufmt
rev: v2.6.0
hooks:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0f2d0de2d..18dc98c01 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,13 @@
cmake_minimum_required(VERSION 3.18)
project(TorchCodec)
+# Define LINUX platform variable globally
+if (UNIX AND NOT APPLE)
+ set(LINUX TRUE)
+else()
+ set(LINUX FALSE)
+endif()
+
add_subdirectory(src/torchcodec/_core)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6c42e98f2..7a651d4e6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -30,7 +30,7 @@ Start by installing the **nightly** build of PyTorch following the
Then, the easiest way to install the rest of the dependencies is to run:
```bash
-conda install cmake pkg-config pybind11 "ffmpeg<8" -c conda-forge
+conda install cmake pkg-config pybind11 "ffmpeg" -c conda-forge
```
### Clone and build
@@ -114,6 +114,34 @@ all of them, you can use a regex like
Run `make clean` from time to time if you encounter issues.
+### Serving docs locally (if building from a GPU env)
+
+If you're developing locally, you can just open the generated `index.html` file
+in your browser.
+
+If instead you're using a remote machine, you can use a combination of a simple
+python HTTP server and port forwarding to serve the docs locally. This allows
+you to iterate on the documentation much more quickly than relying on
+PR previews.
+
+To do so, after following the above doc build steps, run the following from
+the `docs/build/html` folder:
+
+```
+python -m http.server 8000 # or any free port
+```
+
+This will open up a simple HTTP server serving the files in the build directory.
+If this is done on a remote machine, you can set up port forwarding from your
+local machine to access the server, for example:
+
+```
+ssh -L 9000:localhost:8000 $REMOTE_DEV_HOST
+```
+
+Now, you can navigate to `localhost:9000` on your local machine to view the
+rendered documentation.
+
## License
By contributing to TorchCodec, you agree that your contributions will be
diff --git a/README.md b/README.md
index 5c84184d2..73de0969f 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[**Installation**](#installing-torchcodec) | [**Simple Example**](#using-torchcodec) | [**Detailed Example**](https://pytorch.org/torchcodec/stable/generated_examples/) | [**Documentation**](https://pytorch.org/torchcodec) | [**Contributing**](CONTRIBUTING.md) | [**License**](#license)
+[**Installation**](#installing-torchcodec) | [**Simple Example**](#using-torchcodec) | [**Detailed Example**](https://meta-pytorch.org/torchcodec/stable/generated_examples/) | [**Documentation**](https://meta-pytorch.org/torchcodec) | [**Contributing**](CONTRIBUTING.md) | [**License**](#license)
# PaddleCodec
@@ -26,9 +26,9 @@ The original README.md content is as follows:
---
TorchCodec is a Python library for decoding video and audio data into PyTorch
-tensors, on CPU and CUDA GPU. It also supports audio encoding, and video
-encoding will come soon! It aims to be fast, easy to use, and well integrated
-into the PyTorch ecosystem. If you want to use PyTorch to train ML models on
+tensors, on CPU and CUDA GPU. It also supports video and audio encoding on CPU!
+It aims to be fast, easy to use, and well integrated
+into the PyTorch ecosystem. If you want to use PyTorch to train ML models on
videos and audio, TorchCodec is how you turn these into data.
We achieve these capabilities through:
@@ -46,7 +46,7 @@ We achieve these capabilities through:
Here's a condensed summary of what you can do with TorchCodec. For more detailed
examples, [check out our
-documentation](https://pytorch.org/torchcodec/stable/generated_examples/)!
+documentation](https://meta-pytorch.org/torchcodec/stable/generated_examples/)!
#### Decoding
@@ -130,40 +130,45 @@ ffmpeg -f lavfi -i \
versions, refer to the table below for compatibility between versions of
`torch` and `torchcodec`.
-2. Install FFmpeg, if it's not already installed. Linux distributions usually
- come with FFmpeg pre-installed. TorchCodec supports major FFmpeg versions
- in [4, 7] on all platforms, and FFmpeg version 8 is supported on Mac and Linux.
+2. Install FFmpeg, if it's not already installed. TorchCodec supports
+ all major FFmpeg versions in [4, 8].
+ Linux distributions usually come with FFmpeg pre-installed. You'll need
+ FFmpeg that comes with separate shared libraries. This is especially relevant
+ for Windows users: these are usually called the "shared" releases.
If FFmpeg is not already installed, or you need a more recent version, an
easy way to install it is to use `conda`:
```bash
- conda install "ffmpeg<8"
+ conda install "ffmpeg"
# or
- conda install "ffmpeg<8" -c conda-forge
+ conda install "ffmpeg" -c conda-forge
```
3. Install TorchCodec:
```bash
- pip install torchcodec
+ pip install torchcodec --index-url=https://download.pytorch.org/whl/cpu
```
The following table indicates the compatibility between versions of
`torchcodec`, `torch` and Python.
-| `torchcodec` | `torch` | Python |
-| ------------------ | ------------------ | ------------------ |
-| `main` / `nightly` | `main` / `nightly` | `>=3.10`, `<=3.13` |
-| `0.8` | `2.9` | `>=3.10`, `<=3.13` |
-| `0.7` | `2.8` | `>=3.9`, `<=3.13` |
-| `0.6` | `2.8` | `>=3.9`, `<=3.13` |
-| `0.5` | `2.7` | `>=3.9`, `<=3.13` |
-| `0.4` | `2.7` | `>=3.9`, `<=3.13` |
-| `0.3` | `2.7` | `>=3.9`, `<=3.13` |
-| `0.2` | `2.6` | `>=3.9`, `<=3.13` |
-| `0.1` | `2.5` | `>=3.9`, `<=3.12` |
-| `0.0.3` | `2.4` | `>=3.8`, `<=3.12` |
+| `torchcodec` | `torch` | Python |
+| ------------------ | ------------------ | ------------------- |
+| `main` / `nightly` | `main` / `nightly` | `>=3.10`, `<=3.14` |
+| `0.11` | `2.11` | `>=3.10`, `<=3.14` |
+| `0.10` | `2.10` | `>=3.10`, `<=3.14` |
+| `0.9` | `2.9` | `>=3.10`, `<=3.14` |
+| `0.8` | `2.9` | `>=3.10`, `<=3.13` |
+| `0.7` | `2.8` | `>=3.9`, `<=3.13` |
+| `0.6` | `2.8` | `>=3.9`, `<=3.13` |
+| `0.5` | `2.7` | `>=3.9`, `<=3.13` |
+| `0.4` | `2.7` | `>=3.9`, `<=3.13` |
+| `0.3` | `2.7` | `>=3.9`, `<=3.13` |
+| `0.2` | `2.6` | `>=3.9`, `<=3.13` |
+| `0.1` | `2.5` | `>=3.9`, `<=3.12` |
+| `0.0.3` | `2.4` | `>=3.8`, `<=3.12` |
### Installing CUDA-enabled TorchCodec
@@ -172,16 +177,15 @@ format you want. Refer to Nvidia's GPU support matrix for more details
[here](https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new).
1. Install FFmpeg with NVDEC support.
- TorchCodec with CUDA should work with FFmpeg versions in [4, 7] on all platforms,
- and FFmpeg version 8 is supported on Linux.
+ TorchCodec with CUDA should work with FFmpeg versions in [4, 8].
If FFmpeg is not already installed, or you need a more recent version, an
easy way to install it is to use `conda`:
```bash
- conda install "ffmpeg<8"
+ conda install "ffmpeg"
# or
- conda install "ffmpeg<8" -c conda-forge
+ conda install "ffmpeg" -c conda-forge
```
After installing FFmpeg make sure it has NVDEC support when you list the supported
@@ -208,17 +212,19 @@ format you want. Refer to Nvidia's GPU support matrix for more details
3. Install TorchCodec
- Pass in an `--index-url` parameter that corresponds to your CUDA Toolkit
- version, for example:
+ On Linux, `pip install torchcodec` defaults to a CUDA wheel,
+ matching the default behavior of `pip install torch`.
```bash
- # This corresponds to CUDA Toolkit version 12.6. It should be the same one
- # you used when you installed PyTorch (If you installed PyTorch with pip).
- pip install torchcodec --index-url=https://download.pytorch.org/whl/cu126
+ pip install torchcodec
```
+ Use `--index-url` to select a different CUDA Toolkit version:
- Note that without passing in the `--index-url` parameter, `pip` installs
- the CPU-only version of TorchCodec.
+ ```bash
+ # This corresponds to CUDA Toolkit version 13.0. It should be the same one
+ # you used when you installed PyTorch (If you installed PyTorch with pip).
+ pip install torchcodec --index-url=https://download.pytorch.org/whl/cu130
+ ```
#### Windows
@@ -242,7 +248,7 @@ The bottom row is [promotional video from NASA](https://download.pytorch.org/tor
that has a resolution of 960x540 at 29.7 fps and is 206 seconds long. Both videos were
encoded with libx264 and yuv420p pixel format. All decoders, except for TorchVision, used FFmpeg 6.1.2. TorchVision used FFmpeg 4.2.2.
-For TorchCodec, the "approx" label means that it was using [approximate mode](https://pytorch.org/torchcodec/stable/generated_examples/approximate_mode.html)
+For TorchCodec, the "approx" label means that it was using [approximate mode](https://meta-pytorch.org/torchcodec/stable/generated_examples/decoding/approximate_mode.html)
for seeking.
## Contributing
diff --git a/benchmarks/decoders/benchmark_decoders_library.py b/benchmarks/decoders/benchmark_decoders_library.py
index a975aec7e..57174ab89 100644
--- a/benchmarks/decoders/benchmark_decoders_library.py
+++ b/benchmarks/decoders/benchmark_decoders_library.py
@@ -14,14 +14,8 @@
import torch
import torch.utils.benchmark as benchmark
-from torchcodec._core import (
- _add_video_stream,
- create_from_file,
- get_frames_at_indices,
- get_frames_by_pts,
- get_next_frame,
- seek_to_pts,
-)
+from torchcodec._core import get_frames_at_indices, get_frames_by_pts, get_next_frame
+from torchcodec._core.ops import _add_video_stream, create_from_file, seek_to_pts
from torchcodec._frame import FrameBatch
from torchcodec.decoders import VideoDecoder, VideoStreamMetadata
diff --git a/benchmarks/decoders/benchmark_transforms.py b/benchmarks/decoders/benchmark_transforms.py
index 75a49d63b..01222f403 100644
--- a/benchmarks/decoders/benchmark_transforms.py
+++ b/benchmarks/decoders/benchmark_transforms.py
@@ -5,14 +5,11 @@
import torch
from torch import Tensor
-from torchcodec._core import add_video_stream, create_from_file, get_frames_by_pts
from torchcodec.decoders import VideoDecoder
from torchvision.transforms import v2
-DEFAULT_NUM_EXP = 20
-
-def bench(f, *args, num_exp=DEFAULT_NUM_EXP, warmup=1) -> Tensor:
+def bench(f, *args, num_exp, warmup=1) -> Tensor:
for _ in range(warmup):
f(*args)
@@ -45,37 +42,55 @@ def report_stats(times: Tensor, unit: str = "ms", prefix: str = "") -> float:
def torchvision_resize(
- path: Path, pts_seconds: list[float], dims: tuple[int, int]
-) -> None:
- decoder = create_from_file(str(path), seek_mode="approximate")
- add_video_stream(decoder)
- raw_frames, *_ = get_frames_by_pts(decoder, timestamps=pts_seconds)
- return v2.functional.resize(raw_frames, size=dims)
+ path: Path, pts_seconds: list[float], dims: tuple[int, int], num_threads: int
+) -> Tensor:
+ decoder = VideoDecoder(
+ path, seek_mode="approximate", num_ffmpeg_threads=num_threads
+ )
+ raw_frames = decoder.get_frames_played_at(pts_seconds)
+ transformed_frames = v2.Resize(size=dims)(raw_frames.data)
+ assert len(transformed_frames) == len(pts_seconds)
+ return transformed_frames
def torchvision_crop(
- path: Path, pts_seconds: list[float], dims: tuple[int, int], x: int, y: int
-) -> None:
- decoder = create_from_file(str(path), seek_mode="approximate")
- add_video_stream(decoder)
- raw_frames, *_ = get_frames_by_pts(decoder, timestamps=pts_seconds)
- return v2.functional.crop(raw_frames, top=y, left=x, height=dims[0], width=dims[1])
-
-
-def decoder_native_resize(
- path: Path, pts_seconds: list[float], dims: tuple[int, int]
-) -> None:
- decoder = create_from_file(str(path), seek_mode="approximate")
- add_video_stream(decoder, transform_specs=f"resize, {dims[0]}, {dims[1]}")
- return get_frames_by_pts(decoder, timestamps=pts_seconds)[0]
-
-
-def decoder_native_crop(
- path: Path, pts_seconds: list[float], dims: tuple[int, int], x: int, y: int
-) -> None:
- decoder = create_from_file(str(path), seek_mode="approximate")
- add_video_stream(decoder, transform_specs=f"crop, {dims[0]}, {dims[1]}, {x}, {y}")
- return get_frames_by_pts(decoder, timestamps=pts_seconds)[0]
+ path: Path, pts_seconds: list[float], dims: tuple[int, int], num_threads: int
+) -> Tensor:
+ decoder = VideoDecoder(
+ path, seek_mode="approximate", num_ffmpeg_threads=num_threads
+ )
+ raw_frames = decoder.get_frames_played_at(pts_seconds)
+ transformed_frames = v2.CenterCrop(size=dims)(raw_frames.data)
+ assert len(transformed_frames) == len(pts_seconds)
+ return transformed_frames
+
+
+def decoder_resize(
+ path: Path, pts_seconds: list[float], dims: tuple[int, int], num_threads: int
+) -> Tensor:
+ decoder = VideoDecoder(
+ path,
+ transforms=[v2.Resize(size=dims)],
+ seek_mode="approximate",
+ num_ffmpeg_threads=num_threads,
+ )
+ transformed_frames = decoder.get_frames_played_at(pts_seconds).data
+ assert len(transformed_frames) == len(pts_seconds)
+ return transformed_frames.data
+
+
+def decoder_crop(
+ path: Path, pts_seconds: list[float], dims: tuple[int, int], num_threads: int
+) -> Tensor:
+ decoder = VideoDecoder(
+ path,
+ transforms=[v2.CenterCrop(size=dims)],
+ seek_mode="approximate",
+ num_ffmpeg_threads=num_threads,
+ )
+ transformed_frames = decoder.get_frames_played_at(pts_seconds).data
+ assert len(transformed_frames) == len(pts_seconds)
+ return transformed_frames
def main():
@@ -84,9 +99,27 @@ def main():
parser.add_argument(
"--num-exp",
type=int,
- default=DEFAULT_NUM_EXP,
+ default=5,
help="number of runs to average over",
)
+ parser.add_argument(
+ "--num-threads",
+ type=int,
+ default=1,
+ help="number of threads to use; 0 means FFmpeg decides",
+ )
+ parser.add_argument(
+ "--total-frame-fractions",
+ nargs="+",
+ type=float,
+ default=[0.005, 0.01, 0.05, 0.1],
+ )
+ parser.add_argument(
+ "--input-dimension-fractions",
+ nargs="+",
+ type=float,
+ default=[0.5, 0.25, 0.125],
+ )
args = parser.parse_args()
path = Path(args.path)
@@ -100,10 +133,8 @@ def main():
input_height = metadata.height
input_width = metadata.width
- fraction_of_total_frames_to_sample = [0.005, 0.01, 0.05, 0.1]
- fraction_of_input_dimensions = [0.5, 0.25, 0.125]
- for num_fraction in fraction_of_total_frames_to_sample:
+ for num_fraction in args.total_frame_fractions:
num_frames_to_sample = math.ceil(metadata.num_frames * num_fraction)
print(
f"Sampling {num_fraction * 100}%, {num_frames_to_sample}, of {metadata.num_frames} frames"
@@ -112,51 +143,49 @@ def main():
i * duration / num_frames_to_sample for i in range(num_frames_to_sample)
]
- for dims_fraction in fraction_of_input_dimensions:
+ for dims_fraction in args.input_dimension_fractions:
dims = (int(input_height * dims_fraction), int(input_width * dims_fraction))
times = bench(
- torchvision_resize, path, uniform_timestamps, dims, num_exp=args.num_exp
+ torchvision_resize,
+ path,
+ uniform_timestamps,
+ dims,
+ args.num_threads,
+ num_exp=args.num_exp,
)
report_stats(times, prefix=f"torchvision_resize({dims})")
times = bench(
- decoder_native_resize,
+ decoder_resize,
path,
uniform_timestamps,
dims,
+ args.num_threads,
num_exp=args.num_exp,
)
- report_stats(times, prefix=f"decoder_native_resize({dims})")
- print()
+ report_stats(times, prefix=f"decoder_resize({dims})")
- center_x = (input_height - dims[0]) // 2
- center_y = (input_width - dims[1]) // 2
times = bench(
torchvision_crop,
path,
uniform_timestamps,
dims,
- center_x,
- center_y,
+ args.num_threads,
num_exp=args.num_exp,
)
- report_stats(
- times, prefix=f"torchvision_crop({dims}, {center_x}, {center_y})"
- )
+ report_stats(times, prefix=f"torchvision_crop({dims})")
times = bench(
- decoder_native_crop,
+ decoder_crop,
path,
uniform_timestamps,
dims,
- center_x,
- center_y,
+ args.num_threads,
num_exp=args.num_exp,
)
- report_stats(
- times, prefix=f"decoder_native_crop({dims}, {center_x}, {center_y})"
- )
+ report_stats(times, prefix=f"decoder_crop({dims})")
+
print()
diff --git a/benchmarks/decoders/gpu_benchmark.py b/benchmarks/decoders/gpu_benchmark.py
index 4300643dd..638737e88 100644
--- a/benchmarks/decoders/gpu_benchmark.py
+++ b/benchmarks/decoders/gpu_benchmark.py
@@ -7,8 +7,9 @@
import torch.utils.benchmark as benchmark
-import torchcodec
import torchvision.transforms.v2.functional as F
+from torchcodec._core import get_next_frame
+from torchcodec._core.ops import _add_video_stream, create_from_file
RESIZED_WIDTH = 256
RESIZED_HEIGHT = 256
@@ -25,7 +26,7 @@ def decode_full_video(video_path, decode_device_string, resize_device_string):
# We use the core API instead of SimpleVideoDecoder because the core API
# allows us to natively resize as part of the decode step.
print(f"{decode_device_string=} {resize_device_string=}")
- decoder = torchcodec._core.create_from_file(video_path)
+ decoder = create_from_file(video_path)
num_threads = None
if "cuda" in decode_device_string:
num_threads = 1
@@ -34,7 +35,7 @@ def decode_full_video(video_path, decode_device_string, resize_device_string):
if "native" in resize_device_string:
resize_spec = f"resize, {RESIZED_HEIGHT}, {RESIZED_WIDTH}"
- torchcodec._core._add_video_stream(
+ _add_video_stream(
decoder,
stream_index=-1,
device=decode_device_string,
@@ -46,7 +47,7 @@ def decode_full_video(video_path, decode_device_string, resize_device_string):
frame_count = 0
while True:
try:
- frame, *_ = torchcodec._core.get_next_frame(decoder)
+ frame, *_ = get_next_frame(decoder)
if resize_device_string != "none" and "native" not in resize_device_string:
frame = transfer_and_resize_frame(frame, resize_device_string)
diff --git a/benchmarks/decoders/memprofile_decoders.py b/benchmarks/decoders/memprofile_decoders.py
index 16bc42dc6..a78eb1263 100644
--- a/benchmarks/decoders/memprofile_decoders.py
+++ b/benchmarks/decoders/memprofile_decoders.py
@@ -9,7 +9,8 @@
import torch
from memory_profiler import profile
-from torchcodec._core import add_video_stream, create_from_file, get_next_frame
+from torchcodec._core import get_next_frame
+from torchcodec._core.ops import add_video_stream, create_from_file
torch._dynamo.config.cache_size_limit = 100
torch._dynamo.config.capture_dynamic_output_shape_ops = True
diff --git a/benchmarks/encoders/benchmark_encoders.py b/benchmarks/encoders/benchmark_encoders.py
new file mode 100644
index 000000000..f59501f80
--- /dev/null
+++ b/benchmarks/encoders/benchmark_encoders.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python3
+import shutil
+import subprocess
+import tempfile
+from argparse import ArgumentParser
+from pathlib import Path
+from time import perf_counter_ns
+
+import pynvml
+import torch
+from torchcodec.decoders import VideoDecoder
+from torchcodec.encoders import VideoEncoder
+
+pynvml.nvmlInit()
+handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+
+FRAME_RATE = 30
+DEFAULT_VIDEO_PATH = "test/resources/nasa_13013.mp4"
+# Alternatively, run this command to generate a longer test video:
+# ffmpeg -f lavfi -i testsrc2=duration=600:size=1280x720:rate=30 -c:v libx264 -pix_fmt yuv420p test/resources/testsrc2_10min.mp4
+
+
+def bench(f, average_over=50, warmup=2, gpu_monitoring=False, **f_kwargs):
+ for _ in range(warmup):
+ f(**f_kwargs)
+
+ times = []
+ utilizations = []
+ memory_usage = []
+
+ for _ in range(average_over):
+ start = perf_counter_ns()
+ f(**f_kwargs)
+ end = perf_counter_ns()
+ times.append(end - start)
+
+ if gpu_monitoring:
+ util = pynvml.nvmlDeviceGetEncoderUtilization(handle)[0]
+ mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+ mem_used = mem_info.used / (1_000_000) # Convert bytes to MB
+ utilizations.append(util)
+ memory_usage.append(mem_used)
+
+ times_tensor = torch.tensor(times).float()
+ return times_tensor, {
+ "utilization": torch.tensor(utilizations).float() if gpu_monitoring else None,
+ "memory_used": torch.tensor(memory_usage).float() if gpu_monitoring else None,
+ }
+
+
+def report_stats(times, num_frames, nvenc_metrics=None, prefix="", unit="ms"):
+ fps = num_frames * 1e9 / times.median()
+
+ mul = {
+ "ns": 1,
+ "µs": 1e-3,
+ "ms": 1e-6,
+ "s": 1e-9,
+ }[unit]
+ unit_times = times * mul
+ med = unit_times.median().item()
+ max = unit_times.max().item()
+ print(f"\n{prefix} {med = :.2f} {unit}, {max = :.2f} {unit}, fps = {fps:.1f}")
+
+ if nvenc_metrics is not None:
+ mem_used_max = nvenc_metrics["memory_used"].max().item()
+ mem_used_median = nvenc_metrics["memory_used"].median().item()
+ util_max = nvenc_metrics["utilization"].max().item()
+
+ print(
+ f"GPU memory used: med = {mem_used_median:.1f} MB, max = {mem_used_max:.1f} MB"
+ )
+ print(
+ f"NVENC utilization: med = {nvenc_metrics['utilization'].median():.1f}%, max = {util_max:.1f}%"
+ )
+
+
+def encode_torchcodec(frames, output_path, device="cpu"):
+ encoder = VideoEncoder(frames=frames, frame_rate=FRAME_RATE)
+ if device == "cuda":
+ encoder.to_file(dest=output_path, codec="h264_nvenc", extra_options={"qp": 0})
+ else:
+ encoder.to_file(dest=output_path, codec="libx264", crf=0)
+
+
+def write_raw_frames(frames, raw_path):
+ # Convert NCHW to NHWC for raw video format
+ raw_frames = frames.permute(0, 2, 3, 1)
+ with open(raw_path, "wb") as f:
+ f.write(raw_frames.cpu().numpy().tobytes())
+
+
+def encode_ffmpeg_cli(
+ frames, raw_path, output_path, device="cpu", skip_write_frames=False
+):
+ # Write frames during benchmarking function by default unless skip_write_frames flag used
+ if not skip_write_frames:
+ write_raw_frames(frames, raw_path)
+
+ ffmpeg_cmd = [
+ "ffmpeg",
+ "-y",
+ "-f",
+ "rawvideo",
+ "-pix_fmt",
+ "rgb24",
+ "-s",
+ f"{frames.shape[3]}x{frames.shape[2]}",
+ "-r",
+ str(FRAME_RATE),
+ "-i",
+ raw_path,
+ "-c:v",
+ "h264_nvenc" if device == "cuda" else "libx264",
+ "-pix_fmt",
+ "yuv420p",
+ ]
+ ffmpeg_cmd.extend(["-qp", "0"] if device == "cuda" else ["-crf", "0"])
+ ffmpeg_cmd.extend([str(output_path)])
+ subprocess.run(ffmpeg_cmd, check=True, capture_output=True)
+
+
+def main():
+ parser = ArgumentParser()
+ parser.add_argument(
+ "--path", type=str, help="Path to input video file", default=DEFAULT_VIDEO_PATH
+ )
+ parser.add_argument(
+ "--average-over",
+ type=int,
+ default=30,
+ help="Number of runs to average over",
+ )
+ parser.add_argument(
+ "--max-frames",
+ type=int,
+ default=None,
+ help="Maximum number of frames to decode for benchmarking. By default, all frames will be decoded.",
+ )
+ parser.add_argument(
+ "--skip-write-frames",
+ action="store_true",
+ help="Do not write raw frames in FFmpeg CLI benchmarks",
+ )
+ args = parser.parse_args()
+ decoder = VideoDecoder(str(args.path))
+ frames = decoder.get_frames_in_range(start=0, stop=args.max_frames).data
+
+ cuda_available = torch.cuda.is_available()
+ if not cuda_available:
+ print("CUDA not available. GPU benchmarks will be skipped.")
+
+ print(
+ f"Benchmarking {len(frames)} frames from {Path(args.path).name} over {args.average_over} runs:"
+ )
+ gpu_frames = frames.cuda() if cuda_available else None
+ print(
+ f"Decoded {frames.shape[0]} frames of size {frames.shape[2]}x{frames.shape[3]}"
+ )
+
+ temp_dir = Path(tempfile.mkdtemp())
+ raw_frames_path = temp_dir / "input_frames.raw"
+
+ # If skip_write_frames is True, we will not benchmark the time it takes to write the frames.
+ # Here, we still write the frames for FFmpeg to use!
+ if args.skip_write_frames:
+ write_raw_frames(frames, str(raw_frames_path))
+
+ if cuda_available:
+ # Benchmark torchcodec on GPU
+ gpu_output = temp_dir / "torchcodec_gpu.mp4"
+ times, nvenc_metrics = bench(
+ encode_torchcodec,
+ frames=gpu_frames,
+ output_path=str(gpu_output),
+ device="cuda",
+ gpu_monitoring=True,
+ average_over=args.average_over,
+ )
+ report_stats(
+ times, frames.shape[0], nvenc_metrics, prefix="VideoEncoder on GPU"
+ )
+ # Benchmark FFmpeg CLI on GPU
+ ffmpeg_gpu_output = temp_dir / "ffmpeg_gpu.mp4"
+ times, nvenc_metrics = bench(
+ encode_ffmpeg_cli,
+ frames=gpu_frames,
+ raw_path=str(raw_frames_path),
+ output_path=str(ffmpeg_gpu_output),
+ device="cuda",
+ gpu_monitoring=True,
+ skip_write_frames=args.skip_write_frames,
+ average_over=args.average_over,
+ )
+ prefix = "FFmpeg CLI on GPU "
+ report_stats(times, frames.shape[0], nvenc_metrics, prefix=prefix)
+
+ # Benchmark torchcodec on CPU
+ cpu_output = temp_dir / "torchcodec_cpu.mp4"
+ times, _nvenc_metrics = bench(
+ encode_torchcodec,
+ frames=frames,
+ output_path=str(cpu_output),
+ device="cpu",
+ average_over=args.average_over,
+ )
+ report_stats(times, frames.shape[0], prefix="VideoEncoder on CPU")
+
+ # Benchmark FFmpeg CLI on CPU
+ ffmpeg_cpu_output = temp_dir / "ffmpeg_cpu.mp4"
+ times, _nvenc_metrics = bench(
+ encode_ffmpeg_cli,
+ frames=frames,
+ raw_path=str(raw_frames_path),
+ output_path=str(ffmpeg_cpu_output),
+ device="cpu",
+ skip_write_frames=args.skip_write_frames,
+ average_over=args.average_over,
+ )
+ prefix = "FFmpeg CLI on CPU "
+ report_stats(times, frames.shape[0], prefix=prefix)
+
+ shutil.rmtree(temp_dir, ignore_errors=True)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/docs/requirements.txt b/docs/requirements.txt
index ba6848490..5ac0663e1 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,12 +1,15 @@
-sphinx-gallery>0.11
-sphinx==5.0.0
-sphinx_design
+sphinx==7.2.6
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
+sphinx-gallery>=0.14.0
+sphinx_design>=0.6.1
sphinx_copybutton
sphinx-tabs
+sphinx-sitemap>=2.7.1
+sphinxcontrib-mermaid>=1.0.0
+docutils>=0.18.1,<0.21
matplotlib
torchvision
ipython
fsspec
aiohttp
joblib
--e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
diff --git a/docs/source/_static/css/custom_torchcodec.css b/docs/source/_static/css/custom_torchcodec.css
deleted file mode 100644
index 6c702e1f2..000000000
--- a/docs/source/_static/css/custom_torchcodec.css
+++ /dev/null
@@ -1,192 +0,0 @@
-/**
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-/* sphinx-design styles for cards/tabs */
-
-
-:root {
- --sd-color-info: #ee4c2c;
- --sd-color-primary: #6c6c6d;
- --sd-color-primary-highlight: #f3f4f7;
- --sd-color-card-border-hover: #ee4c2c;
- --sd-color-card-border: #f3f4f7;
- --sd-color-card-background: #fff;
- --sd-color-card-text: inherit;
- --sd-color-card-header: transparent;
- --sd-color-card-footer: transparent;
- --sd-color-tabs-label-active: #ee4c2c;
- --sd-color-tabs-label-hover: #ee4c2c;
- --sd-color-tabs-label-inactive: #6c6c6d;
- --sd-color-tabs-underline-active: #ee4c2c;
- --sd-color-tabs-underline-hover: #fabdbd;
- --sd-color-tabs-underline-inactive: transparent;
- --sd-color-tabs-overline: rgb(222, 222, 222);
- --sd-color-tabs-underline: rgb(222, 222, 222);
-}
-
-.sd-text-info {
- color: #ee4c2c;
-}
-
-.sd-card-img-top {
- background: #ee4c2c;
- height: 5px !important;
-}
-
-.sd-card {
- position: relative;
- background-color: #fff;
- opacity: 1.0;
- border-radius: 0px;
- width: 30%;
- border: none;
- padding-bottom: 0px;
-}
-
-
-.sd-card-img:hover {
- opacity: 1.0;
- background-color: #f3f4f7;
-}
-
-
-.sd-card:after {
- display: block;
- opacity: 1;
- content: '';
- border-bottom: solid 1px #ee4c2c;
- background-color: #fff;
- transform: scaleX(0);
- transition: transform .250s ease-in-out;
- transform-origin: 0% 50%;
-}
-
-.sd-card:hover {
- background-color: #fff;
- opacity: 1;
- border-top: 1px solid #f3f4f7;
- border-left: 1px solid #f3f4f7;
- border-right: 1px solid #f3f4f7;
-}
-
-.sd-card:hover:after {
- transform: scaleX(1);
-}
-
-.card-prerequisites:hover {
- transition: none;
- border: none;
-}
-
-.card-prerequisites:hover:after {
- transition: none;
- transform: none;
-}
-
-.card-prerequisites:after {
- display: block;
- content: '';
- border-bottom: none;
- background-color: #fff;
- transform: none;
- transition: none;
- transform-origin: none;
-}
-
-
-details.sd-dropdown {
- font-weight: 300;
- width: auto;
-}
-
-details.sd-dropdown:after {
- border: none;
- transition: none;
-}
-
-details.sd-dropdown:hover {
- border: none;
- transition: none;
-}
-
-details.sd-dropdown .sd-summary-content {
- font-weight: 300;
-}
-
-details.sd-dropdown .highlight .n {
- font-weight: normal;
-}
-
-.et-page-column1 {
- float: left;
- width: 70%;
- font-size: 1rem;
-}
-
-.et-page-column2 {
- float: right;
- padding-top: 40px;
- padding-left: 60px;
- padding-right: 60px;
- padding-bottom: 60px;
- width: 30%;
-}
-
-.et-page-column-row:after {
- content: "";
- display: table;
- clear: both;
-}
-
-/* For screens smaller than 768px (typical mobile devices) */
-@media screen and (max-width: 768px) {
- .et-page-column1, .et-page-column2 {
- float: none; /* Remove floats */
- width: 100%; /* Full width for both columns */
- padding: 0;
- font-size: 1rem;
- }
-
- .et-page-column2 img {
- display: none;
- }
- .et-page-column-row:after {
- content: "";
- display: table;
- clear: both;
- }
-}
-
-article.pytorch-article .class .method dt {
- border-top: none;
-}
-
-article.pytorch-article .class .simple dt {
- border-top: none;
-}
-
-article.pytorch-article .function dt.sig {
- border-top: none;
-}
-
-/* Fix for Sphinx gallery thumbnails.
-See https://github.com/sphinx-gallery/sphinx-gallery/issues/990
-*/
-article.pytorch-article .sphx-glr-thumbnails .sphx-glr-thumbcontainer {
- width: unset;
- margin-right: 0;
- margin-left: 0;
-}
-article.pytorch-article div.section div.wy-table-responsive tbody td {
- width: 50%;
-}
-
-article.pytorch-article section#glossary dl.simple.glossary dt {
- font-weight: bold;
- font-size: x-large;
-}
diff --git a/docs/source/_static/thumbnails/grumps_6.jpg b/docs/source/_static/thumbnails/grumps_6.jpg
new file mode 100644
index 000000000..081764555
Binary files /dev/null and b/docs/source/_static/thumbnails/grumps_6.jpg differ
diff --git a/docs/source/_static/thumbnails/grumps_audio.jpg b/docs/source/_static/thumbnails/grumps_audio.jpg
new file mode 100644
index 000000000..44fffe445
Binary files /dev/null and b/docs/source/_static/thumbnails/grumps_audio.jpg differ
diff --git a/docs/source/_static/thumbnails/grumps_audio2.jpg b/docs/source/_static/thumbnails/grumps_audio2.jpg
new file mode 100644
index 000000000..ff2a3c47a
Binary files /dev/null and b/docs/source/_static/thumbnails/grumps_audio2.jpg differ
diff --git a/docs/source/_static/thumbnails/grumps_brrrr.jpg b/docs/source/_static/thumbnails/grumps_brrrr.jpg
new file mode 100644
index 000000000..fa988b07e
Binary files /dev/null and b/docs/source/_static/thumbnails/grumps_brrrr.jpg differ
diff --git a/docs/source/_static/thumbnails/grumps_frame_mappings.jpg b/docs/source/_static/thumbnails/grumps_frame_mappings.jpg
new file mode 100644
index 000000000..465174aaa
Binary files /dev/null and b/docs/source/_static/thumbnails/grumps_frame_mappings.jpg differ
diff --git a/docs/source/_static/thumbnails/grumps_parallel.jpg b/docs/source/_static/thumbnails/grumps_parallel.jpg
new file mode 100644
index 000000000..2be5015f4
Binary files /dev/null and b/docs/source/_static/thumbnails/grumps_parallel.jpg differ
diff --git a/docs/source/_static/thumbnails/grumps_seek_mode.jpg b/docs/source/_static/thumbnails/grumps_seek_mode.jpg
new file mode 100644
index 000000000..402eddc67
Binary files /dev/null and b/docs/source/_static/thumbnails/grumps_seek_mode.jpg differ
diff --git a/docs/source/_static/thumbnails/not_grumps_encoding_video.jpg b/docs/source/_static/thumbnails/not_grumps_encoding_video.jpg
new file mode 100644
index 000000000..2720367e9
Binary files /dev/null and b/docs/source/_static/thumbnails/not_grumps_encoding_video.jpg differ
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
deleted file mode 100644
index 010a4d6d7..000000000
--- a/docs/source/_templates/layout.html
+++ /dev/null
@@ -1,21 +0,0 @@
-{% extends "!layout.html" %}
-
-{% block sidebartitle %}
-
- {% include "searchbox.html" %}
-{% endblock %}
-
-
-{% block footer %}
-
-
-
-
-{% endblock %}
diff --git a/docs/source/api_ref.rst b/docs/source/api_ref.rst
new file mode 100644
index 000000000..f4ffe34dc
--- /dev/null
+++ b/docs/source/api_ref.rst
@@ -0,0 +1,11 @@
+API Reference
+=============
+
+.. toctree::
+ :maxdepth: 1
+
+ api_ref_torchcodec
+ api_ref_decoders
+ api_ref_encoders
+ api_ref_samplers
+ api_ref_transforms
diff --git a/docs/source/api_ref_decoders.rst b/docs/source/api_ref_decoders.rst
index 1417d7aea..40ae75101 100644
--- a/docs/source/api_ref_decoders.rst
+++ b/docs/source/api_ref_decoders.rst
@@ -19,17 +19,30 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_decoding_a
VideoDecoder
AudioDecoder
+.. autosummary::
+ :toctree: generated/
+ :nosignatures:
+ :template: dataclass.rst
+
+ VideoStreamMetadata
+ AudioStreamMetadata
+
+
+CUDA decoding utils
+-------------------
+
.. autosummary::
:toctree: generated/
:nosignatures:
:template: function.rst
set_cuda_backend
+ set_nvdec_cache_capacity
+ get_nvdec_cache_capacity
.. autosummary::
:toctree: generated/
:nosignatures:
:template: dataclass.rst
- VideoStreamMetadata
- AudioStreamMetadata
+ CpuFallbackStatus
diff --git a/docs/source/api_ref_encoders.rst b/docs/source/api_ref_encoders.rst
index 52c7295bc..6c7fc825d 100644
--- a/docs/source/api_ref_encoders.rst
+++ b/docs/source/api_ref_encoders.rst
@@ -16,3 +16,4 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_encoding_a
:template: class.rst
AudioEncoder
+ VideoEncoder
diff --git a/docs/source/api_ref_transforms.rst b/docs/source/api_ref_transforms.rst
new file mode 100644
index 000000000..18bffabae
--- /dev/null
+++ b/docs/source/api_ref_transforms.rst
@@ -0,0 +1,21 @@
+.. _transforms:
+
+=====================
+torchcodec.transforms
+=====================
+
+.. automodule:: torchcodec.transforms
+
+.. currentmodule:: torchcodec.transforms
+
+For a tutorial, see: :ref:`sphx_glr_generated_examples_decoding_transforms.py`.
+
+.. autosummary::
+ :toctree: generated/
+ :nosignatures:
+ :template: dataclass.rst
+
+ DecoderTransform
+ CenterCrop
+ RandomCrop
+ Resize
diff --git a/docs/source/conf.py b/docs/source/conf.py
index ba5247372..34e188dd9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -18,21 +18,13 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
-
import os
import sys
-import pytorch_sphinx_theme
+import pytorch_sphinx_theme2
import torchcodec
-sys.path.append(os.path.abspath("."))
+sys.path.insert(0, os.path.abspath("."))
# -- General configuration ------------------------------------------------
@@ -55,6 +47,9 @@
"sphinx_tabs.tabs",
"sphinx_design",
"sphinx_copybutton",
+ "sphinx_sitemap",
+ "sphinxcontrib.mermaid",
+ "pytorch_sphinx_theme2",
]
@@ -81,12 +76,15 @@ def __call__(self, filename):
"approximate_mode.py",
"sampling.py",
"parallel_decoding.py",
+ "performance_tips.py",
"custom_frame_mappings.py",
+ "transforms.py",
]
else:
assert "examples/encoding" in self.src_dir
order = [
"audio_encoding.py",
+ "video_encoding.py",
]
try:
@@ -133,13 +131,18 @@ def __call__(self, filename):
# Add any paths that contain templates here, relative to this directory.
-templates_path = ["_templates"]
+templates_path = [
+ "_templates",
+ os.path.join(os.path.dirname(pytorch_sphinx_theme2.__file__), "templates"),
+]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = [".rst"]
+version = ".".join(torchcodec.__version__.split(".")[:2])
+
html_title = f"TorchCodec {torchcodec.__version__} Documentation"
# The master toctree document.
@@ -173,26 +176,51 @@ def __call__(self, filename):
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = "pytorch_sphinx_theme"
-html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
+html_theme = "pytorch_sphinx_theme2"
+html_theme_path = [pytorch_sphinx_theme2.get_html_theme_path()]
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
html_theme_options = {
- "collapse_navigation": False,
- "display_version": True,
- "logo_only": True,
- "pytorch_project": "docs",
- "navigation_with_keys": True,
+ "navigation_with_keys": False,
"analytics_id": "GTM-T8XT4PS",
+ "icon_links": [
+ {
+ "name": "X",
+ "url": "https://x.com/PyTorch",
+ "icon": "fa-brands fa-x-twitter",
+ },
+ {
+ "name": "GitHub",
+ "url": "https://github.com/meta-pytorch/torchcodec",
+ "icon": "fa-brands fa-github",
+ },
+ {
+ "name": "Discourse",
+ "url": "https://dev-discuss.pytorch.org/",
+ "icon": "fa-brands fa-discourse",
+ },
+ {
+ "name": "PyPi",
+ "url": "https://pypi.org/project/torchcodec/",
+ "icon": "fa-brands fa-python",
+ },
+ ],
+ "use_edit_page_button": True,
+ "navbar_center": "navbar-nav",
+ "navbar_start": ["navbar-logo", "version-switcher"],
+ "logo": {
+ "text": "TorchCodec",
+ },
+ "switcher": {
+ "json_url": "https://meta-pytorch.org/torchcodec/torchcodec-versions.json",
+ "version_match": version,
+ },
+ "show_version_warning_banner": True,
}
-html_logo = "_static/img/pytorch-logo-dark.svg"
-
-html_css_files = ["css/custom_torchcodec.css"]
-
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
@@ -209,11 +237,38 @@ def __call__(self, filename):
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"torch": ("https://pytorch.org/docs/stable/", None),
+ "torchvision": ("https://docs.pytorch.org/vision/stable/", None),
"numpy": ("https://numpy.org/doc/stable/", None),
"PIL": ("https://pillow.readthedocs.io/en/stable/", None),
"matplotlib": ("https://matplotlib.org/stable/", None),
}
+# html_context for theme2
+theme_variables = pytorch_sphinx_theme2.get_theme_variables()
+
+html_context = {
+ "theme_variables": theme_variables,
+ "display_github": True,
+ "github_url": "https://github.com",
+ "github_user": "meta-pytorch",
+ "github_repo": "torchcodec",
+ "feedback_url": "https://github.com/meta-pytorch/torchcodec",
+ "github_version": "main",
+ "doc_path": "docs/source",
+ "library_links": [],
+ "community_links": theme_variables.get("community_links", []),
+ "language_bindings_links": html_theme_options.get("language_bindings_links", []),
+}
+
+# sitemap config
+html_baseurl = "https://meta-pytorch.org/torchcodec/stable/"
+sitemap_locales = [None]
+sitemap_excludes = [
+ "search.html",
+ "genindex.html",
+]
+sitemap_url_scheme = "{link}"
+
def inject_minigalleries(app, what, name, obj, options, lines):
"""Inject a minigallery into a docstring.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 85f9a067c..0276daa77 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -2,7 +2,7 @@ Welcome to the TorchCodec documentation!
========================================
TorchCodec is a Python library for decoding video and audio data into PyTorch
-tensors, on CPU and CUDA GPU. It also supports audio encoding, and video encoding will come soon!
+tensors, on CPU and CUDA GPU. It also supports audio and video encoding!
It aims to be fast, easy to use, and well integrated into the PyTorch ecosystem.
If you want to use PyTorch to train ML models on videos and audio, TorchCodec is
how you turn these into data.
@@ -25,8 +25,7 @@ Installation instructions
.. grid-item-card:: :octicon:`file-code;1em`
Installation instructions
- :img-top: _static/img/card-background.svg
- :link: https://github.com/pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec
+ :link: https://github.com/meta-pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec
:link-type: url
How to install TorchCodec
@@ -38,7 +37,6 @@ Decoding
.. grid-item-card:: :octicon:`file-code;1em`
Getting Started with TorchCodec
- :img-top: _static/img/card-background.svg
:link: generated_examples/decoding/basic_example.html
:link-type: url
@@ -46,7 +44,6 @@ Decoding
.. grid-item-card:: :octicon:`file-code;1em`
Audio Decoding
- :img-top: _static/img/card-background.svg
:link: generated_examples/decoding/audio_decoding.html
:link-type: url
@@ -54,7 +51,6 @@ Decoding
.. grid-item-card:: :octicon:`file-code;1em`
GPU decoding
- :img-top: _static/img/card-background.svg
:link: generated_examples/decoding/basic_cuda_example.html
:link-type: url
@@ -62,7 +58,6 @@ Decoding
.. grid-item-card:: :octicon:`file-code;1em`
Streaming video
- :img-top: _static/img/card-background.svg
:link: generated_examples/decoding/file_like.html
:link-type: url
@@ -70,7 +65,6 @@ Decoding
.. grid-item-card:: :octicon:`file-code;1em`
Parallel decoding
- :img-top: _static/img/card-background.svg
:link: generated_examples/decoding/parallel_decoding.html
:link-type: url
@@ -78,12 +72,25 @@ Decoding
.. grid-item-card:: :octicon:`file-code;1em`
Clip sampling
- :img-top: _static/img/card-background.svg
:link: generated_examples/decoding/sampling.html
:link-type: url
How to sample regular and random clips from a video
+ .. grid-item-card:: :octicon:`file-code;1em`
+ Decoder transforms
+ :link: generated_examples/decoding/transforms.html
+ :link-type: url
+
+ How to apply transforms while decoding
+
+ .. grid-item-card:: :octicon:`file-code;1em`
+ Performance Tips
+ :link: generated_examples/decoding/performance_tips.html
+ :link-type: url
+
+ Tips for optimizing video decoding performance
+
Encoding
^^^^^^^^
@@ -92,36 +99,38 @@ Encoding
.. grid-item-card:: :octicon:`file-code;1em`
Audio Encoding
- :img-top: _static/img/card-background.svg
:link: generated_examples/encoding/audio_encoding.html
:link-type: url
How encode audio samples
+ .. grid-item-card:: :octicon:`file-code;1em`
+ Video Encoding
+ :link: generated_examples/encoding/video_encoding.html
+ :link-type: url
+
+ How to encode video frames
+
.. toctree::
:maxdepth: 1
- :caption: TorchCodec documentation
:hidden:
- Home
- glossary
+ Installation
.. toctree::
:maxdepth: 1
- :caption: Examples and tutorials
:hidden:
- Installation instructions
generated_examples/index
+.. toctree::
+ :maxdepth: 1
+ :hidden:
+
+ api_ref
.. toctree::
- :glob:
:maxdepth: 1
- :caption: API Reference
:hidden:
- api_ref_torchcodec
- api_ref_decoders
- api_ref_encoders
- api_ref_samplers
+ glossary
diff --git a/examples/decoding/approximate_mode.py b/examples/decoding/approximate_mode.py
index 62abee801..15a19321f 100644
--- a/examples/decoding/approximate_mode.py
+++ b/examples/decoding/approximate_mode.py
@@ -33,6 +33,7 @@
from time import perf_counter_ns
+# sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_seek_mode.jpg'
# Video source: https://www.pexels.com/video/dog-eating-854132/
# License: CC0. Author: Coverr.
url = "https://videos.pexels.com/video-files/854132/854132-sd_640_360_25fps.mp4"
@@ -66,7 +67,7 @@
# Performance: ``VideoDecoder`` creation
# --------------------------------------
#
-# In terms of performance, the ``seek_mode`` parameter ultimately affects the
+# In terms of performance, the ``seek_mode`` parameter mainly affects the
# **creation** of a :class:`~torchcodec.decoders.VideoDecoder` object. The
# longer the video, the higher the performance gain.
@@ -104,7 +105,7 @@ def bench(f, average_over=50, warmup=2, **f_kwargs):
# ---------------------------------------------
#
# Strictly speaking the ``seek_mode`` parameter only affects the performance of
-# the :class:`~torchcodec.decoders.VideoDecoder` creation. It does not have a
+# the :class:`~torchcodec.decoders.VideoDecoder` creation. It usually does not have a
# direct effect on the performance of frame decoding or sampling. **However**,
# because frame decoding and sampling patterns typically involve the creation of
# the :class:`~torchcodec.decoders.VideoDecoder` (one per video), ``seek_mode``
@@ -168,8 +169,10 @@ def sample_clips(seek_mode):
# duration), and also builds an internal index of frames and key-frames. This
# internal index is potentially more accurate than the one in the file's
# headers, which leads to more accurate seeking behavior.
-# Without the scan, TorchCodec relies only on the metadata contained in the
-# file, which may not always be as accurate.
+# Without the scan (in approximate mode), TorchCodec relies only on the metadata
+# contained in the file, which may not always be as accurate. In some rare
+# cases, relying on this less accurate data may also lead to slower frame
+# decoding, because it can involve unnecessary seeks.
#
# Which mode should I use?
# ------------------------
@@ -177,11 +180,10 @@ def sample_clips(seek_mode):
# The general rule of thumb is as follows:
#
# - If you really care about exactness of frame seeking, use "exact".
-# - If you can sacrifice exactness of seeking for speed, which is usually the
-# case when doing clip sampling, use "approximate".
-# - If your videos don't have variable framerate and their metadata is correct,
-# then "approximate" mode is a net win: it will be just as accurate as the
-# "exact" mode while still being significantly faster.
+# - If your videos are short (less than a few minutes) then "exact" will usually
+# be preferable, as the scan's fixed cost will be negligible.
+# - For long videos, if you can sacrifice exactness of seeking for speed, which
+# is usually the case when doing clip sampling, consider using "approximate".
# %%
shutil.rmtree(temp_dir)
diff --git a/examples/decoding/audio_decoding.py b/examples/decoding/audio_decoding.py
index 3d41e350d..95ac36082 100644
--- a/examples/decoding/audio_decoding.py
+++ b/examples/decoding/audio_decoding.py
@@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
+
"""
========================================
Decoding audio streams with AudioDecoder
@@ -25,6 +26,7 @@ def play_audio(samples):
return Audio(samples.data, rate=samples.sample_rate)
+# sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_audio.jpg'
# Audio source is CC0: https://opengameart.org/content/town-theme-rpg
# Attribution: cynicmusic.com pixelsphere.org
url = "https://opengameart.org/sites/default/files/TownTheme.mp3"
diff --git a/examples/decoding/basic_cuda_example.py b/examples/decoding/basic_cuda_example.py
index 8f82940c0..45f9ea1a9 100644
--- a/examples/decoding/basic_cuda_example.py
+++ b/examples/decoding/basic_cuda_example.py
@@ -18,32 +18,10 @@
running the transform steps. Encoded packets are often much smaller than decoded frames so
CUDA decoding also uses less PCI-e bandwidth.
-When to and when not to use CUDA Decoding
------------------------------------------
-
-CUDA Decoding can offer speed-up over CPU Decoding in a few scenarios:
-
-#. You are decoding a large resolution video
-#. You are decoding a large batch of videos that's saturating the CPU
-#. You want to do whole-image transforms like scaling or convolutions on the decoded tensors
- after decoding
-#. Your CPU is saturated and you want to free it up for other work
-
-
-Here are situations where CUDA Decoding may not make sense:
-
-#. You want bit-exact results compared to CPU Decoding
-#. You have small resolution videos and the PCI-e transfer latency is large
-#. Your GPU is already busy and CPU is not
-
-It's best to experiment with CUDA Decoding to see if it improves your use-case. With
-TorchCodec you can simply pass in a device parameter to the
-:class:`~torchcodec.decoders.VideoDecoder` class to use CUDA Decoding.
-
Installing TorchCodec with CUDA Enabled
---------------------------------------
-Refer to the installation guide in the `README `_.
+Refer to the installation guide in the `README <https://github.com/meta-pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec>`_.
"""
@@ -113,6 +91,25 @@
print(frame.data.device)
+# %%
+# Checking for CPU Fallback
+# -------------------------------------
+#
+# In some cases, CUDA decoding may fall back to CPU decoding. This can happen
+# when the video codec or format is not supported by the NVDEC hardware decoder, or when NVCUVID wasn't found.
+# TorchCodec provides the :class:`~torchcodec.decoders.CpuFallbackStatus` class
+# to help you detect when this fallback occurs.
+#
+# You can access the fallback status via the
+# :attr:`~torchcodec.decoders.VideoDecoder.cpu_fallback` attribute:
+
+with set_cuda_backend("beta"):
+ decoder = VideoDecoder(video_file, device="cuda")
+
+# Check and print the CPU fallback status
+print(decoder.cpu_fallback)
+
+
# %%
# Visualizing Frames
# -------------------------------------
diff --git a/examples/decoding/basic_example.py b/examples/decoding/basic_example.py
index 8440b6814..86fa8e6e4 100644
--- a/examples/decoding/basic_example.py
+++ b/examples/decoding/basic_example.py
@@ -18,7 +18,6 @@
# plotting utility. You can ignore that part and jump right below to
# :ref:`creating_decoder`.
-from typing import Optional
import torch
import requests
@@ -33,7 +32,7 @@
raw_video_bytes = response.content
-def plot(frames: torch.Tensor, title : Optional[str] = None):
+def plot(frames: torch.Tensor, title: str | None = None):
try:
from torchvision.utils import make_grid
from torchvision.transforms.v2.functional import to_pil_image
diff --git a/examples/decoding/custom_frame_mappings.py b/examples/decoding/custom_frame_mappings.py
index a62bc9eb0..1094201fc 100644
--- a/examples/decoding/custom_frame_mappings.py
+++ b/examples/decoding/custom_frame_mappings.py
@@ -32,6 +32,7 @@
import subprocess
import requests
+# sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_frame_mappings.jpg'
# Video source: https://www.pexels.com/video/dog-eating-854132/
# License: CC0. Author: Coverr.
url = "https://videos.pexels.com/video-files/854132/854132-sd_640_360_25fps.mp4"
@@ -82,7 +83,15 @@
# Lets define a simple function to run ffprobe on a video's first stream index, then writes the results in output_json_path.
def generate_frame_mappings(video_path, output_json_path, stream_index):
- ffprobe_cmd = ["ffprobe", "-i", f"{video_path}", "-select_streams", f"{stream_index}", "-show_frames", "-show_entries", "frame=pts,duration,key_frame", "-of", "json"]
+ ffprobe_cmd = [
+ "ffprobe",
+ "-i", f"{video_path}",
+ "-select_streams", f"{stream_index}",
+ "-show_frames",
+ "-show_entries",
+ "frame=pts,duration,key_frame",
+ "-of", "json",
+ ]
print(f"Running ffprobe:\n{' '.join(ffprobe_cmd)}\n")
ffprobe_result = subprocess.run(ffprobe_cmd, check=True, capture_output=True, text=True)
with open(output_json_path, "w") as f:
@@ -157,7 +166,7 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
# so the performance benefits are realized.
-def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None):
+def decode_frames(video_path, seek_mode="exact", custom_frame_mappings=None):
decoder = VideoDecoder(
source=video_path,
seek_mode=seek_mode,
diff --git a/examples/decoding/file_like.py b/examples/decoding/file_like.py
index 7f302d3c5..238d51094 100644
--- a/examples/decoding/file_like.py
+++ b/examples/decoding/file_like.py
@@ -28,6 +28,7 @@ class to decode it. But all of the lessons here also apply to audio files and th
from time import perf_counter_ns
+# sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_6.jpg'
def get_url_content(url):
response = requests.get(url, headers={"User-Agent": ""})
if response.status_code != 200:
diff --git a/examples/decoding/parallel_decoding.py b/examples/decoding/parallel_decoding.py
index b5699a895..e8ad5e0b5 100644
--- a/examples/decoding/parallel_decoding.py
+++ b/examples/decoding/parallel_decoding.py
@@ -31,7 +31,6 @@
# require efficient processing. You can ignore that part and jump right below to
# :ref:`start_parallel_decoding`.
-from typing import List
import torch
import requests
import tempfile
@@ -44,6 +43,7 @@
from torchcodec.decoders import VideoDecoder
+# sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_parallel.jpg'
def bench(f, *args, num_exp=3, warmup=1, **kwargs):
"""Benchmark a function by running it multiple times and measuring execution time."""
for _ in range(warmup):
@@ -74,7 +74,7 @@ def report_stats(times, unit="s"):
return med
-def split_indices(indices: List[int], num_chunks: int) -> List[List[int]]:
+def split_indices(indices: list[int], num_chunks: int) -> list[list[int]]:
"""Split a list of indices into approximately equal chunks."""
chunk_size = len(indices) // num_chunks
chunks = []
@@ -155,7 +155,8 @@ def generate_long_video(temp_dir: str):
# Let's start with a sequential approach as our baseline. This processes
# frames one by one without any parallelization.
-def decode_sequentially(indices: List[int], video_path=long_video_path):
+
+def decode_sequentially(indices: list[int], video_path=long_video_path):
"""Decode frames sequentially using a single decoder instance."""
decoder = VideoDecoder(video_path, seek_mode="approximate")
return decoder.get_frames_at(indices)
@@ -173,8 +174,9 @@ def decode_sequentially(indices: List[int], video_path=long_video_path):
# via the ``num_ffmpeg_threads`` parameter. This approach uses multiple
# threads within FFmpeg itself to accelerate decoding operations.
+
def decode_with_ffmpeg_parallelism(
- indices: List[int],
+ indices: list[int],
num_threads: int,
video_path=long_video_path
):
@@ -197,10 +199,11 @@ def decode_with_ffmpeg_parallelism(
#
# Process-based parallelism distributes work across multiple Python processes.
+
def decode_with_multiprocessing(
- indices: List[int],
+ indices: list[int],
num_processes: int,
- video_path=long_video_path
+ video_path=long_video_path,
):
"""Decode frames using multiple processes with joblib."""
chunks = split_indices(indices, num_chunks=num_processes)
@@ -226,8 +229,9 @@ def decode_with_multiprocessing(
# Thread-based parallelism uses multiple threads within a single process.
# TorchCodec releases the GIL, so this can be very effective.
+
def decode_with_multithreading(
- indices: List[int],
+ indices: list[int],
num_threads: int,
video_path=long_video_path
):
diff --git a/examples/decoding/performance_tips.py b/examples/decoding/performance_tips.py
new file mode 100644
index 000000000..132d7f96f
--- /dev/null
+++ b/examples/decoding/performance_tips.py
@@ -0,0 +1,254 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+.. meta::
+ :description: Learn how to optimize TorchCodec video decoding performance with batch APIs, approximate seeking, multi-threading, and CUDA acceleration.
+
+==============================================
+TorchCodec Performance Tips and Best Practices
+==============================================
+
+This tutorial consolidates performance optimization techniques for video
+decoding with TorchCodec. Learn when and how to apply various strategies
+to increase performance.
+"""
+
+# %%
+# Overview
+# --------
+#
+# When decoding videos with TorchCodec, several techniques can significantly
+# improve performance depending on your use case. This guide covers:
+#
+# 1. **Batch APIs** - Decode multiple frames at once
+# 2. **Approximate Mode & Keyframe Mappings** - Trade accuracy for speed
+# 3. **Multi-threading** - Parallelize decoding across videos or chunks
+# 4. **CUDA Acceleration** - Use GPU decoding for supported formats
+# 5. **Decoder Native Transforms** - Apply transforms during decoding for memory efficiency
+#
+# We'll explore each technique and when to use it.
+
+# %%
+# 1. Use Batch APIs When Possible
+# --------------------------------
+#
+# If you need to decode multiple frames at once, the batch methods are faster than calling single-frame decoding methods multiple times.
+# For example, :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at` is faster than calling :meth:`~torchcodec.decoders.VideoDecoder.get_frame_at` multiple times.
+# TorchCodec's batch APIs reduce overhead and can leverage internal optimizations.
+#
+# **Key Methods:**
+#
+# For index-based frame retrieval:
+#
+# - :meth:`~torchcodec.decoders.VideoDecoder.get_frames_at` for specific indices
+# - :meth:`~torchcodec.decoders.VideoDecoder.get_frames_in_range` for ranges
+#
+# For timestamp-based frame retrieval:
+#
+# - :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_at` for timestamps
+# - :meth:`~torchcodec.decoders.VideoDecoder.get_frames_played_in_range` for time ranges
+#
+# **When to use:**
+#
+# - Decoding multiple frames
+
+# %%
+# .. note::
+#
+# For complete examples with runnable code demonstrating batch decoding,
+# iteration, and frame retrieval, see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`
+
+# %%
+# 2. Approximate Mode & Keyframe Mappings
+# ----------------------------------------
+#
+# By default, TorchCodec uses ``seek_mode="exact"``, which performs a :term:`scan` when
+# you create the decoder to build an accurate internal index of frames. This
+# ensures frame-accurate seeking but takes longer for decoder initialization,
+# especially on long videos.
+
+# %%
+# **Approximate Mode**
+# ~~~~~~~~~~~~~~~~~~~~
+#
+# Setting ``seek_mode="approximate"`` skips the initial :term:`scan` and relies on the
+# video file's metadata headers. This dramatically speeds up
+# :class:`~torchcodec.decoders.VideoDecoder` creation, particularly for long
+# videos, but may result in slightly less accurate seeking in some cases.
+#
+#
+# **Which mode should you use:**
+#
+# - If you care about exactness of frame seeking, use "exact".
+# - If the video is long and you're only decoding a small amount of frames, approximate mode should be faster.
+
+# %%
+# **Custom Frame Mappings**
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# For advanced use cases, you can pre-compute a custom mapping between desired
+# frame indices and actual keyframe locations. This allows you to speed up :class:`~torchcodec.decoders.VideoDecoder`
+# instantiation while maintaining the frame seeking accuracy of ``seek_mode="exact"``
+#
+# **When to use:**
+#
+# - Frame accuracy is critical, so you cannot use approximate mode
+# - You can preprocess videos once and then decode them many times
+#
+# **Performance impact:** speeds up decoder instantiation, similarly to ``seek_mode="approximate"``.
+
+# %%
+# .. note::
+#
+# For complete benchmarks showing actual speedup numbers, accuracy comparisons,
+# and implementation examples, see :ref:`sphx_glr_generated_examples_decoding_approximate_mode.py`
+# and :ref:`sphx_glr_generated_examples_decoding_custom_frame_mappings.py`
+
+# %%
+# 3. Multi-threading for Parallel Decoding
+# -----------------------------------------
+#
+# When decoding multiple videos or decoding a large number of frames from a single video, there are a few parallelization strategies to speed up the decoding process:
+#
+# - **FFmpeg-based parallelism** - Using FFmpeg's internal threading capabilities for intra-frame parallelism, where parallelization happens within individual frames rather than across frames. For that, use the `num_ffmpeg_threads` parameter of the :class:`~torchcodec.decoders.VideoDecoder`
+# - **Multiprocessing** - Distributing work across multiple processes
+# - **Multithreading** - Using multiple threads within a single process
+#
+# You can use both multiprocessing and multithreading to decode multiple videos in parallel, or to decode a single long video in parallel by splitting it into chunks.
+
+# %%
+# .. note::
+#
+# For complete examples comparing
+# sequential, ffmpeg-based parallelism, multi-process, and multi-threaded approaches, see
+# :ref:`sphx_glr_generated_examples_decoding_parallel_decoding.py`
+
+# %%
+# 4. CUDA Acceleration
+# --------------------
+#
+# TorchCodec supports GPU-accelerated decoding using NVIDIA's hardware decoder
+# (NVDEC) on supported hardware. This keeps decoded tensors in GPU memory,
+# avoiding expensive CPU-GPU transfers for downstream GPU operations.
+#
+# **Recommended: use the Beta Interface!!**
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# We recommend you use the new "beta" CUDA interface which is significantly faster than the previous one, and supports the same features:
+#
+# .. code-block:: python
+#
+# with set_cuda_backend("beta"):
+# decoder = VideoDecoder("file.mp4", device="cuda")
+#
+# **When to use:**
+#
+# - Decoding large resolution videos
+# - Large batch of videos saturating the CPU
+#
+# **When NOT to use:**
+#
+# - You need bit-exact results with CPU decoding
+# - Small resolution videos and the PCI-e transfer latency is large
+# - GPU is already busy and CPU is idle
+#
+# **Performance impact:** CUDA decoding can significantly outperform CPU decoding,
+# especially for high-resolution videos and when decoding a lot of frames.
+# Actual speedup varies by hardware, resolution, and codec.
+
+# %%
+# **Checking for CPU Fallback**
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# In some cases, CUDA decoding may silently fall back to CPU decoding when the
+# video codec or format is not supported by NVDEC. You can detect this using
+# the :attr:`~torchcodec.decoders.VideoDecoder.cpu_fallback` attribute:
+#
+# .. code-block:: python
+#
+# with set_cuda_backend("beta"):
+# decoder = VideoDecoder("file.mp4", device="cuda")
+#
+# # Print detailed fallback status
+# print(decoder.cpu_fallback)
+#
+# .. note::
+#
+# The timing of when you can detect CPU fallback differs between backends:
+# with the **FFmpeg backend**, you can only check fallback status after decoding at
+# least one frame, because FFmpeg determines codec support lazily during decoding;
+# with the **BETA backend**, you can check fallback status immediately after
+# decoder creation, as the backend checks codec support upfront.
+#
+# For installation instructions, detailed examples, and visual comparisons
+# between CPU and CUDA decoding, see :ref:`sphx_glr_generated_examples_decoding_basic_cuda_example.py`
+
+# %%
+# 5. Decoder Native Transforms
+# ----------------------------
+#
+# TorchCodec supports applying transforms like resize and crop *during* the
+# decoding process itself, rather than as a separate post-processing step.
+# This can lead to significant memory savings, especially when decoding
+# high-resolution videos that will be resized to smaller dimensions.
+#
+# :class:`~torchcodec.decoders.VideoDecoder` accepts both TorchCodec
+# :class:`~torchcodec.transforms.DecoderTransform` objects and TorchVision
+# :class:`~torchvision.transforms.v2.Transform` objects as transform
+# specifications. TorchVision is **not required** to use decoder transforms.
+#
+# **Example:**
+#
+# .. code-block:: python
+#
+# from torchcodec.decoders import VideoDecoder
+# from torchcodec.transforms import Resize
+#
+# decoder = VideoDecoder(
+# "file.mp4",
+# transforms=[Resize(size=(480, 640))]
+# )
+#
+# **When to use:**
+#
+# - If you are applying a transform pipeline that significantly reduces the
+# dimensions of your input frames and memory efficiency matters.
+# - If you are using multiple FFmpeg threads, decoder transforms may be faster.
+# Experiment with your setup to verify.
+#
+
+# %%
+# .. note::
+#
+# For complete examples with memory benchmarks, transform pipelines, and
+# detailed comparisons between decoder transforms and TorchVision transforms,
+# see :ref:`sphx_glr_generated_examples_decoding_transforms.py`
+
+# %%
+# Conclusion
+# ----------
+#
+# TorchCodec offers multiple performance optimization strategies, each suited to
+# different scenarios. Use batch APIs for multi-frame decoding, approximate mode
+# for faster initialization, parallel processing for high throughput, CUDA
+# acceleration to offload the CPU, and decoder native transforms for memory efficiency.
+#
+# The best results often come from combining techniques. Profile your specific
+# use case and apply optimizations incrementally, using the benchmarks in the
+# linked examples as a guide.
+#
+# For more information, see:
+#
+# - :ref:`sphx_glr_generated_examples_decoding_basic_example.py` - Basic decoding examples
+# - :ref:`sphx_glr_generated_examples_decoding_approximate_mode.py` - Approximate mode benchmarks
+# - :ref:`sphx_glr_generated_examples_decoding_custom_frame_mappings.py` - Custom frame mappings
+# - :ref:`sphx_glr_generated_examples_decoding_parallel_decoding.py` - Parallel decoding strategies
+# - :ref:`sphx_glr_generated_examples_decoding_basic_cuda_example.py` - CUDA acceleration guide
+# - :ref:`sphx_glr_generated_examples_decoding_transforms.py` - Decoder transforms guide
+# - :class:`torchcodec.decoders.VideoDecoder` - Full API reference
+
+# sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_brrrr.jpg'
diff --git a/examples/decoding/sampling.py b/examples/decoding/sampling.py
index 2ca3b6e50..19babbace 100644
--- a/examples/decoding/sampling.py
+++ b/examples/decoding/sampling.py
@@ -37,7 +37,7 @@
raw_video_bytes = response.content
-def plot(frames: torch.Tensor, title : Optional[str] = None):
+def plot(frames: torch.Tensor, title: str | None = None):
try:
from torchvision.utils import make_grid
from torchvision.transforms.v2.functional import to_pil_image
diff --git a/examples/decoding/transforms.py b/examples/decoding/transforms.py
new file mode 100644
index 000000000..40eb7e79a
--- /dev/null
+++ b/examples/decoding/transforms.py
@@ -0,0 +1,343 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+.. meta::
+ :description: Learn how to apply transforms during video decoding for improved memory efficiency and performance.
+
+=======================================================
+Decoder Transforms: Applying transforms during decoding
+=======================================================
+
+In this example, we will demonstrate how to use the ``transforms`` parameter of
+the :class:`~torchcodec.decoders.VideoDecoder` class. This parameter allows us
+to specify a list of :class:`torchcodec.transforms.DecoderTransform` or
+:class:`torchvision.transforms.v2.Transform` objects. These objects serve as
+transform specifications that the :class:`~torchcodec.decoders.VideoDecoder`
+will apply during the decoding process.
+"""
+
+# %%
+# First, a bit of boilerplate, definitions that we will use later. You can skip
+# ahead to our :ref:`example_video` or :ref:`applying_transforms`.
+
+
+import torch
+import requests
+import tempfile
+from pathlib import Path
+import shutil
+from time import perf_counter_ns
+
+
+def store_video_to(url: str, local_video_path: Path):
+ response = requests.get(url, headers={"User-Agent": ""})
+ if response.status_code != 200:
+ raise RuntimeError(f"Failed to download video. {response.status_code = }.")
+
+ with open(local_video_path, 'wb') as f:
+ for chunk in response.iter_content():
+ f.write(chunk)
+
+
+def plot(frames: torch.Tensor, title : str | None = None):
+ try:
+ from torchvision.utils import make_grid
+ from torchvision.transforms.v2.functional import to_pil_image
+ import matplotlib.pyplot as plt
+ except ImportError:
+ print("Cannot plot, please run `pip install torchvision matplotlib`")
+ return
+
+ plt.rcParams["savefig.bbox"] = "tight"
+ dpi = 300
+ fig, ax = plt.subplots(figsize=(800 / dpi, 600 / dpi), dpi=dpi)
+ ax.imshow(to_pil_image(make_grid(frames)))
+ ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
+ if title is not None:
+ ax.set_title(title, fontsize=6)
+ plt.tight_layout()
+
+# %%
+# .. _example_video:
+#
+# Our example video
+# -----------------
+#
+# We'll download a video from the internet and store it locally. We're
+# purposefully retrieving a high resolution video to demonstrate using
+# transforms to reduce the dimensions.
+
+
+# Video source: https://www.pexels.com/video/an-african-penguin-at-the-beach-9140346/
+# Author: Taryn Elliott.
+url = "https://videos.pexels.com/video-files/9140346/9140346-uhd_3840_2160_25fps.mp4"
+
+temp_dir = tempfile.mkdtemp()
+penguin_video_path = Path(temp_dir) / "penguin.mp4"
+store_video_to(url, penguin_video_path)
+
+from torchcodec.decoders import VideoDecoder
+print(f"Penguin video metadata: {VideoDecoder(penguin_video_path).metadata}")
+
+# %%
+# As shown above, the video is 37 seconds long and has a height of 2160 pixels
+# and a width of 3840 pixels.
+#
+# .. note::
+#
+# The colloquial way to report the dimensions of this video would be as
+# 3840x2160; that is, (`width`, `height`). In the PyTorch ecosystem, image
+# dimensions are typically expressed as (`height`, `width`). The remainder
+# of this tutorial uses the PyTorch convention of (`height`, `width`) to
+# specify image dimensions.
+
+# %%
+# .. _applying_transforms:
+#
+# Applying transforms during pre-processing
+# -----------------------------------------
+#
+# A pre-processing pipeline for videos during training will typically apply a
+# set of transforms for a variety of reasons. Below is a simple example of
+# applying TorchVision's :class:`~torchvision.transforms.v2.Resize` transform to a single
+# frame **after** the decoder returns it:
+
+from torchvision.transforms import v2
+
+full_decoder = VideoDecoder(penguin_video_path)
+frame = full_decoder[5]
+resized_after = v2.Resize(size=(480, 640))(frame)
+
+plot(resized_after, title="Resized to 480x640 after decoding")
+
+# %%
+# In the example above, ``full_decoder`` returns a video frame that has the
+# dimensions (2160, 3840) which is then resized down to (480, 640). But with the
+# ``transforms`` parameter of :class:`~torchcodec.decoders.VideoDecoder` we can
+# specify for the resize to happen **during** decoding!
+
+resize_decoder = VideoDecoder(
+ penguin_video_path,
+ transforms=[v2.Resize(size=(480, 640))]
+)
+resized_during = resize_decoder[5]
+
+plot(resized_during, title="Resized to 480x640 during decoding")
+
+# %%
+# Importantly, the two frames are not identical, even though we can see they
+# *look* very similar:
+
+abs_diff = (resized_after.float() - resized_during.float()).abs()
+(abs_diff == 0).all()
+
+# %%
+# But they're close enough that models won't be able to tell a difference:
+assert (abs_diff <= 1).float().mean() >= 0.998
+
+
+# %%
+# TorchCodec's relationship to TorchVision transforms
+# -----------------------------------------------------
+# Notably, in our examples we are passing in TorchVision
+# :class:`~torchvision.transforms.v2.Transform` objects as our transforms.
+# However, :class:`~torchcodec.decoders.VideoDecoder` accepts TorchVision
+# transforms as a matter of convenience. TorchVision is **not required** to use
+# decoder transforms.
+#
+# Every TorchVision transform that :class:`~torchcodec.decoders.VideoDecoder` accepts
+# has a complementary transform defined in :mod:`torchcodec.transforms`. We
+# would have gotten the same results if we had passed in the
+# :class:`torchcodec.transforms.Resize` object that is a part of TorchCodec.
+# :class:`~torchcodec.decoders.VideoDecoder` accepts both objects as a matter of
+# convenience and to clarify the relationship between the transforms that TorchCodec
+# applies and the transforms that TorchVision offers.
+#
+# While :class:`~torchcodec.decoders.VideoDecoder` accepts TorchVision transforms as
+# *specifications*, it is not actually using the TorchVision implementation of these
+# transforms. Instead, it is mapping them to equivalent
+# `FFmpeg filters <https://ffmpeg.org/ffmpeg-filters.html>`_. That is,
+# :class:`torchvision.transforms.v2.Resize` and :class:`torchcodec.transforms.Resize` are mapped to
+# `scale <https://ffmpeg.org/ffmpeg-filters.html#scale>`_; and
+# :class:`torchvision.transforms.v2.CenterCrop` and :class:`torchcodec.transforms.CenterCrop` are mapped to
+# `crop <https://ffmpeg.org/ffmpeg-filters.html#crop>`_.
+#
+# The relationships we ensure between TorchCodec :class:`~torchcodec.transforms.DecoderTransform` objects
+# and TorchVision :class:`~torchvision.transforms.v2.Transform` objects are:
+#
+# 1. The names are the same.
+# 2. Default behaviors are the same.
+# 3. The parameters for the :class:`~torchcodec.transforms.DecoderTransform`
+# object are a subset of the TorchVision :class:`~torchvision.transforms.v2.Transform`
+# object.
+# 4. Parameters with the same name control the same behavior and accept a
+# subset of the same types.
+# 5. The difference between the frames returned by a decoder transform and
+# the complementary TorchVision transform are such that a model should
+# not be able to tell the difference.
+#
+# .. note::
+#
+# Applying the exact same transforms during training and inference is
+#     important for model performance. For example, if you use decoder
+# transforms to resize frames during training, you should also use decoder
+# transforms to resize frames during inference. We provide the similarity
+# guarantees to mitigate the harm when the two techniques are
+# *unintentionally* mixed. That is, if you use decoder transforms to resize
+# frames during training, but use TorchVisions's
+# :class:`~torchvision.transforms.v2.Resize` during inference, our guarantees
+#     mitigate the harm to model performance. But we **recommend against** this kind of
+# mixing.
+#
+# It is appropriate and expected to use some decoder transforms and some TorchVision
+# transforms, as long as the exact same pre-processing operations are performed during
+# training and inference.
+
+# %%
+# Decoder transform pipelines
+# ---------------------------
+# So far, we've only provided a single transform to the ``transform`` parameter to
+# :class:`~torchcodec.decoders.VideoDecoder`. But it
+# actually accepts a list of transforms, which become a pipeline of transforms.
+# The order of the list matters: the first transform in the list will receive
+# the originally decoded frame. The output of that transform becomes the input
+# to the next transform in the list, and so on.
+#
+# From now on, we'll use TorchCodec transforms instead of TorchVision
+# transforms. When passed to the :class:`~torchcodec.decoders.VideoDecoder`,
+# they behave identically.
+#
+# A simple example:
+
+from torchcodec.transforms import Resize, CenterCrop
+
+
+crop_resize_decoder = VideoDecoder(
+ penguin_video_path,
+ transforms = [
+ CenterCrop(size=(1280, 1664)),
+ Resize(size=(480, 640)),
+ ]
+)
+crop_resized_during = crop_resize_decoder[5]
+plot(crop_resized_during, title="Center cropped then resized to 480x640")
+
+# %%
+# Performance: memory efficiency and speed
+# ----------------------------------------
+#
+# The main motivation for decoder transforms is *memory efficiency*,
+# particularly when applying transforms that reduce the size of a frame, such
+# as resize and crop. Because the FFmpeg layer knows all of the transforms it
+# needs to apply during decoding, it's able to efficiently reuse memory.
+# Further, full resolution frames are never returned to the Python layer. As a
+# result, there is significantly less total memory needed and less pressure on
+# the Python garbage collector.
+#
+# In `benchmarks `_
+# reducing frames from (1080, 1920) down to (135, 240), we have observed a
+# reduction in peak resident set size from 4.3 GB to 0.4 GB.
+#
+# There is sometimes a runtime benefit, but it is dependent on the number of
+# threads that the :class:`~torchcodec.decoders.VideoDecoder` tells FFmpeg
+# to use. We define the following benchmark function, as well as the functions
+# to benchmark:
+
+
+def bench(f, average_over=3, warmup=1, **f_kwargs):
+ for _ in range(warmup):
+ f(**f_kwargs)
+
+ times = []
+ for _ in range(average_over):
+ start_time = perf_counter_ns()
+ f(**f_kwargs)
+ end_time = perf_counter_ns()
+ times.append(end_time - start_time)
+
+ times = torch.tensor(times) * 1e-6 # ns to ms
+ times_std = times.std().item()
+ times_med = times.median().item()
+ return f"{times_med = :.2f}ms +- {times_std:.2f}"
+
+
+from torchcodec import samplers
+
+
+def sample_decoder_transforms(num_threads: int):
+ decoder = VideoDecoder(
+ penguin_video_path,
+ transforms = [
+ CenterCrop(size=(1280, 1664)),
+ Resize(size=(480, 640)),
+ ],
+ seek_mode="approximate",
+ num_ffmpeg_threads=num_threads,
+ )
+ transformed_frames = samplers.clips_at_regular_indices(
+ decoder,
+ num_clips=1,
+ num_frames_per_clip=200
+ )
+ assert len(transformed_frames.data[0]) == 200
+
+
+def sample_torchvision_transforms(num_threads: int):
+ if num_threads > 0:
+ torch.set_num_threads(num_threads)
+ decoder = VideoDecoder(
+ penguin_video_path,
+ seek_mode="approximate",
+ num_ffmpeg_threads=num_threads,
+ )
+ frames = samplers.clips_at_regular_indices(
+ decoder,
+ num_clips=1,
+ num_frames_per_clip=200
+ )
+ transforms = v2.Compose(
+ [
+ v2.CenterCrop(size=(1280, 1664)),
+ v2.Resize(size=(480, 640)),
+ ]
+ )
+ transformed_frames = transforms(frames.data)
+ assert transformed_frames.shape[1] == 200
+
+# %%
+# When the :class:`~torchcodec.decoders.VideoDecoder` object sets the number of
+# FFmpeg threads to 0, that tells FFmpeg to determine how many threads to use
+# based on what is available on the current system. In such cases, decoder transforms
+# will tend to outperform getting back a full frame and applying TorchVision transforms
+# sequentially:
+
+
+print(f"decoder transforms: {bench(sample_decoder_transforms, num_threads=0)}")
+print(f"torchvision transform: {bench(sample_torchvision_transforms, num_threads=0)}")
+
+# %%
+# The reason is that FFmpeg is applying the decoder transforms in parallel.
+# However, if the number of threads is 1 (as is the default), then there is often
+# less benefit to using decoder transforms. Using the TorchVision transforms may
+# even be faster!
+
+print(f"decoder transforms: {bench(sample_decoder_transforms, num_threads=1)}")
+print(f"torchvision transform: {bench(sample_torchvision_transforms, num_threads=1)}")
+
+# %%
+# In brief, our performance guidance is:
+#
+# 1. If you are applying a transform pipeline that significantly reduces
+# the dimensions of your input frames and memory efficiency matters, use
+# decoder transforms.
+# 2. If you are using multiple FFmpeg threads, decoder transforms may be
+# faster. Experiment with your setup to verify.
+# 3. If you are using a single FFmpeg thread, then decoder transforms may
+# be slower. Experiment with your setup to verify.
+
+shutil.rmtree(temp_dir)
+# %%
diff --git a/examples/encoding/audio_encoding.py b/examples/encoding/audio_encoding.py
index 8bcc1e305..a657512b2 100644
--- a/examples/encoding/audio_encoding.py
+++ b/examples/encoding/audio_encoding.py
@@ -20,6 +20,7 @@
from IPython.display import Audio as play_audio
+# sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_audio2.jpg'
def make_sinewave() -> tuple[torch.Tensor, int]:
freq_A = 440 # Hz
sample_rate = 16000 # Hz
diff --git a/examples/encoding/video_encoding.py b/examples/encoding/video_encoding.py
new file mode 100644
index 000000000..4c589127e
--- /dev/null
+++ b/examples/encoding/video_encoding.py
@@ -0,0 +1,307 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+=======================================
+Encoding video frames with VideoEncoder
+=======================================
+
+In this example, we'll learn how to encode video frames to a file or to raw
+bytes using the :class:`~torchcodec.encoders.VideoEncoder` class.
+"""
+
+# %%
+# First, we'll download a video and decode some frames to tensors.
+# These will be the input to the :class:`~torchcodec.encoders.VideoEncoder`. For more details on decoding,
+# see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`.
+# Otherwise, skip ahead to :ref:`creating_encoder`.
+
+import requests
+from torchcodec.decoders import VideoDecoder
+from IPython.display import Video
+
+# sphinx_gallery_thumbnail_path = '_static/thumbnails/not_grumps_encoding_video.jpg'
+
+
+def play_video(encoded_bytes):
+ return Video(
+ data=encoded_bytes.numpy().tobytes(),
+ embed=True,
+ width=640,
+ height=360,
+ mimetype="video/mp4",
+ )
+
+
+# Video source: https://www.pexels.com/video/adorable-cats-on-the-lawn-4977395/
+# Author: Altaf Shah.
+url = "https://videos.pexels.com/video-files/4977395/4977395-hd_1920_1080_24fps.mp4"
+
+response = requests.get(url, headers={"User-Agent": ""})
+if response.status_code != 200:
+ raise RuntimeError(f"Failed to download video. {response.status_code = }.")
+
+raw_video_bytes = response.content
+
+decoder = VideoDecoder(raw_video_bytes)
+frames = decoder.get_frames_in_range(0, 60).data # Get first 60 frames
+frame_rate = decoder.metadata.average_fps
+
+# %%
+# .. _creating_encoder:
+#
+# Creating an encoder
+# -------------------
+#
+# Let's instantiate a :class:`~torchcodec.encoders.VideoEncoder`. We will need to provide
+# the frames to be encoded as a 4D tensor of shape
+# ``(num_frames, num_channels, height, width)`` with values in the ``[0, 255]``
+# range and ``torch.uint8`` dtype. We will also need to provide the frame rate of the input
+# video.
+#
+# .. note::
+#
+# The ``frame_rate`` parameter corresponds to the frame rate of the
+# *input* video. It will also be used for the frame rate of the *output* encoded video.
+from torchcodec.encoders import VideoEncoder
+
+print(f"{frames.shape = }, {frames.dtype = }")
+print(f"{frame_rate = } fps")
+
+encoder = VideoEncoder(frames=frames, frame_rate=frame_rate)
+
+# %%
+# .. _cuda_encoding:
+#
+# CUDA Encoding
+# -------------
+#
+# To encode on GPU, pass the frames as a CUDA tensor. This can result in significantly
+# faster encoding than CPU. The encoder will automatically select a CUDA-compatible
+# codec when frames are on a CUDA device, such as ``h264_nvenc`` or ``hevc_nvenc``.
+#
+# .. note::
+#
+# On GPU, the pixel format is always set to ``nv12`` (which does equivalent chroma subsampling
+# to ``yuv420p``). The ``pixel_format`` parameter is not supported for GPU encoding.
+#
+# .. code-block:: python
+#
+# gpu_frames = frames.to("cuda") # Move frames to GPU
+# gpu_encoder = VideoEncoder(frames=gpu_frames, frame_rate=frame_rate)
+#
+# That's it! The rest of the encoding process is the same as on CPU.
+
+# %%
+# Encoding to file, bytes, or file-like
+# -------------------------------------
+#
+# :class:`~torchcodec.encoders.VideoEncoder` supports encoding frames into a
+# file via the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, to
+# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_file_like`
+# method, or to raw bytes via :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`.
+# For now we will use :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`, so we
+# can easily inspect and display the encoded video.
+
+encoded_frames = encoder.to_tensor(format="mp4")
+play_video(encoded_frames)
+
+# %%
+#
+# Now that we have encoded data, we can decode it back to verify the
+# round-trip encode/decode process works as expected:
+
+decoder_verify = VideoDecoder(encoded_frames)
+decoded_frames = decoder_verify.get_frames_in_range(0, 60).data
+
+print(f"Re-decoded video: {decoded_frames.shape = }")
+print(f"Original frames: {frames.shape = }")
+
+# %%
+# .. _codec_selection:
+#
+# Codec Selection
+# ---------------
+#
+# By default, the codec used is selected automatically using the file extension provided
+# in the ``dest`` parameter for the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method,
+# or using the ``format`` parameter for the
+# :meth:`~torchcodec.encoders.VideoEncoder.to_file_like` and
+# :meth:`~torchcodec.encoders.VideoEncoder.to_tensor` methods.
+#
+# For example, when encoding to MP4 format, the default codec is typically ``H.264``.
+#
+# To use a codec other than the default, use the ``codec`` parameter.
+# You can specify either a specific codec implementation (e.g., ``"libx264"``)
+# or a codec specification (e.g., ``"h264"``). Different codecs offer
+# different tradeoffs between quality, file size, and encoding speed.
+#
+# .. note::
+#
+# To see available encoders on your system, run ``ffmpeg -encoders``.
+#
+# Let's encode the same frames using different codecs:
+
+import tempfile
+from pathlib import Path
+
+# H.264 encoding
+h264_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(h264_output, codec="libx264")
+
+# H.265 encoding
+hevc_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(hevc_output, codec="hevc")
+
+# Now let's use ffprobe to verify the codec used in the output files
+import subprocess
+
+for output, name in [(h264_output, "h264_output"), (hevc_output, "hevc_output")]:
+ result = subprocess.run(
+ [
+ "ffprobe",
+ "-v",
+ "error",
+ "-select_streams",
+ "v:0",
+ "-show_entries",
+ "stream=codec_name",
+ "-of",
+ "default=noprint_wrappers=1:nokey=1",
+ output,
+ ],
+ capture_output=True,
+ text=True,
+ )
+ print(f"Codec used in {name}: {result.stdout.strip()}")
+
+
+# %%
+# .. _pixel_format:
+#
+# Pixel Format
+# ------------
+#
+# The ``pixel_format`` parameter controls the color sampling (chroma subsampling)
+# of the output video. This affects both quality and file size.
+#
+# Common pixel formats:
+#
+# - ``"yuv420p"`` - 4:2:0 chroma subsampling (standard quality, smaller file size, widely compatible)
+# - ``"yuv444p"`` - 4:4:4 chroma subsampling (full chroma resolution, higher quality, larger file size)
+#
+# Most playback devices and platforms support ``yuv420p``, making it the most
+# common choice for video encoding.
+#
+# .. note::
+#
+#    Pixel format support depends on the codec used. Use ``ffmpeg -h encoder=<codec_name>``
+# to check available options for your selected codec.
+
+# Standard pixel format
+yuv420_encoded_frames = encoder.to_tensor(
+ format="mp4", codec="libx264", pixel_format="yuv420p"
+)
+play_video(yuv420_encoded_frames)
+
+# %%
+# .. _crf:
+#
+# CRF (Constant Rate Factor)
+# --------------------------
+#
+# The ``crf`` parameter controls video quality, where lower values produce higher quality output.
+#
+# For example, with the commonly used H.264 codec, ``libx264``:
+#
+# - Values range from 0 (lossless) to 51 (worst quality)
+# - Values 17 or 18 are considered visually lossless, and the default is 23.
+#
+# .. note::
+#
+# The range and interpretation of CRF values depend on the codec used, and
+#   not all codecs support CRF. Use ``ffmpeg -h encoder=<codec_name>`` to
+# check available options for your selected codec.
+#
+
+# High quality (low CRF)
+high_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=0)
+play_video(high_quality_output)
+
+# %%
+
+# Low quality (high CRF)
+low_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=50)
+play_video(low_quality_output)
+
+
+# %%
+# .. _preset:
+#
+# Preset
+# ------
+#
+# The ``preset`` parameter controls the tradeoff between encoding speed and file compression.
+# Faster presets encode faster but produce larger files, while slower
+# presets take more time to encode but result in better compression.
+#
+# For example, with the commonly used H.264 codec, ``libx264`` presets include
+# ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, and
+# ``"veryslow"`` (slowest, best compression). See the
+# `H.264 Video Encoding Guide <https://trac.ffmpeg.org/wiki/Encode/H.264>`_
+# for additional details.
+#
+# .. note::
+#
+#    Not all codecs support the ``preset`` option. Use ``ffmpeg -h encoder=<codec_name>``
+# to check available options for your selected codec.
+#
+
+# Fast encoding with a larger file size
+fast_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(fast_output, codec="libx264", preset="ultrafast")
+print(f"Size of fast encoded file: {Path(fast_output).stat().st_size} bytes")
+
+# Slow encoding for a smaller file size
+slow_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(slow_output, codec="libx264", preset="veryslow")
+print(f"Size of slow encoded file: {Path(slow_output).stat().st_size} bytes")
+
+# %%
+# .. _extra_options:
+#
+# Extra Options
+# -------------
+#
+# The ``extra_options`` parameter accepts a dictionary of codec-specific options
+# that would normally be set via FFmpeg command-line arguments. This enables
+# control of encoding settings beyond the common parameters.
+#
+# For example, some potential extra options for the commonly used H.264 codec, ``libx264`` include:
+#
+# - ``"g"`` - GOP (Group of Pictures) size / keyframe interval
+# - ``"max_b_frames"`` - Maximum number of B-frames between I and P frames
+# - ``"tune"`` - Tuning preset (e.g., ``"film"``, ``"animation"``, ``"grain"``)
+#
+# .. note::
+#
+#    Use ``ffmpeg -h encoder=<codec_name>`` to see all available options for
+# a specific codec.
+#
+
+
+custom_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+encoder.to_file(
+ custom_output,
+ codec="libx264",
+ extra_options={
+ "g": 50, # Keyframe every 50 frames
+ "max_b_frames": 0, # Disable B-frames for faster decoding
+ "tune": "fastdecode", # Optimize for fast decoding
+ }
+)
+
+# %%
diff --git a/mypy.ini b/mypy.ini
index bd0ee6ac8..f018ba4f8 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -4,3 +4,4 @@ files = src/torchcodec
show_error_codes = True
pretty = True
allow_redefinition = True
+follow_untyped_imports = True
diff --git a/packaging/install_ffmpeg.sh b/packaging/install_ffmpeg.sh
new file mode 100755
index 000000000..32907596a
--- /dev/null
+++ b/packaging/install_ffmpeg.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This script installs FFmpeg from conda-forge after asserting that FFmpeg is
+# not already installed.
+#
+# Usage:
+# install_ffmpeg.sh FFMPEG_VERSION
+# install_ffmpeg.sh 7.0.1
+# install_ffmpeg.sh 8.0
+
+set -euo pipefail
+
+if [ $# -lt 1 ]; then
+ echo "Error: Missing required FFmpeg version"
+ echo "Usage: install_ffmpeg.sh FFMPEG_VERSION"
+ echo "Example: install_ffmpeg.sh 7.0.1"
+ exit 1
+fi
+
+FFMPEG_VERSION="$1"
+
+# Ideally we would have checked for that before installing the wheel,
+# but we need to checkout the repo to access this file, and we don't
+# want to checkout the repo before installing the wheel to avoid any
+# side-effect. It's OK.
+source packaging/helpers.sh
+assert_ffmpeg_not_installed
+
+echo "Installing FFmpeg version $FFMPEG_VERSION from conda-forge..."
+conda install "ffmpeg=$FFMPEG_VERSION" -c conda-forge
+ffmpeg -version
diff --git a/packaging/install_pytorch.sh b/packaging/install_pytorch.sh
new file mode 100755
index 000000000..23611e938
--- /dev/null
+++ b/packaging/install_pytorch.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This script installs PyTorch and other optional torch packages like
+# torchvision from either the nightly or test channel based on the branch: test
+# for release branches (and PRs against a release branch), nightly otherwise
+#
+# Example usage:
+# install_pytorch.sh cpu "torch torchvision"
+# install_pytorch.sh cpu "torch"
+# install_pytorch.sh cu126 "torch torchvision"
+
+set -euo pipefail
+
+if [ $# -lt 2 ]; then
+ echo "Error: Missing required arguments"
+ echo "Usage: install_pytorch.sh COMPUTE_PLATFORM PACKAGES"
+ echo "Example: install_pytorch.sh cpu \"torch torchvision\""
+ exit 1
+fi
+
+COMPUTE_PLATFORM="$1"
+PACKAGES="$2"
+
+if [[ (${GITHUB_EVENT_NAME:-} = 'pull_request' && (${GITHUB_BASE_REF:-} = 'release'*)) || (${GITHUB_REF:-} = 'refs/heads/release'*) || (${GITHUB_REF:-} = refs/tags/v*) ]]; then
+ CHANNEL=test
+else
+ CHANNEL=nightly
+fi
+
+echo "Installing PyTorch packages: $PACKAGES"
+echo "Compute platform: $COMPUTE_PLATFORM"
+echo "Channel: $CHANNEL"
+
+python -m pip install --pre $PACKAGES --index-url https://download.pytorch.org/whl/${CHANNEL}/${COMPUTE_PLATFORM}
diff --git a/packaging/install_test_dependencies.sh b/packaging/install_test_dependencies.sh
new file mode 100755
index 000000000..69c2d6dcb
--- /dev/null
+++ b/packaging/install_test_dependencies.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This script installs the test dependencies needed to run the test suite.
+#
+# Example usage:
+# install_test_dependencies.sh
+
+set -euo pipefail
+
+echo "Installing test dependencies..."
+# Ideally we would find a way to get those dependencies from pyproject.toml
+python -m pip install numpy pytest pillow
+
+echo "Test dependencies installed successfully!"
diff --git a/packaging/install_torchcodec_wheel.sh b/packaging/install_torchcodec_wheel.sh
new file mode 100755
index 000000000..77b7b1383
--- /dev/null
+++ b/packaging/install_torchcodec_wheel.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This script finds and installs a torchcodec wheel from the dist directory. The
+# wheel is expected to have been built and downloaded from a separate job.
+#
+# Usage:
+# install_torchcodec_wheel.sh [WHEEL_PATTERN]
+#
+# Example usage:
+# install_torchcodec_wheel.sh
+# install_torchcodec_wheel.sh "*.whl"
+# install_torchcodec_wheel.sh "*cu126-cp310*.whl"
+
+set -euo pipefail
+
+WHEEL_PATTERN="${1:-*.whl}"
+
+wheel_path=$(find dist -type f -name "$WHEEL_PATTERN")
+
+if [ -z "$wheel_path" ]; then
+ echo "Error: No wheel found matching pattern '$WHEEL_PATTERN' in dist/"
+ exit 1
+fi
+
+echo "Installing $wheel_path"
+python -m pip install "$wheel_path" -vvv
diff --git a/packaging/remove_src.sh b/packaging/remove_src.sh
new file mode 100755
index 000000000..e3b2f43b6
--- /dev/null
+++ b/packaging/remove_src.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This script removes the src/ directory to ensure tests run against the
+# installed wheel rather than local source code.
+#
+# Usage:
+# remove_src.sh
+
+set -euo pipefail
+
+echo "Deleting src/ folder to ensure tests use installed wheel..."
+# The only reason we checked-out the repo is to get access to the
+# tests and to the helper scripts for the CI. We don't care about the rest.
+# Out of precaution, we delete
+# the src/ folder to be extra sure that we're running the code from
+# the installed wheel rather than from the source.
+# This is just to be extra cautious and very overkill because a)
+# there's no way the `torchcodec` package from src/ can be found from
+# the PythonPath: the main point of `src/` is precisely to protect
+# against that and b) if we ever were to execute code from
+# `src/torchcodec`, it would fail loudly because the built .so files
+# aren't present there.
+rm -r src/
+ls
+
+echo "src/ folder removed successfully!"
diff --git a/pyproject.toml b/pyproject.toml
index 6bdcd13f7..367786508 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
name = "torchcodec"
description = "A video decoder for PyTorch"
readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.10"
license-files = ["LICENSE"]
authors = [
{ name = "PyTorch Team", email = "packages@pytorch.org" },
@@ -32,7 +32,7 @@ dev = [
first_party_detection = false
[tool.black]
-target-version = ["py38"]
+target-version = ["py310"]
[tool.ufmt]
@@ -46,6 +46,10 @@ markers = [
"slow: mark test as slow"
]
+# Tells pytest not to run tests within this directory by default.
+# These tests can still be run by manually specifying the path.
+norecursedirs = ["third-party-interface"]
+
# We don't want to run the slow tests by default. These options are ignored in
# the CI, where we definitely want the 'slow' tests to run.
addopts = "-v -m 'not slow'"
diff --git a/setup.py b/setup.py
index b8211ea5e..63dd870ff 100644
--- a/setup.py
+++ b/setup.py
@@ -115,6 +115,9 @@ def _build_all_extensions_with_cmake(self):
torchcodec_disable_compile_warning_as_error = os.environ.get(
"TORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR", "OFF"
)
+ torchcodec_disable_homebrew_rpath = os.environ.get(
+ "TORCHCODEC_DISABLE_HOMEBREW_RPATH", "OFF"
+ )
python_version = sys.version_info
cmake_args = [
f"-DCMAKE_INSTALL_PREFIX={self._install_prefix}",
@@ -125,6 +128,7 @@ def _build_all_extensions_with_cmake(self):
f"-DPYTHON_VERSION={python_version.major}.{python_version.minor}",
f"-DENABLE_CUDA={enable_cuda}",
f"-DTORCHCODEC_DISABLE_COMPILE_WARNING_AS_ERROR={torchcodec_disable_compile_warning_as_error}",
+ f"-DTORCHCODEC_DISABLE_HOMEBREW_RPATH={torchcodec_disable_homebrew_rpath}",
]
self.build_temp = os.getenv("TORCHCODEC_CMAKE_BUILD_DIR", self.build_temp)
@@ -199,14 +203,12 @@ def _write_version_files():
# the content of `version.txt` plus some suffix like "+cpu" or "+cu112".
# See
# https://github.com/pytorch/test-infra/blob/61e6da7a6557152eb9879e461a26ad667c15f0fd/tools/pkg-helpers/pytorch_pkg_helpers/version.py#L113
- version = version.replace("+cpu", "")
with open(_ROOT_DIR / "version.txt", "w") as f:
f.write(f"{version}")
else:
with open(_ROOT_DIR / "version.txt") as f:
version = f.readline().strip()
try:
- version = version.replace("+cpu", "")
sha = (
subprocess.check_output(
["git", "rev-parse", "HEAD"], cwd=str(_ROOT_DIR)
diff --git a/src/torchcodec/__init__.py b/src/torchcodec/__init__.py
index 29131290f..c30bd93d2 100644
--- a/src/torchcodec/__init__.py
+++ b/src/torchcodec/__init__.py
@@ -4,10 +4,12 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
+from pathlib import Path
+
# Note: usort wants to put Frame and FrameBatch after decoders and samplers,
# but that results in circular import.
from ._frame import AudioSamples, Frame, FrameBatch # usort:skip # noqa
-from . import decoders, samplers # noqa
+from . import decoders, encoders, samplers, transforms # noqa
try:
# Note that version.py is generated during install.
diff --git a/src/torchcodec/_core/AVIOContextHolder.cpp b/src/torchcodec/_core/AVIOContextHolder.cpp
index c1188e684..42fccf7c5 100644
--- a/src/torchcodec/_core/AVIOContextHolder.cpp
+++ b/src/torchcodec/_core/AVIOContextHolder.cpp
@@ -4,8 +4,8 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
-#include "src/torchcodec/_core/AVIOContextHolder.h"
-#include <torch/types.h>
+#include "AVIOContextHolder.h"
+#include "StableABICompat.h"
namespace facebook::torchcodec {
@@ -16,20 +16,21 @@ void AVIOContextHolder::createAVIOContext(
void* heldData,
bool isForWriting,
int bufferSize) {
- TORCH_CHECK(
+ STD_TORCH_CHECK(
bufferSize > 0,
"Buffer size must be greater than 0; is " + std::to_string(bufferSize));
auto buffer = static_cast<uint8_t*>(av_malloc(bufferSize));
- TORCH_CHECK(
+ STD_TORCH_CHECK(
buffer != nullptr,
"Failed to allocate buffer of size " + std::to_string(bufferSize));
- TORCH_CHECK(seek != nullptr, "seek method must be defined");
+ STD_TORCH_CHECK(seek != nullptr, "seek method must be defined");
if (isForWriting) {
- TORCH_CHECK(write != nullptr, "write method must be defined for writing");
+ STD_TORCH_CHECK(
+ write != nullptr, "write method must be defined for writing");
} else {
- TORCH_CHECK(read != nullptr, "read method must be defined for reading");
+ STD_TORCH_CHECK(read != nullptr, "read method must be defined for reading");
}
avioContext_.reset(avioAllocContext(
@@ -43,7 +44,7 @@ void AVIOContextHolder::createAVIOContext(
if (!avioContext_) {
av_freep(&buffer);
- TORCH_CHECK(false, "Failed to allocate AVIOContext");
+ STD_TORCH_CHECK(false, "Failed to allocate AVIOContext");
}
}
diff --git a/src/torchcodec/_core/AVIOContextHolder.h b/src/torchcodec/_core/AVIOContextHolder.h
index 16d70beaf..7b1123e6d 100644
--- a/src/torchcodec/_core/AVIOContextHolder.h
+++ b/src/torchcodec/_core/AVIOContextHolder.h
@@ -6,7 +6,7 @@
#pragma once
-#include "src/torchcodec/_core/FFMPEGCommon.h"
+#include "FFMPEGCommon.h"
namespace facebook::torchcodec {
diff --git a/src/torchcodec/_core/AVIOFileLikeContext.cpp b/src/torchcodec/_core/AVIOFileLikeContext.cpp
index 210942b57..1331abd5b 100644
--- a/src/torchcodec/_core/AVIOFileLikeContext.cpp
+++ b/src/torchcodec/_core/AVIOFileLikeContext.cpp
@@ -4,8 +4,8 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
-#include "src/torchcodec/_core/AVIOFileLikeContext.h"
-#include <torch/types.h>
+#include "AVIOFileLikeContext.h"
+#include "StableABICompat.h"
namespace facebook::torchcodec {
@@ -20,16 +20,16 @@ AVIOFileLikeContext::AVIOFileLikeContext(
py::gil_scoped_acquire gil;
if (isForWriting) {
- TORCH_CHECK(
+ STD_TORCH_CHECK(
py::hasattr(fileLike, "write"),
"File like object must implement a write method for writing.");
} else {
- TORCH_CHECK(
+ STD_TORCH_CHECK(
py::hasattr(fileLike, "read"),
"File like object must implement a read method for reading.");
}
- TORCH_CHECK(
+ STD_TORCH_CHECK(
py::hasattr(fileLike, "seek"),
"File like object must implement a seek method.");
}
@@ -60,7 +60,7 @@ int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) {
break;
}
- TORCH_CHECK(
+ STD_TORCH_CHECK(
numBytesRead <= request,
"Requested up to ",
request,
diff --git a/src/torchcodec/_core/AVIOFileLikeContext.h b/src/torchcodec/_core/AVIOFileLikeContext.h
index fd7f534f3..001cda550 100644
--- a/src/torchcodec/_core/AVIOFileLikeContext.h
+++ b/src/torchcodec/_core/AVIOFileLikeContext.h
@@ -9,7 +9,7 @@
#include <pybind11/pybind11.h>
#include <string_view>
-#include "src/torchcodec/_core/AVIOContextHolder.h"
+#include "AVIOContextHolder.h"
namespace py = pybind11;
diff --git a/src/torchcodec/_core/AVIOTensorContext.cpp b/src/torchcodec/_core/AVIOTensorContext.cpp
index 263ce2228..5b1ac23ce 100644
--- a/src/torchcodec/_core/AVIOTensorContext.cpp
+++ b/src/torchcodec/_core/AVIOTensorContext.cpp
@@ -4,8 +4,8 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
-#include "src/torchcodec/_core/AVIOTensorContext.h"
-#include <torch/types.h>
+#include "AVIOTensorContext.h"
+#include "StableABICompat.h"
namespace facebook::torchcodec {
@@ -17,7 +17,7 @@ constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB
// The signature of this function is defined by FFMPEG.
int read(void* opaque, uint8_t* buf, int buf_size) {
auto tensorContext = static_cast<detail::TensorContext*>(opaque);
- TORCH_CHECK(
+ STD_TORCH_CHECK(
tensorContext->current_pos <= tensorContext->data.numel(),
"Tried to read outside of the buffer: current_pos=",
tensorContext->current_pos,
@@ -28,7 +28,7 @@ int read(void* opaque, uint8_t* buf, int buf_size) {
static_cast<int64_t>(buf_size),
tensorContext->data.numel() - tensorContext->current_pos);
- TORCH_CHECK(
+ STD_TORCH_CHECK(
numBytesRead >= 0,
"Tried to read negative bytes: numBytesRead=",
numBytesRead,
@@ -43,7 +43,8 @@ int read(void* opaque, uint8_t* buf, int buf_size) {
std::memcpy(
buf,
- tensorContext->data.data_ptr() + tensorContext->current_pos,
+ tensorContext->data.const_data_ptr() +
+ tensorContext->current_pos,
numBytesRead);
tensorContext->current_pos += numBytesRead;
return numBytesRead;
@@ -55,7 +56,7 @@ int write(void* opaque, const uint8_t* buf, int buf_size) {
int64_t bufSize = static_cast<int64_t>(buf_size);
if (tensorContext->current_pos + bufSize > tensorContext->data.numel()) {
- TORCH_CHECK(
+ STD_TORCH_CHECK(
tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE,
"We tried to allocate an output encoded tensor larger than ",
MAX_TENSOR_SIZE,
@@ -64,15 +65,15 @@ int write(void* opaque, const uint8_t* buf, int buf_size) {
// We double the size of the output tensor. Calling cat() may not be the
// most efficient, but it's simple.
tensorContext->data =
- torch::cat({tensorContext->data, tensorContext->data});
+ stableCat({tensorContext->data, tensorContext->data}, 0);
}
- TORCH_CHECK(
+ STD_TORCH_CHECK(
tensorContext->current_pos + bufSize <= tensorContext->data.numel(),
"Re-allocation of the output tensor didn't work. ",
"This should not happen, please report on TorchCodec bug tracker");
- uint8_t* outputTensorData = tensorContext->data.data_ptr();
+ uint8_t* outputTensorData = tensorContext->data.mutable_data_ptr();
std::memcpy(outputTensorData + tensorContext->current_pos, buf, bufSize);
tensorContext->current_pos += bufSize;
// Track the maximum position written so getOutputTensor's narrow() does not
@@ -104,18 +105,18 @@ int64_t seek(void* opaque, int64_t offset, int whence) {
} // namespace
-AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data)
+AVIOFromTensorContext::AVIOFromTensorContext(torch::stable::Tensor data)
: tensorContext_{data, 0, 0} {
- TORCH_CHECK(data.numel() > 0, "data must not be empty");
- TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
- TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8");
+ STD_TORCH_CHECK(data.numel() > 0, "data must not be empty");
+ STD_TORCH_CHECK(data.is_contiguous(), "data must be contiguous");
+ STD_TORCH_CHECK(data.scalar_type() == kStableUInt8, "data must be kUInt8");
createAVIOContext(
&read, nullptr, &seek, &tensorContext_, /*isForWriting=*/false);
}
AVIOToTensorContext::AVIOToTensorContext()
: tensorContext_{
- torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}),
+ torch::stable::empty({INITIAL_TENSOR_SIZE}, kStableUInt8),
0,
0} {
createAVIOContext(
diff --git a/src/torchcodec/_core/AVIOTensorContext.h b/src/torchcodec/_core/AVIOTensorContext.h
index bcd97052b..0a50856c6 100644
--- a/src/torchcodec/_core/AVIOTensorContext.h
+++ b/src/torchcodec/_core/AVIOTensorContext.h
@@ -6,15 +6,15 @@
#pragma once
-#include
-#include "src/torchcodec/_core/AVIOContextHolder.h"
+#include "AVIOContextHolder.h"
+#include "StableABICompat.h"
namespace facebook::torchcodec {
namespace detail {
struct TensorContext {
- torch::Tensor data;
+ torch::stable::Tensor data;
int64_t current_pos;
int64_t max_pos;
};
@@ -23,19 +23,19 @@ struct TensorContext {
// For Decoding: enables users to pass in the entire video or audio as bytes.
// Our read and seek functions then traverse the bytes in memory.
-class AVIOFromTensorContext : public AVIOContextHolder {
+class FORCE_PUBLIC_VISIBILITY AVIOFromTensorContext : public AVIOContextHolder {
public:
- explicit AVIOFromTensorContext(torch::Tensor data);
+ explicit AVIOFromTensorContext(torch::stable::Tensor data);
private:
detail::TensorContext tensorContext_;
};
// For Encoding: used to encode into an output uint8 (bytes) tensor.
-class AVIOToTensorContext : public AVIOContextHolder {
+class FORCE_PUBLIC_VISIBILITY AVIOToTensorContext : public AVIOContextHolder {
public:
explicit AVIOToTensorContext();
- torch::Tensor getOutputTensor();
+ torch::stable::Tensor getOutputTensor();
private:
detail::TensorContext tensorContext_;
diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
index 587456f34..804c3ba78 100644
--- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
@@ -4,20 +4,20 @@
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
-#include
-#include
+#include