pytorch · NicolasHug · Dec 12, 2024 · Oct 3, 2024 · Nov 11, 2024 · Nov 11, 2024
diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh
@@ -102,6 +102,11 @@ echo '::group::Install TorchVision'
 python setup.py develop
 echo '::endgroup::'
 
+echo '::group::Install torchvision-extra-decoders'
+# This can be done once torchvision has been built
+pip install torchvision-extra-decoders
+echo '::endgroup::'
+
 echo '::group::Collect environment information'
 conda list
 python -m torch.utils.collect_env

diff --git a/docs/source/io.rst b/docs/source/io.rst
@@ -9,8 +9,8 @@ images and videos.
 Image Decoding
 --------------
 
-Torchvision currently supports decoding JPEG, PNG, WEBP and GIF images. JPEG
-decoding can also be done on CUDA GPUs.
+Torchvision currently supports decoding JPEG, PNG, WEBP, GIF, AVIF, and HEIC
+images. JPEG decoding can also be done on CUDA GPUs.
 
 The main entry point is the :func:`~torchvision.io.decode_image` function, which
 you can use as an alternative to ``PIL.Image.open()``. It will decode images
@@ -30,9 +30,10 @@ run transforms/preproc natively on tensors.
 
 
 :func:`~torchvision.io.decode_image` will automatically detect the image format,
-and call the corresponding decoder. You can also use the lower-level
-format-specific decoders which can be more powerful, e.g. if you want to
-encode/decode JPEGs on CUDA.
+and call the corresponding decoder (except for HEIC and AVIF images, see details
+in :func:`~torchvision.io.decode_avif` and :func:`~torchvision.io.decode_heic`).
+You can also use the lower-level format-specific decoders which can be more
+powerful, e.g. if you want to encode/decode JPEGs on CUDA.
 
 .. autosummary::
     :toctree: generated/
@@ -41,8 +42,10 @@ encode/decode JPEGs on CUDA.
     decode_image
     decode_jpeg
     encode_png
-    decode_gif
     decode_webp
+    decode_avif
+    decode_heic
+    decode_gif
 
 .. autosummary::
     :toctree: generated/

diff --git a/packaging/post_build_script.sh b/packaging/post_build_script.sh
@@ -1,2 +1,4 @@
 #!/bin/bash
 LD_LIBRARY_PATH="/usr/local/lib:$CUDA_HOME/lib64:$LD_LIBRARY_PATH" python packaging/wheel/relocate.py
+
+pip install torchvision-extra-decoders
diff --git a/setup.py b/setup.py
@@ -19,8 +19,6 @@
 USE_PNG = os.getenv("TORCHVISION_USE_PNG", "1") == "1"
 USE_JPEG = os.getenv("TORCHVISION_USE_JPEG", "1") == "1"
 USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
-USE_HEIC = os.getenv("TORCHVISION_USE_HEIC", "0") == "1"  # TODO enable by default!
-USE_AVIF = os.getenv("TORCHVISION_USE_AVIF", "0") == "1"  # TODO enable by default!
 USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
 NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
 # Note: the GPU video decoding stuff used to be called "video codec", which
@@ -51,8 +49,6 @@
 print(f"{USE_PNG = }")
 print(f"{USE_JPEG = }")
 print(f"{USE_WEBP = }")
-print(f"{USE_HEIC = }")
-print(f"{USE_AVIF = }")
 print(f"{USE_NVJPEG = }")
 print(f"{NVCC_FLAGS = }")
 print(f"{USE_CPU_VIDEO_DECODER = }")
@@ -336,36 +332,6 @@ def make_image_extension():
         else:
             warnings.warn("Building torchvision without WEBP support")
 
-    if USE_HEIC:
-        heic_found, heic_include_dir, heic_library_dir = find_library(header="libheif/heif.h")
-        if heic_found:
-            print("Building torchvision with HEIC support")
-            print(f"{heic_include_dir = }")
-            print(f"{heic_library_dir = }")
-            if heic_include_dir is not None and heic_library_dir is not None:
-                # if those are None it means they come from standard paths that are already in the search paths, which we don't need to re-add.
-                include_dirs.append(heic_include_dir)
-                library_dirs.append(heic_library_dir)
-            libraries.append("heif")
-            define_macros += [("HEIC_FOUND", 1)]
-        else:
-            warnings.warn("Building torchvision without HEIC support")
-
-    if USE_AVIF:
-        avif_found, avif_include_dir, avif_library_dir = find_library(header="avif/avif.h")
-        if avif_found:
-            print("Building torchvision with AVIF support")
-            print(f"{avif_include_dir = }")
-            print(f"{avif_library_dir = }")
-            if avif_include_dir is not None and avif_library_dir is not None:
-                # if those are None it means they come from standard paths that are already in the search paths, which we don't need to re-add.
-                include_dirs.append(avif_include_dir)
-                library_dirs.append(avif_library_dir)
-            libraries.append("avif")
-            define_macros += [("AVIF_FOUND", 1)]
-        else:
-            warnings.warn("Building torchvision without AVIF support")
-
     if USE_NVJPEG and (torch.cuda.is_available() or FORCE_CUDA):
         nvjpeg_found = CUDA_HOME is not None and (Path(CUDA_HOME) / "include/nvjpeg.h").exists()
 

diff --git a/test/smoke_test.py b/test/smoke_test.py
@@ -6,7 +6,7 @@
 
 import torch
 import torchvision
-from torchvision.io import decode_image, decode_jpeg, decode_webp, read_file
+from torchvision.io import decode_avif, decode_heic, decode_image, decode_jpeg, read_file
 from torchvision.models import resnet50, ResNet50_Weights
 
 
@@ -24,13 +24,46 @@ def smoke_test_torchvision_read_decode() -> None:
     img_jpg = decode_image(str(SCRIPT_DIR / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg"))
     if img_jpg.shape != (3, 606, 517):
         raise RuntimeError(f"Unexpected shape of img_jpg: {img_jpg.shape}")
+
     img_png = decode_image(str(SCRIPT_DIR / "assets" / "interlaced_png" / "wizard_low.png"))
     if img_png.shape != (4, 471, 354):
         raise RuntimeError(f"Unexpected shape of img_png: {img_png.shape}")
+
     img_webp = decode_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.webp"))
     if img_webp.shape != (3, 100, 100):
         raise RuntimeError(f"Unexpected shape of img_webp: {img_webp.shape}")
 
+    if sys.platform == "linux":
+        pass
+        # TODO: Fix/uncomment below. (The nested TODO below is mostly accurate, but
+        # we're still observing failures on some CUDA jobs; most are working.)
+        # if torch.cuda.is_available():
+        #     # TODO: For whatever reason this only passes on the runners that
+        #     # support CUDA.
+        #     # Strangely, on the CPU runners where this fails, the AVIF/HEIC
+        #     # tests (run with pytest) are passing. This is likely related to a
+        #     # libcxx symbol issue: maybe the proper libstdc++.so gets loaded
+        #     # only with pytest? Ugh.
+        #     img_avif = decode_avif(read_file(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif")))
+        #     if img_avif.shape != (3, 100, 100):
+        #         raise RuntimeError(f"Unexpected shape of img_avif: {img_avif.shape}")
+
+        #     img_heic = decode_heic(
+        #         read_file(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic"))
+        #     )
+        #     if img_heic.shape != (3, 100, 100):
+        #         raise RuntimeError(f"Unexpected shape of img_heic: {img_heic.shape}")
+    else:
+        try:
+            decode_avif(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif"))
+        except RuntimeError as e:
+            assert "torchvision-extra-decoders" in str(e)
+
+        try:
+            decode_heic(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic"))
+        except RuntimeError as e:
+            assert "torchvision-extra-decoders" in str(e)
+
 
 def smoke_test_torchvision_decode_jpeg(device: str = "cpu"):
     img_jpg_data = read_file(str(SCRIPT_DIR / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg"))

diff --git a/test/test_image.py b/test/test_image.py
@@ -4,7 +4,6 @@
 import os
 import re
 import sys
-from contextlib import nullcontext
 from pathlib import Path
 
 import numpy as np
@@ -14,11 +13,10 @@
 import torchvision.transforms.v2.functional as F
 from common_utils import assert_equal, cpu_and_cuda, IN_OSS_CI, needs_cuda
 from PIL import __version__ as PILLOW_VERSION, Image, ImageOps, ImageSequence
-from torchvision._internally_replaced_utils import IN_FBCODE
 from torchvision.io.image import (
-    _decode_avif,
-    _decode_heic,
+    decode_avif,
     decode_gif,
+    decode_heic,
     decode_image,
     decode_jpeg,
     decode_png,
@@ -43,22 +41,11 @@
 TOOSMALL_PNG = os.path.join(IMAGE_ROOT, "toosmall_png")
 IS_WINDOWS = sys.platform in ("win32", "cygwin")
 IS_MACOS = sys.platform == "darwin"
+IS_LINUX = sys.platform == "linux"
 PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split("."))
 WEBP_TEST_IMAGES_DIR = os.environ.get("WEBP_TEST_IMAGES_DIR", "")
 # See https://github.com/pytorch/vision/pull/8724#issuecomment-2503964558
-ROCM_WEBP_MESSAGE = "ROCM not built with webp support."
-
-# Hacky way of figuring out whether we compiled with libavif/libheif (those are
-# currenlty disabled by default)
-try:
-    _decode_avif(torch.arange(10, dtype=torch.uint8))
-except Exception as e:
-    DECODE_AVIF_ENABLED = "torchvision not compiled with libavif support" not in str(e)
-
-try:
-    _decode_heic(torch.arange(10, dtype=torch.uint8))
-except Exception as e:
-    DECODE_HEIC_ENABLED = "torchvision not compiled with libheif support" not in str(e)
+HEIC_AVIF_MESSAGE = "AVIF and HEIF only available on linux."
 
 
 def _get_safe_image_name(name):
@@ -866,19 +853,23 @@ def test_decode_gif(tmpdir, name, scripted):
             torch.testing.assert_close(tv_frame, pil_frame, atol=0, rtol=0)
 
 
-decode_fun_and_match = [
-    (decode_png, "Content is not png"),
-    (decode_jpeg, "Not a JPEG file"),
-    (decode_gif, re.escape("DGifOpenFileName() failed - 103")),
-    (decode_webp, "WebPGetFeatures failed."),
-]
-if DECODE_AVIF_ENABLED:
-    decode_fun_and_match.append((_decode_avif, "BMFF parsing failed"))
-if DECODE_HEIC_ENABLED:
-    decode_fun_and_match.append((_decode_heic, "Invalid input: No 'ftyp' box"))
-
-
-@pytest.mark.parametrize("decode_fun, match", decode_fun_and_match)
+@pytest.mark.parametrize(
+    "decode_fun, match",
+    [
+        (decode_png, "Content is not png"),
+        (decode_jpeg, "Not a JPEG file"),
+        (decode_gif, re.escape("DGifOpenFileName() failed - 103")),
+        (decode_webp, "WebPGetFeatures failed."),
+        pytest.param(
+            decode_avif, "BMFF parsing failed", marks=pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
+        ),
+        pytest.param(
+            decode_heic,
+            "Invalid input: No 'ftyp' box",
+            marks=pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE),
+        ),
+    ],
+)
 def test_decode_bad_encoded_data(decode_fun, match):
     encoded_data = torch.randint(0, 256, (100,), dtype=torch.uint8)
     with pytest.raises(RuntimeError, match="Input tensor must be 1-dimensional"):
@@ -934,13 +925,10 @@ def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename)
     img += 123  # make sure image buffer wasn't freed by underlying decoding lib
 
 
-@pytest.mark.skipif(not DECODE_AVIF_ENABLED, reason="AVIF support not enabled.")
-@pytest.mark.parametrize("decode_fun", (_decode_avif, decode_image))
-@pytest.mark.parametrize("scripted", (False, True))
-def test_decode_avif(decode_fun, scripted):
+@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
+@pytest.mark.parametrize("decode_fun", (decode_avif,))
+def test_decode_avif(decode_fun):
     encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif")))
-    if scripted:
-        decode_fun = torch.jit.script(decode_fun)
     img = decode_fun(encoded_bytes)
     assert img.shape == (3, 100, 100)
     assert img[None].is_contiguous(memory_format=torch.channels_last)
@@ -949,16 +937,8 @@ def test_decode_avif(decode_fun, scripted):
 
 # Note: decode_image fails because some of these files have a (valid) signature
 # we don't recognize. We should probably use libmagic....
-decode_funs = []
-if DECODE_AVIF_ENABLED:
-    decode_funs.append(_decode_avif)
-if DECODE_HEIC_ENABLED:
-    decode_funs.append(_decode_heic)
-
-
-@pytest.mark.skipif(not decode_funs, reason="Built without avif and heic support.")
-@pytest.mark.parametrize("decode_fun", decode_funs)
-@pytest.mark.parametrize("scripted", (False, True))
+@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
+@pytest.mark.parametrize("decode_fun", (decode_avif, decode_heic))
 @pytest.mark.parametrize(
     "mode, pil_mode",
     (
@@ -970,7 +950,7 @@ def test_decode_avif(decode_fun, scripted):
 @pytest.mark.parametrize(
     "filename", Path("/home/nicolashug/dev/libavif/tests/data/").glob("*.avif"), ids=lambda p: p.name
 )
-def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, filename):
+def test_decode_avif_heic_against_pil(decode_fun, mode, pil_mode, filename):
     if "reversed_dimg_order" in str(filename):
         # Pillow properly decodes this one, but we don't (order of parts of the
         # image is wrong). This is due to a bug that was recently fixed in
@@ -980,8 +960,6 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
     import pillow_avif  # noqa
 
     encoded_bytes = read_file(filename)
-    if scripted:
-        decode_fun = torch.jit.script(decode_fun)
     try:
         img = decode_fun(encoded_bytes, mode=mode)
     except RuntimeError as e:
@@ -994,6 +972,7 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
                 "no 'ispe' property",
                 "'iref' has double references",
                 "Invalid image grid",
+                "decode_heif failed: Invalid input: No 'meta' box",
             )
         ):
             pytest.skip(reason="Expected failure, that's OK")
@@ -1010,7 +989,7 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
     try:
         from_pil = F.pil_to_tensor(Image.open(filename).convert(pil_mode))
     except RuntimeError as e:
-        if "Invalid image grid" in str(e):
+        if any(s in str(e) for s in ("Invalid image grid", "Failed to decode image: Not implemented")):
             pytest.skip(reason="PIL failure")
         else:
             raise e
@@ -1021,7 +1000,7 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
         g = make_grid([img, from_pil])
         F.to_pil_image(g).save((f"/home/nicolashug/out_images/{filename.name}.{pil_mode}.png"))
 
-    is_decode_heic = getattr(decode_fun, "__name__", getattr(decode_fun, "name", None)) == "_decode_heic"
+    is_decode_heic = getattr(decode_fun, "__name__", getattr(decode_fun, "name", None)) == "decode_heic"
     if mode == ImageReadMode.RGB and not is_decode_heic:
         # We don't compare torchvision's AVIF against PIL for RGB because
         # results look pretty different on RGBA images (other images are fine).
@@ -1035,13 +1014,10 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
     torch.testing.assert_close(img, from_pil, rtol=0, atol=3)
 
 
-@pytest.mark.skipif(not DECODE_HEIC_ENABLED, reason="HEIC support not enabled yet.")
-@pytest.mark.parametrize("decode_fun", (_decode_heic, decode_image))
-@pytest.mark.parametrize("scripted", (False, True))
-def test_decode_heic(decode_fun, scripted):
+@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
+@pytest.mark.parametrize("decode_fun", (decode_heic,))
+def test_decode_heic(decode_fun):
     encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".heic")))
-    if scripted:
-        decode_fun = torch.jit.script(decode_fun)
     img = decode_fun(encoded_bytes)
     assert img.shape == (3, 100, 100)
     assert img[None].is_contiguous(memory_format=torch.channels_last)
@@ -1080,13 +1056,5 @@ def test_mode_str():
     assert decode_image(path, mode="RGBA").shape[0] == 4
 
 
-def test_avif_heic_fbcode():
-    cm = nullcontext() if IN_FBCODE else pytest.raises(ImportError, match="cannot import")
-    with cm:
-        from torchvision.io import decode_heic  # noqa
-    with cm:
-        from torchvision.io import decode_avif  # noqa
-
-
 if __name__ == "__main__":
     pytest.main([__file__])