Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate avif and heic decoders to torchvision-extra-decoders repo #8671

Merged
merged 36 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
8bbebcb
Migrate avif and heic decoders to torchvision-extra-decoders repo
NicolasHug Oct 3, 2024
83c6361
Merge branch 'main' of github.com:pytorch/vision into extra_decoders
NicolasHug Nov 11, 2024
cbbe0dc
Remove existing decoders, cleanup tests
NicolasHug Nov 11, 2024
ef737c4
Merge branch 'main' into extra_decoders
NicolasHug Nov 20, 2024
f130f2e
Install torchvision-extra-decoders for unittests
NicolasHug Nov 21, 2024
06cce0a
Filter out unittests
NicolasHug Nov 21, 2024
097c68f
Deactivate wheel and conda build jobs
NicolasHug Nov 21, 2024
937f2ab
Merge branch 'extra_decoders' of github.com:nicolashug/vision into ex…
NicolasHug Nov 21, 2024
a4e53eb
Switch unit test back to Manylinux2014
atalman Nov 27, 2024
c32adc8
Merge branch 'main' into fix_vision_unit_test
NicolasHug Nov 27, 2024
51f3c92
Merge branch 'main' into fix_vision_unit_test
atalman Nov 28, 2024
7739bea
Merge branch 'main' into fix_vision_unit_test
atalman Nov 28, 2024
8d1be3d
Merge branch 'main' into fix_vision_unit_test
NicolasHug Nov 28, 2024
8526117
Merge branch 'main' into fix_vision_unit_test
atalman Nov 29, 2024
2bd64ca
Merge branch 'main' of github.com:pytorch/vision into extra_decoders
NicolasHug Nov 29, 2024
5457718
Merge branch 'fix_vision_unit_test' into extra_decoders
NicolasHug Nov 29, 2024
11fd635
Merge branch 'main' of github.com:pytorch/vision into extra_decoders
NicolasHug Nov 29, 2024
7e267a6
Merge branch 'main' of github.com:pytorch/vision into extra_decoders
NicolasHug Dec 5, 2024
593f4d0
Merge branch 'main' of github.com:pytorch/vision into extra_decoders
NicolasHug Dec 10, 2024
5faf165
empty
NicolasHug Dec 10, 2024
cc76037
empty
NicolasHug Dec 10, 2024
b6b863e
Fix, not sure why
NicolasHug Dec 10, 2024
fc15059
Run more tests
NicolasHug Dec 10, 2024
d436178
Put back normal CI
NicolasHug Dec 10, 2024
7a787a0
Skip tests when not on linux
NicolasHug Dec 10, 2024
18f4952
Add reason
NicolasHug Dec 10, 2024
dd1c53c
Fix lint
NicolasHug Dec 10, 2024
5c83466
Added smoke test
NicolasHug Dec 10, 2024
ea1dd93
Fix??
NicolasHug Dec 10, 2024
b2e13d2
Fix macos stuff?
NicolasHug Dec 10, 2024
951396e
PLEASE :'(
NicolasHug Dec 10, 2024
10ff9fe
Fix some tests
NicolasHug Dec 11, 2024
01297e1
Comments etc.
NicolasHug Dec 11, 2024
77ef215
Some docs
NicolasHug Dec 11, 2024
2138b75
Skip some smoke tests
NicolasHug Dec 12, 2024
491704a
Merge branch 'main' into extra_decoders
NicolasHug Dec 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/scripts/setup-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ echo '::group::Install TorchVision'
python setup.py develop
echo '::endgroup::'

echo '::group::Install torchvision-extra-decoders'
# This can be done after torchvision was built
pip install torchvision-extra-decoders
echo '::endgroup::'

echo '::group::Collect environment information'
conda list
python -m torch.utils.collect_env
Expand Down
15 changes: 9 additions & 6 deletions docs/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ images and videos.
Image Decoding
--------------

Torchvision currently supports decoding JPEG, PNG, WEBP and GIF images. JPEG
decoding can also be done on CUDA GPUs.
Torchvision currently supports decoding JPEG, PNG, WEBP, GIF, AVIF, and HEIC
images. JPEG decoding can also be done on CUDA GPUs.

The main entry point is the :func:`~torchvision.io.decode_image` function, which
you can use as an alternative to ``PIL.Image.open()``. It will decode images
Expand All @@ -30,9 +30,10 @@ run transforms/preproc natively on tensors.


:func:`~torchvision.io.decode_image` will automatically detect the image format,
and call the corresponding decoder. You can also use the lower-level
format-specific decoders which can be more powerful, e.g. if you want to
encode/decode JPEGs on CUDA.
and call the corresponding decoder (except for HEIC and AVIF images, see details
in :func:`~torchvision.io.decode_avif` and :func:`~torchvision.io.decode_heic`).
You can also use the lower-level format-specific decoders which can be more
powerful, e.g. if you want to encode/decode JPEGs on CUDA.

.. autosummary::
:toctree: generated/
Expand All @@ -41,8 +42,10 @@ encode/decode JPEGs on CUDA.
decode_image
decode_jpeg
encode_png
decode_gif
decode_webp
decode_avif
decode_heic
decode_gif

.. autosummary::
:toctree: generated/
Expand Down
2 changes: 2 additions & 0 deletions packaging/post_build_script.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
#!/bin/bash
LD_LIBRARY_PATH="/usr/local/lib:$CUDA_HOME/lib64:$LD_LIBRARY_PATH" python packaging/wheel/relocate.py

pip install torchvision-extra-decoders
34 changes: 0 additions & 34 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
USE_PNG = os.getenv("TORCHVISION_USE_PNG", "1") == "1"
USE_JPEG = os.getenv("TORCHVISION_USE_JPEG", "1") == "1"
USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
USE_HEIC = os.getenv("TORCHVISION_USE_HEIC", "0") == "1" # TODO enable by default!
USE_AVIF = os.getenv("TORCHVISION_USE_AVIF", "0") == "1" # TODO enable by default!
USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
# Note: the GPU video decoding stuff used to be called "video codec", which
Expand Down Expand Up @@ -51,8 +49,6 @@
print(f"{USE_PNG = }")
print(f"{USE_JPEG = }")
print(f"{USE_WEBP = }")
print(f"{USE_HEIC = }")
print(f"{USE_AVIF = }")
print(f"{USE_NVJPEG = }")
print(f"{NVCC_FLAGS = }")
print(f"{USE_CPU_VIDEO_DECODER = }")
Expand Down Expand Up @@ -336,36 +332,6 @@ def make_image_extension():
else:
warnings.warn("Building torchvision without WEBP support")

if USE_HEIC:
heic_found, heic_include_dir, heic_library_dir = find_library(header="libheif/heif.h")
if heic_found:
print("Building torchvision with HEIC support")
print(f"{heic_include_dir = }")
print(f"{heic_library_dir = }")
if heic_include_dir is not None and heic_library_dir is not None:
# if those are None it means they come from standard paths that are already in the search paths, which we don't need to re-add.
include_dirs.append(heic_include_dir)
library_dirs.append(heic_library_dir)
libraries.append("heif")
define_macros += [("HEIC_FOUND", 1)]
else:
warnings.warn("Building torchvision without HEIC support")

if USE_AVIF:
avif_found, avif_include_dir, avif_library_dir = find_library(header="avif/avif.h")
if avif_found:
print("Building torchvision with AVIF support")
print(f"{avif_include_dir = }")
print(f"{avif_library_dir = }")
if avif_include_dir is not None and avif_library_dir is not None:
# if those are None it means they come from standard paths that are already in the search paths, which we don't need to re-add.
include_dirs.append(avif_include_dir)
library_dirs.append(avif_library_dir)
libraries.append("avif")
define_macros += [("AVIF_FOUND", 1)]
else:
warnings.warn("Building torchvision without AVIF support")

if USE_NVJPEG and (torch.cuda.is_available() or FORCE_CUDA):
nvjpeg_found = CUDA_HOME is not None and (Path(CUDA_HOME) / "include/nvjpeg.h").exists()

Expand Down
35 changes: 34 additions & 1 deletion test/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import torch
import torchvision
from torchvision.io import decode_image, decode_jpeg, decode_webp, read_file
from torchvision.io import decode_avif, decode_heic, decode_image, decode_jpeg, read_file
from torchvision.models import resnet50, ResNet50_Weights


Expand All @@ -24,13 +24,46 @@ def smoke_test_torchvision_read_decode() -> None:
img_jpg = decode_image(str(SCRIPT_DIR / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg"))
if img_jpg.shape != (3, 606, 517):
raise RuntimeError(f"Unexpected shape of img_jpg: {img_jpg.shape}")

img_png = decode_image(str(SCRIPT_DIR / "assets" / "interlaced_png" / "wizard_low.png"))
if img_png.shape != (4, 471, 354):
raise RuntimeError(f"Unexpected shape of img_png: {img_png.shape}")

img_webp = decode_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.webp"))
if img_webp.shape != (3, 100, 100):
raise RuntimeError(f"Unexpected shape of img_webp: {img_webp.shape}")

if sys.platform == "linux":
pass
# TODO: Fix/uncomment below (the TODO below is mostly accurate but we're
# still observing some failures on some CUDA jobs. Most are working.)
# if torch.cuda.is_available():
# # TODO: For whatever reason this only passes on the runners that
# # support CUDA.
# # Strangely, on the CPU runners where this fails, the AVIF/HEIC
# # tests (run with pytest) are passing. This is likely related to a
# # libcxx symbol thing, and the proper libstdc++.so gets loaded only
# # with pytest? Ugh.
# img_avif = decode_avif(read_file(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif")))
# if img_avif.shape != (3, 100, 100):
# raise RuntimeError(f"Unexpected shape of img_avif: {img_avif.shape}")

# img_heic = decode_heic(
# read_file(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic"))
# )
# if img_heic.shape != (3, 100, 100):
# raise RuntimeError(f"Unexpected shape of img_heic: {img_heic.shape}")
else:
try:
decode_avif(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif"))
except RuntimeError as e:
assert "torchvision-extra-decoders" in str(e)

try:
decode_heic(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch_incorrectly_encoded_but_who_cares.heic"))
except RuntimeError as e:
assert "torchvision-extra-decoders" in str(e)


def smoke_test_torchvision_decode_jpeg(device: str = "cpu"):
img_jpg_data = read_file(str(SCRIPT_DIR / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg"))
Expand Down
98 changes: 33 additions & 65 deletions test/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import re
import sys
from contextlib import nullcontext
from pathlib import Path

import numpy as np
Expand All @@ -14,11 +13,10 @@
import torchvision.transforms.v2.functional as F
from common_utils import assert_equal, cpu_and_cuda, IN_OSS_CI, needs_cuda
from PIL import __version__ as PILLOW_VERSION, Image, ImageOps, ImageSequence
from torchvision._internally_replaced_utils import IN_FBCODE
from torchvision.io.image import (
_decode_avif,
_decode_heic,
decode_avif,
decode_gif,
decode_heic,
decode_image,
decode_jpeg,
decode_png,
Expand All @@ -43,22 +41,11 @@
TOOSMALL_PNG = os.path.join(IMAGE_ROOT, "toosmall_png")
IS_WINDOWS = sys.platform in ("win32", "cygwin")
IS_MACOS = sys.platform == "darwin"
IS_LINUX = sys.platform == "linux"
PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split("."))
WEBP_TEST_IMAGES_DIR = os.environ.get("WEBP_TEST_IMAGES_DIR", "")
# See https://github.com/pytorch/vision/pull/8724#issuecomment-2503964558
ROCM_WEBP_MESSAGE = "ROCM not built with webp support."

# Hacky way of figuring out whether we compiled with libavif/libheif (those are
# currently disabled by default)
try:
_decode_avif(torch.arange(10, dtype=torch.uint8))
except Exception as e:
DECODE_AVIF_ENABLED = "torchvision not compiled with libavif support" not in str(e)

try:
_decode_heic(torch.arange(10, dtype=torch.uint8))
except Exception as e:
DECODE_HEIC_ENABLED = "torchvision not compiled with libheif support" not in str(e)
HEIC_AVIF_MESSAGE = "AVIF and HEIF only available on linux."


def _get_safe_image_name(name):
Expand Down Expand Up @@ -866,19 +853,23 @@ def test_decode_gif(tmpdir, name, scripted):
torch.testing.assert_close(tv_frame, pil_frame, atol=0, rtol=0)


decode_fun_and_match = [
(decode_png, "Content is not png"),
(decode_jpeg, "Not a JPEG file"),
(decode_gif, re.escape("DGifOpenFileName() failed - 103")),
(decode_webp, "WebPGetFeatures failed."),
]
if DECODE_AVIF_ENABLED:
decode_fun_and_match.append((_decode_avif, "BMFF parsing failed"))
if DECODE_HEIC_ENABLED:
decode_fun_and_match.append((_decode_heic, "Invalid input: No 'ftyp' box"))


@pytest.mark.parametrize("decode_fun, match", decode_fun_and_match)
@pytest.mark.parametrize(
"decode_fun, match",
[
(decode_png, "Content is not png"),
(decode_jpeg, "Not a JPEG file"),
(decode_gif, re.escape("DGifOpenFileName() failed - 103")),
(decode_webp, "WebPGetFeatures failed."),
pytest.param(
decode_avif, "BMFF parsing failed", marks=pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
),
pytest.param(
decode_heic,
"Invalid input: No 'ftyp' box",
marks=pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE),
),
],
)
def test_decode_bad_encoded_data(decode_fun, match):
encoded_data = torch.randint(0, 256, (100,), dtype=torch.uint8)
with pytest.raises(RuntimeError, match="Input tensor must be 1-dimensional"):
Expand Down Expand Up @@ -934,13 +925,10 @@ def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename)
img += 123 # make sure image buffer wasn't freed by underlying decoding lib


@pytest.mark.skipif(not DECODE_AVIF_ENABLED, reason="AVIF support not enabled.")
@pytest.mark.parametrize("decode_fun", (_decode_avif, decode_image))
@pytest.mark.parametrize("scripted", (False, True))
def test_decode_avif(decode_fun, scripted):
@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
@pytest.mark.parametrize("decode_fun", (decode_avif,))
def test_decode_avif(decode_fun):
encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif")))
if scripted:
decode_fun = torch.jit.script(decode_fun)
img = decode_fun(encoded_bytes)
assert img.shape == (3, 100, 100)
assert img[None].is_contiguous(memory_format=torch.channels_last)
Expand All @@ -949,16 +937,8 @@ def test_decode_avif(decode_fun, scripted):

# Note: decode_image fails because some of these files have a (valid) signature
# we don't recognize. We should probably use libmagic....
decode_funs = []
if DECODE_AVIF_ENABLED:
decode_funs.append(_decode_avif)
if DECODE_HEIC_ENABLED:
decode_funs.append(_decode_heic)


@pytest.mark.skipif(not decode_funs, reason="Built without avif and heic support.")
@pytest.mark.parametrize("decode_fun", decode_funs)
@pytest.mark.parametrize("scripted", (False, True))
@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
@pytest.mark.parametrize("decode_fun", (decode_avif, decode_heic))
@pytest.mark.parametrize(
"mode, pil_mode",
(
Expand All @@ -970,7 +950,7 @@ def test_decode_avif(decode_fun, scripted):
@pytest.mark.parametrize(
"filename", Path("/home/nicolashug/dev/libavif/tests/data/").glob("*.avif"), ids=lambda p: p.name
)
def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, filename):
def test_decode_avif_heic_against_pil(decode_fun, mode, pil_mode, filename):
if "reversed_dimg_order" in str(filename):
# Pillow properly decodes this one, but we don't (order of parts of the
# image is wrong). This is due to a bug that was recently fixed in
Expand All @@ -980,8 +960,6 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
import pillow_avif # noqa

encoded_bytes = read_file(filename)
if scripted:
decode_fun = torch.jit.script(decode_fun)
try:
img = decode_fun(encoded_bytes, mode=mode)
except RuntimeError as e:
Expand All @@ -994,6 +972,7 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
"no 'ispe' property",
"'iref' has double references",
"Invalid image grid",
"decode_heif failed: Invalid input: No 'meta' box",
)
):
pytest.skip(reason="Expected failure, that's OK")
Expand All @@ -1010,7 +989,7 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
try:
from_pil = F.pil_to_tensor(Image.open(filename).convert(pil_mode))
except RuntimeError as e:
if "Invalid image grid" in str(e):
if any(s in str(e) for s in ("Invalid image grid", "Failed to decode image: Not implemented")):
pytest.skip(reason="PIL failure")
else:
raise e
Expand All @@ -1021,7 +1000,7 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
g = make_grid([img, from_pil])
F.to_pil_image(g).save((f"/home/nicolashug/out_images/{filename.name}.{pil_mode}.png"))

is_decode_heic = getattr(decode_fun, "__name__", getattr(decode_fun, "name", None)) == "_decode_heic"
is_decode_heic = getattr(decode_fun, "__name__", getattr(decode_fun, "name", None)) == "decode_heic"
if mode == ImageReadMode.RGB and not is_decode_heic:
# We don't compare torchvision's AVIF against PIL for RGB because
# results look pretty different on RGBA images (other images are fine).
Expand All @@ -1035,13 +1014,10 @@ def test_decode_avif_heic_against_pil(decode_fun, scripted, mode, pil_mode, file
torch.testing.assert_close(img, from_pil, rtol=0, atol=3)


@pytest.mark.skipif(not DECODE_HEIC_ENABLED, reason="HEIC support not enabled yet.")
@pytest.mark.parametrize("decode_fun", (_decode_heic, decode_image))
@pytest.mark.parametrize("scripted", (False, True))
def test_decode_heic(decode_fun, scripted):
@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
@pytest.mark.parametrize("decode_fun", (decode_heic,))
def test_decode_heic(decode_fun):
encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".heic")))
if scripted:
decode_fun = torch.jit.script(decode_fun)
img = decode_fun(encoded_bytes)
assert img.shape == (3, 100, 100)
assert img[None].is_contiguous(memory_format=torch.channels_last)
Expand Down Expand Up @@ -1080,13 +1056,5 @@ def test_mode_str():
assert decode_image(path, mode="RGBA").shape[0] == 4


def test_avif_heic_fbcode():
cm = nullcontext() if IN_FBCODE else pytest.raises(ImportError, match="cannot import")
with cm:
from torchvision.io import decode_heic # noqa
with cm:
from torchvision.io import decode_avif # noqa


if __name__ == "__main__":
pytest.main([__file__])
Loading
Loading