diff --git a/docs/api/datamodules.rst b/docs/api/datamodules.rst index 8270052ae9..d0753a7956 100644 --- a/docs/api/datamodules.rst +++ b/docs/api/datamodules.rst @@ -114,6 +114,11 @@ GID-15 .. autoclass:: GID15DataModule +Glacier Calving Front +^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: GlacierCalvingFrontDataModule + Inria Aerial Image Labeling ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index 72c55139c0..419b7ceacf 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -307,6 +307,11 @@ GID-15 .. autoclass:: GID15 +Glacier Calving Front +^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: GlacierCalvingFront + IDTReeS ^^^^^^^ diff --git a/docs/api/datasets/non_geo_datasets.csv b/docs/api/datasets/non_geo_datasets.csv index abdd41cc1f..4cbce62338 100644 --- a/docs/api/datasets/non_geo_datasets.csv +++ b/docs/api/datasets/non_geo_datasets.csv @@ -20,6 +20,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands `Forest Damage`_,OD,Drone imagery,"CDLA-Permissive-1.0","1,543",4,"1,500x1,500",,RGB `GeoNRW`_,S,Aerial,"CC-BY-4.0","7,783",11,"1,000x1,000",1,"RGB, DEM" `GID-15`_,S,Gaofen-2,-,150,15,"6,800x7,200",3,RGB +`Glacier Calving Front`_,S,"Sentinel-1, TerraSAR-X, TanDEM-X, ENVISAT, European Remote Sensing Satellite 1&2, ALOS PALSAR, and RADARSAT-1","CC-BY-4.0","93,820",4,"256x256",6--20,"SAR" `IDTReeS`_,"OD,C",Aerial,"CC-BY-4.0",591,33,200x200,0.1--1,RGB `Inria Aerial Image Labeling`_,S,Aerial,-,360,2,"5,000x5,000",0.3,RGB `LandCover.ai`_,S,Aerial,"CC-BY-NC-SA-4.0","10,674",5,512x512,0.25--0.5,RGB diff --git a/tests/conf/glacier_calving_front.yaml b/tests/conf/glacier_calving_front.yaml new file mode 100644 index 0000000000..58c246fa6b --- /dev/null +++ b/tests/conf/glacier_calving_front.yaml @@ -0,0 +1,16 @@ +model: + class_path: SemanticSegmentationTask + init_args: + loss: 'ce' + model: 'unet' + backbone: 'resnet18' + in_channels: 3 + num_classes: 11 + num_filters: 1 + ignore_index: null +data: + 
class_path: GlacierCalvingFrontDataModule + init_args: + batch_size: 1 + dict_kwargs: + root: 'tests/data/glacier_calving_front' diff --git a/tests/data/glacier_calving_front/data.py b/tests/data/glacier_calving_front/data.py new file mode 100644 index 0000000000..398122e5f3 --- /dev/null +++ b/tests/data/glacier_calving_front/data.py @@ -0,0 +1,75 @@ +import hashlib +import os +import shutil + +import numpy as np +from PIL import Image + +# Define the root directory and subdirectories +root_dir = 'glacier_calving_data' +sub_dirs = ['zones', 'sar_images', 'fronts'] +splits = ['train', 'val', 'test'] + +zone_file_names = [ + 'Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png', + 'Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png', + 'JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png', +] + +IMG_SIZE = 32 + + +# Function to create dummy images +def create_dummy_image(path: str, shape: tuple[int], pixel_values: list[int]) -> None: + data = np.random.choice(pixel_values, size=shape, replace=True).astype(np.uint8) + img = Image.fromarray(data) + img.save(path) + + +def create_zone_images(split: str, filename: str) -> None: + zone_pixel_values = [0, 64, 127, 255] + path = os.path.join(root_dir, 'zones', split, filename) + create_dummy_image(path, (IMG_SIZE, IMG_SIZE), zone_pixel_values) + + +def create_sar_images(split: str, filename: str) -> None: + sar_pixel_values = range(256) + path = os.path.join(root_dir, 'sar_images', split, filename) + create_dummy_image(path, (IMG_SIZE, IMG_SIZE), sar_pixel_values) + + +def create_front_images(split: str, filename: str) -> None: + sar_pixel_values = range(256) + path = os.path.join(root_dir, 'fronts', split, filename) + create_dummy_image(path, (IMG_SIZE, IMG_SIZE), sar_pixel_values) + + +if os.path.exists(root_dir): + shutil.rmtree(root_dir) + +# Create the directory structure +for sub_dir in sub_dirs: + for split in splits: + os.makedirs(os.path.join(root_dir, sub_dir, split), exist_ok=True) + +# Create 
dummy data for all splits and filenames +for split in splits: + for filename in zone_file_names: + create_zone_images(split, filename) + create_sar_images(split, filename.replace('_zones_', '_')) + create_front_images(split, filename.replace('_zones_', '_front_')) + +# zip and compute md5 +shutil.make_archive(root_dir, 'zip', '.', root_dir) + + +def md5(fname: str) -> str: + hash_md5 = hashlib.md5() + with open(fname, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b''): + hash_md5.update(chunk) + return hash_md5.hexdigest() + + +md5sum = md5('glacier_calving_data.zip') +print(f'MD5 checksum: {md5sum}') diff --git a/tests/data/glacier_calving_front/glacier_calving_data.zip b/tests/data/glacier_calving_front/glacier_calving_data.zip new file mode 100644 index 0000000000..213e24d787 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data.zip differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png b/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png new file mode 100644 index 0000000000..89acc3abe5 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png new file mode 100644 index 0000000000..5ab807fb56 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png 
b/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png new file mode 100644 index 0000000000..2d421c7aa4 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/test/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png b/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png new file mode 100644 index 0000000000..50a3b94d70 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png new file mode 100644 index 0000000000..36b48cae29 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png b/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png new file mode 100644 index 0000000000..a72a26befa Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/train/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png 
b/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png new file mode 100644 index 0000000000..577b6a48f4 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/Crane_2002-11-09_ERS_20_2_061_front__93_102_0_0_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png new file mode 100644 index 0000000000..fda7446ae4 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/Crane_2007-09-22_ENVISAT_20_1_467_front__93_102_8_1024_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png b/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png new file mode 100644 index 0000000000..fa006e49b3 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/fronts/val/JAC_2015-12-23_TSX_6_1_005_front__57_49_195_384_1024.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png new file mode 100644 index 0000000000..fac8fd4cb2 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png new 
file mode 100644 index 0000000000..20dd1ac2b6 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png new file mode 100644 index 0000000000..ddd256dc70 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/test/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png new file mode 100644 index 0000000000..188c8513d4 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png new file mode 100644 index 0000000000..1ca9e0609a Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png new file mode 100644 index 0000000000..58dbf12aaa Binary files /dev/null and 
b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/train/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png new file mode 100644 index 0000000000..47c35c0ee3 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/Crane_2002-11-09_ERS_20_2_061__93_102_0_0_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png new file mode 100644 index 0000000000..ceb1b70ca8 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/Crane_2007-09-22_ENVISAT_20_1_467__93_102_8_1024_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png new file mode 100644 index 0000000000..75740fba1c Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/sar_images/val/JAC_2015-12-23_TSX_6_1_005__57_49_195_384_1024.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/zones/test/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/test/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png new file mode 100644 index 0000000000..668c6509fc Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/test/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png differ diff --git 
a/tests/data/glacier_calving_front/glacier_calving_data/zones/test/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/test/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png new file mode 100644 index 0000000000..99823776f4 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/test/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/zones/test/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/test/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png new file mode 100644 index 0000000000..0612f34297 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/test/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/zones/train/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/train/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png new file mode 100644 index 0000000000..673bde99a0 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/train/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/zones/train/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/train/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png new file mode 100644 index 0000000000..5d6ad51067 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/train/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png differ diff --git 
a/tests/data/glacier_calving_front/glacier_calving_data/zones/train/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/train/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png new file mode 100644 index 0000000000..8b70c3cebb Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/train/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/zones/val/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/val/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png new file mode 100644 index 0000000000..604563038b Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/val/Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/zones/val/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/val/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png new file mode 100644 index 0000000000..fc0b6ee2f7 Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/val/Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png differ diff --git a/tests/data/glacier_calving_front/glacier_calving_data/zones/val/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png b/tests/data/glacier_calving_front/glacier_calving_data/zones/val/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png new file mode 100644 index 0000000000..fb7fed9eff Binary files /dev/null and b/tests/data/glacier_calving_front/glacier_calving_data/zones/val/JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png differ diff --git a/tests/datasets/test_glacier_calving_front.py b/tests/datasets/test_glacier_calving_front.py new file mode 100644 index 
0000000000..aff8ad1a7a --- /dev/null +++ b/tests/datasets/test_glacier_calving_front.py @@ -0,0 +1,76 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import os +import shutil +from pathlib import Path + +import matplotlib.pyplot as plt +import pytest +import torch +import torch.nn as nn +from _pytest.fixtures import SubRequest +from pytest import MonkeyPatch + +from torchgeo.datasets import DatasetNotFoundError, GlacierCalvingFront + + +class TestGlacierCalvingFront: + @pytest.fixture(params=['train', 'test']) + def dataset( + self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest + ) -> GlacierCalvingFront: + md5 = '0b5c05bea31ff666f8eba18b43d4a01f' + monkeypatch.setattr(GlacierCalvingFront, 'md5', md5) + url = os.path.join( + 'tests', 'data', 'glacier_calving_front', 'glacier_calving_data.zip' + ) + monkeypatch.setattr(GlacierCalvingFront, 'url', url) + root = tmp_path + split = request.param + transforms = nn.Identity() + return GlacierCalvingFront( + root, split, transforms, download=True, checksum=True + ) + + def test_getitem(self, dataset: GlacierCalvingFront) -> None: + x = dataset[0] + assert isinstance(x, dict) + assert isinstance(x['image'], torch.Tensor) + assert x['image'].shape[0] == 1 + assert isinstance(x['mask_zone'], torch.Tensor) + assert x['image'].shape[-2:] == x['mask_zone'].shape[-2:] + + def test_len(self, dataset: GlacierCalvingFront) -> None: + if dataset.split == 'train': + assert len(dataset) == 3 + else: + assert len(dataset) == 3 + + def test_already_downloaded(self, dataset: GlacierCalvingFront) -> None: + GlacierCalvingFront(root=dataset.root) + + def test_not_yet_extracted(self, tmp_path: Path) -> None: + filename = 'glacier_calving_data.zip' + dir = os.path.join('tests', 'data', 'glacier_calving_front') + shutil.copyfile( + os.path.join(dir, filename), os.path.join(str(tmp_path), filename) + ) + GlacierCalvingFront(root=str(tmp_path)) + + def 
test_invalid_split(self) -> None: + with pytest.raises(AssertionError): + GlacierCalvingFront(split='foo') + + def test_not_downloaded(self, tmp_path: Path) -> None: + with pytest.raises(DatasetNotFoundError, match='Dataset not found'): + GlacierCalvingFront(tmp_path) + + def test_plot(self, dataset: GlacierCalvingFront) -> None: + dataset.plot(dataset[0], suptitle='Test') + plt.close() + + sample = dataset[0] + sample['prediction'] = torch.clone(sample['mask_zone']) + dataset.plot(sample, suptitle='Prediction') + plt.close() diff --git a/tests/trainers/test_segmentation.py b/tests/trainers/test_segmentation.py index 9634fb81f2..7f958bf84b 100644 --- a/tests/trainers/test_segmentation.py +++ b/tests/trainers/test_segmentation.py @@ -58,6 +58,7 @@ class TestSemanticSegmentationTask: 'etci2021', 'geonrw', 'gid15', + 'glacier_calving_front', 'inria', 'l7irish', 'l8biome', diff --git a/torchgeo/datamodules/__init__.py b/torchgeo/datamodules/__init__.py index cd59b0616b..5b3e23e28c 100644 --- a/torchgeo/datamodules/__init__.py +++ b/torchgeo/datamodules/__init__.py @@ -19,6 +19,7 @@ from .geo import BaseDataModule, GeoDataModule, NonGeoDataModule from .geonrw import GeoNRWDataModule from .gid15 import GID15DataModule +from .glacier_calving_front import GlacierCalvingFrontDataModule from .inria import InriaAerialImageLabelingDataModule from .iobench import IOBenchDataModule from .l7irish import L7IrishDataModule @@ -80,6 +81,7 @@ 'FireRiskDataModule', 'GeoNRWDataModule', 'GID15DataModule', + 'GlacierCalvingFrontDataModule', 'InriaAerialImageLabelingDataModule', 'LandCoverAIDataModule', 'LandCoverAI100DataModule', diff --git a/torchgeo/datamodules/glacier_calving_front.py b/torchgeo/datamodules/glacier_calving_front.py new file mode 100644 index 0000000000..a8560e4f5d --- /dev/null +++ b/torchgeo/datamodules/glacier_calving_front.py @@ -0,0 +1,67 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +"""GlacierCalvingFront datamodule.""" + +from typing import Any + +import kornia.augmentation as K +import torch + +from ..datasets import GlacierCalvingFront +from ..transforms import AugmentationSequential +from .geo import NonGeoDataModule + + +class GlacierCalvingFrontDataModule(NonGeoDataModule): + """LightningDataModule implementation for the GlacierCalvingFront dataset. + + Implements the default splits that come with the dataset. + + .. versionadded:: 0.7 + """ + + mean = torch.Tensor([0.5517]) + std = torch.Tensor([11.8478]) + + def __init__( + self, batch_size: int = 64, num_workers: int = 0, size: int = 256, **kwargs: Any + ) -> None: + """Initialize a new GlacierCalvingFrontDataModule instance. + + Args: + batch_size: Size of each mini-batch. + num_workers: Number of workers for parallel data loading. + size: resize images of input size 256x256 to size x size + **kwargs: Additional keyword arguments passed to + :class:`~torchgeo.datasets.GlacierCalvingFront`. + """ + super().__init__(GlacierCalvingFront, batch_size, num_workers, **kwargs) + + self.train_aug = AugmentationSequential( + K.Normalize(mean=self.mean, std=self.std), + K.Resize(size), + K.RandomHorizontalFlip(p=0.5), + K.RandomVerticalFlip(p=0.5), + data_keys=['image', 'mask'], + ) + + self.aug = AugmentationSequential( + K.Normalize(mean=self.mean, std=self.std), + K.Resize(size), + data_keys=['image', 'mask'], + ) + + self.size = size + + def setup(self, stage: str) -> None: + """Set up datasets. + + Args: + stage: Either 'fit', 'validate', 'test', or 'predict'. 
+ """ + if stage in ['fit', 'validate']: + self.train_dataset = GlacierCalvingFront(split='train', **self.kwargs) + self.val_dataset = GlacierCalvingFront(split='val', **self.kwargs) + if stage in ['test']: + self.test_dataset = GlacierCalvingFront(split='test', **self.kwargs) diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py index 5c4fea8970..fc88f7c4d8 100644 --- a/torchgeo/datasets/__init__.py +++ b/torchgeo/datasets/__init__.py @@ -59,6 +59,7 @@ ) from .geonrw import GeoNRW from .gid15 import GID15 +from .glacier_calving_front import GlacierCalvingFront from .globbiomass import GlobBiomass from .idtrees import IDTReeS from .inaturalist import INaturalist @@ -223,6 +224,7 @@ 'ForestDamage', 'GeoNRW', 'GID15', + 'GlacierCalvingFront', 'IDTReeS', 'InriaAerialImageLabeling', 'LandCoverAI', diff --git a/torchgeo/datasets/glacier_calving_front.py b/torchgeo/datasets/glacier_calving_front.py new file mode 100644 index 0000000000..07b7aa33c7 --- /dev/null +++ b/torchgeo/datasets/glacier_calving_front.py @@ -0,0 +1,249 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Glacier Calving Front dataset.""" + +import glob +import os +from collections.abc import Callable +from typing import ClassVar + +import matplotlib.pyplot as plt +import numpy as np +import torch +from matplotlib.figure import Figure +from PIL import Image +from torch import Tensor + +from .errors import DatasetNotFoundError +from .geo import NonGeoDataset +from .utils import Path, download_and_extract_archive, extract_archive + + +class GlacierCalvingFront(NonGeoDataset): + """Glacier Calving Front dataset. + + The `Glacier Calving Front `__ dataset is a + semantic segmentation dataset of marine-terminating glaciers. 
+ + Dataset features: + + * 46,700 train, 18,744 validation, and 28,376 test images + * varying spatial resolution of 6-20m + * paired binary calving front segmentation masks + * paired multi-class land cover segmentation masks + + Dataset format: + + * images are single-channel pngs with dimension 256x256 + * segmentation masks are single-channel pngs + + Dataset classes: + + 1. background + 2. ocean + 3. rock + 4. glacier + + If you use this dataset in your research, please cite the following paper: + + * https://essd.copernicus.org/articles/14/4287/2022/ + + .. versionadded:: 0.7 + """ + + valid_splits = ('train', 'val', 'test') + + zipfilename = 'glacier_calving_data.zip' + + data_dir = 'glacier_calving_data' + + image_dir = 'sar_images' + + mask_dirs = ('fronts', 'zones') + + url = 'https://huggingface.co/datasets/torchgeo/glacier_calving_front/resolve/main/glacier_calving_data.zip' + + md5 = '56e39e33f88a9842f48c513083e3c50a' + + px_class_values: ClassVar[dict[int, str]] = { + 0: 'background', + 64: 'ocean', + 127: 'rock', + 254: 'glacier', + } + + def __init__( + self, + root: Path = 'data', + split: str = 'train', + transforms: Callable[[dict[str, Tensor]], dict[str, Tensor]] | None = None, + download: bool = False, + checksum: bool = False, + ) -> None: + """Initialize a new instance of GlacierCalvingFront dataset. + + Args: + root: root directory where dataset can be found + split: one of "train", "val", or "test" + transforms: a function/transform that takes input sample and its target as + entry and returns a transformed version + download: if True, download dataset and store it in the root directory + checksum: if True, check the MD5 of the downloaded files (may be slow) + + Raises: + AssertionError: if ``split`` argument is invalid + DatasetNotFoundError: If dataset is not found and *download* is False. 
+ """ + assert split in self.valid_splits, f'split must be one of {self.valid_splits}' + + self.root = root + self.split = split + self.transforms = transforms + self.download = download + self.checksum = checksum + + self._verify() + + # List the zone masks: __getitem__ derives the SAR image and front mask + # file names from the '_zones_' marker that only these files carry. + self.fpaths = sorted( + glob.glob( + os.path.join( + self.root, self.data_dir, self.mask_dirs[1], self.split, '*.png' + ) + ) + ) + + def __len__(self) -> int: + """Return the number of images in the dataset.""" + return len(self.fpaths) + + def __getitem__(self, idx: int) -> dict[str, Tensor]: + """Return the image and mask at the given index. + + Args: + idx: index of the image and mask to return + + Returns: + a dict with the SAR 'image' and the 'mask_front' / 'mask_zone' masks + """ + name = os.path.basename(self.fpaths[idx]) + base = os.path.join(self.root, self.data_dir) + front_dir = os.path.join(base, self.mask_dirs[0], self.split) + img_name, front_name = (name.replace('_zones_', s) for s in ('_', '_front_')) + img = Image.open(os.path.join(base, self.image_dir, self.split, img_name)) + front_mask = Image.open(os.path.join(front_dir, front_name)) + zone_mask = Image.open(self.fpaths[idx]) + + sample = { + 'image': torch.from_numpy(np.array(img)).unsqueeze(0).float(), + 'mask_front': torch.from_numpy(np.array(front_mask)).long(), + 'mask_zone': torch.from_numpy(np.array(zone_mask)).long(), + } + + if self.transforms: + sample = self.transforms(sample) + + return sample + + def _verify(self) -> None: + """Verify the integrity of the dataset.""" + exists = [] + if os.path.exists( + os.path.join( + self.root, + self.zipfilename.replace('.zip', ''), + self.image_dir, + self.split, + ) + ): + exists.append(True) + else: + exists.append(False) + + for mask_dir in self.mask_dirs: + if os.path.exists( + os.path.join( + self.root, + self.zipfilename.replace('.zip', ''), + mask_dir, + self.split, + ) + ): + exists.append(True) + else: + exists.append(False) + + if all(exists): + return + + # check download of zipfile + if os.path.exists(os.path.join(self.root, self.zipfilename)): + self._extract() + return + + if not self.download: + raise DatasetNotFoundError(self) + + self._download() + + def _download(self) -> 
None: + """Download the dataset.""" + download_and_extract_archive( + self.url, + self.root, + filename=self.zipfilename, + md5=self.md5 if self.checksum else None, + ) + + def _extract(self) -> None: + """Extract the dataset.""" + extract_archive(os.path.join(self.root, self.zipfilename), self.root) + + def plot( + self, + sample: dict[str, Tensor], + show_titles: bool = True, + suptitle: str | None = None, + ) -> Figure: + """Plot a sample from the dataset. + + Args: + sample: a sample returned by :meth:`GlacierCalvingFront.__getitem__` + show_titles: flag indicating whether to show titles above each panel + suptitle: optional string to use as a suptitle + + Returns: + a matplotlib Figure with the rendered sample + """ + if 'prediction' in sample: + ncols = 4 + else: + ncols = 3 + fig, axs = plt.subplots(1, ncols, figsize=(15, 5)) + + axs[0].imshow(sample['image'].permute(1, 2, 0).numpy()) + axs[0].axis('off') + + axs[1].imshow(sample['mask_front'].numpy(), cmap='gray') + axs[1].axis('off') + + axs[2].imshow(sample['mask_zone'].numpy(), cmap='gray') + axs[2].axis('off') + + if show_titles: + axs[0].set_title('Image') + axs[1].set_title('Front Mask') + axs[2].set_title('Zone Mask') + + if 'prediction' in sample: + axs[3].imshow(sample['prediction'].numpy(), cmap='gray') + axs[3].axis('off') + if show_titles: + axs[3].set_title('Prediction') + + if suptitle: + fig.suptitle(suptitle) + + return fig