Skip to content

Commit

Permalink
glacier ds
Browse files Browse the repository at this point in the history
  • Loading branch information
nilsleh committed Oct 16, 2024
1 parent 6a84a61 commit be55b94
Show file tree
Hide file tree
Showing 39 changed files with 499 additions and 0 deletions.
5 changes: 5 additions & 0 deletions docs/api/datamodules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ GID-15

.. autoclass:: GID15DataModule

Glacier Calving Front
^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: GlacierCalvingFrontDataModule

Inria Aerial Image Labeling
^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,11 @@ GID-15

.. autoclass:: GID15

Glacier Calving Front
^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: GlacierCalvingFront

IDTReeS
^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions docs/api/datasets/non_geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`Forest Damage`_,OD,Drone imagery,"CDLA-Permissive-1.0","1,543",4,"1,500x1,500",,RGB
`GeoNRW`_,S,Aerial,"CC-BY-4.0","7,783",11,"1,000x1,000",1,"RGB, DEM"
`GID-15`_,S,Gaofen-2,-,150,15,"6,800x7,200",3,RGB
`Glacier Calving Front`_,S,"Sentinel-1, TerraSAR-X, TanDEM-X, ENVISAT, European Remote Sensing Satellite 1&2, ALOS PALSAR, and RADARSAT-1","CC-BY-4.0","93,820",4,"256x256",6--20,"SAR"
`IDTReeS`_,"OD,C",Aerial,"CC-BY-4.0",591,33,200x200,0.1--1,RGB
`Inria Aerial Image Labeling`_,S,Aerial,-,360,2,"5,000x5,000",0.3,RGB
`LandCover.ai`_,S,Aerial,"CC-BY-NC-SA-4.0","10,674",5,512x512,0.25--0.5,RGB
Expand Down
16 changes: 16 additions & 0 deletions tests/conf/glacier_calving_front.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Trainer test configuration for the GlacierCalvingFront datamodule.
model:
  class_path: SemanticSegmentationTask
  init_args:
    loss: 'ce'
    model: 'unet'
    backbone: 'resnet18'
    # The dataset yields single-channel images (the dataset tests assert shape[0] == 1)
    in_channels: 1
    # Four classes, matching the four zone-mask pixel values in the dummy data
    num_classes: 4
    num_filters: 1
    ignore_index: null
data:
  class_path: GlacierCalvingFrontDataModule
  init_args:
    batch_size: 1
  dict_kwargs:
    root: 'tests/data/glacier_calving_front'
75 changes: 75 additions & 0 deletions tests/data/glacier_calving_front/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import hashlib
import os
import shutil

import numpy as np
from PIL import Image

# Root directory of the generated dummy dataset; it is also the base name of
# the zip archive created further down
root_dir = 'glacier_calving_data'
# One sub-directory per data product, each of which holds one folder per split
sub_dirs = ['zones', 'sar_images', 'fronts']
splits = ['train', 'val', 'test']

# Zone-mask file names; the matching SAR and front file names are derived from
# these by rewriting the '_zones_' token
zone_file_names = [
    'Crane_2002-11-09_ERS_20_2_061_zones__93_102_0_0_0.png',
    'Crane_2007-09-22_ENVISAT_20_1_467_zones__93_102_8_1024_0.png',
    'JAC_2015-12-23_TSX_6_1_005_zones__57_49_195_384_1024.png',
]

# Side length in pixels of the square dummy images
IMG_SIZE = 32


# Function to create dummy images
def create_dummy_image(
    path: str, shape: tuple[int, ...], pixel_values: list[int]
) -> None:
    """Write a random 8-bit image to disk.

    Args:
        path: Destination file; PIL picks the image format from its extension.
        shape: Array shape of the image, e.g. ``(height, width)``.
        pixel_values: Values that pixels are drawn from, uniformly and with
            replacement. They are cast to ``uint8``, so they should lie in 0-255.
    """
    data = np.random.choice(pixel_values, size=shape, replace=True).astype(np.uint8)
    img = Image.fromarray(data)
    img.save(path)


def create_zone_images(split: str, filename: str) -> None:
    """Create a dummy zone mask for ``split`` under the 'zones' sub-directory.

    Args:
        split: Split folder the file is written to.
        filename: Name of the image file to create.
    """
    # Zone masks only ever contain these four grey levels
    create_dummy_image(
        os.path.join(root_dir, 'zones', split, filename),
        (IMG_SIZE, IMG_SIZE),
        [0, 64, 127, 255],
    )


def create_sar_images(split: str, filename: str) -> None:
    """Create a dummy SAR image for ``split`` under the 'sar_images' sub-directory.

    Args:
        split: Split folder the file is written to.
        filename: Name of the image file to create.
    """
    # SAR images may take any 8-bit grey level
    grey_levels = list(range(256))
    target = os.path.join(root_dir, 'sar_images', split, filename)
    create_dummy_image(target, (IMG_SIZE, IMG_SIZE), grey_levels)


def create_front_images(split: str, filename: str) -> None:
    """Create a dummy front mask for ``split`` under the 'fronts' sub-directory.

    Args:
        split: Split folder the file is written to.
        filename: Name of the image file to create.
    """
    # Front masks are filled with arbitrary 8-bit values
    front_pixel_values = list(range(256))
    target = os.path.join(root_dir, 'fronts', split, filename)
    create_dummy_image(target, (IMG_SIZE, IMG_SIZE), front_pixel_values)


# Remove any previous output so stale files never end up in the archive
if os.path.exists(root_dir):
    shutil.rmtree(root_dir)

# Lay out <root_dir>/<product>/<split> for every product and split
for product in sub_dirs:
    for part in splits:
        os.makedirs(os.path.join(root_dir, product, part), exist_ok=True)

# Populate every split with a zone mask plus its matching SAR image and front mask
for part in splits:
    for zone_name in zone_file_names:
        sar_name = zone_name.replace('_zones_', '_')
        front_name = zone_name.replace('_zones_', '_front_')
        create_zone_images(part, zone_name)
        create_sar_images(part, sar_name)
        create_front_images(part, front_name)

# Zip the generated tree into '<root_dir>.zip' in the current directory
shutil.make_archive(root_dir, 'zip', '.', root_dir)


def md5(fname: str) -> str:
    """Return the hexadecimal MD5 digest of a file.

    Args:
        fname: Path of the file to hash.

    Returns:
        The digest as a lowercase hex string.
    """
    digest = hashlib.md5()
    with open(fname, 'rb') as stream:
        # Read in 4 KiB blocks so the whole file never has to sit in memory
        while block := stream.read(4096):
            digest.update(block)
    return digest.hexdigest()


# Checksum the archive written by make_archive; deriving its name from root_dir
# keeps the two in sync if the root directory is ever renamed
md5sum = md5(f'{root_dir}.zip')
print(f'MD5 checksum: {md5sum}')
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
76 changes: 76 additions & 0 deletions tests/datasets/test_glacier_calving_front.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import pytest
import torch
import torch.nn as nn
from _pytest.fixtures import SubRequest
from pytest import MonkeyPatch

from torchgeo.datasets import DatasetNotFoundError, GlacierCalvingFront


class TestGlacierCalvingFront:
    """Tests for the GlacierCalvingFront dataset, run against the dummy archive."""

    # All three splits shipped in the dummy archive are exercised; 'val' is
    # included because the datamodule instantiates it during fitting.
    @pytest.fixture(params=['train', 'val', 'test'])
    def dataset(
        self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
    ) -> GlacierCalvingFront:
        """Build a dataset for the requested split from the local dummy archive.

        The class attributes ``url`` and ``md5`` are patched so that the
        download step fetches the archive from the repository test data.
        """
        md5 = '0b5c05bea31ff666f8eba18b43d4a01f'
        monkeypatch.setattr(GlacierCalvingFront, 'md5', md5)
        url = os.path.join(
            'tests', 'data', 'glacier_calving_front', 'glacier_calving_data.zip'
        )
        monkeypatch.setattr(GlacierCalvingFront, 'url', url)
        root = tmp_path
        split = request.param
        transforms = nn.Identity()
        return GlacierCalvingFront(
            root, split, transforms, download=True, checksum=True
        )

    def test_getitem(self, dataset: GlacierCalvingFront) -> None:
        """A sample is a dict with a 1-channel image and a same-sized zone mask."""
        x = dataset[0]
        assert isinstance(x, dict)
        assert isinstance(x['image'], torch.Tensor)
        assert x['image'].shape[0] == 1
        assert isinstance(x['mask_zone'], torch.Tensor)
        assert x['image'].shape[-2:] == x['mask_zone'].shape[-2:]

    def test_len(self, dataset: GlacierCalvingFront) -> None:
        """Every split of the dummy archive holds three samples."""
        assert len(dataset) == 3

    def test_already_downloaded(self, dataset: GlacierCalvingFront) -> None:
        """Instantiating on an already-populated root must succeed."""
        GlacierCalvingFront(root=dataset.root)

    def test_not_yet_extracted(self, tmp_path: Path) -> None:
        """An archive present in root, but not yet extracted, must be usable."""
        filename = 'glacier_calving_data.zip'
        data_dir = os.path.join('tests', 'data', 'glacier_calving_front')
        shutil.copyfile(
            os.path.join(data_dir, filename), os.path.join(str(tmp_path), filename)
        )
        GlacierCalvingFront(root=str(tmp_path))

    def test_invalid_split(self) -> None:
        """An unknown split name is rejected."""
        with pytest.raises(AssertionError):
            GlacierCalvingFront(split='foo')

    def test_not_downloaded(self, tmp_path: Path) -> None:
        """An empty root without download=True raises DatasetNotFoundError."""
        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
            GlacierCalvingFront(tmp_path)

    def test_plot(self, dataset: GlacierCalvingFront) -> None:
        """Plotting works both with and without a 'prediction' entry."""
        dataset.plot(dataset[0], suptitle='Test')
        plt.close()

        sample = dataset[0]
        sample['prediction'] = torch.clone(sample['mask_zone'])
        dataset.plot(sample, suptitle='Prediction')
        plt.close()
1 change: 1 addition & 0 deletions tests/trainers/test_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class TestSemanticSegmentationTask:
'etci2021',
'geonrw',
'gid15',
'glacier_calving_front',
'inria',
'l7irish',
'l8biome',
Expand Down
2 changes: 2 additions & 0 deletions torchgeo/datamodules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .geo import BaseDataModule, GeoDataModule, NonGeoDataModule
from .geonrw import GeoNRWDataModule
from .gid15 import GID15DataModule
from .glacier_calving_front import GlacierCalvingFrontDataModule
from .inria import InriaAerialImageLabelingDataModule
from .iobench import IOBenchDataModule
from .l7irish import L7IrishDataModule
Expand Down Expand Up @@ -80,6 +81,7 @@
'FireRiskDataModule',
'GeoNRWDataModule',
'GID15DataModule',
'GlacierCalvingFrontDataModule',
'InriaAerialImageLabelingDataModule',
'LandCoverAIDataModule',
'LandCoverAI100DataModule',
Expand Down
67 changes: 67 additions & 0 deletions torchgeo/datamodules/glacier_calving_front.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""GlacierCalvingFront datamodule."""

from typing import Any

import kornia.augmentation as K
import torch

from ..datasets import GlacierCalvingFront
from ..transforms import AugmentationSequential
from .geo import NonGeoDataModule


class GlacierCalvingFrontDataModule(NonGeoDataModule):
    """LightningDataModule implementation for the GlacierCalvingFront dataset.

    Implements the default splits that come with the dataset.

    .. versionadded:: 0.7
    """

    # Statistics handed to K.Normalize; one entry each, i.e. one image channel
    mean = torch.Tensor([0.5517])
    std = torch.Tensor([11.8478])

    def __init__(
        self, batch_size: int = 64, num_workers: int = 0, size: int = 256, **kwargs: Any
    ) -> None:
        """Initialize a new GlacierCalvingFrontDataModule instance.

        Args:
            batch_size: Size of each mini-batch.
            num_workers: Number of workers for parallel data loading.
            size: Target size passed to :class:`kornia.augmentation.Resize`
                for both images and masks.
            **kwargs: Additional keyword arguments passed to
                :class:`~torchgeo.datasets.GlacierCalvingFront`.
        """
        super().__init__(GlacierCalvingFront, batch_size, num_workers, **kwargs)

        # NOTE(review): these pipelines operate on the keys 'image' and 'mask',
        # while the dataset tests read the mask from 'mask_zone' -- confirm that
        # batches expose a 'mask' entry before the augmentations are applied.
        self.train_aug = AugmentationSequential(
            K.Normalize(mean=self.mean, std=self.std),
            K.Resize(size),
            K.RandomHorizontalFlip(p=0.5),
            K.RandomVerticalFlip(p=0.5),
            data_keys=['image', 'mask'],
        )

        # Same pipeline without the random flips
        self.aug = AugmentationSequential(
            K.Normalize(mean=self.mean, std=self.std),
            K.Resize(size),
            data_keys=['image', 'mask'],
        )

        self.size = size

    def setup(self, stage: str) -> None:
        """Set up datasets.

        Args:
            stage: Either 'fit', 'validate', 'test', or 'predict'.
        """
        # 'fit' and 'validate' both build the train and val datasets;
        # 'predict' builds nothing here.
        if stage in ['fit', 'validate']:
            self.train_dataset = GlacierCalvingFront(split='train', **self.kwargs)
            self.val_dataset = GlacierCalvingFront(split='val', **self.kwargs)
        if stage in ['test']:
            self.test_dataset = GlacierCalvingFront(split='test', **self.kwargs)
2 changes: 2 additions & 0 deletions torchgeo/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
)
from .geonrw import GeoNRW
from .gid15 import GID15
from .glacier_calving_front import GlacierCalvingFront
from .globbiomass import GlobBiomass
from .idtrees import IDTReeS
from .inaturalist import INaturalist
Expand Down Expand Up @@ -223,6 +224,7 @@
'ForestDamage',
'GeoNRW',
'GID15',
'GlacierCalvingFront',
'IDTReeS',
'InriaAerialImageLabeling',
'LandCoverAI',
Expand Down
Loading

0 comments on commit be55b94

Please sign in to comment.