diff --git a/.github/workflows/update-datasets.yml b/.github/workflows/update-datasets.yml
index 335c2c7c..06f4320b 100644
--- a/.github/workflows/update-datasets.yml
+++ b/.github/workflows/update-datasets.yml
@@ -7,6 +7,8 @@ on:
   push:
     paths:
       - agml/_assets/public_datasources.json
+    branches:
+      - dev
 
 permissions: write-all
 
diff --git a/README.md b/README.md
index 5d5d74e6..90c965d4 100644
--- a/README.md
+++ b/README.md
@@ -137,6 +137,7 @@ You're now ready to use AgML for training your own models!
 [riseholme_strawberry_classification_2021](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/riseholme_strawberry_classification_2021.md) | Image Classification | 3520 |
 [ghai_broccoli_detection](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/ghai_broccoli_detection.md) | Object Detection | 500 |
 [bean_synthetic_earlygrowth_aerial](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/bean_synthetic_earlygrowth_aerial.md) | Semantic Segmentation | 2500 |
+[ghai_strawberry_fruit_detection](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/ghai_strawberry_fruit_detection.md) | Object Detection | 500 |
 
 ## Usage Information
 
@@ -167,4 +168,4 @@ a bug or feature that you would like to see implemented, please don't hesitate t
 See the [contributing guidelines](/CONTRIBUTING.md) for more information.
 
 ## Funding
-This project is partly funded by the [National AI Institute for Food Systems (AIFS)](https://aifs.ucdavis.edu
\ No newline at end of file
+This project is partly funded by the [National AI Institute for Food Systems (AIFS)](https://aifs.ucdavis.ed
\ No newline at end of file
diff --git a/agml/__init__.py b/agml/__init__.py
index bd465083..9747ee6d 100644
--- a/agml/__init__.py
+++ b/agml/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = '0.5.0'
+__version__ = '0.5.1'
 __all__ = ['data', 'synthetic', 'backend', 'viz', 'io']
 
 
diff --git a/agml/_assets/public_datasources.json b/agml/_assets/public_datasources.json
index c3b88144..bde3f7b7 100644
--- a/agml/_assets/public_datasources.json
+++ b/agml/_assets/public_datasources.json
@@ -1230,5 +1230,45 @@
                 0.08992248773574829
             ]
         }
+    },
+    "ghai_strawberry_fruit_detection": {
+        "classes": {
+            "1": "Bud",
+            "2": "Calyx",
+            "3": "Detached Fruit",
+            "4": "Flower",
+            "5": "Large green",
+            "6": "Leaf",
+            "7": "Ripe fruit",
+            "8": "Small Green",
+            "9": "Stem",
+            "10": "Unripe fruit"
+        },
+        "ml_task": "object_detection",
+        "ag_task": "crop_detection",
+        "location": {
+            "continent": "north_america",
+            "country": "usa"
+        },
+        "sensor_modality": "rgb",
+        "real_synthetic": "real",
+        "platform": "handheld/ground",
+        "input_data_format": "jpg",
+        "annotation_format": "coco_json",
+        "n_images": "500",
+        "docs_url": "https://github.com/AxisAg/GHAIDatasets/blob/main/datasets/strawberry.md",
+        "external_image_sources": [],
+        "stats": {
+            "mean": [
+                0.49159616231918335,
+                0.5238277316093445,
+                0.4485996663570404
+            ],
+            "std": [
+                0.18163496255874634,
+                0.16137710213661194,
+                0.18042609095573425
+            ]
+        }
     }
 }
\ No newline at end of file
diff --git a/agml/_assets/shape_info.pickle b/agml/_assets/shape_info.pickle
index 35362ede..cd5afc9a 100644
Binary files a/agml/_assets/shape_info.pickle and b/agml/_assets/shape_info.pickle differ
diff --git a/agml/_assets/source_citations.json b/agml/_assets/source_citations.json
index 8502610f..8d1d6efc 100644
--- a/agml/_assets/source_citations.json
+++ b/agml/_assets/source_citations.json
@@ -138,5 +138,9 @@
     "bean_synthetic_earlygrowth_aerial": {
         "license": "MIT",
         "citation": "@ARTICLE{10.3389/fpls.2019.01185,\n \nAUTHOR={Bailey, Brian N.}, \n\t \nTITLE={Helios: A Scalable 3D Plant and Environmental Biophysical Modeling Framework}, \n\t\nJOURNAL={Frontiers in Plant Science}, \n\t\nVOLUME={10}, \n\t\nYEAR={2019}, \n\t \nURL={https://www.frontiersin.org/article/10.3389/fpls.2019.01185}, \n\t\nDOI={10.3389/fpls.2019.01185}, \n\t\nISSN={1664-462X}, \n \nABSTRACT={This article presents an overview of Helios, a new three-dimensional (3D) plant and environmental modeling framework. Helios is a model coupling framework designed to provide maximum flexibility in integrating and running arbitrary 3D environmental system models. Users interact with Helios through a well-documented open-source C++ API. Version 1.0 comes with model plug-ins for radiation transport, the surface energy balance, stomatal conductance, photosynthesis, solar position, and procedural tree generation. Additional plug-ins are also available for visualizing model geometry and data and for processing and integrating LiDAR scanning data. Many of the plug-ins perform calculations on the graphics processing unit, which allows for efficient simulation of very large domains with high detail. An example modeling study is presented in which leaf-level heterogeneity in water usage and photosynthesis of an orchard is examined to understand how this leaf-scale variability contributes to whole-tree and -canopy fluxes.}\n}"
+    },
+    "ghai_strawberry_fruit_detection": {
+        "license": "CC BY-SA 4.0",
+        "citation": ""
     }
 }
\ No newline at end of file
diff --git a/agml/_internal/preprocess.py b/agml/_internal/preprocess.py
index 7c99f90d..11d829ea 100644
--- a/agml/_internal/preprocess.py
+++ b/agml/_internal/preprocess.py
@@ -1016,6 +1016,19 @@ def ghai_broccoli_detection(self, dataset_name):
         shutil.move(os.path.join(original_dir, 'coco.json'),
                     os.path.join(processed_dir, 'annotations.json'))
 
+    def ghai_strawberry_fruit_detection(self, dataset_name):
+        # Create processed directories
+        original_dir = os.path.join(self.data_original_dir, dataset_name)
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        processed_image_dir = os.path.join(processed_dir, 'images')
+        os.makedirs(processed_image_dir, exist_ok = True)
+
+        # Move images
+        for image in tqdm(glob.glob(os.path.join(original_dir, '*.jpg'))):
+            shutil.move(image, processed_image_dir)
+        shutil.move(os.path.join(original_dir, 'coco.json'),
+                    os.path.join(processed_dir, 'annotations.json'))
+
 
 if __name__ == '__main__':
     # Initialize program arguments.
diff --git a/agml/data/loader.py b/agml/data/loader.py
index 121cd5d8..e25d8430 100644
--- a/agml/data/loader.py
+++ b/agml/data/loader.py
@@ -15,6 +15,7 @@
 import os
 import json
 import copy
+import glob
 from typing import Union
 from collections.abc import Sequence
 from decimal import getcontext, Decimal
@@ -296,6 +297,38 @@ def helios(cls, name, dataset_path = None):
         information which is provided in the `.metadata` directory of the Helios
         generated dataset, allowing it to contain potentially even more info.
         """
+        # Instantiate from a list of datasets.
+        if isinstance(name, (list, tuple)):
+            if dataset_path is None:
+                dataset_path = [None] * len(name)
+            elif isinstance(dataset_path, str):
+                dataset_path = [dataset_path] * len(name)
+            else:
+                if not len(dataset_path) == len(name):
+                    raise ValueError("The number of dataset paths must be "
+                                     "the same as the number of dataset names.")
+            datasets = [cls.helios(n, dataset_path = dp)
+                        for n, dp in zip(name, dataset_path)]
+            return cls.merge(*datasets)
+
+        # Instantiate from a wildcard pattern.
+        if isinstance(name, str) and '*' in name:
+            if dataset_path is None:
+                dataset_path = os.path.abspath(synthetic_data_save_path())
+            elif not os.path.exists(dataset_path):
+                raise NotADirectoryError(
+                    f"Existing directory '{dataset_path}' for dataset of name "
+                    f"{name} not found, pass a custom path if you want to use "
+                    f"a custom dataset path for the dataset.")
+
+            # Get the list of datasets.
+            possible_datasets = glob.glob(os.path.join(dataset_path, name))
+            if len(possible_datasets) == 0:
+                raise ValueError(f"No datasets found for pattern: {name}.")
+            datasets = [cls.helios(os.path.basename(p), dataset_path = dataset_path)
+                        for p in sorted(possible_datasets)]
+            return cls.merge(*datasets)
+
         # Locate the path to the dataset, using synthetic semantics.
         if dataset_path is None:
             dataset_path = os.path.abspath(
diff --git a/agml/io.py b/agml/io.py
index 0cf7dce2..e5bd8b9d 100644
--- a/agml/io.py
+++ b/agml/io.py
@@ -15,6 +15,8 @@
 import random
 import inspect
 
+import cv2
+
 from agml.utils.io import (
     get_file_list as _get_file_list,
     get_dir_list as _get_dir_list,
@@ -112,5 +114,15 @@ def random_file(path, **kwargs):
     return random.choice(get_file_list(path, **kwargs))
 
 
+def read_image(path, **kwargs):
+    """Reads an image from a file.
+
+    Args:
+        path (str): The path to the image file.
+        **kwargs: Keyword arguments to pass to `cv2.imread`.
+    Returns:
+        numpy.ndarray: The image.
+    """
+    return cv2.imread(path, **kwargs)
 
 
diff --git a/agml/models/segmentation.py b/agml/models/segmentation.py
index 8cce07d1..14088721 100644
--- a/agml/models/segmentation.py
+++ b/agml/models/segmentation.py
@@ -32,7 +32,7 @@
 from agml.data.public import source
 from agml.utils.general import resolve_list_value
 from agml.utils.image import resolve_image_size
-from agml.viz.masks import show_image_with_overlaid_mask, show_image_and_mask
+from agml.viz.masks import show_image_and_overlaid_mask, show_image_and_mask
 
 # This is last since `agml.models.base` will check for PyTorch Lightning,
 # and PyTorch Lightning automatically installed torchmetrics with it.
@@ -250,7 +250,7 @@ def show_prediction(self, image, overlay = False, **kwargs):
         image = self._expand_input_images(image)[0]
         mask = self.predict(image, **kwargs)
         if overlay:
-            return show_image_with_overlaid_mask(image, mask, **kwargs)
+            return show_image_and_overlaid_mask(image, mask, **kwargs)
         return show_image_and_mask(image, mask, **kwargs)
 
     def load_benchmark(self, dataset):
diff --git a/agml/synthetic/generator.py b/agml/synthetic/generator.py
index b8494d57..fedbdd89 100644
--- a/agml/synthetic/generator.py
+++ b/agml/synthetic/generator.py
@@ -155,7 +155,7 @@ def _convert_options_to_xml(self):
 
         # The `scan` tag is used for LiDAR generation. This must be added later
         # because there can be multiple origins and thus multiple `scan` tags.
-        if self._generation_options.simulation_type == SimulationType.LiDAR:
+        if self._generation_options.simulation_type == SimulationType.LiDAR or self._generation_options.simulation_type == SimulationType.Both:
             scan_tags = []
             if isinstance(parameters['lidar']['origin'][0], list):
                 for origin in parameters['lidar']['origin']:
@@ -173,7 +173,7 @@
             self._canopy + "Parameters": parameters['canopy'],
             'Ground': parameters['Ground']}
         xml_params = {'canopygenerator': canopy_parameters}
-        if self._generation_options.simulation_type == SimulationType.RGB:
+        if self._generation_options.simulation_type == SimulationType.RGB or self._generation_options.simulation_type == SimulationType.Both:
            xml_params[''] = parameters['camera']
 
         # Convert all of the parameters to XML format.
@@ -181,7 +181,7 @@
         root = tree.getroot()
 
         # Add the `scan` tags if necessary for LiDAR generation.
-        if self._generation_options.simulation_type == SimulationType.LiDAR:
+        if self._generation_options.simulation_type == SimulationType.LiDAR or self._generation_options.simulation_type == SimulationType.Both:
             for scan_tag in scan_tags:  # noqa
                 scan_tag_contents = ET.parse(
                     io.StringIO(dict2xml({'scan': scan_tag}))).getroot()
diff --git a/agml/synthetic/options.py b/agml/synthetic/options.py
index 229a1e0d..3ee76274 100644
--- a/agml/synthetic/options.py
+++ b/agml/synthetic/options.py
@@ -35,6 +35,7 @@ class SimulationType(Enum):
     """The simulation render (RGB vs. LiDAR) that is generated."""
     RGB: str = "rgb"
     LiDAR: str = "lidar"
+    Both: str = "rgb lidar"
 
 
 NumberOrMaybeList = TypeVar('NumberOrMaybeList', Number, List[Number])
diff --git a/agml/synthetic/synthetic_data_generation/generate.cpp b/agml/synthetic/synthetic_data_generation/generate.cpp
index b39a0700..8b5801d9 100644
--- a/agml/synthetic/synthetic_data_generation/generate.cpp
+++ b/agml/synthetic/synthetic_data_generation/generate.cpp
@@ -12,7 +12,7 @@ struct SyntheticAnnotationConfig {
 public:
     int num_images;
     vector<string> annotation_type;
-    string simulation_type;
+    vector<string> simulation_type;
     vector<string> labels;
     string xml_path;
     string output_path;
@@ -53,7 +53,14 @@ void SyntheticAnnotationConfig::load_config(const char* path) {
             }
             this->annotation_type.push_back(line);
         } else if (i == 2) {
-            this->simulation_type = line;
+            string delimeter = " "; size_t pos;
+            vector<string> simulation_type;
+            while ((pos = line.find(' ')) != string::npos)
+            {
+                this -> simulation_type.push_back(line.substr(0,pos));
+                line.erase(0, pos + delimeter.length());
+            }
+            this->simulation_type.push_back(line);
         } else if (i == 3) {
             string delimeter = " "; size_t pos;
             vector<string> labels;
@@ -150,7 +157,7 @@ int main(int argc, char** argv) {
     SyntheticAnnotation annotation(&context);
 
     // Choose either the LiDAR or RGB image simulation.
-    if (config.simulation_type == "lidar") {
+    if (!config.simulation_type.empty() && config.simulation_type[1] == "lidar") {
         // Get the UUID of all the elements on the scene
         vector<uint> UUID_trunk = cgen.getTrunkUUIDs();
         vector<uint> UUID_shoot = cgen.getBranchUUIDs();
@@ -190,7 +197,9 @@ int main(int argc, char** argv) {
             string cloud_export = this_image_dir + "/" + string("point_cloud_" + to_string(i) + ".xyz");
             std::cout << "Writing LiDAR Point cloud to " << cloud_export << " " << std::endl;
             lidarcloud.exportPointCloud(cloud_export.c_str());
-    } else {
+    }
+    if (!config.simulation_type.empty() && config.simulation_type[0] == "rgb")
+    {
         if (!config.annotation_type.empty() && config.annotation_type[0] != "none") {
             // Set the annotation type based on the configuration.
             vector<string> va = config.annotation_type;
@@ -207,7 +216,7 @@ int main(int argc, char** argv) {
             // Add labels according to whatever scheme we want.
             vector<string> vl = config.labels;
             for (int p = 0; p < cgen.getPlantCount(); p++) { // loop over vines
-                if (config.simulation_type == "rgb") {
+                if (!config.simulation_type.empty() && config.simulation_type[0] == "rgb") {
                     if (contains(vl, "trunks")) {
                         annotation.labelPrimitives(cgen.getTrunkUUIDs(p), "trunks");
                     }
diff --git a/agml/viz/boxes.py b/agml/viz/boxes.py
index 9a51279d..096cbe94 100644
--- a/agml/viz/boxes.py
+++ b/agml/viz/boxes.py
@@ -110,6 +110,8 @@ def annotate_object_detection(image,
                          "either `bbox` or `bboxes` for bounding boxes.")
     if bbox_format is not None:
         bboxes = convert_bbox_format(bboxes, bbox_format)
+    if labels is None:
+        labels = [0] * len(bboxes)
 
     # Run a few final checks in order to ensure data is formatted properly.
     image = format_image(image, mask = False)
diff --git a/docs/datasets/ghai_strawberry_fruit_detection.md b/docs/datasets/ghai_strawberry_fruit_detection.md
new file mode 100644
index 00000000..a9cfcae0
--- /dev/null
+++ b/docs/datasets/ghai_strawberry_fruit_detection.md
@@ -0,0 +1,23 @@
+
+# `ghai_strawberry_fruit_detection`
+
+## Dataset Metadata
+
+| Metadata | Value |
+| --- | --- |
+| **Classes** | Bud, Calyx, Detached Fruit, Flower, Large green, Leaf, Ripe fruit, Small Green, Stem, Unripe fruit |
+| **Machine Learning Task** | object_detection |
+| **Agricultural Task** | crop_detection |
+| **Location** | United States, North America |
+| **Sensor Modality** | RGB |
+| **Real or Synthetic** | real |
+| **Platform** | handheld/ground |
+| **Input Data Format** | JPG |
+| **Annotation Format** | coco_json |
+| **Number of Images** | 500 |
+| **Documentation** | https://github.com/AxisAg/GHAIDatasets/blob/main/datasets/strawberry.md |
+
+
+## Examples
+
+![Example Images for ghai_strawberry_fruit_detection](https://github.com/Project-AgML/AgML/blob/main/docs/sample_images/ghai_strawberry_fruit_detection_examples.png)
\ No newline at end of file
diff --git a/docs/sample_images/ghai_strawberry_fruit_detection_examples.png b/docs/sample_images/ghai_strawberry_fruit_detection_examples.png
new file mode 100644
index 00000000..0cf7831d
Binary files /dev/null and b/docs/sample_images/ghai_strawberry_fruit_detection_examples.png differ
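For quick reference, a minimal usage sketch of the surfaces this patch touches; it is not part of the diff. The Helios dataset names and the image path below are hypothetical placeholders, and the calls shown are only the ones added or extended above.

```python
import agml
from agml.synthetic.options import SimulationType

# Load the newly registered strawberry detection dataset (COCO-format boxes);
# indexing a loader yields an (image, annotation) pair.
loader = agml.data.AgMLDataLoader('ghai_strawberry_fruit_detection')
image, annotations = loader[0]

# New convenience wrapper around `cv2.imread` added to `agml.io`.
img = agml.io.read_image('/path/to/an/image.jpg')  # hypothetical path

# `AgMLDataLoader.helios` now also accepts a list of generated datasets or a
# wildcard pattern; 'tomato_run_*' is a made-up example name.
merged = agml.data.AgMLDataLoader.helios(['tomato_run_1', 'tomato_run_2'])
matched = agml.data.AgMLDataLoader.helios('tomato_run_*')

# The new combined simulation type requests both RGB and LiDAR output.
assert SimulationType.Both.value == 'rgb lidar'
```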