diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..cfee739 --- /dev/null +++ b/.flake8 @@ -0,0 +1,16 @@ +[flake8] + +# E402 module level import not at top of file +# E501 line too long (83 > 79 characters) +# F821 undefined name '_' + +exclude = .git + +max-line-length = 87 + +# list every code for a file in one entry; flake8 uses only one matching +# per-file-ignores entry per file, so repeated paths do not combine +per-file-ignores = + ./m.neural_network.preparedata/m.neural_network.preparedata.py: F821, E501 + ./m.neural_network.preparedata.worker_nullcells/m.neural_network.preparedata.worker_nullcells.py: F821, E501 + ./m.neural_network.preparedata.worker_export/m.neural_network.preparedata.worker_export.py: E501 diff --git a/.github/workflows/grass-manual.yml b/.github/workflows/grass-manual.yml new file mode 100644 index 0000000..94b4794 --- /dev/null +++ b/.github/workflows/grass-manual.yml @@ -0,0 +1,7 @@ +on: + push: + branches: [ main ] + +jobs: + grass-manual: + uses: mundialis/github-workflows/.github/workflows/grass-manual.yml@main diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..73bef46 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,15 @@ +--- +name: Linting and code quality check + +on: + push: + branches: + - main + - develop + pull_request: + +jobs: + lint: + uses: mundialis/github-workflows/.github/workflows/linting.yml@main + with: + VALIDATE_HTML: false \ No newline at end of file diff --git a/.github/workflows/post-pr-reviews.yml b/.github/workflows/post-pr-reviews.yml new file mode 100644 index 0000000..25b519b --- /dev/null +++ b/.github/workflows/post-pr-reviews.yml @@ -0,0 +1,12 @@ +--- +name: Post PR code suggestions + +on: + workflow_run: + workflows: ["Linting and code quality check"] + types: + - completed + +jobs: + post-pr-reviews: + uses: mundialis/github-workflows/.github/workflows/post-pr-reviews.yml@main \ No newline at 
end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..ef1da64 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,48 @@ +# For documentation about this config, see +# https://pylint.readthedocs.io/en/stable/user_guide/configuration/all-options.html (as of writing, version 2.17.4) + +[MAIN] + +jobs=0 # Default: 1 + +load-plugins= + pylint.extensions.broad_try_clause + +recursive=yes # Default: False + +# reports=yes # Default: False + +# score=no # Default: True + +# To disable more rules, see output of pylint. E.g. +# [...] C0301: Line too long (89/80) (line-too-long) +# can be suppressed with either disable=line-too-long or disable=C +# It is also possible to ignore a specific line by adding +# # pylint: disable=broad-exception-caught +# above the line causing the lint error +disable= + W, ; all Warnings are allowed to fail + import-error, ; To suppress e.g. "Unable to import 'grass.script'" + missing-module-docstring, ; we use the GRASS GIS header + R, ; refactoring + design recommendations + consider-using-enumerate, ; to suppress consider using enumerate + line-too-long, ; >80 + +[BASIC] +# for global variable setting +good-names=keep_data,download_dir,gisdbase,tgtgisrc,tmploc,srcgisrc + +[BROAD_TRY_CLAUSE] +max-try-statements=4 # Default: 1 + +[FORMAT] +max-line-length=80 # Default: 100 +max-module-lines=800 # Default: 1000 +ignore-long-lines=.*COPYRIGHT:.* |# . 
description:.*|\s*(# )?.*http.:\/\/\S+?|# %%* + +[MESSAGES CONTROL] +# E0606 (possibly-used-before-assignment): to set options and flags at the bottom of the file without pre-initialization +disable=E0606 + +[VARIABLES] +additional-builtins=_ # Default: () diff --git a/.pylintrc_allowed_to_fail b/.pylintrc_allowed_to_fail new file mode 100644 index 0000000..e00ddf0 --- /dev/null +++ b/.pylintrc_allowed_to_fail @@ -0,0 +1,57 @@ +# For documentation about this config, see +# https://pylint.readthedocs.io/en/stable/user_guide/configuration/all-options.html (as of writing, version 2.17.4) + +[MAIN] + +exit-zero=yes + +jobs=0 # Default: 1 + +load-plugins= + pylint.extensions.broad_try_clause + +recursive=yes # Default: False + +# score=no # Default: True + +[BASIC] +# for global variable setting +good-names=keep_data,download_dir,gisdbase,tgtgisrc,tmploc,srcgisrc + +[BROAD_TRY_CLAUSE] +max-try-statements=4 # Default: 1 + +[FORMAT] +max-line-length=80 # Default: 100 +max-module-lines=800 # Default: 1000 +ignore-long-lines=.*COPYRIGHT:.* |# . description:.*|\s*(# )?.*http.:\/\/\S+?|# %%* + +[MESSAGES CONTROL] +# E0606 (possibly-used-before-assignment): to set options and flags at the bottom of the file without pre-initialization +disable=E0606 + +[VARIABLES] +additional-builtins=_ # Default: () + + +; [DESIGN] + +; # Maximum number of arguments for function / method. +; # Default: 5 +; max-args=9 + +; # Maximum number of attributes for a class (see R0902). +; # Default: 7 +; max-attributes=11 + +; # Maximum number of branch for function / method body. +; # Default: 12 +; max-branches=15 + +; # Maximum number of locals for function / method body. +; # Default: 15 +; max-locals=19 + +; # Maximum number of return / yield for function / method body. +; # Default: 6 +; max-returns=11 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..334e66a --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +MODULE_TOPDIR = ../.. 
+ +PGM = m.neural_network + +# note: to deactivate a module, just place a file "DEPRECATED" into the subdir +ALL_SUBDIRS := ${sort ${dir ${wildcard */.}}} +DEPRECATED_SUBDIRS := ${sort ${dir ${wildcard */DEPRECATED}}} +RM_SUBDIRS := bin/ docs/ etc/ scripts/ +SUBDIRS_1 := $(filter-out $(DEPRECATED_SUBDIRS), $(ALL_SUBDIRS)) +SUBDIRS := $(filter-out $(RM_SUBDIRS), $(SUBDIRS_1)) + +include $(MODULE_TOPDIR)/include/Make/Dir.make + +default: parsubdirs htmldir + +install: installsubdirs + $(INSTALL_DATA) $(PGM).html $(INST_DIR)/docs/html/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..b80a7f5 --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ +# m.neural_network - Toolset for creating training data and training a neural network + +For now the toolset only includes add-ons for data preparation for training data creation. + +The m.neural_network toolset consists of the following modules: +* m.neural_network.preparedata: prepare training data as first step for the process of creating a neural network. + * m.neural_network.preparedata.worker_nullcells: Worker module for m.neural_network.preparedata to check null cells + * m.neural_network.preparedata.worker_export: Worker module for m.neural_network.preparedata to export data +* m.neural_network.preparetraining: prepare training data for use in model training + * m.neural_network.preparetraining.worker: Worker module for m.neural_network.preparetraining to check and rasterize label data \ No newline at end of file diff --git a/m.neural_network.html b/m.neural_network.html new file mode 100644 index 0000000..e98985b --- /dev/null +++ b/m.neural_network.html @@ -0,0 +1,53 @@ + + + +m.neural_network toolset - GRASS GIS manual + + + + + + + +
+ +GRASS logo +
+ +

NAME

+ +m.neural_network - GRASS GIS addons to train and apply a neural network. + +

KEYWORDS

+raster, vector + + +

DESCRIPTION

+ +The m.neural_network toolset consists of several modules. + + + +

REQUIREMENTS

+ +The following Python libraries are needed. + + + +

AUTHORS

+ +Anika Weinmann, mundialis GmbH & Co. KG, weinmann at mundialis.de +

Guido Riembauer, mundialis GmbH & Co. KG, riembauer at mundialis.de

+

Victoria-Leandra Brunn, mundialis GmbH & Co. KG, brunn at mundialis.de

+ +
+ + + diff --git a/m.neural_network.preparedata.worker_export/Makefile b/m.neural_network.preparedata.worker_export/Makefile new file mode 100644 index 0000000..9dcef3d --- /dev/null +++ b/m.neural_network.preparedata.worker_export/Makefile @@ -0,0 +1,7 @@ +MODULE_TOPDIR = ../.. + +PGM = m.neural_network.preparedata.worker_export + +include $(MODULE_TOPDIR)/include/Make/Script.make + +default: script \ No newline at end of file diff --git a/m.neural_network.preparedata.worker_export/m.neural_network.preparedata.worker_export.html b/m.neural_network.preparedata.worker_export/m.neural_network.preparedata.worker_export.html new file mode 100644 index 0000000..7ea08a0 --- /dev/null +++ b/m.neural_network.preparedata.worker_export/m.neural_network.preparedata.worker_export.html @@ -0,0 +1,14 @@ +

DESCRIPTION

+ +m.neural_network.preparedata.worker_export is used within m.neural_network.preparedata to export data in parallel. + +

SEE ALSO

+ + +g.region, +r.univar + + +

AUTHORS

+Anika Weinmann, mundialis GmbH & Co. KG
+

Guido Riembauer, mundialis GmbH & Co. KG

\ No newline at end of file diff --git a/m.neural_network.preparedata.worker_export/m.neural_network.preparedata.worker_export.py b/m.neural_network.preparedata.worker_export/m.neural_network.preparedata.worker_export.py new file mode 100644 index 0000000..452bd3b --- /dev/null +++ b/m.neural_network.preparedata.worker_export/m.neural_network.preparedata.worker_export.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python3 +"""############################################################################ +# +# MODULE: m.neural_network.preparedata.worker_export +# AUTHOR(S): Guido Riembauer, Anika Weinmann +# PURPOSE: Worker module for m.neural_network.preparedata to export data +# COPYRIGHT: (C) 2024 by mundialis GmbH & Co. KG and the GRASS Development +# Team. +# +# This program is free software under the GNU General Public +# License (v3). Read the file COPYING that comes with GRASS +# for details. +# +############################################################################# +""" + +# %Module +# % description: Worker module for m.neural_network.preparedata to export data. +# % keyword: raster +# % keyword: export +# % keyword: segmentation +# %end + +# %option G_OPT_R_INPUTS +# % key: image_bands +# % label: The names of imagery raster bands, e.g. 
for DOPs RGBI raster bands +# % description: The first raster defines the output resolution +# % guisection: Input +# %end + +# %option G_OPT_R_INPUT +# % key: ndsm +# % label: Name of the nDSM raster +# % answer: ndsm +# % guisection: Input +# %end + +# %option G_OPT_V_INPUT +# % key: reference +# % required: no +# % label: Name of the reference vector map +# % guisection: Optional input +# %end + +# %option +# % key: segmentation_minsize +# % type: integer +# % required: no +# % label: Minimum number of cells in a segment +# % answer: 80 +# % guisection: Optional input +# %end + +# %option +# % key: segmentation_threshold +# % type: double +# % required: no +# % label: Difference threshold between 0 and 1 for the segments +# % description: Threshold = 0 merges only identical segments; threshold = 1 merges all +# % answer: 0.3 +# % guisection: Optional input +# %end + +# %option G_OPT_M_DIR +# % key: output_dir +# % multiple: no +# % label: Directory where the prepared data should be stored +# % description: The directory will be split into train and apply +# % guisection: Output +# %end + +# %option +# % key: tile_name +# % type: string +# % required: yes +# % multiple: no +# % key_desc: name +# % label: Unique Name of the tile +# %end + +# %option +# % key: new_mapset +# % type: string +# % required: yes +# % multiple: no +# % label: Name for new mapset +# %end + +# %flag +# % key: t +# % label: Export reference or segmentation data as training data suggestion +# %end + +import os +import shutil + +import grass.script as grass +from grass.pygrass.utils import get_lib_path +from grass.script.vector import vector_info_topo +from grass_gis_helpers.mapset import switch_to_new_mapset + +EXPORT_PARAM = { + "format": "GTiff", + "flags": "mc", + "createopt": "COMPRESS=LZW,TILED=YES,BIGTIFF=YES", + "overviews": 5, + "quiet": True, +} +NEWGISRC = None +GISRC = None +ID = grass.tempname(8) +NEW_MAPSET = None + + +def cleanup() -> None: + """Clean up function switching 
mapsets and deleting the new one.""" + grass.utils.try_remove(NEWGISRC) + os.environ["GISRC"] = GISRC + # delete the new mapset as well (belt and braces) + gisenv = grass.gisenv() + gisdbase = gisenv["GISDBASE"] + location = gisenv["LOCATION_NAME"] + mapset_dir = os.path.join(gisdbase, location, NEW_MAPSET) + if os.path.isdir(mapset_dir): + shutil.rmtree(mapset_dir) + + +def main() -> None: + """Export tiles and training data suggestion.""" + global NEW_MAPSET, NEWGISRC, GISRC + + NEW_MAPSET = options["new_mapset"] + tile_name = options["tile_name"] + image_bands = options["image_bands"].split(",") + ndsm = options["ndsm"] + reference = options["reference"] + segmentation_minsize = int(options["segmentation_minsize"]) + segmentation_threshold = float(options["segmentation_threshold"]) + output_dir = options["output_dir"] + tr_flag = flags["t"] + + # get addon etc path + etc_path = get_lib_path(modname="m.neural_network.preparedata") + if etc_path is None: + grass.fatal("Unable to find qml files!") + + # make new output directory + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + + # switch to the new mapset + GISRC, NEWGISRC, old_mapset = switch_to_new_mapset(NEW_MAPSET, new=False) + + # append the previous mapset to map names that are not fully qualified + if ndsm and "@" not in ndsm: + ndsm += f"@{old_mapset}" + if reference and "@" not in reference: + reference += f"@{old_mapset}" + for num in range(len(image_bands)): + if "@" not in image_bands[num]: + image_bands[num] += f"@{old_mapset}" + + # image band export + image_file = os.path.join(output_dir, f"image_{tile_name}.tif") + grass.run_command( + "i.group", + group="image_bands", + input=image_bands, + quiet=True, + ) + grass.run_command( + "r.out.gdal", + input="image_bands", + output=image_file, + **EXPORT_PARAM, + ) + + # nDSM export + grass.run_command( + "r.out.gdal", + input=ndsm, + output=os.path.join(output_dir, f"ndsm_{tile_name}.tif"), + **EXPORT_PARAM, + ) + + # nDSM scaled + export (cut to [0 30] and rescale to [1 255]) + ndsm_sc_file = 
os.path.join(output_dir, f"ndsm_1_255_{tile_name}.tif") + ex_cut = f"ndsm_cut = if( {ndsm} >= 30, 30, if( {ndsm} < 0, 0, {ndsm} ) )" + grass.run_command("r.mapcalc", expression=ex_cut) + ex_scale = "ndsm_scaled = int((ndsm_cut / 30. * 254.) + 1)" + grass.run_command("r.mapcalc", expression=ex_scale) + grass.run_command( + "r.out.gdal", + input="ndsm_scaled", + output=ndsm_sc_file, + type="Byte", + **EXPORT_PARAM, + ) + + # segmentation or clip reference data + if tr_flag: + label_file = os.path.join(output_dir, f"label_{tile_name}.gpkg") + create_seg = False + if reference: + grass.run_command( + "v.clip", + input=reference, + output="reference_clipped", + flags="r", + quiet=True, + ) + if vector_info_topo("reference_clipped")["centroids"] == 0: + create_seg = True + else: + grass.run_command( + "v.db.addcolumn", + map="reference_clipped", + columns="class_number INTEGER", + quiet=True, + ) + grass.run_command( + "v.db.update", + map="reference_clipped", + column="class_number", + value=0, + quiet=True, + ) + grass.run_command( + "v.out.ogr", + input="reference_clipped", + output=label_file, + flags="s", + quiet=True, + ) + else: + create_seg = True + if create_seg: + ndsm_range = grass.parse_command( + "r.info", + map="ndsm_scaled", + flags="r", + ) + if ndsm_range["min"] != ndsm_range["max"]: + grass.run_command( + "i.group", + group="image_bands", + input="ndsm_scaled", + quiet=True, + ) + grass.run_command( + "i.segment", + group="image_bands", + output="segments", + threshold=segmentation_threshold, + minsize=segmentation_minsize, + memory=1000, + quiet=True, + ) + grass.run_command( + "r.to.vect", + input="segments", + output="segments", + type="area", + col="class_number", + flags="s", + quiet=True, + ) + grass.run_command( + "v.db.update", + map="segments", + column="class_number", + value=0, + quiet=True, + ) + grass.run_command( + "v.out.ogr", + input="segments", + output=label_file, + flags="s", + quiet=True, + ) + # copy qml file + qml_src_file = 
os.path.join(etc_path, "qml", "label.qml") + qml_dest_file = os.path.join(output_dir, f"label_{tile_name}.qml") + shutil.copyfile(qml_src_file, qml_dest_file) + + +if __name__ == "__main__": + options, flags = grass.parser() + main() diff --git a/m.neural_network.preparedata.worker_nullcells/Makefile b/m.neural_network.preparedata.worker_nullcells/Makefile new file mode 100644 index 0000000..ce3bf4a --- /dev/null +++ b/m.neural_network.preparedata.worker_nullcells/Makefile @@ -0,0 +1,7 @@ +MODULE_TOPDIR = ../.. + +PGM = m.neural_network.preparedata.worker_nullcells + +include $(MODULE_TOPDIR)/include/Make/Script.make + +default: script \ No newline at end of file diff --git a/m.neural_network.preparedata.worker_nullcells/m.neural_network.preparedata.worker_nullcells.html b/m.neural_network.preparedata.worker_nullcells/m.neural_network.preparedata.worker_nullcells.html new file mode 100644 index 0000000..5588ee5 --- /dev/null +++ b/m.neural_network.preparedata.worker_nullcells/m.neural_network.preparedata.worker_nullcells.html @@ -0,0 +1,14 @@ +

DESCRIPTION

+ +m.neural_network.preparedata.worker_nullcells is used within m.neural_network.preparedata to analyse the number of null cells in parallel. + +

SEE ALSO

+ + +g.region, +r.univar + + +

AUTHORS

+Anika Weinmann, mundialis GmbH & Co. KG
+

Guido Riembauer, mundialis GmbH & Co. KG

\ No newline at end of file diff --git a/m.neural_network.preparedata.worker_nullcells/m.neural_network.preparedata.worker_nullcells.py b/m.neural_network.preparedata.worker_nullcells/m.neural_network.preparedata.worker_nullcells.py new file mode 100644 index 0000000..a2cb37c --- /dev/null +++ b/m.neural_network.preparedata.worker_nullcells/m.neural_network.preparedata.worker_nullcells.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +"""############################################################################ +# +# MODULE: m.neural_network.preparedata.worker_nullcells +# AUTHOR(S): Guido Riembauer, Anika Weinmann +# PURPOSE: Worker module for m.neural_network.preparedata to check null +# cells +# COPYRIGHT: (C) 2024 by mundialis GmbH & Co. KG and the GRASS Development +# Team. +# +# This program is free software under the GNU General Public +# License (v3). Read the file COPYING that comes with GRASS +# for details. +# +############################################################################# +""" + +# %Module +# % description: Worker module for m.neural_network.preparedata to check null cells. 
+# % keyword: raster +# % keyword: statistics +# %end + +# %option +# % key: n +# % type: string +# % required: no +# % multiple: no +# % key_desc: value +# % description: Value for the northern edge +# % guisection: Bounds +# %end + +# %option +# % key: s +# % type: string +# % required: no +# % multiple: no +# % key_desc: value +# % description: Value for the southern edge +# % guisection: Bounds +# %end + +# %option +# % key: e +# % type: string +# % required: no +# % multiple: no +# % key_desc: value +# % description: Value for the eastern edge +# % guisection: Bounds +# %end + +# %option +# % key: w +# % type: string +# % required: no +# % multiple: no +# % key_desc: value +# % description: Value for the western edge +# % guisection: Bounds +# %end + +# %option +# % key: res +# % type: string +# % required: no +# % multiple: no +# % key_desc: value +# % description: 2D grid resolution (north-south and east-west) +# % guisection: Resolution +# %end + +# %option G_OPT_R_INPUT +# % key: map +# % label: The name of input raster map +# % guisection: Input +# %end + +# %option +# % key: tile_name +# % type: string +# % required: yes +# % multiple: no +# % key_desc: name +# % label: Unique Name of the tile +# %end + +# %option +# % key: new_mapset +# % type: string +# % required: yes +# % multiple: no +# % label: Name for new mapset +# %end + +import os +import shutil +import sys + +import grass.script as grass +from grass_gis_helpers.mapset import switch_to_new_mapset + +NEWGISRC = None +GISRC = None +ID = grass.tempname(8) +NEW_MAPSET = None + + +# NOTE(review): no atexit registration of cleanup() is visible in this script; +# presumably the calling module removes the temporary mapset - confirm +def cleanup() -> None: + """Clean up function switching mapsets and deleting the new one.""" + grass.utils.try_remove(NEWGISRC) + os.environ["GISRC"] = GISRC + # delete the new mapset as well (belt and braces) + gisenv = grass.gisenv() + gisdbase = gisenv["GISDBASE"] + location = gisenv["LOCATION_NAME"] + mapset_dir = os.path.join(gisdbase, location, NEW_MAPSET) + if os.path.isdir(mapset_dir): + shutil.rmtree(mapset_dir) + + 
+def main() -> None: + """Check null cells.""" + global NEW_MAPSET, NEWGISRC, GISRC + + NEW_MAPSET = options["new_mapset"] + tile_name = options["tile_name"] + north = options["n"] + south = options["s"] + west = options["w"] + east = options["e"] + res = options["res"] + map = options["map"] + + # switch to the new mapset + GISRC, NEWGISRC, old_mapset = switch_to_new_mapset(NEW_MAPSET) + + # map full name + if "@" not in map: + map += f"@{old_mapset}" + + # set region + grass.message(_(f"Set region for tile {tile_name} ...")) + grass.run_command( + "g.region", + n=north, + s=south, + e=east, + w=west, + res=res, + quiet=True, + ) + + # get number of null cells + stats = grass.parse_command( + "r.univar", + map=map, + flags="g", + ) + sys.stdout.write( + f"For tile {tile_name} the number of null cells is: {stats['null_cells']}\n", + ) + + +if __name__ == "__main__": + options, flags = grass.parser() + main() diff --git a/m.neural_network.preparedata/Makefile b/m.neural_network.preparedata/Makefile new file mode 100644 index 0000000..da01e06 --- /dev/null +++ b/m.neural_network.preparedata/Makefile @@ -0,0 +1,14 @@ +MODULE_TOPDIR = ../.. + +PGM = m.neural_network.preparedata + +include $(MODULE_TOPDIR)/include/Make/Script.make + +QML_ETC = $(patsubst %,$(ETC)/$(PGM)/%,$(wildcard qml/*.qml)) +mkdir: + $(MKDIR) $(ETC)/$(PGM)/qml + +default: script mkdir $(QML_ETC) + +$(ETC)/$(PGM)/%: % | $(ETC)/$(PGM) + $(INSTALL_DATA) $< $@ diff --git a/m.neural_network.preparedata/m.neural_network.preparedata.html b/m.neural_network.preparedata/m.neural_network.preparedata.html new file mode 100644 index 0000000..8ef3fea --- /dev/null +++ b/m.neural_network.preparedata/m.neural_network.preparedata.html @@ -0,0 +1,27 @@ +

DESCRIPTION

+ +m.neural_network.preparedata prepares tiles for the labeling process as part of the +training data preparation of a neural network using DOPs and nDSM as input data. Additionally, +a tile index containing information about the labeled status is created. + +

EXAMPLES

+ +

Prepare the labeling of training data for a neural network with a tile_size of 512

+ +
+m.neural_network.preparedata image_bands=top_red_02,top_green_02,top_blue_02,top_nir_02 ndsm=ndsm tile_size=512 output_dir=/mnt/data/ nprocs=7
+
+ + +

SEE ALSO

+ + +g.region, +r.univar + + +

AUTHORS

+ +Anika Weinmann, mundialis GmbH & Co. KG
+Guido Riembauer, mundialis GmbH & Co. KG
+Victoria-Leandra Brunn, mundialis GmbH & Co. KG diff --git a/m.neural_network.preparedata/m.neural_network.preparedata.py b/m.neural_network.preparedata/m.neural_network.preparedata.py new file mode 100644 index 0000000..0662c08 --- /dev/null +++ b/m.neural_network.preparedata/m.neural_network.preparedata.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python3 +"""############################################################################ +# +# MODULE: m.neural_network.preparedata +# AUTHOR(S): Anika Weinmann, Guido Riembauer and Victoria-Leandra Brunn +# PURPOSE: Prepare training data as first step for the process of +# creating a neural network. +# +# COPYRIGHT: (C) 2024 by mundialis and the GRASS Development Team +# +# This program is free software under the GNU General Public +# License (v3). Read the file COPYING that comes with GRASS +# for details. +# +############################################################################# +""" + +# %Module +# % description: Prepare training data for creating a neuronal network +# % keyword: raster +# % keyword: vector +# % keyword: export +# % keyword: neural network +# % keyword: preparation +# %end + +# %option G_OPT_R_INPUTS +# % key: image_bands +# % label: The names of imagery raster bands, e.g. 
for DOPs RGBI raster bands +# % description: The first raster defines the output resolution +# % guisection: Input +# %end + +# %option G_OPT_R_INPUT +# % key: ndsm +# % label: Name of the nDSM raster +# % answer: ndsm +# % guisection: Input +# %end + +# %option G_OPT_V_INPUT +# % key: reference +# % required: no +# % label: Name of the reference vector map +# % guisection: Optional input +# %end + +# %option +# % key: tile_size +# % type: integer +# % required: yes +# % label: Size of the created tiles in cells +# % description: Creates tiles of size , +# % answer: 512 +# % guisection: Optional input +# %end + +# %option +# % key: tile_overlap +# % type: integer +# % required: yes +# % label: Overlap of the created tiles in cells +# % answer: 128 +# % guisection: Optional input +# %end + +# %option +# % key: segmentation_minsize +# % type: integer +# % required: no +# % label: Minimum number of cells in a segment +# % answer: 80 +# % guisection: Optional input +# %end + +# %option +# % key: segmentation_threshold +# % type: double +# % required: no +# % label: Difference threshold between 0 and 1 for the segments +# % description: Threshold = 0 merges only identical segments; threshold = 1 merges all +# % answer: 0.3 +# % guisection: Optional input +# %end + +# %option +# % key: train_percentage +# % type: integer +# % required: no +# % label: The percentage of data set for which the training data should be prepared +# % answer: 30 +# % guisection: Optional input +# %end + +# %option G_OPT_M_DIR +# % key: output_dir +# % multiple: no +# % label: Directory where the prepared data should be stored +# % description: The directory will be split into train and apply +# % guisection: Output +# %end + +# %option G_OPT_M_NPROCS +# %end + +import atexit +import json +import os +import random +import shutil +import subprocess + +import grass.script as grass +from grass.pygrass.modules import Module, ParallelModuleQueue +from grass.pygrass.utils import get_lib_path +from 
grass_gis_helpers.cleanup import general_cleanup +from grass_gis_helpers.general import set_nprocs +from grass_gis_helpers.mapset import verify_mapsets +from grass_gis_helpers.parallel import check_parallel_errors + +# initialize global vars +ID = grass.tempname(8) +rm_files = list() +ORIG_REGION = None +rm_dirs = [] + + +def cleanup() -> None: + """Clean up function calling general clean up from grass_gis_helpers.""" + general_cleanup( + orig_region=ORIG_REGION, + rm_dirs=rm_dirs, + rm_files=rm_files, + ) + + +def export_tindex(output_dir, geojson_dict, etc_path) -> None: + """Export tile index from geojson_dict. + + Writes the tile index as GeoJSON, converts it to a GeoPackage with + ogr2ogr, prints the ogrinfo summary of the result and copies the + tindex.qml style file next to it. + + Args: + output_dir (str): The output directory where the tile index should be + exported + geojson_dict (dict): The dictionary with the tile index + etc_path (str): The addon etc path + + Raises: + OSError: If the ogr2ogr or ogrinfo executable cannot be started + + """ + geojson_file = os.path.join(output_dir, "tindex.geojson") + gpkg_file = os.path.join(output_dir, "tindex.gpkg") + rm_files.append(geojson_file) + with open(geojson_file, "w", encoding="utf-8") as f: + json.dump(geojson_dict, f, indent=4) + # create GPKG from GeoJson; the arguments are passed as a list (no shell) + # so that paths with spaces or shell metacharacters are handled correctly + conversion = subprocess.run( + ["ogr2ogr", gpkg_file, geojson_file], + stdout=subprocess.DEVNULL, + check=False, + ) + if conversion.returncode != 0: + grass.warning( + _(f"ogr2ogr exited with code {conversion.returncode}"), + ) + + # verify + print("Verifying vector tile index:") + verification = subprocess.run( + ["ogrinfo", "-so", "-al", gpkg_file], + stdout=subprocess.PIPE, + text=True, + check=False, + ) + print(verification.stdout) + + # copy qml file + qml_src_file = os.path.join(etc_path, "qml", "tindex.qml") + qml_dest_file = os.path.join(output_dir, "tindex.qml") + shutil.copyfile(qml_src_file, qml_dest_file) + + +def main() -> None: + """Prepare training data. + + Main function for data preparation. Creating tileindex, calling + export_tindex for its export. Creating tiles for label process + with DOPs and nDOM split in train and apply tiles. Exporting tiles + regarding to tileindex. 
+ """ + global ORIG_REGION, rm_files + + image_bands = options["image_bands"].split(",") + ndsm = options["ndsm"] + reference = options["reference"] + tile_size = int(options["tile_size"]) + tile_overlap = int(options["tile_overlap"]) + segmentation_minsize = int(options["segmentation_minsize"]) + segmentation_threshold = float(options["segmentation_threshold"]) + train_percentage = int(options["train_percentage"]) + output_dir = options["output_dir"] + nprocs = set_nprocs(int(options["nprocs"])) + + # the tile step (tile_size - tile_overlap) must be positive, otherwise the + # tile count below would divide by zero or become negative + if tile_overlap >= tile_size: + grass.fatal(_("tile_overlap must be smaller than tile_size")) + + # get addon etc path + etc_path = get_lib_path(modname="m.neural_network.preparedata") + if etc_path is None: + grass.fatal("Unable to find qml files!") + + # get location infos + gisenv = grass.gisenv() + cur_mapset = gisenv["MAPSET"] + gisdbase = gisenv["GISDBASE"] + location = gisenv["LOCATION_NAME"] + + # save original region + ORIG_REGION = f"orig_region_{ID}" + grass.run_command("g.region", save=ORIG_REGION, quiet=True) + + # set region + grass.run_command("g.region", raster=image_bands[0], quiet=True) + reg = grass.region() + + # parameter for tiles + res = reg["nsres"] + tile_size_map_units = tile_size * res + tile_overlap_map_units = tile_overlap * res + + # create GeoJson for tindex + epsg_code = grass.parse_command("g.proj", flags="g")["srid"].split(":")[-1] + + geojson_dict = { + "type": "FeatureCollection", + "name": "tindex", + "crs": { + "type": "name", + "properties": {"name": f"urn:ogc:def:crs:EPSG::{epsg_code}"}, + }, + "features": [], + } + + # start values + north = reg["n"] + # tiles per axis = ceil(cells / step), written as integer ceiling division; + # round(x + 0.5) rounds halves to even and therefore added an extra, + # completely empty tile for odd exact multiples only + tile_step = tile_size - tile_overlap + num_tiles_row = -(-reg["rows"] // tile_step) + num_tiles_col = -(-reg["cols"] // tile_step) + num_zeros = max(len(str(num_tiles_row)), len(str(num_tiles_col))) + + # loop over tiles + queue = ParallelModuleQueue(nprocs=nprocs) + num = 0 + try: + for row in range(num_tiles_row): + west = reg["w"] + for col in range(num_tiles_col): + grass.message( + _(f"Checking for null cells: row {row} - col {col}"), + ) + row_str = 
str(row).zfill(num_zeros) + col_str = str(col).zfill(num_zeros) + tile_id = f"{row_str}{col_str}" + tile_name = f"tile_{row_str}_{col_str}" + new_mapset = f"tmp_mapset_{ID}_{tile_id}" + rm_dirs.append(os.path.join(gisdbase, location, new_mapset)) + + # set tile region + south = north - tile_size_map_units + east = west + tile_size_map_units + + # worker to request the null cells to get the info if the tile + # can be a training data tile + worker_nullcells = Module( + "m.neural_network.preparedata.worker_nullcells", + n=north, + s=south, + e=east, + w=west, + res=res, + map=image_bands[0], + tile_name=num, + new_mapset=new_mapset, + run_=False, + ) + worker_nullcells.stdout_ = grass.PIPE + worker_nullcells.stderr_ = grass.PIPE + queue.put(worker_nullcells) + + # create tile for tindex + feat = { + "type": "Feature", + "properties": { + "fid": tile_id, + "name": tile_name, + "path": "", + "training": "false", + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [west, north], + [east, north], + [east, south], + [west, south], + [west, north], + ], + ], + }, + } + geojson_dict["features"].append(feat) + + # set region west for next tile + west += tile_size_map_units - tile_overlap_map_units + num += 1 + north -= tile_size_map_units - tile_overlap_map_units + queue.wait() + except Exception: + check_parallel_errors(queue) + + verify_mapsets(cur_mapset) + + possible_tr_data = [] + tiles_with_data = [] + tiles_wo_data = [] + for proc in queue.get_finished_modules(): + stdout_strs = proc.outputs["stdout"].value.strip().split(":") + null_cells = int(stdout_strs[1].strip()) + num = int(stdout_strs[0].split(" ")[2]) + if null_cells == 0: + possible_tr_data.append(num) + if null_cells != tile_size * tile_size: + tiles_with_data.append(num) + else: + tiles_wo_data.append(num) + + # random split into train and apply data tiles + num_tr_tiles = round(train_percentage / 100.0 * len(possible_tr_data)) + random.shuffle(possible_tr_data) + tr_tiles = 
possible_tr_data[:num_tr_tiles] + # set membership keeps the train/apply split linear in the number of tiles + training_ids = set(tr_tiles) + ap_tiles = [x for x in tiles_with_data if x not in training_ids] + # loop over training data + queue_export_tr = ParallelModuleQueue(nprocs=nprocs) + try: + for i, tr_tile in enumerate(tr_tiles): + tile_name = geojson_dict["features"][tr_tile]["properties"]["name"] + tile_path = os.path.join(output_dir, "train", tile_name) + tile_id = geojson_dict["features"][tr_tile]["properties"]["fid"] + grass.message( + _( + f"Segmenting and/or Exporting: " + f"training tile {i + 1} of {len(tr_tiles)}", + ), + ) + new_mapset = f"tmp_mapset_{ID}_{tile_id}" + # update geojson values + geojson_dict["features"][tr_tile]["properties"][ + "training" + ] = "TODO" + geojson_dict["features"][tr_tile]["properties"]["path"] = tile_path + # worker for export + worker_export_tr = Module( + "m.neural_network.preparedata.worker_export", + image_bands=image_bands, + ndsm=ndsm, + tile_name=tile_name, + reference=reference, + segmentation_minsize=segmentation_minsize, + segmentation_threshold=segmentation_threshold, + output_dir=tile_path, + new_mapset=new_mapset, + flags="t", + run_=False, + ) + worker_export_tr.stdout_ = grass.PIPE + worker_export_tr.stderr_ = grass.PIPE + queue_export_tr.put(worker_export_tr) + queue_export_tr.wait() + except Exception: + check_parallel_errors(queue_export_tr) + verify_mapsets(cur_mapset) + + # loop over apply data + queue_export_ap = ParallelModuleQueue(nprocs=nprocs) + try: + for i, ap_tile in enumerate(ap_tiles): + tile_name = geojson_dict["features"][ap_tile]["properties"]["name"] + tile_path = os.path.join(output_dir, "apply", tile_name) + tile_id = geojson_dict["features"][ap_tile]["properties"]["fid"] + grass.message( + _(f"Exporting: apply tile {i + 1} of {len(ap_tiles)}"), + ) + new_mapset = f"tmp_mapset_{ID}_{tile_id}" + # update geojson values + geojson_dict["features"][ap_tile]["properties"]["training"] = "no" + geojson_dict["features"][ap_tile]["properties"]["path"] = tile_path + # worker for export + worker_export_ap = 
worker_export_ap = Module( + "m.neural_network.preparedata.worker_export", + image_bands=image_bands, + tile_name=tile_name, + ndsm=ndsm, + output_dir=tile_path, + new_mapset=new_mapset, + run_=False, + ) + worker_export_ap.stdout_ = grass.PIPE + queue_export_ap.put(worker_export_ap) + queue_export_ap.wait() + except Exception: + check_parallel_errors(queue_export_ap) + verify_mapsets(cur_mapset) + + # remove tiles without data + tiles_wo_data.reverse() + for num in tiles_wo_data: + del geojson_dict["features"][num] + + # export tindex + export_tindex(output_dir, geojson_dict, etc_path) + + grass.message(_("Prepare data done")) + + +if __name__ == "__main__": + options, flags = grass.parser() + atexit.register(cleanup) + main() diff --git a/m.neural_network.preparedata/qml/label.qml b/m.neural_network.preparedata/qml/label.qml new file mode 100644 index 0000000..7cdaa68 --- /dev/null +++ b/m.neural_network.preparedata/qml/label.qml @@ -0,0 +1,483 @@ + + + + 1 + 1 + 1 + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + + + 0 + generatedlayout + + + + + + + + + + + + + + + + + + "label" + + 2 + diff --git a/m.neural_network.preparedata/qml/tindex.qml b/m.neural_network.preparedata/qml/tindex.qml new file mode 100644 index 0000000..c68544d --- /dev/null +++ b/m.neural_network.preparedata/qml/tindex.qml @@ -0,0 +1,682 @@ + + + + 1 + 1 + 1 + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + 0 + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + + + 0 + generatedlayout + + + + + + + + + + + + + + + + + + + + + "name" + + 2 + diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..d7618a8 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,22 @@ + +[lint] +ignore = [ + "A001", # shadowing variables + "ANN001", # missing type annotation + "BLE001", # catch blind exception + "D205", + "E501", # allow > 79 characters + "INT001", + "PLR0912", # allow > 12 branches (if/for/else/...) + "PLR0914", + "PLR0915", + "PLW0602", + "PLW0603", + "PTH103", + "PTH112", + "PTH118", + "PTH123", + "S605", # using shell + "F821", + "D203", + "D213",]