masking outliers + conflicting distributions + incidence angles
MartinSchobben committed Dec 18, 2023
1 parent c66aa3c commit 5d175e3
Showing 6 changed files with 503 additions and 125 deletions.
414 changes: 339 additions & 75 deletions notebooks/1_yeoda_dc.ipynb

Large diffs are not rendered by default.

181 changes: 143 additions & 38 deletions notebooks/1_yeoda_dc.qmd
@@ -39,8 +39,7 @@ from openeo.processes import ProcessBuilder, array_element, add, multiply, sin,
The paths to the local data sources define the collections to be loaded in the next steps. In the case of a local openEO instance, we do this by just supplying the paths to the files required for the analysis.

```{python}
ROOT_DATA = "~/shares/radar/Projects/interTwin/08_scratch/open_eo_local/repo/openeo-flood-mapper-local/notebooks/"
ROOT_DATA = ""
hparam_id = Path(f"{ROOT_DATA}openEO_local/tuw_s1_harpar/S1_CSAR_IWGRDH/SIG0-HPAR/V0M2R3/EQUI7_EU020M/E054N006T3/D080.nc")
plia_id = Path(f"{ROOT_DATA}openEO_local/s1_parameters/S1_CSAR_IWGRDH/PLIA-TAG/V01R03/EQUI7_EU020M/E054N006T3/PLIA-TAG-MEAN_20200101T000000_20201231T235959__D080_E054N006T3_EU020M_V01R03_S1IWGRDH.nc")
sig0_id = Path(f"{ROOT_DATA}openEO_local/s1_parameters/S1_CSAR_IWGRDH/SIG0/V1M1R1/EQUI7_EU020M/E054N006T3/SIG0_20180228T043908__VV_D080_E054N006T3_EU020M_V1M1R1_S1AIWGRDH_TUWIEN.nc")
@@ -139,8 +138,10 @@ def bayesian_flood_decision(x: ProcessBuilder) -> ProcessBuilder:
    wbsc = x.array_element(index=2)
    hbsc = x.array_element(index=3)
    f_prob = (1.0 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * (((sig0 - wbsc) / nf_std) ** 2))
    nf_prob = (1.0 / (nf_std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * (((sig0 - hbsc) / nf_std) ** 2))
    f_prob = (1.0 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * \
        (((sig0 - wbsc) / nf_std) ** 2))
    nf_prob = (1.0 / (nf_std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * \
        (((sig0 - hbsc) / nf_std) ** 2))
    evidence = (nf_prob * 0.5) + (f_prob * 0.5)
    f_post_prob = (f_prob * 0.5) / evidence
    nf_post_prob = (nf_prob * 0.5) / evidence
@@ -161,47 +162,151 @@ sig0_dc = sig0_dc.reduce_bands('mean').add_dimension('bands', 'sig0', 'bands')
Now we can merge all these datacubes, like so:

```{python}
decision_in = sig0_dc.merge_cubes(std_dc).merge_cubes(water_bsc_dc).merge_cubes(land_bsc_dc)
decision_in = sig0_dc. \
    merge_cubes(std_dc). \
    merge_cubes(water_bsc_dc). \
    merge_cubes(land_bsc_dc). \
    merge_cubes(plia_dc)
```


# Classification

The preprocessed multi-band datacube can then be used as input for the Bayesian flood mapping function, which reduces all these bands to just one band (`dec`) in the resulting datacube. Note that `bayesian_flood_decision` selects its inputs by position with `array_element`, so it relies on the band order established by the merge above (`sig0`, `std`, `wbsc`, `hbsc`, `PLIA`).

```{python}
flood_dc = decision_in.reduce_bands(bayesian_flood_decision). \
    add_dimension('bands', 'dec', 'bands')
flood_dc = flood_dc.merge_cubes(decision_in)
```
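To make this reduction more tangible, the following is a minimal NumPy sketch (plain Python, outside openEO) of the same Bayesian decision for a single pixel. All numeric values are invented for illustration; the water standard deviation of 2.754041 mirrors the constant used in the masking steps below, the land statistics stand in for the harmonic-parameter bands, and the way each standard deviation is wired to its distribution is simplified here for clarity.

```{python}
import numpy as np

def gaussian_likelihood(x, mean, std):
    # likelihood of a backscatter value under a normal distribution
    return (1.0 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std) ** 2)

# illustrative values in dB (not taken from the datacube)
sig0 = -14.0                        # measured backscatter
wbsc, water_std = -20.0, 2.754041   # expected water backscatter and its spread
hbsc, land_std = -11.0, 2.0         # expected land backscatter (harmonic model) and its spread

f_prob = gaussian_likelihood(sig0, wbsc, water_std)    # flood likelihood
nf_prob = gaussian_likelihood(sig0, hbsc, land_std)    # non-flood likelihood

# equal priors of 0.5, as in bayesian_flood_decision
evidence = (nf_prob * 0.5) + (f_prob * 0.5)
f_post_prob = (f_prob * 0.5) / evidence
nf_post_prob = (nf_prob * 0.5) / evidence
f_post_prob, nf_post_prob
```

With these example numbers the measurement sits closer to the land distribution, so the posterior flood probability ends up well below 0.5.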

## openEO flood mapping

We can check the results of the openEO workflow by executing the processing steps with `execute()` and plotting the flood mapping classification.

```{python}
#| echo: False
def view_flood_map(df):
    # selecting a subsection of the data and reprojecting
    flood_map = df[0, 13070:14000, 12900:14200]
    flood_map = flood_map.rio.reproject("EPSG:4326", nodata=np.nan)
    # add open streetmap
    request = cimgt.OSM()
    # initialize figure
    fig = plt.figure(figsize=(13, 9))
    axis = plt.axes(projection=ccrs.PlateCarree(), frameon=True)
    axis.add_image(request, 15)
    # add the data
    flood_map = flood_map.plot(
        ax=axis,
        transform=ccrs.PlateCarree(),
        levels=[0, 1, 2],
        colors=["#00000000", "#ff0000"],
        add_colorbar=False
    )
    # legend and title
    cbar = fig.colorbar(flood_map, ax=axis, location="bottom", shrink=0.6)
    cbar.ax.get_xaxis().set_ticks([])
    for j, lab in enumerate(['non-flood', 'flood']):
        cbar.ax.text((2 * j + 1) / 2.0, 0.5, lab, ha='center', va='center')
    cbar.ax.get_xaxis().labelpad = 10
    tk = fig.gca()
    tk = tk.set_title("Flood map")
```

```{python}
#| fig-cap: "OpenEO floodmap - no pre-processing"
#| results: hide
flood_decision = flood_dc.execute()
view_flood_map(flood_decision)
```

By comparing this figure with the original study [@bauer-marschallinger_satellite-based_2022], we see that the openEO workflow can perform the same operations. There are, however, some differences from the original flood mapping study. These differences relate to the absence, so far, of low-sensitivity masking and of post-processing of the flood probabilities in the openEO workflow. A priori low-sensitivity masking removes observations that are insensitive to flood conditions for physical, geometric, or sensor-side reasons, whereas post-processing removes, for example, the small patches of supposedly flooded pixels scattered throughout the image, also known as "speckle". This speckle produces a noisier picture in the openEO example.
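Such post-processing is not implemented in this notebook, but to illustrate the idea, below is a minimal sketch of a speckle filter that drops small connected patches from a binary flood array. It assumes a 2-D NumPy array of zeros and ones and uses `scipy.ndimage`, which is not otherwise part of this workflow; the patch-size threshold is arbitrary.

```{python}
import numpy as np
from scipy import ndimage

def remove_small_patches(flood, min_pixels=10):
    # label connected flood patches and drop those smaller than min_pixels
    labelled, n_patches = ndimage.label(flood)
    sizes = ndimage.sum(flood, labelled, index=range(1, n_patches + 1))
    keep_labels = np.nonzero(sizes >= min_pixels)[0] + 1
    return flood * np.isin(labelled, keep_labels)

# a lone "speckle" pixel disappears, the coherent patch survives
flood = np.zeros((8, 8), dtype=int)
flood[1, 1] = 1
flood[4:7, 4:7] = 1
remove_small_patches(flood, min_pixels=4)
```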

## Masking of low sensitivity pixels

We continue to improve our flood map by filtering out observations that we expect to have low sensitivity to flooding, based on a predefined set of criteria.

### Masking of Exceeding Incidence Angles

First, we mask areas where the incidence angle falls outside the tolerable range of $27\degree$ to $48\degree$. Such unusually large incidence angles are a result of the area's topography, as beams reflect off steep slopes.

```{python}
mask_ia = (flood_dc.band("PLIA") >= 27) * (flood_dc.band("PLIA") <= 48)
flood_dc = flood_dc * mask_ia
```
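As an aside, the masks in this and the following steps are combined arithmetically, a pattern used throughout this workflow: multiplying two boolean masks acts as a logical AND, adding them and comparing the sum with zero (used in the outlier masking below) acts as a logical OR, and multiplying the datacube with a mask zeroes out the rejected pixels. A minimal NumPy sketch of this pattern (plain Python, outside openEO):

```{python}
import numpy as np

a = np.array([True, True, False, False])
b = np.array([True, False, True, False])

and_mask = a * b        # logical AND -> [True, False, False, False]
or_mask = (a + b) > 0   # logical OR  -> [True, True, True, False]

values = np.array([10.0, 20.0, 30.0, 40.0])
values * and_mask       # rejected entries become 0 -> [10., 0., 0., 0.]
```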

This results in the following map:

```{python}
#| fig-cap: "OpenEO floodmap - masking exceeding incidence angles"
#| results: hide
#| echo: False
flood_decision = flood_dc.execute()
view_flood_map(flood_decision)
```

### Identification of Conflicting Distributions

We remove pixels that have low backscatter already under normal conditions and that do not represent water. Examples of such surfaces are highways, airstrips, salt pans, or arid sand and/or bedrock. Identification of such conflicting distributions is done by comparing the expected local land distribution (from the harmonic model) with the water distribution; if the two cannot be distinguished from each other, the pixel is excluded. For example, with an expected water backscatter of $-20$ dB the threshold used below is $-20 + 0.5 \times 2.75 \approx -18.6$ dB, and pixels whose expected land backscatter does not exceed this value are masked.

```{python}
water_bsc_threshold = decision_in.band("wbsc") + 0.5 * 2.754041  # 2.754041 is used as the standard deviation of the water backscatter distribution (see the outlier masking below)
mask_conflict = decision_in.band("hbsc") > water_bsc_threshold
flood_dc = flood_dc * mask_conflict
```

Exclusion of these conflicting distributions looks as follows:

```{python}
#| fig-cap: "OpenEO floodmap - masking conflicting distributions + exceeding incidence angles"
#| results: hide
#| echo: False
flood_decision = flood_dc.execute()
view_flood_map(flood_decision)
```


### Removal of Measurement Outliers

Extreme backscatter values are yet another source of insensitivity to floods, as such outliers are not properly represented by the Bayesian model probabilities. Measurements are therefore masked when they fall more than three standard deviations outside the expected land backscatter and, at the same time, more than three standard deviations above the expected water backscatter.

```{python}
land_bsc_lower = flood_dc.band("hbsc") - 3 * flood_dc.band("std")
land_bsc_upper = flood_dc.band("hbsc") + 3 * flood_dc.band("std")
water_bsc_upper = flood_dc.band("wbsc") + 3 * 2.754041
mask_land_outliers = (flood_dc.band("sig0") > land_bsc_lower) * (flood_dc.band("sig0") < land_bsc_upper)
mask_water_outliers = flood_dc.band("sig0") < water_bsc_upper
flood_dc = flood_dc * ((mask_land_outliers + mask_water_outliers) > 0)
```

Adding this to the previous masking results in the following map:

```{python}
#| fig-cap: "OpenEO floodmap - masking extreme outliers + conflicting distributions + exceeding incidence angles"
#| results: hide
#| echo: False
flood_decision = flood_dc.execute()
view_flood_map(flood_decision)
```

### Denial of High Uncertainty on Decision

In some cases the posterior decision is ambiguous, as it falls close to a 0.5 probability of flooding (a coin flip). This happens when the probability distributions for water and land backscatter overlap and/or the measured backscatter value falls exactly in the middle of the two distributions. Hence a cut-off of 0.2 is used to limit the potential for false positive classifications, retaining only pixels with a flood probability above 0.8.

```{python}
mask_uncertainty = flood_dc > 0.8
flood_dc = flood_dc * mask_uncertainty
```

This results in the following floodmap.

```{python}
#| fig-cap: "OpenEO floodmap"
#| fig-cap: "OpenEO floodmap - masking high uncertainty classifications + extreme outliers + conflicting distributions + exceeding incidence angles"
#| results: hide
#| echo: False
flood_decision = flood_dc.execute()
view_flood_map(flood_decision)
```
8 changes: 8 additions & 0 deletions notebooks/load_eodc_data.py
@@ -0,0 +1,8 @@
import openeo
backend = "https://openeo.eodc.eu"

connection = openeo.connect(backend)
connection.authenticate_oidc(provider_id='egi')
connection.list_collection_ids()
connection.list_collections()
connection.list_processes()
8 changes: 4 additions & 4 deletions src/openeo_flood_mapper_local/restructure_hparam.py
@@ -9,7 +9,7 @@
DATA_VERSION = 'V0M2R3'


def restructure_hparam(root: Path, out: Path, tile_long_name: str) -> None:
def restructure_hparam(root: Path, out: Path, tile_long_name: str, tag: str) -> None:
grid, tile = tile_long_name.split('_')
grid = f'EQUI7_{grid}'
hparam_df = gather_files(root, yeoda_naming_convention, [re.compile('SIG0-HPAR'),
@@ -18,7 +18,7 @@ def restructure_hparam(root: Path, out: Path, tile_long_name: str) -> None:
re.compile(tile)], index='extra_field')
out_dir = out / DATA_VERSION / grid / tile
out_dir.mkdir(parents=True, exist_ok=True)

hparam_df = hparam_df.filter(like=tag, axis=0)
for orbit, grouped_df in hparam_df.groupby('extra_field'):
da = xr.open_mfdataset(grouped_df['filepath'], concat_dim='band', combine='nested', chunks=500,
mask_and_scale=True)['band_data']
@@ -30,15 +30,15 @@ def restructure_hparam(root: Path, out: Path, tile_long_name: str) -> None:
encoding['NOBS'] = {'_FillValue': -9999, 'dtype': 'int16', 'zlib': True}
ds.to_netcdf(out_dir / f"{orbit}.nc", mode='w', encoding=encoding)


def main():
parser = argparse.ArgumentParser(description="Restructure hparam tiffs to be loaded as local openEO dataset.")
parser.add_argument('root', type=Path, help="root path to the yeoda file structure")
parser.add_argument('out', type=Path, help="root path to output structure")
parser.add_argument('tile', type=str, help='long name of tile to process, i.e. "EU020M_E051N015T3"')
parser.add_argument('tag', type=str, help='tag of ... to process, i.e. "D080"')
args = parser.parse_args()

restructure_hparam(args.root, args.out, args.tile)
restructure_hparam(args.root, args.out, args.tile, args.tag)


if __name__ == '__main__':
8 changes: 4 additions & 4 deletions src/openeo_flood_mapper_local/restructure_plia.py
@@ -10,7 +10,7 @@
DATA_VERSION = 'V01R03'


def restructure_plia(root: Path, out: Path, tile_long_name: str) -> None:
def restructure_plia(root: Path, out: Path, tile_long_name: str, tag: str) -> None:
grid, tile = tile_long_name.split('_')
grid = f'EQUI7_{grid}'
plia_df = gather_files(root, yeoda_naming_convention, [re.compile('PLIA-TAG'),
@@ -19,7 +19,7 @@ def restructure_plia(root: Path, out: Path, tile_long_name: str) -> None:
re.compile(tile)], index='extra_field')
out_dir = out / DATA_VERSION / grid / tile
out_dir.mkdir(parents=True, exist_ok=True)

plia_df = plia_df.filter(like=tag, axis=0)
for _, row in plia_df.iterrows():
file = row['filepath']
da = rioxarray.open_rasterio(file, mask_and_scale=True, chunks=500).squeeze().drop_vars("band")
@@ -30,15 +30,15 @@ def restructure_plia(root: Path, out: Path, tile_long_name: str) -> None:
da.encoding.update({'scale_factor': 0.01, '_FillValue': -9999, 'dtype': 'int16', 'zlib': True})
da.to_dataset(name='PLIA').to_netcdf(out_dir / f"{file.stem}.nc", mode='w')


def main():
parser = argparse.ArgumentParser(description="Restructure plia tiffs to be loaded as local openEO dataset.")
parser.add_argument('root', type=Path, help="root path to the yeoda file structure")
parser.add_argument('out', type=Path, help="root path to output structure")
parser.add_argument('tile', type=str, help='long name of tile to process, i.e. "EU020M_E051N015T3"')
parser.add_argument('tag', type=str, help='tag of ... to process, i.e. "D080"')
args = parser.parse_args()

restructure_plia(args.root, args.out, args.tile)
restructure_plia(args.root, args.out, args.tile, args.tag)


if __name__ == '__main__':
9 changes: 5 additions & 4 deletions src/openeo_flood_mapper_local/restructure_sig0.py
@@ -10,7 +10,7 @@
DATA_VERSION = 'V1M1R1'


def restructure_sig0(root: Path, out: Path, tile_long_name: str) -> None:
def restructure_sig0(root: Path, out: Path, tile_long_name: str, eventtime: str) -> None:
grid, tile = tile_long_name.split('_')
grid = f'EQUI7_{grid}'
sig0_df = gather_files(root, yeoda_naming_convention, [re.compile('SIG0'),
@@ -19,23 +19,24 @@ def restructure_sig0(root: Path, out: Path, tile_long_name: str) -> None:
re.compile(tile)], index='datetime_1')
out_dir = out / DATA_VERSION / grid / tile
out_dir.mkdir(parents=True, exist_ok=True)
sig0_df = sig0_df[sig0_df.index == eventtime]
sig0_df = sig0_df[sig0_df["band"]=="VV"]

for _, row in sig0_df.iterrows():
file = row['filepath']
da = rioxarray.open_rasterio(file, mask_and_scale=True, chunks=500).squeeze().drop_vars("band")
da.encoding.update({'scale_factor': 0.1, '_FillValue': -9999, 'dtype': 'int16', 'zlib': True})
da.to_dataset(name='SIG0').to_netcdf(out_dir / f"{file.stem}.nc", mode='w')


def main():
parser = argparse.ArgumentParser(description="Restructure sig0 tiffs to be loaded as local openEO dataset.")
parser.add_argument('root', type=Path, help="root path to the yeoda file structure")
parser.add_argument('out', type=Path, help="root path to output structure")
parser.add_argument('tile', type=str, help='long name of tile to process, i.e. "EU020M_E051N015T3"')
parser.add_argument('eventtime', type=str, help='datetime of the flood event, i.e. "2018-02-28 04:39:08"')
args = parser.parse_args()

restructure_sig0(args.root, args.out, args.tile)

restructure_sig0(args.root, args.out, args.tile, args.eventtime)

if __name__ == '__main__':
main()
