From 5ffbe353439dea08b54438e39190d6303456f5bb Mon Sep 17 00:00:00 2001
From: Gustavo Shiroma <gustavo.h.shiroma@jpl.nasa.gov>
Date: Wed, 14 Dec 2022 13:08:20 -0800
Subject: [PATCH 1/4] Update RTC-S1 comparison script for beta release

---
 app/rtc_compare.py | 290 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 251 insertions(+), 39 deletions(-)

diff --git a/app/rtc_compare.py b/app/rtc_compare.py
index d8994ea7..d7c0c82c 100644
--- a/app/rtc_compare.py
+++ b/app/rtc_compare.py
@@ -1,14 +1,14 @@
-'''
-Compare two RTC products (in HDF5) if they are equivalent.
-Part of the codes are copied from PROTEUS SAS
-'''
-
+import os
+from osgeo import gdal
 import argparse
 import itertools
-
 import h5py
 import numpy as np
 
+PASSED_STR = '[PASS] '
+FAILED_STR = '[FAIL]'
+WARNING_STR = '[WARNING]'
+
 RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE = 1e-04
 RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE = 1e-05
 
@@ -87,7 +87,7 @@ def print_data_difference(val_1, val_2, indent=4):
         flag_discrepancy = (val_1 != val_2)
         index_first_discrepancy = np.where(flag_discrepancy)[0][0]
         print(f'{str_indent} The first discrepancy has detected from index '
-              f'[{index_first_discrepancy}] : '
+              f'[{index_first_discrepancy}] '
               f'1st=({val_1[index_first_discrepancy]}), '
               f'2nd=({val_2[index_first_discrepancy]})')
         return
@@ -278,19 +278,16 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
 
     # Start the comparison
     if list_exclude is not None and str_key in list_exclude:
-        print('\033[33mWARNING.\033[00m', str_message_data_location)
-        print('    Ignoring the comparison result based on '
-              'the exclusion list provided.')
         return True
 
     if shape_val_1 != shape_val_2:
         # Dataset or attribute shape does not match
-        print('\033[91mFAILED. \033[00m', str_message_data_location)
+        print(f'{FAILED_STR} ', str_message_data_location)
         print(f'    - Data shapes do not match. {shape_val_1} vs. {shape_val_2}\n')
         return False
 
     if val_1.dtype != val_2.dtype:
-        print('\033[91mFAILED. \033[00m', str_message_data_location)
+        print(f'{FAILED_STR} ', str_message_data_location)
         print(f'    - Data types do not match. ({val_1.dtype}) vs. ({val_2.dtype})\n')
         return False
 
@@ -306,9 +303,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
 
             if return_val:
                 if print_passed_element:
-                    print('\033[32mPASSED. \033[00m', str_message_data_location)
+                    print(f'{PASSED_STR} ', str_message_data_location)
             else:
-                print('\033[91mFAILED. \033[00m', str_message_data_location)
+                print(f'{FAILED_STR} ', str_message_data_location)
                 print( '    - numerical scalar. Failed to pass the test. '
                       f'Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, '
                       f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}')
@@ -320,9 +317,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
         return_val = np.array_equal(val_1, val_2)
         if return_val:
             if print_passed_element:
-                print('\033[32mPASSED. \033[00m', str_message_data_location)
+                print(f'{PASSED_STR} ', str_message_data_location)
         else:
-            print('\033[91mFAILED. \033[00m', str_message_data_location)
+            print(f'{FAILED_STR} ', str_message_data_location)
             print( '    - non-numerical scalar. Failed to pass the test.')
             print(f'    - 1st value: {val_1}')
             print(f'    - 2nd value: {val_2}\n')
@@ -341,9 +338,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
 
             if return_val:
                 if print_passed_element:
-                    print('\033[32mPASSED. \033[00m', str_message_data_location)
+                    print(f'{PASSED_STR} ', str_message_data_location)
             else:
-                print('\033[91mFAILED. \033[00m', str_message_data_location)
+                print(f'{FAILED_STR} ', str_message_data_location)
                 print('    - Numerical 1D array. Failed to pass the test. '
                      f'Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, '
                      f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}')
@@ -354,9 +351,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
         return_val = np.array_equal(val_1, val_2)
         if return_val:
             if print_passed_element:
-                print('\033[32mPASSED. \033[00m', str_message_data_location)
+                print(f'{PASSED_STR} ', str_message_data_location)
         else:
-            print('\033[91mFAILED. \033[00m', str_message_data_location)
+            print(f'{FAILED_STR} ', str_message_data_location)
             print('    non-numerical 1D array. Failed to pass the test.')
             print_data_difference(val_1, val_2)
         return return_val
@@ -370,9 +367,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False,
                                  equal_nan=True)
         if return_val:
             if print_passed_element:
-                print('\033[32mPASSED. \033[00m', str_message_data_location)
+                print(f'{PASSED_STR} ', str_message_data_location)
         else:
-            print('\033[91mFAILED. \033[00m', str_message_data_location)
+            print(f'{FAILED_STR} ', str_message_data_location)
             print(f'    {len(shape_val_1)}D raster array. Failed to pass the test. '
                   f'Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, '
                   f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}')
@@ -468,7 +465,7 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
 
         # Print out the dataset structure discrepancy if there are any
         if not flag_identical_dataset_structure:
-            print('    \n\033[91mFAILED\033[00m: Dataset structure not identical.')
+            print(f'    {FAILED_STR} Dataset structure not identical.')
             print('In the 1st HDF5, not in the 2nd data:')
             list_dataset_1st_only = list(set_dataset_1 - set_dataset_2)
             list_dataset_1st_only.sort()
@@ -485,7 +482,7 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
         list_attrs_2nd_only = list(set_attrs_2 - set_attrs_1)
         list_attrs_2nd_only.sort()
         if (not flag_identical_attrs_structure) and flag_identical_dataset_structure:
-            print('    \n\033[91mFAILED\033[00m: '
+            print(f'    {FAILED_STR} '
                   'Attribute structure not identical.')
             print('In the 1st HDF5, not in the 2nd data:')
             print('\r    ' +
@@ -498,13 +495,13 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
                   replace('\n', ',\tattr: ').replace('\r', '\n'))
 
         # Print the test summary
-        print('\nTest summary:')
+        print('\nHDF5 test summary:')
 
         # Dataset structure
         if flag_identical_dataset_structure:
-            print('    \033[32mPASSED\033[00m: Same dataset structure confirmed.')
+            print(f'    {PASSED_STR} Same dataset structure confirmed.')
         else:
-            print( '    \033[91mFAILED\033[00m: '
+            print( f'    {FAILED_STR} '
                   f'{len(list_dataset_1st_only)} datasets from the 1st HDF are'
                    ' not found in the 2nd file.\n'
                   f'            {len(list_dataset_2nd_only)} datasets from the 2nd HDF are'
@@ -512,9 +509,9 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
 
         # Attributes structure
         if flag_identical_attrs_structure:
-            print('    \033[32mPASSED\033[00m: Same attributes structure confirmed.')
+            print(f'    {PASSED_STR} Same attributes structure confirmed.')
         else:
-            print( '    \033[91mFAILED\033[00m: '
+            print(f'    {FAILED_STR} '
                   f'{len(list_attrs_1st_only)} attributes from the 1st HDF are'
                    ' not found in the 2nd file.\n'
                   f'            {len(list_attrs_2nd_only)} attributes from the 2nd HDF are'
@@ -522,13 +519,13 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
 
         # Closeness of the common dataset
         if all(list_flag_identical_dataset):
-            print( '    \033[32mPASSED\033[00m: '
-                   'The datasets of the two HDF files are the same within '
-                   'the tolerance.')
+            print(f'    {PASSED_STR} '
+                  'The datasets of the two HDF files are the same within '
+                  'the tolerance.')
             print(f'            Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, '
                   f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}')
         else:
-            print( '    \033[91mFAILED\033[00m: '
+            print(f'    {FAILED_STR} '
                   f'{sum(~np.array(list_flag_identical_dataset))} datasets '
                   f'out of {len(intersection_set_dataset)} are not the same. ')
             print(f'            Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, '
@@ -536,13 +533,13 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
 
         # Closeness of the common attributes
         if all(list_flag_identical_attrs):
-            print( '    \033[32mPASSED\033[00m: '
-                   'The attributes of the two HDF files are the same within '
-                   'the tolerance')
+            print(f'    {PASSED_STR} '
+                  'The attributes of the two HDF files are the same within '
+                  'the tolerance')
             print(f'            Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, '
                   f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}')
         else:
-            print(f'    \033[91mFAILED\033[00m: '
+            print(f'    {FAILED_STR} '
                   f'{sum(~np.array(list_flag_identical_attrs))} attributes '
                   f'out of {len(intersection_set_attrs)} are not the same.')
             print(f'            Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, '
@@ -551,6 +548,183 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
         return final_result
 
 
+
+def _get_prefix_str(flag_same, flag_all_ok):
+    flag_all_ok[0] = flag_all_ok[0] and flag_same
+    return f'{PASSED_STR} ' if flag_same else f'{FAILED_STR} '
+
+
+def compare_rtc_s1_products(file_1, file_2):
+    if not os.path.isfile(file_1):
+        print(f'ERROR file not found: {file_1}')
+        return False
+
+    if not os.path.isfile(file_2):
+        print(f'ERROR file not found: {file_2}')
+        return False
+
+    flag_all_ok = [True]
+
+    # TODO: compare projections ds.GetProjection()
+    layer_gdal_dataset_1 = gdal.Open(file_1, gdal.GA_ReadOnly)
+    geotransform_1 = layer_gdal_dataset_1.GetGeoTransform()
+    metadata_1 = layer_gdal_dataset_1.GetMetadata()
+    nbands_1 = layer_gdal_dataset_1.RasterCount
+
+    layer_gdal_dataset_2 = gdal.Open(file_2, gdal.GA_ReadOnly)
+    geotransform_2 = layer_gdal_dataset_2.GetGeoTransform()
+    metadata_2 = layer_gdal_dataset_2.GetMetadata()
+    nbands_2 = layer_gdal_dataset_2.RasterCount
+
+    # compare number of bands
+    flag_same_nbands =  nbands_1 == nbands_2
+    flag_same_nbands_str = _get_prefix_str(flag_same_nbands, flag_all_ok)
+    prefix = ' ' * 7
+    print(f'{flag_same_nbands_str}Comparing number of bands')
+    if not flag_same_nbands:
+        print(prefix + f'Input 1 has {nbands_1} bands and input 2'
+              f' has {nbands_2} bands')
+        return False
+
+    # compare array values
+    print('Comparing RTC-S1 bands...')
+    for b in range(1, nbands_1 + 1):
+        gdal_band_1 = layer_gdal_dataset_1.GetRasterBand(b)
+        gdal_band_2 = layer_gdal_dataset_2.GetRasterBand(b)
+        image_1 = gdal_band_1.ReadAsArray()
+        image_2 = gdal_band_2.ReadAsArray()
+
+        shape_val_1 = image_1.shape
+        shape_val_2 = image_2.shape
+
+        if shape_val_1 != shape_val_2:
+            # Dataset or attribute shape does not match
+            print(f'{FAILED_STR} data shapes do not match.'
+                  f' {shape_val_1} vs. {shape_val_2}\n')
+            return False
+
+        if image_1.dtype != image_2.dtype:
+            print(f'{FAILED_STR} data types do not match.'
+                  f' ({image_1.dtype}) vs. ({image_2.dtype})\n')
+            return False
+
+        flag_bands_are_equal = np.allclose(
+            image_1, image_2, atol=RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE,
+            rtol=RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE, equal_nan=True)
+        flag_bands_are_equal_str = _get_prefix_str(flag_bands_are_equal,
+                                                   flag_all_ok)
+        print(f'{flag_bands_are_equal_str}     Band {b} -'
+              f' {gdal_band_1.GetDescription()}"')
+        if not flag_bands_are_equal:
+            _print_first_value_diff(image_1, image_2, prefix)
+
+    # compare geotransforms
+    flag_same_geotransforms = np.array_equal(geotransform_1, geotransform_2)
+    flag_same_geotransforms_str = _get_prefix_str(flag_same_geotransforms,
+                                                  flag_all_ok)
+    print(f'{flag_same_geotransforms_str}Comparing geotransform')
+    if not flag_same_geotransforms:
+        print(prefix + f'* input 1 geotransform with content "{geotransform_1}"'
+              f' differs from input 2 geotransform with content'
+              f' "{geotransform_2}".')
+
+    # compare metadata
+    metadata_error_message, flag_same_metadata = \
+        _compare_rtc_s1_metadata(metadata_1, metadata_2)
+
+    flag_same_metadata_str = _get_prefix_str(flag_same_metadata,
+                                             flag_all_ok)
+    print(f'{flag_same_metadata_str}Comparing metadata')
+
+    if not flag_same_metadata:
+        print(prefix + metadata_error_message)
+
+    return flag_all_ok[0]
+
+
+def _compare_rtc_s1_metadata(metadata_1, metadata_2):
+    """
+    Compare RTC-S1 products' metadata
+
+       Parameters
+       ----------
+       metadata_1 : dict
+            Metadata of the first RTC-S1 product
+       metadata_2: dict
+            Metadata of the second
+    """
+    metadata_error_message = None
+    flag_same_metadata = len(metadata_1.keys()) == len(metadata_2.keys())
+    if not flag_same_metadata:
+        metadata_error_message = (
+            f'* input 1 metadata has {len(metadata_1.keys())} entries'
+            f' whereas input 2 metadata has {len(metadata_2.keys())} entries.')
+
+        set_1_m_2 = set(metadata_1.keys()) - set(metadata_2.keys())
+        if len(set_1_m_2) > 0:
+            metadata_error_message += (' Input 1 metadata has extra entries'
+                                       ' with keys:'
+                                       f' {", ".join(set_1_m_2)}.')
+        set_2_m_1 = set(metadata_2.keys()) - set(metadata_1.keys())
+        if len(set_2_m_1) > 0:
+            metadata_error_message += (' Input 2 metadata has extra entries'
+                                       ' with keys:'
+                                       f' {", ".join(set_2_m_1)}.')
+    else:
+        for k1, v1, in metadata_1.items():
+            if k1 not in metadata_2.keys():
+                flag_same_metadata = False
+                metadata_error_message = (
+                    f'* the metadata key {k1} is present in'
+                    ' but it is not present in input 2')
+                break
+            # Exclude metadata fields that are not required to be the same
+            if k1 in ['PROCESSING_DATETIME', 'DEM_SOURCE', 'LANDCOVER_SOURCE',
+                      'WORLDCOVER_SOURCE']:
+                continue
+            if metadata_2[k1] != v1:
+                flag_same_metadata = False
+                metadata_error_message = (
+                    f'* contents of metadata key {k1} from'
+                    f' input 1 has value "{v1}" whereas the same key in'
+                    f' input 2 metadata has value "{metadata_2[k1]}"')
+                break
+    return metadata_error_message, flag_same_metadata
+
+
+def _print_first_value_diff(image_1, image_2, prefix):
+    """
+    Print first value difference between two images.
+
+       Parameters
+       ----------
+       image_1 : numpy.ndarray
+            First input image
+       image_2: numpy.ndarray
+            Second input image
+       prefix: str
+            Prefix to the message printed to the user
+    """
+    flag_error_found = False
+    for i in range(image_1.shape[0]):
+        for j in range(image_1.shape[1]):
+            if (np.isnan(image_1[i, j]) and
+                    np.isnan(image_1[i, j])):
+                continue
+            if (abs(image_1[i, j] - image_2[i, j]) <=
+                    RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE):
+                continue
+            print(prefix + f'     * input 1 has value'
+                  f' "{image_1[i, j]}" in position'
+                  f' (x: {j}, y: {i})'
+                  f' whereas input 2 has value "{image_2[i, j]}"'
+                  ' in the same position.')
+            flag_error_found = True
+            break
+        if flag_error_found:
+            break
+
+
 def main():
     '''
     main function of the RTC product comparison script
@@ -562,8 +736,46 @@ def main():
     file_1 = args.input_file[0]
     file_2 = args.input_file[1]
 
-    compare_rtc_hdf5_files(file_1, file_2, LIST_EXCLUDE_COMPARISON)
-
+    # compare HDF5 files ('*h5')
+    print('*******************************************************')
+    print('************      TESTING (HDF5 file)      ************')
+    print('*******************************************************')
+    print('*** file 1:', file_1)
+    print('*** file 2:', file_2)
+    print('-------------------------------------------------------')
+    test_1 = compare_rtc_hdf5_files(file_1, file_2, LIST_EXCLUDE_COMPARISON)
+
+    # compare VH images
+    vh_file_1 = file_1.replace('.h5', '_VH.tif')
+    vh_file_2 = file_2.replace('.h5', '_VH.tif')
+    print('*******************************************************')
+    print('************   TESTING (VH polarization)   ************')
+    print('*******************************************************')
+    print('*** file 1:', vh_file_1)
+    print('*** file 2:', vh_file_2)
+    print('-------------------------------------------------------')
+    test_2 = compare_rtc_s1_products(vh_file_1, vh_file_2)
+
+    # compare VV images
+    vv_file_1 = file_1.replace('.h5', '_VV.tif')
+    vv_file_2 = file_2.replace('.h5', '_VV.tif')
+    print('*******************************************************')
+    print('************   TESTING (VV polarization)   ************')
+    print('*******************************************************')
+    print('*** file 1:', vv_file_1)
+    print('*** file 2:', vv_file_2)
+    print('-------------------------------------------------------')
+    test_3 = compare_rtc_s1_products(vv_file_1, vv_file_2)
+
+    print('*******************************************************')
+    print('************         Overall results       ************')
+    print('*******************************************************')
+    overal_results = [True]
+    print(f'{_get_prefix_str(test_1, overal_results)} HDF5 test')
+    print(f'{_get_prefix_str(test_2, overal_results)} VH polarization test')
+    print(f'{_get_prefix_str(test_3, overal_results)} VV polarization test')
+    print('*******************************************************')
+    return overal_results[0]
 
 if __name__ == '__main__':
     main()

From a13e8142643975a9160aa1c1eb373e657f2be8be Mon Sep 17 00:00:00 2001
From: Gustavo Shiroma <gustavo.h.shiroma@jpl.nasa.gov>
Date: Wed, 14 Dec 2022 16:05:34 -0800
Subject: [PATCH 2/4] remove unused WARNING_STR

---
 app/rtc_compare.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/rtc_compare.py b/app/rtc_compare.py
index d7c0c82c..b4c52e27 100644
--- a/app/rtc_compare.py
+++ b/app/rtc_compare.py
@@ -7,7 +7,6 @@
 
 PASSED_STR = '[PASS] '
 FAILED_STR = '[FAIL]'
-WARNING_STR = '[WARNING]'
 
 RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE = 1e-04
 RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE = 1e-05

From f03755cdc77fd0fd2c5a09250c726b0a6de27434 Mon Sep 17 00:00:00 2001
From: Gustavo Shiroma <gustavo.h.shiroma@jpl.nasa.gov>
Date: Wed, 14 Dec 2022 16:12:06 -0800
Subject: [PATCH 3/4] add rtol (RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE) to
 _print_first_value_diff()

---
 app/rtc_compare.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/rtc_compare.py b/app/rtc_compare.py
index b4c52e27..1135114b 100644
--- a/app/rtc_compare.py
+++ b/app/rtc_compare.py
@@ -711,7 +711,9 @@ def _print_first_value_diff(image_1, image_2, prefix):
                     np.isnan(image_1[i, j])):
                 continue
             if (abs(image_1[i, j] - image_2[i, j]) <=
-                    RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE):
+                    RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE +
+                    RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE *
+                    abs(image_2[i, j])):
                 continue
             print(prefix + f'     * input 1 has value'
                   f' "{image_1[i, j]}" in position'

From 97fc190f0bbd0ad3ebe3e8341a3b0940504db770 Mon Sep 17 00:00:00 2001
From: Gustavo Shiroma <gustavo.h.shiroma@jpl.nasa.gov>
Date: Wed, 14 Dec 2022 16:15:36 -0800
Subject: [PATCH 4/4] add docstrings to _get_prefix_str()

---
 app/rtc_compare.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/app/rtc_compare.py b/app/rtc_compare.py
index 1135114b..612aa583 100644
--- a/app/rtc_compare.py
+++ b/app/rtc_compare.py
@@ -549,6 +549,23 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str,
 
 
 def _get_prefix_str(flag_same, flag_all_ok):
+    '''
+    Returns the prefix string for a comparison test, either the contents
+    of PASSED_STR or the FAILED_STR.
+
+    Parameters:
+    -----------
+    flag_same: bool
+        Result of the comparison test
+    flag_all_ok: list(bool)
+        Mutable list of booleans that will hold the overall test status
+
+    Return:
+    -------
+    _: str
+        Prefix string for the given comparison test
+
+    '''
     flag_all_ok[0] = flag_all_ok[0] and flag_same
     return f'{PASSED_STR} ' if flag_same else f'{FAILED_STR} '