From 5ffbe353439dea08b54438e39190d6303456f5bb Mon Sep 17 00:00:00 2001 From: Gustavo Shiroma Date: Wed, 14 Dec 2022 13:08:20 -0800 Subject: [PATCH 1/4] Update RTC-S1 comparison script for beta release --- app/rtc_compare.py | 290 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 251 insertions(+), 39 deletions(-) diff --git a/app/rtc_compare.py b/app/rtc_compare.py index d8994ea7..d7c0c82c 100644 --- a/app/rtc_compare.py +++ b/app/rtc_compare.py @@ -1,14 +1,14 @@ -''' -Compare two RTC products (in HDF5) if they are equivalent. -Part of the codes are copied from PROTEUS SAS -''' - +import os +from osgeo import gdal import argparse import itertools - import h5py import numpy as np +PASSED_STR = '[PASS] ' +FAILED_STR = '[FAIL]' +WARNING_STR = '[WARNING]' + RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE = 1e-04 RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE = 1e-05 @@ -87,7 +87,7 @@ def print_data_difference(val_1, val_2, indent=4): flag_discrepancy = (val_1 != val_2) index_first_discrepancy = np.where(flag_discrepancy)[0][0] print(f'{str_indent} The first discrepancy has detected from index ' - f'[{index_first_discrepancy}] : ' + f'[{index_first_discrepancy}] ' f'1st=({val_1[index_first_discrepancy]}), ' f'2nd=({val_2[index_first_discrepancy]})') return @@ -278,19 +278,16 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False, # Start the comparison if list_exclude is not None and str_key in list_exclude: - print('\033[33mWARNING.\033[00m', str_message_data_location) - print(' Ignoring the comparison result based on ' - 'the exclusion list provided.') return True if shape_val_1 != shape_val_2: # Dataset or attribute shape does not match - print('\033[91mFAILED. \033[00m', str_message_data_location) + print(f'{FAILED_STR} ', str_message_data_location) print(f' - Data shapes do not match. {shape_val_1} vs. {shape_val_2}\n') return False if val_1.dtype != val_2.dtype: - print('\033[91mFAILED. \033[00m', str_message_data_location) + print(f'{FAILED_STR} ', str_message_data_location) print(f' - Data types do not match. ({val_1.dtype}) vs. ({val_2.dtype})\n') return False @@ -306,9 +303,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False, if return_val: if print_passed_element: - print('\033[32mPASSED. \033[00m', str_message_data_location) + print(f'{PASSED_STR} ', str_message_data_location) else: - print('\033[91mFAILED. \033[00m', str_message_data_location) + print(f'{FAILED_STR} ', str_message_data_location) print( ' - numerical scalar. Failed to pass the test. ' f'Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, ' f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}') @@ -320,9 +317,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False, return_val = np.array_equal(val_1, val_2) if return_val: if print_passed_element: - print('\033[32mPASSED. \033[00m', str_message_data_location) + print(f'{PASSED_STR} ', str_message_data_location) else: - print('\033[91mFAILED. \033[00m', str_message_data_location) + print(f'{FAILED_STR} ', str_message_data_location) print( ' - non-numerical scalar. Failed to pass the test.') print(f' - 1st value: {val_1}') print(f' - 2nd value: {val_2}\n') @@ -341,9 +338,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False, if return_val: if print_passed_element: - print('\033[32mPASSED. \033[00m', str_message_data_location) + print(f'{PASSED_STR} ', str_message_data_location) else: - print('\033[91mFAILED. \033[00m', str_message_data_location) + print(f'{FAILED_STR} ', str_message_data_location) print(' - Numerical 1D array. Failed to pass the test. ' f'Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, ' f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}') @@ -354,9 +351,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False, return_val = np.array_equal(val_1, val_2) if return_val: if print_passed_element: - print('\033[32mPASSED. \033[00m', str_message_data_location) + print(f'{PASSED_STR} ', str_message_data_location) else: - print('\033[91mFAILED. \033[00m', str_message_data_location) + print(f'{FAILED_STR} ', str_message_data_location) print(' non-numerical 1D array. Failed to pass the test.') print_data_difference(val_1, val_2) return return_val @@ -370,9 +367,9 @@ def compare_hdf5_elements(hdf5_obj_1, hdf5_obj_2, str_key, is_attr=False, equal_nan=True) if return_val: if print_passed_element: - print('\033[32mPASSED. \033[00m', str_message_data_location) + print(f'{PASSED_STR} ', str_message_data_location) else: - print('\033[91mFAILED. \033[00m', str_message_data_location) + print(f'{FAILED_STR} ', str_message_data_location) print(f' {len(shape_val_1)}D raster array. Failed to pass the test. ' f'Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, ' f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}') @@ -468,7 +465,7 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str, # Print out the dataset structure discrepancy if there are any if not flag_identical_dataset_structure: - print(' \n\033[91mFAILED\033[00m: Dataset structure not identical.') + print(f' {FAILED_STR} Dataset structure not identical.') print('In the 1st HDF5, not in the 2nd data:') list_dataset_1st_only = list(set_dataset_1 - set_dataset_2) list_dataset_1st_only.sort() @@ -485,7 +482,7 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str, list_attrs_2nd_only = list(set_attrs_2 - set_attrs_1) list_attrs_2nd_only.sort() if (not flag_identical_attrs_structure) and flag_identical_dataset_structure: - print(' \n\033[91mFAILED\033[00m: ' + print(f' {FAILED_STR} ' 'Attribute structure not identical.') print('In the 1st HDF5, not in the 2nd data:') print('\r ' + @@ -498,13 +495,13 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str, replace('\n', ',\tattr: ').replace('\r', '\n')) # Print the test summary - print('\nTest summary:') + print('\nHDF5 test summary:') # Dataset structure if flag_identical_dataset_structure: - print(' \033[32mPASSED\033[00m: Same dataset structure confirmed.') + print(f' {PASSED_STR} Same dataset structure confirmed.') else: - print( ' \033[91mFAILED\033[00m: ' + print( f' {FAILED_STR} ' f'{len(list_dataset_1st_only)} datasets from the 1st HDF are' ' not found in the 2nd file.\n' f' {len(list_dataset_2nd_only)} datasets from the 2nd HDF are' @@ -512,9 +509,9 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str, # Attributes structure if flag_identical_attrs_structure: - print(' \033[32mPASSED\033[00m: Same attributes structure confirmed.') + print(f' {PASSED_STR} Same attributes structure confirmed.') else: - print( ' \033[91mFAILED\033[00m: ' + print(f' {FAILED_STR} ' f'{len(list_attrs_1st_only)} attributes from the 1st HDF are' ' not found in the 2nd file.\n' f' {len(list_attrs_2nd_only)} attributes from the 2nd HDF are' @@ -522,13 +519,13 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str, # Closeness of the common dataset if all(list_flag_identical_dataset): - print( ' \033[32mPASSED\033[00m: ' - 'The datasets of the two HDF files are the same within ' - 'the tolerance.') + print(f' {PASSED_STR} ' + 'The datasets of the two HDF files are the same within ' + 'the tolerance.') print(f' Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, ' f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}') else: - print( ' \033[91mFAILED\033[00m: ' + print(f' {FAILED_STR} ' f'{sum(~np.array(list_flag_identical_dataset))} datasets ' f'out of {len(intersection_set_dataset)} are not the same. ') print(f' Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, ' @@ -536,13 +533,13 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str, # Closeness of the common attributes if all(list_flag_identical_attrs): - print( ' \033[32mPASSED\033[00m: ' - 'The attributes of the two HDF files are the same within ' - 'the tolerance') + print(f' {PASSED_STR} ' + 'The attributes of the two HDF files are the same within ' + 'the tolerance') print(f' Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, ' f'Absolute tolerance = {RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE}') else: - print(f' \033[91mFAILED\033[00m: ' + print(f' {FAILED_STR} ' f'{sum(~np.array(list_flag_identical_attrs))} attributes ' f'out of {len(intersection_set_attrs)} are not the same.') print(f' Relative tolerance = {RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE}, ' @@ -551,6 +548,183 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str, return final_result + +def _get_prefix_str(flag_same, flag_all_ok): + flag_all_ok[0] = flag_all_ok[0] and flag_same + return f'{PASSED_STR} ' if flag_same else f'{FAILED_STR} ' + + +def compare_rtc_s1_products(file_1, file_2): + if not os.path.isfile(file_1): + print(f'ERROR file not found: {file_1}') + return False + + if not os.path.isfile(file_2): + print(f'ERROR file not found: {file_2}') + return False + + flag_all_ok = [True] + + # TODO: compare projections ds.GetProjection() + layer_gdal_dataset_1 = gdal.Open(file_1, gdal.GA_ReadOnly) + geotransform_1 = layer_gdal_dataset_1.GetGeoTransform() + metadata_1 = layer_gdal_dataset_1.GetMetadata() + nbands_1 = layer_gdal_dataset_1.RasterCount + + layer_gdal_dataset_2 = gdal.Open(file_2, gdal.GA_ReadOnly) + geotransform_2 = layer_gdal_dataset_2.GetGeoTransform() + metadata_2 = layer_gdal_dataset_2.GetMetadata() + nbands_2 = layer_gdal_dataset_2.RasterCount + + # compare number of bands + flag_same_nbands = nbands_1 == nbands_2 + flag_same_nbands_str = _get_prefix_str(flag_same_nbands, flag_all_ok) + prefix = ' ' * 7 + print(f'{flag_same_nbands_str}Comparing number of bands') + if not flag_same_nbands: + print(prefix + f'Input 1 has {nbands_1} bands and input 2' + f' has {nbands_2} bands') + return False + + # compare array values + print('Comparing RTC-S1 bands...') + for b in range(1, nbands_1 + 1): + gdal_band_1 = layer_gdal_dataset_1.GetRasterBand(b) + gdal_band_2 = layer_gdal_dataset_2.GetRasterBand(b) + image_1 = gdal_band_1.ReadAsArray() + image_2 = gdal_band_2.ReadAsArray() + + shape_val_1 = image_1.shape + shape_val_2 = image_2.shape + + if shape_val_1 != shape_val_2: + # Dataset or attribute shape does not match + print(f'{FAILED_STR} data shapes do not match.' + f' {shape_val_1} vs. {shape_val_2}\n') + return False + + if image_1.dtype != image_2.dtype: + print(f'{FAILED_STR} data types do not match.' + f' ({image_1.dtype}) vs. ({image_2.dtype})\n') + return False + + flag_bands_are_equal = np.allclose( + image_1, image_2, atol=RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE, + rtol=RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE, equal_nan=True) + flag_bands_are_equal_str = _get_prefix_str(flag_bands_are_equal, + flag_all_ok) + print(f'{flag_bands_are_equal_str} Band {b} -' + f' {gdal_band_1.GetDescription()}"') + if not flag_bands_are_equal: + _print_first_value_diff(image_1, image_2, prefix) + + # compare geotransforms + flag_same_geotransforms = np.array_equal(geotransform_1, geotransform_2) + flag_same_geotransforms_str = _get_prefix_str(flag_same_geotransforms, + flag_all_ok) + print(f'{flag_same_geotransforms_str}Comparing geotransform') + if not flag_same_geotransforms: + print(prefix + f'* input 1 geotransform with content "{geotransform_1}"' + f' differs from input 2 geotransform with content' + f' "{geotransform_2}".') + + # compare metadata + metadata_error_message, flag_same_metadata = \ + _compare_rtc_s1_metadata(metadata_1, metadata_2) + + flag_same_metadata_str = _get_prefix_str(flag_same_metadata, + flag_all_ok) + print(f'{flag_same_metadata_str}Comparing metadata') + + if not flag_same_metadata: + print(prefix + metadata_error_message) + + return flag_all_ok[0] + + +def _compare_rtc_s1_metadata(metadata_1, metadata_2): + """ + Compare RTC-S1 products' metadata + + Parameters + ---------- + metadata_1 : dict + Metadata of the first RTC-S1 product + metadata_2: dict + Metadata of the second + """ + metadata_error_message = None + flag_same_metadata = len(metadata_1.keys()) == len(metadata_2.keys()) + if not flag_same_metadata: + metadata_error_message = ( + f'* input 1 metadata has {len(metadata_1.keys())} entries' + f' whereas input 2 metadata has {len(metadata_2.keys())} entries.') + + set_1_m_2 = set(metadata_1.keys()) - set(metadata_2.keys()) + if len(set_1_m_2) > 0: + metadata_error_message += (' Input 1 metadata has extra entries' + ' with keys:' + f' {", ".join(set_1_m_2)}.') + set_2_m_1 = set(metadata_2.keys()) - set(metadata_1.keys()) + if len(set_2_m_1) > 0: + metadata_error_message += (' Input 2 metadata has extra entries' + ' with keys:' + f' {", ".join(set_2_m_1)}.') + else: + for k1, v1, in metadata_1.items(): + if k1 not in metadata_2.keys(): + flag_same_metadata = False + metadata_error_message = ( + f'* the metadata key {k1} is present in' + ' but it is not present in input 2') + break + # Exclude metadata fields that are not required to be the same + if k1 in ['PROCESSING_DATETIME', 'DEM_SOURCE', 'LANDCOVER_SOURCE', + 'WORLDCOVER_SOURCE']: + continue + if metadata_2[k1] != v1: + flag_same_metadata = False + metadata_error_message = ( + f'* contents of metadata key {k1} from' + f' input 1 has value "{v1}" whereas the same key in' + f' input 2 metadata has value "{metadata_2[k1]}"') + break + return metadata_error_message, flag_same_metadata + + +def _print_first_value_diff(image_1, image_2, prefix): + """ + Print first value difference between two images. + + Parameters + ---------- + image_1 : numpy.ndarray + First input image + image_2: numpy.ndarray + Second input image + prefix: str + Prefix to the message printed to the user + """ + flag_error_found = False + for i in range(image_1.shape[0]): + for j in range(image_1.shape[1]): + if (np.isnan(image_1[i, j]) and + np.isnan(image_1[i, j])): + continue + if (abs(image_1[i, j] - image_2[i, j]) <= + RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE): + continue + print(prefix + f' * input 1 has value' + f' "{image_1[i, j]}" in position' + f' (x: {j}, y: {i})' + f' whereas input 2 has value "{image_2[i, j]}"' + ' in the same position.') + flag_error_found = True + break + if flag_error_found: + break + + def main(): ''' main function of the RTC product comparison script @@ -562,8 +736,46 @@ def main(): file_1 = args.input_file[0] file_2 = args.input_file[1] - compare_rtc_hdf5_files(file_1, file_2, LIST_EXCLUDE_COMPARISON) - + # compare HDF5 files ('*h5') + print('*******************************************************') + print('************ TESTING (HDF5 file) ************') + print('*******************************************************') + print('*** file 1:', file_1) + print('*** file 2:', file_2) + print('-------------------------------------------------------') + test_1 = compare_rtc_hdf5_files(file_1, file_2, LIST_EXCLUDE_COMPARISON) + + # compare VH images + vh_file_1 = file_1.replace('.h5', '_VH.tif') + vh_file_2 = file_2.replace('.h5', '_VH.tif') + print('*******************************************************') + print('************ TESTING (VH polarization) ************') + print('*******************************************************') + print('*** file 1:', vh_file_1) + print('*** file 2:', vh_file_2) + print('-------------------------------------------------------') + test_2 = compare_rtc_s1_products(vh_file_1, vh_file_2) + + # compare VV images + vv_file_1 = file_1.replace('.h5', '_VV.tif') + vv_file_2 = file_2.replace('.h5', '_VV.tif') + print('*******************************************************') + print('************ TESTING (VV polarization) ************') + print('*******************************************************') + print('*** file 1:', vv_file_1) + print('*** file 2:', vv_file_2) + print('-------------------------------------------------------') + test_3 = compare_rtc_s1_products(vv_file_1, vv_file_2) + + print('*******************************************************') + print('************ Overall results ************') + print('*******************************************************') + overal_results = [True] + print(f'{_get_prefix_str(test_1, overal_results)} HDF5 test') + print(f'{_get_prefix_str(test_2, overal_results)} VH polarization test') + print(f'{_get_prefix_str(test_3, overal_results)} VV polarization test') + print('*******************************************************') + return overal_results[0] if __name__ == '__main__': main() From a13e8142643975a9160aa1c1eb373e657f2be8be Mon Sep 17 00:00:00 2001 From: Gustavo Shiroma Date: Wed, 14 Dec 2022 16:05:34 -0800 Subject: [PATCH 2/4] remove unused WARNING_STR --- app/rtc_compare.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/rtc_compare.py b/app/rtc_compare.py index d7c0c82c..b4c52e27 100644 --- a/app/rtc_compare.py +++ b/app/rtc_compare.py @@ -7,7 +7,6 @@ PASSED_STR = '[PASS] ' FAILED_STR = '[FAIL]' -WARNING_STR = '[WARNING]' RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE = 1e-04 RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE = 1e-05 From f03755cdc77fd0fd2c5a09250c726b0a6de27434 Mon Sep 17 00:00:00 2001 From: Gustavo Shiroma Date: Wed, 14 Dec 2022 16:12:06 -0800 Subject: [PATCH 3/4] add rtol (RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE) to _print_first_value_diff() --- app/rtc_compare.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/rtc_compare.py b/app/rtc_compare.py index b4c52e27..1135114b 100644 --- a/app/rtc_compare.py +++ b/app/rtc_compare.py @@ -711,7 +711,9 @@ def _print_first_value_diff(image_1, image_2, prefix): np.isnan(image_1[i, j])): continue if (abs(image_1[i, j] - image_2[i, j]) <= - RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE): + RTC_S1_PRODUCTS_ERROR_ABS_TOLERANCE + + RTC_S1_PRODUCTS_ERROR_REL_TOLERANCE * + abs(image_2[i, j])): continue print(prefix + f' * input 1 has value' f' "{image_1[i, j]}" in position' From 97fc190f0bbd0ad3ebe3e8341a3b0940504db770 Mon Sep 17 00:00:00 2001 From: Gustavo Shiroma Date: Wed, 14 Dec 2022 16:15:36 -0800 Subject: [PATCH 4/4] add docstrings to _get_prefix_str() --- app/rtc_compare.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/app/rtc_compare.py b/app/rtc_compare.py index 1135114b..612aa583 100644 --- a/app/rtc_compare.py +++ b/app/rtc_compare.py @@ -549,6 +549,23 @@ def compare_rtc_hdf5_files(file_1: str, file_2: str, def _get_prefix_str(flag_same, flag_all_ok): + ''' + Returns the prefix string for a comparison test, either the contents + of PASSED_STR or the FAILED_STR. + + Parameters: + ----------- + flag_same: bool + Result of the comparison test + flag_all_ok: list(bool) + Mutable list of booleans that will hold the overall test status + + Return: + ------- + _: str + Prefix string for the given comparison test + + ''' flag_all_ok[0] = flag_all_ok[0] and flag_same return f'{PASSED_STR} ' if flag_same else f'{FAILED_STR} '