From a4fe283389733bf677681ff44524d831e89132a8 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 6 Jan 2023 11:23:59 +0100 Subject: [PATCH 1/2] Fix the checksum type check The `None` case was missed and due to the unrestricted `tuple` elem_type it may return valid for actually invalid entries. So restrict that, being overly cautious, so it may wrongly return invalid. But in that case the conversion function will be called which can do more elaborate verification. Add test checking for None in checksums. --- easybuild/framework/easyconfig/types.py | 24 +++++++++++++++++++----- test/framework/type_checking.py | 18 ++++++++++++++++-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/easybuild/framework/easyconfig/types.py b/easybuild/framework/easyconfig/types.py index 42be1b99e4..ba81aeaccd 100644 --- a/easybuild/framework/easyconfig/types.py +++ b/easybuild/framework/easyconfig/types.py @@ -608,19 +608,33 @@ def ensure_iterable_license_specs(specs): })) # checksums is a list of checksums, one entry per file (source/patch) # each entry can be: +# None # a single checksum value (string) # a single checksum value of a specified type (2-tuple, 1st element is checksum type, 2nd element is checksum) # a list of checksums (of different types, perhaps different formats), which should *all* be valid -# a dictionary with a mapping from filename to checksum value -CHECKSUM_LIST = (list, as_hashable({'elem_types': [str, tuple, STRING_DICT]})) -CHECKSUMS = (list, as_hashable({'elem_types': [str, tuple, STRING_DICT, CHECKSUM_LIST]})) +# a tuple of checksums (of different types, perhaps different formats), where one should be valid +# a dictionary with a mapping from filename to checksum (None, value, type&value, alternatives) -CHECKABLE_TYPES = [CHECKSUM_LIST, CHECKSUMS, DEPENDENCIES, DEPENDENCY_DICT, LIST_OF_STRINGS, +# Type & value, value may be an int for type "size" +# This is a bit too permissive as it allows the first element to be an int and doesn't 
restrict the number of elements +CHECKSUM_TUPLE = (tuple, as_hashable({'elem_types': [str, int]})) +CHECKSUM_DICT = (dict, as_hashable( + { + 'elem_types': [type(None), str, CHECKSUM_TUPLE], + 'key_types': [str], + } +)) +CHECKSUM_LIST = (list, as_hashable({'elem_types': [str, CHECKSUM_TUPLE, CHECKSUM_DICT]})) + +CHECKSUMS = (list, as_hashable({'elem_types': [type(None), str, CHECKSUM_LIST, CHECKSUM_TUPLE, CHECKSUM_DICT]})) + +CHECKABLE_TYPES = [CHECKSUM_DICT, CHECKSUM_LIST, CHECKSUM_TUPLE, CHECKSUMS, + DEPENDENCIES, DEPENDENCY_DICT, LIST_OF_STRINGS, SANITY_CHECK_PATHS_DICT, SANITY_CHECK_PATHS_ENTRY, STRING_DICT, STRING_OR_TUPLE_LIST, STRING_OR_TUPLE_DICT, STRING_OR_TUPLE_OR_DICT_LIST, TOOLCHAIN_DICT, TUPLE_OF_STRINGS] # easy types, that can be verified with isinstance -EASY_TYPES = [string_type, bool, dict, int, list, str, tuple] +EASY_TYPES = [string_type, bool, dict, int, list, str, tuple, type(None)] # type checking is skipped for easyconfig parameters names not listed in PARAMETER_TYPES PARAMETER_TYPES = { diff --git a/test/framework/type_checking.py b/test/framework/type_checking.py index 8b7edd3215..1b6aaaa75d 100644 --- a/test/framework/type_checking.py +++ b/test/framework/type_checking.py @@ -221,10 +221,24 @@ def test_check_type_of_param_value_checksums(self): {'foo.txt': sha256_checksum1, 'bar.txt': sha256_checksum2}, # 3 alternative checksums for a single file, one match is sufficient (sha256_checksum1, sha256_checksum2, sha256_checksum3), - ] + # two alternative checksums for a single file (not to be confused by checksum-type & -value tuple) + (sha256_checksum1, md5_checksum), + # three alternative checksums for a single file of different types + (sha256_checksum1, ('md5', md5_checksum), {'foo.txt': sha256_checksum1}), + # alternative checksums in dicts are also allowed + {'foo.txt': (sha256_checksum2, sha256_checksum3), 'bar.txt': (sha256_checksum1, md5_checksum)}, + # Same but with lists -> all must match for each file + {'foo.txt': 
[sha256_checksum2, sha256_checksum3], 'bar.txt': [sha256_checksum1, md5_checksum]}, + ], + # None is allowed, meaning skip the checksum + [ + None, + # Also in mappings + {'foo.txt': sha256_checksum1, 'bar.txt': None}, + ], ] for inp in inputs: - self.assertEqual(check_type_of_param_value('checksums', inp), (True, inp)) + self.assertTrue(check_type_of_param_value('checksums', inp), 'Failed for ' + str(inp)) def test_check_type_of_param_value_patches(self): """Test check_type_of_param_value function for patches.""" From 8335f250dfadc6b4506824f6b8c5cabc62ecfb8c Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 4 Jan 2023 15:41:41 +0100 Subject: [PATCH 2/2] Fix `to_checksums` with `None` values in dicts and recursion Having a `'src': None` entry in a dict for checksums is as valid as having a `None` entry directly in the list. However the current function didn't handle it and crashed. Fix that as well as a few corner cases especially in the recursive case by introducing a new function for handling checksum entries in the checksum list and limiting the recursiveness. Fixes #4142 --- easybuild/framework/easyconfig/types.py | 68 +++++++++------ test/framework/type_checking.py | 105 +++++++++++++++++++++--- 2 files changed, 135 insertions(+), 38 deletions(-) diff --git a/easybuild/framework/easyconfig/types.py b/easybuild/framework/easyconfig/types.py index ba81aeaccd..6f7d4bd1a6 100644 --- a/easybuild/framework/easyconfig/types.py +++ b/easybuild/framework/easyconfig/types.py @@ -505,33 +505,51 @@ def to_dependencies(dep_list): return [to_dependency(dep) for dep in dep_list] -def to_checksums(checksums): - """Ensure correct element types for list of checksums: convert list elements to tuples.""" - res = [] - for checksum in checksums: - # each list entry can be: - # * None (indicates no checksum) - # * a string (MD5 or SHA256 checksum) - # * a tuple with 2 elements: checksum type + checksum value - # * a list of checksums (i.e. 
multiple checksums for a single file) - # * a dict (filename to checksum mapping) - if isinstance(checksum, string_type): - res.append(checksum) - elif isinstance(checksum, (list, tuple)): - # 2 elements + only string/int values => a checksum tuple - if len(checksum) == 2 and all(isinstance(x, (string_type, int)) for x in checksum): - res.append(tuple(checksum)) +def _to_checksum(checksum, list_level=0, allow_dict=True): + """Ensure the correct element type for each checksum in the checksum list""" + # each entry can be: + # * None (indicates no checksum) + # * a string (MD5, SHA256, ... checksum) + # * a list or tuple with 2 elements: checksum type + checksum value + # * a list or tuple of checksums (i.e. multiple checksums for a single file) + # * a dict (filename to checksum mapping) + if checksum is None or isinstance(checksum, string_type): + return checksum + elif isinstance(checksum, (list, tuple)): + if len(checksum) == 2 and isinstance(checksum[0], string_type) and isinstance(checksum[1], (string_type, int)): + # 2 elements so either: + # - a checksum tuple (2nd element string or int) + # - 2 alternative checksums (tuple) + # - 2 checksums that must each match (list) + # --> Convert to tuple only if we can exclude the 3rd case + if not isinstance(checksum[1], string_type) or list_level > 0: + return tuple(checksum) else: - res.append(to_checksums(checksum)) - elif isinstance(checksum, dict): - validated_dict = {} - for key, value in checksum.items(): - validated_dict[key] = to_checksums(value) - res.append(validated_dict) - else: - res.append(checksum) + return checksum + elif list_level < 2: + # Alternative checksums or multiple checksums for a single file + # Allowed to nest (at most) 2 times, e.g. 
[[[type, value]]] == [[(type, value)]] + # None is not allowed here + if any(x is None for x in checksum): + raise ValueError('Unexpected None in ' + str(checksum)) + if isinstance(checksum, tuple) or list_level > 0: + # When we already are in a tuple no further recursion is allowed -> set list_level very high + return tuple(_to_checksum(x, list_level=99, allow_dict=allow_dict) for x in checksum) + else: + return list(_to_checksum(x, list_level=list_level+1, allow_dict=allow_dict) for x in checksum) + elif isinstance(checksum, dict) and allow_dict: + return {key: _to_checksum(value, allow_dict=False) for key, value in checksum.items()} - return res + # Not returned -> Wrong type/format + raise ValueError('Unexpected type of "%s": %s' % (type(checksum), str(checksum))) + + +def to_checksums(checksums): + """Ensure correct element types for list of checksums: convert list elements to tuples.""" + try: + return [_to_checksum(checksum) for checksum in checksums] + except ValueError as e: + raise EasyBuildError('Invalid checksums: %s\n\tError: %s', checksums, e) def ensure_iterable_license_specs(specs): diff --git a/test/framework/type_checking.py b/test/framework/type_checking.py index 1b6aaaa75d..57f39bf02a 100644 --- a/test/framework/type_checking.py +++ b/test/framework/type_checking.py @@ -172,16 +172,16 @@ def test_check_type_of_param_value_sanity_check_paths(self): out = {'files': ['bin/foo', ('bin/bar', 'bin/baz')], 'dirs': [('lib', 'lib64', 'lib32')]} self.assertEqual(check_type_of_param_value('sanity_check_paths', inp, auto_convert=True), (True, out)) - def test_check_type_of_param_value_checksums(self): - """Test check_type_of_param_value function for checksums.""" + @staticmethod + def get_valid_checksums_values(): + """Return list of values valid for the 'checksums' EC parameter""" md5_checksum = 'fa618be8435447a017fd1bf2c7ae9224' sha256_checksum1 = 'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265' sha256_checksum2 = 
'b5f9cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551' sha256_checksum3 = '033be54514a03e255df75c5aee8f9e672f663f93abb723444caec8fe43437bde' - # valid values for 'checksums' easyconfig parameters - inputs = [ + return [ [], # single checksum (one file) [md5_checksum], @@ -237,7 +237,11 @@ def test_check_type_of_param_value_checksums(self): {'foo.txt': sha256_checksum1, 'bar.txt': None}, ], ] - for inp in inputs: + + def test_check_type_of_param_value_checksums(self): + """Test check_type_of_param_value function for checksums.""" + + for inp in TypeCheckingTest.get_valid_checksums_values(): self.assertTrue(check_type_of_param_value('checksums', inp), 'Failed for ' + str(inp)) def test_check_type_of_param_value_patches(self): @@ -720,19 +724,94 @@ def test_to_sanity_check_paths_dict(self): def test_to_checksums(self): """Test to_checksums function.""" + # Some hand-crafted examples. Only the types are important, values are for easier verification test_inputs = [ - ['be662daa971a640e40be5c804d9d7d10'], - ['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')], - [['be662daa971a640e40be5c804d9d7d10', ('md5', 'be662daa971a640e40be5c804d9d7d10')]], - [('md5', 'be662daa971a640e40be5c804d9d7d10')], - ['be662daa971a640e40be5c804d9d7d10', ('adler32', '0x998410035'), ('crc32', '0x1553842328'), - ('md5', 'be662daa971a640e40be5c804d9d7d10'), ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'), - ('size', 273)], + ['checksumvalue'], + [('md5', 'md5checksumvalue')], + ['file_1_checksum', ('md5', 'file_2_md5_checksum')], + # One checksum per file, some with checksum type + [ + 'be662daa971a640e40be5c804d9d7d10', + ('adler32', '0x998410035'), + ('crc32', '0x1553842328'), + ('md5', 'be662daa971a640e40be5c804d9d7d10'), + ('sha1', 'f618096c52244539d0e89867405f573fdb0b55b0'), + # int type as the 2nd value + ('size', 273), + ], # None values should not be filtered out, but left in place - [None, 
'fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265', None], + [None, 'checksum', None], + # Alternative checksums, not to be confused with multiple checksums for a file + [('main_checksum', 'alternative_checksum')], + [('1st_of_3', '2nd_of_3', '3rd_of_3')], + # Lists must be kept: This means all must match + [['checksum_1_in_list']], + [['checksum_must_match', 'this_must_also_match']], + [['1st_of_3_list', '2nd_of_3_list', '3rd_of_3_list']], + # Alternative checksums with types + [ + (('adler32', '1st_adler'), ('crc32', '1st_crc')), + (('adler32', '2nd_adler'), ('crc32', '2nd_crc'), ('sha1', '2nd_sha')), + ], + # Entries can be dicts even containing `None` + [ + { + 'src-arm.tgz': 'arm_checksum', + 'src-x86.tgz': ('mainchecksum', 'altchecksum'), + 'src-ppc.tgz': ('mainchecksum', ('md5', 'altchecksum')), + 'git-clone.tgz': None, + }, + { + 'src': ['checksum_must_match', 'this_must_also_match'] + }, + # 2nd required checksum a dict + ['first_checksum', {'src-arm': 'arm_checksum'}] + ], ] for checksums in test_inputs: self.assertEqual(to_checksums(checksums), checksums) + # Also reuse the checksums we use in test_check_type_of_param_value_checksums + # When a checksum is valid it must not be modified + for checksums in TypeCheckingTest.get_valid_checksums_values(): + self.assertEqual(to_checksums(checksums), checksums) + + # List in list converted to tuple -> alternatives or checksum with type + checksums = [['1stchecksum', ['md5', 'md5sum']]] + checksums_expected = [['1stchecksum', ('md5', 'md5sum')]] + self.assertEqual(to_checksums(checksums), checksums_expected) + + # Error detection + wrong_nesting = [('1stchecksum', ('md5', ('md5sum', 'altmd5sum')))] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, wrong_nesting) + correct_nesting = [('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))] + self.assertEqual(to_checksums(correct_nesting), correct_nesting) + # YEB (YAML EC) doesn't have tuples so it uses lists instead 
which need to get converted + correct_nesting_yeb = [[['1stchecksum', ['md5', 'md5sum'], ['md5', 'altmd5sum']]]] + correct_nesting_yeb_conv = [[('1stchecksum', ('md5', 'md5sum'), ('md5', 'altmd5sum'))]] + self.assertEqual(to_checksums(correct_nesting_yeb), correct_nesting_yeb_conv) + self.assertEqual(to_checksums(correct_nesting_yeb_conv), correct_nesting_yeb_conv) + + unexpected_set = [('1stchecksum', {'md5', 'md5sum'})] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*md5', to_checksums, unexpected_set) + unexpected_dict = [{'src': ('md5sum', {'src': 'shasum'})}] + self.assertErrorRegex(EasyBuildError, 'Unexpected type.*shasum', to_checksums, unexpected_dict) + correct_dict = [{'src': ('md5sum', 'shasum')}] + self.assertEqual(to_checksums(correct_dict), correct_dict) + correct_dict_1 = [{'src': [['md5', 'md5sum'], ['sha', 'shasum']]}] + correct_dict_2 = [{'src': [('md5', 'md5sum'), ('sha', 'shasum')]}] + self.assertEqual(to_checksums(correct_dict_2), correct_dict_2) + self.assertEqual(to_checksums(correct_dict_1), correct_dict_2) # inner lists to tuples + + unexpected_Nones = [ + [('1stchecksum', None)], + [['1stchecksum', None]], + [{'src': ('md5sum', None)}], + [{'src': ['md5sum', None]}], + ] + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[0]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[1]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[2]) + self.assertErrorRegex(EasyBuildError, 'Unexpected None', to_checksums, unexpected_Nones[3]) def test_ensure_iterable_license_specs(self): """Test ensure_iterable_license_specs function."""