COM-10976: Add checksum handling function in H266 test suite generator (#190)

* COM-10976: Add checksum handling function in H266 test suite generator
  - tweaked utils.find_by_ext()
  - added _fill_checksum_h266() in gen_jvet.py
  - minor corrections throughout
fluendo · Sep 16, 2024 · f7d69e3 · f7d69e3
1 parent bcadfb9
commit f7d69e3
Show file tree

Hide file tree

Showing 7 changed files with 102 additions and 97 deletions.
diff --git a/fluster/utils.py b/fluster/utils.py
@@ -104,7 +104,7 @@ def run_command_with_output(
 
 
 def is_extractable(filepath: str) -> bool:
-    """Checks is a file can be extracted from the its extension"""
+    """Checks is a file can be extracted, based on its extension"""
     return filepath.endswith(TARBALL_EXTS) or filepath.endswith(".zip")
 
 
@@ -162,43 +162,27 @@ def find_by_ext(
                 if not excluded:
                     candidates.append(filepath)
 
-    for candidate in candidates:
-        # Prioritize files with 'L0' in the name (for JCT-VC-SHVC)
-        if "L0" in candidate.upper():
-            return candidate
-        # Prioritize files with 'norpt' in the name (for JVT-AVC_V1)
-        # Special case only for CVSEFDFT3_Sony_E.zip and CVSE3_Sony_H.zip
-        if "norpt" in candidate.lower():
-            return candidate
-
-    # If none of the above 2 cases is fulfilled, return the first candidate
+    if len(candidates) > 1:
+        for candidate in candidates.copy():
+            # Prioritize files with 'L0' in the name (for JCT-VC-SHVC)
+            if "L0" in candidate.upper():
+                return candidate
+            # Prioritize files with 'norpt' in the name (for JVT-AVC_V1)
+            # Special case only for CVSEFDFT3_Sony_E.zip and CVSE3_Sony_H.zip
+            # Prioritize files with 'layer0' in the name (for JVET-VVC_draft6
+            # checksum files)
+            if "norpt" in candidate.lower() or "layer0" in candidate.lower():
+                return candidate
+            # Files with 'first_picture' in the name are kicked out of the list
+            # (for JVET-VVC_draft6 checksum files)
+            # Reverse logic (with not in and return) does not produce desired value
+            if "first_picture" in candidate.lower():
+                candidates.remove(candidate)
+
+    # If none of the above cases is fulfilled, return the first candidate
     return candidates[0] if candidates else None
 
 
-def find_by_ext_multiple(
-    dest_dir: str, exts: List[str], excludes: Optional[List[str]] = None
-) -> List[str]:
-    """Return multiple names by file extension"""
-    excludes = excludes or []
-    found_files = []
-
-    # Respect the priority for extensions
-    for ext in exts:
-        for subdir, _, files in os.walk(dest_dir):
-            for filename in files:
-                excluded = False
-                filepath = os.path.join(subdir, filename)
-                if not filepath.endswith(ext) or "__MACOSX" in filepath:
-                    continue
-                for excl in excludes:
-                    if excl in filepath:
-                        excluded = True
-                        break
-                if not excluded:
-                    found_files.append(filepath)
-    return found_files
-
-
 def _linux_user_data_dir(appname: str) -> str:
     """Return data directory tied to the user"""
     path = os.environ.get("XDG_DATA_HOME", "")

diff --git a/scripts/gen_av1_aom.py b/scripts/gen_av1_aom.py
@@ -116,7 +116,7 @@ def generate(self, download, jobs):
             out420 = f"{dest_path}.i420"
             # Run the libaom av1 decoder to get the checksum as the .md5 files are per-frame
             test_vector.result = self.decoder.decode(
-                dest_path, out420, test_vector.output_format, 30, False)
+                dest_path, out420, test_vector.output_format, 30, False, False)
             os.remove(out420)
 
         test_suite.to_json_file(output_filepath)

diff --git a/scripts/gen_av1_chromium.py b/scripts/gen_av1_chromium.py
@@ -152,7 +152,7 @@ def generate(self, download, jobs):
             out420 = f"{dest_path}.i420"
             # Run the libaom av1 decoder to get the checksum as the .md5 in the JSONs are per-frame
             test_vector.result = self.decoder.decode(
-                dest_path, out420, test_vector.output_format, 30, False)
+                dest_path, out420, test_vector.output_format, 30, False, False)
             os.remove(out420)
 
         test_suite.to_json_file(output_filepath)

diff --git a/scripts/gen_jct_vc.py b/scripts/gen_jct_vc.py
@@ -43,7 +43,6 @@
 )
 MD5_EXTS = ("yuv_2.md5", "yuv.md5", ".md5", ".MD5", "md5.txt", "md5sum.txt")
 MD5_EXCLUDES = (".bin.md5", "bit.md5")
-RAW_EXTS = ("nogray.yuv", ".yuv", ".qcif")
 
 
 class HREFParser(HTMLParser):

diff --git a/scripts/gen_jvet.py b/scripts/gen_jvet.py
@@ -20,6 +20,7 @@
 import argparse
 from html.parser import HTMLParser
 import os
+import re
 import sys
 import urllib.request
 import multiprocessing
@@ -35,12 +36,11 @@
 
 BASE_URL = "https://www.itu.int/"
 H266_URL = BASE_URL + "wftp3/av-arch/jvet-site/bitstream_exchange/VVC/draft_conformance/"
-BITSTREAM_EXTS = (
-    ".bit",
-)
-MD5_EXTS = ("yuv_2.md5", "yuv.md5", ".md5", "md5.txt", "md5sum.txt")
-MD5_EXCLUDES = (".bin.md5", "bit.md5")
-RAW_EXTS = ("nogray.yuv", ".yuv", ".qcif")
+# When there is only 1 element in below variables there must be a ", " at the end.
+# Otherwise utils.find_by_ext() considers each character of the string as an individual
+# element in the list
+BITSTREAM_EXTS = (".bit", )
+MD5_EXTS = (".yuv.md5", )
 
 
 class HREFParser(HTMLParser):
@@ -158,7 +158,7 @@ def generate(self, download, jobs):
                         raise key_err
                 except CalledProcessError as proc_err:
                     exceptions = {
-                        # All below test vectors need cause ffprobe to crash
+                        # All below test vectors cause ffprobe to crash
                         "MNUT_A_Nokia_3": OutputFormat.NONE,
                         "MNUT_B_Nokia_2": OutputFormat.NONE,
                         "SUBPIC_C_ERICSSON_1": OutputFormat.NONE,
@@ -169,9 +169,30 @@ def generate(self, download, jobs):
                     else:
                         raise proc_err
 
+            self._fill_checksum_h266(test_vector, dest_dir)
+
         test_suite.to_json_file(output_filepath)
         print("Generate new test suite: " + test_suite.name + ".json")
 
+    @staticmethod
+    def _fill_checksum_h266(test_vector, dest_dir):
+        checksum_file = utils.find_by_ext(dest_dir, MD5_EXTS)
+        if checksum_file is None:
+            raise Exception("MD5 not found")
+        with open(checksum_file, "r") as checksum_file:
+            regex = re.compile(rf"([a-fA-F0-9]{{32,}}).*(?:\.(yuv|rgb|gbr))?")
+            lines = checksum_file.readlines()
+            # Filter out empty lines
+            filtered_lines = [line.strip() for line in lines if line.strip()]
+            # Prefer lines matching the regex pattern
+            match = next((regex.match(line) for line in filtered_lines if regex.match(line)), None)
+            if match:
+                test_vector.result = match.group(1).lower()
+            # Assert that we have extracted a valid MD5 from the file
+            assert len(test_vector.result) == 32 and re.search(
+                r"^[a-fA-F0-9]{32}$",
+                test_vector.result) is not None, f"{test_vector.result} is not a valid MD5 hash"
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -194,6 +215,7 @@ def generate(self, download, jobs):
         'JVET-VVC_draft6',
         Codec.H266,
         'JVET VVC draft6',
-        H266_URL
+        H266_URL,
+        True,
     )
     generator.generate(not args.skip_download, args.jobs)
diff --git a/test_suites/h265/JCT-VC-3D-HEVC.json b/test_suites/h265/JCT-VC-3D-HEVC.json
@@ -1,7 +1,7 @@
 {
     "name": "JCT-VC-3D-HEVC",
     "codec": "H.265",
-    "description": "JCT-VC 3D-HEVC 3D Extension",
+    "description": "JCT-VC HEVC 3D Extension",
     "test_vectors": [
         {
             "name": "3DHC_C_A_HHI_3",