From 5aad91509031556ee9c816b8246ccd92943ec78e Mon Sep 17 00:00:00 2001
From: rsanchez <149661416+rsanchez87@users.noreply.github.com>
Date: Tue, 23 Jul 2024 11:31:09 +0200
Subject: [PATCH] Split test suites for H265 and H266 (#181)

* Split test suites for H265 and H266

* fixup! Resolve comments

* fixup! fixup! Resolve comments (second)
---
 README.md                                    |   2 +-
 scripts/{gen_jvet_jctvc.py => gen_jct_vc.py} |  35 ++--
 scripts/gen_jvet.py                          | 177 +++++++++++++++++++
 scripts/gen_jvt.py                           |   3 +-
 4 files changed, 190 insertions(+), 27 deletions(-)
 rename scripts/{gen_jvet_jctvc.py => gen_jct_vc.py} (91%)
 mode change 100755 => 100644
 create mode 100644 scripts/gen_jvet.py

diff --git a/README.md b/README.md
index ebdc514..4c2f577 100644
--- a/README.md
+++ b/README.md
@@ -458,7 +458,7 @@ Check out the JSON format they follow in the [test_suites](test_suites)
 directory. Add a new json file within, Fluster will automatically pick it
 up.
 
-There is also a [generator script (H.265, H.266)](scripts/gen_jvet_jctvc.py) and a [generator script (H.264)](scripts/gen_jvt.py) for the [conformance
+There are also generator scripts ([H.264](scripts/gen_jvt.py), [H.265](scripts/gen_jct_vc.py) and [H.266](scripts/gen_jvet.py)) for the [conformance
 test suites](#test_suites) that you can use as a base to generate automatically
 new ones.
 
diff --git a/scripts/gen_jvet_jctvc.py b/scripts/gen_jct_vc.py
old mode 100755
new mode 100644
similarity index 91%
rename from scripts/gen_jvet_jctvc.py
rename to scripts/gen_jct_vc.py
index a353138..8ad6735
--- a/scripts/gen_jvet_jctvc.py
+++ b/scripts/gen_jct_vc.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
 
 # Fluster - testing framework for decoders conformance
-# Copyright (C) 2020, Fluendo, S.A.
+# Copyright (C) 2020-2024, Fluendo, S.A.
 #  Author: Pablo Marcos Oltra <pmarcos@fluendo.com>, Fluendo, S.A.
 #  Author: Andoni Morales Alastruey <amorales@fluendo.com>, Fluendo, S.A.
+#  Author: Ruben Sanchez Sanchez <rsanchez@fluendo.com>, Fluendo, S.A.
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public License
@@ -35,7 +36,6 @@
 # pylint: enable=wrong-import-position
 
 BASE_URL = "https://www.itu.int/"
-H266_URL = BASE_URL + "wftp3/av-arch/jvet-site/bitstream_exchange/VVC/draft_conformance/"
 H265_URL = BASE_URL + "wftp3/av-arch/jctvc-site/bitstream_exchange/draft_conformance/"
 BITSTREAM_EXTS = (
     ".bin",
@@ -67,7 +67,7 @@ def handle_starttag(self, tag, attrs):
                     self.links.append(base_url + value)
 
 
-class JVETJCTVCGenerator:
+class JCTVCGenerator:
     """Generates a test suite from the conformance bitstreams"""
 
     def __init__(
@@ -105,10 +105,7 @@ def generate(self, download, jobs):
             hparser.feed(data)
 
         for url in hparser.links[1:]:
-            # The first item in the AVCv1 list is a readme file
-            if "00readme_H" in url:
-                continue
-            elif "replaced" in url:
+            if "replaced" in url:
                 # This is in HEVC-SHVC, we don't want that.
                 continue
             file_url = os.path.basename(url)
@@ -181,8 +178,7 @@ def generate(self, download, jobs):
                     else:
                         raise e
 
-            if self.codec == Codec.H265:
-                self._fill_checksum_h265(test_vector, dest_dir)
+            self._fill_checksum_h265(test_vector, dest_dir)
 
         test_suite.to_json_file(output_filepath)
         print("Generate new test suite: " + test_suite.name + ".json")
@@ -214,9 +210,9 @@ def _fill_checksum_h265(self, test_vector, dest_dir):
             lines = checksum_file.readlines()
             # If we have a line like examples 4,5,6 anywhere in the file, prefer
             # that.
-            if self.codec == Codec.H265 and any((match := regex.match(line)) for line in lines):
+            if any((match := regex.match(line)) for line in lines):
                 test_vector.result = match.group(1)[:32].lower()
-            elif self.codec == Codec.H265 and self.name == "RExt" or self.name == "MV-HEVC" or self.name == "SCC":
+            elif self.name == "RExt" or self.name == "MV-HEVC" or self.name == "SCC":
                 # If we can't match with the regex, note that these usually come
                 # with the checksum at the end
                 test_vector.result = lines[-1].split(" ")[0].split("\n")[0].lower()
@@ -250,7 +246,7 @@ def _fill_checksum_h265(self, test_vector, dest_dir):
         default=2 * multiprocessing.cpu_count(),
     )
     args = parser.parse_args()
-    generator = JVETJCTVCGenerator(
+    generator = JCTVCGenerator(
         "HEVC_v1",
         "JCT-VC-HEVC_V1",
         Codec.H265,
@@ -259,7 +255,7 @@ def _fill_checksum_h265(self, test_vector, dest_dir):
     )
     generator.generate(not args.skip_download, args.jobs)
 
-    generator = JVETJCTVCGenerator(
+    generator = JCTVCGenerator(
         "RExt",
         "JCT-VC-RExt",
         Codec.H265,
@@ -269,7 +265,7 @@ def _fill_checksum_h265(self, test_vector, dest_dir):
     )
     generator.generate(not args.skip_download, args.jobs)
 
-    generator = JVETJCTVCGenerator(
+    generator = JCTVCGenerator(
         "SCC",
         "JCT-VC-SCC",
         Codec.H265,
@@ -279,7 +275,7 @@ def _fill_checksum_h265(self, test_vector, dest_dir):
     )
     generator.generate(not args.skip_download, args.jobs)
 
-    generator = JVETJCTVCGenerator(
+    generator = JCTVCGenerator(
         "MV-HEVC",
         "JCT-VC-MV-HEVC",
         Codec.H265,
@@ -288,12 +284,3 @@ def _fill_checksum_h265(self, test_vector, dest_dir):
         True
     )
     generator.generate(not args.skip_download, args.jobs)
-
-    generator = JVETJCTVCGenerator(
-        'draft6',
-        'JVET-VVC_draft6',
-        Codec.H266,
-        'JVET VVC draft6',
-        H266_URL
-    )
-    generator.generate(not args.skip_download, args.jobs)
diff --git a/scripts/gen_jvet.py b/scripts/gen_jvet.py
new file mode 100644
index 0000000..69dd170
--- /dev/null
+++ b/scripts/gen_jvet.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+
+# Fluster - testing framework for decoders conformance
+# Copyright (C) 2024, Fluendo, S.A.
+#  Author: Ruben Sanchez Sanchez <rsanchez@fluendo.com>, Fluendo, S.A.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation, either version 3
+# of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library. If not, see <https://www.gnu.org/licenses/>.
+
+import argparse
+from html.parser import HTMLParser
+import os
+import sys
+import urllib.request
+import multiprocessing
+
+# pylint: disable=wrong-import-position
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+from fluster import utils
+from fluster.codec import Codec, OutputFormat
+from fluster.test_suite import TestSuite, TestVector
+
+# pylint: enable=wrong-import-position
+
+BASE_URL = "https://www.itu.int/"
+H266_URL = BASE_URL + "wftp3/av-arch/jvet-site/bitstream_exchange/VVC/draft_conformance/"
+# Extensions searched for when locating the bitstream of each test vector
+BITSTREAM_EXTS = (".bit",)
+# NOTE(review): the three tuples below are not referenced in this script
+MD5_EXTS = ("yuv_2.md5", "yuv.md5", ".md5", "md5.txt", "md5sum.txt")
+MD5_EXCLUDES = (".bin.md5", "bit.md5")
+RAW_EXTS = ("nogray.yuv", ".yuv", ".qcif")
+
+
+class HREFParser(HTMLParser):
+    """Collects the href of every anchor tag, prefixed with the site root, in `links`"""
+
+    def __init__(self):
+        self.links = []
+        super().__init__()
+
+    def error(self, message):
+        print(message)
+
+    def handle_starttag(self, tag, attrs):
+        # Anchors are the only tags whose attributes are inspected.
+        if tag != "a":
+            return
+        # Drop the trailing slash of the site root before joining it with the href.
+        site_root = BASE_URL[:-1] if BASE_URL[-1] == "/" else BASE_URL
+        for attr_name, attr_value in attrs:
+            if attr_name == "href":
+                self.links.append(site_root + attr_value)
+
+
+class JVETGenerator:
+    """Generates a test suite from the JVET (H.266) conformance bitstreams"""
+
+    def __init__(
+        self,
+        name: str,
+        suite_name: str,
+        codec: Codec,
+        description: str,
+        site: str,
+        use_ffprobe: bool = False
+    ):
+        # site + name is the URL of the page that lists the bitstreams
+        self.name = name
+        self.suite_name = suite_name
+        self.codec = codec
+        self.description = description
+        self.site = site
+        self.use_ffprobe = use_ffprobe
+
+    def generate(self, download, jobs):
+        """Generates the test suite and saves it to <suite_name>.json
+
+        download: when true, TestSuite.download() is called for all vectors.
+        jobs: number of parallel jobs passed to TestSuite.download().
+        Raises Exception when no bitstream is found for a test vector, and
+        KeyError when ffprobe reports a pix_fmt missing from OutputFormat.
+        """
+        output_filepath = os.path.join(self.suite_name + ".json")
+        test_suite = TestSuite(
+            output_filepath,
+            "resources",
+            self.suite_name,
+            self.codec,
+            self.description,
+            {},
+        )
+
+        hparser = HREFParser()
+        print(f"Download list of bitstreams from {self.site + self.name}")
+        with urllib.request.urlopen(self.site + self.name) as resp:
+            data = str(resp.read())
+            hparser.feed(data)
+
+        # The first link is skipped; presumably it is not a bitstream archive
+        for url in hparser.links[1:]:
+            file_url = os.path.basename(url)
+            name = os.path.splitext(file_url)[0]
+            # Placeholder input file: replaced below by the bitstream found on disk
+            file_input = f"{name}.bin"
+            test_vector = TestVector(name, url, "__skip__", file_input, OutputFormat.YUV420P, "")
+            test_suite.test_vectors[name] = test_vector
+
+        if download:
+            test_suite.download(
+                jobs=jobs,
+                out_dir=test_suite.resources_dir,
+                verify=False,
+                extract_all=True,
+                keep_file=True,
+            )
+
+        for test_vector in test_suite.test_vectors.values():
+            dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, test_vector.name)
+            dest_path = os.path.join(dest_dir, os.path.basename(test_vector.source))
+            absolute_input_path = utils.find_by_ext(dest_dir, BITSTREAM_EXTS)
+            # Check first: calling .replace() on a missing result would hide this error
+            if not absolute_input_path:
+                raise Exception(f"Bitstream file not found in {dest_dir}")
+            # The suite stores the bitstream path relative to the test vector directory
+            test_vector.input_file = absolute_input_path.replace(dest_dir + os.sep, "")
+            test_vector.source_checksum = utils.file_checksum(dest_path)
+            if self.use_ffprobe:
+                ffprobe = utils.normalize_binary_cmd('ffprobe')
+                command = [ffprobe, '-v', 'error', '-select_streams', 'v:0',
+                           '-show_entries', 'stream=pix_fmt', '-of',
+                           'default=nokey=1:noprint_wrappers=1',
+                           absolute_input_path]
+
+                result = utils.run_command_with_output(command).splitlines()
+                pix_fmt = result[0]
+                # No try/except: an unknown pix_fmt must abort with its KeyError
+                test_vector.output_format = OutputFormat[pix_fmt.upper()]
+
+        test_suite.to_json_file(output_filepath)
+        print("Generate new test suite: " + test_suite.name + ".json")
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the options, then build the JVET-VVC_draft6 suite
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--skip-download",
+        help="skip extracting tarball",
+        action="store_true",
+        default=False,
+    )
+    parser.add_argument(
+        "-j",
+        "--jobs",
+        help="number of parallel jobs to use. 2x logical cores by default",
+        type=int,
+        default=2 * multiprocessing.cpu_count(),
+    )
+    args = parser.parse_args()
+
+    # The bitstream list is read from H266_URL + 'draft6'
+    generator = JVETGenerator(
+        name='draft6', suite_name='JVET-VVC_draft6', codec=Codec.H266,
+        description='JVET VVC draft6', site=H266_URL,
+    )
+    generator.generate(download=not args.skip_download, jobs=args.jobs)
diff --git a/scripts/gen_jvt.py b/scripts/gen_jvt.py
index cc7d2a3..6efd861 100755
--- a/scripts/gen_jvt.py
+++ b/scripts/gen_jvt.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 # Fluster - testing framework for decoders conformance
-# Copyright (C) 2020, Fluendo, S.A.
+# Copyright (C) 2020-2024, Fluendo, S.A.
 #  Author: Ruben Sanchez Sanchez <rsanchez@fluendo.com>, Fluendo, S.A.
 #
 # This library is free software; you can redistribute it and/or
@@ -37,7 +37,6 @@
 BASE_URL = "https://www.itu.int/"
 H264_URL = BASE_URL + "wftp3/av-arch/jvt-site/draft_conformance/"
 BITSTREAM_EXTS = (
-    ".bit",
     ".264",
     ".h264",
     ".jsv",