Skip to content

Commit

Permalink
COM-10976: Add checksum handling function in H266 test suite generator (#190)
Browse files Browse the repository at this point in the history

* COM-10976: Add checksum handling function in H266 test suite generator

- tweaked utils.find_by_ext()
- added _fill_checksum_h266() in gen_jvet.py
- minor corrections throughout
  • Loading branch information
mdimopoulos authored Sep 16, 2024
1 parent bcadfb9 commit f7d69e3
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 97 deletions.
54 changes: 19 additions & 35 deletions fluster/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def run_command_with_output(


def is_extractable(filepath: str) -> bool:
    """Check whether a file can be extracted, based on its extension.

    Returns True when *filepath* ends with one of the suffixes in
    TARBALL_EXTS or with ".zip", and False otherwise.
    """
    # NOTE(review): TARBALL_EXTS is defined elsewhere in this module; it is
    # presumably a str or tuple of suffixes, as str.endswith() requires.
    return filepath.endswith(TARBALL_EXTS) or filepath.endswith(".zip")


Expand Down Expand Up @@ -162,43 +162,27 @@ def find_by_ext(
if not excluded:
candidates.append(filepath)

for candidate in candidates:
# Prioritize files with 'L0' in the name (for JCT-VC-SHVC)
if "L0" in candidate.upper():
return candidate
# Prioritize files with 'norpt' in the name (for JVT-AVC_V1)
# Special case only for CVSEFDFT3_Sony_E.zip and CVSE3_Sony_H.zip
if "norpt" in candidate.lower():
return candidate

# If none of the above 2 cases is fulfilled, return the first candidate
if len(candidates) > 1:
for candidate in candidates.copy():
# Prioritize files with 'L0' in the name (for JCT-VC-SHVC)
if "L0" in candidate.upper():
return candidate
# Prioritize files with 'norpt' in the name (for JVT-AVC_V1)
# Special case only for CVSEFDFT3_Sony_E.zip and CVSE3_Sony_H.zip
# Prioritize files with 'layer0' in the name (for JVET-VVC_draft6
# checksum files)
if "norpt" in candidate.lower() or "layer0" in candidate.lower():
return candidate
# Files with 'first_picture' in the name are kicked out of the list
# (for JVET-VVC_draft6 checksum files)
# Reverse logic (with not in and return) does not produce desired value
if "first_picture" in candidate.lower():
candidates.remove(candidate)

# If none of the above cases is fulfilled, return the first candidate
return candidates[0] if candidates else None


def find_by_ext_multiple(
    dest_dir: str, exts: List[str], excludes: Optional[List[str]] = None
) -> List[str]:
    """Return every file under *dest_dir* whose path ends with one of *exts*.

    Args:
        dest_dir: Root directory that is walked recursively.
        exts: Suffixes to look for, in priority order. Files matching an
            earlier suffix are listed before files matching a later one.
        excludes: Optional substrings; any path containing one of them is
            dropped. Paths containing "__MACOSX" are always dropped.

    Returns:
        The matching file paths. Each path appears once, at the position of
        the first suffix in *exts* that it matches, even if it also matches
        later suffixes (e.g. "yuv.md5" and ".md5").
    """
    excludes = excludes or []

    # Walk the tree a single time and filter up front; the per-extension
    # passes below then only scan this in-memory list.
    candidates = [
        filepath
        for subdir, _, files in os.walk(dest_dir)
        for filepath in (os.path.join(subdir, filename) for filename in files)
        if "__MACOSX" not in filepath
        and not any(excl in filepath for excl in excludes)
    ]

    found_files: List[str] = []
    seen = set()
    # Respect the priority for extensions
    for ext in exts:
        for filepath in candidates:
            if filepath.endswith(ext) and filepath not in seen:
                seen.add(filepath)
                found_files.append(filepath)
    return found_files


def _linux_user_data_dir(appname: str) -> str:
"""Return data directory tied to the user"""
path = os.environ.get("XDG_DATA_HOME", "")
Expand Down
2 changes: 1 addition & 1 deletion scripts/gen_av1_aom.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def generate(self, download, jobs):
out420 = f"{dest_path}.i420"
# Run the libaom av1 decoder to get the checksum as the .md5 files are per-frame
test_vector.result = self.decoder.decode(
dest_path, out420, test_vector.output_format, 30, False)
dest_path, out420, test_vector.output_format, 30, False, False)
os.remove(out420)

test_suite.to_json_file(output_filepath)
Expand Down
2 changes: 1 addition & 1 deletion scripts/gen_av1_chromium.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def generate(self, download, jobs):
out420 = f"{dest_path}.i420"
# Run the libaom av1 decoder to get the checksum as the .md5 in the JSONs are per-frame
test_vector.result = self.decoder.decode(
dest_path, out420, test_vector.output_format, 30, False)
dest_path, out420, test_vector.output_format, 30, False, False)
os.remove(out420)

test_suite.to_json_file(output_filepath)
Expand Down
1 change: 0 additions & 1 deletion scripts/gen_jct_vc.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
)
MD5_EXTS = ("yuv_2.md5", "yuv.md5", ".md5", ".MD5", "md5.txt", "md5sum.txt")
MD5_EXCLUDES = (".bin.md5", "bit.md5")
RAW_EXTS = ("nogray.yuv", ".yuv", ".qcif")


class HREFParser(HTMLParser):
Expand Down
38 changes: 30 additions & 8 deletions scripts/gen_jvet.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import argparse
from html.parser import HTMLParser
import os
import re
import sys
import urllib.request
import multiprocessing
Expand All @@ -35,12 +36,11 @@

BASE_URL = "https://www.itu.int/"
H266_URL = BASE_URL + "wftp3/av-arch/jvet-site/bitstream_exchange/VVC/draft_conformance/"
BITSTREAM_EXTS = (
".bit",
)
MD5_EXTS = ("yuv_2.md5", "yuv.md5", ".md5", "md5.txt", "md5sum.txt")
MD5_EXCLUDES = (".bin.md5", "bit.md5")
RAW_EXTS = ("nogray.yuv", ".yuv", ".qcif")
# A single-element tuple needs a trailing comma. Without it the parentheses are
# only grouping and the value is a plain string, which utils.find_by_ext() would
# then treat as a sequence of single characters instead of a list of extensions.
BITSTREAM_EXTS = (".bit", )
MD5_EXTS = (".yuv.md5", )


class HREFParser(HTMLParser):
Expand Down Expand Up @@ -158,7 +158,7 @@ def generate(self, download, jobs):
raise key_err
except CalledProcessError as proc_err:
exceptions = {
# All below test vectors need cause ffprobe to crash
# All below test vectors cause ffprobe to crash
"MNUT_A_Nokia_3": OutputFormat.NONE,
"MNUT_B_Nokia_2": OutputFormat.NONE,
"SUBPIC_C_ERICSSON_1": OutputFormat.NONE,
Expand All @@ -169,9 +169,30 @@ def generate(self, download, jobs):
else:
raise proc_err

self._fill_checksum_h266(test_vector, dest_dir)

test_suite.to_json_file(output_filepath)
print("Generate new test suite: " + test_suite.name + ".json")

@staticmethod
def _fill_checksum_h266(test_vector, dest_dir):
    """Set ``test_vector.result`` to the MD5 read from the vector's checksum file.

    The checksum file is located with ``utils.find_by_ext(dest_dir, MD5_EXTS)``.
    The first line that, once stripped, starts with a run of hexadecimal
    digits supplies the hash, which is stored lowercased.

    Args:
        test_vector: Object whose ``result`` attribute receives the hash.
        dest_dir: Directory searched for the checksum file.

    Raises:
        Exception: If no checksum file is found under *dest_dir*.
        ValueError: If the file holds no line starting with at least 32 hex
            digits, or if that leading hex run is not exactly 32 digits long.
    """
    checksum_path = utils.find_by_ext(dest_dir, MD5_EXTS)
    if checksum_path is None:
        raise Exception("MD5 not found")

    # Only the leading hex run is captured; whatever follows it on the line
    # (typically the file name) is irrelevant to the result.
    md5_regex = re.compile(r"([a-fA-F0-9]{32,})")

    # A hex digest is plain ASCII, so undecodable bytes elsewhere in the file
    # are replaced rather than aborting the whole generation run.
    with open(checksum_path, "r", encoding="utf-8", errors="replace") as checksum_fh:
        # Blank lines never match, so they are skipped implicitly; reading
        # stops at the first matching line.
        candidates = (md5_regex.match(line.strip()) for line in checksum_fh)
        match = next((found for found in candidates if found), None)

    if match is None:
        # Fail loudly instead of validating a stale test_vector.result
        raise ValueError(f"No MD5 hash found in {checksum_path}")

    checksum = match.group(1).lower()
    # Explicit check instead of assert: asserts are stripped under `python -O`
    if len(checksum) != 32:
        raise ValueError(f"{checksum} is not a valid MD5 hash")
    test_vector.result = checksum


if __name__ == "__main__":
parser = argparse.ArgumentParser()
Expand All @@ -194,6 +215,7 @@ def generate(self, download, jobs):
'JVET-VVC_draft6',
Codec.H266,
'JVET VVC draft6',
H266_URL
H266_URL,
True,
)
generator.generate(not args.skip_download, args.jobs)
2 changes: 1 addition & 1 deletion test_suites/h265/JCT-VC-3D-HEVC.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "JCT-VC-3D-HEVC",
"codec": "H.265",
"description": "JCT-VC 3D-HEVC 3D Extension",
"description": "JCT-VC HEVC 3D Extension",
"test_vectors": [
{
"name": "3DHC_C_A_HHI_3",
Expand Down
Loading

0 comments on commit f7d69e3

Please sign in to comment.