Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

COM-10976: Add checksum handling function in H266 test suite generator #190

Merged
merged 2 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 19 additions & 35 deletions fluster/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def run_command_with_output(


def is_extractable(filepath: str) -> bool:
    """Check whether a file can be extracted, based on its extension.

    Returns True when filepath ends with one of TARBALL_EXTS or with ".zip".
    """
    return filepath.endswith(TARBALL_EXTS) or filepath.endswith(".zip")


Expand Down Expand Up @@ -162,43 +162,27 @@ def find_by_ext(
if not excluded:
candidates.append(filepath)

for candidate in candidates:
# Prioritize files with 'L0' in the name (for JCT-VC-SHVC)
if "L0" in candidate.upper():
return candidate
# Prioritize files with 'norpt' in the name (for JVT-AVC_V1)
# Special case only for CVSEFDFT3_Sony_E.zip and CVSE3_Sony_H.zip
if "norpt" in candidate.lower():
return candidate

# If none of the above 2 cases is fulfilled, return the first candidate
if len(candidates) > 1:
for candidate in candidates.copy():
# Prioritize files with 'L0' in the name (for JCT-VC-SHVC)
if "L0" in candidate.upper():
return candidate
# Prioritize files with 'norpt' in the name (for JVT-AVC_V1)
# Special case only for CVSEFDFT3_Sony_E.zip and CVSE3_Sony_H.zip
# Prioritize files with 'layer0' in the name (for JVET-VVC_draft6
# checksum files)
if "norpt" in candidate.lower() or "layer0" in candidate.lower():
return candidate
# Files with 'first_picture' in the name are kicked out of the list
# (for JVET-VVC_draft6 checksum files)
# Reverse logic (with not in and return) does not produce desired value
if "first_picture" in candidate.lower():
candidates.remove(candidate)

# If none of the above cases is fulfilled, return the first candidate
return candidates[0] if candidates else None


def find_by_ext_multiple(
    dest_dir: str, exts: List[str], excludes: Optional[List[str]] = None
) -> List[str]:
    """Return the files under dest_dir whose path ends with one of exts.

    Results are grouped by extension, following the order of exts, so earlier
    extensions take priority. Each path is reported once, at the position of
    the first extension it matches, even when several extensions overlap
    (for instance "yuv.md5" and ".md5").

    Paths containing "__MACOSX" or any of the excludes substrings are ignored.
    """
    excludes = excludes or []

    # Walk the tree a single time and discard unwanted paths up front
    eligible: List[str] = []
    for subdir, _, files in os.walk(dest_dir):
        for filename in files:
            filepath = os.path.join(subdir, filename)
            if "__MACOSX" in filepath:
                continue
            if any(excl in filepath for excl in excludes):
                continue
            eligible.append(filepath)

    # Respect the priority for extensions, without reporting a path twice
    found_files: List[str] = []
    seen = set()
    for ext in exts:
        for filepath in eligible:
            if filepath.endswith(ext) and filepath not in seen:
                seen.add(filepath)
                found_files.append(filepath)
    return found_files


def _linux_user_data_dir(appname: str) -> str:
"""Return data directory tied to the user"""
path = os.environ.get("XDG_DATA_HOME", "")
Expand Down
2 changes: 1 addition & 1 deletion scripts/gen_av1_aom.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def generate(self, download, jobs):
out420 = f"{dest_path}.i420"
# Run the libaom av1 decoder to get the checksum as the .md5 files are per-frame
test_vector.result = self.decoder.decode(
dest_path, out420, test_vector.output_format, 30, False)
dest_path, out420, test_vector.output_format, 30, False, False)
os.remove(out420)

test_suite.to_json_file(output_filepath)
Expand Down
2 changes: 1 addition & 1 deletion scripts/gen_av1_chromium.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def generate(self, download, jobs):
out420 = f"{dest_path}.i420"
# Run the libaom av1 decoder to get the checksum as the .md5 in the JSONs are per-frame
test_vector.result = self.decoder.decode(
dest_path, out420, test_vector.output_format, 30, False)
dest_path, out420, test_vector.output_format, 30, False, False)
os.remove(out420)

test_suite.to_json_file(output_filepath)
Expand Down
1 change: 0 additions & 1 deletion scripts/gen_jct_vc.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
)
MD5_EXTS = ("yuv_2.md5", "yuv.md5", ".md5", ".MD5", "md5.txt", "md5sum.txt")
MD5_EXCLUDES = (".bin.md5", "bit.md5")
RAW_EXTS = ("nogray.yuv", ".yuv", ".qcif")


class HREFParser(HTMLParser):
Expand Down
38 changes: 30 additions & 8 deletions scripts/gen_jvet.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import argparse
from html.parser import HTMLParser
import os
import re
import sys
import urllib.request
import multiprocessing
Expand All @@ -35,12 +36,11 @@

BASE_URL = "https://www.itu.int/"
H266_URL = BASE_URL + "wftp3/av-arch/jvet-site/bitstream_exchange/VVC/draft_conformance/"
BITSTREAM_EXTS = (
".bit",
)
MD5_EXTS = ("yuv_2.md5", "yuv.md5", ".md5", "md5.txt", "md5sum.txt")
MD5_EXCLUDES = (".bin.md5", "bit.md5")
RAW_EXTS = ("nogray.yuv", ".yuv", ".qcif")
# A single-element tuple needs a trailing comma, e.g. (".bit", ). Without it the
# parentheses are ignored and the value is a plain string, so utils.find_by_ext()
# would treat each character of that string as an individual element.
BITSTREAM_EXTS = (".bit", )
MD5_EXTS = (".yuv.md5", )


class HREFParser(HTMLParser):
Expand Down Expand Up @@ -158,7 +158,7 @@ def generate(self, download, jobs):
raise key_err
except CalledProcessError as proc_err:
exceptions = {
# All below test vectors need cause ffprobe to crash
# All below test vectors cause ffprobe to crash
"MNUT_A_Nokia_3": OutputFormat.NONE,
"MNUT_B_Nokia_2": OutputFormat.NONE,
"SUBPIC_C_ERICSSON_1": OutputFormat.NONE,
Expand All @@ -169,9 +169,30 @@ def generate(self, download, jobs):
else:
raise proc_err

self._fill_checksum_h266(test_vector, dest_dir)

test_suite.to_json_file(output_filepath)
print("Generate new test suite: " + test_suite.name + ".json")

@staticmethod
def _fill_checksum_h266(test_vector, dest_dir):
    """Set test_vector.result to the MD5 read from the checksum file in dest_dir.

    The checksum file is located with utils.find_by_ext() using MD5_EXTS. The
    first non-empty line that starts with a run of at least 32 hexadecimal
    digits provides the checksum, which is stored lower-cased.

    Raises:
        Exception: if no checksum file is found under dest_dir.
        ValueError: if no line starts with a hexadecimal run, or if that run
            is not exactly 32 digits long (i.e. it is not an MD5).
    """
    checksum_path = utils.find_by_ext(dest_dir, MD5_EXTS)
    if checksum_path is None:
        raise Exception("MD5 not found")

    # The hash is expected at the very start of the (stripped) line
    regex = re.compile(r"[a-fA-F0-9]{32,}")
    with open(checksum_path, "r") as checksum_fh:
        # Blank lines strip down to "" and simply never match
        candidates = (regex.match(line.strip()) for line in checksum_fh)
        match = next((m for m in candidates if m), None)

    if match is None:
        raise ValueError(f"No MD5 checksum found in {checksum_path}")

    checksum = match.group(0).lower()
    # Explicit check instead of assert, which is stripped when running with -O
    if len(checksum) != 32:
        raise ValueError(f"{checksum} is not a valid MD5 hash")
    test_vector.result = checksum


if __name__ == "__main__":
parser = argparse.ArgumentParser()
Expand All @@ -194,6 +215,7 @@ def generate(self, download, jobs):
'JVET-VVC_draft6',
Codec.H266,
'JVET VVC draft6',
H266_URL
H266_URL,
True,
)
generator.generate(not args.skip_download, args.jobs)
2 changes: 1 addition & 1 deletion test_suites/h265/JCT-VC-3D-HEVC.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "JCT-VC-3D-HEVC",
"codec": "H.265",
"description": "JCT-VC 3D-HEVC 3D Extension",
"description": "JCT-VC HEVC 3D Extension",
"test_vectors": [
{
"name": "3DHC_C_A_HHI_3",
Expand Down
Loading
Loading