Skip to content

Commit

Permalink
sources(curl): use json-like output instead of custom record
Browse files Browse the repository at this point in the history
When using `--write-out` we are not using %{json} because older curl
(7.76) will write {"http_connect":000}, which python cannot parse
(a number with leading zeros such as 000 is not valid JSON).

So we had a custom `--write-out` with `\x1c` as "record" separators
between the fields. This is a bit old-school and not very extensible
so Achilleas had the idea to still use json but "define" our own
subset via the variables that curl provides. This commit does that.
  • Loading branch information
mvo5 authored and achilleas-k committed Jul 30, 2024
1 parent e535877 commit 46db834
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 20 deletions.
37 changes: 19 additions & 18 deletions sources/org.osbuild.curl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ up the download.

import concurrent.futures
import contextlib
import json
import os
import pathlib
import platform
Expand Down Expand Up @@ -96,6 +97,11 @@ SCHEMA = """
}]
"""

# We are not just using %{json} here because older curl (7.76) will
# write {"http_connect":000}, which python cannot parse (000 is not a
# valid JSON number), so we write our own json subset instead.
CURL_WRITE_OUT = r'\{\"url\": \"%{url}\"\, \"filename_effective\": \"%{filename_effective}\", \"exitcode\": %{exitcode}, \"errormsg\": \"%{errormsg}\" \}\n'


def curl_has_parallel_downloads():
"""
Expand Down Expand Up @@ -131,9 +137,11 @@ def _quote_url(url: str) -> str:
return quoted.geturl()


def gen_curl_download_config(config_path: pathlib.Path, chksum_desc_tuple: List[Tuple[str, Dict]]):
def gen_curl_download_config(config_path: pathlib.Path, chksum_desc_tuple: List[Tuple[str, Dict]], parallel=False):
with open(config_path, "w", encoding="utf8") as fp:
# global options
if parallel:
fp.write("parallel\n")
fp.write(textwrap.dedent(f"""\
user-agent = "osbuild (Linux.{platform.machine()}; https://osbuild.org/)"
silent
Expand All @@ -142,6 +150,11 @@ def gen_curl_download_config(config_path: pathlib.Path, chksum_desc_tuple: List[
fail
location
"""))
if parallel:
fp.write(textwrap.dedent(f"""\
write-out = "{CURL_WRITE_OUT}"
"""))

proxy = os.getenv("OSBUILD_SOURCES_CURL_PROXY")
if proxy:
fp.write(f'proxy = "{proxy}"\n')
Expand Down Expand Up @@ -171,18 +184,11 @@ def gen_curl_download_config(config_path: pathlib.Path, chksum_desc_tuple: List[


def try_parse_curl_line(line):
line = line.strip()
print(line)
if not line.startswith("osbuild-dl\x1c"):
print(f"WARNING: unexpected prefix in {line}", file=sys.stderr)
try:
return json.loads(line.strip())
except Exception as e: # pylint: disable=broad-exception-caught
print(f"WARNING: cannot json parse {line} {e}", file=sys.stderr)
return None
_, url, filename, exitcode, errormsg = line.split("\x1c")
return {
"url": url,
"filename_effective": filename,
"exitcode": int(exitcode),
"errormsg": errormsg,
}


def validate_and_move_to_targetdir(tmpdir, targetdir, checksum, origin):
Expand All @@ -203,17 +209,12 @@ def validate_and_move_to_targetdir(tmpdir, targetdir, checksum, origin):

def fetch_many_new_curl(tmpdir, targetdir, dl_pairs):
curl_config_path = f"{tmpdir}/curl-config.txt"
gen_curl_download_config(curl_config_path, dl_pairs)
gen_curl_download_config(curl_config_path, dl_pairs, parallel=True)
curl_command = [
"curl",
"--config", curl_config_path,
# this adds a bunch of noise but might be nice for debug?
# "--show-error",
"--parallel",
# this will write out a "record" for each finished download
# Not using %{json} here because older curl (7.76) will write
# {"http_connect":000} which python cannot parse
"--write-out", "osbuild-dl\x1c%{url}\x1c%{filename_effective}\x1c%{exitcode}\x1cerror: %{errormsg}\n",
]
with contextlib.ExitStack() as cm:
curl_p = subprocess.Popen(curl_command, encoding="utf-8", cwd=tmpdir, stdout=subprocess.PIPE)
Expand Down
32 changes: 30 additions & 2 deletions sources/test/test_curl_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,18 +241,46 @@ def test_curl_download_proxy(tmp_path, monkeypatch, sources_module, with_proxy):
]


def test_curl_gen_download_config(tmp_path, sources_module):
def test_curl_gen_download_config_old_curl(tmp_path, sources_module):
config_path = tmp_path / "curl-config.txt"
sources_module.gen_curl_download_config(config_path, TEST_SOURCE_PAIRS_GEN_DOWNLOAD_CONFIG)
sources_module.gen_curl_download_config(config_path, [(
# sha256("0")
"sha256:5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91b46729d73a27fb57e9",
{
"url": "http://example.com/file/0",
},
)])

assert config_path.exists()
assert config_path.read_text(encoding="utf8") == textwrap.dedent(f"""\
user-agent = "osbuild (Linux.{platform.machine()}; https://osbuild.org/)"
silent
speed-limit = 1000
connect-timeout = 30
fail
location
url = "http://example.com/file/0"
output = "sha256:5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91b46729d73a27fb57e9"
no-insecure
""")


def test_curl_gen_download_config_parallel(tmp_path, sources_module):
config_path = tmp_path / "curl-config.txt"
sources_module.gen_curl_download_config(config_path, TEST_SOURCE_PAIRS_GEN_DOWNLOAD_CONFIG, parallel=True)

assert config_path.exists()
assert config_path.read_text(encoding="utf8") == textwrap.dedent(f"""\
parallel
user-agent = "osbuild (Linux.{platform.machine()}; https://osbuild.org/)"
silent
speed-limit = 1000
connect-timeout = 30
fail
location
write-out = "{sources_module.CURL_WRITE_OUT}"
url = "http://example.com/file/0"
output = "sha256:5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91b46729d73a27fb57e9"
Expand Down

0 comments on commit 46db834

Please sign in to comment.