
Stream GET request for HttpFetcher download, and write in 30 MiB chunks, or declared chunk encoding to reduce RAM usage
mtalexan authored and dustymabe committed Sep 7, 2023
1 parent 86943ca commit 52774c1
Showing 1 changed file with 18 additions and 4 deletions.
src/cmd-buildfetch (18 additions & 4 deletions)
@@ -226,12 +226,26 @@ class HTTPFetcher(Fetcher):

     @retry(stop=retry_stop, retry=retry_requests_exception, before_sleep=retry_callback)
     def fetch_impl(self, url, dest):
-        # notice we don't use `stream=True` here; the stuff we're fetching for
-        # now is super small
-        with requests.get(url) as r:
+        with requests.get(url, stream=True) as r:
             r.raise_for_status()
             with open(dest, mode='wb') as f:
+                # Stream file data from the network to the file in chunks of this size.
+                # 30 MiB is somewhat arbitrary but should be easily supported on most
+                # systems without slowing the transfer down.
+                max_chunk_size = 30 * 1024 * 1024
+
+                # If the HTTP headers already declare a chunked transfer encoding,
+                # respect those chunk boundaries instead of our hardcoded max size.
+                if 'chunked' in r.headers.get('transfer-encoding', list()):
+                    max_chunk_size = None
+
+                # With stream=True above, read data from the network and write it to the
+                # file in chunks rather than pulling the whole body into RAM first; for
+                # large ociarchive files on lower-RAM systems that can crash. The trade-off
+                # of chunking is usually negligible unless the files are extra huge, the
+                # disk IO cache is very small, and the network pipe is very large.
+                for chunk in r.iter_content(chunk_size=max_chunk_size):
+                    f.write(chunk)

     @retry(stop=retry_stop, retry=retry_requests_exception, before_sleep=retry_callback)
     def exists_impl(self, url):
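For context on the RAM claim in the commit message: requests buffers the entire response body when stream=True is not set, so the old code's peak memory was roughly the size of the file being fetched. A minimal standalone sketch of the before/after behavior, assuming a hypothetical URL and destination paths:

import requests

url = 'https://example.com/large.ociarchive'  # hypothetical URL

# Before: r.content buffers the whole body in RAM before any byte hits disk,
# so peak memory is roughly the file size.
with requests.get(url) as r:
    r.raise_for_status()
    with open('/tmp/buffered', 'wb') as f:
        f.write(r.content)

# After: stream=True defers the body download, and iter_content() yields it
# piecewise, keeping peak memory near one chunk (30 MiB here).
with requests.get(url, stream=True) as r:
    r.raise_for_status()
    with open('/tmp/streamed', 'wb') as f:
        for chunk in r.iter_content(chunk_size=30 * 1024 * 1024):
            f.write(chunk)

Passing chunk_size=None instead makes iter_content() yield data in whatever size it arrives off the wire, which is why the commit drops the hardcoded size when the server already declares a chunked transfer encoding.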

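The @retry decorator on fetch_impl and exists_impl follows the tenacity library's API; the retry_stop, retry_requests_exception, and retry_callback helpers are defined elsewhere in cmd-buildfetch and are not part of this hunk. A hypothetical reconstruction of what such helpers commonly look like (the attempt count and log message are illustrative, not the real definitions):

import requests
from tenacity import retry_if_exception_type, stop_after_attempt

# Hypothetical stand-ins for the helpers referenced by the decorator; the real
# definitions in cmd-buildfetch may differ.
retry_stop = stop_after_attempt(5)
retry_requests_exception = retry_if_exception_type(requests.exceptions.RequestException)

def retry_callback(retry_state):
    # before_sleep hook: tenacity calls this between a failed attempt and the
    # next retry, with a RetryCallState describing the attempt.
    print(f"fetch failed, retrying (attempt {retry_state.attempt_number})")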
