diff --git a/CHANGELOG b/CHANGELOG
index 804689f..e0ccc21 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
 0.16.2
  - fix: convert resource file extensions to lower-case (#81)
  - fix: escape html characters in logging panel
+ - fix: fall-back to resource size estimation via S3 download header
  - enh: improve error message when registered dataset ID does not exist (#82)
  - setup: bump dclab to 0.62.7
 0.16.1
diff --git a/dcoraid/download/job.py b/dcoraid/download/job.py
index 1bbf1f8..9a2a2e0 100644
--- a/dcoraid/download/job.py
+++ b/dcoraid/download/job.py
@@ -114,7 +114,29 @@ def from_download_job_state(dj_state, api):
 
     @property
     def file_size(self):
-        return self.get_resource_dict()["size"]
+        """Size of the download in bytes (resource dict or S3 header)"""
+        size = None
+        if not self.condensed:
+            # Try to get the file size from the resource dictionary.
+            # Note that the file size is set only after upload. So, if you
+            # are downloading immediately after upload, the "size"
+            # attribute might not be set yet.
+            size = self.get_resource_dict().get("size")
+        if size is None:
+            # Fetch the file size from S3.
+            # This is the only option for condensed downloads (because they
+            # are not a resource) and the fall-back for actual resources.
+            url = self.get_resource_url()
+            # Only the response headers are needed. Close the streamed
+            # response right away so the connection is released.
+            with requests.get(url,
+                              stream=True,
+                              headers=self.api.headers,
+                              verify=self.api.verify,
+                              timeout=29.9,
+                              ) as req:
+                size = int(req.headers["Content-length"])
+        return size
 
     @property
     def id(self):
@@ -323,19 +345,8 @@ def task_download_resource(self):
                 # set-up temporary path
                 self.path_temp = self.path.with_name(self.path.name + "~")
                 # check for disk space
-                if self.condensed:
-                    # get the size from the server
-                    url = self.get_resource_url()
-                    req = requests.get(url,
-                                       stream=True,
-                                       headers=self.api.headers,
-                                       verify=self.api.verify,
-                                       timeout=29.9,
-                                       )
-                    size = int(req.headers["Content-length"])
-                else:
-                    size = self.get_resource_dict()["size"]
-                if shutil.disk_usage(self.path_temp.parent).free < size:
+                if shutil.disk_usage(
+                        self.path_temp.parent).free < self.file_size:
                     # there is not enough space on disk for the download
                     self.set_state("wait-disk")
                     time.sleep(1)