From a5e1c29e479ae369b44493e423c3975a4ba405d0 Mon Sep 17 00:00:00 2001 From: goose-life <32566441+goose-life@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:28:28 +0200 Subject: [PATCH] fix all get() logic -- only use for has_trusted_url (new field) --- peachjam/adapters/indigo.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/peachjam/adapters/indigo.py b/peachjam/adapters/indigo.py index f530b1317..44e9144ea 100644 --- a/peachjam/adapters/indigo.py +++ b/peachjam/adapters/indigo.py @@ -515,6 +515,11 @@ def download_source_file(self, url, doc, title): }, ) + def get_size_from_url(self, url): + logger.info(" Getting the file size ...") + r = self.client_get(url) + return len(r.content) + def create_publication_file(self, publication_document, doc, title, stub=False): from peachjam.models import PublicationFile @@ -558,12 +563,8 @@ def create_publication_file(self, publication_document, doc, title, stub=False): ) if publication_document.get("has_trusted_url"): logger.info(f" Using publication file from trusted URL {url}") - mimetype = publication_document.get("mime_type", "application/pdf") - size = publication_document.get("size") - if not size: - logger.info(" Getting the file size ...") - r = self.client_get(url) - size = len(r.content) + mimetype = publication_document["mime_type"] or "application/pdf" + size = publication_document["size"] or self.get_size_from_url(url) logger.info(f" Size is {size}") PublicationFile.objects.update_or_create( document=doc, @@ -581,8 +582,8 @@ def create_publication_file(self, publication_document, doc, title, stub=False): with NamedTemporaryFile() as f: r = self.client_get(url) f.write(r.content) - mimetype = publication_document.get( - "mime_type", magic.from_file(f.name, mime=True) + mimetype = publication_document["mime_type"] or magic.from_file( + f.name, mime=True ) file = File(f, name=filename) PublicationFile.objects.update_or_create(