From 7caa34076b59f616ae042cd272b39f480cad5ec6 Mon Sep 17 00:00:00 2001 From: Wilson Gaturu Date: Thu, 7 Sep 2023 13:41:19 +0300 Subject: [PATCH 1/5] adds taxonomy topics in indigo adapter --- peachjam/adapters/adapters.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/peachjam/adapters/adapters.py b/peachjam/adapters/adapters.py index da1b27656..29022c6a7 100644 --- a/peachjam/adapters/adapters.py +++ b/peachjam/adapters/adapters.py @@ -18,12 +18,14 @@ Author, CoreDocument, DocumentNature, + DocumentTopic, GenericDocument, LegalInstrument, Legislation, Locality, Predicate, Relationship, + Taxonomy, Work, ) from peachjam.plugins import plugins @@ -290,6 +292,21 @@ def update_document(self, url): # the source file is the PDF version self.download_source_file(f"{url}.pdf", created_doc, title) + if document["taxonomy_topics"]: + # get topics beginning with "subject-areas" + topics = [ + t for t in document["taxonomy_topics"] if t.startswith("subject-areas") + ] + if topics: + taxonomies = Taxonomy.objects.filter(slug__in=topics) + created_doc.taxonomies.all().delete() + for taxonomy in taxonomies: + DocumentTopic.objects.create( + document=created_doc, + topic=taxonomy, + ) + logger.info(f"Added {len(taxonomies)} taxonomies to {created_doc}") + self.set_parent(document, created_doc) self.fetch_relationships(document, created_doc) From a5415f20b4c1cfaa7b7024d65ea9830ffa722f68 Mon Sep 17 00:00:00 2001 From: Wilson Gaturu Date: Fri, 8 Sep 2023 05:35:04 +0300 Subject: [PATCH 2/5] adds filter in taxonomy delete method --- peachjam/adapters/adapters.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/peachjam/adapters/adapters.py b/peachjam/adapters/adapters.py index 29022c6a7..18bc0e340 100644 --- a/peachjam/adapters/adapters.py +++ b/peachjam/adapters/adapters.py @@ -299,7 +299,9 @@ def update_document(self, url): ] if topics: taxonomies = Taxonomy.objects.filter(slug__in=topics) - created_doc.taxonomies.all().delete() + 
created_doc.taxonomies.filter( + topic__slug__startswith="subject-areas" + ).delete() for taxonomy in taxonomies: DocumentTopic.objects.create( document=created_doc, From 1103345b01adc781d8ec69c82f5adeb003c30a4b Mon Sep 17 00:00:00 2001 From: Wilson Gaturu Date: Fri, 8 Sep 2023 19:34:56 +0300 Subject: [PATCH 3/5] clear existing taxonomies before update --- peachjam/adapters/adapters.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/peachjam/adapters/adapters.py b/peachjam/adapters/adapters.py index 18bc0e340..f71c2fc7f 100644 --- a/peachjam/adapters/adapters.py +++ b/peachjam/adapters/adapters.py @@ -89,6 +89,9 @@ def __init__(self, settings): } ) self.api_url = self.settings["api_url"] + self.taxonomy_topic_root = self.settings.get( + "taxonomy_topic_root", "subject-areas" + ) def check_for_updates(self, last_refreshed): """Checks for documents updated since last_refreshed (which may be None), and returns a list @@ -227,7 +230,9 @@ def update_document(self, url): "date": datetime.strptime(document["expression_date"], "%Y-%m-%d").date(), } if document["locality"]: - frbr_uri_data["locality"] = Locality.objects.get(code=document["locality"]) + frbr_uri_data["locality"] = Locality.objects.gapi_urlet( + code=document["locality"] + ) doc = CoreDocument(**frbr_uri_data) doc.work_frbr_uri = doc.generate_work_frbr_uri() @@ -292,16 +297,20 @@ def update_document(self, url): # the source file is the PDF version self.download_source_file(f"{url}.pdf", created_doc, title) + # clear any existing taxonomies + created_doc.taxonomies.filter( + topic__slug__startswith=self.taxonomy_topic_root + ).delete() + if document["taxonomy_topics"]: # get topics beginning with "subject-areas" topics = [ - t for t in document["taxonomy_topics"] if t.startswith("subject-areas") + t + for t in document["taxonomy_topics"] + if t.startswith(self.taxonomy_topic_root) ] if topics: taxonomies = Taxonomy.objects.filter(slug__in=topics) - 
created_doc.taxonomies.filter( - topic__slug__startswith="subject-areas" - ).delete() for taxonomy in taxonomies: DocumentTopic.objects.create( document=created_doc, From 1395b3eeb167f4e3fe027fd4149f68ed8b841835 Mon Sep 17 00:00:00 2001 From: Wilson Gaturu Date: Tue, 12 Sep 2023 10:12:41 +0300 Subject: [PATCH 4/5] fixes taxonomy topic adapter settings --- peachjam/adapters/adapters.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/peachjam/adapters/adapters.py b/peachjam/adapters/adapters.py index f71c2fc7f..a2a4fbd4e 100644 --- a/peachjam/adapters/adapters.py +++ b/peachjam/adapters/adapters.py @@ -89,9 +89,7 @@ def __init__(self, settings): } ) self.api_url = self.settings["api_url"] - self.taxonomy_topic_root = self.settings.get( - "taxonomy_topic_root", "subject-areas" - ) + self.taxonomy_topic_root = self.settings.get("taxonomy_topic_root") def check_for_updates(self, last_refreshed): """Checks for documents updated since last_refreshed (which may be None), and returns a list @@ -230,9 +228,7 @@ def update_document(self, url): "date": datetime.strptime(document["expression_date"], "%Y-%m-%d").date(), } if document["locality"]: - frbr_uri_data["locality"] = Locality.objects.gapi_urlet( - code=document["locality"] - ) + frbr_uri_data["locality"] = Locality.objects.get(code=document["locality"]) doc = CoreDocument(**frbr_uri_data) doc.work_frbr_uri = doc.generate_work_frbr_uri() @@ -302,7 +298,7 @@ def update_document(self, url): topic__slug__startswith=self.taxonomy_topic_root ).delete() - if document["taxonomy_topics"]: + if document["taxonomy_topics"] and self.taxonomy_topic_root: # get topics beginning with "subject-areas" topics = [ t From 94a61145b278183cbef2a1ffab6eca76c680cce4 Mon Sep 17 00:00:00 2001 From: Wilson Gaturu Date: Tue, 12 Sep 2023 10:18:36 +0300 Subject: [PATCH 5/5] checks for taxonomy topic root before updating taxonomies --- peachjam/adapters/adapters.py | 41 ++++++++++++++++++----------------- 1 file changed, 
21 insertions(+), 20 deletions(-) diff --git a/peachjam/adapters/adapters.py b/peachjam/adapters/adapters.py index a2a4fbd4e..7e8c5bc83 100644 --- a/peachjam/adapters/adapters.py +++ b/peachjam/adapters/adapters.py @@ -293,26 +293,27 @@ def update_document(self, url): # the source file is the PDF version self.download_source_file(f"{url}.pdf", created_doc, title) - # clear any existing taxonomies - created_doc.taxonomies.filter( - topic__slug__startswith=self.taxonomy_topic_root - ).delete() - - if document["taxonomy_topics"] and self.taxonomy_topic_root: - # get topics beginning with "subject-areas" - topics = [ - t - for t in document["taxonomy_topics"] - if t.startswith(self.taxonomy_topic_root) - ] - if topics: - taxonomies = Taxonomy.objects.filter(slug__in=topics) - for taxonomy in taxonomies: - DocumentTopic.objects.create( - document=created_doc, - topic=taxonomy, - ) - logger.info(f"Added {len(taxonomies)} taxonomies to {created_doc}") + if self.taxonomy_topic_root: + # clear any existing taxonomies under the configured topic root + created_doc.taxonomies.filter( + topic__slug__startswith=self.taxonomy_topic_root + ).delete() + + if document["taxonomy_topics"]: + # get topics beginning with the configured taxonomy topic root + topics = [ + t + for t in document["taxonomy_topics"] + if t.startswith(self.taxonomy_topic_root) + ] + if topics: + taxonomies = Taxonomy.objects.filter(slug__in=topics) + for taxonomy in taxonomies: + DocumentTopic.objects.create( + document=created_doc, + topic=taxonomy, + ) + logger.info(f"Added {len(taxonomies)} taxonomies to {created_doc}") self.set_parent(document, created_doc) self.fetch_relationships(document, created_doc)