chore: Simplify methods for parsing graph

opendata-swiss · Oct 2, 2023 · edc397b
1 parent deee104
commit edc397b
Showing 1 changed file with 18 additions and 22 deletions.
diff --git a/ckanext/dcatapchharvest/profiles.py b/ckanext/dcatapchharvest/profiles.py
@@ -169,7 +169,7 @@ def _get_publisher_url_from_identifier(self, identifier):
             return ORGANIZATION_BASE_URL + identifier_split[1]
         return ''
 
-    def _publisher(self, subject, predicate, identifier):
+    def _publisher(self, subject, identifier):
         """
         Returns a dict with details about a dct:publisher entity, a foaf:Agent
 
@@ -192,7 +192,7 @@ def _publisher(self, subject, predicate, identifier):
         an empty string if they could not be found
         """
         publisher = {}
-        for agent in self.g.objects(subject, predicate):
+        for agent in self.g.objects(subject, DCT.publisher):
             publisher['url'] = (str(agent) if isinstance(agent,
                                 URIRef) else '')
             publisher_name = self._object_value(agent, FOAF.name)
@@ -210,11 +210,11 @@ def _publisher(self, subject, predicate, identifier):
             )
         return json.dumps(publisher)
 
-    def _relations(self, subject, predicate):
+    def _relations(self, subject):
 
         relations = []
 
-        for relation_node in self.g.objects(subject, predicate):
+        for relation_node in self.g.objects(subject, DCT.relation):
             relation = {
                 'label': self._object_value(relation_node, RDFS.label),
                 'url': relation_node
@@ -242,24 +242,24 @@ def _license_rights_uri(self, subject, predicate):
                 return node
         return None
 
-    def _keywords(self, subject, predicate):
+    def _keywords(self, subject):
         keywords = {}
         # initialize the keywords with empty lists for all languages
         for lang in dh.get_langs():
             keywords[lang] = []
 
-        for keyword_node in self.g.objects(subject, predicate):
+        for keyword_node in self.g.objects(subject, DCAT.keyword):
             lang = keyword_node.language
             keyword = munge_tag(unicode(keyword_node))
             keywords.setdefault(lang, []).append(keyword)
 
         return keywords
 
-    def _contact_points(self, subject, predicate):
+    def _contact_points(self, subject):
 
         contact_points = []
 
-        for contact_node in self.g.objects(subject, predicate):
+        for contact_node in self.g.objects(subject, DCAT.contactPoint):
             email = self._object_value(contact_node, VCARD.hasEmail)
             if email:
                 email_clean = email.replace(EMAIL_MAILTO_PREFIX, '')
@@ -274,11 +274,11 @@ def _contact_points(self, subject, predicate):
 
         return contact_points
 
-    def _temporals(self, subject, predicate):
+    def _temporals(self, subject):
 
         temporals = []
 
-        for temporal_node in self.g.objects(subject, predicate):
+        for temporal_node in self.g.objects(subject, DCT.temporal):
             # Currently specified properties in DCAT-AP.
             start_date, start_date_type = self._object_value_and_datatype(
                 temporal_node, DCAT.startDate)
@@ -391,8 +391,8 @@ def _clean_end_datetime(self, datetime_value, data_type):
         except ValueError:
             return None
 
-    def _get_eu_accrual_periodicity(self, subject, predicate):
-        ogdch_value = self._object_value(subject, predicate)
+    def _get_eu_accrual_periodicity(self, subject):
+        ogdch_value = self._object_value(subject, DCT.accrualPeriodicity)
         ogdch_value = URIRef(ogdch_value)
         for key, value in valid_frequencies.items():
             if ogdch_value == value:
@@ -428,11 +428,9 @@ def parse_dataset(self, dataset_dict, dataset_ref):  # noqa
                 dataset_dict[key] = value
 
         # Accrual periodicity
-        for key, predicate in (
-                ('accrual_periodicity', DCT.accrualPeriodicity),
-        ):
-            value = self._get_eu_accrual_periodicity(dataset_ref, predicate)
-            dataset_dict[key] = value
+        dataset_dict['accrual_periodicity'] = self._get_eu_accrual_periodicity(
+            dataset_ref
+        )
 
         # Timestamp fields
         for key, predicate in (
@@ -461,7 +459,7 @@ def parse_dataset(self, dataset_dict, dataset_ref):  # noqa
             dataset_dict['tags'].append({'name': munge_tag(unicode(keyword))})
 
         # Keywords
-        dataset_dict['keywords'] = self._keywords(dataset_ref, DCAT.keyword)
+        dataset_dict['keywords'] = self._keywords(dataset_ref)
 
         # Themes
         dcat_theme_urls = self._object_value_list(dataset_ref, DCAT.theme)
@@ -480,24 +478,22 @@ def parse_dataset(self, dataset_dict, dataset_ref):  # noqa
         # Contact details
         dataset_dict['contact_points'] = self._contact_points(
             dataset_ref,
-            DCAT.contactPoint
         )
 
         # Publisher
         dataset_dict['publisher'] = self._publisher(
             dataset_ref,
-            DCT.publisher,
             dataset_dict.get('identifier', '')
         )
 
         # Relations
-        dataset_dict['relations'] = self._relations(dataset_ref, DCT.relation)
+        dataset_dict['relations'] = self._relations(dataset_ref)
         for relation in dataset_dict['relations']:
             if relation['label'] == {}:
                 relation['label'] = str(relation.get('url', ''))
 
         # Temporal
-        dataset_dict['temporals'] = self._temporals(dataset_ref, DCT.temporal)
+        dataset_dict['temporals'] = self._temporals(dataset_ref)
 
         # References
         see_alsos = self._object_value_list(dataset_ref, RDFS.seeAlso)