Skip to content

Commit

Permalink
chore: Simplify methods for parsing graph
Browse files: browse the repository at this point in the history
  • Loading branch information
bellisk committed Oct 2, 2023
1 parent deee104 commit edc397b
Showing 1 changed file with 18 additions and 22 deletions.
40 changes: 18 additions & 22 deletions ckanext/dcatapchharvest/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def _get_publisher_url_from_identifier(self, identifier):
return ORGANIZATION_BASE_URL + identifier_split[1]
return ''

def _publisher(self, subject, predicate, identifier):
def _publisher(self, subject, identifier):
"""
Returns a dict with details about a dct:publisher entity, a foaf:Agent
Expand All @@ -192,7 +192,7 @@ def _publisher(self, subject, predicate, identifier):
an empty string if they could not be found
"""
publisher = {}
for agent in self.g.objects(subject, predicate):
for agent in self.g.objects(subject, DCT.publisher):
publisher['url'] = (str(agent) if isinstance(agent,
URIRef) else '')
publisher_name = self._object_value(agent, FOAF.name)
Expand All @@ -210,11 +210,11 @@ def _publisher(self, subject, predicate, identifier):
)
return json.dumps(publisher)

def _relations(self, subject, predicate):
def _relations(self, subject):

relations = []

for relation_node in self.g.objects(subject, predicate):
for relation_node in self.g.objects(subject, DCT.relation):
relation = {
'label': self._object_value(relation_node, RDFS.label),
'url': relation_node
Expand Down Expand Up @@ -242,24 +242,24 @@ def _license_rights_uri(self, subject, predicate):
return node
return None

def _keywords(self, subject):
    """
    Collects the dcat:keyword values of a subject, grouped by language.

    The result starts out with an empty list for every language returned
    by dh.get_langs(). Each keyword found in the graph is normalised with
    munge_tag and appended to the list stored under the keyword node's
    `language` attribute.

    :param subject: graph node whose dcat:keyword objects are read
    :returns: dict mapping a language to a list of munged keywords
    """
    # Pre-seed every configured language so that callers always find a
    # list, even when no keyword exists for that language.
    keywords = {lang: [] for lang in dh.get_langs()}

    for keyword_node in self.g.objects(subject, DCAT.keyword):
        # setdefault keeps keywords whose language is not one of the
        # pre-seeded ones instead of dropping them.
        # NOTE(review): an untagged literal presumably has language None
        # and would be stored under the key None -- confirm this is wanted.
        keywords.setdefault(keyword_node.language, []).append(
            munge_tag(unicode(keyword_node))
        )

    return keywords

def _contact_points(self, subject, predicate):
def _contact_points(self, subject):

contact_points = []

for contact_node in self.g.objects(subject, predicate):
for contact_node in self.g.objects(subject, DCAT.contactPoint):
email = self._object_value(contact_node, VCARD.hasEmail)
if email:
email_clean = email.replace(EMAIL_MAILTO_PREFIX, '')
Expand All @@ -274,11 +274,11 @@ def _contact_points(self, subject, predicate):

return contact_points

def _temporals(self, subject, predicate):
def _temporals(self, subject):

temporals = []

for temporal_node in self.g.objects(subject, predicate):
for temporal_node in self.g.objects(subject, DCT.temporal):
# Currently specified properties in DCAT-AP.
start_date, start_date_type = self._object_value_and_datatype(
temporal_node, DCAT.startDate)
Expand Down Expand Up @@ -391,8 +391,8 @@ def _clean_end_datetime(self, datetime_value, data_type):
except ValueError:
return None

def _get_eu_accrual_periodicity(self, subject, predicate):
ogdch_value = self._object_value(subject, predicate)
def _get_eu_accrual_periodicity(self, subject):
ogdch_value = self._object_value(subject, DCT.accrualPeriodicity)
ogdch_value = URIRef(ogdch_value)
for key, value in valid_frequencies.items():
if ogdch_value == value:
Expand Down Expand Up @@ -428,11 +428,9 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa
dataset_dict[key] = value

# Accrual periodicity
for key, predicate in (
('accrual_periodicity', DCT.accrualPeriodicity),
):
value = self._get_eu_accrual_periodicity(dataset_ref, predicate)
dataset_dict[key] = value
dataset_dict['accrual_periodicity'] = self._get_eu_accrual_periodicity(
dataset_ref
)

# Timestamp fields
for key, predicate in (
Expand Down Expand Up @@ -461,7 +459,7 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa
dataset_dict['tags'].append({'name': munge_tag(unicode(keyword))})

# Keywords
dataset_dict['keywords'] = self._keywords(dataset_ref, DCAT.keyword)
dataset_dict['keywords'] = self._keywords(dataset_ref)

# Themes
dcat_theme_urls = self._object_value_list(dataset_ref, DCAT.theme)
Expand All @@ -480,24 +478,22 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa
# Contact details
dataset_dict['contact_points'] = self._contact_points(
dataset_ref,
DCAT.contactPoint
)

# Publisher
dataset_dict['publisher'] = self._publisher(
dataset_ref,
DCT.publisher,
dataset_dict.get('identifier', '')
)

# Relations
dataset_dict['relations'] = self._relations(dataset_ref, DCT.relation)
dataset_dict['relations'] = self._relations(dataset_ref)
for relation in dataset_dict['relations']:
if relation['label'] == {}:
relation['label'] = str(relation.get('url', ''))

# Temporal
dataset_dict['temporals'] = self._temporals(dataset_ref, DCT.temporal)
dataset_dict['temporals'] = self._temporals(dataset_ref)

# References
see_alsos = self._object_value_list(dataset_ref, RDFS.seeAlso)
Expand Down

0 comments on commit edc397b

Please sign in to comment.