Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue35 #36

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 39 additions & 16 deletions src/mdh_modules/nc_to_mmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,23 +698,46 @@ def add_project(self, myxmltree, mynsmap, ncin):

# Add platform. Relies on the controlled vocabulary in MMD: reads platform (and platform_vocabulary, if the latter is present) from ACDD and maps them to MMD.
def add_platform(self, myxmltree, mynsmap, ncin, myattrs):
    """Append MMD ``platform`` elements for the platforms named in ``ncin``.

    The ``platform`` attribute of ``ncin`` is split on commas. Each item is
    reduced to a short name: surrounding whitespace is stripped and, when the
    item contains a parenthesised part, that part is used instead. Both of
    these forms are therefore understood::

        :platform = "NOAA-21, GCOM-W1" ;
        :platform = "some long name (NOAA-21), someother long name (GCOM-W1)" ;

    Short names are looked up in
    ``self.vocabulary.ControlledVocabulary.Platform``, a dict of the form
    ``{'Metop-B': {'wmosatellites_ref': <url>, 'altLabel': <long name>}}``.
    For every match a ``platform`` element with ``short_name``, ``long_name``
    and ``resource`` children is appended to ``myxmltree``. Items that are
    not in the vocabulary are skipped.

    If ``'instrument'`` is listed in ``myattrs`` and the ``instrument``
    attribute holds exactly as many comma-separated items as ``platform``,
    the instrument at the same position is looked up in
    ``self.vocabulary.ControlledVocabulary.Instrument`` (same layout, with
    the key ``wmoinstruments_ref``) and nested inside the platform element.
    There is no check that the instrument is actually on board the platform.

    Args:
        myxmltree: parent element the ``platform`` elements are appended to.
        mynsmap: mapping that provides the MMD namespace under key ``'mmd'``.
        ncin: object exposing ``platform`` (and optionally ``instrument``)
            as comma-separated string attributes.
        myattrs: collection of attribute names available on ``ncin``.
    """

    def _short_name(raw):
        # Reduce one comma-separated item to its vocabulary key candidate.
        name = raw.strip()
        # Greedy on purpose: spans from the first '(' to the last ')'.
        match = re.search(r'\(.+\)', name)
        if match is None:
            # No usable "(...)" part (none at all, empty, or ')' before '('):
            # fall back to the stripped item instead of failing.
            return name
        return match.group().lstrip('(').rstrip(')').strip()

    mmd_ns = mynsmap['mmd']
    valid_statements_platform = self.vocabulary.ControlledVocabulary.Platform
    valid_statements_instrument = self.vocabulary.ControlledVocabulary.Instrument

    platforms = [_short_name(item) for item in getattr(ncin, 'platform').split(',')]

    # Instruments are paired with platforms by position, which is only
    # meaningful when both lists have the same length. Parsed once here
    # rather than once per platform.
    instruments = None
    if 'instrument' in myattrs:
        candidates = [_short_name(item) for item in getattr(ncin, 'instrument').split(',')]
        if len(candidates) == len(platforms):
            instruments = candidates

    for index, short_name in enumerate(platforms):
        platform_entry = valid_statements_platform.get(short_name)
        if platform_entry is None:
            continue

        myel = ET.SubElement(myxmltree, ET.QName(mmd_ns, 'platform'))
        ET.SubElement(myel, ET.QName(mmd_ns, 'short_name')).text = short_name
        ET.SubElement(myel, ET.QName(mmd_ns, 'long_name')).text = platform_entry['altLabel']
        ET.SubElement(myel, ET.QName(mmd_ns, 'resource')).text = platform_entry['wmosatellites_ref']

        if instruments is None:
            continue
        myinst = instruments[index]
        instrument_entry = valid_statements_instrument.get(myinst)
        if instrument_entry is None:
            continue

        myeli = ET.SubElement(myel, ET.QName(mmd_ns, 'instrument'))
        ET.SubElement(myeli, ET.QName(mmd_ns, 'short_name')).text = myinst
        ET.SubElement(myeli, ET.QName(mmd_ns, 'long_name')).text = instrument_entry['altLabel']
        ET.SubElement(myeli, ET.QName(mmd_ns, 'resource')).text = instrument_entry['wmoinstruments_ref']

def add_spatial_representation(self, myxmltree, mynsmap, ncin, myattrs):
myspatr = getattr(ncin, 'spatial_representation')
Expand Down
4 changes: 3 additions & 1 deletion src/vocab/ControlledVocabulary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#last fetch: 2024-09-04 09:27:07.959950
#last fetch: 2024-09-19 18:26:27.200093
UseConstraint = {'CC0-1.0': {'exactMatch': ['http://spdx.org/licenses/CC0-1.0', 'https://creativecommons.org/publicdomain/zero/1.0/'], 'altLabel': ['Creative Commons Zero v1.0 Universal', 'CC0 1.0']}, 'CC-BY-4.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-4.0', 'https://creativecommons.org/licenses/by/4.0/'], 'altLabel': ['Creative Commons Attribution 4.0 International', 'Attribution', 'CC BY 4.0']}, 'CC-BY-SA-4.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-SA-4.0', 'https://creativecommons.org/licenses/by-sa/4.0/'], 'altLabel': ['Creative Commons Attribution Share Alike 4.0 International', 'Attribution-ShareAlike', 'CC BY-SA 4.0']}, 'CC-BY-NC-4.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-NC-4.0', 'https://creativecommons.org/licenses/by-nc/4.0/'], 'altLabel': ['Creative Commons Attribution Non Commercial 4.0 International', 'Attribution-NonCommercial', 'CC BY-NC 4.0']}, 'CC-BY-NC-SA-4.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-NC-SA-4.0', 'https://creativecommons.org/licenses/by-nc-sa/4.0/'], 'altLabel': ['Creative Commons Attribution Non Commercial Share Alike 4.0 International', 'Attribution-NonCommercial-ShareAlike', 'CC BY-NC-SA 4.0']}, 'CC-BY-ND-4.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-ND-4.0', 'https://creativecommons.org/licenses/by-nd/4.0/'], 'altLabel': ['Creative Commons Attribution No Derivatives 4.0 International', 'Attribution-NoDerivs', 'CC BY-ND 4.0']}, 'CC-BY-NC-ND-4.0': {'exactMatch': ['https://creativecommons.org/licenses/by-nc-nd/4.0/', 'http://spdx.org/licenses/CC-BY-NC-ND-4.0'], 'altLabel': ['Creative Commons Attribution Non Commercial No Derivatives 4.0 International', 'Attribution-NonCommercial-NoDerivs', 'CC BY-NC-ND 4.0']}, 'CC-BY-3.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-3.0', 'https://creativecommons.org/licenses/by/3.0/'], 'altLabel': ['Creative Commons Attribution 3.0 Unported', 'CC BY 3.0']}}
AccessConstraint = ['Open', 'Registered users only (automated approval)', 'Registered users only (manual approval required)', 'Restricted to a community', 'Restricted access to metadata']
ActivityType = ['Aircraft', 'Space Borne Instrument', 'Numerical Simulation', 'Climate Indicator', 'In Situ Land-based station', 'In Situ Ship-based station', 'In Situ Ocean fixed station', 'In Situ Ocean moving station', 'In Situ Ice-based station', 'Interview/Questionnaire', 'Maps/Charts/Photographs', 'Not available']
Expand All @@ -9,4 +9,6 @@
DatasetProductionStatus = ['Planned', 'In Work', 'Complete', 'Obsolete', 'Not available']
RelatedInformationTypes = ['Project home page', 'Users guide', 'Dataset landing page', 'Scientific publication', 'Data paper', 'Data management plan', 'Software', 'Other documentation', 'Observation facility', 'Extended metadata']
ISOTopicCategory = ['inlandWaters', 'intelligenceMilitary', 'climatologyMeteorologyAtmosphere', 'utilitiesCommunications', 'farming', 'imageryBaseMapsEarthCover', 'structure', 'health', 'elevation', 'society', 'environment', 'extraTerrestrial', 'biota', 'disaster', 'transportation', 'geoscientificInformation', 'oceans', 'economy', 'planningCadastre', 'location', 'boundaries', 'Not available']
Platform = {'Sentinel-1A': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_1a', 'altLabel': 'Sentinel-1A'}, 'Sentinel-1B': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_1b', 'altLabel': 'Sentinel-1B'}, 'Sentinel-2A': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_2a', 'altLabel': 'Sentinel-2A'}, 'Sentinel-2B': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_2b', 'altLabel': 'Sentinel-2B'}, 'Sentinel-3A': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_3a', 'altLabel': 'Sentinel-3A'}, 'Sentinel-3B': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_3b', 'altLabel': 'Sentinel-3B'}, 'Metop-A': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/metop-a', 'altLabel': 'Meteorological operational satellite - A'}, 'Metop-B': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/metop-b', 'altLabel': 'Meteorological operational satellite - B'}, 'Metop-C': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/metop-c', 'altLabel': 'Meteorological operational satellite - C'}, 'NOAA-18': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/noaa_18', 'altLabel': 'National Oceanic and Atmospheric Administration - 18'}, 'NOAA-19': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/noaa_19', 'altLabel': 'National Oceanic and Atmospheric Administration - 19'}, 'NOAA-20': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/noaa_20', 'altLabel': 'National Oceanic and Atmospheric Administration - 20'}, 'SNPP': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/snpp', 'altLabel': 'Suomi National Polar-orbiting Partnership'}, 'Aqua': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/aqua', 'altLabel': 'Earth Observation System - Aqua'}, 'Terra': 
{'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/terra', 'altLabel': 'Earth Observation System - Terra'}, 'FY-3D': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/fy_3d', 'altLabel': 'Feng-Yun 3D'}, 'FY-3E': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/fy_3e', 'altLabel': 'Feng-Yun 3E'}, 'GCOM-W1': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/gcom_w', 'altLabel': 'Global Change Observation Mission 1st-Water'}, 'NOAA-21': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/noaa_21', 'altLabel': 'National Oceanic and Atmospheric Administration - 21'}, 'Envisat': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/envisat', 'altLabel': 'Environmental Satellite'}}
Instrument = {'SAR-C': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/sar_c_sentinel_1', 'altLabel': 'Synthetic Aperture Radar (C-band)'}, 'MSI': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/msi_sentinel_2a', 'altLabel': 'Multi-Spectral Imager for Sentinel-2'}, 'OLCI': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/olci', 'altLabel': 'Ocean and Land Colour Imager'}, 'SLSTR': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/slstr', 'altLabel': 'Sea and Land Surface Temperature Radiometer'}, 'MWR': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/mwr_sentinel_3', 'altLabel': 'Micro-Wave Radiometer'}, 'AVHRR/3': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/avhrr_3', 'altLabel': 'Advanced Very High Resolution Radiometer / 3'}, 'VIIRS': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/viirs', 'altLabel': 'Visible/Infrared Imager Radiometer Suite'}, 'MODIS': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/modis', 'altLabel': 'Moderate-resolution Imaging Spectro-radiometer'}, 'MERSI-2': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/mersi_2', 'altLabel': 'Medium Resolution Spectral Imager -2'}, 'AMSR2': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/amsr2', 'altLabel': 'Advanced Microwave Scanning Radiometer 2'}, 'SSM/I': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/ssm_i', 'altLabel': 'Special Sensor Microwave - Imager'}, 'AVHRR': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/avhrr', 'altLabel': 'Advanced Very High Resolution Radiometer'}, 'ASAR': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/asar', 'altLabel': 'Advanced Synthetic Aperature Radar'}}
KeywordsVocabulary = ['GCMDSK', 'GCMDLOC', 'GCMDPROV', 'CFSTDN', 'GEMET', 'NORTHEMES', 'None']
95 changes: 95 additions & 0 deletions src/vocab/get_vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ def get_MMDvocab(collections, vocabno):
if collection == 'Use Constraint':
licenses = lookup_license(members, vocabno)
fullvoc += "".join(collection.split()) + ' = ' + str(licenses) + "\n"
elif collection == 'Platform':
platforms = get_platform(members,vocabno)
fullvoc += "".join(collection.split()) + ' = ' + str(platforms) + "\n"
elif collection == 'Instrument':
instruments = get_instrument(members,vocabno)
fullvoc += "".join(collection.split()) + ' = ' + str(instruments) + "\n"
else:
fullvoc += "".join(collection.split()) + ' = ' + str(members) + "\n"

Expand All @@ -52,6 +58,93 @@ def get_MMDvocab(collections, vocabno):

return

def get_platform(short_names, vocabno):
    """Look up the WMO reference and long name for each platform short name.

    For every entry of ``short_names`` two SPARQL queries are sent through
    ``vocabno`` against the ``https://vocab.met.no/mmd`` graph:

    * the ``rdfs:seeAlso`` target of the concept whose English ``prefLabel``
      is the short name, restricted to values containing ``"wmo"``;
    * the English ``skos:altLabel`` of the same concept.

    When a query returns several bindings the last one is used.

    Args:
        short_names: iterable of platform short names (preferred labels).
        vocabno: endpoint object providing ``setQuery``, ``setReturnFormat``
            and ``query().convert()`` (presumably a SPARQLWrapper instance;
            verify against the caller).

    Returns:
        dict mapping each short name to
        ``{'wmosatellites_ref': <url or None>, 'altLabel': <label or None>}``.
        A value is ``None`` when the corresponding query returned nothing.
    """
    # Ends with a newline so the prefix block never runs into the "select".
    prefixes = '''
    prefix skos:<http://www.w3.org/2004/02/skos/core#>
    prefix text:<http://jena.apache.org/text#>
    prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#>
    prefix owl:<http://www.w3.org/2002/07/owl#>
    prefix dc:<http://purl.org/dc/terms/>
    '''

    wmosatellites_ref = '''select distinct ?wmosatellites FROM <https://vocab.met.no/mmd> WHERE {
    ?concept skos:prefLabel "%(short_name)s"@en .
    ?concept rdfs:seeAlso ?wmosatellites .
    FILTER (contains(str(?wmosatellites), "wmo"))
    }'''

    matching_altlabel = '''select distinct ?altLabel FROM <https://vocab.met.no/mmd> WHERE {
    ?concept skos:prefLabel "%(short_name)s"@en .
    ?concept skos:altLabel ?altLabel .
    FILTER (lang(?altLabel) = "en") .
    }'''

    def _last_value(template, variable, short_name):
        # Run one query and return the value bound to ``variable`` in the
        # last result row, or None when there are no rows.
        vocabno.setQuery(prefixes + template % {'short_name': short_name})
        vocabno.setReturnFormat(JSON)
        bindings = vocabno.query().convert()["results"]["bindings"]
        if not bindings:
            return None
        return bindings[-1][variable]['value']

    platforms = {}
    for short_name in short_names:
        # Values are computed afresh for every short name, so an empty
        # result can neither raise nor inherit the previous platform's data.
        wmo_resource = _last_value(wmosatellites_ref, 'wmosatellites', short_name)
        platform_longname = _last_value(matching_altlabel, 'altLabel', short_name)
        platforms[short_name] = {'wmosatellites_ref': wmo_resource, 'altLabel': platform_longname}

    return platforms

def get_instrument(short_names, vocabno):
    """Look up the WMO reference and long name for each instrument short name.

    For every entry of ``short_names`` two SPARQL queries are sent through
    ``vocabno`` against the ``https://vocab.met.no/mmd`` graph:

    * the ``rdfs:seeAlso`` target of the concept whose English ``prefLabel``
      is the short name, restricted to values containing ``"wmo"``;
    * one English ``skos:altLabel`` of the same concept (``LIMIT 1``).

    When a query returns several bindings the last one is used.

    Args:
        short_names: iterable of instrument short names (preferred labels).
        vocabno: endpoint object providing ``setQuery``, ``setReturnFormat``
            and ``query().convert()`` (presumably a SPARQLWrapper instance;
            verify against the caller).

    Returns:
        dict mapping each short name to
        ``{'wmoinstruments_ref': <url or None>, 'altLabel': <label or None>}``.
        A value is ``None`` when the corresponding query returned nothing.
    """
    # Ends with a newline so the prefix block never runs into the "select".
    prefixes = '''
    prefix skos:<http://www.w3.org/2004/02/skos/core#>
    prefix text:<http://jena.apache.org/text#>
    prefix rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix rdfs:<http://www.w3.org/2000/01/rdf-schema#>
    prefix owl:<http://www.w3.org/2002/07/owl#>
    prefix dc:<http://purl.org/dc/terms/>
    '''

    wmoinstruments_ref = '''select distinct ?wmoinstruments FROM <https://vocab.met.no/mmd> WHERE {
    ?concept skos:prefLabel "%(short_name)s"@en .
    ?concept rdfs:seeAlso ?wmoinstruments .
    FILTER (contains(str(?wmoinstruments), "wmo"))
    }'''

    matching_altlabel = '''select distinct ?altLabel FROM <https://vocab.met.no/mmd> WHERE {
    ?concept skos:prefLabel "%(short_name)s"@en .
    ?concept skos:altLabel ?altLabel .
    FILTER (lang(?altLabel) = "en") .
    }
    LIMIT 1'''

    def _last_value(template, variable, short_name):
        # Run one query and return the value bound to ``variable`` in the
        # last result row, or None when there are no rows.
        vocabno.setQuery(prefixes + template % {'short_name': short_name})
        vocabno.setReturnFormat(JSON)
        bindings = vocabno.query().convert()["results"]["bindings"]
        if not bindings:
            return None
        return bindings[-1][variable]['value']

    instruments = {}
    for short_name in short_names:
        # Values are computed afresh for every short name, so an empty
        # result can neither raise nor inherit the previous instrument's data.
        wmo_resource = _last_value(wmoinstruments_ref, 'wmoinstruments', short_name)
        instrument_longname = _last_value(matching_altlabel, 'altLabel', short_name)
        instruments[short_name] = {'wmoinstruments_ref': wmo_resource, 'altLabel': instrument_longname}

    return instruments

def lookup_license(list_identifiers,vocabno):

license_lookup = {}
Expand Down Expand Up @@ -221,6 +314,8 @@ def main(voc):
'Dataset Production Status',
'Related Information Types',
'ISO Topic Category',
'Platform',
'Instrument',
'Keywords Vocabulary']
if voc == 'mmd':
get_MMDvocab(collections, vocabno)
Expand Down