# Add platform (and, when possible, its instrument) to the MMD tree. Relies on
# the Platform and Instrument controlled vocabularies in MMD: only names found
# there are written out.
def add_platform(self, myxmltree, mynsmap, ncin, myattrs):
    """Append MMD ``platform`` elements, with nested ``instrument``, to a tree.

    The ``platform`` attribute of *ncin* is a comma separated list. Each
    entry is either a short (preferred) name or a long name followed by the
    short name in parentheses, e.g.::

        :platform = "NOAA-21, GCOM-W1" ;
        :platform = "some long name (NOAA-21), someother long name (GCOM-W1)" ;

    For every short name present in the Platform vocabulary a ``platform``
    element is added with ``short_name``, ``long_name`` (the vocabulary
    ``altLabel``) and ``resource`` (the vocabulary ``wmosatellites_ref``).
    Entries that are not in the vocabulary are skipped.

    Vocabulary entries are dictionaries of the form::

        {'Metop-B': {'wmosatellites_ref': 'https://.../metop-b',
                     'altLabel': 'Meteorological operational satellite - B'}}

    The Instrument vocabulary has the same layout, with
    ``wmoinstruments_ref`` providing the resource.

    Args:
        myxmltree: parent element that receives the ``platform`` elements.
        mynsmap: namespace map; ``mynsmap['mmd']`` is the MMD namespace.
        ncin: object exposing ``platform`` (and optionally ``instrument``)
            as string attributes.
        myattrs: collection of attribute names available on *ncin*.

    Raises:
        AttributeError: if *ncin* has no ``platform`` attribute, or if
            ``'instrument'`` is listed in *myattrs* but missing on *ncin*.
    """
    platform_vocab = self.vocabulary.ControlledVocabulary.Platform
    instrument_vocab = self.vocabulary.ControlledVocabulary.Instrument
    mmd_ns = mynsmap['mmd']

    def short_name_of(entry):
        """Return the candidate short name of one comma separated entry."""
        entry = entry.strip()
        # The short name is the content of the last parenthesised group.
        # When there is no well-formed group the entry is used as it is,
        # instead of failing on a missing regex match.
        groups = re.findall(r'\(([^()]*)\)', entry)
        if groups:
            return groups[-1].strip()
        return entry

    def add_entry(parent, tag, short_name, record, resource_key):
        """Add <tag> with short_name, long_name and resource children."""
        node = ET.SubElement(parent, ET.QName(mmd_ns, tag))
        ET.SubElement(node, ET.QName(mmd_ns, 'short_name')).text = short_name
        ET.SubElement(node, ET.QName(mmd_ns, 'long_name')).text = record['altLabel']
        ET.SubElement(node, ET.QName(mmd_ns, 'resource')).text = record[resource_key]
        return node

    platforms = getattr(ncin, 'platform').split(',')

    # For now, the instrument nested within a platform is populated only if
    # the platform and instrument lists have the same length; they are then
    # paired by position. There is currently no quality check that the
    # instrument is actually onboard the platform.
    instruments = None
    if 'instrument' in myattrs:
        candidates = getattr(ncin, 'instrument').split(',')
        if len(candidates) == len(platforms):
            instruments = candidates

    for index, raw_platform in enumerate(platforms):
        platform_name = short_name_of(raw_platform)
        if platform_name not in platform_vocab:
            continue
        platform_el = add_entry(myxmltree, 'platform', platform_name,
                                platform_vocab[platform_name],
                                'wmosatellites_ref')

        if instruments is None:
            continue
        instrument_name = short_name_of(instruments[index])
        if instrument_name in instrument_vocab:
            add_entry(platform_el, 'instrument', instrument_name,
                      instrument_vocab[instrument_name],
                      'wmoinstruments_ref')
['Creative Commons Attribution Non Commercial 4.0 International', 'Attribution-NonCommercial', 'CC BY-NC 4.0']}, 'CC-BY-NC-SA-4.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-NC-SA-4.0', 'https://creativecommons.org/licenses/by-nc-sa/4.0/'], 'altLabel': ['Creative Commons Attribution Non Commercial Share Alike 4.0 International', 'Attribution-NonCommercial-ShareAlike', 'CC BY-NC-SA 4.0']}, 'CC-BY-ND-4.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-ND-4.0', 'https://creativecommons.org/licenses/by-nd/4.0/'], 'altLabel': ['Creative Commons Attribution No Derivatives 4.0 International', 'Attribution-NoDerivs', 'CC BY-ND 4.0']}, 'CC-BY-NC-ND-4.0': {'exactMatch': ['https://creativecommons.org/licenses/by-nc-nd/4.0/', 'http://spdx.org/licenses/CC-BY-NC-ND-4.0'], 'altLabel': ['Creative Commons Attribution Non Commercial No Derivatives 4.0 International', 'Attribution-NonCommercial-NoDerivs', 'CC BY-NC-ND 4.0']}, 'CC-BY-3.0': {'exactMatch': ['http://spdx.org/licenses/CC-BY-3.0', 'https://creativecommons.org/licenses/by/3.0/'], 'altLabel': ['Creative Commons Attribution 3.0 Unported', 'CC BY 3.0']}} AccessConstraint = ['Open', 'Registered users only (automated approval)', 'Registered users only (manual approval required)', 'Restricted to a community', 'Restricted access to metadata'] ActivityType = ['Aircraft', 'Space Borne Instrument', 'Numerical Simulation', 'Climate Indicator', 'In Situ Land-based station', 'In Situ Ship-based station', 'In Situ Ocean fixed station', 'In Situ Ocean moving station', 'In Situ Ice-based station', 'Interview/Questionnaire', 'Maps/Charts/Photographs', 'Not available'] @@ -9,4 +9,6 @@ DatasetProductionStatus = ['Planned', 'In Work', 'Complete', 'Obsolete', 'Not available'] RelatedInformationTypes = ['Project home page', 'Users guide', 'Dataset landing page', 'Scientific publication', 'Data paper', 'Data management plan', 'Software', 'Other documentation', 'Observation facility', 'Extended metadata'] ISOTopicCategory = 
['inlandWaters', 'intelligenceMilitary', 'climatologyMeteorologyAtmosphere', 'utilitiesCommunications', 'farming', 'imageryBaseMapsEarthCover', 'structure', 'health', 'elevation', 'society', 'environment', 'extraTerrestrial', 'biota', 'disaster', 'transportation', 'geoscientificInformation', 'oceans', 'economy', 'planningCadastre', 'location', 'boundaries', 'Not available'] +Platform = {'Sentinel-1A': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_1a', 'altLabel': 'Sentinel-1A'}, 'Sentinel-1B': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_1b', 'altLabel': 'Sentinel-1B'}, 'Sentinel-2A': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_2a', 'altLabel': 'Sentinel-2A'}, 'Sentinel-2B': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_2b', 'altLabel': 'Sentinel-2B'}, 'Sentinel-3A': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_3a', 'altLabel': 'Sentinel-3A'}, 'Sentinel-3B': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/sentinel_3b', 'altLabel': 'Sentinel-3B'}, 'Metop-A': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/metop-a', 'altLabel': 'Meteorological operational satellite - A'}, 'Metop-B': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/metop-b', 'altLabel': 'Meteorological operational satellite - B'}, 'Metop-C': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/metop-c', 'altLabel': 'Meteorological operational satellite - C'}, 'NOAA-18': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/noaa_18', 'altLabel': 'National Oceanic and Atmospheric Administration - 18'}, 'NOAA-19': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/noaa_19', 'altLabel': 'National Oceanic and Atmospheric Administration - 19'}, 'NOAA-20': {'wmosatellites_ref': 
'https://www.wmo-sat.info/oscar/satellites/view/noaa_20', 'altLabel': 'National Oceanic and Atmospheric Administration - 20'}, 'SNPP': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/snpp', 'altLabel': 'Suomi National Polar-orbiting Partnership'}, 'Aqua': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/aqua', 'altLabel': 'Earth Observation System - Aqua'}, 'Terra': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/terra', 'altLabel': 'Earth Observation System - Terra'}, 'FY-3D': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/fy_3d', 'altLabel': 'Feng-Yun 3D'}, 'FY-3E': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/fy_3e', 'altLabel': 'Feng-Yun 3E'}, 'GCOM-W1': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/gcom_w', 'altLabel': 'Global Change Observation Mission 1st-Water'}, 'NOAA-21': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/noaa_21', 'altLabel': 'National Oceanic and Atmospheric Administration - 21'}, 'Envisat': {'wmosatellites_ref': 'https://www.wmo-sat.info/oscar/satellites/view/envisat', 'altLabel': 'Environmental Satellite'}} +Instrument = {'SAR-C': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/sar_c_sentinel_1', 'altLabel': 'Synthetic Aperture Radar (C-band)'}, 'MSI': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/msi_sentinel_2a', 'altLabel': 'Multi-Spectral Imager for Sentinel-2'}, 'OLCI': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/olci', 'altLabel': 'Ocean and Land Colour Imager'}, 'SLSTR': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/slstr', 'altLabel': 'Sea and Land Surface Temperature Radiometer'}, 'MWR': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/mwr_sentinel_3', 'altLabel': 'Micro-Wave Radiometer'}, 'AVHRR/3': {'wmoinstruments_ref': 
'https://www.wmo-sat.info/oscar/instruments/view/avhrr_3', 'altLabel': 'Advanced Very High Resolution Radiometer / 3'}, 'VIIRS': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/viirs', 'altLabel': 'Visible/Infrared Imager Radiometer Suite'}, 'MODIS': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/modis', 'altLabel': 'Moderate-resolution Imaging Spectro-radiometer'}, 'MERSI-2': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/mersi_2', 'altLabel': 'Medium Resolution Spectral Imager -2'}, 'AMSR2': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/amsr2', 'altLabel': 'Advanced Microwave Scanning Radiometer 2'}, 'SSM/I': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/ssm_i', 'altLabel': 'Special Sensor Microwave - Imager'}, 'AVHRR': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/avhrr', 'altLabel': 'Advanced Very High Resolution Radiometer'}, 'ASAR': {'wmoinstruments_ref': 'https://www.wmo-sat.info/oscar/instruments/view/asar', 'altLabel': 'Advanced Synthetic Aperature Radar'}} KeywordsVocabulary = ['GCMDSK', 'GCMDLOC', 'GCMDPROV', 'CFSTDN', 'GEMET', 'NORTHEMES', 'None'] diff --git a/src/vocab/get_vocab.py b/src/vocab/get_vocab.py index 9240427..2e32229 100755 --- a/src/vocab/get_vocab.py +++ b/src/vocab/get_vocab.py @@ -38,6 +38,12 @@ def get_MMDvocab(collections, vocabno): if collection == 'Use Constraint': licenses = lookup_license(members, vocabno) fullvoc += "".join(collection.split()) + ' = ' + str(licenses) + "\n" + elif collection == 'Platform': + platforms = get_platform(members,vocabno) + fullvoc += "".join(collection.split()) + ' = ' + str(platforms) + "\n" + elif collection == 'Instrument': + instruments = get_instrument(members,vocabno) + fullvoc += "".join(collection.split()) + ' = ' + str(instruments) + "\n" else: fullvoc += "".join(collection.split()) + ' = ' + str(members) + "\n" @@ -52,6 +58,93 @@ def 
def _fetch_values(vocabno, query, variable):
    """Run *query* on *vocabno* and return the values bound to *variable*.

    The values are returned in the order of the result bindings; the list is
    empty when the query has no results.
    """
    vocabno.setQuery(query)
    vocabno.setReturnFormat(JSON)
    response = vocabno.query().convert()
    return [row[variable]['value'] for row in response["results"]["bindings"]]


def _lookup_refs_and_labels(short_names, vocabno, prefixes, ref_query,
                            ref_variable, ref_key, altlabel_query):
    """Build ``{short_name: {ref_key: ..., 'altLabel': ...}}`` for each name.

    When a query returns several bindings the last one is kept. When it
    returns none the value is ``None``, so that a concept without a match
    neither raises nor inherits the values of the previous concept.
    """
    found = {}
    for short_name in short_names:
        substitution = {'short_name': short_name}
        refs = _fetch_values(vocabno, prefixes + ref_query % substitution,
                             ref_variable)
        labels = _fetch_values(vocabno, prefixes + altlabel_query % substitution,
                               'altLabel')
        found[short_name] = {ref_key: refs[-1] if refs else None,
                             'altLabel': labels[-1] if labels else None}
    return found


def get_platform(short_names,vocabno):
    """Return WMO reference and English altLabel for each platform name.

    Args:
        short_names: iterable of platform preferred labels.
        vocabno: SPARQL endpoint wrapper offering ``setQuery``,
            ``setReturnFormat`` and ``query().convert()``.

    Returns:
        dict mapping each short name to
        ``{'wmosatellites_ref': <str or None>, 'altLabel': <str or None>}``.
    """
    # NOTE(review): as reviewed, the prefix declarations carry no IRI and the
    # FROM clauses carry no graph IRI; SPARQL requires both. Confirm the
    # query text against the vocabulary service before relying on it.
    prefixes = '''
    prefix skos:
    prefix text:
    prefix rdf:
    prefix rdfs:
    prefix owl:
    prefix dc:'''

    wmosatellites_ref = '''select distinct ?wmosatellites FROM WHERE {
    ?concept skos:prefLabel "%(short_name)s"@en .
    ?concept rdfs:seeAlso ?wmosatellites .
    FILTER (contains(str(?wmosatellites), "wmo"))
    }'''

    matching_altlabel = '''select distinct ?altLabel FROM WHERE {
    ?concept skos:prefLabel "%(short_name)s"@en .
    ?concept skos:altLabel ?altLabel .
    FILTER (lang(?altLabel) = "en") .
    }'''

    return _lookup_refs_and_labels(short_names, vocabno, prefixes,
                                   wmosatellites_ref, 'wmosatellites',
                                   'wmosatellites_ref', matching_altlabel)


def get_instrument(short_names,vocabno):
    """Return WMO reference and English altLabel for each instrument name.

    Args:
        short_names: iterable of instrument preferred labels.
        vocabno: SPARQL endpoint wrapper offering ``setQuery``,
            ``setReturnFormat`` and ``query().convert()``.

    Returns:
        dict mapping each short name to
        ``{'wmoinstruments_ref': <str or None>, 'altLabel': <str or None>}``.
    """
    # NOTE(review): as reviewed, the prefix declarations carry no IRI and the
    # FROM clauses carry no graph IRI; SPARQL requires both. Confirm the
    # query text against the vocabulary service before relying on it.
    prefixes = '''
    prefix skos:
    prefix text:
    prefix rdf:
    prefix rdfs:
    prefix owl:
    prefix dc:'''

    wmoinstruments_ref = '''select distinct ?wmoinstruments FROM WHERE {
    ?concept skos:prefLabel "%(short_name)s"@en .
    ?concept rdfs:seeAlso ?wmoinstruments .
    FILTER (contains(str(?wmoinstruments), "wmo"))
    }'''

    # Unlike the platform query, only one alternative label is requested.
    matching_altlabel = '''select distinct ?altLabel FROM WHERE {
    ?concept skos:prefLabel "%(short_name)s"@en .
    ?concept skos:altLabel ?altLabel .
    FILTER (lang(?altLabel) = "en") .
    }
    LIMIT 1'''

    return _lookup_refs_and_labels(short_names, vocabno, prefixes,
                                   wmoinstruments_ref, 'wmoinstruments',
                                   'wmoinstruments_ref', matching_altlabel)