Skip to content

Commit

Permalink
#7 add keywords and contacts readers
Browse files Browse the repository at this point in the history
  • Loading branch information
leodarengosse committed Jan 15, 2019
1 parent d9fe431 commit 108d0a8
Show file tree
Hide file tree
Showing 3 changed files with 279 additions and 19 deletions.
34 changes: 34 additions & 0 deletions isogeo_xml_toolbelt/list_themes_inspire.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
Altitude
Référentiels de coordonnées
Unités statistiques
Répartition de la population — démographie
Zones de gestion, de restriction ou de réglementation et unités de déclaration
Zones à risque naturel
Conditions atmosphériques
Caractéristiques géographiques météorologiques
Caractéristiques géographiques océanographiques
Régions maritimes
Régions biogéographiques
Habitats et biotopes
Répartition des espèces
Bâtiments
Occupation des terres
Systèmes de maillage géographique
Sources d'énergie
Ressources minérales
Dénominations géographiques
Ortho-imagerie
Sols
Géologie
Unités administratives
Usage des sols
Adresses
Santé et sécurité des personnes
Parcelles cadastrales
Services d'utilité publique et services publics
Installations de suivi environnemental
Réseaux de transport
Hydrographie
Lieux de production et sites industriels
Installations agricoles et aquacoles
Sites protégés
135 changes: 116 additions & 19 deletions isogeo_xml_toolbelt/reader_iso19139.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,17 @@
from lxml import etree

# submodules
try:
from .xml_19139_fields import Contact
except (ImportError, ValueError, SystemError):
from xml_19139_fields import Contact

try:
from .xml_utils import XmlUtils
except (ImportError, ValueError, SystemError):
from xml_utils import XmlUtils


# #############################################################################
# ########## Globals ###############
# ##################################
Expand All @@ -40,6 +46,7 @@
# utils
utils = XmlUtils()


# #############################################################################
# ########## Classes ###############
# ##################################
Expand Down Expand Up @@ -147,12 +154,13 @@ def __init__(self, xml: Path):
"gco:DateTime/text()",
self.namespaces)
self.md_date = utils.parse_string_for_max_date(md_dates_str)
self.contact = {
"mails": self.md.xpath(
"/gmd:MD_Metadata/gmd:contact/gmd:CI_ResponsibleParty/gmd:contactInfo/"
"gmd:CI_Contact/gmd:address/gmd:CI_Address/gmd:electronicMailAddress/gco:CharacterString/text()",
namespaces=self.namespaces)
}

#contacts
self.list_contacts = self.get_md_contacts()

#keywords
self.keywords = self.get_md_keywords()

# bounding box
self.bbox = []
try:
Expand Down Expand Up @@ -186,9 +194,6 @@ def __init__(self, xml: Path):
self.latmin = -90
self.latmax = 90

#Vector geometry
# self.geometry = self.get_vector_geometry(self.md)

self.geometry = utils.xmlGetTextTag(
self.md,
"gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/"
Expand Down Expand Up @@ -226,22 +231,112 @@ def __repr__(self):
def __str__(self):
return self.fileIdentifier

def get_md_contacts(self) -> list:
    """Collect every contact declared in the metadata.

    Two locations are read, in this order:

    1. ``gmd:contact`` directly under the root element;
    2. ``gmd:identificationInfo/gmd:MD_DataIdentification/gmd:pointOfContact``.

    Each matched element is wrapped in a ``Contact`` and exported with
    ``Contact.asDict()``.

    :return: list of contact dictionaries (empty when no contact is found)
    :rtype: list
    """
    root = self.md.getroot()  # get xml root

    # NOTE(review): the trailing "/" is kept from the original paths; ElementPath
    # appears to expand it to "/*" (children of the matched element, i.e. the
    # responsible-party node) - confirm against the lxml version in use.
    contact_paths = (
        "gmd:contact/",
        "gmd:identificationInfo/gmd:MD_DataIdentification/gmd:pointOfContact/",
    )

    return [
        Contact(element, self.namespaces).asDict()
        for path in contact_paths
        for element in root.findall(path, self.namespaces)
    ]


def get_md_keywords(self) -> list:
    """Collect the keywords declared in the data identification section.

    Reads every ``gco:CharacterString`` found under
    ``gmd:identificationInfo/gmd:MD_DataIdentification/gmd:descriptiveKeywords/
    gmd:MD_Keywords/gmd:keyword``.

    Some producers store several keywords in a single element, separated by
    semicolons, e.g. ``cycles ; circulations douces ; vélo ;``. Such values are
    split on ``;``. Every keyword is stripped of surrounding whitespace, and
    empty values (empty element, trailing separator) are dropped.

    :return: keywords in document order (empty list when none is found)
    :rtype: list
    """
    keywords_path = (
        "gmd:identificationInfo/"
        "gmd:MD_DataIdentification/"
        "gmd:descriptiveKeywords/"
        "gmd:MD_Keywords/gmd:keyword/gco:CharacterString"
    )

    md_keywords = []
    root = self.md.getroot()  # get xml root

    for kw in root.findall(keywords_path, self.namespaces):
        # an empty <gco:CharacterString/> has text None: nothing to read
        if not kw.text:
            continue
        for part in kw.text.split(";"):
            cleaned = part.strip()
            if cleaned:
                md_keywords.append(cleaned)

    return md_keywords

def asDict(self) -> dict:
"""Retrun object as a structured dictionary key: value."""
return {
"filename": self.filename,
"fileIdentifier": self.fileIdentifier,
"MD_Identifier": self.MD_Identifier,
"md_date": self.md_date,
"type": self.storageType,
"title": self.title,
"OrganisationName": self.OrganisationName,
"abstract": self.abstract,
"parentidentifier": self.parentIdentifier,
"type": self.storageType,
"OrganisationName": self.OrganisationName,
"keywords": self.keywords,
"formatName": self.formatName,
"formatVersion": self.formatVersion,
"contacts": self.list_contacts,
"md_date": self.md_date,
"date": self.date,
"contact": self.contact,
"geometry": self.geometry,
"srs": "{}:{}".format(self.srs_codeSpace, self.srs_code),
"latmin": self.latmin,
Expand All @@ -250,21 +345,23 @@ def asDict(self) -> dict:
"lonmax": self.lonmax,
"featureCount": self.featureCount,
"featureCatalogs": self.featureCatalogs,
"storageType": self.storageType
"storageType": self.storageType,
"parentidentifier": self.parentIdentifier

}



# #############################################################################
# ### Stand alone execution #######
# #################################

if __name__ == "__main__":
"""Test parameters for a stand-alone run."""
# Manual smoke run: gather XML files recursively, load each one as a
# MetadataIso19139 and print values taken from its asDict() output.
li_fixtures_xml = sorted(Path(r"tests/fixtures/").glob("**/*.xml"))
# li_fixtures_xml = sorted(Path(r"input").glob("**/*.xml"))
# li_fixtures_xml = sorted(Path(r"tests/fixtures/").glob("**/*.xml"))
# NOTE(review): this second assignment replaces the list built above, so the
# files under "input" are the ones iterated - confirm which folder is intended.
li_fixtures_xml = sorted(Path(r"input").glob("**/*.xml"))
for xml_path in li_fixtures_xml:
test = MetadataIso19139(xml=xml_path)
# print(test.asDict().get("title"), test.asDict().get("storageType"))
print(test.asDict())
# title and keywords only; asDict() is evaluated once per .get() call
print(test.asDict().get("title"), test.asDict().get("keywords"))
# print(test.asDict())
# print(xml_path.resolve(), test.storageType)
129 changes: 129 additions & 0 deletions isogeo_xml_toolbelt/xml_19139_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
#! python3

"""
Isogeo XML Fixer - Metadata
Purpose: Read a metadata stored into XML ISO 19139 as an object
Authors: Isogeo, inspired by the work done by GeoBretagne on mdchecker
Python: 3.6.x
"""

# #############################################################################
# ########## Libraries #############
# ##################################

# standard library
import datetime
import logging
import os
from pathlib import Path
from uuid import UUID

# 3rd party library
import arrow
from lxml import etree

# #############################################################################
# ########## Globals ###############
# ##################################

# logging
logging.basicConfig(level=logging.INFO)


# #############################################################################
# ########## Classes ###############
# ##################################

class Contact(object):
    """Contact read from an ISO 19139 XML metadata.

    Every field is optional: when the matching XML element is absent the
    attribute is set to ``None`` instead of raising.

    :param contact: XML element describing the contact, exposing
        ``find(path, namespaces)``. Callers in this package pass the children of
        ``gmd:contact`` / ``gmd:pointOfContact`` (expected to be
        ``gmd:CI_ResponsibleParty`` elements). ``None`` is accepted and yields a
        contact whose fields are all ``None``.
    :param dict namespaces: XML namespaces (prefix -> URI), must declare at
        least the ``gmd`` and ``gco`` prefixes used by the lookup paths
    """

    def __init__(self, contact, namespaces):
        """Read every contact field from the given element."""
        self.namespaces = namespaces
        # common prefix of all the postal / e-mail address fields
        self.adr_path = "gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address/"

        self.name = self._text(
            contact, "gmd:individualName/gco:CharacterString")
        self.organisation = self._text(
            contact, "gmd:organisationName/gco:CharacterString")

        # address block
        self.rue = self._text(
            contact, self.adr_path + "gmd:deliveryPoint/gco:CharacterString")
        self.ville = self._text(
            contact, self.adr_path + "gmd:city/gco:CharacterString")
        self.cp = self._text(
            contact, self.adr_path + "gmd:postalCode/gco:CharacterString")
        self.country = self._text(
            contact, self.adr_path + "gmd:country/gco:CharacterString")
        self.mail = self._text(
            contact,
            self.adr_path + "gmd:electronicMailAddress/gco:CharacterString")

        self.telephone = self._text(
            contact,
            "gmd:contactInfo/gmd:CI_Contact/"
            "gmd:phone/gmd:CI_Telephone/gmd:voice/gco:CharacterString")

        # the role is carried by an attribute, not by the element text
        role_node = self._find(contact, "gmd:role/gmd:CI_RoleCode")
        self.role = None if role_node is None else role_node.get("codeListValue")

    def _find(self, contact, path):
        """Return the first element matching ``path`` under ``contact``, else None."""
        if contact is None:
            return None
        return contact.find(path, self.namespaces)

    def _text(self, contact, path):
        """Return the text of the first element matching ``path``, else None."""
        node = self._find(contact, path)
        return None if node is None else node.text

    def asDict(self) -> dict:
        """Return contact as a structured dictionary key: value."""
        return {
            "name": self.name,
            "organisation": self.organisation,
            "role": self.role,
            "rue": self.rue,
            "ville": self.ville,
            "cp": self.cp,
            "country": self.country,
            "mail": self.mail,
            "telephone": self.telephone,
        }

# #############################################################################
# ### Stand alone execution #######
# #################################
if __name__ == "__main__":
    """Test parameters for a stand-alone run."""
    # Manual smoke run: print the contacts found directly under gmd:contact
    # for every XML file stored (recursively) in the "input" folder.
    namespaces = {
        "gts": "http://www.isotc211.org/2005/gts",
        "gml": "http://www.opengis.net/gml",
        "xsi": "http://www.w3.org/2001/XMLSchema-instance",
        "gco": "http://www.isotc211.org/2005/gco",
        "gmd": "http://www.isotc211.org/2005/gmd",
        "gmx": "http://www.isotc211.org/2005/gmx",
        "srv": "http://www.isotc211.org/2005/srv",
        "xl": "http://www.w3.org/1999/xlink"}

    li_fixtures_xml = sorted(Path(r"input").glob("**/*.xml"))

    for xml_path in li_fixtures_xml:
        # lxml needs a str not a Path
        xml_path = str(xml_path.resolve())

        md = etree.parse(xml_path)
        root = md.getroot()  # get xml root

        # get contacts in gmd:contact
        for ct in root.findall("gmd:contact/", namespaces):
            # Contact requires the namespaces map to resolve the gmd:/gco: prefixes
            print(Contact(ct, namespaces).asDict())


0 comments on commit 108d0a8

Please sign in to comment.