Skip to content

Commit

Permalink
add function to get geometry and dataset type
Browse files Browse the repository at this point in the history
  • Loading branch information
leodarengosse committed Jan 3, 2019
1 parent 4573e53 commit d9fe431
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"python.pythonPath": "C:\\Users\\julien.moura\\.virtualenvs\\iso19139_xml_fixer-Jl-kKthK\\Scripts\\python.exe",
"python.pythonPath": "C:\\Users\\leo.darengosse\\.virtualenvs\\isogeo-xml-toolbelt-e_OlFPPv\\Scripts\\python.exe",
"python.unitTest.unittestArgs": [
"-v",
"-s",
Expand Down
44 changes: 23 additions & 21 deletions isogeo_xml_toolbelt/reader_iso19139.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,11 @@ def __init__(self, xml: Path):
self.namespaces)

# vector or raster
self.storageType = self.get_dataset_type(self.md)


self.storageType = utils.xmlGetTextTag(
self.md,
"/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification/gmd:spatialRepresentationType/gmd:MD_SpatialRepresentationTypeCode/text()",
self.namespaces)

# format
self.formatName = utils.xmlGetTextNodes(
self.md,
Expand Down Expand Up @@ -184,6 +186,15 @@ def __init__(self, xml: Path):
self.latmin = -90
self.latmax = 90

#Vector geometry
# self.geometry = self.get_vector_geometry(self.md)

self.geometry = utils.xmlGetTextTag(
self.md,
"gmd:spatialRepresentationInfo/gmd:MD_VectorSpatialRepresentation/"
"gmd:geometricObjects/gmd:MD_GeometricObjects/gmd:geometricObjectType/gmd:MD_GeometricObjectTypeCode/text()",
self.namespaces)

# SRS
self.srs_code = utils.xmlGetTextNodes(
self.md,
Expand Down Expand Up @@ -215,19 +226,6 @@ def __repr__(self):
def __str__(self):
return self.fileIdentifier

def get_dataset_type(self, doc):
    """Determine the dataset type: vector / raster / service / series or not defined.

    The value is read from the ``gmd:MD_SpatialRepresentationTypeCode`` element,
    first from its text nodes and, when there are none, from its
    ``codeListValue`` attribute.

    :param doc: parsed XML document exposing an ``xpath`` method
      (presumably an lxml tree -- confirm against the caller)

    :returns: the text returned by ``utils.xmlGetTextNodes`` when it is not
      empty; otherwise the ``codeListValue`` attribute of the first matching
      element; ``None`` when the attribute or the element itself is missing.
    """
    type_code_xpath = (
        "/gmd:MD_Metadata/gmd:identificationInfo/gmd:MD_DataIdentification"
        "/gmd:spatialRepresentationType/gmd:MD_SpatialRepresentationTypeCode"
    )

    # First choice: the value written as element text.
    storage_type = utils.xmlGetTextNodes(
        doc,
        type_code_xpath + "/text()",
        self.namespaces)
    if storage_type:
        return storage_type

    # Fallback: empty element carrying the value in its codeListValue attribute.
    elements = doc.xpath(type_code_xpath, namespaces=self.namespaces)
    if elements:
        return elements[0].get("codeListValue", None)

    # Nothing matched: report "not defined" as None rather than leaking the
    # empty xpath result list to the caller.
    return None

def asDict(self) -> dict:
"""Retrun object as a structured dictionary key: value."""
return {
Expand All @@ -244,13 +242,16 @@ def asDict(self) -> dict:
"formatVersion": self.formatVersion,
"date": self.date,
"contact": self.contact,
"geometry": self.geometry,
"srs": "{}:{}".format(self.srs_codeSpace, self.srs_code),
"latmin": self.latmin,
"latmax": self.latmax,
"lonmin": self.lonmin,
"lonmax": self.lonmax,
"featureCount": self.featureCount,
"featureCatalogs": self.featureCatalogs
"featureCatalogs": self.featureCatalogs,
"storageType": self.storageType

}


Expand All @@ -260,9 +261,10 @@ def asDict(self) -> dict:

if __name__ == "__main__":
"""Test parameters for a stand-alone run."""
li_fixtures_xml = sorted(Path(r"tests/fixtures").glob("**/*.xml"))
li_fixtures_xml += sorted(Path(r"input").glob("**/*.xml"))
li_fixtures_xml = sorted(Path(r"tests/fixtures/").glob("**/*.xml"))
# li_fixtures_xml = sorted(Path(r"input").glob("**/*.xml"))
for xml_path in li_fixtures_xml:
test = MetadataIso19139(xml=xml_path)
#print(test.asDict().get("title"), test.asDict().get("srs"))
print(xml_path.resolve(), test.storageType)
# print(test.asDict().get("title"), test.asDict().get("storageType"))
print(test.asDict())
# print(xml_path.resolve(), test.storageType)
26 changes: 26 additions & 0 deletions isogeo_xml_toolbelt/xml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,32 @@ def xmlGetTextNodes(self, doc: etree._ElementTree, xpath: str, namespaces: dict)
"""
return ", ".join(doc.xpath(xpath, namespaces=namespaces))

def xmlGetTextTag(self, doc: "etree._ElementTree", xpath: str, namespaces: dict):
    """Get a code value from the text of a tag or, failing that, from its attribute.

    Used when the information is not always stored in the text nodes matching
    the xpath but may be carried by the ``codeListValue`` attribute instead.

    :param lxml.etree._ElementTree doc: XML element to parse
    :param str xpath: Xpath to reach (callers pass a path ending with ``/text()``)
    :param dict namespaces: XML namespaces like 'lxml.etree.getroot().nsmap'

    :returns: the joined text nodes when there are any; otherwise the
      ``codeListValue`` attribute of the first element matching the xpath
      without ``/text()`` (``None`` if that attribute is absent); the string
      ``"None"`` when no element matches at all.
    """
    # XML Isogeo example: <MD_GeometricObjectTypeCode codeList="http://...#MD_GeometricObjectTypeCode" codeListValue="surface">surface</MD_GeometricObjectTypeCode>
    text = self.xmlGetTextNodes(
        doc,
        xpath,
        namespaces)
    if text:
        return text

    # XML GeoSource example: <gmd:MD_GeometricObjectTypeCode codeList="http://...#MD_GeometricObjectTypeCode" codeListValue="surface" />
    # Target the element itself; strip the text() step without leaving any
    # stray whitespace in the expression.
    element_xpath = xpath.replace("/text()", "")
    elements = doc.xpath(element_xpath, namespaces=namespaces)
    if elements:
        return elements[0].get("codeListValue", None)

    # Kept as the literal string "None" for backward compatibility with callers.
    return "None"

def parse_string_for_max_date(self, dates_as_str: str):
"""Parse string with multiple dates to extract the most recent one. Used
Expand Down

1 comment on commit d9fe431

@Guts
Copy link
Contributor

@Guts Guts commented on d9fe431 Jan 7, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lié à #7

Please sign in to comment.