Skip to content

Commit

Permalink
Add general XML de-serialization
Browse files Browse the repository at this point in the history
We introduce additional functions which can de-serialize all model
instances by determining the type based on the first start element and
dispatching to the concrete de-serialization function accordingly.

Based on [aas-core-codegen 3cacd0b3].

[aas-core-codegen 3cacd0b3]: aas-core-works/aas-core-codegen@3cacd0b3
  • Loading branch information
mristin committed Oct 27, 2024
1 parent 4ad3ed8 commit 2145090
Show file tree
Hide file tree
Showing 3 changed files with 354 additions and 0 deletions.
241 changes: 241 additions & 0 deletions aas_core3/xmlization.py
Original file line number Diff line number Diff line change
Expand Up @@ -9261,6 +9261,174 @@ def data_specification_iec_61360_from_str(
)


def from_iterparse(iterator: Iterator[Tuple[str, Element]]) -> aas_types.Class:
    """
    Read an instance of a concrete model class from the :paramref:`iterator`.

    The class to de-serialize is not fixed in advance; it is selected
    from the very first start element delivered by the iterator.

    Example usage:

    .. code-block::

        import pathlib
        import xml.etree.ElementTree as ET

        import aas_core3.xmlization as aas_xmlization

        path = pathlib.Path(...)
        with path.open("rt") as fid:
            iterator = ET.iterparse(
                source=fid,
                events=['start', 'end']
            )
            instance = aas_xmlization.from_iterparse(
                iterator
            )

        # Do something with the ``instance``

    :param iterator:
        Input stream of ``(event, element)`` coming from
        :py:func:`xml.etree.ElementTree.iterparse` with the argument
        ``events=["start", "end"]``
    :raise:
        :py:class:`DeserializationException` if the input is exhausted,
        does not begin with a start event, or is otherwise unexpected
    :return:
        Instance of :py:class:`.types.Class` read from the :paramref:`iterator`
    """
    first = next(iterator, None)
    if first is None:
        # The iterator was already exhausted, so there is nothing to dispatch on.
        raise DeserializationException(
            "Expected the start element of an instance, "
            "but got the end-of-input"
        )

    event, start_element = first
    if event != "start":
        raise DeserializationException(
            f"Expected the start element of an instance, "
            f"but got event {event!r} and element {start_element.tag!r}"
        )

    try:
        instance = _read_as_element(start_element, iterator)
    except DeserializationException as exception:
        # Put the root element in front of the error path so that the path
        # starts at the element we dispatched on.
        exception.path._prepend(ElementSegment(start_element))
        raise exception

    return instance


def from_stream(
    stream: TextIO, has_iterparse: HasIterparse = xml.etree.ElementTree
) -> aas_types.Class:
    """
    Read an instance of a concrete model class from the :paramref:`stream`.

    The class to de-serialize is selected from the very first start element.

    Example usage:

    .. code-block::

        import aas_core3.xmlization as aas_xmlization

        with open_some_stream_over_network(...) as stream:
            instance = aas_xmlization.from_stream(
                stream
            )

        # Do something with the ``instance``

    :param stream:
        representing an instance in XML
    :param has_iterparse:
        Module containing ``iterparse`` function.

        Default is to use :py:mod:`xml.etree.ElementTree` from the standard
        library. If you have to deal with malicious input, consider using
        a library such as `defusedxml.ElementTree`_.
    :raise: :py:class:`DeserializationException` if unexpected input
    :return:
        Instance read from :paramref:`stream`
    """
    # Both start and end events are requested, as required by ``from_iterparse``.
    events = has_iterparse.iterparse(stream, ["start", "end"])
    wrapped_events = _with_elements_cleared_after_yield(events)
    return from_iterparse(wrapped_events)


def from_file(
    path: PathLike, has_iterparse: HasIterparse = xml.etree.ElementTree
) -> aas_types.Class:
    """
    Read an instance of a concrete model class from the file at :paramref:`path`.

    The file is opened in text mode as UTF-8. The class to de-serialize is
    selected from the very first start element.

    Example usage:

    .. code-block::

        import pathlib
        import aas_core3.xmlization as aas_xmlization

        path = pathlib.Path(...)
        instance = aas_xmlization.from_file(
            path
        )

        # Do something with the ``instance``

    :param path:
        to the file representing an instance in XML
    :param has_iterparse:
        Module containing ``iterparse`` function.

        Default is to use :py:mod:`xml.etree.ElementTree` from the standard
        library. If you have to deal with malicious input, consider using
        a library such as `defusedxml.ElementTree`_.
    :raise: :py:class:`DeserializationException` if unexpected input
    :return:
        Instance read from the file at :paramref:`path`
    """
    with open(os.fspath(path), "rt", encoding="utf-8") as fid:
        # The instance is read completely while the file is still open.
        return from_iterparse(
            _with_elements_cleared_after_yield(
                has_iterparse.iterparse(fid, ["start", "end"])
            )
        )


def from_str(
    text: str, has_iterparse: HasIterparse = xml.etree.ElementTree
) -> aas_types.Class:
    """
    Read an instance of a concrete model class from the :paramref:`text`.

    The class to de-serialize is selected from the very first start element.

    Example usage:

    .. code-block::

        import aas_core3.xmlization as aas_xmlization

        text = "<...>...</...>"
        instance = aas_xmlization.from_str(
            text
        )

        # Do something with the ``instance``

    :param text:
        representing an instance in XML
    :param has_iterparse:
        Module containing ``iterparse`` function.

        Default is to use :py:mod:`xml.etree.ElementTree` from the standard
        library. If you have to deal with malicious input, consider using
        a library such as `defusedxml.ElementTree`_.
    :raise: :py:class:`DeserializationException` if unexpected input
    :return:
        Instance read from :paramref:`text`
    """
    # Wrap the text in an in-memory stream so that it can be parsed
    # incrementally like any other source.
    source = io.StringIO(text)
    events = has_iterparse.iterparse(source, ["start", "end"])
    return from_iterparse(_with_elements_cleared_after_yield(events))


# NOTE (mristin, 2022-10-08):
# Directly using the iterator turned out to result in very complex function
# designs. The design became much simpler as soon as we considered one look-ahead
Expand Down Expand Up @@ -22567,6 +22735,33 @@ def _read_data_specification_iec_61360_as_element(
return _read_data_specification_iec_61360_as_sequence(element, iterator)


def _read_as_element(
    element: Element, iterator: Iterator[Tuple[str, Element]]
) -> aas_types.Class:
    """
    Read an instance from :paramref:`iterator`, including the end element.

    The tag of the :paramref:`element` is looked up in ``_GENERAL_DISPATCH``
    and the reading is delegated to the function registered for it.

    :param element: start element
    :param iterator:
        Input stream of ``(event, element)`` coming from
        :py:func:`xml.etree.ElementTree.iterparse` with the argument
        ``events=["start", "end"]``
    :raise:
        :py:class:`DeserializationException` if the tag does not correspond
        to a concrete class or the input is otherwise unexpected
    :return: parsed instance
    """
    tag_wo_ns = _parse_element_tag(element)

    if tag_wo_ns not in _GENERAL_DISPATCH:
        raise DeserializationException(
            f"Expected the element tag to be a valid model type "
            f"of a concrete instance, "
            f"but got tag {tag_wo_ns!r}"
        )

    reader = _GENERAL_DISPATCH[tag_wo_ns]
    return reader(element, iterator)


#: Dispatch XML class names to read-as-sequence functions
#: corresponding to concrete descendants of HasSemantics
_DISPATCH_FOR_HAS_SEMANTICS: Mapping[
Expand Down Expand Up @@ -23548,6 +23743,52 @@ def _read_data_specification_iec_61360_as_element(
}


#: Dispatch XML class names to read-as-sequence functions
#: corresponding to the concrete classes.
#:
#: The keys are the tags which ``_read_as_element`` looks up after parsing
#: the element tag (presumably with the namespace stripped -- confirm against
#: ``_parse_element_tag``). Each value is called with the start element and
#: the ``(event, element)`` iterator and returns the parsed instance.
_GENERAL_DISPATCH: Mapping[
    str, Callable[[Element, Iterator[Tuple[str, Element]]], aas_types.Class]
] = {
    "extension": _read_extension_as_sequence,
    "administrativeInformation": _read_administrative_information_as_sequence,
    "qualifier": _read_qualifier_as_sequence,
    "assetAdministrationShell": _read_asset_administration_shell_as_sequence,
    "assetInformation": _read_asset_information_as_sequence,
    "resource": _read_resource_as_sequence,
    "specificAssetId": _read_specific_asset_id_as_sequence,
    "submodel": _read_submodel_as_sequence,
    "relationshipElement": _read_relationship_element_as_sequence,
    "submodelElementList": _read_submodel_element_list_as_sequence,
    "submodelElementCollection": _read_submodel_element_collection_as_sequence,
    "property": _read_property_as_sequence,
    "multiLanguageProperty": _read_multi_language_property_as_sequence,
    "range": _read_range_as_sequence,
    "referenceElement": _read_reference_element_as_sequence,
    "blob": _read_blob_as_sequence,
    "file": _read_file_as_sequence,
    "annotatedRelationshipElement": _read_annotated_relationship_element_as_sequence,
    "entity": _read_entity_as_sequence,
    "eventPayload": _read_event_payload_as_sequence,
    "basicEventElement": _read_basic_event_element_as_sequence,
    "operation": _read_operation_as_sequence,
    "operationVariable": _read_operation_variable_as_sequence,
    "capability": _read_capability_as_sequence,
    "conceptDescription": _read_concept_description_as_sequence,
    "reference": _read_reference_as_sequence,
    "key": _read_key_as_sequence,
    "langStringNameType": _read_lang_string_name_type_as_sequence,
    "langStringTextType": _read_lang_string_text_type_as_sequence,
    "environment": _read_environment_as_sequence,
    "embeddedDataSpecification": _read_embedded_data_specification_as_sequence,
    "levelType": _read_level_type_as_sequence,
    "valueReferencePair": _read_value_reference_pair_as_sequence,
    "valueList": _read_value_list_as_sequence,
    "langStringPreferredNameTypeIec61360": _read_lang_string_preferred_name_type_iec_61360_as_sequence,
    "langStringShortNameTypeIec61360": _read_lang_string_short_name_type_iec_61360_as_sequence,
    "langStringDefinitionTypeIec61360": _read_lang_string_definition_type_iec_61360_as_sequence,
    "dataSpecificationIec61360": _read_data_specification_iec_61360_as_sequence,
}


# endregion


Expand Down
56 changes: 56 additions & 0 deletions docs/source/getting_started/xmlize.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,62 @@ Expected output:
<class 'aas_core3.types.Submodel'>
<class 'aas_core3.types.Property'>

You can also de-serialize model classes other than ``Environment``.
For example, to de-serialize a submodel, you can use :py:func:`aas_core3.xmlization.submodel_from_str`:

.. testcode::

import aas_core3.xmlization as aas_xmlization

text = (
"<submodel xmlns=\"https://admin-shell.io/aas/3/0\">" +
"<id>some-unique-global-identifier</id>" +
"<submodelElements><property><idShort>someProperty</idShort>" +
"<valueType>xs:boolean</valueType></property></submodelElements>" +
"</submodel>"
)

submodel = aas_xmlization.submodel_from_str(text)

for something in submodel.descend():
print(type(something))

Expected output:

.. testoutput::

<class 'aas_core3.types.Property'>

If you do not know the model type in advance, you can use the general functions such as :py:func:`aas_core3.xmlization.from_str` and :py:func:`aas_core3.xmlization.from_file`.
The model type will be determined based on the first start element.
The example above can thus be rewritten:

.. testcode::

import aas_core3.xmlization as aas_xmlization

text = (
"<submodel xmlns=\"https://admin-shell.io/aas/3/0\">" +
"<id>some-unique-global-identifier</id>" +
"<submodelElements><property><idShort>someProperty</idShort>" +
"<valueType>xs:boolean</valueType></property></submodelElements>" +
"</submodel>"
)

instance = aas_xmlization.from_str(text)

for something in instance.descend():
print(type(something))

Expected output:

.. testoutput::

<class 'aas_core3.types.Property'>

Prefer the particular de-serialization (:py:func:`aas_core3.xmlization.submodel_from_str`) whenever you know the type in advance.
The particular de-serialization function will check the actual model type for you, and you also get more precise type annotations for your downstream code.

Errors
======

Expand Down
57 changes: 57 additions & 0 deletions tests/test_general_xmlization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Test general XML de-serialization of a type given by the start element."""

# pylint: disable=missing-docstring

import io
import unittest
import xml.etree.ElementTree as ET


import aas_core3.xmlization as aas_xmlization
import aas_core3.verification as aas_verification

import tests.common
import tests.common_xmlization


class TestGeneral(unittest.TestCase):
    """Test the general de-serialization on the expected XML examples."""

    def test_ok(self) -> None:
        """De-serialize, verify and round-trip every expected XML file."""
        xml_dir = tests.common.TEST_DATA_DIR / "Xml"
        paths = sorted(xml_dir.glob("*/Expected/**/*.xml"))

        for path in paths:
            text = path.read_text(encoding="utf-8")

            try:
                instance = aas_xmlization.from_file(path)
            except Exception as exception:  # pylint: disable=broad-except
                # Re-raise with the offending path so that the failing
                # example can be identified.
                raise AssertionError(
                    f"Unexpected exception when de-serializing: {path}"
                ) from exception

            errors = list(aas_verification.verify(instance))

            if errors:
                error_lines = [
                    f"{error.path}: {error.cause}" for error in errors
                ]
                errors_joined = "\n\n".join(error_lines)
                raise AssertionError(
                    f"One or more unexpected errors from {path}:\n{errors_joined}"
                )

            # Exercise serialization to a stream as well; the written text
            # itself is not inspected.
            writer = io.StringIO()
            aas_xmlization.write(instance, writer)

            # Check the round-trip
            original = ET.fromstring(text)
            tests.common_xmlization.remove_redundant_whitespace(original)

            serialized_text = aas_xmlization.to_str(instance)
            serialized = ET.fromstring(serialized_text)
            tests.common_xmlization.remove_redundant_whitespace(serialized)

            tests.common_xmlization.assert_elements_equal(
                original, serialized, f"={path}"
            )


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 2145090

Please sign in to comment.