diff --git a/examples/neuroml2/NeuroML2.md b/examples/neuroml2/NeuroML2.md index aabfc643..405be412 100644 --- a/examples/neuroml2/NeuroML2.md +++ b/examples/neuroml2/NeuroML2.md @@ -13,7 +13,21 @@ Some description... xmlns str - Schema for NeuroML 2, usually http://www.neuroml.org/schema/neuroml2 + Default namespace for the NeuroML file, usually http://www.neuroml.org/schema/neuroml2 + + + + + xmlns_xsi + str + Namespace for XMLSchema-instance + + + + + xmlns_loc + str + Specifies location of the main namespace @@ -24,14 +38,14 @@ Some description... izhikevich2007Cells izhikevich2007Cell - + The izhikevich2007Cells pulseGenerators pulseGenerator - + The pulse current generators diff --git a/examples/neuroml2/NeuroML2.rst b/examples/neuroml2/NeuroML2.rst index ee22286c..2a812b47 100644 --- a/examples/neuroml2/NeuroML2.rst +++ b/examples/neuroml2/NeuroML2.rst @@ -5,22 +5,24 @@ Some description... **Allowed parameters** -=============== =========== ==================================================================== +=============== =========== ====================================================================================== Allowed field Data Type Description -=============== =========== ==================================================================== +=============== =========== ====================================================================================== **id** str The id of the NeuroML 2 document -**xmlns** str Schema for NeuroML 2, usually http://www.neuroml.org/schema/neuroml2 -=============== =========== ==================================================================== +**xmlns** str Default namespace for the NeuroML file, usually http://www.neuroml.org/schema/neuroml2 +**xmlns_xsi** str Namespace for XMLSchema-instance +**xmlns_loc** str Specifies location of the main namespace +=============== =========== ====================================================================================== **Allowed children** -======================= 
============================================ ==================== +======================= ============================================ ============================ Allowed child Data Type Description -======================= ============================================ ==================== -**izhikevich2007Cells** `izhikevich2007Cell <#izhikevich2007cell>`__ -**pulseGenerators** `pulseGenerator <#pulsegenerator>`__ +======================= ============================================ ============================ +**izhikevich2007Cells** `izhikevich2007Cell <#izhikevich2007cell>`__ The izhikevich2007Cells +**pulseGenerators** `pulseGenerator <#pulsegenerator>`__ The pulse current generators **networks** `network <#network>`__ The networks present -======================= ============================================ ==================== +======================= ============================================ ============================ ================== izhikevich2007Cell diff --git a/examples/neuroml2/NeuroML2.specification.json b/examples/neuroml2/NeuroML2.specification.json index fe211575..8e39c0eb 100644 --- a/examples/neuroml2/NeuroML2.specification.json +++ b/examples/neuroml2/NeuroML2.specification.json @@ -8,17 +8,25 @@ }, "xmlns": { "type": "str", - "description": "Schema for NeuroML 2, usually http://www.neuroml.org/schema/neuroml2" + "description": "Default namespace for the NeuroML file, usually http://www.neuroml.org/schema/neuroml2" + }, + "xmlns_xsi": { + "type": "str", + "description": "Namespace for XMLSchema-instance" + }, + "xmlns_loc": { + "type": "str", + "description": "Specifies location of the main namespace" } }, "allowed_children": { "izhikevich2007Cells": { "type": "izhikevich2007Cell", - "description": "" + "description": "The izhikevich2007Cells" }, "pulseGenerators": { "type": "pulseGenerator", - "description": "" + "description": "The pulse current generators" }, "networks": { "type": "network", diff --git 
a/examples/neuroml2/NeuroML2.specification.yaml b/examples/neuroml2/NeuroML2.specification.yaml index 0d4bb12a..df89cb5b 100644 --- a/examples/neuroml2/NeuroML2.specification.yaml +++ b/examples/neuroml2/NeuroML2.specification.yaml @@ -6,14 +6,20 @@ neuroml: description: The id of the NeuroML 2 document xmlns: type: str - description: Schema for NeuroML 2, usually http://www.neuroml.org/schema/neuroml2 + description: Default namespace for the NeuroML file, usually http://www.neuroml.org/schema/neuroml2 + xmlns_xsi: + type: str + description: Namespace for XMLSchema-instance + xmlns_loc: + type: str + description: Specifies location of the main namespace allowed_children: izhikevich2007Cells: type: izhikevich2007Cell - description: '' + description: The izhikevich2007Cells pulseGenerators: type: pulseGenerator - description: '' + description: The pulse current generators networks: type: network description: The networks present diff --git a/examples/neuroml2/TestNeuroML.xml b/examples/neuroml2/TestNeuroML.xml index 54ee04dc..70586cf6 100644 --- a/examples/neuroml2/TestNeuroML.xml +++ b/examples/neuroml2/TestNeuroML.xml @@ -1,5 +1,5 @@ - + diff --git a/examples/neuroml2/neuroml2_spec.py b/examples/neuroml2/neuroml2_spec.py index 045931e9..6b07fda5 100644 --- a/examples/neuroml2/neuroml2_spec.py +++ b/examples/neuroml2/neuroml2_spec.py @@ -101,14 +101,26 @@ class neuroml(Base): Args: id: The id of the NeuroML 2 document - xmlns: Schema for NeuroML 2, usually http://www.neuroml.org/schema/neuroml2 + xmlns: Default namespace for the NeuroML file, usually http://www.neuroml.org/schema/neuroml2 + xmlns_xsi: Namespace for XMLSchema-instance + xmlns_loc: Specifies location of the main namespace + izhikevich2007Cells: The izhikevich2007Cells + pulseGenerators: The pulse current generators networks: The networks present """ id: str = field(validator=instance_of(str)) + xmlns: str = field( validator=instance_of(str), default="http://www.neuroml.org/schema/neuroml2" ) + 
xmlns_xsi: str = field( + validator=instance_of(str), default="http://www.w3.org/2001/XMLSchema-instance" + ) + xmlns_loc: str = field( + validator=instance_of(str), + default="http://www.neuroml.org/schema/neuroml2 https://raw.github.com/NeuroML/NeuroML2/development/Schemas/NeuroML2/NeuroML_v2.3.xsd", + ) izhikevich2007Cells: List[izhikevich2007Cell] = field(factory=list) pulseGenerators: List[pulseGenerator] = field(factory=list) @@ -187,3 +199,8 @@ class neuroml(Base): yy = yaml.dump(doc_dict, indent=4, sort_keys=False) print(yy) d.write(yy) + + from modelspec.utils import load_xml + + new_neuroml = load_xml("hello_world_neuroml.net.nml") + print(new_neuroml) diff --git a/src/modelspec/base_types.py b/src/modelspec/base_types.py index 7bb3f6a6..f4a81a9d 100644 --- a/src/modelspec/base_types.py +++ b/src/modelspec/base_types.py @@ -125,7 +125,6 @@ def to_xml(self) -> str: ) from modelspec.utils import build_xml_element - # root = ET.Element("modelspec") root = build_xml_element(self) xml_string = ET.tostring( @@ -170,12 +169,38 @@ def from_bson(cls, bson_str: str) -> "Base": @classmethod def from_xml(cls, xml_str: str) -> "Base": """Instantiate a Base object from an XML string""" - from modelspec.utils import element_to_dict, handle_id, convert_values + from modelspec.utils import ( + elementtree_element_to_dict, + handle_xml_dict_id, + convert_xml_dict_values, + process_xml_namespace, + ) + import re + + # When the to_xml() method is used it messes up the string; therefore, + # it is necessary to convert it into an ElementTree object and then decode it back into a string. + xml_string_a = ET.fromstring(xml_str) + xml_string_b = ET.tostring(xml_string_a).decode() + + # While trying to obtain a usable XML structure, the conversion above picks up + # unusual prefix strings that can be numbered incrementally, either as :ns0 to :nsX or as ns0: to nsX:. + # The regex pattern catches them in either form and removes them from the XML string.
+ ns_prefix_pattern = r"(ns\d+:|:ns\d+)" + cleaned_xml = re.sub(ns_prefix_pattern, "", xml_string_b).strip() + + # For the XML to be usable in modelspec, unnecessary string elements which only serve as aesthetics for the XML must + # be removed when converting to a dict; the process_xml_namespace function does just that. + removed_namespaces = process_xml_namespace(cleaned_xml) + + # The process_xml_namespace function returns an ElementTree element which can be directly worked upon by the elementtree_element_to_dict + # function, which returns a Python dictionary + data_dict = elementtree_element_to_dict(removed_namespaces) + + # This removes every 'id' key from the resulting dictionary structure, replacing it with its value + removed_id = handle_xml_dict_id(data_dict) - root = ET.fromstring(xml_str) - data_dict = element_to_dict(root) - removed_id = handle_id(data_dict) - converted_to_actual_val = convert_values(removed_id) + # XML conversions do not return exact values; instead all values are returned as strings. This reassigns their actual values + converted_to_actual_val = convert_xml_dict_values(removed_id) return cls.from_dict(converted_to_actual_val) @@ -377,15 +402,38 @@ def from_xml_file(cls, filename: str) -> "Base": Returns: A modelspec Base for this XML.
""" - from modelspec.utils import element_to_dict, handle_id, convert_values + from modelspec.utils import ( + elementtree_element_to_dict, + handle_xml_dict_id, + convert_xml_dict_values, + process_xml_namespace, + ) + import re with open(filename) as infile: - tree = ET.parse(infile) - root = tree.getroot() + tree = ET.parse(infile) # Parse the XML file into an ElementTree object + root = tree.getroot() # Get the root element + + # This defines regular expressions to match the namespace patterns to be removed + ns_prefix_pattern = r"(ns\d+:|:ns\d+)" + + # Converts the loaded xml into a string and removes unwanted string values ':ns0' to :ns∞ and 'ns0:' to ns∞: + # They prevent the xml from loading correctly + xml_string = ET.tostring(root).decode() + cleaned_xml = re.sub(ns_prefix_pattern, "", xml_string).strip() + + # Removes xmlns, xmlns:xsi and xsi:schemaLocation from the xml structure for conversion + # it passes an element tree object to the elementtree_element_to_dict function + removed_namespaces = process_xml_namespace(cleaned_xml) + + # Converts the resulting xml stripped of xmlns, xmlns:xsi and xsi:schemaLocation into a dict + data_dict = elementtree_element_to_dict(removed_namespaces) + + # Removes every key having 'id' and replaces it with it's value + removed_id = handle_xml_dict_id(data_dict) - data_dict = element_to_dict(root) - removed_id = handle_id(data_dict) - converted_to_actual_val = convert_values(removed_id) + # Values are returned as strings after conversion, this corrects them to their actual values + converted_to_actual_val = convert_xml_dict_values(removed_id) return cls.from_dict(converted_to_actual_val) def get_child(self, id: str, type_: str) -> Any: diff --git a/src/modelspec/utils.py b/src/modelspec/utils.py index 7f0aabca..16844cb1 100644 --- a/src/modelspec/utils.py +++ b/src/modelspec/utils.py @@ -67,19 +67,35 @@ def load_xml(filename: str): Args: filename: The name of the XML file to load. 
""" + import re + with open(filename, "rb") as infile: tree = ET.parse(infile) # Parse the XML file into an ElementTree object root = tree.getroot() # Get the root element - # Convert the ElementTree object to a dictionary - data = element_to_dict(root) - removed_id = handle_id(data) - converted_to_actual_val = convert_values(removed_id) + # This defines regular expressions to match the namespace patterns to be removed + ns_prefix_pattern = r"(ns\d+:|:ns\d+)" + + # Converts the loaded xml into a string and removes unwanted string values ':ns0' to :ns∞ and 'ns0:' to ns∞: + # They prevent the xml from loading correctly + xml_string = ET.tostring(root).decode() + cleaned_xml = re.sub(ns_prefix_pattern, "", xml_string).strip() + + # Removes xmlns, xmlns:xsi and xsi:schemaLocation from the xml structure for conversion + # it passes an element tree object to the elementtree_element_to_dict function + removed_namespaces = process_xml_namespace(cleaned_xml) + + # Converts the resulting xml stripped of xmlns, xmlns:xsi and xsi:schemaLocation into a dict + data = elementtree_element_to_dict(removed_namespaces) - return convert_values(converted_to_actual_val) + # Removes every key having 'id' and replaces it with it's value + removed_id = handle_xml_dict_id(data) + # Values are returned as strings after conversion, this corrects them to their actual values + return convert_xml_dict_values(removed_id) -def element_to_dict(element): + +def elementtree_element_to_dict(element): """ This convert an ElementTree element to a dictionary. 
@@ -94,35 +110,57 @@ def element_to_dict(element): if attrs: result.update(attrs) + children_by_tag = {} for child_element in element: - child_key = child_element.tag - child_value = element_to_dict(child_element) + child_key = child_element.tag + "s" + child_value = elementtree_element_to_dict(child_element) - if child_key in result: - if not isinstance(result[child_key], list): - result[child_key] = [result[child_key]] - result[child_key].append(child_value) - else: + # Check if the child element has an 'id' attribute + if "id" in child_element.attrib: + # If the child element has an 'id', add it to the result dictionary directly result[child_key] = child_value + else: + # If the child element does not have an 'id', represent it as a list + children_by_tag.setdefault(child_key, []).append(child_value) + + # Append the lists to the result dictionary + result.update(children_by_tag) return result -def handle_id(dictionary): +def process_xml_namespace(xml_string): + # Remove ignored elements from the XML string + ignored_elements = [ + 'xmlns="http://www.neuroml.org/schema/neuroml2"', + 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"', + 'xsi:schemaLocation="http://www.neuroml.org/schema/neuroml2 https://raw.github.com/NeuroML/NeuroML2/development/Schemas/NeuroML2/NeuroML_v2.3.xsd"', + ] + + # Loops through the xml string and removes every instance of the elements in the list named ignored_elements + for ignored_element in ignored_elements: + xml_string = xml_string.replace(ignored_element, "").strip() + + # Parse the XML string into an ElementTree + root = ET.fromstring(xml_string) + return root + + +def handle_xml_dict_id(dictionary): if isinstance(dictionary, dict): if "id" in dictionary: nested_dict = {dictionary["id"]: dictionary.copy()} del nested_dict[dictionary["id"]]["id"] - return {k: handle_id(v) for k, v in nested_dict.items()} + return {k: handle_xml_dict_id(v) for k, v in nested_dict.items()} else: - return {k: handle_id(v) for k, v in 
dictionary.items()} + return {k: handle_xml_dict_id(v) for k, v in dictionary.items()} elif isinstance(dictionary, list): - return [handle_id(item) for item in dictionary] + return [handle_xml_dict_id(item) for item in dictionary] else: return dictionary -def convert_values(value): +def convert_xml_dict_values(value): """ This recursively converts values to their actual types. @@ -146,9 +184,9 @@ def convert_values(value): elif value.lower() == "none": return None elif isinstance(value, dict): - return {key: convert_values(val) for key, val in value.items()} + return {key: convert_xml_dict_values(val) for key, val in value.items()} elif isinstance(value, list): - return [convert_values(item) for item in value] + return [convert_xml_dict_values(item) for item in value] return value @@ -219,11 +257,20 @@ def build_xml_element(data, parent=None): for child in children: child_element = build_xml_element(child) parent.append(child_element) - else: + + # Skips attributes whose default is a string, which filters out the namespace and schemaLocation attributes; the remaining attributes are added as XML attributes + elif not isinstance(aattr.default, str): attribute_name = aattr.name attribute_value = data.__getattribute__(aattr.name) parent.set(attribute_name, str(attribute_value)) + # This defines the various namespaces and schemaLocation of the generated xml + if hasattr(data, "xmlns"): + parent.set("xmlns", data.xmlns) + if hasattr(data, "xmlns_xsi"): + parent.set("xmlns:xsi", data.xmlns_xsi) + if hasattr(data, "xmlns_loc"): + parent.set("xsi:schemaLocation", str(data.xmlns_loc)) return parent