Split DataTranslator

Put the namedtuple-generation into a new class DataDecoder. This is done for performance reasons. Signed-off-by: Øyvind Rønningstad <[email protected]>
NordicSemiconductor · Jan 21, 2025 · b39b68e · b39b68e
1 parent cbcb027
commit b39b68e
Show file tree

Hide file tree

Showing 5 changed files with 66 additions and 37 deletions.
diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md
@@ -1,5 +1,14 @@
 # zcbor v. 0.9.99
 
+* The following `DataTranslator` functions have been moved to a separate class `DataDecoder`:
+
+    * `decode_obj()`
+    * `decode_str_yaml()`
+    * `decode_str()`
+
+  The split was done for performance reasons (namedtuple objects are slow to create).
+  The `DataDecoder` class is a subclass of `DataTranslator` and can do all the same things, just a bit slower.
+  This functionality is only relevant when zcbor is imported, so all CLI usage is unaffected.
 
 # zcbor v. 0.9.0
 

diff --git a/__init__.py b/__init__.py
@@ -10,5 +10,6 @@
 from .zcbor.zcbor import (
     CddlValidationError,
     DataTranslator,
+    DataDecoder,
     main
 )
diff --git a/tests/scripts/test_performance.py b/tests/scripts/test_performance.py
@@ -18,6 +18,7 @@
 """
 raw_message = cbor2.dumps(list(range(1000)))
 cmd_spec = zcbor.DataTranslator.from_cddl(cddl_contents, 3).my_types["perf_int"]
+# cmd_spec = zcbor.DataDecoder.from_cddl(cddl_contents, 3).my_types["perf_int"]
 
 profiler = cProfile.Profile()
 profiler.enable()

diff --git a/tests/scripts/test_zcbor.py b/tests/scripts/test_zcbor.py
@@ -65,7 +65,7 @@ def decode_file(self, data_path, *cddl_paths):
 
     def decode_string(self, data_string, *cddl_paths):
         cddl_str = " ".join((Path(p).read_text(encoding="utf-8") for p in cddl_paths))
-        self.my_types = zcbor.DataTranslator.from_cddl(cddl_str, 16).my_types
+        self.my_types = zcbor.DataDecoder.from_cddl(cddl_str, 16).my_types
         cddl = self.my_types["SUIT_Envelope_Tagged"]
         self.decoded = cddl.decode_str(data_string)
 
@@ -676,7 +676,7 @@ def test_file_header(self):
 class TestOptional(TestCase):
     def test_optional_0(self):
         with open(p_optional, 'r', encoding="utf-8") as f:
-            cddl_res = zcbor.DataTranslator.from_cddl(f.read(), 16)
+            cddl_res = zcbor.DataDecoder.from_cddl(f.read(), 16)
         cddl = cddl_res.my_types['cfg']
         test_yaml = """
             mem_config:
@@ -689,7 +689,7 @@ def test_optional_0(self):
 
 class TestUndefined(TestCase):
     def test_undefined_0(self):
-        cddl_res = zcbor.DataTranslator.from_cddl(
+        cddl_res = zcbor.DataDecoder.from_cddl(
             p_prelude.read_text(encoding="utf-8") + '\n' + p_corner_cases.read_text(encoding="utf-8"), 16)
         cddl = cddl_res.my_types['Simples']
         test_yaml = "[true, false, true, null, [zcbor_undefined]]"
@@ -703,7 +703,7 @@ def test_undefined_0(self):
 
 class TestFloat(TestCase):
     def test_float_0(self):
-        cddl_res = zcbor.DataTranslator.from_cddl(
+        cddl_res = zcbor.DataDecoder.from_cddl(
             p_prelude.read_text(encoding="utf-8") + '\n' + p_corner_cases.read_text(encoding="utf-8"), 16)
         cddl = cddl_res.my_types['Floats']
         test_yaml = f"[3.1415, 1234567.89, 0.000123, 3.1415, 2.71828, 5.0, {1 / 3}]"
@@ -731,14 +731,14 @@ def test_yaml_compatibility(self):
 
 class TestIntmax(TestCase):
     def test_intmax1(self):
-        cddl_res = zcbor.DataTranslator.from_cddl(
+        cddl_res = zcbor.DataDecoder.from_cddl(
             p_prelude.read_text(encoding="utf-8") + '\n' + p_corner_cases.read_text(encoding="utf-8"), 16)
         cddl = cddl_res.my_types['Intmax1']
         test_yaml = f"[-128, 127, 255, -32768, 32767, 65535, -2147483648, 2147483647, 4294967295, -9223372036854775808, 9223372036854775807, 18446744073709551615]"
         decoded = cddl.decode_str_yaml(test_yaml)
 
     def test_intmax2(self):
-        cddl_res = zcbor.DataTranslator.from_cddl(
+        cddl_res = zcbor.DataDecoder.from_cddl(
             p_prelude.read_text(encoding="utf-8") + '\n' + p_corner_cases.read_text(encoding="utf-8"), 16)
         cddl = cddl_res.my_types['Intmax2']
         test_yaml1 = f"[-128, 0, -32768, 0, -2147483648, 0, -9223372036854775808, 0]"
@@ -766,7 +766,7 @@ def test_intmax2(self):
 
 class TestInvalidIdentifiers(TestCase):
     def test_invalid_identifiers0(self):
-        cddl_res = zcbor.DataTranslator.from_cddl(
+        cddl_res = zcbor.DataDecoder.from_cddl(
             p_prelude.read_text(encoding="utf-8") + '\n' + p_corner_cases.read_text(encoding="utf-8"), 16)
         cddl = cddl_res.my_types['InvalidIdentifiers']
         test_yaml = "['1one', 2, '{[a-z]}']"

diff --git a/zcbor/zcbor.py b/zcbor/zcbor.py
@@ -1572,19 +1572,8 @@ def _flatten_list(self, name, obj):
         return obj
 
     def _construct_obj(self, my_list):
-        """Construct a namedtuple object from my_list. my_list contains tuples of name/value.
-
-        Also, attempt to flatten redundant levels of abstraction.
-        """
-        if my_list == []:
-            return None
-        names, values = tuple(zip(*my_list))
-        if len(values) == 1:
-            values = (self._flatten_obj(values[0]), )
-        values = tuple(self._flatten_list(names[i], values[i]) for i in range(len(values)))
-        assert (not any((isinstance(elem, KeyTuple) for elem in values))), \
-            f"KeyTuple not processed: {values}"
-        return namedtuple("_", names)(*values)
+        """Can be overridden to construct a decoded object."""
+        pass
 
     def _add_if(self, my_list, obj, expect_key=False, name=None):
         """Add construct obj and add it to my_list if relevant.
@@ -1614,11 +1603,11 @@ def _add_if(self, my_list, obj, expect_key=False, name=None):
             # If a bstr is CBOR-formatted, add both the string and the decoding of the string here
             if isinstance(obj, list) and all((isinstance(o, bytes) for o in obj)):
                 # One or more bstr in a list (i.e. it is optional or repeated)
-                my_list.append((name or self.var_name(), [self.cbor.decode_str(o) for o in obj]))
+                my_list.append((name or self.var_name(), [self.cbor._decode_str(o) for o in obj]))
                 my_list.append(((name or self.var_name()) + "_bstr", obj))
                 return
             if isinstance(obj, bytes):
-                my_list.append((name or self.var_name(), self.cbor.decode_str(obj)))
+                my_list.append((name or self.var_name(), self.cbor._decode_str(obj)))
                 my_list.append(((name or self.var_name()) + "_bstr", obj))
                 return
         my_list.append((name or self.var_name(), obj))
@@ -1695,7 +1684,7 @@ def _handle_key(self, next_obj):
         res = KeyTuple((key_res if not self.key.is_unambiguous() else None, obj_res))
         return res
 
-    def _decode_obj(self, it):
+    def _decode_obj_it(self, it):
         """Decode single CDDL value, excluding repetitions.
 
         May consume 0 to n CBOR objects via the iterator.
@@ -1752,22 +1741,22 @@ def _decode_full(self, it):
         if self.multi_var_condition():
             retvals = []
             for i in range(self.min_qty):
-                it, retval = self._decode_obj(it)
+                it, retval = self._decode_obj_it(it)
                 retvals.append(retval if not self.is_unambiguous_repeated() else None)
             try:
                 for i in range(self.max_qty - self.min_qty):
                     it, it_copy = tee(it)
-                    it, retval = self._decode_obj(it)
+                    it, retval = self._decode_obj_it(it)
                     retvals.append(retval if not self.is_unambiguous_repeated() else None)
             except CddlValidationError as c:
                 self.errors.append(str(c))
                 it = it_copy
             return it, retvals
         else:
-            ret = self._decode_obj(it)
+            ret = self._decode_obj_it(it)
             return ret
 
-    def decode_obj(self, obj):
+    def _decode_obj(self, obj):
         """CBOR object => python object"""
         it = iter([obj])
         try:
@@ -1780,21 +1769,14 @@ def decode_obj(self, obj):
             raise e
         return decoded
 
-    def decode_str_yaml(self, yaml_str, yaml_compat=False):
-        """YAML => python object"""
-        yaml_obj = yaml_load(yaml_str)
-        obj = self._from_yaml_obj(yaml_obj) if yaml_compat else yaml_obj
-        self.validate_obj(obj)
-        return self.decode_obj(obj)
-
-    def decode_str(self, cbor_str):
+    def _decode_str(self, cbor_str):
         """CBOR bytestring => python object"""
         cbor_obj = loads(cbor_str)
-        return self.decode_obj(cbor_obj)
+        return self._decode_obj(cbor_obj)
 
     def validate_obj(self, obj):
         """Validate CBOR object against CDDL. Exception if not valid."""
-        self.decode_obj(obj)
+        self._decode_obj(obj)  # Will raise exception if not valid
         return True
 
     def validate_str(self, cbor_str):
@@ -1909,6 +1891,42 @@ def str_to_c_code(self, cbor_str, var_name, columns=0):
         return f'uint8_t {var_name}[] = {{{arr}}};\n'
 
 
+class DataDecoder(DataTranslator):
+    """Create a decoded object with element names taken from the CDDL.
+
+    This is kept separate from DataTranslator for performance reasons."""
+
+    def _construct_obj(self, my_list):
+        """Construct a namedtuple object from my_list. my_list contains tuples of name/value.
+
+        Also, attempt to flatten redundant levels of abstraction.
+        """
+        if my_list == []:
+            return None
+        names, values = tuple(zip(*my_list))
+        if len(values) == 1:
+            values = (self._flatten_obj(values[0]), )
+        values = tuple(self._flatten_list(names[i], values[i]) for i in range(len(values)))
+        assert (not any((isinstance(elem, KeyTuple) for elem in values))), \
+            f"KeyTuple not processed: {values}"
+        return namedtuple("_", names)(*values)
+
+    def decode_obj(self, obj):
+        """CBOR object => python object"""
+        return self._decode_obj(obj)
+
+    def decode_str_yaml(self, yaml_str, yaml_compat=False):
+        """YAML => python object"""
+        yaml_obj = yaml_load(yaml_str)
+        obj = self._from_yaml_obj(yaml_obj) if yaml_compat else yaml_obj
+        self.validate_obj(obj)
+        return self.decode_obj(obj)
+
+    def decode_str(self, cbor_str):
+        """CBOR bytestring => python object"""
+        return self._decode_str(cbor_str)
+
+
 class XcoderTuple(NamedTuple):
     body: list
     func_name: str