fixing the last of the tests

sphinx-contrib · Aug 19, 2024 · 95b6de4 · 95b6de4
1 parent 2c39f67
commit 95b6de4
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 19 deletions.
diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -4,7 +4,7 @@
 import re
 
 # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-rpath = "/home/anton/tools/matlabdomain/tests/test_data/PropTypeOld.m"
+rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithTrailingCommentAfterBases.m"
 # rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m"
 
 tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
@@ -194,12 +194,16 @@
 
 q_get_set = ML_LANG.query("""["get." "set."]""")
 
+q_line_continuation = ML_LANG.query("(line_continuation) @lc")
+
 
 re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M)
+re_trim_line = re.compile(r"^[ \t]*", flags=re.M)
 re_assign_remove = re.compile(r"^=[ \t]*")
 
 
 def tree_sitter_is_0_21():
+    """Check if tree-sitter is v0.21.* in order to use the correct language initialization and syntax."""
     if not hasattr(tree_sitter_is_0_21, "is_21"):
         tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
         tree_sitter_is_0_21.is_21 = tree_sitter_ver[1] == 21  # memoize
@@ -215,13 +219,33 @@ def get_row(point):
 
 
 def process_text_into_docstring(text, encoding):
+    """Take a text bytestring and decode it into a docstring."""
     docstring = text.decode(encoding, errors="backslashreplace")
     return re.sub(re_percent_remove, "", docstring)
 
 
-def process_default(text, encoding):
-    default = text.decode(encoding, errors="backslashreplace")
-    return re.sub(re_assign_remove, "", default)
+def process_default(node, encoding):
+    """Take the node defining a default and remove any line continuations before generating the default."""
+    text = node.text
+    to_keep = set(range(node.end_byte - node.start_byte))
+    lc_matches = q_line_continuation.matches(node)
+    for _, match in lc_matches:
+        # TODO this copies a lot perhaps there is a better option.
+        lc = match["lc"]
+        cut_start = lc.start_byte - node.start_byte
+        cut_end = lc.end_byte - node.start_byte
+        to_keep -= set(range(cut_start, cut_end))
+    # NOTE: hardcoded endianess is fine because for one byte this does not matter.
+    #       See python bikeshed on possible defaults for this here:
+    #       https://discuss.python.org/t/what-should-be-the-default-value-for-int-to-bytes-byteorder/10616
+    new_text = b"".join(
+        [byte.to_bytes(1, "big") for idx, byte in enumerate(text) if idx in to_keep]
+    )
+    # TODO We may want to do an in-order traversal of the parse here to generate a "nice" reformatted single line
+    #      however doing so sufficiently generically is likely a major undertaking.
+    default = new_text.decode(encoding, errors="backslashreplace")
+    default = re.sub(re_assign_remove, "", default)
+    return re.sub(re_trim_line, "", default)
 
 
 class MatScriptParser:
@@ -281,13 +305,21 @@ def __init__(self, root_node, encoding):
 
         # get docstring
         docstring_node = fun_match.get("docstring")
-        docstring = None
+        docstring = ""
         if docstring_node is not None:
             prev_sib = docstring_node.prev_named_sibling
             if get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1:
-                docstring = process_text_into_docstring(
-                    docstring_node.text, self.encoding
-                )
+                if get_row(docstring_node.start_point) == get_row(prev_sib.end_point):
+                    # if the docstring is on the same line as the end of the function drop it
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
+                    split_ds = docstring.split("\n")
+                    docstring = "\n".join(split_ds[1:]) if len(split_ds) > 1 else ""
+                else:
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
 
         if not docstring:
             docstring = None
@@ -343,7 +375,7 @@ def _parse_argument_section(self, argblock_node):
             # extract default
             default_node = arg_match.get("default")
             default = (
-                process_default(default_node.text, self.encoding)
+                process_default(default_node, self.encoding)
                 if default_node is not None
                 else None
             )
@@ -515,12 +547,22 @@ def __init__(self, root_node, encoding):
 
         # get docstring and check that it consecutive
         docstring_node = class_match.get("docstring")
+        docstring = ""
         if docstring_node is not None:
             prev_node = docstring_node.prev_sibling
             if get_row(docstring_node.start_point) - get_row(prev_node.end_point) <= 1:
-                self.docstring = process_text_into_docstring(
-                    docstring_node.text, self.encoding
-                )
+                if get_row(docstring_node.start_point) == get_row(prev_node.end_point):
+                    # if the docstring is on the same line as the end of the classdef drop it
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
+                    split_ds = docstring.split("\n")
+                    docstring = "\n".join(split_ds[1:]) if len(split_ds) > 1 else ""
+                else:
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
+        self.docstring = docstring
 
         prop_matches = q_properties.matches(self.cls)
         method_matches = q_methods.matches(self.cls)
@@ -606,7 +648,7 @@ def _parse_property_section(self, props_match):
             # extract default
             default_node = prop_match.get("default")
             default = (
-                process_default(default_node.text, self.encoding)
+                process_default(default_node, self.encoding)
                 if default_node is not None
                 else None
             )

diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
@@ -433,7 +433,7 @@ def test_file_parsing_with_no_encoding_specified():
     mfile = os.path.join(DIRNAME, "test_data", "f_with_latin_1.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_latin_1", "test_data")
     assert obj.name == "f_with_latin_1"
-    assert obj.docstring == r"Analyse de la r\xe9dponse \xe0 un cr\xe9dneau"
+    assert obj.docstring == r"Analyse de la r\xe9ponse \xe0 un cr\xe9neau"
 
 
 def test_ClassWithBuiltinOverload():
@@ -740,7 +740,7 @@ def test_ClassWithLongPropertyDocstrings():
     )
     assert obj.name == "ClassWithLongPropertyDocstrings"
     assert (
-        obj.properties["a"]["docstring"] == "This line is deleted"
+        obj.properties["a"]["docstring"] == "This line is deleted\n"
         "This line documents another property"
     )
     assert obj.properties["b"]["docstring"] == "Document this property"
@@ -755,7 +755,7 @@ def test_ClassWithLongPropertyTrailingEmptyDocstrings():
     )
     assert obj.name == "ClassWithLongPropertyTrailingEmptyDocstrings"
     assert (
-        obj.properties["a"]["docstring"] == "This line is deleted"
+        obj.properties["a"]["docstring"] == "This line is deleted\n"
         "This line documents another property"
     )
     assert obj.properties["b"]["docstring"] == "Document this property"
@@ -795,6 +795,7 @@ def test_ClassWithTrailingCommentAfterBases():
 
 
 def test_ClassWithEllipsisProperties():
+    # TODO change this when the functionality to "nicely" generate one line defaults exists
     mfile = os.path.join(TESTDATA_ROOT, "ClassWithEllipsisProperties.m")
     obj = mat_types.MatObject.parse_mfile(
         mfile, "ClassWithEllipsisProperties", "test_data"
@@ -805,15 +806,15 @@ def test_ClassWithEllipsisProperties():
     assert len(obj.methods) == 0
 
     assert obj.properties["A"]["docstring"] == "an expression with ellipsis"
-    assert obj.properties["A"]["default"] == "1+2+3+4+5"
+    assert obj.properties["A"]["default"] == "1 + 2 + 3 +             4 + 5"
     assert (
         obj.properties["B"]["docstring"]
         == "a cell array with ellipsis and other array notation"
     )
-    assert obj.properties["B"]["default"].startswith("{'hello','bye';")
+    assert obj.properties["B"]["default"].startswith("{'hello', 'bye';")
     assert obj.properties["B"]["default"].endswith("}")
     assert obj.properties["C"]["docstring"] == "using end inside array"
-    assert obj.properties["C"]["default"] == "ClassWithEllipsisProperties.B(2:end,1)"
+    assert obj.properties["C"]["default"] == "ClassWithEllipsisProperties.B(2:end, 1)"
     assert obj.properties["D"]["docstring"] == "String with line continuation"
     assert obj.properties["D"]["default"] == "'...'"
     assert obj.properties["E"]["docstring"] == "The string with spaces"