From c6d889f1b10417db44540747aa2ab1583b4762e5 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 18 Jul 2024 07:55:24 +0200
Subject: [PATCH 01/45] [WIP] Work on writing a parser using the textmate
 grammar backend.   (#253)

* Actually not only enums, quick and dirty start to using textmate parser

* a little more boilerplate

* property validator parsing

* working function parsing without docstrings yet

* start enum work

* some enum parsing

* working parsing for enumeration comments

* add handling for block comments to enums

* backport enum docstring parsing to properties

* remove vestigial file

* minor fixes + black
---
 dev-requirements.txt                 |   1 +
 sphinxcontrib/mat_textmate_parser.py | 476 +++++++++++++++++++++++++++
 2 files changed, 477 insertions(+)
 create mode 100644 sphinxcontrib/mat_textmate_parser.py

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 427369d..c74a328 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -4,3 +4,4 @@ pytest-cov
 pre-commit
 defusedxml>=0.7.1
 sphinxcontrib-napoleon
+textmate-grammar-python
diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py
new file mode 100644
index 0000000..1ea1c8f
--- /dev/null
+++ b/sphinxcontrib/mat_textmate_parser.py
@@ -0,0 +1,476 @@
+from textmate_grammar.parsers.matlab import MatlabParser
+
+rpath = "../tests/test_data/ClassWithPropertyValidators.m"
+
+
+def find_first_child(curr, tok):
+    ind = [i for i in range(len(curr.children)) if curr.children[i].token == tok]
+    if not ind:
+        return None
+    return (curr.children[ind[0]], ind[0])
+
+
+class MatClassParser:
+    def __init__(self, path):
+        # DATA
+        self.name = ""
+        self.supers = []
+        self.attrs = {}
+        self.docstring = ""
+        self.properties = {}
+        self.methods = {}
+        self.enumerations = {}
+
+        # Maybe remove continuations as a crutch? currently parser is broken for continuations in attributes
+        # self.parser = MatlabParser(remove_line_continuations=True)
+        self.parser = MatlabParser()
+        self.parsed = self.parser.parse_file(path)
+        self.cls, _ = find_first_child(self.parsed, "meta.class.matlab")
+        if not self.cls:
+            raise Exception()  # TODO better exception
+        self.clsdef, _ = find_first_child(self.cls, "meta.class.declaration.matlab")
+        self._parse_clsdef()
+        self._find_class_docstring()
+
+        property_sections = self.cls.findall(tokens="meta.properties.matlab", depth=1)
+        method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1)
+        enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1)
+
+        for section in property_sections:
+            self._parse_property_section(section[0])
+
+        for section in method_sections:
+            self._parse_method_section(section[0])
+
+        for section in enumeration_sections:
+            self._parse_enum_section(section[0])
+        import pdb
+
+        pdb.set_trace()
+
+    def _find_class_docstring(self):
+        if self.cls.children[1].token == "comment.line.percentage.matlab":
+            self._docstring_lines()
+        elif self.cls.children[1].token == "comment.block.percentage.matlab":
+            self.docstring = (
+                self.cls.children[1].content.strip()[2:-2].strip()
+            )  # [2:-2] strips out the block comment delimiters
+        else:
+            print("found no docstring")
+
+    def _docstring_lines(self):
+        idx = 1
+        while self.cls.children[idx].token == "comment.line.percentage.matlab":
+            self.docstring += (
+                self.cls.children[idx].content[1:] + "\n"
+            )  # [1:] strips out percent sign
+            idx += 1
+        self.docstring = self.docstring.strip()
+
+    def _parse_clsdef(self):
+        for child in self.clsdef.children:
+            child.print()
+
+        # Parse Attributes TODO maybe there is a smarter way to do this?
+        idx = 0
+        while self.clsdef.children[idx].token == "storage.modifier.class.matlab":
+            attr = self.clsdef.children[idx].content
+            val = None  # TODO maybe do some typechecking here or we can assume that you give us valid Matlab
+            idx += 1
+            if (
+                self.clsdef.children[idx].token == "keyword.operator.assignment.matlab"
+            ):  # pull out r.h.s
+                idx += 1
+                val = self.clsdef.children[idx].content
+                idx += 1
+            if (
+                self.clsdef.children[idx].token
+                == "punctuation.separator.modifier.comma.matlab"
+            ):  # skip commas
+                idx += 1
+            self.attrs[attr] = val
+
+        if (
+            self.clsdef.children[idx].token == "punctuation.section.parens.end.matlab"
+        ):  # Skip end of attrs
+            idx += 1
+
+        # name must be next
+        self.name = self.clsdef.children[idx].content
+        idx += 1
+
+        while idx < len(
+            self.clsdef.children
+        ):  # No children we care about after this except inherited classes
+            if self.clsdef.children[idx].token == "meta.inherited-class.matlab":
+                super_cls_tok = self.clsdef.children[idx]
+                # collect superclass as a tuple
+                super_cls = tuple(
+                    [
+                        child.content
+                        for child in super_cls_tok.children
+                        if not child.token.startswith("punctuation")
+                    ]
+                )
+                self.supers.append(super_cls)
+            idx += 1
+
+    def _parse_property_section(self, section):
+        # TODO parse property section attrs
+        idxs = [
+            i
+            for i in range(len(section.children))
+            if section.children[i].token == "meta.assignment.definition.property.matlab"
+        ]
+        for idx in idxs:
+            prop_tok = section.children[idx]
+            prop_name = prop_tok.begin[0].content
+            self.properties[prop_name] = {}  # Create entry for property
+            self._parse_property_validation(
+                prop_name, prop_tok
+            )  # Parse property validation.
+
+            # Get inline docstring
+            inline_docstring_gen = prop_tok.find(
+                tokens="comment.line.percentage.matlab", attribute="end"
+            )
+            try:
+                inline_docstring_tok, _ = next(inline_docstring_gen)
+                inline_docstring = inline_docstring_tok.content[
+                    1:
+                ]  # strip leading % sign
+            except StopIteration:
+                inline_docstring = None
+
+            # Walk backwards to get preceding docstring.
+            preceding_docstring = ""
+            walk_back_idx = idx - 1
+            next_tok = prop_tok
+            while walk_back_idx >= 0:
+                walk_tok = section.children[walk_back_idx]
+                if self._is_empty_line_between_tok(walk_tok, next_tok):
+                    # Once there is an empty line between consecutive tokens we are done.
+                    break
+
+                if (
+                    not preceding_docstring
+                    and walk_tok.token == "comment.block.percentage.matlab"
+                ):
+                    # block comment immediately preceding property so we are done.
+                    # TODO we might need to do some postprocessing here to handle indents gracefully
+                    preceding_docstring = walk_tok.content.strip()[2:-2]
+                    break
+                elif walk_tok.token == "comment.line.percentage.matlab":
+                    preceding_docstring = (
+                        walk_tok.content[1:] + "\n" + preceding_docstring
+                    )  # [1:] strips %
+                    walk_back_idx -= 1
+                    next_tok = walk_tok
+                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
+                    walk_back_idx -= 1
+                    # Don't update next_tok for whitespace
+                else:
+                    break
+
+            # Walk forwards to get following docstring or inline one.
+            following_docstring = ""
+            walk_fwd_idx = idx + 1
+            prev_tok = prop_tok
+            while walk_fwd_idx < len(section.children):
+                walk_tok = section.children[walk_fwd_idx]
+
+                if self._is_empty_line_between_tok(prev_tok, walk_tok):
+                    # Once there is an empty line between consecutive tokens we are done.
+                    break
+
+                if (
+                    not following_docstring
+                    and walk_tok.token == "comment.block.percentage.matlab"
+                ):
+                    # block comment immediately following property so we are done.
+                    # TODO we might need to do some postprocessing here to handle indents gracefully
+                    following_docstring = walk_tok.content.strip()[2:-2]
+                    break
+                elif walk_tok.token == "comment.line.percentage.matlab":
+                    following_docstring = (
+                        following_docstring + "\n" + walk_tok.content[1:]
+                    )  # [1:] strips %
+                    walk_fwd_idx += 1
+                    prev_tok = walk_tok
+                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
+                    walk_fwd_idx += 1
+                    # Don't update prev_tok for whitespace
+                else:
+                    break
+
+            if preceding_docstring:
+                self.properties[prop_name]["docstring"] = preceding_docstring.strip()
+            elif inline_docstring:
+                self.properties[prop_name]["docstring"] = inline_docstring.strip()
+            elif following_docstring:
+                self.properties[prop_name]["docstring"] = following_docstring.strip()
+            else:
+                self.properties[prop_name]["docstring"] = None
+
+    def _parse_property_validation(self, prop_name, prop):
+        """Parses property validation syntax"""
+        # First get the size if found
+        size_gen = prop.find(tokens="meta.parens.size.matlab", depth=1)
+        try:  # We have a size, therefore parse the comma separated list into tuple
+            size_tok, _ = next(size_gen)
+            size_elem_gen = size_tok.find(
+                tokens=[
+                    "constant.numeric.decimal.matlab",
+                    "keyword.operator.vector.colon.matlab",
+                ],
+                depth=1,
+            )
+            size = tuple([elem[0].content for elem in size_elem_gen])
+            self.properties[prop_name]["size"] = size
+        except StopIteration:
+            pass
+
+        # Now find the type if it exists
+        # TODO this should be mapped to known types (though perhaps as a postprocess)
+        type_gen = prop.find(tokens="storage.type.matlab", depth=1)
+        try:
+            self.properties[prop_name]["type"] = next(type_gen)[0].content
+        except StopIteration:
+            pass
+
+        # Now find list of validators
+        validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1)
+        try:
+            import pdb
+
+            pdb.set_trace()
+            validator_tok, _ = next(validator_gen)
+            validator_toks = validator_tok.findall(
+                tokens="variable.other.readwrite.matlab", depth=1
+            )  # TODO Probably bug here in MATLAB-Language-grammar
+            self.properties[prop_name]["validators"] = [
+                tok[0].content for tok in validator_toks
+            ]
+        except StopIteration:
+            pass
+
+    def _parse_method_section(self, section):
+        # TODO parse method section attrs
+        idxs = [
+            i
+            for i in range(len(section.children))
+            if section.children[i].token == "meta.function.matlab"
+        ]
+        for idx in idxs:
+            meth_tok = section.children[idx]
+            self._parse_function(meth_tok)
+            # TODO walk forward and backward to get method docstring.
+            # TODO if we have multiple possible docstrings what is given priority?
+            # TODO parse out property validations syntax
+
+    def _parse_function(self, fun_tok):
+        """Parse Function definition"""
+        # First find the function name
+        name_gen = fun_tok.find(tokens="entity.name.function.matlab")
+        try:
+            name_tok, _ = next(name_gen)
+            fun_name = name_tok.content
+        except StopIteration:
+            # TODO correct error here
+            raise Exception("Couldn't find function name")
+
+        # Find outputs and parameters
+        output_gen = fun_tok.find(tokens="variable.parameter.output.matlab")
+        param_gen = fun_tok.find(tokens="variable.parameter.input.matlab")
+
+        self.methods[fun_name] = {}
+        self.methods[fun_name]["outputs"] = {}
+        self.methods[fun_name]["params"] = {}
+
+        for out, _ in output_gen:
+            self.methods[fun_name]["outputs"][out.content] = {}
+
+        for param, _ in param_gen:
+            self.methods[fun_name]["params"][param.content] = {}
+
+        # find arguments blocks
+        for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"):
+            self._parse_argument_section(fun_name, arg_section)
+
+    def _parse_argument_section(self, fun_name, section):
+        modifiers = [
+            mod.content
+            for mod, _ in section.find(tokens="storage.modifier.arguments.matlab")
+        ]
+        arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab")
+        for arg_def, _ in arg_def_gen:
+            arg_name = arg_def.begin[
+                0
+            ].content  # Get argument name that is being defined
+            self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers)
+
+    def _parse_argument_validation(self, fun_name, arg_name, arg, modifiers):
+        # TODO This should be identical to property validation I think. Refactor
+        # First get the size if found
+        section = "output" if "Output" in modifiers else "params"
+        size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1)
+        try:  # We have a size, therefore parse the comma separated list into tuple
+            size_tok, _ = next(size_gen)
+            size_elem_gen = size_tok.find(
+                tokens=[
+                    "constant.numeric.decimal.matlab",
+                    "keyword.operator.vector.colon.matlab",
+                ],
+                depth=1,
+            )
+            size = tuple([elem[0].content for elem in size_elem_gen])
+            self.methods[fun_name][section][arg_name]["size"] = size
+        except StopIteration:
+            pass
+
+        # Now find the type if it exists
+        # TODO this should be mapped to known types (though perhaps as a postprocess)
+        type_gen = arg.find(tokens="storage.type.matlab", depth=1)
+        try:
+            self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[
+                0
+            ].content
+        except StopIteration:
+            pass
+
+        # Now find list of validators
+        validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1)
+        try:
+            validator_tok, _ = next(validator_gen)
+            validator_toks = validator_tok.findall(
+                tokens="variable.other.readwrite.matlab", depth=1
+            )  # TODO Probably bug here in MATLAB-Language-grammar
+            self.methods[fun_name][section][arg_name]["validators"] = [
+                tok[0].content for tok in validator_toks
+            ]
+        except StopIteration:
+            pass
+
+    def _parse_enum_section(self, section):
+        # TODO parse enum section attrs
+        idxs = [
+            i
+            for i in range(len(section.children))
+            if section.children[i].token
+            == "meta.assignment.definition.enummember.matlab"
+        ]
+        for idx in idxs:
+            enum_tok = section.children[idx]
+            next_idx = idx
+            enum_name = enum_tok.children[0].content
+            self.enumerations[enum_name] = {}
+            if (
+                section.children[idx + 1].token == "meta.parens.matlab"
+            ):  # Parse out args TODO this should be part of enummember assignment definition
+                args = tuple(
+                    [
+                        arg.content
+                        for arg in section.children[idx + 1].children
+                        if arg.token != "punctuation.separator.comma.matlab"
+                    ]
+                )
+                self.enumerations[enum_name]["args"] = args
+                next_idx += 1
+
+            # Walk backwards to get preceding docstring.
+            preceding_docstring = ""
+            walk_back_idx = idx - 1
+            next_tok = enum_tok
+            while walk_back_idx >= 0:
+                walk_tok = section.children[walk_back_idx]
+                if self._is_empty_line_between_tok(walk_tok, next_tok):
+                    # Once there is an empty line between consecutive tokens we are done.
+                    break
+
+                if (
+                    not preceding_docstring
+                    and walk_tok.token == "comment.block.percentage.matlab"
+                ):
+                    # block comment immediately preceding enum so we are done.
+                    # TODO we might need to do some postprocessing here to handle indents gracefully
+                    preceding_docstring = walk_tok.content.strip()[2:-2]
+                    break
+                elif walk_tok.token == "comment.line.percentage.matlab":
+                    preceding_docstring = (
+                        walk_tok.content[1:] + "\n" + preceding_docstring
+                    )  # [1:] strips %
+                    walk_back_idx -= 1
+                    next_tok = walk_tok
+                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
+                    walk_back_idx -= 1
+                    # Don't update next_tok for whitespace
+                else:
+                    break
+
+            # Walk forwards to get following docstring or inline one.
+            inline_docstring = ""
+            following_docstring = ""
+            walk_fwd_idx = next_idx + 1
+            prev_tok = section.children[next_idx]
+            while walk_fwd_idx < len(section.children):
+                walk_tok = section.children[walk_fwd_idx]
+
+                if self._is_empty_line_between_tok(prev_tok, walk_tok):
+                    # Once there is an empty line between consecutive tokens we are done.
+                    break
+
+                if (
+                    not following_docstring
+                    and walk_tok.token == "comment.block.percentage.matlab"
+                ):
+                    # block comment immediately following enum so we are done.
+                    # TODO we might need to do some postprocessing here to handle indents gracefully
+                    following_docstring = walk_tok.content.strip()[2:-2]
+                    break
+                elif walk_tok.token == "comment.line.percentage.matlab":
+                    # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit.
+                    if self._toks_on_same_line(section.children[idx], walk_tok):
+                        inline_docstring = walk_tok.content[1:]
+                        break
+
+                    following_docstring = (
+                        following_docstring + "\n" + walk_tok.content[1:]
+                    )  # [1:] strips %
+                    walk_fwd_idx += 1
+                    prev_tok = walk_tok
+                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
+                    walk_fwd_idx += 1
+                    # Don't update prev_tok for whitespace
+                else:
+                    break
+
+            if preceding_docstring:
+                self.enumerations[enum_name]["docstring"] = preceding_docstring.strip()
+            elif inline_docstring:
+                self.enumerations[enum_name]["docstring"] = inline_docstring.strip()
+            elif following_docstring:
+                self.enumerations[enum_name]["docstring"] = following_docstring.strip()
+            else:
+                self.enumerations[enum_name]["docstring"] = None
+
+    def _toks_on_same_line(self, tok1, tok2):
+        """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results"""
+        line1 = self._get_last_line_of_tok(tok1)
+        line2 = self._get_first_line_of_tok(tok2)
+        return line1 == line2
+
+    def _is_empty_line_between_tok(self, tok1, tok2):
+        """Note: pass tokens in order they appear"""
+        line1 = self._get_last_line_of_tok(tok1)
+        line2 = self._get_first_line_of_tok(tok2)
+        return line2 - line1 > 1
+
+    def _get_first_line_of_tok(self, tok):
+        return min([loc[0] for loc in tok.characters.keys()])
+
+    def _get_last_line_of_tok(self, tok):
+        return max([loc[0] for loc in tok.characters.keys()])
+
+
+if __name__ == "__main__":
+    cls_parse = MatClassParser(rpath)

From b7bc00649152aac5d6eb55534779bc96c7317c6e Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sun, 7 Jul 2024 20:35:19 +0200
Subject: [PATCH 02/45] Hack for object hierarchy

---
 sphinxcontrib/matlab.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py
index 43b96ab..d609e33 100644
--- a/sphinxcontrib/matlab.py
+++ b/sphinxcontrib/matlab.py
@@ -336,6 +336,25 @@ class MatClasslike(MatObject):
     Description of a class-like object (classes, interfaces, exceptions).
     """
 
+    def _object_hierarchy_parts(self, sig):
+        """
+        Returns a tuple of strings, one entry for each part of the object's
+        hierarchy (e.g. ``('module', 'submodule', 'Class', 'method')``). The
+        returned tuple is used to properly nest children within parents in the
+        table of contents, and can also be used within the
+        :py:meth:`_toc_entry_name` method.
+
+        This method must not be used outwith table of contents generation.
+        """
+        parts = sig.attributes.get('module').split('.')
+        parts.append(sig.attributes.get('fullname'))
+        #import pdb;pdb.set_trace()
+        return tuple(parts)
+
+    def _toc_entry_name(self, sig):
+        # TODO respect the configuration setting ``toc_object_entries_show_parents``
+        return sig.attributes.get('fullname')
+
     def get_signature_prefix(self, sig):
         return self.objtype + " "
 

From 4e60a764268bd8cd3ddc261d0c8ad8ccd1fe8cc1 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Tue, 9 Jul 2024 15:40:18 +0200
Subject: [PATCH 03/45] Initial hack to get enumerations to work

---
 sphinxcontrib/mat_documenters.py |  29 +++++-
 sphinxcontrib/mat_types.py       | 160 ++++++++++++++++++++++++++++++-
 sphinxcontrib/matlab.py          |   6 ++
 3 files changed, 190 insertions(+), 5 deletions(-)

diff --git a/sphinxcontrib/mat_documenters.py b/sphinxcontrib/mat_documenters.py
index b5cd00c..35fff8c 100644
--- a/sphinxcontrib/mat_documenters.py
+++ b/sphinxcontrib/mat_documenters.py
@@ -14,6 +14,7 @@
     MatFunction,
     MatClass,
     MatProperty,
+    MatEnumeration,
     MatMethod,
     MatScript,
     MatException,
@@ -555,8 +556,11 @@ def member_is_friend_of(member, friends):
             else:
                 return False
 
+        def member_is_enum(member):
+            return isinstance(member, MatEnumeration)
+        
         ret = []
-
+        
         # search for members in source code too
         namespace = ".".join(self.objpath)  # will be empty for modules
 
@@ -637,7 +641,7 @@ def member_is_friend_of(member, friends):
                 isattr = True
             else:
                 # ignore undocumented members if :undoc-members: is not given
-                keep = has_doc or self.options.undoc_members
+                keep = has_doc or self.options.undoc_members or member_is_enum(member)
 
             # give the user a chance to decide whether this member
             # should be skipped
@@ -656,7 +660,6 @@ def member_is_friend_of(member, friends):
 
             if keep:
                 ret.append((membername, member, isattr))
-
         return ret
 
     def document_members(self, all_members=False):
@@ -1229,11 +1232,17 @@ def document_members(self, all_members=False):
             for (membername, member) in filtered_members
             if isinstance(member, MatMethod) and member.name != member.cls.name
         ]
+        # create list of enums
+        enum_names = [
+            membername
+            for (membername, member) in filtered_members
+            if isinstance(member, MatEnumeration)
+        ]
         # create list of other members
         other_names = [
             membername
             for (membername, member) in filtered_members
-            if not isinstance(member, MatMethod) and not isinstance(member, MatProperty)
+            if not isinstance(member, MatMethod) and not isinstance(member, MatProperty) and not isinstance(member, MatEnumeration)
             # exclude parent modules with names matching members (as in Myclass.Myclass)
             and not (hasattr(member, "module") and member.name == member.module)
         ]
@@ -1255,6 +1264,12 @@ def document_members(self, all_members=False):
             for (membername, member) in members
             if not isinstance(member, MatMethod) or member.name == member.cls.name
         ]
+        # create list of members that are not enumerations
+        non_enums = [
+            membername
+            for (membername, member) in members
+            if not isinstance(member, MatEnumeration)
+        ]
         # create list of members that are not non-constructor methods
         non_other = [
             membername
@@ -1280,6 +1295,12 @@ def document_members(self, all_members=False):
             self.document_member_section(
                 "Property Summary", non_properties, all_members
             )
+            
+        # enumerations
+        if enum_names:
+            self.document_member_section(
+                "Enumeration Values", non_enums, all_members
+            )
 
         # methods
         if meth_names:
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index 964b012..f76a697 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -26,6 +26,7 @@
     "MatFunction",
     "MatClass",
     "MatProperty",
+    "MatEnumerations",
     "MatMethod",
     "MatScript",
     "MatException",
@@ -1068,6 +1069,8 @@ def __init__(self, name, modname, tokens):
         self.properties = {}
         #: dictionary of class methods
         self.methods = {}
+        #: dictionary of class enumerations
+        self.enumerations = {}
         #: remaining tokens after main class definition is parsed
         self.rem_tks = None
         # =====================================================================
@@ -1421,10 +1424,143 @@ def __init__(self, name, modname, tokens):
                         "[sphinxcontrib-matlabdomain] ignoring 'enumeration' in 'classdef %s'.",
                         self.name,
                     )
+                    # no attributes for enums
                     idx += 1
                     # Token.Keyword: "end" terminates events block
                     while self._tk_ne(idx, (Token.Keyword, "end")):
-                        idx += 1
+                        # skip whitespace
+                        while self._whitespace(idx):
+                            whitespace = self._whitespace(idx)
+                            if whitespace:
+                                idx += whitespace
+                            else:
+                                idx += 1
+
+                        # =========================================================
+                        # long docstring before enumeration member
+                        if self.tokens[idx][0] is Token.Comment:
+                            # docstring
+                            docstring = ""
+
+                            # Collect comment lines
+                            while self.tokens[idx][0] is Token.Comment:
+                                docstring += self.tokens[idx][1].lstrip("%")
+                                idx += 1
+                                idx += self._blanks(idx)
+
+                                try:
+                                    # Check if end of line was reached
+                                    if self._is_newline(idx):
+                                        docstring += "\n"
+                                        idx += 1
+                                        idx += self._blanks(idx)
+
+                                    # Check if variable name is next
+                                    if self.tokens[idx][0] is Token.Name:
+                                        enum_name = self.tokens[idx][1]
+                                        self.enumerations[enum_name] = {}
+                                        self.enumerations[enum_name][
+                                            "docstring"
+                                        ] = docstring
+                                        break
+
+                                    # If there is an empty line at the end of
+                                    # the comment: discard it
+                                    elif self._is_newline(idx):
+                                        docstring = ""
+                                        idx += self._whitespace(idx)
+                                        break
+
+                                except IndexError:
+                                    # EOF reached, quit gracefully
+                                    break
+
+                        # with "%:" directive trumps docstring after property
+                        if self.tokens[idx][0] is Token.Name:
+                            enum_name = self.tokens[idx][1]
+                            idx += 1
+                            # Initialize enumeration entry if it was not already done
+                            if enum_name not in self.enumerations.keys():
+                                self.enumerations[enum_name] = {}
+
+                            # skip size, class and functions specifiers
+                            # TODO: parse args and do a postprocessing step.
+                            idx += self._propspec(idx)
+
+                            if self._tk_eq(idx, (Token.Punctuation, ";")):
+                                continue
+
+                            # This is because MATLAB allows a comma-separated list of enums
+                            if self._tk_eq(idx, (Token.Punctuation, ",")):
+                                continue
+
+                        # subtype of Name EG Name.Builtin used as Name
+                        elif self.tokens[idx][0] in Token.Name.subtypes:
+                            prop_name = self.tokens[idx][1]
+                            logger.debug(
+                                "[sphinxcontrib-matlabdomain] WARNING %s.%s.%s is a builtin name.",
+                                self.module,
+                                self.name,
+                                prop_name,
+                            )
+                            self.properties[prop_name] = {"attrs": attr_dict}
+                            idx += 1
+
+                            # skip size, class and functions specifiers
+                            # TODO: Parse old and new style property extras
+                            idx += self._propspec(idx)
+
+                            if self._tk_eq(idx, (Token.Punctuation, ";")):
+                                continue
+
+                        elif self._tk_eq(idx, (Token.Keyword, "end")):
+                            idx += 1
+                            break
+                        # skip semicolon after property name, but no default
+                        elif self._tk_eq(idx, (Token.Punctuation, ";")):
+                            idx += 1
+                            # A comment might come after semi-colon
+                            idx += self._blanks(idx)
+                            if self._is_newline(idx):
+                                idx += 1
+                                # Property definition is finished; add missing values
+                                if "default" not in self.properties[prop_name].keys():
+                                    self.properties[prop_name]["default"] = None
+                                if "docstring" not in self.properties[prop_name].keys():
+                                    self.properties[prop_name]["docstring"] = None
+
+                                continue
+                            elif self.tokens[idx][0] is Token.Comment:
+                                docstring = self.tokens[idx][1].lstrip("%")
+                                docstring += "\n"
+                                self.properties[prop_name]["docstring"] = docstring
+                                idx += 1
+                        elif self.tokens[idx][0] is Token.Comment:
+                            # Comments separated with blank lines.
+                            idx = idx - 1
+                            continue
+                        else:
+                            logger.warning(
+                                "sphinxcontrib-matlabdomain] Expected enumeration in %s.%s - got %s",
+                                self.module,
+                                self.name,
+                                str(self.tokens[idx]),
+                            )
+                            return
+                        idx += self._blanks(idx)  # skip blanks
+
+                        # docstring
+                        if "docstring" not in self.enumerations[enum_name].keys():
+                            docstring = {"docstring": None}
+                            if self.tokens[idx][0] is Token.Comment:
+                                docstring["docstring"] = self.tokens[idx][1].lstrip("%")
+                                idx += 1
+                            self.enumerations[enum_name].update(docstring)
+                        elif self.tokens[idx][0] is Token.Comment:
+                            # skip this comment
+                            idx += 1
+
+                        idx += self._whitespace(idx)
                     idx += 1
                 if self._tk_eq(idx, (Token.Punctuation, ";")):
                     # Skip trailing semicolon after end.
@@ -1603,11 +1739,16 @@ def getter(self, name, *defargs):
             return self.__bases__
         elif name in self.properties:
             return MatProperty(name, self, self.properties[name])
+        elif name in self.enumerations:
+            return MatEnumeration(name, self, self.enumerations[name])
         elif name in self.methods:
             return self.methods[name]
+        elif name in self.enumerations:
+            return
         elif name == "__dict__":
             objdict = dict([(pn, self.getter(pn)) for pn in self.properties.keys()])
             objdict.update(self.methods)
+            objdict.update(dict([(en, self.getter(en)) for en in self.enumerations.keys()]))
             return objdict
         else:
             super(MatClass, self).getter(name, *defargs)
@@ -1634,6 +1775,23 @@ def __module__(self):
     def __doc__(self):
         return self.docstring
 
+class MatEnumeration(MatObject):
+    def __init__(self, name, cls, attrs):
+        super(MatEnumeration, self).__init__(name)
+        self.cls = cls
+        self.docstring = attrs["docstring"]
+        
+    def ref_role(self):
+        """Returns role to use for references to this object (e.g. when generating auto-links)"""
+        return "enum"
+
+    @property
+    def __module__(self):
+        return self.cls.module
+
+    @property
+    def __doc__(self):
+        return self.docstring
 
 class MatMethod(MatFunction):
     def __init__(self, modname, tks, cls, attrs):
diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py
index d609e33..5764f9f 100644
--- a/sphinxcontrib/matlab.py
+++ b/sphinxcontrib/matlab.py
@@ -712,6 +712,7 @@ class MATLABDomain(Domain):
         "class": MatXRefRole(),
         "const": MatXRefRole(),
         "attr": MatXRefRole(),
+        "enum": MatXRefRole(),
         "meth": MatXRefRole(fix_parens=True),
         "mod": MatXRefRole(),
         "obj": MatXRefRole(),
@@ -921,6 +922,11 @@ def setup(app):
         "mat", "autoattribute", mat_directives.MatlabAutodocDirective
     )
 
+    app.registry.add_documenter("mat:enum", doc.MatAttributeDocumenter)
+    app.add_directive_to_domain(
+        "mat", "autoenum", mat_directives.MatlabAutodocDirective
+    )
+
     app.registry.add_documenter("mat:data", doc.MatDataDocumenter)
     app.add_directive_to_domain(
         "mat", "autodata", mat_directives.MatlabAutodocDirective

From e5384dad3ceccb133c91eba1f53b626a183482a3 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Fri, 19 Jul 2024 11:44:18 +0200
Subject: [PATCH 04/45] better classdef parsing including changes to
 MATLAB-language-grammar prs #86, #88, and #90

---
 sphinxcontrib/mat_textmate_parser.py | 137 ++++++++++++++++++---------
 1 file changed, 93 insertions(+), 44 deletions(-)

diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py
index 1ea1c8f..db24a5c 100644
--- a/sphinxcontrib/mat_textmate_parser.py
+++ b/sphinxcontrib/mat_textmate_parser.py
@@ -1,6 +1,6 @@
 from textmate_grammar.parsers.matlab import MatlabParser
 
-rpath = "../tests/test_data/ClassWithPropertyValidators.m"
+rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
 
 
 def find_first_child(curr, tok):
@@ -49,71 +49,119 @@ def __init__(self, path):
         pdb.set_trace()
 
     def _find_class_docstring(self):
-        if self.cls.children[1].token == "comment.line.percentage.matlab":
+        try:
+            possible_comment_tok = self.cls.children[1]
+        except IndexError:
+            print("found no docstring")
+            return
+
+        if possible_comment_tok.token == "comment.line.percentage.matlab":
             self._docstring_lines()
-        elif self.cls.children[1].token == "comment.block.percentage.matlab":
-            self.docstring = (
-                self.cls.children[1].content.strip()[2:-2].strip()
-            )  # [2,-2] strips out block comment delimiters
+        elif possible_comment_tok.token == "comment.block.percentage.matlab":
+            self.docstring = possible_comment_tok.content.strip()[
+                2:-2
+            ].strip()  # [2,-2] strips out block comment delimiters
         else:
             print("found no docstring")
 
     def _docstring_lines(self):
         idx = 1
-        while self.cls.children[idx].token == "comment.line.percentage.matlab":
+        cls_children = self.cls.children
+
+        while (
+            idx < len(cls_children)
+            and cls_children[idx].token == "comment.line.percentage.matlab"
+        ):
             self.docstring += (
-                self.cls.children[idx].content[1:] + "\n"
+                cls_children[idx].content[1:] + "\n"
             )  # [1:] strips out percent sign
             idx += 1
         self.docstring = self.docstring.strip()
 
     def _parse_clsdef(self):
-        for child in self.clsdef.children:
-            child.print()
+        # Try parsing attrs
+        attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab")
+        try:
+            attrs_tok, _ = next(attrs_tok_gen)
+            self._parse_class_attributes(attrs_tok)
+        except StopIteration:
+            pass
 
+        # Parse classname
+        classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab")
+        try:
+            classname_tok, _ = next(classname_tok_gen)
+            self.name = classname_tok.content
+        except StopIteration:
+            print("ClassName not found")  # TODO this is probably fatal
+
+        # Parse interited classes
+        parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab")
+
+        for parent_class_tok, _ in parent_class_toks:
+            sections = parent_class_tok.findall(
+                tokens=[
+                    "entity.name.namespace.matlab",
+                    "entity.other.inherited-class.matlab",
+                ]
+            )
+            super_cls = tuple([sec.content for sec, _ in sections])
+            self.supers.append(super_cls)
         # Parse Attributes TODO maybe there is a smarter way to do this?
         idx = 0
         while self.clsdef.children[idx].token == "storage.modifier.class.matlab":
-            attr = self.clsdef.children[idx].content
+            attr_tok = self.clsdef.children[idx]
+            attr = attr_tok.content
             val = None  # TODO maybe do some typechecking here or we can assume that you give us valid Matlab
             idx += 1
-            if (
-                self.clsdef.children[idx].token == "keyword.operator.assignment.matlab"
-            ):  # pull out r.h.s
+            if attr_tok.token == "keyword.operator.assignment.matlab":  # pull out r.h.s
                 idx += 1
                 val = self.clsdef.children[idx].content
                 idx += 1
             if (
-                self.clsdef.children[idx].token
-                == "punctuation.separator.modifier.comma.matlab"
+                attr_tok.token == "punctuation.separator.modifier.comma.matlab"
             ):  # skip commas
                 idx += 1
             self.attrs[attr] = val
 
-        if (
-            self.clsdef.children[idx].token == "punctuation.section.parens.end.matlab"
-        ):  # Skip end of attrs
-            idx += 1
-
-        # name must be next
-        self.name = self.clsdef.children[idx].content
-        idx += 1
-
-        while idx < len(
-            self.clsdef.children
-        ):  # No children we care about after this except inherited classes
-            if self.clsdef.children[idx].token == "meta.inherited-class.matlab":
-                super_cls_tok = self.clsdef.children[idx]
-                # collect superclass as a tuple
-                super_cls = tuple(
-                    [
-                        child.content
-                        for child in super_cls_tok.children
-                        if not child.token.startswith("punctuation")
-                    ]
-                )
-                self.supers.append(super_cls)
-            idx += 1
+    def _parse_class_attributes(self, attrs_tok):
+        # walk down child list and parse manually
+        # TODO perhaps contribute a delimited list find to textmate-grammar-python
+        children = attrs_tok.children
+        idx = 0
+        while idx < len(children):
+            child_tok = children[idx]
+            if child_tok.token == "storage.modifier.class.matlab":
+                attr = child_tok.content
+                val = None
+                idx += 1  # walk to next token
+                maybe_assign_tok = children[idx]
+                if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
+                    idx += 1
+                    rhs_tok = children[idx]  # parse right hand side
+                    if rhs_tok.token == "meta.cell.literal.matlab":
+                        # A cell. For now just take the whole cell as value.
+                        # TODO parse out the cell array of metaclass literals.
+                        val = "{" + rhs_tok.content + "}"
+                        idx += 1
+                    elif rhs_tok.token == "constant.language.boolean.matlab":
+                        val = rhs_tok.content
+                        idx += 1
+                    elif rhs_tok.token == "keyword.operator.other.question.matlab":
+                        idx += 1
+                        metaclass_tok = children[idx]
+                        metaclass_components = metaclass_tok.findall(
+                            tokens=[
+                                "entity.name.namespace.matlab",
+                                "entity.other.class.matlab",
+                            ]
+                        )
+                        val = tuple([comp.content for comp, _ in metaclass_components])
+                    else:
+                        pass
+                self.attrs[attr] = val
+            else:  # Comma or continuation therefore skip
+                idx += 1
 
     def _parse_property_section(self, section):
         # TODO parse property section attrs
@@ -241,12 +289,13 @@ def _parse_property_validation(self, prop_name, prop):
         # Now find list of validators
         validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1)
         try:
-            import pdb
-
-            pdb.set_trace()
             validator_tok, _ = next(validator_gen)
             validator_toks = validator_tok.findall(
-                tokens="variable.other.readwrite.matlab", depth=1
+                tokens=[
+                    "variable.other.readwrite.matlab",
+                    "meta.function-call.parens.matlab",
+                ],
+                depth=1,
             )  # TODO Probably bug here in MATLAB-Language-grammar
             self.properties[prop_name]["validators"] = [
                 tok[0].content for tok in validator_toks

From fee6d03dd46f0e802a182a298e7a16a9b255e6ab Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Mon, 22 Jul 2024 10:50:26 +0200
Subject: [PATCH 05/45] parse function docstring

---
 sphinxcontrib/mat_textmate_parser.py | 56 +++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py
index db24a5c..ae3f342 100644
--- a/sphinxcontrib/mat_textmate_parser.py
+++ b/sphinxcontrib/mat_textmate_parser.py
@@ -36,14 +36,14 @@ def __init__(self, path):
         method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1)
         enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1)
 
-        for section in property_sections:
-            self._parse_property_section(section[0])
+        for section, _ in property_sections:
+            self._parse_property_section(section)
 
-        for section in method_sections:
-            self._parse_method_section(section[0])
+        for section, _ in method_sections:
+            self._parse_method_section(section)
 
-        for section in enumeration_sections:
-            self._parse_enum_section(section[0])
+        for section, _ in enumeration_sections:
+            self._parse_enum_section(section)
         import pdb
 
         pdb.set_trace()
@@ -313,9 +313,6 @@ def _parse_method_section(self, section):
         for idx in idxs:
             meth_tok = section.children[idx]
             self._parse_function(meth_tok)
-            # TODO walk forward and backward to get property docstring.
-            # TODO if we have mutliple possible docstrings what is given priority?
-            # TODO parse out property validations syntax
 
     def _parse_function(self, fun_tok):
         """Parse Function definition"""
@@ -343,9 +340,50 @@ def _parse_function(self, fun_tok):
             self.methods[fun_name]["params"][param.content] = {}
 
         # find arguments blocks
+        arg_section = None
         for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"):
             self._parse_argument_section(fun_name, arg_section)
 
+        fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab")
+        try:
+            fun_decl_tok, _ = next(fun_decl_gen)
+        except StopIteration:
+            raise Exception(
+                "missing function declaration"
+            )  # This cant happen as we'd be missing a function name
+
+        # Now parse for docstring
+        docstring = ""
+        comment_toks = fun_tok.findall(
+            tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"]
+        )
+        last_tok = arg_section if arg_section is not None else fun_decl_tok
+        import pdb
+
+        pdb.set_trace()
+        for comment_tok, _ in comment_toks:
+            if self._is_empty_line_between_tok(last_tok, comment_tok):
+                # If we have non-consecutive tokens quit right away.
+                break
+            elif (
+                not docstring and comment_tok.token == "comment.block.percentage.matlab"
+            ):
+                # If we have no previous docstring lines and a comment block we take
+                # the comment block as the docstring and exit.
+                docstring = comment_tok.content.strip()[
+                    2:-2
+                ].strip()  # [2,-2] strips out block comment delimiters
+                break
+            elif comment_tok.token == "comment.line.percentage.matlab":
+                # keep parsing comments
+                docstring += comment_tok.content[1:] + "\n"
+            else:
+                # we are done.
+                break
+            last_tok = comment_tok
+
+        self.methods[fun_name]["docstring"] = docstring if docstring else None
+
     def _parse_argument_section(self, fun_name, section):
         modifiers = [
             mod.content

From e75155f159c0fc0a2ee50186215b1c4c69e4c206 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Mon, 22 Jul 2024 11:19:30 +0200
Subject: [PATCH 06/45] extract function parser

---
 sphinxcontrib/mat_textmate_parser.py | 300 ++++++++++++++-------------
 1 file changed, 152 insertions(+), 148 deletions(-)

diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py
index ae3f342..8eba0b1 100644
--- a/sphinxcontrib/mat_textmate_parser.py
+++ b/sphinxcontrib/mat_textmate_parser.py
@@ -10,6 +10,151 @@ def find_first_child(curr, tok):
     return (curr.children[ind[0]], ind[0])
 
 
+def _toks_on_same_line(tok1, tok2):
+    """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results"""
+    line1 = _get_last_line_of_tok(tok1)
+    line2 = _get_first_line_of_tok(tok2)
+    return line1 == line2
+
+
+def _is_empty_line_between_tok(tok1, tok2):
+    """Note: pass tokens in order they appear"""
+    line1 = _get_last_line_of_tok(tok1)
+    line2 = _get_first_line_of_tok(tok2)
+    return line2 - line1 > 1
+
+
+def _get_first_line_of_tok(tok):
+    return min([loc[0] for loc in tok.characters.keys()])
+
+
+def _get_last_line_of_tok(tok):
+    return max([loc[0] for loc in tok.characters.keys()])
+
+
+class MatFunctionParser:
+    def __init__(self, fun_tok):
+        """Parse Function definition"""
+        # First find the function name
+        name_gen = fun_tok.find(tokens="entity.name.function.matlab")
+        try:
+            name_tok, _ = next(name_gen)
+            self.name = name_tok.content
+        except StopIteration:
+            # TODO correct error here
+            raise Exception("Couldn't find function name")
+
+        # Find outputs and parameters
+        output_gen = fun_tok.find(tokens="variable.parameter.output.matlab")
+        param_gen = fun_tok.find(tokens="variable.parameter.input.matlab")
+
+        self.outputs = {}
+        self.params = {}
+
+        for out, _ in output_gen:
+            self.outputs[out.content] = {}
+
+        for param, _ in param_gen:
+            self.params[param.content] = {}
+
+        # find arguments blocks
+        arg_section = None
+        for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"):
+            self._parse_argument_section(arg_section)
+
+        fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab")
+        try:
+            fun_decl_tok, _ = next(fun_decl_gen)
+        except StopIteration:
+            raise Exception(
+                "missing function declaration"
+            )  # This cant happen as we'd be missing a function name
+
+        # Now parse for docstring
+        docstring = ""
+        comment_toks = fun_tok.findall(
+            tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"]
+        )
+        last_tok = arg_section if arg_section is not None else fun_decl_tok
+
+        for comment_tok, _ in comment_toks:
+            if _is_empty_line_between_tok(last_tok, comment_tok):
+                # If we have non-consecutive tokens quit right away.
+                break
+            elif (
+                not docstring and comment_tok.token == "comment.block.percentage.matlab"
+            ):
+                # If we have no previous docstring lines and a comment block we take
+                # the comment block as the docstring and exit.
+                docstring = comment_tok.content.strip()[
+                    2:-2
+                ].strip()  # [2,-2] strips out block comment delimiters
+                break
+            elif comment_tok.token == "comment.line.percentage.matlab":
+                # keep parsing comments
+                docstring += comment_tok.content[1:] + "\n"
+            else:
+                # we are done.
+                break
+            last_tok = comment_tok
+
+        self.docstring = docstring if docstring else None
+
+    def _parse_argument_section(self, section):
+        modifiers = [
+            mod.content
+            for mod, _ in section.find(tokens="storage.modifier.arguments.matlab")
+        ]
+        arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab")
+        for arg_def, _ in arg_def_gen:
+            arg_name = arg_def.begin[
+                0
+            ].content  # Get argument name that is being defined
+            self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers)
+
+    def _parse_argument_validation(self, arg_name, arg, modifiers):
+        # TODO This should be identical to propery validation I think. Refactor
+        # First get the size if found
+        section = "output" if "Output" in modifiers else "params"
+        size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1)
+        try:  # We have a size, therefore parse the comma separated list into tuple
+            size_tok, _ = next(size_gen)
+            size_elem_gen = size_tok.find(
+                tokens=[
+                    "constant.numeric.decimal.matlab",
+                    "keyword.operator.vector.colon.matlab",
+                ],
+                depth=1,
+            )
+            size = tuple([elem[0].content for elem in size_elem_gen])
+            self.methods[fun_name][section][arg_name]["size"] = size
+        except StopIteration:
+            pass
+
+        # Now find the type if it exists
+        # TODO this should be mapped to known types (though perhaps as a postprocess)
+        type_gen = arg.find(tokens="storage.type.matlab", depth=1)
+        try:
+            self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[
+                0
+            ].content
+        except StopIteration:
+            pass
+
+        # Now find list of validators
+        validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1)
+        try:
+            validator_tok, _ = next(validator_gen)
+            validator_toks = validator_tok.findall(
+                tokens="variable.other.readwrite.matlab", depth=1
+            )  # TODO Probably bug here in MATLAB-Language-grammar
+            self.methods[fun_name][section][arg_name]["validators"] = [
+                tok[0].content for tok in validator_toks
+            ]
+        except StopIteration:
+            pass
+
+
 class MatClassParser:
     def __init__(self, path):
         # DATA
@@ -196,7 +341,7 @@ def _parse_property_section(self, section):
             next_tok = prop_tok
             while walk_back_idx >= 0:
                 walk_tok = section.children[walk_back_idx]
-                if self._is_empty_line_between_tok(walk_tok, next_tok):
+                if _is_empty_line_between_tok(walk_tok, next_tok):
                     # Once there is an empty line between consecutive tokens we are done.
                     break
 
@@ -227,7 +372,7 @@ def _parse_property_section(self, section):
             while walk_fwd_idx < len(section.children):
                 walk_tok = section.children[walk_fwd_idx]
 
-                if self._is_empty_line_between_tok(prev_tok, walk_tok):
+                if _is_empty_line_between_tok(prev_tok, walk_tok):
                     # Once there is an empty line between consecutive tokens we are done.
                     break
 
@@ -312,131 +457,8 @@ def _parse_method_section(self, section):
         ]
         for idx in idxs:
             meth_tok = section.children[idx]
-            self._parse_function(meth_tok)
-
-    def _parse_function(self, fun_tok):
-        """Parse Function definition"""
-        # First find the function name
-        name_gen = fun_tok.find(tokens="entity.name.function.matlab")
-        try:
-            name_tok, _ = next(name_gen)
-            fun_name = name_tok.content
-        except StopIteration:
-            # TODO correct error here
-            raise Exception("Couldn't find function name")
-
-        # Find outputs and parameters
-        output_gen = fun_tok.find(tokens="variable.parameter.output.matlab")
-        param_gen = fun_tok.find(tokens="variable.parameter.input.matlab")
-
-        self.methods[fun_name] = {}
-        self.methods[fun_name]["outputs"] = {}
-        self.methods[fun_name]["params"] = {}
-
-        for out, _ in output_gen:
-            self.methods[fun_name]["outputs"][out.content] = {}
-
-        for param, _ in param_gen:
-            self.methods[fun_name]["params"][param.content] = {}
-
-        # find arguments blocks
-        arg_section = None
-        for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"):
-            self._parse_argument_section(fun_name, arg_section)
-
-        fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab")
-        try:
-            fun_decl_tok, _ = next(fun_decl_gen)
-        except StopIteration:
-            raise Exception(
-                "missing function declaration"
-            )  # This cant happen as we'd be missing a function name
-
-        # Now parse for docstring
-        docstring = ""
-        comment_toks = fun_tok.findall(
-            tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"]
-        )
-        last_tok = arg_section if arg_section is not None else fun_decl_tok
-        import pdb
-
-        pdb.set_trace()
-        for comment_tok, _ in comment_toks:
-            if self._is_empty_line_between_tok(last_tok, comment_tok):
-                # If we have non-consecutive tokens quit right away.
-                break
-            elif (
-                not docstring and comment_tok.token == "comment.block.percentage.matlab"
-            ):
-                # If we have no previous docstring lines and a comment block we take
-                # the comment block as the docstring and exit.
-                docstring = comment_tok.content.strip()[
-                    2:-2
-                ].strip()  # [2,-2] strips out block comment delimiters
-                break
-            elif comment_tok.token == "comment.line.percentage.matlab":
-                # keep parsing comments
-                docstring += comment_tok.content[1:] + "\n"
-            else:
-                # we are done.
-                break
-            last_tok = comment_tok
-
-        self.methods[fun_name]["docstring"] = docstring if docstring else None
-
-    def _parse_argument_section(self, fun_name, section):
-        modifiers = [
-            mod.content
-            for mod, _ in section.find(tokens="storage.modifier.arguments.matlab")
-        ]
-        arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab")
-        for arg_def, _ in arg_def_gen:
-            arg_name = arg_def.begin[
-                0
-            ].content  # Get argument name that is being defined
-            self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers)
-
-    def _parse_argument_validation(self, fun_name, arg_name, arg, modifiers):
-        # TODO This should be identical to propery validation I think. Refactor
-        # First get the size if found
-        section = "output" if "Output" in modifiers else "params"
-        size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1)
-        try:  # We have a size, therefore parse the comma separated list into tuple
-            size_tok, _ = next(size_gen)
-            size_elem_gen = size_tok.find(
-                tokens=[
-                    "constant.numeric.decimal.matlab",
-                    "keyword.operator.vector.colon.matlab",
-                ],
-                depth=1,
-            )
-            size = tuple([elem[0].content for elem in size_elem_gen])
-            self.methods[fun_name][section][arg_name]["size"] = size
-        except StopIteration:
-            pass
-
-        # Now find the type if it exists
-        # TODO this should be mapped to known types (though perhaps as a postprocess)
-        type_gen = arg.find(tokens="storage.type.matlab", depth=1)
-        try:
-            self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[
-                0
-            ].content
-        except StopIteration:
-            pass
-
-        # Now find list of validators
-        validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1)
-        try:
-            validator_tok, _ = next(validator_gen)
-            validator_toks = validator_tok.findall(
-                tokens="variable.other.readwrite.matlab", depth=1
-            )  # TODO Probably bug here in MATLAB-Language-grammar
-            self.methods[fun_name][section][arg_name]["validators"] = [
-                tok[0].content for tok in validator_toks
-            ]
-        except StopIteration:
-            pass
+            parsed_function = MatFunctionParser(meth_tok)
+            self.methods[parsed_function.name] = parsed_function
 
     def _parse_enum_section(self, section):
         # TODO parse property section attrs
@@ -470,7 +492,7 @@ def _parse_enum_section(self, section):
             next_tok = enum_tok
             while walk_back_idx >= 0:
                 walk_tok = section.children[walk_back_idx]
-                if self._is_empty_line_between_tok(walk_tok, next_tok):
+                if _is_empty_line_between_tok(walk_tok, next_tok):
                     # Once there is an empty line between consecutive tokens we are done.
                     break
 
@@ -502,7 +524,7 @@ def _parse_enum_section(self, section):
             while walk_fwd_idx < len(section.children):
                 walk_tok = section.children[walk_fwd_idx]
 
-                if self._is_empty_line_between_tok(prev_tok, walk_tok):
+                if _is_empty_line_between_tok(prev_tok, walk_tok):
                     # Once there is an empty line between consecutive tokens we are done.
                     break
 
@@ -516,7 +538,7 @@ def _parse_enum_section(self, section):
                     break
                 elif walk_tok.token == "comment.line.percentage.matlab":
                     # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit.
-                    if self._toks_on_same_line(section.children[idx], walk_tok):
+                    if _toks_on_same_line(section.children[idx], walk_tok):
                         inline_docstring = walk_tok.content[1:]
                         break
 
@@ -540,24 +562,6 @@ def _parse_enum_section(self, section):
             else:
                 self.enumerations[enum_name]["docstring"] = None
 
-    def _toks_on_same_line(self, tok1, tok2):
-        """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results"""
-        line1 = self._get_last_line_of_tok(tok1)
-        line2 = self._get_first_line_of_tok(tok2)
-        return line1 == line2
-
-    def _is_empty_line_between_tok(self, tok1, tok2):
-        """Note: pass tokens in order they appear"""
-        line1 = self._get_last_line_of_tok(tok1)
-        line2 = self._get_first_line_of_tok(tok2)
-        return line2 - line1 > 1
-
-    def _get_first_line_of_tok(self, tok):
-        return min([loc[0] for loc in tok.characters.keys()])
-
-    def _get_last_line_of_tok(self, tok):
-        return max([loc[0] for loc in tok.characters.keys()])
-
 
 if __name__ == "__main__":
     cls_parse = MatClassParser(rpath)

From 597ab641e3eabe9d2e44248cd7bb6363ad5720f4 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Mon, 22 Jul 2024 16:49:16 +0200
Subject: [PATCH 07/45] initial work

---
 sphinxcontrib/mat_textmate_parser.py |  29 +-
 sphinxcontrib/mat_types.py           | 745 +++------------------------
 sphinxcontrib/matlab.py              |   1 +
 3 files changed, 70 insertions(+), 705 deletions(-)

diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py
index 8eba0b1..0320df2 100644
--- a/sphinxcontrib/mat_textmate_parser.py
+++ b/sphinxcontrib/mat_textmate_parser.py
@@ -1,6 +1,4 @@
-from textmate_grammar.parsers.matlab import MatlabParser
-
-rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+rpath = "../../../syscop/software/nosnoc/src/NosnocIpoptCallback.m"
 
 
 def find_first_child(curr, tok):
@@ -110,12 +108,12 @@ def _parse_argument_section(self, section):
             arg_name = arg_def.begin[
                 0
             ].content  # Get argument name that is being defined
-            self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers)
+            self._parse_argument_validation(arg_name, arg_def, modifiers)
 
     def _parse_argument_validation(self, arg_name, arg, modifiers):
         # TODO This should be identical to propery validation I think. Refactor
         # First get the size if found
-        section = "output" if "Output" in modifiers else "params"
+        section = self.outputs if "Output" in modifiers else self.params
         size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1)
         try:  # We have a size, therefore parse the comma separated list into tuple
             size_tok, _ = next(size_gen)
@@ -127,7 +125,7 @@ def _parse_argument_validation(self, arg_name, arg, modifiers):
                 depth=1,
             )
             size = tuple([elem[0].content for elem in size_elem_gen])
-            self.methods[fun_name][section][arg_name]["size"] = size
+            section[arg_name]["size"] = size
         except StopIteration:
             pass
 
@@ -135,9 +133,7 @@ def _parse_argument_validation(self, arg_name, arg, modifiers):
         # TODO this should be mapped to known types (though perhaps as a postprocess)
         type_gen = arg.find(tokens="storage.type.matlab", depth=1)
         try:
-            self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[
-                0
-            ].content
+            section[arg_name]["type"] = next(type_gen)[0].content
         except StopIteration:
             pass
 
@@ -148,15 +144,13 @@ def _parse_argument_validation(self, arg_name, arg, modifiers):
             validator_toks = validator_tok.findall(
                 tokens="variable.other.readwrite.matlab", depth=1
             )  # TODO Probably bug here in MATLAB-Language-grammar
-            self.methods[fun_name][section][arg_name]["validators"] = [
-                tok[0].content for tok in validator_toks
-            ]
+            section[arg_name]["validators"] = [tok[0].content for tok in validator_toks]
         except StopIteration:
             pass
 
 
 class MatClassParser:
-    def __init__(self, path):
+    def __init__(self, tokens):
         # DATA
         self.name = ""
         self.supers = []
@@ -168,8 +162,7 @@ def __init__(self, path):
 
         # Maybe remove continuations as a crutch? currently parser is broken for continuations in attributes
         # self.parser = MatlabParser(remove_line_continuations=True)
-        self.parser = MatlabParser()
-        self.parsed = self.parser.parse_file(path)
+        self.parsed = tokens
         self.cls, _ = find_first_child(self.parsed, "meta.class.matlab")
         if not self.cls:
             raise Exception()  # TODO better exception
@@ -189,9 +182,6 @@ def __init__(self, path):
 
         for section, _ in enumeration_sections:
             self._parse_enum_section(section)
-        import pdb
-
-        pdb.set_trace()
 
     def _find_class_docstring(self):
         try:
@@ -474,7 +464,8 @@ def _parse_enum_section(self, section):
             enum_name = enum_tok.children[0].content
             self.enumerations[enum_name] = {}
             if (
-                section.children[idx + 1].token == "meta.parens.matlab"
+                idx + 1 < len(section.children)
+                and section.children[idx + 1].token == "meta.parens.matlab"
             ):  # Parse out args TODO this should be part of enummember assignment definition
                 args = tuple(
                     [
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index f76a697..18a9b5a 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -17,6 +17,10 @@
 from zipfile import ZipFile
 import xml.etree.ElementTree as ET
 import sphinxcontrib.mat_parser as mat_parser
+from sphinxcontrib.mat_textmate_parser import MatClassParser, MatFunctionParser
+from textmate_grammar.parsers.matlab import MatlabParser
+import logging
+from pathlib import Path
 
 logger = sphinx.util.logging.getLogger("matlab-domain")
 
@@ -430,7 +434,9 @@ def matlabify(objname):
 
             # make a full path out of basedir and objname
             fullpath = os.path.join(MatObject.basedir, objname)  # objname fullpath
+        import pdb
 
+        pdb.set_trace()
         logger.debug(
             f"[sphinxcontrib-matlabdomain] matlabify {package=}, {objname=}, {fullpath=}"
         )
@@ -495,40 +501,69 @@ def parse_mfile(mfile, name, path, encoding=None):
 
         full_code = code
 
+        print(mfile)
+
         # remove the top comment header (if there is one) from the code string
-        code = mat_parser.remove_comment_header(code)
-        code = mat_parser.remove_line_continuations(code)
-        code = mat_parser.fix_function_signatures(code)
+        # code = mat_parser.remove_comment_header(code)
+        # code = mat_parser.remove_line_continuations(code)
+        # code = mat_parser.fix_function_signatures(code)
+        # TODO: This might not be necessary
 
-        tks = list(MatlabLexer().get_tokens(code))
+        logging.getLogger("textmate_grammar").setLevel(logging.ERROR)
+        parser = MatlabParser()
+        toks = parser.parse_file(mfile)
 
         modname = path.replace(os.sep, ".")  # module name
 
         # assume that functions and classes always start with a keyword
         def isFunction(token):
-            return token == (Token.Keyword, "function")
+            comments_and_functions = [
+                "comment.block.percentage.matlab",
+                "comment.line.percentage.matlab",
+                "meta.function.matlab",
+            ]
+            return all(
+                [(child.token in comments_and_functions) for child in token.children]
+            )
 
         def isClass(token):
-            return token == (Token.Keyword, "classdef")
+            tok_gen = token.find(tokens="meta.class.matlab", depth=1)
+            try:
+                tok, _ = next(tok_gen)
+                return True
+            except StopIteration:
+                return False
 
-        if isClass(tks[0]):
+        if isClass(toks):
             logger.debug(
                 "[sphinxcontrib-matlabdomain] parsing classdef %s from %s.",
                 name,
                 modname,
             )
-            return MatClass(name, modname, tks)
-        elif isFunction(tks[0]):
+            return MatClass(name, modname, toks)
+        elif isFunction(toks):
             logger.debug(
                 "[sphinxcontrib-matlabdomain] parsing function %s from %s.",
                 name,
                 modname,
             )
-            return MatFunction(name, modname, tks)
+            fun_tok_gen = toks.find(tokens="meta.function.matlab")
+            parsed_function = None
+            try:
+                fun_tok, _ = next(fun_tok_gen)
+                parsed_function = MatFunctionParser(fun_tok)
+            except StopIteration:
+                logger.warning(
+                    "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. No function found.",
+                    modname,
+                    name,
+                )
+            return MatFunction(name, modname, toks)
         else:
+            pass
             # it's a script file retoken with header comment
-            tks = list(MatlabLexer().get_tokens(full_code))
-            return MatScript(name, modname, tks)
+            # tks = list(MatlabLexer().get_tokens(full_code))
+            # return MatScript(name, modname, toks)
         return None
 
     @staticmethod
@@ -846,177 +881,17 @@ class MatFunction(MatObject):
 
     def __init__(self, name, modname, tokens):
         super(MatFunction, self).__init__(name)
+        parsed_function = MatFunctionParser(tokens)
         #: Path of folder containing :class:`MatObject`.
         self.module = modname
-        #: List of tokens parsed from mfile by Pygments.
-        self.tokens = tokens
         #: docstring
-        self.docstring = ""
+        self.docstring = parsed_function.docstring
         #: output args
-        self.retv = None
+        self.retv = parsed_function.outputs
         #: input args
-        self.args = None
+        self.args = parsed_function.params
         #: remaining tokens after main function is parsed
         self.rem_tks = None
-        # =====================================================================
-        # parse tokens
-        # XXX: Pygments always reads MATLAB function signature as:
-        # [(Token.Keyword, 'function'),  # any whitespace is stripped
-        #  (Token.Text.Whitesapce, ' '),  # spaces and tabs are concatenated
-        #  (Token.Text, '[o1, o2]'),  # if there are outputs, they're all
-        #                               concatenated w/ or w/o brackets and any
-        #                               trailing whitespace
-        #  (Token.Punctuation, '='),  # possibly an equal sign
-        #  (Token.Text.Whitesapce, ' '),  # spaces and tabs are concatenated
-        #  (Token.Name.Function, 'myfun'),  # the name of the function
-        #  (Token.Punctuation, '('),  # opening parenthesis
-        #  (Token.Text, 'a1, a2',  # if there are args, they're concatenated
-        #  (Token.Punctuation, ')'),  # closing parenthesis
-        #  (Token.Text.Whitesapce, '\n')]  # all whitespace after args
-        # XXX: Pygments does not tolerate MATLAB continuation ellipsis!
-        tks = copy(self.tokens)  # make a copy of tokens
-        tks.reverse()  # reverse in place for faster popping, stacks are LiLo
-        try:
-            # =====================================================================
-            # parse function signature
-            # function [output] = name(inputs)
-            # % docstring
-            # =====================================================================
-            # Skip function token - already checked in MatObject.parse_mfile
-            tks.pop()
-            skip_whitespace(tks)
-
-            #  Check for return values
-            retv = tks.pop()
-            if retv[0] is Token.Text:
-                self.retv = [rv.strip() for rv in retv[1].strip("[ ]").split(",")]
-                if len(self.retv) == 1:
-                    # check if return is empty
-                    if not self.retv[0]:
-                        self.retv = None
-                    # check if return delimited by whitespace
-                    elif " " in self.retv[0] or "\t" in self.retv[0]:
-                        self.retv = [
-                            rv
-                            for rv_tab in self.retv[0].split("\t")
-                            for rv in rv_tab.split(" ")
-                        ]
-                if tks.pop() != (Token.Punctuation, "="):
-                    # Unlikely to end here. But never-the-less warn!
-                    logger.warning(
-                        "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. Expected '='.",
-                        modname,
-                        name,
-                    )
-                    return
-
-                skip_whitespace(tks)
-            elif retv[0] is Token.Name.Function:
-                tks.append(retv)
-            # =====================================================================
-            # function name
-            func_name = tks.pop()
-            func_name = (
-                func_name[0],
-                func_name[1].strip(" ()"),
-            )  # Strip () in case of dummy arg
-            if func_name != (Token.Name.Function, self.name):  # @UndefinedVariable
-                if isinstance(self, MatMethod):
-                    self.name = func_name[1]
-                else:
-                    logger.warning(
-                        "[sphinxcontrib-matlabdomain] Unexpected function name: '%s'. "
-                        "Expected '%s' in module '%s'.",
-                        func_name[1],
-                        name,
-                        modname,
-                    )
-
-            # =====================================================================
-            # input args
-            if tks.pop() == (Token.Punctuation, "("):
-                args = tks.pop()
-                if args[0] is Token.Text:
-                    self.args = [
-                        arg.strip() for arg in args[1].split(",")
-                    ]  # no arguments given
-                elif args == (Token.Punctuation, ")"):
-                    # put closing parenthesis back in stack
-                    tks.append(args)
-                # check if function args parsed correctly
-                if tks.pop() != (Token.Punctuation, ")"):
-                    # Unlikely to end here. But never-the-less warn!
-                    logger.warning(
-                        "[sphinxcontrib-matlabdomain] Parsing failed in {}.{}. Expected ')'.",
-                        modname,
-                        name,
-                    )
-                    return
-
-            skip_whitespace(tks)
-            # =====================================================================
-            # docstring
-            try:
-                docstring = tks.pop()
-            except IndexError:
-                docstring = None
-            while docstring and docstring[0] is Token.Comment:
-                self.docstring += docstring[1].lstrip("%")
-                # Get newline if it exists and append to docstring
-                try:
-                    wht = tks.pop()  # We expect a newline
-                except IndexError:
-                    break
-                if wht[0] in (Token.Text, Token.Text.Whitespace) and wht[1] == "\n":
-                    self.docstring += "\n"
-                # Skip whitespace
-                try:
-                    wht = tks.pop()  # We expect a newline
-                except IndexError:
-                    break
-                while wht in list(zip((Token.Text,) * 3, (" ", "\t"))):
-                    try:
-                        wht = tks.pop()
-                    except IndexError:
-                        break
-                docstring = wht  # check if Token is Comment
-
-            # Find the end of the function - used in `MatMethod`` to determine where a method ends.
-            if docstring is None:
-                return
-            kw = docstring  # last token
-            lastkw = 0  # set last keyword placeholder
-            kw_end = 1  # count function keyword
-            while kw_end > 0:
-                # increment keyword-end pairs count
-                if kw in MATLAB_KEYWORD_REQUIRES_END:
-                    kw_end += 1
-                # nested function definition
-                elif kw[0] is Token.Keyword and kw[1].strip() == "function":
-                    kw_end += 1
-                # decrement keyword-end pairs count but
-                # don't decrement `end` if used as index
-                elif kw == (Token.Keyword, "end") and not lastkw:
-                    kw_end -= 1
-                # save last punctuation
-                elif kw in MATLAB_FUNC_BRACES_BEGIN:
-                    lastkw += 1
-                elif kw in MATLAB_FUNC_BRACES_END:
-                    lastkw -= 1
-                try:
-                    kw = tks.pop()
-                except IndexError:
-                    break
-            tks.append(kw)  # put last token back in list
-        except IndexError:
-            logger.warning(
-                "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. Check if valid MATLAB code.",
-                modname,
-                name,
-            )
-        # if there are any tokens left save them
-        if len(tks) > 0:
-            self.rem_tks = tks  # save extra tokens
 
     def ref_role(self):
         """Returns role to use for references to this object (e.g. when generating auto-links)"""
@@ -1055,525 +930,23 @@ class MatClass(MatMixin, MatObject):
 
     def __init__(self, name, modname, tokens):
         super(MatClass, self).__init__(name)
+        parsed_class = MatClassParser(tokens)
         #: Path of folder containing :class:`MatObject`.
         self.module = modname
-        #: List of tokens parsed from mfile by Pygments.
-        self.tokens = tokens
         #: dictionary of class attributes
-        self.attrs = {}
+        self.attrs = parsed_class.attrs
         #: list of class superclasses
-        self.bases = []
+        self.bases = parsed_class.supers
         #: docstring
-        self.docstring = ""
+        self.docstring = parsed_class.docstring
         #: dictionary of class properties
-        self.properties = {}
+        self.properties = parsed_class.properties
         #: dictionary of class methods
-        self.methods = {}
-        #: dictionary of class enumerations
-        self.enumerations = {}
+        self.methods = parsed_class.methods
+        #: dictionary of class enumerations
+        self.enumerations = parsed_class.enumerations
         #: remaining tokens after main class definition is parsed
         self.rem_tks = None
-        # =====================================================================
-        # parse tokens
-        # TODO: use generator and next() instead of stepping index!
-        try:
-            # Skip classdef token - already checked in MatObject.parse_mfile
-            idx = 1  # token index
-
-            # class "attributes"
-            self.attrs, idx = self.attributes(idx, MATLAB_CLASS_ATTRIBUTE_TYPES)
-
-            # Check if self.name matches the name in the file.
-            idx += self._blanks(idx)
-            if not self.tokens[idx][1] == self.name:
-                logger.warning(
-                    "[sphinxcontrib-matlabdomain] Unexpected class name: '%s'."
-                    " Expected '%s' in '%s'.",
-                    self.tokens[idx][1],
-                    name,
-                    modname,
-                )
-
-            idx += 1
-            idx += self._blanks(idx)  # skip blanks
-            # =====================================================================
-            # super classes
-            if self._tk_eq(idx, (Token.Operator, "<")):
-                idx += 1
-                # newline terminates superclasses
-                while not self._is_newline(idx):
-                    idx += self._blanks(idx)  # skip blanks
-                    # concatenate base name
-                    base_name = ""
-                    while (
-                        not self._whitespace(idx)
-                        and self.tokens[idx][0] is not Token.Comment
-                    ):
-                        base_name += self.tokens[idx][1]
-                        idx += 1
-                    # If it's a newline, we are done parsing.
-                    if not self._is_newline(idx):
-                        idx += 1
-                    if base_name:
-                        self.bases.append(base_name)
-                    idx += self._blanks(idx)  # skip blanks
-                    # continue to next super class separated by &
-                    if self._tk_eq(idx, (Token.Operator, "&")):
-                        idx += 1
-                idx += 1  # end of super classes
-            # newline terminates classdef signature
-            elif self._is_newline(idx):
-                idx += 1  # end of classdef signature
-            # =====================================================================
-            # docstring
-            idx += self._indent(idx)  # calculation indentation
-            # concatenate docstring
-            while self.tokens[idx][0] is Token.Comment:
-                self.docstring += self.tokens[idx][1].lstrip("%")
-                idx += 1
-                # append newline to docstring
-                if self._is_newline(idx):
-                    self.docstring += self.tokens[idx][1]
-                    idx += 1
-                # skip tab
-                indent = self._indent(idx)  # calculation indentation
-                idx += indent
-            # =====================================================================
-            # properties & methods blocks
-            # loop over code body searching for blocks until end of class
-            while self._tk_ne(idx, (Token.Keyword, "end")):
-                # skip comments and whitespace
-                while self._whitespace(idx) or self.tokens[idx][0] is Token.Comment:
-                    whitespace = self._whitespace(idx)
-                    if whitespace:
-                        idx += whitespace
-                    else:
-                        idx += 1
-
-                # =================================================================
-                # properties blocks
-                if self._tk_eq(idx, (Token.Keyword, "properties")):
-                    prop_name = ""
-                    idx += 1
-                    # property "attributes"
-                    attr_dict, idx = self.attributes(
-                        idx, MATLAB_PROPERTY_ATTRIBUTE_TYPES
-                    )
-                    # Token.Keyword: "end" terminates properties & methods block
-                    while self._tk_ne(idx, (Token.Keyword, "end")):
-                        # skip whitespace
-                        while self._whitespace(idx):
-                            whitespace = self._whitespace(idx)
-                            if whitespace:
-                                idx += whitespace
-                            else:
-                                idx += 1
-
-                        # =========================================================
-                        # long docstring before property
-                        if self.tokens[idx][0] is Token.Comment:
-                            # docstring
-                            docstring = ""
-
-                            # Collect comment lines
-                            while self.tokens[idx][0] is Token.Comment:
-                                docstring += self.tokens[idx][1].lstrip("%")
-                                idx += 1
-                                idx += self._blanks(idx)
-
-                                try:
-                                    # Check if end of line was reached
-                                    if self._is_newline(idx):
-                                        docstring += "\n"
-                                        idx += 1
-                                        idx += self._blanks(idx)
-
-                                    # Check if variable name is next
-                                    if self.tokens[idx][0] is Token.Name:
-                                        prop_name = self.tokens[idx][1]
-                                        self.properties[prop_name] = {
-                                            "attrs": attr_dict
-                                        }
-                                        self.properties[prop_name][
-                                            "docstring"
-                                        ] = docstring
-                                        break
-
-                                    # If there is an empty line at the end of
-                                    # the comment: discard it
-                                    elif self._is_newline(idx):
-                                        docstring = ""
-                                        idx += self._whitespace(idx)
-                                        break
-
-                                except IndexError:
-                                    # EOF reached, quit gracefully
-                                    break
-
-                        # with "%:" directive trumps docstring after property
-                        isTokenName = self.tokens[idx][0] is Token.Name
-                        isTokenNameSubtype = self.tokens[idx][0] in Token.Name.subtypes
-                        if isTokenName or isTokenNameSubtype:
-                            prop_name = self.tokens[idx][1]
-                            idx += 1
-                            if isTokenNameSubtype:
-                                logger.debug(
-                                    "[sphinxcontrib-matlabdomain] WARNING %s.%s.%s is a builtin name.",
-                                    self.module,
-                                    self.name,
-                                    prop_name,
-                                )
-
-                            # Initialize property if it was not already done
-                            if prop_name not in self.properties.keys():
-                                self.properties[prop_name] = {"attrs": attr_dict}
-
-                            # Capture (dimensions) class {validators} as "specs"
-                            # https://mathworks.com/help/matlab/matlab_oop/defining-properties.html
-                            count, propspec = self._propspec(idx)
-                            self.properties[prop_name]["specs"] = propspec
-
-                            idx = idx + count
-                            if self._tk_eq(idx, (Token.Punctuation, ";")):
-                                continue
-
-                        elif self._tk_eq(idx, (Token.Keyword, "end")):
-                            idx += 1
-                            break
-                        # skip semicolon after property name, but no default
-                        elif self._tk_eq(idx, (Token.Punctuation, ";")):
-                            idx += 1
-                            # A comment might come after semi-colon
-                            idx += self._blanks(idx)
-                            if self._is_newline(idx):
-                                idx += 1
-                                # Property definition is finished; add missing values
-                                if "default" not in self.properties[prop_name].keys():
-                                    self.properties[prop_name]["default"] = None
-                                if "docstring" not in self.properties[prop_name].keys():
-                                    self.properties[prop_name]["docstring"] = None
-
-                                continue
-                            elif self.tokens[idx][0] is Token.Comment:
-                                docstring = self.tokens[idx][1].lstrip("%")
-                                docstring += "\n"
-                                self.properties[prop_name]["docstring"] = docstring
-                                idx += 1
-                        elif self.tokens[idx][0] is Token.Comment:
-                            # Comments seperated with blank lines.
-                            idx = idx - 1
-                            continue
-                        else:
-                            logger.warning(
-                                "sphinxcontrib-matlabdomain] Expected property in %s.%s - got %s",
-                                self.module,
-                                self.name,
-                                str(self.tokens[idx]),
-                            )
-                            return
-                        idx += self._blanks(idx)  # skip blanks
-                        # =========================================================
-                        # defaults
-                        default = {"default": None}
-                        if self._tk_eq(idx, (Token.Punctuation, "=")):
-                            idx += 1
-                            idx += self._blanks(idx)  # skip blanks
-                            # concatenate default value until newline or comment
-                            default = ""
-                            brace_count = 0
-                            # keep reading until newline or comment
-                            # only if all punctuation pairs are closed
-                            # and comment is **not** continuation ellipsis
-                            while (
-                                (
-                                    not self._is_newline(idx)
-                                    and self.tokens[idx][0] is not Token.Comment
-                                )
-                                or brace_count > 0
-                                or (
-                                    self.tokens[idx][0] is Token.Comment
-                                    and self.tokens[idx][1].startswith("...")
-                                )
-                            ):
-                                token = self.tokens[idx]
-                                # default has an array spanning multiple lines
-                                # keep track of braces
-                                if token in MATLAB_PROP_BRACES_BEGIN:
-                                    brace_count += 1
-                                # look for end of array
-                                elif token in MATLAB_PROP_BRACES_END:
-                                    brace_count -= 1
-                                # Pygments treats continuation ellipsis as comments
-                                # text from ellipsis until newline is in token
-                                elif token[0] is Token.Comment and token[1].startswith(
-                                    "..."
-                                ):
-                                    idx += 1  # skip ellipsis comments
-                                    # include newline which should follow comment
-                                    if self._is_newline(idx):
-                                        default += "\n"
-                                        idx += 1
-                                    continue
-                                elif self._is_newline(idx - 1) and not self._is_newline(
-                                    idx
-                                ):
-                                    idx += self._blanks(idx)
-                                    continue
-                                elif token[0] is Token.Text and token[1] == " ":
-                                    # Skip spaces that are not in strings.
-                                    idx += 1
-                                    continue
-                                default += token[1]
-                                idx += 1
-                            if self.tokens[idx][0] is not Token.Comment:
-                                idx += 1
-                            if default:
-                                default = {"default": default.rstrip("; ")}
-
-                        self.properties[prop_name].update(default)
-                        # =========================================================
-                        # docstring
-                        if "docstring" not in self.properties[prop_name].keys():
-                            docstring = {"docstring": None}
-                            if self.tokens[idx][0] is Token.Comment:
-                                docstring["docstring"] = self.tokens[idx][1].lstrip("%")
-                                idx += 1
-                            self.properties[prop_name].update(docstring)
-                        elif self.tokens[idx][0] is Token.Comment:
-                            # skip this comment
-                            idx += 1
-
-                        idx += self._whitespace(idx)
-                    idx += 1
-                # =================================================================
-                # method blocks
-                if self._tk_eq(idx, (Token.Keyword, "methods")):
-                    idx += 1
-                    # method "attributes"
-                    attr_dict, idx = self.attributes(idx, MATLAB_METHOD_ATTRIBUTE_TYPES)
-                    # Token.Keyword: "end" terminates properties & methods block
-                    while self._tk_ne(idx, (Token.Keyword, "end")):
-                        # skip comments and whitespace
-                        while (
-                            self._whitespace(idx)
-                            or self.tokens[idx][0] is Token.Comment
-                        ):
-                            whitespace = self._whitespace(idx)
-                            if whitespace:
-                                idx += whitespace
-                            else:
-                                idx += 1
-                        # skip methods defined in other files
-                        meth_tk = self.tokens[idx]
-                        if (
-                            meth_tk[0] is Token.Name
-                            or meth_tk[0] is Token.Name.Builtin
-                            or meth_tk[0] is Token.Name.Function
-                            or (
-                                meth_tk[0] is Token.Keyword
-                                and meth_tk[1].strip() == "function"
-                                and self.tokens[idx + 1][0] is Token.Name.Function
-                            )
-                            or self._tk_eq(idx, (Token.Punctuation, "["))
-                            or self._tk_eq(idx, (Token.Punctuation, "]"))
-                            or self._tk_eq(idx, (Token.Punctuation, "="))
-                            or self._tk_eq(idx, (Token.Punctuation, "("))
-                            or self._tk_eq(idx, (Token.Punctuation, ")"))
-                            or self._tk_eq(idx, (Token.Punctuation, ";"))
-                            or self._tk_eq(idx, (Token.Punctuation, ","))
-                        ):
-                            logger.debug(
-                                "[sphinxcontrib-matlabdomain] Skipping tokens for methods defined in separate files."
-                                "Token #%d: %r",
-                                idx,
-                                self.tokens[idx],
-                            )
-                            idx += 1 + self._whitespace(idx + 1)
-                        elif self._tk_eq(idx, (Token.Keyword, "end")):
-                            idx += 1
-                            break
-                        else:
-                            # find methods
-                            meth = MatMethod(
-                                self.module, self.tokens[idx:], self, attr_dict
-                            )
-
-                            # Detect getter/setter methods - these are not documented
-                            isGetter = meth.name.startswith("get.")
-                            isSetter = meth.name.startswith("set.")
-                            if not (isGetter or isSetter):
-                                # Add the parsed method to methods dictionary
-                                self.methods[meth.name] = meth
-
-                            # Update idx with the number of parsed tokens.
-                            idx += meth.skip_tokens()
-                            idx += self._whitespace(idx)
-                    idx += 1
-                if self._tk_eq(idx, (Token.Keyword, "events")):
-                    logger.debug(
-                        "[sphinxcontrib-matlabdomain] ignoring 'events' in 'classdef %s.'",
-                        self.name,
-                    )
-                    idx += 1
-                    # Token.Keyword: "end" terminates events block
-                    while self._tk_ne(idx, (Token.Keyword, "end")):
-                        idx += 1
-                    idx += 1
-                if self._tk_eq(idx, (Token.Name, "enumeration")):
-                    logger.debug(
-                        "[sphinxcontrib-matlabdomain] ignoring 'enumeration' in 'classdef %s'.",
-                        self.name,
-                    )
-                    # no attributes for enums
-                    idx += 1
-                    # Token.Keyword: "end" terminates events block
-                    while self._tk_ne(idx, (Token.Keyword, "end")):
-                        # skip whitespace
-                        while self._whitespace(idx):
-                            whitespace = self._whitespace(idx)
-                            if whitespace:
-                                idx += whitespace
-                            else:
-                                idx += 1
-
-                        # =========================================================
-                        # long docstring before property
-                        if self.tokens[idx][0] is Token.Comment:
-                            # docstring
-                            docstring = ""
-
-                            # Collect comment lines
-                            while self.tokens[idx][0] is Token.Comment:
-                                docstring += self.tokens[idx][1].lstrip("%")
-                                idx += 1
-                                idx += self._blanks(idx)
-
-                                try:
-                                    # Check if end of line was reached
-                                    if self._is_newline(idx):
-                                        docstring += "\n"
-                                        idx += 1
-                                        idx += self._blanks(idx)
-
-                                    # Check if variable name is next
-                                    if self.tokens[idx][0] is Token.Name:
-                                        enum_name = self.tokens[idx][1]
-                                        self.enumerations[enum_name] = {}
-                                        self.enumerations[enum_name][
-                                            "docstring"
-                                        ] = docstring
-                                        break
-
-                                    # If there is an empty line at the end of
-                                    # the comment: discard it
-                                    elif self._is_newline(idx):
-                                        docstring = ""
-                                        idx += self._whitespace(idx)
-                                        break
-
-                                except IndexError:
-                                    # EOF reached, quit gracefully
-                                    break
-
-                        # with "%:" directive trumps docstring after property
-                        if self.tokens[idx][0] is Token.Name:
-                            enum_name = self.tokens[idx][1]
-                            idx += 1
-                            # Initialize property if it was not already done
-                            if enum_name not in self.enumerations.keys():
-                                self.enumerations[enum_name] = {}
-
-                            # skip size, class and functions specifiers
-                            # TODO: parse args and do a postprocessing step.
-                            idx += self._propspec(idx)
-
-                            if self._tk_eq(idx, (Token.Punctuation, ";")):
-                                continue
-
-                            # This is because matlab allows comma separated list of enums 
-                            if self._tk_eq(idx, (Token.Punctuation, ",")):
-                                continue
-
-                        # subtype of Name EG Name.Builtin used as Name
-                        elif self.tokens[idx][0] in Token.Name.subtypes:
-                            prop_name = self.tokens[idx][1]
-                            logger.debug(
-                                "[sphinxcontrib-matlabdomain] WARNING %s.%s.%s is a builtin name.",
-                                self.module,
-                                self.name,
-                                prop_name,
-                            )
-                            self.properties[prop_name] = {"attrs": attr_dict}
-                            idx += 1
-
-                            # skip size, class and functions specifiers
-                            # TODO: Parse old and new style property extras
-                            idx += self._propspec(idx)
-
-                            if self._tk_eq(idx, (Token.Punctuation, ";")):
-                                continue
-
-                        elif self._tk_eq(idx, (Token.Keyword, "end")):
-                            idx += 1
-                            break
-                        # skip semicolon after property name, but no default
-                        elif self._tk_eq(idx, (Token.Punctuation, ";")):
-                            idx += 1
-                            # A comment might come after semi-colon
-                            idx += self._blanks(idx)
-                            if self._is_newline(idx):
-                                idx += 1
-                                # Property definition is finished; add missing values
-                                if "default" not in self.properties[prop_name].keys():
-                                    self.properties[prop_name]["default"] = None
-                                if "docstring" not in self.properties[prop_name].keys():
-                                    self.properties[prop_name]["docstring"] = None
-
-                                continue
-                            elif self.tokens[idx][0] is Token.Comment:
-                                docstring = self.tokens[idx][1].lstrip("%")
-                                docstring += "\n"
-                                self.properties[prop_name]["docstring"] = docstring
-                                idx += 1
-                        elif self.tokens[idx][0] is Token.Comment:
-                            # Comments seperated with blank lines.
-                            idx = idx - 1
-                            continue
-                        else:
-                            logger.warning(
-                                "sphinxcontrib-matlabdomain] Expected enumeration in %s.%s - got %s",
-                                self.module,
-                                self.name,
-                                str(self.tokens[idx]),
-                            )
-                            return
-                        idx += self._blanks(idx)  # skip blanks
-
-                        # docstring
-                        if "docstring" not in self.enumerations[enum_name].keys():
-                            docstring = {"docstring": None}
-                            if self.tokens[idx][0] is Token.Comment:
-                                docstring["docstring"] = self.tokens[idx][1].lstrip("%")
-                                idx += 1
-                            self.enumerations[enum_name].update(docstring)
-                        elif self.tokens[idx][0] is Token.Comment:
-                            # skip this comment
-                            idx += 1
-
-                        idx += self._whitespace(idx)
-                    idx += 1
-                if self._tk_eq(idx, (Token.Punctuation, ";")):
-                    # Skip trailing semicolon after end.
-                    idx += 1
-        except IndexError:
-            logger.warning(
-                "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. "
-                "Check if valid MATLAB code.",
-                modname,
-                name,
-            )
-
-        self.rem_tks = idx  # index of last token
 
     def ref_role(self):
         """Returns role to use for references to this object (e.g. when generating auto-links)"""
diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py
index 5764f9f..bf83a5a 100644
--- a/sphinxcontrib/matlab.py
+++ b/sphinxcontrib/matlab.py
@@ -879,6 +879,7 @@ def setup(app):
     app.add_domain(MATLABDomain)
     # autodoc
     app.add_config_value("matlab_src_dir", None, "env")
+    app.add_config_value("matlab_ignore_dirs", [], "env")
     app.add_config_value("matlab_src_encoding", None, "env")
     app.add_config_value("matlab_keep_package_prefix", False, "env")
     app.add_config_value("matlab_show_property_default_value", False, "env")

From 54899fb49e24d7efe5c264485c5e08871df07c89 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Tue, 23 Jul 2024 14:32:37 +0200
Subject: [PATCH 08/45] finish integrating mat_textmate_parser with mat_types

---
 sphinxcontrib/mat_textmate_parser.py | 100 +++++++++++++++++++++++----
 sphinxcontrib/mat_types.py           |  22 +++---
 2 files changed, 98 insertions(+), 24 deletions(-)

diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py
index 0320df2..2b4a4e9 100644
--- a/sphinxcontrib/mat_textmate_parser.py
+++ b/sphinxcontrib/mat_textmate_parser.py
@@ -1,11 +1,15 @@
-rpath = "../../../syscop/software/nosnoc/src/NosnocIpoptCallback.m"
+from textmate_grammar.parsers.matlab import MatlabParser
+import re
 
+rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
 
-def find_first_child(curr, tok):
-    ind = [i for i in range(len(curr.children)) if curr.children[i].token == tok]
+
+def find_first_child(curr, tok, attr="children"):
+    tok_lst = getattr(curr, attr)
+    ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok]
     if not ind:
-        return None
-    return (curr.children[ind[0]], ind[0])
+        return (None, None)
+    return (tok_lst[ind[0]], ind[0])
 
 
 def _toks_on_same_line(tok1, tok2):
@@ -48,6 +52,7 @@ def __init__(self, fun_tok):
 
         self.outputs = {}
         self.params = {}
+        self.attrs = {}
 
         for out, _ in output_gen:
             self.outputs[out.content] = {}
@@ -160,8 +165,6 @@ def __init__(self, tokens):
         self.methods = {}
         self.enumerations = {}
 
-        # Maybe remove continuations as a crutch? currently parser is broken for continuations in attributes
-        # self.parser = MatlabParser(remove_line_continuations=True)
         self.parsed = tokens
         self.cls, _ = find_first_child(self.parsed, "meta.class.matlab")
         if not self.cls:
@@ -187,7 +190,6 @@ def _find_class_docstring(self):
         try:
             possible_comment_tok = self.cls.children[1]
         except IndexError:
-            print("found no docstring")
             return
 
         if possible_comment_tok.token == "comment.line.percentage.matlab":
@@ -197,7 +199,7 @@ def _find_class_docstring(self):
                 2:-2
             ].strip()  # [2,-2] strips out block comment delimiters
         else:
-            print("found no docstring")
+            pass
 
     def _docstring_lines(self):
         idx = 1
@@ -300,6 +302,7 @@ def _parse_class_attributes(self, attrs_tok):
 
     def _parse_property_section(self, section):
         # TODO parse property section attrs
+        attrs = self._parse_attributes(section)
         idxs = [
             i
             for i in range(len(section.children))
@@ -308,11 +311,30 @@ def _parse_property_section(self, section):
         for idx in idxs:
             prop_tok = section.children[idx]
             prop_name = prop_tok.begin[0].content
-            self.properties[prop_name] = {}  # Create entry for property
+            self.properties[prop_name] = {"attrs": attrs}  # Create entry for property
             self._parse_property_validation(
                 prop_name, prop_tok
             )  # Parse property validation.
 
+            # Try to find a default assignment:
+            default = None
+            _, assgn_idx = find_first_child(
+                prop_tok, "keyword.operator.assignment.matlab", attr="end"
+            )
+            if assgn_idx is not None:
+                default = ""
+                assgn_idx += 1  # skip assignment
+                while assgn_idx < len(prop_tok.end):
+                    tok = prop_tok.end[assgn_idx]
+                    assgn_idx += 1
+                    if tok.token in [
+                        "comment.line.percentage.matlab",
+                        "punctuation.terminator.semicolon.matlab",
+                    ]:
+                        break
+                    default += tok.content
+            self.properties[prop_name]["default"] = default
+
             # Get inline docstring
             inline_docstring_gen = prop_tok.find(
                 tokens="comment.line.percentage.matlab", attribute="end"
@@ -439,7 +461,7 @@ def _parse_property_validation(self, prop_name, prop):
             pass
 
     def _parse_method_section(self, section):
-        # TODO parse property section attrs
+        attrs = self._parse_attributes(section)
         idxs = [
             i
             for i in range(len(section.children))
@@ -449,9 +471,9 @@ def _parse_method_section(self, section):
             meth_tok = section.children[idx]
             parsed_function = MatFunctionParser(meth_tok)
             self.methods[parsed_function.name] = parsed_function
+            self.methods[parsed_function.name].attrs = attrs
 
     def _parse_enum_section(self, section):
-        # TODO parse property section attrs
         idxs = [
             i
             for i in range(len(section.children))
@@ -553,6 +575,58 @@ def _parse_enum_section(self, section):
             else:
                 self.enumerations[enum_name]["docstring"] = None
 
+    def _parse_attributes(self, section):
+        """Return a dict of attribute name -> value parsed from ``section.begin``."""
+        children = section.begin
+        idx = 1
+        attrs = {}
+        while idx < len(children):
+            child_tok = children[idx]
+            if re.match(
+                r"storage\.modifier\.(properties|methods|events)\.matlab",
+                child_tok.token,
+            ):
+                attr = child_tok.content
+                val = None
+                idx += 1  # walk to next token
+                try:  # however we may have walked off the end of the list in which case we exit
+                    maybe_assign_tok = children[idx]
+                except IndexError:
+                    attrs[attr] = val
+                    return attrs
+                if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
+                    idx += 1
+                    rhs_tok = children[idx]  # parse right hand side
+                    if rhs_tok.token == "meta.cell.literal.matlab":
+                        # A cell. For now just take the whole cell as value.
+                        # TODO parse out the cell array of metaclass literals.
+                        val = "{" + rhs_tok.content + "}"
+                        idx += 1
+                    elif rhs_tok.token == "constant.language.boolean.matlab":
+                        val = rhs_tok.content
+                        idx += 1
+                    elif rhs_tok.token == "storage.modifier.access.matlab":
+                        val = rhs_tok.content
+                        idx += 1
+                    elif rhs_tok.token == "keyword.operator.other.question.matlab":
+                        idx += 1
+                        metaclass_tok = children[idx]
+                        metaclass_components = metaclass_tok.findall(
+                            tokens=[
+                                "entity.name.namespace.matlab",
+                                "entity.other.class.matlab",
+                            ]
+                        )
+                        val = tuple([comp.content for comp, _ in metaclass_components])
+                    # any other right-hand side is left unparsed and val stays None
+                attrs[attr] = val
+            else:  # Comma or continuation therefore skip
+                idx += 1
+
+        return attrs
+
 
 if __name__ == "__main__":
-    cls_parse = MatClassParser(rpath)
+    parser = MatlabParser()
+    toks = parser.parse_file(rpath)
+    cls_parse = MatClassParser(toks)
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index 18a9b5a..cd7acb9 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -21,6 +21,8 @@
 from textmate_grammar.parsers.matlab import MatlabParser
 import logging
 from pathlib import Path
+import cProfile
+import pstats
 
 logger = sphinx.util.logging.getLogger("matlab-domain")
 
@@ -434,9 +436,12 @@ def matlabify(objname):
 
             # make a full path out of basedir and objname
             fullpath = os.path.join(MatObject.basedir, objname)  # objname fullpath
-        import pdb
 
-        pdb.set_trace()
+        # Check if path should be ignored
+        for ignore in MatObject.sphinx_env.config.matlab_ignore_dirs:
+            if Path(fullpath).is_relative_to(Path(MatObject.basedir, ignore)):
+                return None
+
         logger.debug(
             f"[sphinxcontrib-matlabdomain] matlabify {package=}, {objname=}, {fullpath=}"
         )
@@ -501,14 +506,7 @@ def parse_mfile(mfile, name, path, encoding=None):
 
         full_code = code
 
-        print(mfile)
-
-        # remove the top comment header (if there is one) from the code string
-        # code = mat_parser.remove_comment_header(code)
-        # code = mat_parser.remove_line_continuations(code)
-        # code = mat_parser.fix_function_signatures(code)
-        # TODO: This might not be necessary
-
+        # quiet the textmate grammar logger and parse the file
         logging.getLogger("textmate_grammar").setLevel(logging.ERROR)
         parser = MatlabParser()
         toks = parser.parse_file(mfile)
@@ -1086,7 +1084,9 @@ def __doc__(self):
 
     @property
     def __bases__(self):
-        bases_ = dict.fromkeys(self.bases)  # make copy of bases
+        bases_ = dict.fromkeys(
+            [".".join(base) for base in self.bases]
+        )  # make copy of bases
         class_entity_table = {}
         for name, entity in entities_table.items():
             if isinstance(entity, MatClass) or "@" in name:

From 3c8c18e5f1f3fd5de5f5e28d8a7c732d6503fd99 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Wed, 24 Jul 2024 17:45:51 +0200
Subject: [PATCH 09/45] [skip-ci] some minor changes

---
 sphinxcontrib/mat_textmate_parser.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py
index 2b4a4e9..46236ce 100644
--- a/sphinxcontrib/mat_textmate_parser.py
+++ b/sphinxcontrib/mat_textmate_parser.py
@@ -1,7 +1,9 @@
 from textmate_grammar.parsers.matlab import MatlabParser
 import re
 
-rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+
+rpath = "/home/anton/tools/matlabdomain/tests/roots/test_autodoc/target/ClassExample.m"
 
 
 def find_first_child(curr, tok, attr="children"):
@@ -186,6 +188,10 @@ def __init__(self, tokens):
         for section, _ in enumeration_sections:
             self._parse_enum_section(section)
 
+        import pdb
+
+        pdb.set_trace()
+
     def _find_class_docstring(self):
         try:
             possible_comment_tok = self.cls.children[1]
@@ -272,7 +278,11 @@ def _parse_class_attributes(self, attrs_tok):
                 attr = child_tok.content
                 val = None
                 idx += 1  # walk to next token
-                maybe_assign_tok = children[idx]
+                try:  # however we may have walked off the end of the list in which case we exit
+                    maybe_assign_tok = children[idx]
+                except IndexError:
+                    self.attrs[attr] = val
+                    break
                 if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
                     idx += 1
                     rhs_tok = children[idx]  # parse right hand side

From bf1e6a26197888a714adcb0d5312571c71a0166d Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Wed, 24 Jul 2024 17:47:02 +0200
Subject: [PATCH 10/45] initial work on a tree sitter based parser

---
 sphinxcontrib/mat_tree_sitter_parser.py | 729 ++++++++++++++++++++++++
 1 file changed, 729 insertions(+)
 create mode 100644 sphinxcontrib/mat_tree_sitter_parser.py

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
new file mode 100644
index 0000000..e0c039e
--- /dev/null
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -0,0 +1,729 @@
+import tree_sitter_matlab as tsml
+from tree_sitter import Language, Parser
+import re
+
+# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+
+ML_LANG = Language(tsml.language())
+
+rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m"
+
+# QUERIES
+q_classdef = ML_LANG.query(
+    """(class_definition
+    "classdef"
+    (attributes
+    [(attribute) @attrs _]+
+    )?
+    (identifier) @name
+    (superclasses
+        [(property_name) @supers _]+
+    )?
+    ) @class
+"""
+)
+
+q_attributes = ML_LANG.query("""(identifier) @name (_)? @value""")
+
+q_supers = ML_LANG.query("""[(identifier) @secs "."]+ """)
+
+q_properties = ML_LANG.query(
+    """(properties
+    (attributes)? @attrs
+    (property)* @properties
+    ) @prop_block
+"""
+)
+
+q_methods = ML_LANG.query(
+    """(methods
+    (attributes)? @attrs
+    (function_definition)* @methods
+    ) @meth_block
+"""
+)
+
+q_enumerations = ML_LANG.query(
+    """(enumeration
+    (enum)* @enums
+    ) @enum_block
+"""
+)
+
+q_events = ML_LANG.query(
+    """(events
+    (attributes)? @attrs
+    (identifier)* @events
+    ) @event_block
+"""
+)
+
+
+def find_first_child(curr, tok, attr="children"):
+    tok_lst = getattr(curr, attr)
+    ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok]
+    if not ind:
+        return (None, None)
+    return (tok_lst[ind[0]], ind[0])
+
+
+def _toks_on_same_line(tok1, tok2):
+    """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results"""
+    line1 = _get_last_line_of_tok(tok1)
+    line2 = _get_first_line_of_tok(tok2)
+    return line1 == line2
+
+
+def _is_empty_line_between_tok(tok1, tok2):
+    """Note: pass tokens in order they appear"""
+    line1 = _get_last_line_of_tok(tok1)
+    line2 = _get_first_line_of_tok(tok2)
+    return line2 - line1 > 1
+
+
+def _get_first_line_of_tok(tok):
+    return min([loc[0] for loc in tok.characters.keys()])
+
+
+def _get_last_line_of_tok(tok):
+    return max([loc[0] for loc in tok.characters.keys()])
+
+
+class MatFunctionParser:
+    def __init__(self, fun_tok):
+        """Parse Function definition"""
+        # First find the function name
+        name_gen = fun_tok.find(tokens="entity.name.function.matlab")
+        try:
+            name_tok, _ = next(name_gen)
+            self.name = name_tok.content
+        except StopIteration:
+            # TODO correct error here
+            raise Exception("Couldn't find function name")
+
+        # Find outputs and parameters
+        output_gen = fun_tok.find(tokens="variable.parameter.output.matlab")
+        param_gen = fun_tok.find(tokens="variable.parameter.input.matlab")
+
+        self.outputs = {}
+        self.params = {}
+        self.attrs = {}
+
+        for out, _ in output_gen:
+            self.outputs[out.content] = {}
+
+        for param, _ in param_gen:
+            self.params[param.content] = {}
+
+        # find arguments blocks
+        arg_section = None
+        for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"):
+            self._parse_argument_section(arg_section)
+
+        fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab")
+        try:
+            fun_decl_tok, _ = next(fun_decl_gen)
+        except StopIteration:
+            raise Exception(
+                "missing function declaration"
+            )  # This cant happen as we'd be missing a function name
+
+        # Now parse for docstring
+        docstring = ""
+        comment_toks = fun_tok.findall(
+            tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"]
+        )
+        last_tok = arg_section if arg_section is not None else fun_decl_tok
+
+        for comment_tok, _ in comment_toks:
+            if _is_empty_line_between_tok(last_tok, comment_tok):
+                # If we have non-consecutive tokens quit right away.
+                break
+            elif (
+                not docstring and comment_tok.token == "comment.block.percentage.matlab"
+            ):
+                # If we have no previous docstring lines and a comment block we take
+                # the comment block as the docstring and exit.
+                docstring = comment_tok.content.strip()[
+                    2:-2
+                ].strip()  # [2,-2] strips out block comment delimiters
+                break
+            elif comment_tok.token == "comment.line.percentage.matlab":
+                # keep parsing comments
+                docstring += comment_tok.content[1:] + "\n"
+            else:
+                # we are done.
+                break
+            last_tok = comment_tok
+
+        self.docstring = docstring if docstring else None
+
+    def _parse_argument_section(self, section):
+        modifiers = [
+            mod.content
+            for mod, _ in section.find(tokens="storage.modifier.arguments.matlab")
+        ]
+        arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab")
+        for arg_def, _ in arg_def_gen:
+            arg_name = arg_def.begin[
+                0
+            ].content  # Get argument name that is being defined
+            self._parse_argument_validation(arg_name, arg_def, modifiers)
+
+    def _parse_argument_validation(self, arg_name, arg, modifiers):
+        """Store the size, type and validators declared for ``arg_name``."""
+        # TODO This should be identical to property validation I think. Refactor
+        section = self.outputs if "Output" in modifiers else self.params
+        size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1)
+        try:  # We have a size, therefore parse the comma separated list into tuple
+            size_tok, _ = next(size_gen)
+            size_elem_gen = size_tok.find(
+                tokens=[
+                    "constant.numeric.decimal.matlab",
+                    "keyword.operator.vector.colon.matlab",
+                ],
+                depth=1,
+            )
+            size = tuple([elem[0].content for elem in size_elem_gen])
+            section[arg_name]["size"] = size
+        except StopIteration:
+            pass
+
+        # Now find the type if it exists
+        # TODO this should be mapped to known types (though perhaps as a postprocess)
+        type_gen = arg.find(tokens="storage.type.matlab", depth=1)
+        try:
+            section[arg_name]["type"] = next(type_gen)[0].content
+        except StopIteration:
+            pass
+
+        # Now find list of validators
+        validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1)
+        try:
+            validator_tok, _ = next(validator_gen)
+            validator_toks = validator_tok.findall(
+                tokens="variable.other.readwrite.matlab", depth=1
+            )  # TODO Probably bug here in MATLAB-Language-grammar
+            section[arg_name]["validators"] = [tok[0].content for tok in validator_toks]
+        except StopIteration:
+            pass
+
+
+class MatClassParser:
+    def __init__(self, tree):
+        # DATA
+        self.name = ""
+        self.supers = []
+        self.attrs = {}
+        self.docstring = ""
+        self.properties = {}
+        self.methods = {}
+        self.enumerations = {}
+
+        self.tree = tree
+
+        # Parse class basics
+        class_matches = q_classdef.matches(tree.root_node)
+        _, class_match = class_matches[0]
+        self.cls = class_match.get("class")
+        self.name = class_match.get("name")
+
+        import pdb
+
+        pdb.set_trace()
+        # Parse class attrs and supers
+        attrs_node = class_match.get("attrs")
+        if attrs_node is not None:
+            attrs_matches = q_attributes.matches(attrs_node)
+            for _, match in attrs_matches:
+                name = match.get("name").text.decode("utf-8")
+                value_node = match.get("value")
+                self.attrs[name] = (
+                    value_node.text.decode("utf-8") if value_node is not None else None
+                )
+
+        supers_node = class_match.get("supers")
+        if supers_node is not None:
+            supers_matches = q_supers.matches(supers_node)
+            for _, match in supers_matches:
+                super_cls = tuple(
+                    [sec.text.decode("utf-8") for sec in match.get("secs")]
+                )
+                self.supers.append(super_cls)
+
+        prop_matches = q_properties.matches(self.cls)
+        method_matches = q_methods.matches(self.cls)
+        enumeration_matches = q_enumerations.matches(self.cls)
+        events_matches = q_events.matches(self.cls)
+
+        self._parse_clsdef()
+        self._find_class_docstring()
+
+        property_sections = self.cls.findall(tokens="meta.properties.matlab", depth=1)
+        method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1)
+        enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1)
+
+        for section, _ in property_sections:
+            self._parse_property_section(section)
+
+        for section, _ in method_sections:
+            self._parse_method_section(section)
+
+        for section, _ in enumeration_sections:
+            self._parse_enum_section(section)
+
+    def _find_class_docstring(self):
+        try:
+            possible_comment_tok = self.cls.children[1]
+        except IndexError:
+            return
+
+        if possible_comment_tok.token == "comment.line.percentage.matlab":
+            self._docstring_lines()
+        elif possible_comment_tok.token == "comment.block.percentage.matlab":
+            self.docstring = possible_comment_tok.content.strip()[
+                2:-2
+            ].strip()  # [2,-2] strips out block comment delimiters
+        else:
+            pass
+
+    def _docstring_lines(self):
+        idx = 1
+        cls_children = self.cls.children
+
+        while (
+            idx < len(cls_children)
+            and cls_children[idx].token == "comment.line.percentage.matlab"
+        ):
+            self.docstring += (
+                cls_children[idx].content[1:] + "\n"
+            )  # [1:] strips out percent sign
+            idx += 1
+        self.docstring = self.docstring.strip()
+
+    def _parse_clsdef(self):
+        # Try parsing attrs
+        attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab")
+        try:
+            attrs_tok, _ = next(attrs_tok_gen)
+            self._parse_class_attributes(attrs_tok)
+        except StopIteration:
+            pass
+
+        # Parse classname
+        classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab")
+        try:
+            classname_tok, _ = next(classname_tok_gen)
+            self.name = classname_tok.content
+        except StopIteration:
+            print("ClassName not found")  # TODO this is probably fatal
+
+        # Parse interited classes
+        parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab")
+
+        for parent_class_tok, _ in parent_class_toks:
+            sections = parent_class_tok.findall(
+                tokens=[
+                    "entity.name.namespace.matlab",
+                    "entity.other.inherited-class.matlab",
+                ]
+            )
+            super_cls = tuple([sec.content for sec, _ in sections])
+            self.supers.append(super_cls)
+        # Parse Attributes TODO maybe there is a smarter way to do this?
+        idx = 0
+        while self.clsdef.children[idx].token == "storage.modifier.class.matlab":
+            attr_tok = self.clsdef.children[idx]
+            attr = attr_tok.content
+            val = None  # TODO maybe do some typechecking here or we can assume that you give us valid Matlab
+            idx += 1
+            if attr_tok.token == "keyword.operator.assignment.matlab":  # pull out r.h.s
+                idx += 1
+                val = self.clsdef.children[idx].content
+                idx += 1
+            if (
+                attr_tok.token == "punctuation.separator.modifier.comma.matlab"
+            ):  # skip commas
+                idx += 1
+            self.attrs[attr] = val
+
+    def _parse_class_attributes(self, attrs_tok):
+        # walk down child list and parse manually
+        # TODO perhaps contribute a delimited list find to textmate-grammar-python
+        children = attrs_tok.children
+        idx = 0
+        while idx < len(children):
+            child_tok = children[idx]
+            if child_tok.token == "storage.modifier.class.matlab":
+                attr = child_tok.content
+                val = None
+                idx += 1  # walk to next token
+                try:  # however we may have walked off the end of the list in which case we exit
+                    maybe_assign_tok = children[idx]
+                except:
+                    self.attrs[attr] = val
+                    break
+                if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
+                    idx += 1
+                    rhs_tok = children[idx]  # parse right hand side
+                    if rhs_tok.token == "meta.cell.literal.matlab":
+                        # A cell. For now just take the whole cell as value.
+                        # TODO parse out the cell array of metaclass literals.
+                        val = "{" + rhs_tok.content + "}"
+                        idx += 1
+                    elif rhs_tok.token == "constant.language.boolean.matlab":
+                        val = rhs_tok.content
+                        idx += 1
+                    elif rhs_tok.token == "keyword.operator.other.question.matlab":
+                        idx += 1
+                        metaclass_tok = children[idx]
+                        metaclass_components = metaclass_tok.findall(
+                            tokens=[
+                                "entity.name.namespace.matlab",
+                                "entity.other.class.matlab",
+                            ]
+                        )
+                        val = tuple([comp.content for comp, _ in metaclass_components])
+                    else:
+                        pass
+                self.attrs[attr] = val
+            else:  # Comma or continuation therefore skip
+                idx += 1
+
+    def _parse_property_section(self, section):
+        # TODO parse property section attrs
+        attrs = self._parse_attributes(section)
+        idxs = [
+            i
+            for i in range(len(section.children))
+            if section.children[i].token == "meta.assignment.definition.property.matlab"
+        ]
+        for idx in idxs:
+            prop_tok = section.children[idx]
+            prop_name = prop_tok.begin[0].content
+            self.properties[prop_name] = {"attrs": attrs}  # Create entry for property
+            self._parse_property_validation(
+                prop_name, prop_tok
+            )  # Parse property validation.
+
+            # Try to find a default assignment:
+            default = None
+            _, assgn_idx = find_first_child(
+                prop_tok, "keyword.operator.assignment.matlab", attr="end"
+            )
+            if assgn_idx is not None:
+                default = ""
+                assgn_idx += 1  # skip assignment
+                while assgn_idx < len(prop_tok.end):
+                    tok = prop_tok.end[assgn_idx]
+                    assgn_idx += 1
+                    if tok.token in [
+                        "comment.line.percentage.matlab",
+                        "punctuation.terminator.semicolon.matlab",
+                    ]:
+                        break
+                    default += tok.content
+            self.properties[prop_name]["default"] = default
+
+            # Get inline docstring
+            inline_docstring_gen = prop_tok.find(
+                tokens="comment.line.percentage.matlab", attribute="end"
+            )
+            try:
+                inline_docstring_tok, _ = next(inline_docstring_gen)
+                inline_docstring = inline_docstring_tok.content[
+                    1:
+                ]  # strip leading % sign
+            except StopIteration:
+                inline_docstring = None
+
+            # Walk backwards to get preceding docstring.
+            preceding_docstring = ""
+            walk_back_idx = idx - 1
+            next_tok = prop_tok
+            while walk_back_idx >= 0:
+                walk_tok = section.children[walk_back_idx]
+                if _is_empty_line_between_tok(walk_tok, next_tok):
+                    # Once there is an empty line between consecutive tokens we are done.
+                    break
+
+                if (
+                    not preceding_docstring
+                    and walk_tok.token == "comment.block.percentage.matlab"
+                ):
+                    # block comment immediately preceding enum so we are done.
+                    # TODO we might need to do some postprocessing here to handle indents gracefully
+                    preceding_docstring = walk_tok.content.strip()[2:-2]
+                    break
+                elif walk_tok.token == "comment.line.percentage.matlab":
+                    preceding_docstring = (
+                        walk_tok.content[1:] + "\n" + preceding_docstring
+                    )  # [1:] strips %
+                    walk_back_idx -= 1
+                    next_tok = walk_tok
+                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
+                    walk_back_idx -= 1
+                    # Dont update next_tok for whitespace
+                else:
+                    break
+
+            # Walk forwards to get following docstring or inline one.
+            following_docstring = ""
+            walk_fwd_idx = idx + 1
+            prev_tok = prop_tok
+            while walk_fwd_idx < len(section.children):
+                walk_tok = section.children[walk_fwd_idx]
+
+                if _is_empty_line_between_tok(prev_tok, walk_tok):
+                    # Once there is an empty line between consecutive tokens we are done.
+                    break
+
+                if (
+                    not following_docstring
+                    and walk_tok.token == "comment.block.percentage.matlab"
+                ):
+                    # block comment immediately following enum so we are done.
+                    # TODO we might need to do some postprocessing here to handle indents gracefully
+                    following_docstring = walk_tok.content.strip()[2:-2]
+                    break
+                elif walk_tok.token == "comment.line.percentage.matlab":
+                    following_docstring = (
+                        following_docstring + "\n" + walk_tok.content[1:]
+                    )  # [1:] strips %
+                    walk_fwd_idx += 1
+                    prev_tok = walk_tok
+                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
+                    walk_fwd_idx += 1
+                    # Dont update prev_tok for whitespace
+                else:
+                    break
+
+            if preceding_docstring:
+                self.properties[prop_name]["docstring"] = preceding_docstring.strip()
+            elif inline_docstring:
+                self.properties[prop_name]["docstring"] = inline_docstring.strip()
+            elif following_docstring:
+                self.properties[prop_name]["docstring"] = following_docstring.strip()
+            else:
+                self.properties[prop_name]["docstring"] = None
+
+    def _parse_property_validation(self, prop_name, prop):
+        """Parses property validation syntax"""
+        # First get the szize if found
+        size_gen = prop.find(tokens="meta.parens.size.matlab", depth=1)
+        try:  # We have a size, therefore parse the comma separated list into tuple
+            size_tok, _ = next(size_gen)
+            size_elem_gen = size_tok.find(
+                tokens=[
+                    "constant.numeric.decimal.matlab",
+                    "keyword.operator.vector.colon.matlab",
+                ],
+                depth=1,
+            )
+            size = tuple([elem[0].content for elem in size_elem_gen])
+            self.properties[prop_name]["size"] = size
+        except StopIteration:
+            pass
+
+        # Now find the type if it exists
+        # TODO this should be mapped to known types (though perhaps as a postprocess)
+        type_gen = prop.find(tokens="storage.type.matlab", depth=1)
+        try:
+            self.properties[prop_name]["type"] = next(type_gen)[0].content
+        except StopIteration:
+            pass
+
+        # Now find list of validators
+        validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1)
+        try:
+            validator_tok, _ = next(validator_gen)
+            validator_toks = validator_tok.findall(
+                tokens=[
+                    "variable.other.readwrite.matlab",
+                    "meta.function-call.parens.matlab",
+                ],
+                depth=1,
+            )  # TODO Probably bug here in MATLAB-Language-grammar
+            self.properties[prop_name]["validators"] = [
+                tok[0].content for tok in validator_toks
+            ]
+        except StopIteration:
+            pass
+
+    def _parse_method_section(self, section):
+        attrs = self._parse_attributes(section)
+        idxs = [
+            i
+            for i in range(len(section.children))
+            if section.children[i].token == "meta.function.matlab"
+        ]
+        for idx in idxs:
+            meth_tok = section.children[idx]
+            parsed_function = MatFunctionParser(meth_tok)
+            self.methods[parsed_function.name] = parsed_function
+            self.methods[parsed_function.name].attrs = attrs
+
+    def _parse_enum_section(self, section):
+        idxs = [
+            i
+            for i in range(len(section.children))
+            if section.children[i].token
+            == "meta.assignment.definition.enummember.matlab"
+        ]
+        for idx in idxs:
+            enum_tok = section.children[idx]
+            next_idx = idx
+            enum_name = enum_tok.children[0].content
+            self.enumerations[enum_name] = {}
+            if (
+                idx + 1 < len(section.children)
+                and section.children[idx + 1].token == "meta.parens.matlab"
+            ):  # Parse out args TODO this should be part of enummember assignment definition
+                args = tuple(
+                    [
+                        arg.content
+                        for arg in section.children[idx + 1].children
+                        if arg.token != "punctuation.separator.comma.matlab"
+                    ]
+                )
+                self.enumerations[enum_name]["args"] = args
+                next_idx += 1
+
+            # Walk backwards to get preceding docstring.
+            preceding_docstring = ""
+            walk_back_idx = idx - 1
+            next_tok = enum_tok
+            while walk_back_idx >= 0:
+                walk_tok = section.children[walk_back_idx]
+                if _is_empty_line_between_tok(walk_tok, next_tok):
+                    # Once there is an empty line between consecutive tokens we are done.
+                    break
+
+                if (
+                    not preceding_docstring
+                    and walk_tok.token == "comment.block.percentage.matlab"
+                ):
+                    # block comment immediately preceding enum so we are done.
+                    # TODO we might need to do some postprocessing here to handle indents gracefully
+                    preceding_docstring = walk_tok.content.strip()[2:-2]
+                    break
+                elif walk_tok.token == "comment.line.percentage.matlab":
+                    preceding_docstring = (
+                        walk_tok.content[1:] + "\n" + preceding_docstring
+                    )  # [1:] strips %
+                    walk_back_idx -= 1
+                    next_tok = walk_tok
+                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
+                    walk_back_idx -= 1
+                    # Dont update next_tok for whitespace
+                else:
+                    break
+
+            # Walk forwards to get following docstring or inline one.
+            inline_docstring = ""
+            following_docstring = ""
+            walk_fwd_idx = next_idx + 1
+            prev_tok = section.children[next_idx]
+            while walk_fwd_idx < len(section.children):
+                walk_tok = section.children[walk_fwd_idx]
+
+                if _is_empty_line_between_tok(prev_tok, walk_tok):
+                    # Once there is an empty line between consecutive tokens we are done.
+                    break
+
+                if (
+                    not following_docstring
+                    and walk_tok.token == "comment.block.percentage.matlab"
+                ):
+                    # block comment immediately following enum so we are done.
+                    # TODO we might need to do some postprocessing here to handle indents gracefully
+                    following_docstring = walk_tok.content.strip()[2:-2]
+                    break
+                elif walk_tok.token == "comment.line.percentage.matlab":
+                    # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit.
+                    if _toks_on_same_line(section.children[idx], walk_tok):
+                        inline_docstring = walk_tok.content[1:]
+                        break
+
+                    following_docstring = (
+                        following_docstring + "\n" + walk_tok.content[1:]
+                    )  # [1:] strips %
+                    walk_fwd_idx += 1
+                    prev_tok = walk_tok
+                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
+                    walk_fwd_idx += 1
+                    # Dont update prev_tok for whitespace
+                else:
+                    break
+
+            if preceding_docstring:
+                self.enumerations[enum_name]["docstring"] = preceding_docstring.strip()
+            elif inline_docstring:
+                self.enumerations[enum_name]["docstring"] = inline_docstring.strip()
+            elif following_docstring:
+                self.enumerations[enum_name]["docstring"] = following_docstring.strip()
+            else:
+                self.enumerations[enum_name]["docstring"] = None
+
+    def _parse_attributes(self, section):
+        # walk down child list and parse manually
+        children = section.begin
+        idx = 1
+        attrs = {}
+        while idx < len(children):
+            child_tok = children[idx]
+            if re.match(
+                "storage.modifier.(properties|methods|events).matlab", child_tok.token
+            ):
+                attr = child_tok.content
+                val = None
+                idx += 1  # walk to next token
+                try:  # however we may have walked off the end of the list in which case we exit
+                    maybe_assign_tok = children[idx]
+                except:
+                    attrs[attr] = val
+                    return attrs
+                if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
+                    idx += 1
+                    rhs_tok = children[idx]  # parse right hand side
+                    if rhs_tok.token == "meta.cell.literal.matlab":
+                        # A cell. For now just take the whole cell as value.
+                        # TODO parse out the cell array of metaclass literals.
+                        val = "{" + rhs_tok.content + "}"
+                        idx += 1
+                    elif rhs_tok.token == "constant.language.boolean.matlab":
+                        val = rhs_tok.content
+                        idx += 1
+                    elif rhs_tok.token == "storage.modifier.access.matlab":
+                        val = rhs_tok.content
+                        idx += 1
+                    elif rhs_tok.token == "keyword.operator.other.question.matlab":
+                        idx += 1
+                        metaclass_tok = children[idx]
+                        metaclass_components = metaclass_tok.findall(
+                            tokens=[
+                                "entity.name.namespace.matlab",
+                                "entity.other.class.matlab",
+                            ]
+                        )
+                        val = tuple([comp.content for comp, _ in metaclass_components])
+                    else:
+                        pass
+                attrs[attr] = val
+            else:  # Comma or continuation therefore skip
+                idx += 1
+
+        return attrs
+
+
+if __name__ == "__main__":
+    parser = Parser(ML_LANG)
+
+    with open(rpath, "rb") as f:
+        data = f.read()
+
+    tree = parser.parse(data)
+    class_parser = MatClassParser(tree)
+    import pdb
+
+    pdb.set_trace()

From 3090fa5d6df922086663b9473081c21ab1382820 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 25 Jul 2024 17:17:53 +0200
Subject: [PATCH 11/45] nearly finished with tree-sitter implementation

---
 sphinxcontrib/mat_tree_sitter_parser.py | 1078 ++++++++++-------------
 1 file changed, 473 insertions(+), 605 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index e0c039e..98bdfa1 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -11,41 +11,52 @@
 # QUERIES
 q_classdef = ML_LANG.query(
     """(class_definition
+    .
     "classdef"
+    .
     (attributes
-    [(attribute) @attrs _]+
+        [(attribute) @attrs _]+
     )?
+    .
     (identifier) @name
+    .
     (superclasses
         [(property_name) @supers _]+
     )?
+    .
+    (comment)? @docstring
     ) @class
 """
 )
 
-q_attributes = ML_LANG.query("""(identifier) @name (_)? @value""")
+q_attributes = ML_LANG.query("""(attribute (identifier) @name (_)? @value)""")
 
 q_supers = ML_LANG.query("""[(identifier) @secs "."]+ """)
 
 q_properties = ML_LANG.query(
     """(properties
-    (attributes)? @attrs
-    (property)* @properties
+    .
+    (attributes
+        [(attribute) @attrs _]+
+    )?
+    [(property) @properties _]*
     ) @prop_block
 """
 )
 
 q_methods = ML_LANG.query(
     """(methods
-    (attributes)? @attrs
-    (function_definition)* @methods
+    (attributes
+        [(attribute) @attrs _]+
+    )?
+    [(function_definition) @methods _]*
     ) @meth_block
 """
 )
 
 q_enumerations = ML_LANG.query(
     """(enumeration
-    (enum)* @enums
+    [(enum) @enums _]*
     ) @enum_block
 """
 )
@@ -58,155 +69,274 @@
 """
 )
 
+q_property = ML_LANG.query(
+    """
+    (property name: (identifier) @name
+     (dimensions
+         [[(spread_operator) (number)] @dims _]+
+     )?
+     (identifier)? @type
+     (validation_functions
+         [[(identifier) (function_call)] @validation_functions _]+
+     )?
+     (default_value (number))? @default
+     (comment)? @docstring
+    )
+"""
+)
 
-def find_first_child(curr, tok, attr="children"):
-    tok_lst = getattr(curr, attr)
-    ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok]
-    if not ind:
-        return (None, None)
-    return (tok_lst[ind[0]], ind[0])
-
+q_enum = ML_LANG.query(
+    """(enum
+    .
+    (identifier) @name
+    [(_) @args _]*
+    )
+"""
+)
 
-def _toks_on_same_line(tok1, tok2):
-    """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results"""
-    line1 = _get_last_line_of_tok(tok1)
-    line2 = _get_first_line_of_tok(tok2)
-    return line1 == line2
+q_fun = ML_LANG.query(
+    """(function_definition
+    .
+    (function_output
+        [
+            (identifier) @outputs
+            (multioutput_variable
+                [(identifier) @outputs _]+
+            )
+        ]
+    )?
+    .
+    name: (identifier) @name
+    .
+    (function_arguments
+        [(identifier) @params _]*
+    )?
+    .
+    [(arguments_statement) @argblocks _]*
+    .
+    (comment)? @docstring
+    )
+"""
+)
 
+q_argblock = ML_LANG.query(
+    """
+    (arguments_statement
+    .
+    (attributes
+        [(attribute) @attrs _]+
+    )?
+    .
+    [(property) @args _]*
+    )
+"""
+)
 
-def _is_empty_line_between_tok(tok1, tok2):
-    """Note: pass tokens in order they appear"""
-    line1 = _get_last_line_of_tok(tok1)
-    line2 = _get_first_line_of_tok(tok2)
-    return line2 - line1 > 1
+q_arg = ML_LANG.query(
+    """
+    (property name:
+        [
+            (identifier) @name
+            (property_name
+                [(identifier) @name _]+
+            )
+        ]
+     (dimensions
+         [[(spread_operator) (number)] @dims _]+
+     )?
+     (identifier)? @type
+     (validation_functions
+         [[(identifier) (function_call)] @validation_functions _]+
+     )?
+     (default_value (number))? @default
+     (comment)? @docstring
+    )
+"""
+)
 
 
-def _get_first_line_of_tok(tok):
-    return min([loc[0] for loc in tok.characters.keys()])
+re_percent_remove = re.compile(r"^[ \t]*%", flags=re.M)
 
 
-def _get_last_line_of_tok(tok):
-    return max([loc[0] for loc in tok.characters.keys()])
+def process_text_into_docstring(text):
+    docstring = text.decode("utf-8")
+    return re.sub(re_percent_remove, "", docstring)
 
 
 class MatFunctionParser:
-    def __init__(self, fun_tok):
+    def __init__(self, fun_node):
         """Parse Function definition"""
-        # First find the function name
-        name_gen = fun_tok.find(tokens="entity.name.function.matlab")
-        try:
-            name_tok, _ = next(name_gen)
-            self.name = name_tok.content
-        except StopIteration:
-            # TODO correct error here
-            raise Exception("Couldn't find function name")
-
-        # Find outputs and parameters
-        output_gen = fun_tok.find(tokens="variable.parameter.output.matlab")
-        param_gen = fun_tok.find(tokens="variable.parameter.input.matlab")
+        _, fun_match = q_fun.matches(fun_node)[0]
+        self.name = fun_match.get("name").text.decode("utf-8")
 
+        # Get outputs (possibly more than one)
         self.outputs = {}
-        self.params = {}
-        self.attrs = {}
+        output_nodes = fun_match.get("outputs")
+        if output_nodes is not None:
+            outputs = [output.text.decode("utf-8") for output in output_nodes]
+            for output in outputs:
+                self.outputs[output] = {}
 
-        for out, _ in output_gen:
-            self.outputs[out.content] = {}
-
-        for param, _ in param_gen:
-            self.params[param.content] = {}
-
-        # find arguments blocks
-        arg_section = None
-        for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"):
-            self._parse_argument_section(arg_section)
-
-        fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab")
-        try:
-            fun_decl_tok, _ = next(fun_decl_gen)
-        except StopIteration:
-            raise Exception(
-                "missing function declaration"
-            )  # This cant happen as we'd be missing a function name
-
-        # Now parse for docstring
-        docstring = ""
-        comment_toks = fun_tok.findall(
-            tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"]
-        )
-        last_tok = arg_section if arg_section is not None else fun_decl_tok
-
-        for comment_tok, _ in comment_toks:
-            if _is_empty_line_between_tok(last_tok, comment_tok):
-                # If we have non-consecutive tokens quit right away.
-                break
-            elif (
-                not docstring and comment_tok.token == "comment.block.percentage.matlab"
-            ):
-                # If we have no previous docstring lines and a comment block we take
-                # the comment block as the docstring and exit.
-                docstring = comment_tok.content.strip()[
-                    2:-2
-                ].strip()  # [2,-2] strips out block comment delimiters
-                break
-            elif comment_tok.token == "comment.line.percentage.matlab":
-                # keep parsing comments
-                docstring += comment_tok.content[1:] + "\n"
-            else:
-                # we are done.
-                break
-            last_tok = comment_tok
+        # Get parameters
+        self.params = {}
+        param_nodes = fun_match.get("params")
+        if output_nodes is not None:
+            params = [param.text.decode("utf-8") for param in param_nodes]
+            for param in params:
+                self.params[param] = {}
+
+        # parse out info from argument blocks
+        argblock_nodes = fun_match.get("argblocks")
+        for argblock_node in argblock_nodes:
+            self._parse_argument_section(argblock_node)
+
+        #
+        import pdb
 
-        self.docstring = docstring if docstring else None
+        pdb.set_trace()
 
-    def _parse_argument_section(self, section):
-        modifiers = [
-            mod.content
-            for mod, _ in section.find(tokens="storage.modifier.arguments.matlab")
-        ]
-        arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab")
-        for arg_def, _ in arg_def_gen:
-            arg_name = arg_def.begin[
-                0
-            ].content  # Get argument name that is being defined
-            self._parse_argument_validation(arg_name, arg_def, modifiers)
-
-    def _parse_argument_validation(self, arg_name, arg, modifiers):
-        # TODO This should be identical to propery validation I think. Refactor
-        # First get the size if found
-        section = self.output if "Output" in modifiers else self.params
-        size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1)
-        try:  # We have a size, therefore parse the comma separated list into tuple
-            size_tok, _ = next(size_gen)
-            size_elem_gen = size_tok.find(
-                tokens=[
-                    "constant.numeric.decimal.matlab",
-                    "keyword.operator.vector.colon.matlab",
-                ],
-                depth=1,
+    def _parse_argument_section(self, argblock_node):
+        _, argblock_match = q_argblock.matches(argblock_node)[0]
+        attrs_nodes = argblock_match.get("attrs")
+        attrs = self._parse_attributes(attrs_nodes)
+
+        arguments = argblock_match.get("args")
+
+        # TODO this is almost identical to property parsing.
+        #      might be a good idea to extract common code here.
+        for arg in arguments:
+            # match property to extract details
+            _, arg_match = q_arg.matches(arg)[0]
+
+            # extract name (this is always available so no need for None check)
+            name = [name.text.decode("utf-8") for name in arg_match.get("name")]
+
+            # extract dims list
+            dims_list = arg_match.get("dims")
+            dims = None
+            if dims_list is not None:
+                dims = tuple([dim.text.decode("utf-8") for dim in dims_list])
+
+            # extract type
+            type_node = arg_match.get("type")
+            typename = type_node.text.decode("utf-8") if type_node is not None else None
+
+            # extract validator functions
+            vf_list = arg_match.get("validator_functions")
+            vfs = None
+            if vf_list is not None:
+                vfs = [vf.text.decode("utf-8") for vf in vf_list]
+
+            # extract default
+            default_node = arg_match.get("default")
+            default = (
+                default_node.text.decode("utf-8") if default_node is not None else None
             )
-            size = tuple([elem[0].content for elem in size_elem_gen])
-            section[arg_name]["size"] = size
-        except StopIteration:
-            pass
-
-        # Now find the type if it exists
-        # TODO this should be mapped to known types (though perhaps as a postprocess)
-        type_gen = arg.find(tokens="storage.type.matlab", depth=1)
-        try:
-            section[arg_name]["type"] = next(type_gen)[0].content
-        except StopIteration:
-            pass
-
-        # Now find list of validators
-        validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1)
-        try:
-            validator_tok, _ = next(validator_gen)
-            validator_toks = validator_tok.findall(
-                tokens="variable.other.readwrite.matlab", depth=1
-            )  # TODO Probably bug here in MATLAB-Language-grammar
-            section[arg_name]["validators"] = [tok[0].content for tok in validator_toks]
-        except StopIteration:
-            pass
+
+            # extract inline or following docstring if there is no semicolon
+            docstring_node = arg_match.get("docstring")
+            docstring = ""
+            if docstring_node is not None:
+                # tree-sitter-matlab combines inline comments with following
+                # comments which means this requires some relatively ugly
+                # processing, but worth it for the ease of the rest of it.
+                prev_sib = docstring_node.prev_named_sibling
+                if docstring_node.start_point.row == prev_sib.end_point.row:
+                    # if the docstring is on the same line as the end of the definition only take the inline part
+                    docstring = process_text_into_docstring(docstring_node.text)
+                    docstring = docstring.split("\n")[0]
+                elif docstring_node.start_point.row - prev_sib.end_point.row <= 1:
+                    # Otherwise take the whole docstring
+                    docstring = process_text_into_docstring(docstring_node.text)
+
+            # extract inline or following docstring if there _is_ a semicolon.
+            # this is only done if we didn't already find a docstring with the previous approach
+            next_node = arg.next_named_sibling
+            if next_node is None or docstring is not None:
+                # Nothing to be done.
+                pass
+            elif next_node.type == "comment":
+                if next_node.start_point.row == arg.end_point.row:
+                    # if the docstring is on the same line as the end of the definition only take the inline part
+                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = docstring.split("\n")[0]
+                elif next_node.start_point.row - arg.end_point.row <= 1:
+                    # Otherwise take the whole docstring
+                    docstring = process_text_into_docstring(next_node.text)
+
+            # override docstring with prior if exists
+            prev_node = arg.prev_named_sibling
+            if prev_node is None:
+                # Nothing we can do, no previous comment
+                pass
+            elif prev_node.type == "comment":
+                # We have a previous comment if it ends on the previous
+                # line then we set the docstring. We also need to check
+                # if the first line of the comment is the same as a
+                # previous argument.
+                if arg.start_point.row - prev_node.end_point.row <= 1:
+                    ds = process_text_into_docstring(prev_node.text)
+                    prev_arg = prev_node.prev_named_sibling
+                    if prev_arg is not None and prev_arg.type == "property":
+                        if prev_node.start_point.row == prev_arg.end_point.row:
+                            ds = "\n".join(ds.split("\n")[1:])
+                    if ds:
+                        docstring = ds
+                else:
+                    if arg.start_point.row - prev_node.end_point.row <= 1:
+                        docstring = process_text_into_docstring(prev_node.text)
+            elif prev_node.type == "property":
+                # The previous argumentnode may have eaten our comment
+                # check for it a trailing comment. If it is not there
+                # then we stop looking.
+                prev_comment = prev_node.named_children[-1]
+                if prev_comment.type == "comment":
+                    # we now need to check if prev_comment ends on the line
+                    # before ours and trim the first line if it on the same
+                    # line as prev property.
+                    if arg.start_point.row - prev_comment.end_point.row <= 1:
+                        ds = process_text_into_docstring(prev_comment.text)
+                        if (
+                            prev_comment.start_point.row
+                            == prev_comment.prev_named_sibling.end_point.row
+                        ):
+                            ds = "\n".join(ds.split("\n")[1:])
+                        if ds:
+                            docstring = ds
+            # After all that if our docstring is empty then we have none
+            if docstring.strip() == "":
+                docstring == None
+
+            # Here we trust that the person is giving us valid matlab.
+            if "Output" in attrs.keys():
+                arg_loc = self.outputs
+            else:
+                arg_loc = self.params
+            if len(name) == 1:
+                arg_loc[name[0]] = {
+                    "attrs": attrs,
+                    "size": dims,
+                    "type": typename,
+                    "validators": vfs,
+                    "default": default,
+                    "docstring": docstring,
+                }
+            else:
+                # how to handle dotted args
+                pass
+
+    def _parse_attributes(self, attrs_nodes):
+        # TOOD deduplicated this
+        attrs = {}
+        if attrs_nodes is not None:
+            for attr_node in attrs_nodes:
+                _, attr_match = q_attributes.matches(attr_node)[0]
+                name = attr_match.get("name").text.decode("utf-8")
+                value_node = attr_match.get("value")
+                attrs[name] = (
+                    value_node.text.decode("utf-8") if value_node is not None else None
+                )
+        return attrs
 
 
 class MatClassParser:
@@ -228,491 +358,229 @@ def __init__(self, tree):
         self.cls = class_match.get("class")
         self.name = class_match.get("name")
 
-        import pdb
-
-        pdb.set_trace()
         # Parse class attrs and supers
-        attrs_node = class_match.get("attrs")
-        if attrs_node is not None:
-            attrs_matches = q_attributes.matches(attrs_node)
-            for _, match in attrs_matches:
-                name = match.get("name").text.decode("utf-8")
-                value_node = match.get("value")
-                self.attrs[name] = (
-                    value_node.text.decode("utf-8") if value_node is not None else None
-                )
+        attrs_nodes = class_match.get("attrs")
+        self.attrs = self._parse_attributes(attrs_nodes)
 
-        supers_node = class_match.get("supers")
-        if supers_node is not None:
-            supers_matches = q_supers.matches(supers_node)
-            for _, match in supers_matches:
+        supers_nodes = class_match.get("supers")
+        if supers_nodes is not None:
+            for super_node in supers_nodes:
+                _, super_match = q_supers.matches(super_node)[0]
                 super_cls = tuple(
-                    [sec.text.decode("utf-8") for sec in match.get("secs")]
+                    [sec.text.decode("utf-8") for sec in super_match.get("secs")]
                 )
                 self.supers.append(super_cls)
 
+        # get docstring and check that it consecutive
+        docstring_node = class_match.get("docstring")
+        if docstring_node is not None:
+            prev_node = docstring_node.prev_sibling
+            if docstring_node.start_point.row - prev_node.end_point.row <= 1:
+                self.docstring = process_text_into_docstring(docstring_node.text)
+
         prop_matches = q_properties.matches(self.cls)
         method_matches = q_methods.matches(self.cls)
-        enumeration_matches = q_enumerations.matches(self.cls)
+        enum_matches = q_enumerations.matches(self.cls)
         events_matches = q_events.matches(self.cls)
 
-        self._parse_clsdef()
-        self._find_class_docstring()
-
-        property_sections = self.cls.findall(tokens="meta.properties.matlab", depth=1)
-        method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1)
-        enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1)
-
-        for section, _ in property_sections:
-            self._parse_property_section(section)
-
-        for section, _ in method_sections:
-            self._parse_method_section(section)
-
-        for section, _ in enumeration_sections:
-            self._parse_enum_section(section)
-
-    def _find_class_docstring(self):
-        try:
-            possible_comment_tok = self.cls.children[1]
-        except IndexError:
-            return
-
-        if possible_comment_tok.token == "comment.line.percentage.matlab":
-            self._docstring_lines()
-        elif possible_comment_tok.token == "comment.block.percentage.matlab":
-            self.docstring = possible_comment_tok.content.strip()[
-                2:-2
-            ].strip()  # [2,-2] strips out block comment delimiters
-        else:
-            pass
-
-    def _docstring_lines(self):
-        idx = 1
-        cls_children = self.cls.children
-
-        while (
-            idx < len(cls_children)
-            and cls_children[idx].token == "comment.line.percentage.matlab"
-        ):
-            self.docstring += (
-                cls_children[idx].content[1:] + "\n"
-            )  # [1:] strips out percent sign
-            idx += 1
-        self.docstring = self.docstring.strip()
-
-    def _parse_clsdef(self):
-        # Try parsing attrs
-        attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab")
-        try:
-            attrs_tok, _ = next(attrs_tok_gen)
-            self._parse_class_attributes(attrs_tok)
-        except StopIteration:
-            pass
-
-        # Parse classname
-        classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab")
-        try:
-            classname_tok, _ = next(classname_tok_gen)
-            self.name = classname_tok.content
-        except StopIteration:
-            print("ClassName not found")  # TODO this is probably fatal
-
-        # Parse interited classes
-        parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab")
-
-        for parent_class_tok, _ in parent_class_toks:
-            sections = parent_class_tok.findall(
-                tokens=[
-                    "entity.name.namespace.matlab",
-                    "entity.other.inherited-class.matlab",
-                ]
-            )
-            super_cls = tuple([sec.content for sec, _ in sections])
-            self.supers.append(super_cls)
-        # Parse Attributes TODO maybe there is a smarter way to do this?
-        idx = 0
-        while self.clsdef.children[idx].token == "storage.modifier.class.matlab":
-            attr_tok = self.clsdef.children[idx]
-            attr = attr_tok.content
-            val = None  # TODO maybe do some typechecking here or we can assume that you give us valid Matlab
-            idx += 1
-            if attr_tok.token == "keyword.operator.assignment.matlab":  # pull out r.h.s
-                idx += 1
-                val = self.clsdef.children[idx].content
-                idx += 1
-            if (
-                attr_tok.token == "punctuation.separator.modifier.comma.matlab"
-            ):  # skip commas
-                idx += 1
-            self.attrs[attr] = val
-
-    def _parse_class_attributes(self, attrs_tok):
-        # walk down child list and parse manually
-        # TODO perhaps contribute a delimited list find to textmate-grammar-python
-        children = attrs_tok.children
-        idx = 0
-        while idx < len(children):
-            child_tok = children[idx]
-            if child_tok.token == "storage.modifier.class.matlab":
-                attr = child_tok.content
-                val = None
-                idx += 1  # walk to next token
-                try:  # however we may have walked off the end of the list in which case we exit
-                    maybe_assign_tok = children[idx]
-                except:
-                    self.attrs[attr] = val
-                    break
-                if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
-                    idx += 1
-                    rhs_tok = children[idx]  # parse right hand side
-                    if rhs_tok.token == "meta.cell.literal.matlab":
-                        # A cell. For now just take the whole cell as value.
-                        # TODO parse out the cell array of metaclass literals.
-                        val = "{" + rhs_tok.content + "}"
-                        idx += 1
-                    elif rhs_tok.token == "constant.language.boolean.matlab":
-                        val = rhs_tok.content
-                        idx += 1
-                    elif rhs_tok.token == "keyword.operator.other.question.matlab":
-                        idx += 1
-                        metaclass_tok = children[idx]
-                        metaclass_components = metaclass_tok.findall(
-                            tokens=[
-                                "entity.name.namespace.matlab",
-                                "entity.other.class.matlab",
-                            ]
-                        )
-                        val = tuple([comp.content for comp, _ in metaclass_components])
-                    else:
-                        pass
-                self.attrs[attr] = val
-            else:  # Comma or continuation therefore skip
-                idx += 1
-
-    def _parse_property_section(self, section):
-        # TODO parse property section attrs
-        attrs = self._parse_attributes(section)
-        idxs = [
-            i
-            for i in range(len(section.children))
-            if section.children[i].token == "meta.assignment.definition.property.matlab"
-        ]
-        for idx in idxs:
-            prop_tok = section.children[idx]
-            prop_name = prop_tok.begin[0].content
-            self.properties[prop_name] = {"attrs": attrs}  # Create entry for property
-            self._parse_property_validation(
-                prop_name, prop_tok
-            )  # Parse property validation.
-
-            # Try to find a default assignment:
-            default = None
-            _, assgn_idx = find_first_child(
-                prop_tok, "keyword.operator.assignment.matlab", attr="end"
-            )
-            if assgn_idx is not None:
-                default = ""
-                assgn_idx += 1  # skip assignment
-                while assgn_idx < len(prop_tok.end):
-                    tok = prop_tok.end[assgn_idx]
-                    assgn_idx += 1
-                    if tok.token in [
-                        "comment.line.percentage.matlab",
-                        "punctuation.terminator.semicolon.matlab",
-                    ]:
-                        break
-                    default += tok.content
-            self.properties[prop_name]["default"] = default
-
-            # Get inline docstring
-            inline_docstring_gen = prop_tok.find(
-                tokens="comment.line.percentage.matlab", attribute="end"
+        for _, prop_match in prop_matches:
+            self._parse_property_section(prop_match)
+        for _, enum_match in enum_matches:
+            self._parse_enum_section(enum_match)
+        for _, method_match in method_matches:
+            self._parse_method_section(method_match)
+        import pdb
+
+        pdb.set_trace()
+
+    def _parse_property_section(self, props_match):
+        # extract property section attributes
+        attrs_nodes = props_match.get("attrs")
+        attrs = self._parse_attributes(attrs_nodes)
+
+        properties = props_match.get("properties")
+
+        for prop in properties:
+            # match property to extract details
+            _, prop_match = q_property.matches(prop)[0]
+
+            # extract name (this is always available so no need for None check)
+            name = prop_match.get("name").text.decode("utf-8")
+
+            # extract dims list
+            dims_list = prop_match.get("dims")
+            dims = None
+            if dims_list is not None:
+                dims = tuple([dim.text.decode("utf-8") for dim in dims_list])
+
+            # extract type
+            type_node = prop_match.get("type")
+            typename = type_node.text.decode("utf-8") if type_node is not None else None
+
+            # extract validator functions
+            vf_list = prop_match.get("validator_functions")
+            vfs = None
+            if vf_list is not None:
+                vfs = [vf.text.decode("utf-8") for vf in vf_list]
+
+            # extract default
+            default_node = prop_match.get("default")
+            default = (
+                default_node.text.decode("utf-8") if default_node is not None else None
             )
-            try:
-                inline_docstring_tok, _ = next(inline_docstring_gen)
-                inline_docstring = inline_docstring_tok.content[
-                    1:
-                ]  # strip leading % sign
-            except StopIteration:
-                inline_docstring = None
-
-            # Walk backwards to get preceding docstring.
-            preceding_docstring = ""
-            walk_back_idx = idx - 1
-            next_tok = prop_tok
-            while walk_back_idx >= 0:
-                walk_tok = section.children[walk_back_idx]
-                if _is_empty_line_between_tok(walk_tok, next_tok):
-                    # Once there is an empty line between consecutive tokens we are done.
-                    break
-
-                if (
-                    not preceding_docstring
-                    and walk_tok.token == "comment.block.percentage.matlab"
-                ):
-                    # block comment immediately preceding enum so we are done.
-                    # TODO we might need to do some postprocessing here to handle indents gracefully
-                    preceding_docstring = walk_tok.content.strip()[2:-2]
-                    break
-                elif walk_tok.token == "comment.line.percentage.matlab":
-                    preceding_docstring = (
-                        walk_tok.content[1:] + "\n" + preceding_docstring
-                    )  # [1:] strips %
-                    walk_back_idx -= 1
-                    next_tok = walk_tok
-                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
-                    walk_back_idx -= 1
-                    # Dont update next_tok for whitespace
-                else:
-                    break
-
-            # Walk forwards to get following docstring or inline one.
-            following_docstring = ""
-            walk_fwd_idx = idx + 1
-            prev_tok = prop_tok
-            while walk_fwd_idx < len(section.children):
-                walk_tok = section.children[walk_fwd_idx]
-
-                if _is_empty_line_between_tok(prev_tok, walk_tok):
-                    # Once there is an empty line between consecutive tokens we are done.
-                    break
-
-                if (
-                    not following_docstring
-                    and walk_tok.token == "comment.block.percentage.matlab"
-                ):
-                    # block comment immediately following enum so we are done.
-                    # TODO we might need to do some postprocessing here to handle indents gracefully
-                    following_docstring = walk_tok.content.strip()[2:-2]
-                    break
-                elif walk_tok.token == "comment.line.percentage.matlab":
-                    following_docstring = (
-                        following_docstring + "\n" + walk_tok.content[1:]
-                    )  # [1:] strips %
-                    walk_fwd_idx += 1
-                    prev_tok = walk_tok
-                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
-                    walk_fwd_idx += 1
-                    # Dont update prev_tok for whitespace
+
+            # extract inline or following docstring if there is no semicolon
+            docstring_node = prop_match.get("docstring")
+            docstring = ""
+            if docstring_node is not None:
+                # tree-sitter-matlab combines inline comments with following
+                # comments which means this requires some relatively ugly
+                # processing, but worth it for the ease of the rest of it.
+                prev_sib = docstring_node.prev_named_sibling
+                if docstring_node.start_point.row == prev_sib.end_point.row:
+                    # if the docstring is on the same line as the end of the definition only take the inline part
+                    docstring = process_text_into_docstring(docstring_node.text)
+                    docstring = docstring.split("\n")[0]
+                elif docstring_node.start_point.row - prev_sib.end_point.row <= 1:
+                    # Otherwise take the whole docstring
+                    docstring = process_text_into_docstring(docstring_node.text)
+
+            # extract inline or following docstring if there _is_ a semicolon.
+            # this is only done if we didn't already find a docstring with the previous approach
+            next_node = prop.next_named_sibling
+            if next_node is None or docstring is not None:
+                # Nothing to be done.
+                pass
+            elif next_node.type == "comment":
+                if next_node.start_point.row == prop.end_point.row:
+                    # if the docstring is on the same line as the end of the definition only take the inline part
+                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = docstring.split("\n")[0]
+                elif next_node.start_point.row - prop.end_point.row <= 1:
+                    # Otherwise take the whole docstring
+                    docstring = process_text_into_docstring(next_node.text)
+
+            # override docstring with prior if exists
+            prev_node = prop.prev_named_sibling
+            if prev_node is None:
+                # Nothing we can do, no previous comment
+                pass
+            elif prev_node.type == "comment":
+                # We have a previous comment if it ends on the previous
+                # line then we set the docstring. We also need to check
+                # if the first line of the comment is the same as a
+                # previous property.
+                if prop.start_point.row - prev_node.end_point.row <= 1:
+                    ds = process_text_into_docstring(prev_node.text)
+                    prev_prop = prev_node.prev_named_sibling
+                    if prev_prop is not None and prev_prop.type == "property":
+                        if prev_node.start_point.row == prev_prop.end_point.row:
+                            ds = "\n".join(ds.split("\n")[1:])
+                    if ds:
+                        docstring = ds
                 else:
-                    break
-
-            if preceding_docstring:
-                self.properties[prop_name]["docstring"] = preceding_docstring.strip()
-            elif inline_docstring:
-                self.properties[prop_name]["docstring"] = inline_docstring.strip()
-            elif following_docstring:
-                self.properties[prop_name]["docstring"] = following_docstring.strip()
-            else:
-                self.properties[prop_name]["docstring"] = None
-
-    def _parse_property_validation(self, prop_name, prop):
-        """Parses property validation syntax"""
-        # First get the szize if found
-        size_gen = prop.find(tokens="meta.parens.size.matlab", depth=1)
-        try:  # We have a size, therefore parse the comma separated list into tuple
-            size_tok, _ = next(size_gen)
-            size_elem_gen = size_tok.find(
-                tokens=[
-                    "constant.numeric.decimal.matlab",
-                    "keyword.operator.vector.colon.matlab",
-                ],
-                depth=1,
-            )
-            size = tuple([elem[0].content for elem in size_elem_gen])
-            self.properties[prop_name]["size"] = size
-        except StopIteration:
-            pass
-
-        # Now find the type if it exists
-        # TODO this should be mapped to known types (though perhaps as a postprocess)
-        type_gen = prop.find(tokens="storage.type.matlab", depth=1)
-        try:
-            self.properties[prop_name]["type"] = next(type_gen)[0].content
-        except StopIteration:
-            pass
-
-        # Now find list of validators
-        validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1)
-        try:
-            validator_tok, _ = next(validator_gen)
-            validator_toks = validator_tok.findall(
-                tokens=[
-                    "variable.other.readwrite.matlab",
-                    "meta.function-call.parens.matlab",
-                ],
-                depth=1,
-            )  # TODO Probably bug here in MATLAB-Language-grammar
-            self.properties[prop_name]["validators"] = [
-                tok[0].content for tok in validator_toks
-            ]
-        except StopIteration:
-            pass
-
-    def _parse_method_section(self, section):
-        attrs = self._parse_attributes(section)
-        idxs = [
-            i
-            for i in range(len(section.children))
-            if section.children[i].token == "meta.function.matlab"
-        ]
-        for idx in idxs:
-            meth_tok = section.children[idx]
-            parsed_function = MatFunctionParser(meth_tok)
+                    if prop.start_point.row - prev_node.end_point.row <= 1:
+                        docstring = process_text_into_docstring(prev_node.text)
+            elif prev_node.type == "property":
+                # The previous property node may have eaten our comment
+                # check for it a trailing comment. If it is not there
+                # then we stop looking.
+                prev_comment = prev_node.named_children[-1]
+                if prev_comment.type == "comment":
+                    # we now need to check if prev_comment ends on the line
+                    # before ours and trim the first line if it on the same
+                    # line as prev property.
+                    if prop.start_point.row - prev_comment.end_point.row <= 1:
+                        ds = process_text_into_docstring(prev_comment.text)
+                        if (
+                            prev_comment.start_point.row
+                            == prev_comment.prev_named_sibling.end_point.row
+                        ):
+                            ds = "\n".join(ds.split("\n")[1:])
+                        if ds:
+                            docstring = ds
+            # After all that if our docstring is empty then we have none
+            if docstring.strip() == "":
+                docstring == None
+
+            self.properties[name] = {
+                "attrs": attrs,
+                "size": dims,
+                "type": typename,
+                "validators": vfs,
+                "default": default,
+                "docstring": docstring,
+            }
+
+    def _parse_method_section(self, methods_match):
+        attrs_nodes = methods_match.get("attrs")
+        attrs = self._parse_attributes(attrs_nodes)
+        methods = methods_match.get("methods")
+        for method in methods:
+            parsed_function = MatFunctionParser(method)
             self.methods[parsed_function.name] = parsed_function
             self.methods[parsed_function.name].attrs = attrs
 
-    def _parse_enum_section(self, section):
-        idxs = [
-            i
-            for i in range(len(section.children))
-            if section.children[i].token
-            == "meta.assignment.definition.enummember.matlab"
-        ]
-        for idx in idxs:
-            enum_tok = section.children[idx]
-            next_idx = idx
-            enum_name = enum_tok.children[0].content
-            self.enumerations[enum_name] = {}
-            if (
-                idx + 1 < len(section.children)
-                and section.children[idx + 1].token == "meta.parens.matlab"
-            ):  # Parse out args TODO this should be part of enummember assignment definition
-                args = tuple(
-                    [
-                        arg.content
-                        for arg in section.children[idx + 1].children
-                        if arg.token != "punctuation.separator.comma.matlab"
-                    ]
-                )
-                self.enumerations[enum_name]["args"] = args
-                next_idx += 1
-
-            # Walk backwards to get preceding docstring.
-            preceding_docstring = ""
-            walk_back_idx = idx - 1
-            next_tok = enum_tok
-            while walk_back_idx >= 0:
-                walk_tok = section.children[walk_back_idx]
-                if _is_empty_line_between_tok(walk_tok, next_tok):
-                    # Once there is an empty line between consecutive tokens we are done.
-                    break
-
-                if (
-                    not preceding_docstring
-                    and walk_tok.token == "comment.block.percentage.matlab"
-                ):
-                    # block comment immediately preceding enum so we are done.
-                    # TODO we might need to do some postprocessing here to handle indents gracefully
-                    preceding_docstring = walk_tok.content.strip()[2:-2]
-                    break
-                elif walk_tok.token == "comment.line.percentage.matlab":
-                    preceding_docstring = (
-                        walk_tok.content[1:] + "\n" + preceding_docstring
-                    )  # [1:] strips %
-                    walk_back_idx -= 1
-                    next_tok = walk_tok
-                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
-                    walk_back_idx -= 1
-                    # Dont update next_tok for whitespace
-                else:
-                    break
-
-            # Walk forwards to get following docstring or inline one.
-            inline_docstring = ""
-            following_docstring = ""
-            walk_fwd_idx = next_idx + 1
-            prev_tok = section.children[next_idx]
-            while walk_fwd_idx < len(section.children):
-                walk_tok = section.children[walk_fwd_idx]
-
-                if _is_empty_line_between_tok(prev_tok, walk_tok):
-                    # Once there is an empty line between consecutive tokens we are done.
-                    break
-
-                if (
-                    not following_docstring
-                    and walk_tok.token == "comment.block.percentage.matlab"
-                ):
-                    # block comment immediately following enum so we are done.
-                    # TODO we might need to do some postprocessing here to handle indents gracefully
-                    following_docstring = walk_tok.content.strip()[2:-2]
-                    break
-                elif walk_tok.token == "comment.line.percentage.matlab":
-                    # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit.
-                    if _toks_on_same_line(section.children[idx], walk_tok):
-                        inline_docstring = walk_tok.content[1:]
-                        break
-
-                    following_docstring = (
-                        following_docstring + "\n" + walk_tok.content[1:]
-                    )  # [1:] strips %
-                    walk_fwd_idx += 1
-                    prev_tok = walk_tok
-                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
-                    walk_fwd_idx += 1
-                    # Dont update prev_tok for whitespace
-                else:
-                    break
-
-            if preceding_docstring:
-                self.enumerations[enum_name]["docstring"] = preceding_docstring.strip()
-            elif inline_docstring:
-                self.enumerations[enum_name]["docstring"] = inline_docstring.strip()
-            elif following_docstring:
-                self.enumerations[enum_name]["docstring"] = following_docstring.strip()
+    def _parse_enum_section(self, enums_match):
+        enums = enums_match.get("enums")
+        for enum in enums:
+            _, enum_match = q_enum.matches(enum)[0]
+            name = enum_match.get("name").text.decode("utf-8")
+            arg_nodes = enum_match.get("args")
+            if arg_nodes is not None:
+                args = [arg.text.decode("utf-8") for arg in arg_nodes]
             else:
-                self.enumerations[enum_name]["docstring"] = None
+                args = None
+
+            docstring = ""
+            # look forward for docstring
+            next_node = enum.next_named_sibling
+            if next_node is not None and next_node.type == "comment":
+                if next_node.start_point.row == enum.end_point.row:
+                    # if the docstring is on the same line as the end of the definition only take the inline part
+                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = docstring.split("\n")[0]
+                elif next_node.start_point.row - enum.end_point.row <= 1:
+                    # Otherwise take the whole docstring
+                    docstring = process_text_into_docstring(next_node.text)
+
+            # override docstring with prior if exists
+            prev_node = enum.prev_named_sibling
+            if prev_node is None:
+                # Nothing we can do, no previous comment
+                pass
+            elif prev_node.type == "comment":
+                # We have a previous comment if it ends on the previous
+                # line then we set the docstring. We also need to check
+                # if the first line of the comment is the same as a
+                # previous enum.
+                if enum.start_point.row - prev_node.end_point.row <= 1:
+                    ds = process_text_into_docstring(prev_node.text)
+                    prev_enum = prev_node.prev_named_sibling
+                    if prev_enum is not None and prev_enum.type == "enum":
+                        if prev_node.start_point.row == prev_enum.end_point.row:
+                            ds = "\n".join(ds.split("\n")[1:])
+                    if ds:
+                        docstring = ds
+                else:
+                    if enum.start_point.row - prev_node.end_point.row <= 1:
+                        docstring = process_text_into_docstring(prev_node.text)
+            # After all that if our docstring is empty then we have none
+            if docstring.strip() == "":
+                docstring == None
 
-    def _parse_attributes(self, section):
-        # walk down child list and parse manually
-        children = section.begin
-        idx = 1
-        attrs = {}
-        while idx < len(children):
-            child_tok = children[idx]
-            if re.match(
-                "storage.modifier.(properties|methods|events).matlab", child_tok.token
-            ):
-                attr = child_tok.content
-                val = None
-                idx += 1  # walk to next token
-                try:  # however we may have walked off the end of the list in which case we exit
-                    maybe_assign_tok = children[idx]
-                except:
-                    attrs[attr] = val
-                    return attrs
-                if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
-                    idx += 1
-                    rhs_tok = children[idx]  # parse right hand side
-                    if rhs_tok.token == "meta.cell.literal.matlab":
-                        # A cell. For now just take the whole cell as value.
-                        # TODO parse out the cell array of metaclass literals.
-                        val = "{" + rhs_tok.content + "}"
-                        idx += 1
-                    elif rhs_tok.token == "constant.language.boolean.matlab":
-                        val = rhs_tok.content
-                        idx += 1
-                    elif rhs_tok.token == "storage.modifier.access.matlab":
-                        val = rhs_tok.content
-                        idx += 1
-                    elif rhs_tok.token == "keyword.operator.other.question.matlab":
-                        idx += 1
-                        metaclass_tok = children[idx]
-                        metaclass_components = metaclass_tok.findall(
-                            tokens=[
-                                "entity.name.namespace.matlab",
-                                "entity.other.class.matlab",
-                            ]
-                        )
-                        val = tuple([comp.content for comp, _ in metaclass_components])
-                    else:
-                        pass
-                attrs[attr] = val
-            else:  # Comma or continuation therefore skip
-                idx += 1
+            self.enumerations[name] = {"args": args, "docstring": docstring}
 
+    def _parse_attributes(self, attrs_nodes):
+        attrs = {}
+        if attrs_nodes is not None:
+            for attr_node in attrs_nodes:
+                _, attr_match = q_attributes.matches(attr_node)[0]
+                name = attr_match.get("name").text.decode("utf-8")
+                value_node = attr_match.get("value")
+                attrs[name] = (
+                    value_node.text.decode("utf-8") if value_node is not None else None
+                )
         return attrs
 
 

From b0d603ee49984e30e304dd1173ff51ea6ef04284 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sat, 27 Jul 2024 10:57:10 +0200
Subject: [PATCH 12/45] everything but events working

---
 sphinxcontrib/mat_tree_sitter_parser.py | 48 ++++++++++++++-----------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 98bdfa1..b277c8c 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -2,12 +2,11 @@
 from tree_sitter import Language, Parser
 import re
 
-# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+# rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m"
 
 ML_LANG = Language(tsml.language())
 
-rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m"
-
 # QUERIES
 q_classdef = ML_LANG.query(
     """(class_definition
@@ -155,13 +154,16 @@
 )
 
 
-re_percent_remove = re.compile(r"^[ \t]*%", flags=re.M)
-
+re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M)
+re_assign_remove = re.compile(r"^=[ \t]*")
 
 def process_text_into_docstring(text):
     docstring = text.decode("utf-8")
     return re.sub(re_percent_remove, "", docstring)
 
+def process_default(text):
+    default = text.decode("utf-8")
+    return re.sub(re_assign_remove, "", default)
 
 class MatFunctionParser:
     def __init__(self, fun_node):
@@ -180,20 +182,29 @@ def __init__(self, fun_node):
         # Get parameters
         self.params = {}
         param_nodes = fun_match.get("params")
-        if output_nodes is not None:
+        if param_nodes is not None:
             params = [param.text.decode("utf-8") for param in param_nodes]
             for param in params:
                 self.params[param] = {}
 
         # parse out info from argument blocks
         argblock_nodes = fun_match.get("argblocks")
-        for argblock_node in argblock_nodes:
-            self._parse_argument_section(argblock_node)
+        if argblock_nodes is not None:
+            for argblock_node in argblock_nodes:
+                self._parse_argument_section(argblock_node)
 
-        #
-        import pdb
+        # get docstring
+        docstring_node = fun_match.get("docstring")
+        docstring = None
+        if docstring_node is not None:
+            prev_sib = docstring_node.prev_named_sibling
+            if docstring_node.start_point.row - prev_sib.end_point.row <= 1:
+                docstring = process_text_into_docstring(docstring_node.text)
 
-        pdb.set_trace()
+        if not docstring:
+            docstring = None
+        self.docstring = docstring
+        
 
     def _parse_argument_section(self, argblock_node):
         _, argblock_match = q_argblock.matches(argblock_node)[0]
@@ -230,7 +241,7 @@ def _parse_argument_section(self, argblock_node):
             # extract default
             default_node = arg_match.get("default")
             default = (
-                default_node.text.decode("utf-8") if default_node is not None else None
+                process_default(default_node.text) if default_node is not None else None
             )
 
             # extract inline or following docstring if there is no semicolon
@@ -304,8 +315,8 @@ def _parse_argument_section(self, argblock_node):
                         if ds:
                             docstring = ds
             # After all that if our docstring is empty then we have none
-            if docstring.strip() == "":
-                docstring == None
+            if not docstring.strip():
+                docstring = None
 
             # Here we trust that the person is giving us valid matlab.
             if "Output" in attrs.keys():
@@ -426,7 +437,7 @@ def _parse_property_section(self, props_match):
             # extract default
             default_node = prop_match.get("default")
             default = (
-                default_node.text.decode("utf-8") if default_node is not None else None
+                process_default(default_node.text) if default_node is not None else None
             )
 
             # extract inline or following docstring if there is no semicolon
@@ -500,8 +511,8 @@ def _parse_property_section(self, props_match):
                         if ds:
                             docstring = ds
             # After all that if our docstring is empty then we have none
-            if docstring.strip() == "":
-                docstring == None
+            if not docstring.strip():
+                docstring = None
 
             self.properties[name] = {
                 "attrs": attrs,
@@ -592,6 +603,3 @@ def _parse_attributes(self, attrs_nodes):
 
     tree = parser.parse(data)
     class_parser = MatClassParser(tree)
-    import pdb
-
-    pdb.set_trace()

From f12b0a47588bc82e261efcccae03ef06464f7333 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sat, 27 Jul 2024 11:39:32 +0200
Subject: [PATCH 13/45] working events

---
 sphinxcontrib/mat_tree_sitter_parser.py | 69 ++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 8 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index b277c8c..6e42dbb 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -2,8 +2,8 @@
 from tree_sitter import Language, Parser
 import re
 
-rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-# rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m"
+#rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m"
 
 ML_LANG = Language(tsml.language())
 
@@ -38,7 +38,7 @@
     (attributes
         [(attribute) @attrs _]+
     )?
-    [(property) @properties _]*
+    [(property) @properties _]+
     ) @prop_block
 """
 )
@@ -48,22 +48,24 @@
     (attributes
         [(attribute) @attrs _]+
     )?
-    [(function_definition) @methods _]*
+    [(function_definition) @methods _]+
     ) @meth_block
 """
 )
 
 q_enumerations = ML_LANG.query(
     """(enumeration
-    [(enum) @enums _]*
+    [(enum) @enums _]+
     ) @enum_block
 """
 )
 
 q_events = ML_LANG.query(
     """(events
-    (attributes)? @attrs
-    (identifier)* @events
+    (attributes
+        [(attribute) @attrs _]+
+    )?
+    (identifier)+ @events
     ) @event_block
 """
 )
@@ -360,6 +362,7 @@ def __init__(self, tree):
         self.properties = {}
         self.methods = {}
         self.enumerations = {}
+        self.events = {}
 
         self.tree = tree
 
@@ -392,7 +395,7 @@ def __init__(self, tree):
         prop_matches = q_properties.matches(self.cls)
         method_matches = q_methods.matches(self.cls)
         enum_matches = q_enumerations.matches(self.cls)
-        events_matches = q_events.matches(self.cls)
+        event_matches = q_events.matches(self.cls)
 
         for _, prop_match in prop_matches:
             self._parse_property_section(prop_match)
@@ -400,6 +403,8 @@ def __init__(self, tree):
             self._parse_enum_section(enum_match)
         for _, method_match in method_matches:
             self._parse_method_section(method_match)
+        for _, event_match in event_matches:
+            self._parse_event_section(event_match)
         import pdb
 
         pdb.set_trace()
@@ -582,6 +587,54 @@ def _parse_enum_section(self, enums_match):
 
             self.enumerations[name] = {"args": args, "docstring": docstring}
 
+    def _parse_event_section(self, events_match):
+        attrs_nodes = events_match.get("attrs")
+        attrs = self._parse_attributes(attrs_nodes)
+        events = events_match.get("events")
+        for event in events:
+            name = event.text.decode("utf-8")
+            
+            docstring = ""
+            # look forward for docstring
+            next_node = event.next_named_sibling
+            if next_node is not None and next_node.type == "comment":
+                if next_node.start_point.row == event.end_point.row:
+                    # if the docstring is on the same line as the end of the definition only take the inline part
+                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = docstring.split("\n")[0]
+                elif next_node.start_point.row - event.end_point.row <= 1:
+                    # Otherwise take the whole docstring
+                    docstring = process_text_into_docstring(next_node.text)
+
+            # override docstring with prior if exists
+            prev_node = event.prev_named_sibling
+            if prev_node is None:
+                # Nothing we can do, no previous comment
+                pass
+            elif prev_node.type == "comment":
+                # We have a previous comment if it ends on the previous
+                # line then we set the docstring. We also need to check
+                # if the first line of the comment is the same as a
+                # previous event.
+                if event.start_point.row - prev_node.end_point.row <= 1:
+                    ds = process_text_into_docstring(prev_node.text)
+                    prev_event = prev_node.prev_named_sibling
+                    if prev_event is not None and prev_event.type == "identifier":
+                        if prev_node.start_point.row == prev_event.end_point.row:
+                            ds = "\n".join(ds.split("\n")[1:])
+                    if ds:
+                        docstring = ds
+                else:
+                    if event.start_point.row - prev_node.end_point.row <= 1:
+                        docstring = process_text_into_docstring(prev_node.text)
+            # After all that if our docstring is empty then we have none
+            if docstring.strip() == "":
+                docstring == None
+
+            self.events[name] = {"attrs": attrs, "docstring": docstring}
+        
+        import pdb; pdb.set_trace()
+        
     def _parse_attributes(self, attrs_nodes):
         attrs = {}
         if attrs_nodes is not None:

From 522e69016b9fb5355c004b886a57820985ca89cc Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sat, 27 Jul 2024 11:45:29 +0200
Subject: [PATCH 14/45] exit early if a query matches a block with no elements

---
 sphinxcontrib/mat_tree_sitter_parser.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 6e42dbb..dc18a98 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -410,12 +410,12 @@ def __init__(self, tree):
         pdb.set_trace()
 
     def _parse_property_section(self, props_match):
+        properties = props_match.get("properties")
+        if properties is None:
+            return
         # extract property section attributes
         attrs_nodes = props_match.get("attrs")
         attrs = self._parse_attributes(attrs_nodes)
-
-        properties = props_match.get("properties")
-
         for prop in properties:
             # match property to extract details
             _, prop_match = q_property.matches(prop)[0]
@@ -529,9 +529,11 @@ def _parse_property_section(self, props_match):
             }
 
     def _parse_method_section(self, methods_match):
+        methods = methods_match.get("methods")
+        if methods is None:
+            return
         attrs_nodes = methods_match.get("attrs")
         attrs = self._parse_attributes(attrs_nodes)
-        methods = methods_match.get("methods")
         for method in methods:
             parsed_function = MatFunctionParser(method)
             self.methods[parsed_function.name] = parsed_function
@@ -539,6 +541,8 @@ def _parse_method_section(self, methods_match):
 
     def _parse_enum_section(self, enums_match):
         enums = enums_match.get("enums")
+        if enums is None:
+            return
         for enum in enums:
             _, enum_match = q_enum.matches(enum)[0]
             name = enum_match.get("name").text.decode("utf-8")
@@ -591,6 +595,8 @@ def _parse_event_section(self, events_match):
         attrs_nodes = events_match.get("attrs")
         attrs = self._parse_attributes(attrs_nodes)
         events = events_match.get("events")
+        if events is None:
+            return
         for event in events:
             name = event.text.decode("utf-8")
             

From 1c07f161de9d1135405cb16a296333f04af4aecd Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sat, 27 Jul 2024 16:29:22 +0200
Subject: [PATCH 15/45] integrating tree-sitter parser into mat_types

---
 sphinxcontrib/mat_tree_sitter_parser.py | 15 +++---
 sphinxcontrib/mat_types.py              | 61 ++++++++++---------------
 2 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index dc18a98..a75eb53 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -168,9 +168,9 @@ def process_default(text):
     return re.sub(re_assign_remove, "", default)
 
 class MatFunctionParser:
-    def __init__(self, fun_node):
+    def __init__(self, root_node):
         """Parse Function definition"""
-        _, fun_match = q_fun.matches(fun_node)[0]
+        _, fun_match = q_fun.matches(root_node)[0]
         self.name = fun_match.get("name").text.decode("utf-8")
 
         # Get outputs (possibly more than one)
@@ -353,7 +353,7 @@ def _parse_attributes(self, attrs_nodes):
 
 
 class MatClassParser:
-    def __init__(self, tree):
+    def __init__(self, root_node):
         # DATA
         self.name = ""
         self.supers = []
@@ -364,10 +364,10 @@ def __init__(self, tree):
         self.enumerations = {}
         self.events = {}
 
-        self.tree = tree
+        self.root_node = root_node
 
         # Parse class basics
-        class_matches = q_classdef.matches(tree.root_node)
+        class_matches = q_classdef.matches(root_node)
         _, class_match = class_matches[0]
         self.cls = class_match.get("class")
         self.name = class_match.get("name")
@@ -405,9 +405,6 @@ def __init__(self, tree):
             self._parse_method_section(method_match)
         for _, event_match in event_matches:
             self._parse_event_section(event_match)
-        import pdb
-
-        pdb.set_trace()
 
     def _parse_property_section(self, props_match):
         properties = props_match.get("properties")
@@ -661,4 +658,4 @@ def _parse_attributes(self, attrs_nodes):
         data = f.read()
 
     tree = parser.parse(data)
-    class_parser = MatClassParser(tree)
+    class_parser = MatClassParser(tree.root_node)
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index cd7acb9..824c0b5 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -17,8 +17,9 @@
 from zipfile import ZipFile
 import xml.etree.ElementTree as ET
 import sphinxcontrib.mat_parser as mat_parser
-from sphinxcontrib.mat_textmate_parser import MatClassParser, MatFunctionParser
-from textmate_grammar.parsers.matlab import MatlabParser
+from sphinxcontrib.mat_tree_sitter_parser import MatClassParser, MatFunctionParser, ML_LANG
+import tree_sitter_matlab as tsml
+from tree_sitter import Language, Parser
 import logging
 from pathlib import Path
 import cProfile
@@ -501,62 +502,48 @@ def parse_mfile(mfile, name, path, encoding=None):
         # read mfile code
         if encoding is None:
             encoding = "utf-8"
-        with open(mfile, "r", encoding=encoding, errors="replace") as code_f:
-            code = code_f.read().replace("\r\n", "\n")
+        with open(mfile, "rb") as code_f:
+            code = code_f.read()
 
         full_code = code
 
-        # quiet the textmate grammar logger and parse the file
-        logging.getLogger("textmate_grammar").setLevel(logging.ERROR)
-        parser = MatlabParser()
-        toks = parser.parse_file(mfile)
+        # parse the file
+        parser = Parser(ML_LANG)
+        tree = parser.parse(code)
 
         modname = path.replace(os.sep, ".")  # module name
 
         # assume that functions and classes always start with a keyword
-        def isFunction(token):
-            comments_and_functions = [
-                "comment.block.percentage.matlab",
-                "comment.line.percentage.matlab",
-                "meta.function.matlab",
-            ]
-            return all(
-                [(child.token in comments_and_functions) for child in token.children]
-            )
+        def isFunction(tree):
+            q_is_function = ML_LANG.query(r"""(source_file [(comment) "\n"]* (function_definition))""")
+            matches = q_is_function.matches(tree.root_node)
+            if matches:
+                return True
+            else:
+                return False
 
-        def isClass(token):
-            tok_gen = token.find(tokens="meta.class.matlab", depth=1)
-            try:
-                tok, _ = next(tok_gen)
+        def isClass(tree):
+            q_is_class = ML_LANG.query("(class_definition)")
+            matches = q_is_class.matches(tree.root_node)
+            if matches:
                 return True
-            except StopIteration:
+            else:
                 return False
 
-        if isClass(toks):
+        if isClass(tree):
             logger.debug(
                 "[sphinxcontrib-matlabdomain] parsing classdef %s from %s.",
                 name,
                 modname,
             )
-            return MatClass(name, modname, toks)
-        elif isFunction(toks):
+            return MatClass(name, modname, tree.root_node)
+        elif isFunction(tree):
             logger.debug(
                 "[sphinxcontrib-matlabdomain] parsing function %s from %s.",
                 name,
                 modname,
             )
-            fun_tok_gen = toks.find(tokens="meta.function.matlab")
-            parsed_function = None
-            try:
-                fun_tok, _ = next(fun_tok_gen)
-                parsed_function = MatFunctionParser(fun_tok)
-            except StopIteration:
-                logger.warning(
-                    "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. No function found.",
-                    modname,
-                    name,
-                )
-            return MatFunction(name, modname, toks)
+            return MatFunction(name, modname, tree.root_node)
         else:
             pass
             # it's a script file retoken with header comment

From 9f3297a5b2efe69bdbfdc64a797bf79d0ecdd45b Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sat, 27 Jul 2024 17:14:13 +0200
Subject: [PATCH 16/45] fixing default value parsing

---
 sphinxcontrib/mat_tree_sitter_parser.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index a75eb53..8d7f8e8 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -2,8 +2,8 @@
 from tree_sitter import Language, Parser
 import re
 
-#rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m"
+rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+# rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m"
 
 ML_LANG = Language(tsml.language())
 
@@ -80,7 +80,7 @@
      (validation_functions
          [[(identifier) (function_call)] @validation_functions _]+
      )?
-     (default_value (number))? @default
+     (default_value)? @default
      (comment)? @docstring
     )
 """
@@ -159,14 +159,17 @@
 re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M)
 re_assign_remove = re.compile(r"^=[ \t]*")
 
+
 def process_text_into_docstring(text):
     docstring = text.decode("utf-8")
     return re.sub(re_percent_remove, "", docstring)
 
+
 def process_default(text):
     default = text.decode("utf-8")
     return re.sub(re_assign_remove, "", default)
 
+
 class MatFunctionParser:
     def __init__(self, root_node):
         """Parse Function definition"""
@@ -206,7 +209,6 @@ def __init__(self, root_node):
         if not docstring:
             docstring = None
         self.docstring = docstring
-        
 
     def _parse_argument_section(self, argblock_node):
         _, argblock_match = q_argblock.matches(argblock_node)[0]
@@ -596,7 +598,7 @@ def _parse_event_section(self, events_match):
             return
         for event in events:
             name = event.text.decode("utf-8")
-            
+
             docstring = ""
             # look forward for docstring
             next_node = event.next_named_sibling
@@ -635,9 +637,11 @@ def _parse_event_section(self, events_match):
                 docstring == None
 
             self.events[name] = {"attrs": attrs, "docstring": docstring}
-        
-        import pdb; pdb.set_trace()
-        
+
+        import pdb
+
+        pdb.set_trace()
+
     def _parse_attributes(self, attrs_nodes):
         attrs = {}
         if attrs_nodes is not None:

From c6d8f4a132fa35572124c07478f17ad18fc83e45 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Wed, 14 Aug 2024 10:44:54 +0200
Subject: [PATCH 17/45] some test fixes; add tree-sitter requirements to setup.py

---
 setup.py                                |   7 +-
 sphinxcontrib/mat_tree_sitter_parser.py |  44 ++--
 sphinxcontrib/mat_types.py              |  12 +-
 tests/test_parse_mfile.py               | 262 ++++++++++++------------
 4 files changed, 168 insertions(+), 157 deletions(-)

diff --git a/setup.py b/setup.py
index f568894..98fba9a 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,12 @@
 with open("README.rst", "r") as f_readme:
     long_desc = f_readme.read()
 
-requires = ["Sphinx>=4.0.0", "Pygments>=2.0.1"]
+requires = [
+    "Sphinx>=4.0.0",
+    "Pygments>=2.0.1",
+    "tree-sitter-matlab>=1.0.1",
+    "tree-sitter-python>=0.21.0",
+]
 
 setup(
     name="sphinxcontrib-matlabdomain",
diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 8d7f8e8..a71ebb5 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -2,8 +2,11 @@
 from tree_sitter import Language, Parser
 import re
 
-rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-# rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m"
+# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
+rpath = (
+    "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_ellipsis_empty_output.m"
+)
+# rpath = "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_empty_output.m"
 
 ML_LANG = Language(tsml.language())
 
@@ -97,7 +100,7 @@
 
 q_fun = ML_LANG.query(
     """(function_definition
-    .
+    _*
     (function_output
         [
             (identifier) @outputs
@@ -106,13 +109,13 @@
             )
         ]
     )?
-    .
+    _*
     name: (identifier) @name
-    .
+    _*
     (function_arguments
         [(identifier) @params _]*
     )?
-    .
+    _*
     [(arguments_statement) @argblocks _]*
     .
     (comment)? @docstring
@@ -185,12 +188,12 @@ def __init__(self, root_node):
                 self.outputs[output] = {}
 
         # Get parameters
-        self.params = {}
-        param_nodes = fun_match.get("params")
-        if param_nodes is not None:
-            params = [param.text.decode("utf-8") for param in param_nodes]
-            for param in params:
-                self.params[param] = {}
+        self.args = {}
+        arg_nodes = fun_match.get("params")
+        if arg_nodes is not None:
+            args = [arg.text.decode("utf-8") for arg in arg_nodes]
+            for arg in args:
+                self.args[arg] = {}
 
         # parse out info from argument blocks
         argblock_nodes = fun_match.get("argblocks")
@@ -321,12 +324,14 @@ def _parse_argument_section(self, argblock_node):
             # After all that if our docstring is empty then we have none
             if not docstring.strip():
                 docstring = None
+            else:
+                pass  # docstring = docstring.rstrip()
 
             # Here we trust that the person is giving us valid matlab.
             if "Output" in attrs.keys():
                 arg_loc = self.outputs
             else:
-                arg_loc = self.params
+                arg_loc = self.args
             if len(name) == 1:
                 arg_loc[name[0]] = {
                     "attrs": attrs,
@@ -517,6 +522,8 @@ def _parse_property_section(self, props_match):
             # After all that if our docstring is empty then we have none
             if not docstring.strip():
                 docstring = None
+            else:
+                pass  # docstring = docstring.rstrip()
 
             self.properties[name] = {
                 "attrs": attrs,
@@ -587,6 +594,8 @@ def _parse_enum_section(self, enums_match):
             # After all that if our docstring is empty then we have none
             if docstring.strip() == "":
                 docstring == None
+            else:
+                pass  # docstring = docstring.rstrip()
 
             self.enumerations[name] = {"args": args, "docstring": docstring}
 
@@ -635,13 +644,11 @@ def _parse_event_section(self, events_match):
             # After all that if our docstring is empty then we have none
             if docstring.strip() == "":
                 docstring == None
+            else:
+                pass  # docstring = docstring.rstrip()
 
             self.events[name] = {"attrs": attrs, "docstring": docstring}
 
-        import pdb
-
-        pdb.set_trace()
-
     def _parse_attributes(self, attrs_nodes):
         attrs = {}
         if attrs_nodes is not None:
@@ -662,4 +669,5 @@ def _parse_attributes(self, attrs_nodes):
         data = f.read()
 
     tree = parser.parse(data)
-    class_parser = MatClassParser(tree.root_node)
+    # class_parser = MatClassParser(tree.root_node)
+    fun_parser = MatFunctionParser(tree.root_node)
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index 824c0b5..0a12cf1 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -17,7 +17,11 @@
 from zipfile import ZipFile
 import xml.etree.ElementTree as ET
 import sphinxcontrib.mat_parser as mat_parser
-from sphinxcontrib.mat_tree_sitter_parser import MatClassParser, MatFunctionParser, ML_LANG
+from sphinxcontrib.mat_tree_sitter_parser import (
+    MatClassParser,
+    MatFunctionParser,
+    ML_LANG,
+)
 import tree_sitter_matlab as tsml
 from tree_sitter import Language, Parser
 import logging
@@ -515,7 +519,9 @@ def parse_mfile(mfile, name, path, encoding=None):
 
         # assume that functions and classes always start with a keyword
         def isFunction(tree):
-            q_is_function = ML_LANG.query(r"""(source_file [(comment) "\n"]* (function_definition))""")
+            q_is_function = ML_LANG.query(
+                r"""(source_file [(comment) "\n"]* (function_definition))"""
+            )
             matches = q_is_function.matches(tree.root_node)
             if matches:
                 return True
@@ -874,7 +880,7 @@ def __init__(self, name, modname, tokens):
         #: output args
         self.retv = parsed_function.outputs
         #: input args
-        self.args = parsed_function.params
+        self.args = parsed_function.args
         #: remaining tokens after main function is parsed
         self.rem_tks = None
 
diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
index 24b4c6a..b901409 100644
--- a/tests/test_parse_mfile.py
+++ b/tests/test_parse_mfile.py
@@ -24,15 +24,15 @@ def test_ClassExample():
     assert obj.name == "ClassExample"
     assert (
         obj.docstring
-        == " test class methods\n\n :param a: the input to :class:`ClassExample`\n"
+        == " test class methods\n\n:param a: the input to :class:`ClassExample`"
     )
     mymethod = obj.methods["mymethod"]
     assert mymethod.name == "mymethod"
-    assert mymethod.retv == ["c"]
-    assert mymethod.args == ["obj", "b"]
+    assert list(mymethod.retv.keys()) == ["c"]
+    assert list(mymethod.args.keys()) == ["obj", "b"]
     assert (
         mymethod.docstring
-        == " a method in :class:`ClassExample`\n\n :param b: an input to :meth:`mymethod`\n"
+        == "a method in :class:`ClassExample`\n\n:param b: an input to :meth:`mymethod`"
     )
 
 
@@ -40,71 +40,71 @@ def test_comment_after_docstring():
     mfile = os.path.join(TESTDATA_SUB, "f_comment_after_docstring.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_comment_after_docstring", "")
     assert obj.name == "f_comment_after_docstring"
-    assert obj.retv == ["output"]
-    assert obj.args == ["input"]
-    assert obj.docstring == " Tests a function with comments after docstring\n"
+    assert list(obj.retv.keys()) == ["output"]
+    assert list(obj.args.keys()) == ["input"]
+    assert obj.docstring == "Tests a function with comments after docstring"
 
 
 def test_docstring_no_newline():
     mfile = os.path.join(TESTDATA_SUB, "f_docstring_no_newline.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_docstring_no_newline", "")
     assert obj.name == "f_docstring_no_newline"
-    assert obj.retv == ["y"]
-    assert obj.args is None
-    assert obj.docstring == " Test a function without a newline after docstring\n"
+    assert list(obj.retv.keys()) == ["y"]
+    assert list(obj.args.keys()) == []
+    assert obj.docstring == "Test a function without a newline after docstring"
 
 
 def test_ellipsis_after_equals():
     mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_after_equals.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_after_equals", "")
     assert obj.name == "f_ellipsis_after_equals"
-    assert obj.retv == ["output"]
-    assert obj.args == ["arg"]
-    assert obj.docstring == " Tests a function with ellipsis after equals\n"
+    assert list(obj.retv.keys()) == ["output"]
+    assert list(obj.args.keys()) == ["arg"]
+    assert obj.docstring == "Tests a function with ellipsis after equals"
 
 
 def test_ellipsis_empty_output():
     mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_empty_output.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_empty_output", "")
     assert obj.name == "f_ellipsis_empty_output"
-    assert obj.retv is None
-    assert obj.args == ["arg"]
-    assert obj.docstring == " Tests a function with ellipsis in the output\n"
+    assert list(obj.retv.keys()) == []
+    assert list(obj.args.keys()) == ["arg"]
+    assert obj.docstring == "Tests a function with ellipsis in the output"
 
 
 def test_ellipsis_in_comment():
     mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_in_comment.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_in_comment", "")
     assert obj.name == "f_ellipsis_in_comment"
-    assert obj.retv == ["y"]
-    assert obj.args == ["x"]
-    assert obj.docstring == " Tests a function with ellipsis in the comment ...\n"
+    assert list(obj.retv.keys()) == ["y"]
+    assert list(obj.args.keys()) == ["x"]
+    assert obj.docstring == "Tests a function with ellipsis in the comment ..."
 
 
 def test_ellipsis_in_output():
     mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_in_output.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_in_output", "")
     assert obj.name == "f_ellipsis_in_output"
-    assert obj.retv == ["output", "with", "ellipsis"]
-    assert obj.args == ["arg"]
-    assert obj.docstring == " Tests a function with ellipsis in the output\n"
+    assert list(obj.retv.keys()) == ["output", "with", "ellipsis"]
+    assert list(obj.args.keys()) == ["arg"]
+    assert obj.docstring == "Tests a function with ellipsis in the output"
 
 
 def test_ellipsis_in_output_multiple():
     mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_in_output_multiple.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_in_output_multiple", "")
     assert obj.name == "f_ellipsis_in_output_multiple"
-    assert obj.retv == ["output", "with", "ellipsis"]
-    assert obj.args == ["arg"]
-    assert obj.docstring == " Tests a function with multiple ellipsis in the output\n"
+    assert list(obj.retv.keys()) == ["output", "with", "ellipsis"]
+    assert list(obj.args.keys()) == ["arg"]
+    assert obj.docstring == "Tests a function with multiple ellipsis in the output"
 
 
 def test_no_docstring():
     mfile = os.path.join(TESTDATA_SUB, "f_no_docstring.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_no_docstring", "")
     assert obj.name == "f_no_docstring"
-    assert obj.retv == ["y"]
-    assert obj.args is None
+    assert list(obj.retv.keys()) == ["y"]
+    assert list(obj.args.keys()) == []
     assert obj.docstring == ""
 
 
@@ -112,36 +112,36 @@ def test_no_output():
     mfile = os.path.join(TESTDATA_SUB, "f_no_output.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_no_output", "")
     assert obj.name == "f_no_output"
-    assert obj.retv is None
-    assert obj.args == ["arg"]
-    assert obj.docstring == " A function with no outputs\n"
+    assert list(obj.retv.keys()) == []
+    assert list(obj.args.keys()) == ["arg"]
+    assert obj.docstring == "A function with no outputs"
 
 
 def test_no_input_parentheses():
     mfile = os.path.join(TESTDATA_SUB, "f_no_input_parentheses.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_no_input_parentheses", "")
     assert obj.name == "f_no_input_parentheses"
-    assert obj.retv == ["y"]
-    assert obj.args is None
-    assert obj.docstring == " Tests a function without parentheses in input\n"
+    assert list(obj.retv.keys()) == ["y"]
+    assert list(obj.args.keys()) == []
+    assert obj.docstring == "Tests a function without parentheses in input"
 
 
 def test_no_spaces():
     mfile = os.path.join(TESTDATA_SUB, "f_no_spaces.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_no_spaces", "")
     assert obj.name == "f_no_spaces"
-    assert obj.retv == ["a", "b", "c"]
-    assert obj.args == ["x", "y", "z"]
-    assert obj.docstring == " Tests a function with no spaces in function signature\n"
+    assert list(obj.retv.keys()) == ["a", "b", "c"]
+    assert list(obj.args.keys()) == ["x", "y", "z"]
+    assert obj.docstring == "Tests a function with no spaces in function signature"
 
 
 def test_with_tabs():
     mfile = os.path.join(TESTDATA_SUB, "f_with_tabs.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_tabs", "")
     assert obj.name == "f_with_tabs"
-    assert obj.retv == ["y"]
-    assert obj.args == ["x"]
-    assert obj.docstring == " A function with tabs\n"
+    assert list(obj.retv.keys()) == ["y"]
+    assert list(obj.args.keys()) == ["x"]
+    assert obj.docstring == "A function with tabs"
 
 
 def test_ClassWithEndOfLineComment():
@@ -178,15 +178,15 @@ def test_ClassWithFunctionArguments():
     assert obj.name == "ClassWithFunctionArguments"
     assert (
         obj.docstring
-        == " test class methods with function arguments\n\n :param a: the input to :class:`ClassWithFunctionArguments`\n"
+        == "test class methods with function arguments\n\n:param a: the input to :class:`ClassWithFunctionArguments`"
     )
     mymethod = obj.methods["mymethod"]
     assert mymethod.name == "mymethod"
-    assert mymethod.retv == ["c"]
-    assert mymethod.args == ["obj", "b"]
+    assert list(mymethod.retv.keys()) == ["c"]
+    assert mymethod.args.keys() == ["obj", "b"]
     assert (
         mymethod.docstring
-        == " a method in :class:`ClassWithFunctionArguments`\n\n :param b: an input to :meth:`mymethod`\n"
+        == "a method in :class:`ClassWithFunctionArguments`\n\n:param b: an input to :meth:`mymethod`"
     )
 
 
@@ -206,7 +206,7 @@ def test_no_input_no_output_no_parentheses():
     assert obj.name == "f_no_input_no_output_no_parentheses"
     assert (
         obj.docstring
-        == " Tests a function without parentheses in input and no return value\n"
+        == "Tests a function without parentheses in input and no return value"
     )
 
 
@@ -218,26 +218,26 @@ def test_no_input_no_parentheses_no_docstring():
         mfile, "f_no_input_no_parentheses_no_docstring", "test_data"
     )
     assert obj.name == "f_no_input_no_parentheses_no_docstring"
-    assert obj.retv == ["result"]
-    assert obj.args is None
+    assert list(obj.retv.keys()) == ["result"]
+    assert list(obj.args.keys()) == []
 
 
 def test_ClassWithCommentHeader():
     mfile = os.path.join(DIRNAME, "test_data", "ClassWithCommentHeader.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithCommentHeader", "test_data")
     assert obj.name == "ClassWithCommentHeader"
-    assert obj.docstring == " A class with a comment header on the top.\n"
+    assert obj.docstring == "A class with a comment header on the top."
     method_get_tform = obj.methods["getTransformation"]
     assert method_get_tform.name == "getTransformation"
-    assert method_get_tform.retv == ["tform"]
-    assert method_get_tform.args == ["obj"]
+    assert list(method_get_tform.retv.keys()) == ["tform"]
+    assert list(method_get_tform.args.keys()) == ["obj"]
 
 
 def test_with_comment_header():
     mfile = os.path.join(DIRNAME, "test_data", "f_with_comment_header.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_comment_header", "test_data")
     assert obj.name == "f_with_comment_header"
-    assert obj.docstring == " A simple function with a comment header on the top.\n"
+    assert obj.docstring == "A simple function with a comment header on the top."
 
 
 def test_script_with_comment_header():
@@ -247,7 +247,7 @@ def test_script_with_comment_header():
     )
     assert (
         obj.docstring
-        == """ This is a Comment Header
+        == """This is a Comment Header
  Copyright (C) <year>, by <full_name>
 
  Some descriptions ...
@@ -268,7 +268,7 @@ def test_script_with_comment_header_2():
     )
     assert (
         obj.docstring
-        == """ This is a Comment Header
+        == """This is a Comment Header
  Copyright (C) <year>, by <full_name>
 
  Some descriptions ...
@@ -289,7 +289,7 @@ def test_script_with_comment_header_3():
     )
     assert (
         obj.docstring
-        == """ This is a Comment Header with empty lines above
+        == """This is a Comment Header with empty lines above
  and many line comments.
 
 """
@@ -303,7 +303,7 @@ def test_script_with_comment_header_4():
     )
     assert (
         obj.docstring
-        == """ This is a Comment Header with a single instruction above
+        == """This is a Comment Header with a single instruction above
  and many line comments.
 
 """
@@ -356,14 +356,14 @@ def test_ClassWithMethodAttributes():
         mfile, "ClassWithMethodAttributes", "test_data"
     )
     assert obj.name == "ClassWithMethodAttributes"
-    assert obj.docstring == " Class with different method attributes\n"
+    assert obj.docstring == "Class with different method attributes"
     assert obj.methods["testNormal"].attrs == {}
     assert obj.methods["testPublic"].attrs == {"Access": "public"}
     assert obj.methods["testProtected"].attrs == {"Access": "protected"}
     assert obj.methods["testPrivate1"].attrs == {"Access": "private"}
     assert obj.methods["testPrivate2"].attrs == {"Access": "private"}
-    assert obj.methods["testHidden"].attrs == {"Hidden": True}
-    assert obj.methods["testStatic"].attrs == {"Static": True}
+    assert obj.methods["testHidden"].attrs == {"Hidden": None}
+    assert obj.methods["testStatic"].attrs == {"Static": None}
     assert obj.methods["testFriend1"].attrs == {"Access": "?OtherClass"}
     assert obj.methods["testFriend2"].attrs == {
         "Access": ["?OtherClass", "?pack.OtherClass2"]
@@ -376,7 +376,7 @@ def test_ClassWithPropertyAttributes():
         mfile, "ClassWithPropertyAttributes", "test_data"
     )
     assert obj.name == "ClassWithPropertyAttributes"
-    assert obj.docstring == " Class with different property attributes\n"
+    assert obj.docstring == "Class with different property attributes"
     assert obj.properties["testNormal"]["attrs"] == {}
     assert obj.properties["testPublic"]["attrs"] == {"Access": "public"}
     assert obj.properties["testProtected"]["attrs"] == {"Access": "protected"}
@@ -393,29 +393,27 @@ def test_ClassWithPropertyAttributes():
         "GetAccess": "private",
         "SetAccess": "private",
     }
-    assert obj.properties["TEST_CONSTANT"]["attrs"] == {"Constant": True}
+    assert obj.properties["TEST_CONSTANT"]["attrs"] == {"Constant": None}
     assert obj.properties["TEST_CONSTANT_PROTECTED"]["attrs"] == {
         "Access": "protected",
-        "Constant": True,
+        "Constant": None,
     }
-    assert obj.properties["testDependent"]["attrs"] == {"Dependent": True}
-    assert obj.properties["testHidden"]["attrs"] == {"Hidden": True}
+    assert obj.properties["testDependent"]["attrs"] == {"Dependent": None}
+    assert obj.properties["testHidden"]["attrs"] == {"Hidden": None}
 
 
 def test_ClassWithoutIndent():
     mfile = os.path.join(DIRNAME, "test_data", "ClassWithoutIndent.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithoutIndent", "test_data")
     assert obj.name == "ClassWithoutIndent"
-    assert (
-        obj.docstring == " First line is not indented\n Second line line is indented\n"
-    )
+    assert obj.docstring == "First line is not indented\nSecond line line is indented"
 
 
 def test_f_with_utf8():
     mfile = os.path.join(DIRNAME, "test_data", "f_with_utf8.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_utf8", "test_data")
     assert obj.name == "f_with_utf8"
-    assert obj.docstring == " Cambia ubicación de partículas.\n"
+    assert obj.docstring == "Cambia ubicación de partículas."
 
 
 def test_file_parsing_encoding_can_be_specified():
@@ -424,14 +422,14 @@ def test_file_parsing_encoding_can_be_specified():
         mfile, "f_with_latin_1", "test_data", encoding="latin-1"
     )
     assert obj.name == "f_with_latin_1"
-    assert obj.docstring == " Analyse de la réponse à un créneau\n"
+    assert obj.docstring == "Analyse de la réponse à un créneau"
 
 
 def test_file_parsing_with_no_encoding_specified():
     mfile = os.path.join(DIRNAME, "test_data", "f_with_latin_1.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_latin_1", "test_data")
     assert obj.name == "f_with_latin_1"
-    assert obj.docstring == " Analyse de la r\ufffdponse \ufffd un cr\ufffdneau\n"
+    assert obj.docstring == "Analyse de la r\ufffdponse \ufffd un cr\ufffdneau"
 
 
 def test_ClassWithBuiltinOverload():
@@ -440,7 +438,7 @@ def test_ClassWithBuiltinOverload():
         mfile, "ClassWithBuiltinOverload", "test_data"
     )
     assert obj.name == "ClassWithBuiltinOverload"
-    assert obj.docstring == " Class that overloads a builtin\n"
+    assert obj.docstring == "Class that overloads a builtin"
 
 
 def test_ClassWithBuiltinProperties():
@@ -449,14 +447,14 @@ def test_ClassWithBuiltinProperties():
         mfile, "ClassWithBuiltinProperties", "test_data"
     )
     assert obj.name == "ClassWithBuiltinProperties"
-    assert obj.docstring == " Class with properties that overload a builtin\n"
+    assert obj.docstring == "Class with properties that overload a builtin"
     assert set(obj.properties) == set(["omega", "alpha", "gamma", "beta"])
-    assert obj.properties["omega"]["docstring"] == " a property"
-    assert obj.properties["alpha"]["docstring"] == (" a property overloading a builtin")
+    assert obj.properties["omega"]["docstring"] == "a property"
+    assert obj.properties["alpha"]["docstring"] == ("a property overloading a builtin")
     assert obj.properties["gamma"]["docstring"] == (
-        " a property overloading a builtin with validation"
+        "a property overloading a builtin with validation"
     )
-    assert obj.properties["beta"]["docstring"] == (" another overloaded property")
+    assert obj.properties["beta"]["docstring"] == ("another overloaded property")
 
 
 # Fails when running with other test files. Warnings are already logged.
@@ -473,7 +471,7 @@ def test_f_with_name_mismatch(caplog):
             "sphinx.matlab-domain",
             WARNING,
             '[sphinxcontrib-matlabdomain] Unexpected function name: "f_name_with_mismatch".'
-            ' Expected "f_with_name_mismatch" in module "test_data".',
+            ' Expected "f_with_name_mismatch"in module "test_data".',
         ),
     ]
 
@@ -482,16 +480,16 @@ def test_f_with_dummy_argument():
     mfile = os.path.join(DIRNAME, "test_data", "f_with_dummy_argument.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_dummy_argument", "")
     assert obj.name == "f_with_dummy_argument"
-    assert obj.retv == ["obj"]
-    assert obj.args == ["~", "name"]
-    assert obj.docstring == " Could be a callback, where first argument is ignored.\n"
+    assert list(obj.retv.keys()) == ["obj"]
+    assert list(obj.args.keys()) == ["~", "name"]
+    assert obj.docstring == "Could be a callback, where first argument is ignored."
 
 
 def test_f_with_string_ellipsis():
     mfile = os.path.join(DIRNAME, "test_data", "f_with_string_ellipsis.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_string_ellipsis", "test_data")
     assert obj.name == "f_with_string_ellipsis"
-    assert obj.docstring == " A function with a string with ellipsis\n"
+    assert obj.docstring == "A function with a string with ellipsis"
 
 
 def test_ClassWithFunctionVariable():
@@ -500,7 +498,7 @@ def test_ClassWithFunctionVariable():
         mfile, "ClassWithFunctionVariable", "test_data"
     )
     assert obj.name == "ClassWithFunctionVariable"
-    assert obj.docstring == " This line contains functions!\n"
+    assert obj.docstring == "This line contains functions!"
     methods = set(obj.methods.keys())
     assert methods == {"ClassWithFunctionVariable", "anotherMethodWithFunctions"}
 
@@ -562,7 +560,7 @@ def test_ClassWithAttributes():
     obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithAttributes", "test_data")
     assert isinstance(obj, mat_types.MatClass)
     assert obj.name == "ClassWithAttributes"
-    assert obj.attrs == {"Sealed": True}
+    assert obj.attrs == {"Sealed": None}
 
 
 # Fails when running with other test files. Warnings are already logged.
@@ -608,8 +606,8 @@ def test_f_with_function_variable():
         mfile, "f_with_function_variable", "test_data"
     )
     assert obj.name == "f_with_function_variable"
-    assert obj.retv == ["obj"]
-    assert obj.args == ["the_functions", "~"]
+    assert list(obj.retv.keys()) == ["obj"]
+    assert list(obj.args.keys()) == ["the_functions", "~"]
     print(obj.docstring)
 
 
@@ -620,12 +618,7 @@ def test_ClassWithGetterSetter():
     assert obj.name == "ClassWithGetterSetter"
     assert list(obj.methods.keys()) == ["ClassWithGetterSetter"]
     assert obj.properties == {
-        "a": {
-            "docstring": " A nice property",
-            "attrs": {},
-            "default": None,
-            "specs": "",
-        }
+        "a": {"docstring": "A nice property", "attrs": {}, "default": None}
     }
 
 
@@ -657,9 +650,9 @@ def test_ClassWithDummyArguments():
     assert obj.name == "ClassWithDummyArguments"
     assert set(obj.methods.keys()) == set(["someMethod1", "someMethod2"])
     m1 = obj.methods["someMethod1"]
-    assert m1.args == ["obj", "argument"]
+    assert list(m1.args.keys()) == ["obj", "argument"]
     m2 = obj.methods["someMethod2"]
-    assert m2.args == ["~", "argument"]
+    assert list(m2.args.keys()) == ["~", "argument"]
 
 
 def test_ClassFolderClassdef():
@@ -669,9 +662,9 @@ def test_ClassFolderClassdef():
     assert obj.name == "ClassFolder"
     assert set(obj.methods.keys()) == set(["ClassFolder", "method_inside_classdef"])
     m1 = obj.methods["ClassFolder"]
-    assert m1.args == ["p"]
+    assert list(m1.args.keys()) == ["p"]
     m2 = obj.methods["method_inside_classdef"]
-    assert m2.args == ["obj", "a", "b"]
+    assert list(m2.args.keys()) == ["obj", "a", "b"]
 
 
 def test_ClassWithMethodsWithSpaces():
@@ -683,10 +676,9 @@ def test_ClassWithMethodsWithSpaces():
     assert obj.name == "ClassWithMethodsWithSpaces"
     assert set(obj.methods.keys()) == set(["static_method"])
     assert (
-        obj.docstring
-        == " Class with methods that have space after the function name.\n"
+        obj.docstring == "Class with methods that have space after the function name."
     )
-    assert obj.methods["static_method"].attrs == {"Static": True}
+    assert obj.methods["static_method"].attrs == {"Static": None}
 
 
 def test_ClassContainingParfor():
@@ -695,7 +687,7 @@ def test_ClassContainingParfor():
     assert isinstance(obj, mat_types.MatClass)
     assert obj.name == "ClassContainingParfor"
     assert set(obj.methods.keys()) == set(["test"])
-    assert obj.docstring == " Parfor is a keyword\n"
+    assert obj.docstring == "Parfor is a keyword"
 
 
 def test_ClassWithStringEllipsis():
@@ -704,7 +696,7 @@ def test_ClassWithStringEllipsis():
     assert isinstance(obj, mat_types.MatClass)
     assert obj.name == "ClassWithStringEllipsis"
     assert set(obj.methods.keys()) == set(["test"])
-    assert obj.docstring == " Contains ellipsis in string\n"
+    assert obj.docstring == "Contains ellipsis in string"
 
 
 def test_ClassLongProperty():
@@ -712,13 +704,13 @@ def test_ClassLongProperty():
     obj = mat_types.MatObject.parse_mfile(mfile, "ClassLongProperty", "test_data")
     assert obj.name == "ClassLongProperty"
     assert (
-        obj.docstring == " test class property with long docstring\n\n "
-        ":param a: the input to :class:`ClassExample`\n"
+        obj.docstring == "test class property with long docstring\n\n"
+        ":param a: the input to :class:`ClassExample`"
     )
-    assert obj.properties["a"]["docstring"] == " short description"
+    assert obj.properties["a"]["docstring"] == "short description"
     assert (
-        obj.properties["b"]["docstring"] == " A property with a long "
-        "documentation\n This is the second line\n And a third\n"
+        obj.properties["b"]["docstring"] == "A property with a long "
+        "documentation\nThis is the second line\nAnd a third"
     )
     assert obj.properties["c"]["docstring"] is None
 
@@ -730,10 +722,10 @@ def test_ClassWithLongPropertyDocstrings():
     )
     assert obj.name == "ClassWithLongPropertyDocstrings"
     assert (
-        obj.properties["a"]["docstring"] == " This line is deleted\n"
-        " This line documents another property\n"
+        obj.properties["a"]["docstring"] == "This line is deleted"
+        "This line documents another property"
     )
-    assert obj.properties["b"]["docstring"] == " Document this property\n"
+    assert obj.properties["b"]["docstring"] == "Document this property"
 
 
 def test_ClassWithLongPropertyTrailingEmptyDocstrings():
@@ -745,10 +737,10 @@ def test_ClassWithLongPropertyTrailingEmptyDocstrings():
     )
     assert obj.name == "ClassWithLongPropertyTrailingEmptyDocstrings"
     assert (
-        obj.properties["a"]["docstring"] == " This line is deleted\n"
-        " This line documents another property\n"
+        obj.properties["a"]["docstring"] == "This line is deleted"
+        "This line documents another property"
     )
-    assert obj.properties["b"]["docstring"] == " Document this property\n"
+    assert obj.properties["b"]["docstring"] == "Document this property"
 
 
 def test_ClassWithPropertyValidators():
@@ -757,10 +749,10 @@ def test_ClassWithPropertyValidators():
         mfile, "ClassWithPropertyValidators", "test_data"
     )
     assert obj.name == "ClassWithPropertyValidators"
-    assert obj.properties["Location"]["docstring"] == " The location\n"
-    assert obj.properties["Label"]["docstring"] == " The label\n"
-    assert obj.properties["State"]["docstring"] == " The state\n"
-    assert obj.properties["ReportLevel"]["docstring"] == " The report level\n"
+    assert obj.properties["Location"]["docstring"] == "The location"
+    assert obj.properties["Label"]["docstring"] == "The label"
+    assert obj.properties["State"]["docstring"] == "The state"
+    assert obj.properties["ReportLevel"]["docstring"] == "The report level"
 
 
 def test_ClassWithTrailingCommentAfterBases():
@@ -769,18 +761,18 @@ def test_ClassWithTrailingCommentAfterBases():
         mfile, "ClassWithTrailingCommentAfterBases", "test_data"
     )
     assert obj.name == "ClassWithTrailingCommentAfterBases"
-    assert obj.bases == ["handle", "my.super.Class"]
+    assert obj.bases == [("handle",), ("my", "super", "Class")]
     assert (
         obj.docstring
-        == " test class methods\n\n :param a: the input to :class:`ClassWithTrailingCommentAfterBases`\n"
+        == "test class methods\n\n:param a: the input to :class:`ClassWithTrailingCommentAfterBases`"
     )
     mymethod = obj.methods["mymethod"]
     assert mymethod.name == "mymethod"
-    assert mymethod.retv == ["c"]
-    assert mymethod.args == ["obj", "b"]
+    assert list(mymethod.retv.keys()) == ["c"]
+    assert list(mymethod.args.keys()) == ["obj", "b"]
     assert (
         mymethod.docstring
-        == " a method in :class:`ClassWithTrailingCommentAfterBases`\n\n :param b: an input to :meth:`mymethod`\n"
+        == "a method in :class:`ClassWithTrailingCommentAfterBases`\n\n:param b: an input to :meth:`mymethod`"
     )
 
 
@@ -790,28 +782,28 @@ def test_ClassWithEllipsisProperties():
         mfile, "ClassWithEllipsisProperties", "test_data"
     )
     assert obj.name == "ClassWithEllipsisProperties"
-    assert obj.bases == ["handle"]
-    assert obj.docstring == " stuff\n"
+    assert obj.bases == [("handle",)]
+    assert obj.docstring == "stuff"
     assert len(obj.methods) == 0
 
-    assert obj.properties["A"]["docstring"] == " an expression with ellipsis"
+    assert obj.properties["A"]["docstring"] == "an expression with ellipsis"
     assert obj.properties["A"]["default"] == "1+2+3+4+5"
     assert (
         obj.properties["B"]["docstring"]
-        == " a cell array with ellipsis and other array notation"
+        == "a cell array with ellipsis and other array notation"
     )
     assert obj.properties["B"]["default"].startswith("{'hello','bye';")
     assert obj.properties["B"]["default"].endswith("}")
-    assert obj.properties["C"]["docstring"] == " using end inside array"
+    assert obj.properties["C"]["docstring"] == "using end inside array"
     assert obj.properties["C"]["default"] == "ClassWithEllipsisProperties.B(2:end,1)"
-    assert obj.properties["D"]["docstring"] == " String with line continuation"
+    assert obj.properties["D"]["docstring"] == "String with line continuation"
     assert obj.properties["D"]["default"] == "'...'"
-    assert obj.properties["E"]["docstring"] == " The string with spaces"
+    assert obj.properties["E"]["docstring"] == "The string with spaces"
     assert obj.properties["E"]["default"] == "'some string with spaces'"
 
 
 #         mymethod.docstring
-#         == " a method in :class:`ClassWithTrailingCommentAfterBases`\n\n :param b: an input to :meth:`mymethod`\n"
+#         == " a method in :class:`ClassWithTrailingCommentAfterBases`\n\n :param b: an input to :meth:`mymethod`"
 #     )
 
 
@@ -833,9 +825,9 @@ def test_ClassWithTrailingSemicolons():
     )
     assert (
         obj.docstring
-        == " Smoothing like it is performed withing Cxx >v7.0 (until v8.2 at least).\n Uses constant 228p_12k frequency vector:\n"
+        == "Smoothing like it is performed withing Cxx >v7.0 (until v8.2 at least).\nUses constant 228p_12k frequency vector:"
     )
-    assert obj.bases == ["hgsetget"]
+    assert obj.bases == [("hgsetget",)]
     assert list(obj.methods.keys()) == [
         "ClassWithTrailingSemicolons",
         "CxxSmoothing",
@@ -863,7 +855,7 @@ def test_ClassWithSeperatedComments():
     assert obj.bases == []
     assert "prop" in obj.properties
     prop = obj.properties["prop"]
-    assert prop["docstring"] == " Another comment\n"
+    assert prop["docstring"] == "Another comment"
 
 
 def test_ClassWithKeywordsAsFieldnames():
@@ -878,19 +870,19 @@ def test_ClassWithKeywordsAsFieldnames():
     assert "c" in obj.properties
     assert "calculate" in obj.methods
     meth = obj.methods["calculate"]
-    assert meth.docstring == " Returns the value of `d`\n"
+    assert meth.docstring == "Returns the value of `d`"
 
 
 def test_ClassWithNamedAsArguments():
     mfile = os.path.join(TESTDATA_ROOT, "arguments.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "arguments", "test_data")
     assert obj.name == "arguments"
-    assert obj.bases == ["handle", "matlab.mixin.Copyable"]
+    assert obj.bases == [("handle",), ("matlab", "mixin", "Copyable")]
     assert "value" in obj.properties
     meth = obj.methods["arguments"]
-    assert meth.docstring == " Constructor for arguments\n"
+    assert meth.docstring == "Constructor for arguments"
     meth = obj.methods["add"]
-    assert meth.docstring == " Add new argument\n"
+    assert meth.docstring == "Add new argument"
 
 
 def test_ClassWithPropertyCellValues():
@@ -908,10 +900,10 @@ def test_ClassWithTests():
     mfile = os.path.join(TESTDATA_ROOT, "ClassWithTests.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithTests", "test_data")
     assert obj.name == "ClassWithTests"
-    assert obj.bases == ["matlab.unittest.TestCase"]
+    assert obj.bases == [("matlab", "unittest", "TestCase")]
     assert "testRunning" in obj.methods
     testRunning = obj.methods["testRunning"]
-    assert testRunning.attrs["TestTags"] == ["'Unit'"]
+    assert testRunning.attrs["TestTags"] == ["{'Unit'}"]
 
 
 if __name__ == "__main__":

From b349ea7fe33a780395fe4f150124b30e6f0ad399 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Wed, 14 Aug 2024 10:45:36 +0200
Subject: [PATCH 18/45] rm textmate parser on this branch

---
 sphinxcontrib/mat_textmate_parser.py | 642 ---------------------------
 1 file changed, 642 deletions(-)
 delete mode 100644 sphinxcontrib/mat_textmate_parser.py

diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py
deleted file mode 100644
index 46236ce..0000000
--- a/sphinxcontrib/mat_textmate_parser.py
+++ /dev/null
@@ -1,642 +0,0 @@
-from textmate_grammar.parsers.matlab import MatlabParser
-import re
-
-# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-
-rpath = "/home/anton/tools/matlabdomain/tests/roots/test_autodoc/target/ClassExample.m"
-
-
-def find_first_child(curr, tok, attr="children"):
-    tok_lst = getattr(curr, attr)
-    ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok]
-    if not ind:
-        return (None, None)
-    return (tok_lst[ind[0]], ind[0])
-
-
-def _toks_on_same_line(tok1, tok2):
-    """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results"""
-    line1 = _get_last_line_of_tok(tok1)
-    line2 = _get_first_line_of_tok(tok2)
-    return line1 == line2
-
-
-def _is_empty_line_between_tok(tok1, tok2):
-    """Note: pass tokens in order they appear"""
-    line1 = _get_last_line_of_tok(tok1)
-    line2 = _get_first_line_of_tok(tok2)
-    return line2 - line1 > 1
-
-
-def _get_first_line_of_tok(tok):
-    return min([loc[0] for loc in tok.characters.keys()])
-
-
-def _get_last_line_of_tok(tok):
-    return max([loc[0] for loc in tok.characters.keys()])
-
-
-class MatFunctionParser:
-    def __init__(self, fun_tok):
-        """Parse Function definition"""
-        # First find the function name
-        name_gen = fun_tok.find(tokens="entity.name.function.matlab")
-        try:
-            name_tok, _ = next(name_gen)
-            self.name = name_tok.content
-        except StopIteration:
-            # TODO correct error here
-            raise Exception("Couldn't find function name")
-
-        # Find outputs and parameters
-        output_gen = fun_tok.find(tokens="variable.parameter.output.matlab")
-        param_gen = fun_tok.find(tokens="variable.parameter.input.matlab")
-
-        self.outputs = {}
-        self.params = {}
-        self.attrs = {}
-
-        for out, _ in output_gen:
-            self.outputs[out.content] = {}
-
-        for param, _ in param_gen:
-            self.params[param.content] = {}
-
-        # find arguments blocks
-        arg_section = None
-        for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"):
-            self._parse_argument_section(arg_section)
-
-        fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab")
-        try:
-            fun_decl_tok, _ = next(fun_decl_gen)
-        except StopIteration:
-            raise Exception(
-                "missing function declaration"
-            )  # This cant happen as we'd be missing a function name
-
-        # Now parse for docstring
-        docstring = ""
-        comment_toks = fun_tok.findall(
-            tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"]
-        )
-        last_tok = arg_section if arg_section is not None else fun_decl_tok
-
-        for comment_tok, _ in comment_toks:
-            if _is_empty_line_between_tok(last_tok, comment_tok):
-                # If we have non-consecutive tokens quit right away.
-                break
-            elif (
-                not docstring and comment_tok.token == "comment.block.percentage.matlab"
-            ):
-                # If we have no previous docstring lines and a comment block we take
-                # the comment block as the docstring and exit.
-                docstring = comment_tok.content.strip()[
-                    2:-2
-                ].strip()  # [2,-2] strips out block comment delimiters
-                break
-            elif comment_tok.token == "comment.line.percentage.matlab":
-                # keep parsing comments
-                docstring += comment_tok.content[1:] + "\n"
-            else:
-                # we are done.
-                break
-            last_tok = comment_tok
-
-        self.docstring = docstring if docstring else None
-
-    def _parse_argument_section(self, section):
-        modifiers = [
-            mod.content
-            for mod, _ in section.find(tokens="storage.modifier.arguments.matlab")
-        ]
-        arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab")
-        for arg_def, _ in arg_def_gen:
-            arg_name = arg_def.begin[
-                0
-            ].content  # Get argument name that is being defined
-            self._parse_argument_validation(arg_name, arg_def, modifiers)
-
-    def _parse_argument_validation(self, arg_name, arg, modifiers):
-        # TODO This should be identical to propery validation I think. Refactor
-        # First get the size if found
-        section = self.output if "Output" in modifiers else self.params
-        size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1)
-        try:  # We have a size, therefore parse the comma separated list into tuple
-            size_tok, _ = next(size_gen)
-            size_elem_gen = size_tok.find(
-                tokens=[
-                    "constant.numeric.decimal.matlab",
-                    "keyword.operator.vector.colon.matlab",
-                ],
-                depth=1,
-            )
-            size = tuple([elem[0].content for elem in size_elem_gen])
-            section[arg_name]["size"] = size
-        except StopIteration:
-            pass
-
-        # Now find the type if it exists
-        # TODO this should be mapped to known types (though perhaps as a postprocess)
-        type_gen = arg.find(tokens="storage.type.matlab", depth=1)
-        try:
-            section[arg_name]["type"] = next(type_gen)[0].content
-        except StopIteration:
-            pass
-
-        # Now find list of validators
-        validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1)
-        try:
-            validator_tok, _ = next(validator_gen)
-            validator_toks = validator_tok.findall(
-                tokens="variable.other.readwrite.matlab", depth=1
-            )  # TODO Probably bug here in MATLAB-Language-grammar
-            section[arg_name]["validators"] = [tok[0].content for tok in validator_toks]
-        except StopIteration:
-            pass
-
-
-class MatClassParser:
-    def __init__(self, tokens):
-        # DATA
-        self.name = ""
-        self.supers = []
-        self.attrs = {}
-        self.docstring = ""
-        self.properties = {}
-        self.methods = {}
-        self.enumerations = {}
-
-        self.parsed = tokens
-        self.cls, _ = find_first_child(self.parsed, "meta.class.matlab")
-        if not self.cls:
-            raise Exception()  # TODO better exception
-        self.clsdef, _ = find_first_child(self.cls, "meta.class.declaration.matlab")
-        self._parse_clsdef()
-        self._find_class_docstring()
-
-        property_sections = self.cls.findall(tokens="meta.properties.matlab", depth=1)
-        method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1)
-        enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1)
-
-        for section, _ in property_sections:
-            self._parse_property_section(section)
-
-        for section, _ in method_sections:
-            self._parse_method_section(section)
-
-        for section, _ in enumeration_sections:
-            self._parse_enum_section(section)
-
-        import pdb
-
-        pdb.set_trace()
-
-    def _find_class_docstring(self):
-        try:
-            possible_comment_tok = self.cls.children[1]
-        except IndexError:
-            return
-
-        if possible_comment_tok.token == "comment.line.percentage.matlab":
-            self._docstring_lines()
-        elif possible_comment_tok.token == "comment.block.percentage.matlab":
-            self.docstring = possible_comment_tok.content.strip()[
-                2:-2
-            ].strip()  # [2,-2] strips out block comment delimiters
-        else:
-            pass
-
-    def _docstring_lines(self):
-        idx = 1
-        cls_children = self.cls.children
-
-        while (
-            idx < len(cls_children)
-            and cls_children[idx].token == "comment.line.percentage.matlab"
-        ):
-            self.docstring += (
-                cls_children[idx].content[1:] + "\n"
-            )  # [1:] strips out percent sign
-            idx += 1
-        self.docstring = self.docstring.strip()
-
-    def _parse_clsdef(self):
-        # Try parsing attrs
-        attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab")
-        try:
-            attrs_tok, _ = next(attrs_tok_gen)
-            self._parse_class_attributes(attrs_tok)
-        except StopIteration:
-            pass
-
-        # Parse classname
-        classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab")
-        try:
-            classname_tok, _ = next(classname_tok_gen)
-            self.name = classname_tok.content
-        except StopIteration:
-            print("ClassName not found")  # TODO this is probably fatal
-
-        # Parse interited classes
-        parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab")
-
-        for parent_class_tok, _ in parent_class_toks:
-            sections = parent_class_tok.findall(
-                tokens=[
-                    "entity.name.namespace.matlab",
-                    "entity.other.inherited-class.matlab",
-                ]
-            )
-            super_cls = tuple([sec.content for sec, _ in sections])
-            self.supers.append(super_cls)
-        # Parse Attributes TODO maybe there is a smarter way to do this?
-        idx = 0
-        while self.clsdef.children[idx].token == "storage.modifier.class.matlab":
-            attr_tok = self.clsdef.children[idx]
-            attr = attr_tok.content
-            val = None  # TODO maybe do some typechecking here or we can assume that you give us valid Matlab
-            idx += 1
-            if attr_tok.token == "keyword.operator.assignment.matlab":  # pull out r.h.s
-                idx += 1
-                val = self.clsdef.children[idx].content
-                idx += 1
-            if (
-                attr_tok.token == "punctuation.separator.modifier.comma.matlab"
-            ):  # skip commas
-                idx += 1
-            self.attrs[attr] = val
-
-    def _parse_class_attributes(self, attrs_tok):
-        # walk down child list and parse manually
-        # TODO perhaps contribute a delimited list find to textmate-grammar-python
-        children = attrs_tok.children
-        idx = 0
-        while idx < len(children):
-            child_tok = children[idx]
-            if child_tok.token == "storage.modifier.class.matlab":
-                attr = child_tok.content
-                val = None
-                idx += 1  # walk to next token
-                try:  # however we may have walked off the end of the list in which case we exit
-                    maybe_assign_tok = children[idx]
-                except:
-                    self.attrs[attr] = val
-                    break
-                if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
-                    idx += 1
-                    rhs_tok = children[idx]  # parse right hand side
-                    if rhs_tok.token == "meta.cell.literal.matlab":
-                        # A cell. For now just take the whole cell as value.
-                        # TODO parse out the cell array of metaclass literals.
-                        val = "{" + rhs_tok.content + "}"
-                        idx += 1
-                    elif rhs_tok.token == "constant.language.boolean.matlab":
-                        val = rhs_tok.content
-                        idx += 1
-                    elif rhs_tok.token == "keyword.operator.other.question.matlab":
-                        idx += 1
-                        metaclass_tok = children[idx]
-                        metaclass_components = metaclass_tok.findall(
-                            tokens=[
-                                "entity.name.namespace.matlab",
-                                "entity.other.class.matlab",
-                            ]
-                        )
-                        val = tuple([comp.content for comp, _ in metaclass_components])
-                    else:
-                        pass
-                self.attrs[attr] = val
-            else:  # Comma or continuation therefore skip
-                idx += 1
-
-    def _parse_property_section(self, section):
-        # TODO parse property section attrs
-        attrs = self._parse_attributes(section)
-        idxs = [
-            i
-            for i in range(len(section.children))
-            if section.children[i].token == "meta.assignment.definition.property.matlab"
-        ]
-        for idx in idxs:
-            prop_tok = section.children[idx]
-            prop_name = prop_tok.begin[0].content
-            self.properties[prop_name] = {"attrs": attrs}  # Create entry for property
-            self._parse_property_validation(
-                prop_name, prop_tok
-            )  # Parse property validation.
-
-            # Try to find a default assignment:
-            default = None
-            _, assgn_idx = find_first_child(
-                prop_tok, "keyword.operator.assignment.matlab", attr="end"
-            )
-            if assgn_idx is not None:
-                default = ""
-                assgn_idx += 1  # skip assignment
-                while assgn_idx < len(prop_tok.end):
-                    tok = prop_tok.end[assgn_idx]
-                    assgn_idx += 1
-                    if tok.token in [
-                        "comment.line.percentage.matlab",
-                        "punctuation.terminator.semicolon.matlab",
-                    ]:
-                        break
-                    default += tok.content
-            self.properties[prop_name]["default"] = default
-
-            # Get inline docstring
-            inline_docstring_gen = prop_tok.find(
-                tokens="comment.line.percentage.matlab", attribute="end"
-            )
-            try:
-                inline_docstring_tok, _ = next(inline_docstring_gen)
-                inline_docstring = inline_docstring_tok.content[
-                    1:
-                ]  # strip leading % sign
-            except StopIteration:
-                inline_docstring = None
-
-            # Walk backwards to get preceding docstring.
-            preceding_docstring = ""
-            walk_back_idx = idx - 1
-            next_tok = prop_tok
-            while walk_back_idx >= 0:
-                walk_tok = section.children[walk_back_idx]
-                if _is_empty_line_between_tok(walk_tok, next_tok):
-                    # Once there is an empty line between consecutive tokens we are done.
-                    break
-
-                if (
-                    not preceding_docstring
-                    and walk_tok.token == "comment.block.percentage.matlab"
-                ):
-                    # block comment immediately preceding enum so we are done.
-                    # TODO we might need to do some postprocessing here to handle indents gracefully
-                    preceding_docstring = walk_tok.content.strip()[2:-2]
-                    break
-                elif walk_tok.token == "comment.line.percentage.matlab":
-                    preceding_docstring = (
-                        walk_tok.content[1:] + "\n" + preceding_docstring
-                    )  # [1:] strips %
-                    walk_back_idx -= 1
-                    next_tok = walk_tok
-                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
-                    walk_back_idx -= 1
-                    # Dont update next_tok for whitespace
-                else:
-                    break
-
-            # Walk forwards to get following docstring or inline one.
-            following_docstring = ""
-            walk_fwd_idx = idx + 1
-            prev_tok = prop_tok
-            while walk_fwd_idx < len(section.children):
-                walk_tok = section.children[walk_fwd_idx]
-
-                if _is_empty_line_between_tok(prev_tok, walk_tok):
-                    # Once there is an empty line between consecutive tokens we are done.
-                    break
-
-                if (
-                    not following_docstring
-                    and walk_tok.token == "comment.block.percentage.matlab"
-                ):
-                    # block comment immediately following enum so we are done.
-                    # TODO we might need to do some postprocessing here to handle indents gracefully
-                    following_docstring = walk_tok.content.strip()[2:-2]
-                    break
-                elif walk_tok.token == "comment.line.percentage.matlab":
-                    following_docstring = (
-                        following_docstring + "\n" + walk_tok.content[1:]
-                    )  # [1:] strips %
-                    walk_fwd_idx += 1
-                    prev_tok = walk_tok
-                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
-                    walk_fwd_idx += 1
-                    # Dont update prev_tok for whitespace
-                else:
-                    break
-
-            if preceding_docstring:
-                self.properties[prop_name]["docstring"] = preceding_docstring.strip()
-            elif inline_docstring:
-                self.properties[prop_name]["docstring"] = inline_docstring.strip()
-            elif following_docstring:
-                self.properties[prop_name]["docstring"] = following_docstring.strip()
-            else:
-                self.properties[prop_name]["docstring"] = None
-
-    def _parse_property_validation(self, prop_name, prop):
-        """Parses property validation syntax"""
-        # First get the szize if found
-        size_gen = prop.find(tokens="meta.parens.size.matlab", depth=1)
-        try:  # We have a size, therefore parse the comma separated list into tuple
-            size_tok, _ = next(size_gen)
-            size_elem_gen = size_tok.find(
-                tokens=[
-                    "constant.numeric.decimal.matlab",
-                    "keyword.operator.vector.colon.matlab",
-                ],
-                depth=1,
-            )
-            size = tuple([elem[0].content for elem in size_elem_gen])
-            self.properties[prop_name]["size"] = size
-        except StopIteration:
-            pass
-
-        # Now find the type if it exists
-        # TODO this should be mapped to known types (though perhaps as a postprocess)
-        type_gen = prop.find(tokens="storage.type.matlab", depth=1)
-        try:
-            self.properties[prop_name]["type"] = next(type_gen)[0].content
-        except StopIteration:
-            pass
-
-        # Now find list of validators
-        validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1)
-        try:
-            validator_tok, _ = next(validator_gen)
-            validator_toks = validator_tok.findall(
-                tokens=[
-                    "variable.other.readwrite.matlab",
-                    "meta.function-call.parens.matlab",
-                ],
-                depth=1,
-            )  # TODO Probably bug here in MATLAB-Language-grammar
-            self.properties[prop_name]["validators"] = [
-                tok[0].content for tok in validator_toks
-            ]
-        except StopIteration:
-            pass
-
-    def _parse_method_section(self, section):
-        attrs = self._parse_attributes(section)
-        idxs = [
-            i
-            for i in range(len(section.children))
-            if section.children[i].token == "meta.function.matlab"
-        ]
-        for idx in idxs:
-            meth_tok = section.children[idx]
-            parsed_function = MatFunctionParser(meth_tok)
-            self.methods[parsed_function.name] = parsed_function
-            self.methods[parsed_function.name].attrs = attrs
-
-    def _parse_enum_section(self, section):
-        idxs = [
-            i
-            for i in range(len(section.children))
-            if section.children[i].token
-            == "meta.assignment.definition.enummember.matlab"
-        ]
-        for idx in idxs:
-            enum_tok = section.children[idx]
-            next_idx = idx
-            enum_name = enum_tok.children[0].content
-            self.enumerations[enum_name] = {}
-            if (
-                idx + 1 < len(section.children)
-                and section.children[idx + 1].token == "meta.parens.matlab"
-            ):  # Parse out args TODO this should be part of enummember assignment definition
-                args = tuple(
-                    [
-                        arg.content
-                        for arg in section.children[idx + 1].children
-                        if arg.token != "punctuation.separator.comma.matlab"
-                    ]
-                )
-                self.enumerations[enum_name]["args"] = args
-                next_idx += 1
-
-            # Walk backwards to get preceding docstring.
-            preceding_docstring = ""
-            walk_back_idx = idx - 1
-            next_tok = enum_tok
-            while walk_back_idx >= 0:
-                walk_tok = section.children[walk_back_idx]
-                if _is_empty_line_between_tok(walk_tok, next_tok):
-                    # Once there is an empty line between consecutive tokens we are done.
-                    break
-
-                if (
-                    not preceding_docstring
-                    and walk_tok.token == "comment.block.percentage.matlab"
-                ):
-                    # block comment immediately preceding enum so we are done.
-                    # TODO we might need to do some postprocessing here to handle indents gracefully
-                    preceding_docstring = walk_tok.content.strip()[2:-2]
-                    break
-                elif walk_tok.token == "comment.line.percentage.matlab":
-                    preceding_docstring = (
-                        walk_tok.content[1:] + "\n" + preceding_docstring
-                    )  # [1:] strips %
-                    walk_back_idx -= 1
-                    next_tok = walk_tok
-                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
-                    walk_back_idx -= 1
-                    # Dont update next_tok for whitespace
-                else:
-                    break
-
-            # Walk forwards to get following docstring or inline one.
-            inline_docstring = ""
-            following_docstring = ""
-            walk_fwd_idx = next_idx + 1
-            prev_tok = section.children[next_idx]
-            while walk_fwd_idx < len(section.children):
-                walk_tok = section.children[walk_fwd_idx]
-
-                if _is_empty_line_between_tok(prev_tok, walk_tok):
-                    # Once there is an empty line between consecutive tokens we are done.
-                    break
-
-                if (
-                    not following_docstring
-                    and walk_tok.token == "comment.block.percentage.matlab"
-                ):
-                    # block comment immediately following enum so we are done.
-                    # TODO we might need to do some postprocessing here to handle indents gracefully
-                    following_docstring = walk_tok.content.strip()[2:-2]
-                    break
-                elif walk_tok.token == "comment.line.percentage.matlab":
-                    # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit.
-                    if _toks_on_same_line(section.children[idx], walk_tok):
-                        inline_docstring = walk_tok.content[1:]
-                        break
-
-                    following_docstring = (
-                        following_docstring + "\n" + walk_tok.content[1:]
-                    )  # [1:] strips %
-                    walk_fwd_idx += 1
-                    prev_tok = walk_tok
-                elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab":
-                    walk_fwd_idx += 1
-                    # Dont update prev_tok for whitespace
-                else:
-                    break
-
-            if preceding_docstring:
-                self.enumerations[enum_name]["docstring"] = preceding_docstring.strip()
-            elif inline_docstring:
-                self.enumerations[enum_name]["docstring"] = inline_docstring.strip()
-            elif following_docstring:
-                self.enumerations[enum_name]["docstring"] = following_docstring.strip()
-            else:
-                self.enumerations[enum_name]["docstring"] = None
-
-    def _parse_attributes(self, section):
-        # walk down child list and parse manually
-        children = section.begin
-        idx = 1
-        attrs = {}
-        while idx < len(children):
-            child_tok = children[idx]
-            if re.match(
-                "storage.modifier.(properties|methods|events).matlab", child_tok.token
-            ):
-                attr = child_tok.content
-                val = None
-                idx += 1  # walk to next token
-                try:  # however we may have walked off the end of the list in which case we exit
-                    maybe_assign_tok = children[idx]
-                except:
-                    attrs[attr] = val
-                    return attrs
-                if maybe_assign_tok.token == "keyword.operator.assignment.matlab":
-                    idx += 1
-                    rhs_tok = children[idx]  # parse right hand side
-                    if rhs_tok.token == "meta.cell.literal.matlab":
-                        # A cell. For now just take the whole cell as value.
-                        # TODO parse out the cell array of metaclass literals.
-                        val = "{" + rhs_tok.content + "}"
-                        idx += 1
-                    elif rhs_tok.token == "constant.language.boolean.matlab":
-                        val = rhs_tok.content
-                        idx += 1
-                    elif rhs_tok.token == "storage.modifier.access.matlab":
-                        val = rhs_tok.content
-                        idx += 1
-                    elif rhs_tok.token == "keyword.operator.other.question.matlab":
-                        idx += 1
-                        metaclass_tok = children[idx]
-                        metaclass_components = metaclass_tok.findall(
-                            tokens=[
-                                "entity.name.namespace.matlab",
-                                "entity.other.class.matlab",
-                            ]
-                        )
-                        val = tuple([comp.content for comp, _ in metaclass_components])
-                    else:
-                        pass
-                attrs[attr] = val
-            else:  # Comma or continuation therefore skip
-                idx += 1
-
-        return attrs
-
-
-if __name__ == "__main__":
-    parser = MatlabParser()
-    toks = parser.parse_file(rpath)
-    cls_parse = MatClassParser(toks)

From 4c9263222d076a07d8c7f66c8a044e8ece6aa492 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Wed, 14 Aug 2024 10:51:04 +0200
Subject: [PATCH 19/45] also install tree-sitter

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 98fba9a..38a154f 100644
--- a/setup.py
+++ b/setup.py
@@ -10,6 +10,7 @@
     "Pygments>=2.0.1",
     "tree-sitter-matlab>=1.0.1",
     "tree-sitter-python>=0.21.0",
+    "tree-sitter>=0.21.0",
 ]
 
 setup(

From 22f277945ca59d128099369184c801071f5dc0f7 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Wed, 14 Aug 2024 11:12:13 +0200
Subject: [PATCH 20/45] bump required tree-sitter

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 38a154f..7310620 100644
--- a/setup.py
+++ b/setup.py
@@ -9,8 +9,7 @@
     "Sphinx>=4.0.0",
     "Pygments>=2.0.1",
     "tree-sitter-matlab>=1.0.1",
-    "tree-sitter-python>=0.21.0",
-    "tree-sitter>=0.21.0",
+    "tree-sitter>=0.22.3",
 ]
 
 setup(

From c5b8d38ac6b436d136493b83ec9d383b3e041c72 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Wed, 14 Aug 2024 11:32:39 +0200
Subject: [PATCH 21/45] tree-sitter version bump

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7310620..fda2092 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
     "Sphinx>=4.0.0",
     "Pygments>=2.0.1",
     "tree-sitter-matlab>=1.0.1",
-    "tree-sitter>=0.22.3",
+    "tree-sitter>=0.22.0",
 ]
 
 setup(

From 2c235f8d7fb6c6cde7f15df2d28a6aa51653a6f4 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Wed, 14 Aug 2024 13:03:38 +0200
Subject: [PATCH 22/45] dealing with tree-sitter version diffs to maintain py
 3.8 compatibility

---
 sphinxcontrib/mat_tree_sitter_parser.py | 98 ++++++++++++++++---------
 sphinxcontrib/mat_types.py              |  8 +-
 2 files changed, 70 insertions(+), 36 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index a71ebb5..afae186 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -1,3 +1,4 @@
+from importlib.metadata import version
 import tree_sitter_matlab as tsml
 from tree_sitter import Language, Parser
 import re
@@ -8,7 +9,7 @@
 )
 # rpath = "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_empty_output.m"
 
-ML_LANG = Language(tsml.language())
+ML_LANG = Language(tsml.language(), "matlab")
 
 # QUERIES
 q_classdef = ML_LANG.query(
@@ -163,6 +164,21 @@
 re_assign_remove = re.compile(r"^=[ \t]*")
 
 
+def tree_sitter_is_0_21():
+    if not hasattr(tree_sitter_is_0_21, "is_21"):
+        tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
+        tree_sitter_is_0_21.is_21 = tree_sitter_ver[1] == 21  # memoize
+    return tree_sitter_is_0_21.is_21
+
+
+def get_row(point):
+    """Get row from point. This api changed from v0.21.3 to v0.22.0"""
+    if tree_sitter_is_0_21():
+        return point[0]
+    else:
+        return point.row
+
+
 def process_text_into_docstring(text):
     docstring = text.decode("utf-8")
     return re.sub(re_percent_remove, "", docstring)
@@ -206,7 +222,7 @@ def __init__(self, root_node):
         docstring = None
         if docstring_node is not None:
             prev_sib = docstring_node.prev_named_sibling
-            if docstring_node.start_point.row - prev_sib.end_point.row <= 1:
+            if get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1:
                 docstring = process_text_into_docstring(docstring_node.text)
 
         if not docstring:
@@ -259,11 +275,14 @@ def _parse_argument_section(self, argblock_node):
                 # comments which means this requires some relatively ugly
                 # processing, but worth it for the ease of the rest of it.
                 prev_sib = docstring_node.prev_named_sibling
-                if docstring_node.start_point.row == prev_sib.end_point.row:
+                if get_row(docstring_node.start_point) == get_row(prev_sib.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
                     docstring = process_text_into_docstring(docstring_node.text)
                     docstring = docstring.split("\n")[0]
-                elif docstring_node.start_point.row - prev_sib.end_point.row <= 1:
+                elif (
+                    get_row(docstring_node.start_point) - get_row(prev_sib.end_point)
+                    <= 1
+                ):
                     # Otherwise take the whole docstring
                     docstring = process_text_into_docstring(docstring_node.text)
 
@@ -274,11 +293,11 @@ def _parse_argument_section(self, argblock_node):
                 # Nothing to be done.
                 pass
             elif next_node.type == "comment":
-                if next_node.start_point.row == arg.end_point.row:
+                if get_row(next_node.start_point) == get_row(arg.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
                     docstring = process_text_into_docstring(next_node.text)
                     docstring = docstring.split("\n")[0]
-                elif next_node.start_point.row - arg.end_point.row <= 1:
+                elif get_row(next_node.start_point) - get_row(arg.end_point) <= 1:
                     # Otherwise take the whole docstring
                     docstring = process_text_into_docstring(next_node.text)
 
@@ -292,16 +311,18 @@ def _parse_argument_section(self, argblock_node):
                 # line then we set the docstring. We also need to check
                 # if the first line of the comment is the same as a
                 # previous argument.
-                if arg.start_point.row - prev_node.end_point.row <= 1:
+                if get_row(arg.start_point) - get_row(prev_node.end_point) <= 1:
                     ds = process_text_into_docstring(prev_node.text)
                     prev_arg = prev_node.prev_named_sibling
                     if prev_arg is not None and prev_arg.type == "property":
-                        if prev_node.start_point.row == prev_arg.end_point.row:
+                        if get_row(prev_node.start_point) == get_row(
+                            prev_arg.end_point
+                        ):
                             ds = "\n".join(ds.split("\n")[1:])
                     if ds:
                         docstring = ds
                 else:
-                    if arg.start_point.row - prev_node.end_point.row <= 1:
+                    if get_row(arg.start_point) - get_row(prev_node.end_point) <= 1:
                         docstring = process_text_into_docstring(prev_node.text)
             elif prev_node.type == "property":
                 # The previous argumentnode may have eaten our comment
@@ -312,11 +333,10 @@ def _parse_argument_section(self, argblock_node):
                     # we now need to check if prev_comment ends on the line
                     # before ours and trim the first line if it on the same
                     # line as prev property.
-                    if arg.start_point.row - prev_comment.end_point.row <= 1:
+                    if get_row(arg.start_point) - get_row(prev_comment.end_point) <= 1:
                         ds = process_text_into_docstring(prev_comment.text)
-                        if (
-                            prev_comment.start_point.row
-                            == prev_comment.prev_named_sibling.end_point.row
+                        if get_row(prev_comment.start_point) == get_row(
+                            prev_comment.prev_named_sibling.end_point
                         ):
                             ds = "\n".join(ds.split("\n")[1:])
                         if ds:
@@ -396,7 +416,7 @@ def __init__(self, root_node):
         docstring_node = class_match.get("docstring")
         if docstring_node is not None:
             prev_node = docstring_node.prev_sibling
-            if docstring_node.start_point.row - prev_node.end_point.row <= 1:
+            if get_row(docstring_node.start_point) - get_row(prev_node.end_point) <= 1:
                 self.docstring = process_text_into_docstring(docstring_node.text)
 
         prop_matches = q_properties.matches(self.cls)
@@ -457,11 +477,14 @@ def _parse_property_section(self, props_match):
                 # comments which means this requires some relatively ugly
                 # processing, but worth it for the ease of the rest of it.
                 prev_sib = docstring_node.prev_named_sibling
-                if docstring_node.start_point.row == prev_sib.end_point.row:
+                if get_row(docstring_node.start_point) == get_row(prev_sib.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
                     docstring = process_text_into_docstring(docstring_node.text)
                     docstring = docstring.split("\n")[0]
-                elif docstring_node.start_point.row - prev_sib.end_point.row <= 1:
+                elif (
+                    get_row(docstring_node.start_point) - get_row(prev_sib.end_point)
+                    <= 1
+                ):
                     # Otherwise take the whole docstring
                     docstring = process_text_into_docstring(docstring_node.text)
 
@@ -472,11 +495,11 @@ def _parse_property_section(self, props_match):
                 # Nothing to be done.
                 pass
             elif next_node.type == "comment":
-                if next_node.start_point.row == prop.end_point.row:
+                if get_row(next_node.start_point) == get_row(prop.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
                     docstring = process_text_into_docstring(next_node.text)
                     docstring = docstring.split("\n")[0]
-                elif next_node.start_point.row - prop.end_point.row <= 1:
+                elif get_row(next_node.start_point) - get_row(prop.end_point) <= 1:
                     # Otherwise take the whole docstring
                     docstring = process_text_into_docstring(next_node.text)
 
@@ -490,16 +513,18 @@ def _parse_property_section(self, props_match):
                 # line then we set the docstring. We also need to check
                 # if the first line of the comment is the same as a
                 # previous property.
-                if prop.start_point.row - prev_node.end_point.row <= 1:
+                if get_row(prop.start_point) - get_row(prev_node.end_point) <= 1:
                     ds = process_text_into_docstring(prev_node.text)
                     prev_prop = prev_node.prev_named_sibling
                     if prev_prop is not None and prev_prop.type == "property":
-                        if prev_node.start_point.row == prev_prop.end_point.row:
+                        if get_row(prev_node.start_point) == get_row(
+                            prev_prop.end_point
+                        ):
                             ds = "\n".join(ds.split("\n")[1:])
                     if ds:
                         docstring = ds
                 else:
-                    if prop.start_point.row - prev_node.end_point.row <= 1:
+                    if get_row(prop.start_point) - get_row(prev_node.end_point) <= 1:
                         docstring = process_text_into_docstring(prev_node.text)
             elif prev_node.type == "property":
                 # The previous property node may have eaten our comment
@@ -510,11 +535,10 @@ def _parse_property_section(self, props_match):
                     # we now need to check if prev_comment ends on the line
                     # before ours and trim the first line if it on the same
                     # line as prev property.
-                    if prop.start_point.row - prev_comment.end_point.row <= 1:
+                    if get_row(prop.start_point) - get_row(prev_comment.end_point) <= 1:
                         ds = process_text_into_docstring(prev_comment.text)
-                        if (
-                            prev_comment.start_point.row
-                            == prev_comment.prev_named_sibling.end_point.row
+                        if get_row(prev_comment.start_point) == get_row(
+                            prev_comment.prev_named_sibling.end_point
                         ):
                             ds = "\n".join(ds.split("\n")[1:])
                         if ds:
@@ -562,11 +586,11 @@ def _parse_enum_section(self, enums_match):
             # look forward for docstring
             next_node = enum.next_named_sibling
             if next_node is not None and next_node.type == "comment":
-                if next_node.start_point.row == enum.end_point.row:
+                if get_row(next_node.start_point) == get_row(enum.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
                     docstring = process_text_into_docstring(next_node.text)
                     docstring = docstring.split("\n")[0]
-                elif next_node.start_point.row - enum.end_point.row <= 1:
+                elif get_row(next_node.start_point) - get_row(enum.end_point) <= 1:
                     # Otherwise take the whole docstring
                     docstring = process_text_into_docstring(next_node.text)
 
@@ -580,16 +604,18 @@ def _parse_enum_section(self, enums_match):
                 # line then we set the docstring. We also need to check
                 # if the first line of the comment is the same as a
                 # previous enum.
-                if enum.start_point.row - prev_node.end_point.row <= 1:
+                if get_row(enum.start_point) - get_row(prev_node.end_point) <= 1:
                     ds = process_text_into_docstring(prev_node.text)
                     prev_enum = prev_node.prev_named_sibling
                     if prev_enum is not None and prev_enum.type == "enum":
-                        if prev_node.start_point.row == prev_enum.end_point.row:
+                        if get_row(prev_node.start_point) == get_row(
+                            prev_enum.end_point
+                        ):
                             ds = "\n".join(ds.split("\n")[1:])
                     if ds:
                         docstring = ds
                 else:
-                    if enum.start_point.row - prev_node.end_point.row <= 1:
+                    if get_row(enum.start_point) - get_row(prev_node.end_point) <= 1:
                         docstring = process_text_into_docstring(prev_node.text)
             # After all that if our docstring is empty then we have none
             if docstring.strip() == "":
@@ -612,11 +638,11 @@ def _parse_event_section(self, events_match):
             # look forward for docstring
             next_node = event.next_named_sibling
             if next_node is not None and next_node.type == "comment":
-                if next_node.start_point.row == event.end_point.row:
+                if get_row(next_node.start_point) == get_row(event.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
                     docstring = process_text_into_docstring(next_node.text)
                     docstring = docstring.split("\n")[0]
-                elif next_node.start_point.row - event.end_point.row <= 1:
+                elif get_row(next_node.start_point) - get_row(event.end_point) <= 1:
                     # Otherwise take the whole docstring
                     docstring = process_text_into_docstring(next_node.text)
 
@@ -630,16 +656,18 @@ def _parse_event_section(self, events_match):
                 # line then we set the docstring. We also need to check
                 # if the first line of the comment is the same as a
                 # previous event.
-                if event.start_point.row - prev_node.end_point.row <= 1:
+                if get_row(event.start_point) - get_row(prev_node.end_point) <= 1:
                     ds = process_text_into_docstring(prev_node.text)
                     prev_event = prev_node.prev_named_sibling
                     if prev_event is not None and prev_event.type == "identifier":
-                        if prev_node.start_point.row == prev_event.end_point.row:
+                        if get_row(prev_node.start_point) == get_row(
+                            prev_event.end_point
+                        ):
                             ds = "\n".join(ds.split("\n")[1:])
                     if ds:
                         docstring = ds
                 else:
-                    if event.start_point.row - prev_node.end_point.row <= 1:
+                    if get_row(event.start_point) - get_row(prev_node.end_point) <= 1:
                         docstring = process_text_into_docstring(prev_node.text)
             # After all that if our docstring is empty then we have none
             if docstring.strip() == "":
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index 0a12cf1..5f5210e 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -28,6 +28,7 @@
 from pathlib import Path
 import cProfile
 import pstats
+from importlib.metadata import version
 
 logger = sphinx.util.logging.getLogger("matlab-domain")
 
@@ -512,7 +513,12 @@ def parse_mfile(mfile, name, path, encoding=None):
         full_code = code
 
         # parse the file
-        parser = Parser(ML_LANG)
+        tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
+        if tree_sitter_ver[1] == 21:
+            parser = Parser()
+            parser.set_language(ML_LANG)
+        else:
+            parser = Parser(ML_LANG)
         tree = parser.parse(code)
 
         modname = path.replace(os.sep, ".")  # module name

From 2f36c14c4c7e4eb701c9a95d1f01cbb698d2a0d9 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Wed, 14 Aug 2024 13:06:24 +0200
Subject: [PATCH 23/45] Construct ML_LANG according to the tree-sitter version

---
 setup.py                                | 2 +-
 sphinxcontrib/mat_tree_sitter_parser.py | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index fda2092..78acc7e 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
     "Sphinx>=4.0.0",
     "Pygments>=2.0.1",
     "tree-sitter-matlab>=1.0.1",
-    "tree-sitter>=0.22.0",
+    "tree-sitter>=0.21.3",
 ]
 
 setup(
diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index afae186..0821de1 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -9,7 +9,11 @@
 )
 # rpath = "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_empty_output.m"
 
-ML_LANG = Language(tsml.language(), "matlab")
+tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
+if tree_sitter_ver[1] == 21:
+    ML_LANG = Language(tsml.language(), "matlab")
+else:
+    ML_LANG = Language(tsml.language())
 
 # QUERIES
 q_classdef = ML_LANG.query(

From c55b5d127252733f934d1a7a166c0369389feb93 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Thu, 15 Aug 2024 15:30:40 +0200
Subject: [PATCH 24/45] Improve attributes query; capture ignored_argument nodes

---
 sphinxcontrib/mat_tree_sitter_parser.py | 37 ++++++++++++++++++-------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 0821de1..579455e 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -4,10 +4,8 @@
 import re
 
 # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-rpath = (
-    "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_ellipsis_empty_output.m"
-)
-# rpath = "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_empty_output.m"
+rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithMethodAttributes.m"
+# rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m"
 
 tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
 if tree_sitter_ver[1] == 21:
@@ -36,7 +34,17 @@
 """
 )
 
-q_attributes = ML_LANG.query("""(attribute (identifier) @name (_)? @value)""")
+q_attributes = ML_LANG.query(
+    """(attribute
+    (identifier) @name
+    [
+        (identifier) @value
+        (string) @value
+        (metaclass_operator) @value
+        (cell) @value
+    ]?)
+    """
+)
 
 q_supers = ML_LANG.query("""[(identifier) @secs "."]+ """)
 
@@ -110,7 +118,7 @@
         [
             (identifier) @outputs
             (multioutput_variable
-                [(identifier) @outputs _]+
+                [[(identifier) (ignored_argument)] @outputs _]+
             )
         ]
     )?
@@ -118,7 +126,7 @@
     name: (identifier) @name
     _*
     (function_arguments
-        [(identifier) @params _]*
+        [(identifier) @params (ignored_argument) @params _]*
     )?
     _*
     [(arguments_statement) @argblocks _]*
@@ -685,6 +693,7 @@ def _parse_attributes(self, attrs_nodes):
         attrs = {}
         if attrs_nodes is not None:
             for attr_node in attrs_nodes:
+                print(attr_node.sexp())
                 _, attr_match = q_attributes.matches(attr_node)[0]
                 name = attr_match.get("name").text.decode("utf-8")
                 value_node = attr_match.get("value")
@@ -695,11 +704,19 @@ def _parse_attributes(self, attrs_nodes):
 
 
 if __name__ == "__main__":
-    parser = Parser(ML_LANG)
+    tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
+    if tree_sitter_ver[1] == 21:
+        parser = Parser()
+        parser.set_language(ML_LANG)
+    else:
+        parser = Parser(ML_LANG)
 
     with open(rpath, "rb") as f:
         data = f.read()
 
     tree = parser.parse(data)
-    # class_parser = MatClassParser(tree.root_node)
-    fun_parser = MatFunctionParser(tree.root_node)
+    class_parser = MatClassParser(tree.root_node)
+    # fun_parser = MatFunctionParser(tree.root_node)
+    import pdb
+
+    pdb.set_trace()

From 46c0e4167d494d9b0d1d68645b7767c4259701a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B8rgen=20Cederberg?= <jorgen@cederberg.be>
Date: Wed, 24 Jul 2024 15:04:27 +0200
Subject: [PATCH 25/45] Ci: Test on Sphinx 8 / Dev. (#259)

* CI: Testing for latest Sphinx (8.0)

* CI: Fix helper class version checking.
---
 .github/workflows/python-package.yml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 41775ab..ee7aae6 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -54,6 +54,28 @@ jobs:
         tox -e "${{matrix.python-version}}-sphinx${{matrix.sphinx-version}}-pygments${{matrix.pygments-version}}"
 
 
+  test-sphinx-latest:
+    name: Run tests for Python ${{ matrix.python-version }}, Sphinx ${{ matrix.sphinx-version }},  Pygments ${{ matrix.pygments-version }}
+    timeout-minutes: 5
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11", "3.12"]
+        sphinx-version: ["dev"]
+        pygments-version: ["latest"]
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+        architecture: x64
+    - name: Run with Tox
+      run: |
+        pip install tox==4.8.0
+        tox -e "${{matrix.python-version}}-sphinx${{matrix.sphinx-version}}-pygments${{matrix.pygments-version}}"
+
+
   test-sphinx-latest:
     name: Run tests for Python ${{ matrix.python-version }}, Sphinx ${{ matrix.sphinx-version }},  Pygments ${{ matrix.pygments-version }}
     timeout-minutes: 5

From cf88ba4f5821c24fc9fcbe78bd28849e78b3f26c Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Thu, 15 Aug 2024 20:25:21 +0200
Subject: [PATCH 26/45] Fixing nearly all tests in test_parse_mfile

---
 sphinxcontrib/mat_tree_sitter_parser.py | 219 +++++++++++++++++-------
 sphinxcontrib/mat_types.py              | 160 ++---------------
 tests/test_parse_mfile.py               |  80 +++++----
 3 files changed, 217 insertions(+), 242 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 579455e..43828e4 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -4,7 +4,7 @@
 import re
 
 # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithMethodAttributes.m"
+rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithGetterSetter.m"
 # rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m"
 
 tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
@@ -41,8 +41,10 @@
         (identifier) @value
         (string) @value
         (metaclass_operator) @value
-        (cell) @value
-    ]?)
+        (cell (row [(metaclass_operator) @value _]*))
+        (cell (row [(string) @value _]*))
+    ]? @rhs
+    )
     """
 )
 
@@ -171,6 +173,16 @@
 """
 )
 
+q_script = ML_LANG.query(
+    """
+    (source_file
+        (comment) @docstring
+    )
+    """
+)
+
+q_get_set = ML_LANG.query("""["get." "set."]""")
+
 
 re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M)
 re_assign_remove = re.compile(r"^=[ \t]*")
@@ -191,35 +203,51 @@ def get_row(point):
         return point.row
 
 
-def process_text_into_docstring(text):
-    docstring = text.decode("utf-8")
+def process_text_into_docstring(text, encoding):
+    docstring = text.decode(encoding)
     return re.sub(re_percent_remove, "", docstring)
 
 
-def process_default(text):
-    default = text.decode("utf-8")
+def process_default(text, encoding):
+    default = text.decode(encoding)
     return re.sub(re_assign_remove, "", default)
 
 
+class MatScriptParser:
+    def __init__(self, root_node, encoding):
+        """Parse m script"""
+        self.encoding = encoding
+        _, script_match = q_script.matches(root_node)[0]
+        docstring_node = script_match.get("docstring")
+        if docstring_node is not None:
+            self.docstring = process_text_into_docstring(
+                docstring_node.text, self.encoding
+            )
+        else:
+            self.docstring = None
+        print(self.docstring)
+
+
 class MatFunctionParser:
-    def __init__(self, root_node):
+    def __init__(self, root_node, encoding):
         """Parse Function definition"""
+        self.encoding = encoding
         _, fun_match = q_fun.matches(root_node)[0]
-        self.name = fun_match.get("name").text.decode("utf-8")
+        self.name = fun_match.get("name").text.decode(self.encoding)
 
         # Get outputs (possibly more than one)
-        self.outputs = {}
+        self.retv = {}
         output_nodes = fun_match.get("outputs")
         if output_nodes is not None:
-            outputs = [output.text.decode("utf-8") for output in output_nodes]
-            for output in outputs:
-                self.outputs[output] = {}
+            retv = [output.text.decode(self.encoding) for output in output_nodes]
+            for output in retv:
+                self.retv[output] = {}
 
         # Get parameters
         self.args = {}
         arg_nodes = fun_match.get("params")
         if arg_nodes is not None:
-            args = [arg.text.decode("utf-8") for arg in arg_nodes]
+            args = [arg.text.decode(self.encoding) for arg in arg_nodes]
             for arg in args:
                 self.args[arg] = {}
 
@@ -235,7 +263,9 @@ def __init__(self, root_node):
         if docstring_node is not None:
             prev_sib = docstring_node.prev_named_sibling
             if get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1:
-                docstring = process_text_into_docstring(docstring_node.text)
+                docstring = process_text_into_docstring(
+                    docstring_node.text, self.encoding
+                )
 
         if not docstring:
             docstring = None
@@ -255,28 +285,32 @@ def _parse_argument_section(self, argblock_node):
             _, arg_match = q_arg.matches(arg)[0]
 
             # extract name (this is always available so no need for None check)
-            name = [name.text.decode("utf-8") for name in arg_match.get("name")]
+            name = [name.text.decode(self.encoding) for name in arg_match.get("name")]
 
             # extract dims list
             dims_list = arg_match.get("dims")
             dims = None
             if dims_list is not None:
-                dims = tuple([dim.text.decode("utf-8") for dim in dims_list])
+                dims = tuple([dim.text.decode(self.encoding) for dim in dims_list])
 
             # extract type
             type_node = arg_match.get("type")
-            typename = type_node.text.decode("utf-8") if type_node is not None else None
+            typename = (
+                type_node.text.decode(self.encoding) if type_node is not None else None
+            )
 
             # extract validator functions
             vf_list = arg_match.get("validator_functions")
             vfs = None
             if vf_list is not None:
-                vfs = [vf.text.decode("utf-8") for vf in vf_list]
+                vfs = [vf.text.decode(self.encoding) for vf in vf_list]
 
             # extract default
             default_node = arg_match.get("default")
             default = (
-                process_default(default_node.text) if default_node is not None else None
+                process_default(default_node.text, self.encoding)
+                if default_node is not None
+                else None
             )
 
             # extract inline or following docstring if there is no semicolon
@@ -289,14 +323,18 @@ def _parse_argument_section(self, argblock_node):
                 prev_sib = docstring_node.prev_named_sibling
                 if get_row(docstring_node.start_point) == get_row(prev_sib.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
-                    docstring = process_text_into_docstring(docstring_node.text)
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
                     docstring = docstring.split("\n")[0]
                 elif (
                     get_row(docstring_node.start_point) - get_row(prev_sib.end_point)
                     <= 1
                 ):
                     # Otherwise take the whole docstring
-                    docstring = process_text_into_docstring(docstring_node.text)
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
 
             # extract inline or following docstring if there _is_ a semicolon.
             # this is only done if we didn't already find a docstring with the previous approach
@@ -307,11 +345,15 @@ def _parse_argument_section(self, argblock_node):
             elif next_node.type == "comment":
                 if get_row(next_node.start_point) == get_row(arg.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
-                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = process_text_into_docstring(
+                        next_node.text, self.encoding
+                    )
                     docstring = docstring.split("\n")[0]
                 elif get_row(next_node.start_point) - get_row(arg.end_point) <= 1:
                     # Otherwise take the whole docstring
-                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = process_text_into_docstring(
+                        next_node.text, self.encoding
+                    )
 
             # override docstring with prior if exists
             prev_node = arg.prev_named_sibling
@@ -324,7 +366,7 @@ def _parse_argument_section(self, argblock_node):
                 # if the first line of the comment is the same as a
                 # previous argument.
                 if get_row(arg.start_point) - get_row(prev_node.end_point) <= 1:
-                    ds = process_text_into_docstring(prev_node.text)
+                    ds = process_text_into_docstring(prev_node.text, self.encoding)
                     prev_arg = prev_node.prev_named_sibling
                     if prev_arg is not None and prev_arg.type == "property":
                         if get_row(prev_node.start_point) == get_row(
@@ -335,7 +377,9 @@ def _parse_argument_section(self, argblock_node):
                         docstring = ds
                 else:
                     if get_row(arg.start_point) - get_row(prev_node.end_point) <= 1:
-                        docstring = process_text_into_docstring(prev_node.text)
+                        docstring = process_text_into_docstring(
+                            prev_node.text, self.encoding
+                        )
             elif prev_node.type == "property":
                 # The previous argumentnode may have eaten our comment
                 # check for it a trailing comment. If it is not there
@@ -346,7 +390,9 @@ def _parse_argument_section(self, argblock_node):
                     # before ours and trim the first line if it on the same
                     # line as prev property.
                     if get_row(arg.start_point) - get_row(prev_comment.end_point) <= 1:
-                        ds = process_text_into_docstring(prev_comment.text)
+                        ds = process_text_into_docstring(
+                            prev_comment.text, self.encoding
+                        )
                         if get_row(prev_comment.start_point) == get_row(
                             prev_comment.prev_named_sibling.end_point
                         ):
@@ -361,7 +407,7 @@ def _parse_argument_section(self, argblock_node):
 
             # Here we trust that the person is giving us valid matlab.
             if "Output" in attrs.keys():
-                arg_loc = self.outputs
+                arg_loc = self.retv
             else:
                 arg_loc = self.args
             if len(name) == 1:
@@ -383,17 +429,20 @@ def _parse_attributes(self, attrs_nodes):
         if attrs_nodes is not None:
             for attr_node in attrs_nodes:
                 _, attr_match = q_attributes.matches(attr_node)[0]
-                name = attr_match.get("name").text.decode("utf-8")
+                name = attr_match.get("name").text.decode(self.encoding)
                 value_node = attr_match.get("value")
                 attrs[name] = (
-                    value_node.text.decode("utf-8") if value_node is not None else None
+                    value_node.text.decode(self.encoding)
+                    if value_node is not None
+                    else None
                 )
         return attrs
 
 
 class MatClassParser:
-    def __init__(self, root_node):
+    def __init__(self, root_node, encoding):
         # DATA
+        self.encoding = encoding
         self.name = ""
         self.supers = []
         self.attrs = {}
@@ -420,7 +469,7 @@ def __init__(self, root_node):
             for super_node in supers_nodes:
                 _, super_match = q_supers.matches(super_node)[0]
                 super_cls = tuple(
-                    [sec.text.decode("utf-8") for sec in super_match.get("secs")]
+                    [sec.text.decode(self.encoding) for sec in super_match.get("secs")]
                 )
                 self.supers.append(super_cls)
 
@@ -429,7 +478,9 @@ def __init__(self, root_node):
         if docstring_node is not None:
             prev_node = docstring_node.prev_sibling
             if get_row(docstring_node.start_point) - get_row(prev_node.end_point) <= 1:
-                self.docstring = process_text_into_docstring(docstring_node.text)
+                self.docstring = process_text_into_docstring(
+                    docstring_node.text, self.encoding
+                )
 
         prop_matches = q_properties.matches(self.cls)
         method_matches = q_methods.matches(self.cls)
@@ -457,28 +508,32 @@ def _parse_property_section(self, props_match):
             _, prop_match = q_property.matches(prop)[0]
 
             # extract name (this is always available so no need for None check)
-            name = prop_match.get("name").text.decode("utf-8")
+            name = prop_match.get("name").text.decode(self.encoding)
 
             # extract dims list
             dims_list = prop_match.get("dims")
             dims = None
             if dims_list is not None:
-                dims = tuple([dim.text.decode("utf-8") for dim in dims_list])
+                dims = tuple([dim.text.decode(self.encoding) for dim in dims_list])
 
             # extract type
             type_node = prop_match.get("type")
-            typename = type_node.text.decode("utf-8") if type_node is not None else None
+            typename = (
+                type_node.text.decode(self.encoding) if type_node is not None else None
+            )
 
             # extract validator functions
             vf_list = prop_match.get("validator_functions")
             vfs = None
             if vf_list is not None:
-                vfs = [vf.text.decode("utf-8") for vf in vf_list]
+                vfs = [vf.text.decode(self.encoding) for vf in vf_list]
 
             # extract default
             default_node = prop_match.get("default")
             default = (
-                process_default(default_node.text) if default_node is not None else None
+                process_default(default_node.text, self.encoding)
+                if default_node is not None
+                else None
             )
 
             # extract inline or following docstring if there is no semicolon
@@ -491,29 +546,37 @@ def _parse_property_section(self, props_match):
                 prev_sib = docstring_node.prev_named_sibling
                 if get_row(docstring_node.start_point) == get_row(prev_sib.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
-                    docstring = process_text_into_docstring(docstring_node.text)
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
                     docstring = docstring.split("\n")[0]
                 elif (
                     get_row(docstring_node.start_point) - get_row(prev_sib.end_point)
                     <= 1
                 ):
                     # Otherwise take the whole docstring
-                    docstring = process_text_into_docstring(docstring_node.text)
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
 
             # extract inline or following docstring if there _is_ a semicolon.
             # this is only done if we didn't already find a docstring with the previous approach
             next_node = prop.next_named_sibling
-            if next_node is None or docstring is not None:
+            if next_node is None or docstring != "":
                 # Nothing to be done.
                 pass
             elif next_node.type == "comment":
                 if get_row(next_node.start_point) == get_row(prop.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
-                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = process_text_into_docstring(
+                        next_node.text, self.encoding
+                    )
                     docstring = docstring.split("\n")[0]
                 elif get_row(next_node.start_point) - get_row(prop.end_point) <= 1:
                     # Otherwise take the whole docstring
-                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = process_text_into_docstring(
+                        next_node.text, self.encoding
+                    )
 
             # override docstring with prior if exists
             prev_node = prop.prev_named_sibling
@@ -526,18 +589,21 @@ def _parse_property_section(self, props_match):
                 # if the first line of the comment is the same as a
                 # previous property.
                 if get_row(prop.start_point) - get_row(prev_node.end_point) <= 1:
-                    ds = process_text_into_docstring(prev_node.text)
+                    ds = process_text_into_docstring(prev_node.text, self.encoding)
                     prev_prop = prev_node.prev_named_sibling
                     if prev_prop is not None and prev_prop.type == "property":
                         if get_row(prev_node.start_point) == get_row(
                             prev_prop.end_point
                         ):
                             ds = "\n".join(ds.split("\n")[1:])
+
                     if ds:
                         docstring = ds
                 else:
                     if get_row(prop.start_point) - get_row(prev_node.end_point) <= 1:
-                        docstring = process_text_into_docstring(prev_node.text)
+                        docstring = process_text_into_docstring(
+                            prev_node.text, self.encoding
+                        )
             elif prev_node.type == "property":
                 # The previous property node may have eaten our comment
                 # check for it a trailing comment. If it is not there
@@ -548,7 +614,9 @@ def _parse_property_section(self, props_match):
                     # before ours and trim the first line if it on the same
                     # line as prev property.
                     if get_row(prop.start_point) - get_row(prev_comment.end_point) <= 1:
-                        ds = process_text_into_docstring(prev_comment.text)
+                        ds = process_text_into_docstring(
+                            prev_comment.text, self.encoding
+                        )
                         if get_row(prev_comment.start_point) == get_row(
                             prev_comment.prev_named_sibling.end_point
                         ):
@@ -577,7 +645,11 @@ def _parse_method_section(self, methods_match):
         attrs_nodes = methods_match.get("attrs")
         attrs = self._parse_attributes(attrs_nodes)
         for method in methods:
-            parsed_function = MatFunctionParser(method)
+            is_set_get = q_get_set.matches(method)
+            # Skip getter and setter
+            if len(is_set_get) > 0:
+                continue
+            parsed_function = MatFunctionParser(method, self.encoding)
             self.methods[parsed_function.name] = parsed_function
             self.methods[parsed_function.name].attrs = attrs
 
@@ -587,10 +659,10 @@ def _parse_enum_section(self, enums_match):
             return
         for enum in enums:
             _, enum_match = q_enum.matches(enum)[0]
-            name = enum_match.get("name").text.decode("utf-8")
+            name = enum_match.get("name").text.decode(self.encoding)
             arg_nodes = enum_match.get("args")
             if arg_nodes is not None:
-                args = [arg.text.decode("utf-8") for arg in arg_nodes]
+                args = [arg.text.decode(self.encoding) for arg in arg_nodes]
             else:
                 args = None
 
@@ -600,11 +672,15 @@ def _parse_enum_section(self, enums_match):
             if next_node is not None and next_node.type == "comment":
                 if get_row(next_node.start_point) == get_row(enum.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
-                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = process_text_into_docstring(
+                        next_node.text, self.encoding
+                    )
                     docstring = docstring.split("\n")[0]
                 elif get_row(next_node.start_point) - get_row(enum.end_point) <= 1:
                     # Otherwise take the whole docstring
-                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = process_text_into_docstring(
+                        next_node.text, self.encoding
+                    )
 
             # override docstring with prior if exists
             prev_node = enum.prev_named_sibling
@@ -617,7 +693,7 @@ def _parse_enum_section(self, enums_match):
                 # if the first line of the comment is the same as a
                 # previous enum.
                 if get_row(enum.start_point) - get_row(prev_node.end_point) <= 1:
-                    ds = process_text_into_docstring(prev_node.text)
+                    ds = process_text_into_docstring(prev_node.text, self.encoding)
                     prev_enum = prev_node.prev_named_sibling
                     if prev_enum is not None and prev_enum.type == "enum":
                         if get_row(prev_node.start_point) == get_row(
@@ -628,7 +704,9 @@ def _parse_enum_section(self, enums_match):
                         docstring = ds
                 else:
                     if get_row(enum.start_point) - get_row(prev_node.end_point) <= 1:
-                        docstring = process_text_into_docstring(prev_node.text)
+                        docstring = process_text_into_docstring(
+                            prev_node.text, self.encoding
+                        )
             # After all that if our docstring is empty then we have none
             if docstring.strip() == "":
                 docstring == None
@@ -644,7 +722,7 @@ def _parse_event_section(self, events_match):
         if events is None:
             return
         for event in events:
-            name = event.text.decode("utf-8")
+            name = event.text.decode(self.encoding)
 
             docstring = ""
             # look forward for docstring
@@ -652,11 +730,15 @@ def _parse_event_section(self, events_match):
             if next_node is not None and next_node.type == "comment":
                 if get_row(next_node.start_point) == get_row(event.end_point):
                     # if the docstring is on the same line as the end of the definition only take the inline part
-                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = process_text_into_docstring(
+                        next_node.text, self.encoding
+                    )
                     docstring = docstring.split("\n")[0]
                 elif get_row(next_node.start_point) - get_row(event.end_point) <= 1:
                     # Otherwise take the whole docstring
-                    docstring = process_text_into_docstring(next_node.text)
+                    docstring = process_text_into_docstring(
+                        next_node.text, self.encoding
+                    )
 
             # override docstring with prior if exists
             prev_node = event.prev_named_sibling
@@ -669,7 +751,7 @@ def _parse_event_section(self, events_match):
                 # if the first line of the comment is the same as a
                 # previous event.
                 if get_row(event.start_point) - get_row(prev_node.end_point) <= 1:
-                    ds = process_text_into_docstring(prev_node.text)
+                    ds = process_text_into_docstring(prev_node.text, self.encoding)
                     prev_event = prev_node.prev_named_sibling
                     if prev_event is not None and prev_event.type == "identifier":
                         if get_row(prev_node.start_point) == get_row(
@@ -680,7 +762,9 @@ def _parse_event_section(self, events_match):
                         docstring = ds
                 else:
                     if get_row(event.start_point) - get_row(prev_node.end_point) <= 1:
-                        docstring = process_text_into_docstring(prev_node.text)
+                        docstring = process_text_into_docstring(
+                            prev_node.text, self.encoding
+                        )
             # After all that if our docstring is empty then we have none
             if docstring.strip() == "":
                 docstring == None
@@ -693,13 +777,20 @@ def _parse_attributes(self, attrs_nodes):
         attrs = {}
         if attrs_nodes is not None:
             for attr_node in attrs_nodes:
-                print(attr_node.sexp())
                 _, attr_match = q_attributes.matches(attr_node)[0]
-                name = attr_match.get("name").text.decode("utf-8")
+                name = attr_match.get("name").text.decode(self.encoding)
                 value_node = attr_match.get("value")
-                attrs[name] = (
-                    value_node.text.decode("utf-8") if value_node is not None else None
-                )
+                rhs_node = attr_match.get("rhs")
+                if rhs_node is not None:
+                    if rhs_node.type == "cell":
+                        attrs[name] = [
+                            vn.text.decode(self.encoding) for vn in value_node
+                        ]
+                    else:
+                        attrs[name] = value_node[0].text.decode(self.encoding)
+                else:
+                    attrs[name] = None
+
         return attrs
 
 
@@ -715,7 +806,7 @@ def _parse_attributes(self, attrs_nodes):
         data = f.read()
 
     tree = parser.parse(data)
-    class_parser = MatClassParser(tree.root_node)
+    class_parser = MatClassParser(tree.root_node, "utf-8")  # no `self` at script scope
     # fun_parser = MatFunctionParser(tree.root_node)
     import pdb
 
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index 5f5210e..cf9a8c2 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -20,6 +20,7 @@
 from sphinxcontrib.mat_tree_sitter_parser import (
     MatClassParser,
     MatFunctionParser,
+    MatScriptParser,
     ML_LANG,
 )
 import tree_sitter_matlab as tsml
@@ -548,19 +549,17 @@ def isClass(tree):
                 name,
                 modname,
             )
-            return MatClass(name, modname, tree.root_node)
+            return MatClass(name, modname, tree.root_node, encoding)
         elif isFunction(tree):
             logger.debug(
                 "[sphinxcontrib-matlabdomain] parsing function %s from %s.",
                 name,
                 modname,
             )
-            return MatFunction(name, modname, tree.root_node)
+            return MatFunction(name, modname, tree.root_node, encoding)
         else:
-            pass
-            # it's a script file retoken with header comment
-            # tks = list(MatlabLexer().get_tokens(full_code))
-            # return MatScript(name, modname, toks)
+            return MatScript(name, modname, tree.root_node, encoding)
+
         return None
 
     @staticmethod
@@ -876,15 +875,15 @@ class MatFunction(MatObject):
     :type tokens: list
     """
 
-    def __init__(self, name, modname, tokens):
+    def __init__(self, name, modname, tokens, encoding):
         super(MatFunction, self).__init__(name)
-        parsed_function = MatFunctionParser(tokens)
+        parsed_function = MatFunctionParser(tokens, encoding)
         #: Path of folder containing :class:`MatObject`.
         self.module = modname
         #: docstring
         self.docstring = parsed_function.docstring
         #: output args
-        self.retv = parsed_function.outputs
+        self.retv = parsed_function.retv
         #: input args
         self.args = parsed_function.args
         #: remaining tokens after main function is parsed
@@ -925,9 +924,9 @@ class MatClass(MatMixin, MatObject):
     :type tokens: list
     """
 
-    def __init__(self, name, modname, tokens):
+    def __init__(self, name, modname, tokens, encoding):
         super(MatClass, self).__init__(name)
-        parsed_class = MatClassParser(tokens)
+        parsed_class = MatClassParser(tokens, encoding)
         #: Path of folder containing :class:`MatObject`.
         self.module = modname
         #: dictionary of class attributes
@@ -974,105 +973,6 @@ def link(self, env, name=None):
         else:
             return f":class:`{target}`"
 
-    def attributes(self, idx, attr_types):
-        """
-        Retrieve MATLAB class, property and method attributes.
-        """
-        attr_dict = {}
-        idx += self._blanks(idx)  # skip blanks
-        # class, property & method "attributes" start with parenthesis
-        if self._tk_eq(idx, (Token.Punctuation, "(")):
-            idx += 1
-            # closing parenthesis terminates attributes
-            while self._tk_ne(idx, (Token.Punctuation, ")")):
-                idx += self._blanks(idx)  # skip blanks
-
-                k, attr_name = self.tokens[idx]  # split token key, value
-                if k is Token.Name and attr_name in attr_types:
-                    attr_dict[attr_name] = True  # add attibute to dictionary
-                    idx += 1
-                elif k is Token.Name:
-                    logger.warning(
-                        "[sphinxcontrib-matlabdomain] Unexpected class attribute: '%s'. "
-                        " In '%s.%s'.",
-                        str(self.tokens[idx][1]),
-                        self.module,
-                        self.name,
-                    )
-                    idx += 1
-
-                idx += self._blanks(idx)  # skip blanks
-
-                # Continue if attribute is assigned a boolean value
-                if self.tokens[idx][0] == Token.Name.Builtin:
-                    idx += 1
-                    continue
-
-                # continue to next attribute separated by commas
-                if self._tk_eq(idx, (Token.Punctuation, ",")):
-                    idx += 1
-                    continue
-                # attribute values
-                elif self._tk_eq(idx, (Token.Punctuation, "=")):
-                    idx += 1
-                    idx += self._blanks(idx)  # skip blanks
-                    k, attr_val = self.tokens[idx]  # split token key, value
-                    if k is Token.Name and attr_val in ["true", "false"]:
-                        # logical value
-                        if attr_val == "false":
-                            attr_dict[attr_name] = False
-                        idx += 1
-                    elif k is Token.Name or self._tk_eq(idx, (Token.Text, "?")):
-                        # concatenate enumeration or meta class
-                        enum_or_meta = self.tokens[idx][1]
-                        idx += 1
-                        while (
-                            self._tk_ne(idx, (Token.Text, " "))
-                            and self._tk_ne(idx, (Token.Text, "\t"))
-                            and self._tk_ne(idx, (Token.Punctuation, ","))
-                            and self._tk_ne(idx, (Token.Punctuation, ")"))
-                        ):
-                            enum_or_meta += self.tokens[idx][1]
-                            idx += 1
-                        if self._tk_ne(idx, (Token.Punctuation, ")")):
-                            idx += 1
-                        attr_dict[attr_name] = enum_or_meta
-                    # cell array of values
-                    elif self._tk_eq(idx, (Token.Punctuation, "{")):
-                        idx += 1
-                        # closing curly braces terminate cell array
-                        attr_dict[attr_name] = []
-                        while self._tk_ne(idx, (Token.Punctuation, "}")):
-                            idx += self._blanks(idx)  # skip blanks
-                            # concatenate attr value string
-                            attr_val = ""
-                            # TODO: use _blanks or _indent instead
-                            while self._tk_ne(
-                                idx, (Token.Punctuation, ",")
-                            ) and self._tk_ne(idx, (Token.Punctuation, "}")):
-                                attr_val += self.tokens[idx][1]
-                                idx += 1
-                            if self._tk_eq(idx, (Token.Punctuation, ",")):
-                                idx += 1
-                            if attr_val:
-                                attr_dict[attr_name].append(attr_val)
-                        idx += 1
-                    elif (
-                        self.tokens[idx][0] == Token.Literal.String
-                        and self.tokens[idx + 1][0] == Token.Literal.String
-                    ):
-                        # String
-                        attr_val += self.tokens[idx][1] + self.tokens[idx + 1][1]
-                        idx += 2
-                        attr_dict[attr_name] = attr_val.strip("'")
-
-                    idx += self._blanks(idx)  # skip blanks
-                    # continue to next attribute separated by commas
-                    if self._tk_eq(idx, (Token.Punctuation, ",")):
-                        idx += 1
-            idx += 1  # end of class attributes
-        return attr_dict, idx
-
     @property
     def __module__(self):
         return self.module
@@ -1194,49 +1094,15 @@ def __doc__(self):
 
 
 class MatScript(MatObject):
-    def __init__(self, name, modname, tks):
+    def __init__(self, name, modname, tks, encoding):
         super(MatScript, self).__init__(name)
+        parsed_script = MatScriptParser(tks, encoding)
         #: Path of folder containing :class:`MatScript`.
         self.module = modname
         #: List of tokens parsed from mfile by Pygments.
         self.tokens = tks
         #: docstring
-        self.docstring = ""
-        #: remaining tokens after main function is parsed
-        self.rem_tks = None
-
-        tks = copy(self.tokens)  # make a copy of tokens
-        tks.reverse()  # reverse in place for faster popping, stacks are LiLo
-        skip_whitespace(tks)
-        # =====================================================================
-        # docstring
-        try:
-            docstring = tks.pop()
-            # Skip any statements before first documentation header
-            while docstring and docstring[0] is not Token.Comment:
-                docstring = tks.pop()
-        except IndexError:
-            docstring = None
-        while docstring and docstring[0] is Token.Comment:
-            self.docstring += docstring[1].lstrip("%")
-            # Get newline if it exists and append to docstring
-            try:
-                wht = tks.pop()  # We expect a newline
-            except IndexError:
-                break
-            if wht[0] in (Token.Text, Token.Text.Whitespace) and wht[1] == "\n":
-                self.docstring += "\n"
-            # Skip whitespace
-            try:
-                wht = tks.pop()  # We expect a newline
-            except IndexError:
-                break
-            while wht in list(zip((Token.Text,) * 3, (" ", "\t"))):
-                try:
-                    wht = tks.pop()
-                except IndexError:
-                    break
-            docstring = wht  # check if Token is Comment
+        self.docstring = parsed_script.docstring
 
     @property
     def __doc__(self):
diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
index b901409..123c2a6 100644
--- a/tests/test_parse_mfile.py
+++ b/tests/test_parse_mfile.py
@@ -24,7 +24,7 @@ def test_ClassExample():
     assert obj.name == "ClassExample"
     assert (
         obj.docstring
-        == " test class methods\n\n:param a: the input to :class:`ClassExample`"
+        == "test class methods\n\n:param a: the input to :class:`ClassExample`"
     )
     mymethod = obj.methods["mymethod"]
     assert mymethod.name == "mymethod"
@@ -105,7 +105,7 @@ def test_no_docstring():
     assert obj.name == "f_no_docstring"
     assert list(obj.retv.keys()) == ["y"]
     assert list(obj.args.keys()) == []
-    assert obj.docstring == ""
+    assert obj.docstring is None
 
 
 def test_no_output():
@@ -183,7 +183,7 @@ def test_ClassWithFunctionArguments():
     mymethod = obj.methods["mymethod"]
     assert mymethod.name == "mymethod"
     assert list(mymethod.retv.keys()) == ["c"]
-    assert mymethod.args.keys() == ["obj", "b"]
+    assert list(mymethod.args.keys()) == ["obj", "b"]
     assert (
         mymethod.docstring
         == "a method in :class:`ClassWithFunctionArguments`\n\n:param b: an input to :meth:`mymethod`"
@@ -248,15 +248,14 @@ def test_script_with_comment_header():
     assert (
         obj.docstring
         == """This is a Comment Header
- Copyright (C) <year>, by <full_name>
-
- Some descriptions ...
+Copyright (C) <year>, by <full_name>
 
- This header and all further comments above the first command line
- of the script will be ignored by the documentation system.
+Some descriptions ...
 
- Lisence (GPL, BSD, etc.)
+This header and all further comments above the first command line
+of the script will be ignored by the documentation system.
 
+Lisence (GPL, BSD, etc.)
 """
     )
 
@@ -269,15 +268,14 @@ def test_script_with_comment_header_2():
     assert (
         obj.docstring
         == """This is a Comment Header
- Copyright (C) <year>, by <full_name>
+Copyright (C) <year>, by <full_name>
 
- Some descriptions ...
+Some descriptions ...
 
- This header and all further comments above the first command line
- of the script will be ignored by the documentation system.
-
- Lisence (GPL, BSD, etc.)
+This header and all further comments above the first command line
+of the script will be ignored by the documentation system.
 
+Lisence (GPL, BSD, etc.)
 """
     )
 
@@ -290,8 +288,7 @@ def test_script_with_comment_header_3():
     assert (
         obj.docstring
         == """This is a Comment Header with empty lines above
- and many line comments.
-
+and many line comments.
 """
     )
 
@@ -304,8 +301,7 @@ def test_script_with_comment_header_4():
     assert (
         obj.docstring
         == """This is a Comment Header with a single instruction above
- and many line comments.
-
+and many line comments.
 """
     )
 
@@ -320,26 +316,34 @@ def test_PropTypeOld():
             "docstring": None,
             "attrs": {},
             "default": "'none'",
-            "specs": "@char",
-        },
+            "size": None,
+            "type": "char",
+            "validators": None,
+        },  # 'type': ['char']
         "pos": {
             "docstring": None,
             "attrs": {},
             "default": "zeros(3,1)",
-            "specs": "@double vector",
-        },
+            "size": None,
+            "type": "vector",
+            "validators": None,
+        },  # 'type': ['double', 'vector'],
         "rotm": {
             "docstring": None,
             "attrs": {},
             "default": "zeros(3,3)",
-            "specs": "@double matrix",
-        },
+            "size": None,
+            "type": "matrix",
+            "validators": None,
+        },  # 'type': ['double', 'matrix'],
         "idx": {
             "docstring": None,
             "attrs": {},
             "default": "0",
-            "specs": "@uint8 scalar",
-        },
+            "size": None,
+            "type": "scalar",
+            "validators": None,
+        },  # 'type': ['uint8', 'scalar'],
     }
 
 
@@ -361,7 +365,7 @@ def test_ClassWithMethodAttributes():
     assert obj.methods["testPublic"].attrs == {"Access": "public"}
     assert obj.methods["testProtected"].attrs == {"Access": "protected"}
     assert obj.methods["testPrivate1"].attrs == {"Access": "private"}
-    assert obj.methods["testPrivate2"].attrs == {"Access": "private"}
+    assert obj.methods["testPrivate2"].attrs == {"Access": "'private'"}
     assert obj.methods["testHidden"].attrs == {"Hidden": None}
     assert obj.methods["testStatic"].attrs == {"Static": None}
     assert obj.methods["testFriend1"].attrs == {"Access": "?OtherClass"}
@@ -618,7 +622,14 @@ def test_ClassWithGetterSetter():
     assert obj.name == "ClassWithGetterSetter"
     assert list(obj.methods.keys()) == ["ClassWithGetterSetter"]
     assert obj.properties == {
-        "a": {"docstring": "A nice property", "attrs": {}, "default": None}
+        "a": {
+            "docstring": "A nice property",
+            "attrs": {},
+            "default": None,
+            "size": None,
+            "type": None,
+            "validators": None,
+        }
     }
 
 
@@ -631,7 +642,14 @@ def test_ClassWithDoubleQuotedString():
     assert obj.name == "ClassWithDoubleQuotedString"
     assert set(obj.methods.keys()) == set(["ClassWithDoubleQuotedString", "method1"])
     assert obj.properties == {
-        "Property1": {"docstring": None, "attrs": {}, "default": None, "specs": ""}
+        "Property1": {
+            "docstring": None,
+            "attrs": {},
+            "default": None,
+            "size": None,
+            "type": None,
+            "validators": None,
+        }
     }
 
 
@@ -903,7 +921,7 @@ def test_ClassWithTests():
     assert obj.bases == [("matlab", "unittest", "TestCase")]
     assert "testRunning" in obj.methods
     testRunning = obj.methods["testRunning"]
-    assert testRunning.attrs["TestTags"] == ["{'Unit'}"]
+    assert testRunning.attrs["TestTags"] == ["'Unit'"]
 
 
 if __name__ == "__main__":

From e12307bde0f7743c116677de9f7ed8cb89b9c537 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Thu, 15 Aug 2024 23:04:39 +0200
Subject: [PATCH 27/45] fix a _lot_ of autodoc

---
 sphinxcontrib/mat_documenters.py |  7 ++++---
 sphinxcontrib/mat_types.py       | 28 +++++++++++++++-------------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/sphinxcontrib/mat_documenters.py b/sphinxcontrib/mat_documenters.py
index 35fff8c..4c58cca 100644
--- a/sphinxcontrib/mat_documenters.py
+++ b/sphinxcontrib/mat_documenters.py
@@ -1380,10 +1380,11 @@ def format_args(self):
         is_ctor = self.object.cls.name == self.object.name
 
         if self.object.args:
-            if self.object.args[0] in ("obj", "self") and not is_ctor:
-                return "(" + ", ".join(self.object.args[1:]) + ")"
+            arglist = list(self.object.args.keys())
+            if arglist[0] in ("obj", "self") and not is_ctor:
+                return "(" + ", ".join(arglist[1:]) + ")"
             else:
-                return "(" + ", ".join(self.object.args) + ")"
+                return "(" + ", ".join(arglist) + ")"
 
     def document_members(self, all_members=False):
         pass
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index cf9a8c2..cf55766 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -938,7 +938,10 @@ def __init__(self, name, modname, tokens, encoding):
         #: dictionary of class properties
         self.properties = parsed_class.properties
         #: dictionary of class methods
-        self.methods = parsed_class.methods
+        self.methods = {
+            name: MatMethod(name, parsed_fun, modname, self)
+            for (name, parsed_fun) in parsed_class.methods.items()
+        }
         #:
         self.enumerations = parsed_class.enumerations
         #: remaining tokens after main class definition is parsed
@@ -1066,24 +1069,23 @@ def __doc__(self):
         return self.docstring
 
 class MatMethod(MatFunction):
-    def __init__(self, modname, tks, cls, attrs):
-        # set name to None
-        super(MatMethod, self).__init__(None, modname, tks)
+    def __init__(self, name, parsed_function, modname, cls):
+        self.name = name
+        #: Path of folder containing :class:`MatObject`.
+        self.module = modname
+        #: docstring
+        self.docstring = parsed_function.docstring
+        #: output args
+        self.retv = parsed_function.retv
+        #: input args
+        self.args = parsed_function.args
         self.cls = cls
-        self.attrs = attrs
+        self.attrs = parsed_function.attrs
 
     def ref_role(self):
         """Returns role to use for references to this object (e.g. when generating auto-links)"""
         return "meth"
 
-    def skip_tokens(self):
-        # Number of tokens to skip in `MatClass`
-        num_rem_tks = len(self.rem_tks)
-        len_meth = len(self.tokens) - num_rem_tks
-        self.tokens = self.tokens[:-num_rem_tks]
-        self.rem_tks = None
-        return len_meth
-
     @property
     def __module__(self):
         return self.module

From f1ea4f588a334813a82e22bea66fb14e6bbede8c Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Fri, 16 Aug 2024 13:08:45 +0200
Subject: [PATCH 28/45] temporarily point to tree-sitter-matlab branch on
 apozharski fork

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 78acc7e..5d459e6 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 requires = [
     "Sphinx>=4.0.0",
     "Pygments>=2.0.1",
-    "tree-sitter-matlab>=1.0.1",
+    "tree-sitter-matlab @ git+https://github.com/apozharski/tree-sitter-matlab.git",
     "tree-sitter>=0.21.3",
 ]
 

From 64b2ce236d036ddd9fc7556026f57bba8857fb24 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Fri, 16 Aug 2024 15:05:35 +0200
Subject: [PATCH 29/45] fix old property syntax + update tests

---
 sphinxcontrib/mat_tree_sitter_parser.py | 71 ++++++++++++++++++-------
 tests/test_parse_mfile.py               | 12 ++---
 2 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 43828e4..4ef6285 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -4,7 +4,7 @@
 import re
 
 # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithGetterSetter.m"
+rpath = "/home/anton/tools/matlabdomain/tests/test_data/PropTypeOld.m"
 # rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m"
 
 tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
@@ -56,7 +56,7 @@
     (attributes
         [(attribute) @attrs _]+
     )?
-    [(property) @properties _]+
+    [(property) @properties (old_property) @properties _]+
     ) @prop_block
 """
 )
@@ -104,6 +104,17 @@
 """
 )
 
+q_old_property = ML_LANG.query(
+    """
+    (old_property name: (identifier) @name
+     (identifier) @type
+     (old_property_type)? @size_type
+     (default_value)? @default
+     (comment)? @docstring
+    )
+"""
+)
+
 q_enum = ML_LANG.query(
     """(enum
     .
@@ -504,17 +515,47 @@ def _parse_property_section(self, props_match):
         attrs_nodes = props_match.get("attrs")
         attrs = self._parse_attributes(attrs_nodes)
         for prop in properties:
-            # match property to extract details
-            _, prop_match = q_property.matches(prop)[0]
+            if prop.type == "property":
+                # match property to extract details
+                _, prop_match = q_property.matches(prop)[0]
+
+                # extract name (this is always available so no need for None check)
+                name = prop_match.get("name").text.decode(self.encoding)
+
+                # extract dims list
+                dims_list = prop_match.get("dims")
+                dims = None
+                if dims_list is not None:
+                    dims = tuple([dim.text.decode(self.encoding) for dim in dims_list])
+
+                # extract validator functions
+                vf_list = prop_match.get("validator_functions")
+                vfs = None
+                if vf_list is not None:
+                    vfs = [vf.text.decode(self.encoding) for vf in vf_list]
+            else:
+                # match property to extract details
+                _, prop_match = q_old_property.matches(prop)[0]
 
-            # extract name (this is always available so no need for None check)
-            name = prop_match.get("name").text.decode(self.encoding)
+                # extract name (this is always available so no need for None check)
+                name = prop_match.get("name").text.decode(self.encoding)
 
-            # extract dims list
-            dims_list = prop_match.get("dims")
-            dims = None
-            if dims_list is not None:
-                dims = tuple([dim.text.decode(self.encoding) for dim in dims_list])
+                # extract size type
+                size_type = prop_match.get("size_type")
+                import pdb
+
+                pdb.set_trace()
+                if size_type is None:
+                    dims = None
+                elif size_type.text == b"scalar":
+                    dims = ("1", "1")
+                elif size_type.text == b"vector":
+                    dims = (":", "1")
+                elif size_type.text == b"matrix":
+                    dims = (":", ":")
+
+                # No validator functions
+                vfs = None
 
             # extract type
             type_node = prop_match.get("type")
@@ -522,12 +563,6 @@ def _parse_property_section(self, props_match):
                 type_node.text.decode(self.encoding) if type_node is not None else None
             )
 
-            # extract validator functions
-            vf_list = prop_match.get("validator_functions")
-            vfs = None
-            if vf_list is not None:
-                vfs = [vf.text.decode(self.encoding) for vf in vf_list]
-
             # extract default
             default_node = prop_match.get("default")
             default = (
@@ -806,7 +841,7 @@ def _parse_attributes(self, attrs_nodes):
         data = f.read()
 
     tree = parser.parse(data)
-    class_parser = MatClassParser(tree.root_node, self.encoding)
+    class_parser = MatClassParser(tree.root_node, "utf-8")
     # fun_parser = MatFunctionParser(tree.root_node)
     import pdb
 
diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
index 123c2a6..32b409b 100644
--- a/tests/test_parse_mfile.py
+++ b/tests/test_parse_mfile.py
@@ -324,24 +324,24 @@ def test_PropTypeOld():
             "docstring": None,
             "attrs": {},
             "default": "zeros(3,1)",
-            "size": None,
-            "type": "vector",
+            "size": (":", "1"),
+            "type": "double",
             "validators": None,
         },  # 'type': ['double', 'vector'],
         "rotm": {
             "docstring": None,
             "attrs": {},
             "default": "zeros(3,3)",
-            "size": None,
-            "type": "matrix",
+            "size": (":", ":"),
+            "type": "double",
             "validators": None,
         },  # 'type': ['double', 'matrix'],
         "idx": {
             "docstring": None,
             "attrs": {},
             "default": "0",
-            "size": None,
-            "type": "scalar",
+            "size": ("1", "1"),
+            "type": "uint8",
             "validators": None,
         },  # 'type': ['uint8', 'scalar'],
     }

From a892a14cd914cd5d9bbb7bec66b80288dba72416 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Fri, 16 Aug 2024 17:16:34 +0200
Subject: [PATCH 30/45] fixing test_autodoc and test_matlabify, only comment
 and line continuation issues remain

---
 sphinxcontrib/mat_tree_sitter_parser.py | 118 +++++++++++++++++-------
 sphinxcontrib/mat_types.py              |   7 +-
 sphinxcontrib/matlab.py                 |   1 -
 tests/test_matlabify.py                 | 101 +++++++++++---------
 tests/test_parse_mfile.py               |   2 +-
 5 files changed, 143 insertions(+), 86 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 4ef6285..967b283 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -187,7 +187,7 @@
 q_script = ML_LANG.query(
     """
     (source_file
-        (comment) @docstring
+        (comment)? @docstring
     )
     """
 )
@@ -215,12 +215,12 @@ def get_row(point):
 
 
 def process_text_into_docstring(text, encoding):
-    docstring = text.decode(encoding)
+    docstring = text.decode(encoding, errors="backslashreplace")
     return re.sub(re_percent_remove, "", docstring)
 
 
 def process_default(text, encoding):
-    default = text.decode(encoding)
+    default = text.decode(encoding, errors="backslashreplace")
     return re.sub(re_assign_remove, "", default)
 
 
@@ -228,15 +228,18 @@ class MatScriptParser:
     def __init__(self, root_node, encoding):
         """Parse m script"""
         self.encoding = encoding
-        _, script_match = q_script.matches(root_node)[0]
-        docstring_node = script_match.get("docstring")
-        if docstring_node is not None:
-            self.docstring = process_text_into_docstring(
-                docstring_node.text, self.encoding
-            )
+        script_matches = q_script.matches(root_node)
+        if script_matches:
+            _, script_match = q_script.matches(root_node)[0]
+            docstring_node = script_match.get("docstring")
+            if docstring_node is not None:
+                self.docstring = process_text_into_docstring(
+                    docstring_node.text, self.encoding
+                )
+            else:
+                self.docstring = None
         else:
             self.docstring = None
-        print(self.docstring)
 
 
 class MatFunctionParser:
@@ -244,13 +247,18 @@ def __init__(self, root_node, encoding):
         """Parse Function definition"""
         self.encoding = encoding
         _, fun_match = q_fun.matches(root_node)[0]
-        self.name = fun_match.get("name").text.decode(self.encoding)
+        self.name = fun_match.get("name").text.decode(
+            self.encoding, errors="backslashreplace"
+        )
 
         # Get outputs (possibly more than one)
         self.retv = {}
         output_nodes = fun_match.get("outputs")
         if output_nodes is not None:
-            retv = [output.text.decode(self.encoding) for output in output_nodes]
+            retv = [
+                output.text.decode(self.encoding, errors="backslashreplace")
+                for output in output_nodes
+            ]
             for output in retv:
                 self.retv[output] = {}
 
@@ -258,7 +266,10 @@ def __init__(self, root_node, encoding):
         self.args = {}
         arg_nodes = fun_match.get("params")
         if arg_nodes is not None:
-            args = [arg.text.decode(self.encoding) for arg in arg_nodes]
+            args = [
+                arg.text.decode(self.encoding, errors="backslashreplace")
+                for arg in arg_nodes
+            ]
             for arg in args:
                 self.args[arg] = {}
 
@@ -296,25 +307,38 @@ def _parse_argument_section(self, argblock_node):
             _, arg_match = q_arg.matches(arg)[0]
 
             # extract name (this is always available so no need for None check)
-            name = [name.text.decode(self.encoding) for name in arg_match.get("name")]
+            name = [
+                name.text.decode(self.encoding, errors="backslashreplace")
+                for name in arg_match.get("name")
+            ]
 
             # extract dims list
             dims_list = arg_match.get("dims")
             dims = None
             if dims_list is not None:
-                dims = tuple([dim.text.decode(self.encoding) for dim in dims_list])
+                dims = tuple(
+                    [
+                        dim.text.decode(self.encoding, errors="backslashreplace")
+                        for dim in dims_list
+                    ]
+                )
 
             # extract type
             type_node = arg_match.get("type")
             typename = (
-                type_node.text.decode(self.encoding) if type_node is not None else None
+                type_node.text.decode(self.encoding, errors="backslashreplace")
+                if type_node is not None
+                else None
             )
 
             # extract validator functions
             vf_list = arg_match.get("validator_functions")
             vfs = None
             if vf_list is not None:
-                vfs = [vf.text.decode(self.encoding) for vf in vf_list]
+                vfs = [
+                    vf.text.decode(self.encoding, errors="backslashreplace")
+                    for vf in vf_list
+                ]
 
             # extract default
             default_node = arg_match.get("default")
@@ -440,10 +464,12 @@ def _parse_attributes(self, attrs_nodes):
         if attrs_nodes is not None:
             for attr_node in attrs_nodes:
                 _, attr_match = q_attributes.matches(attr_node)[0]
-                name = attr_match.get("name").text.decode(self.encoding)
+                name = attr_match.get("name").text.decode(
+                    self.encoding, errors="backslashreplace"
+                )
                 value_node = attr_match.get("value")
                 attrs[name] = (
-                    value_node.text.decode(self.encoding)
+                    value_node.text.decode(self.encoding, errors="backslashreplace")
                     if value_node is not None
                     else None
                 )
@@ -480,7 +506,10 @@ def __init__(self, root_node, encoding):
             for super_node in supers_nodes:
                 _, super_match = q_supers.matches(super_node)[0]
                 super_cls = tuple(
-                    [sec.text.decode(self.encoding) for sec in super_match.get("secs")]
+                    [
+                        sec.text.decode(self.encoding, errors="backslashreplace")
+                        for sec in super_match.get("secs")
+                    ]
                 )
                 self.supers.append(super_cls)
 
@@ -520,31 +549,40 @@ def _parse_property_section(self, props_match):
                 _, prop_match = q_property.matches(prop)[0]
 
                 # extract name (this is always available so no need for None check)
-                name = prop_match.get("name").text.decode(self.encoding)
+                name = prop_match.get("name").text.decode(
+                    self.encoding, errors="backslashreplace"
+                )
 
                 # extract dims list
                 dims_list = prop_match.get("dims")
                 dims = None
                 if dims_list is not None:
-                    dims = tuple([dim.text.decode(self.encoding) for dim in dims_list])
+                    dims = tuple(
+                        [
+                            dim.text.decode(self.encoding, errors="backslashreplace")
+                            for dim in dims_list
+                        ]
+                    )
 
                 # extract validator functions
                 vf_list = prop_match.get("validator_functions")
                 vfs = None
                 if vf_list is not None:
-                    vfs = [vf.text.decode(self.encoding) for vf in vf_list]
+                    vfs = [
+                        vf.text.decode(self.encoding, errors="backslashreplace")
+                        for vf in vf_list
+                    ]
             else:
                 # match property to extract details
                 _, prop_match = q_old_property.matches(prop)[0]
 
                 # extract name (this is always available so no need for None check)
-                name = prop_match.get("name").text.decode(self.encoding)
+                name = prop_match.get("name").text.decode(
+                    self.encoding, errors="backslashreplace"
+                )
 
                 # extract size type
                 size_type = prop_match.get("size_type")
-                import pdb
-
-                pdb.set_trace()
                 if size_type is None:
                     dims = None
                 elif size_type.text == b"scalar":
@@ -560,7 +598,9 @@ def _parse_property_section(self, props_match):
             # extract type
             type_node = prop_match.get("type")
             typename = (
-                type_node.text.decode(self.encoding) if type_node is not None else None
+                type_node.text.decode(self.encoding, errors="backslashreplace")
+                if type_node is not None
+                else None
             )
 
             # extract default
@@ -694,10 +734,15 @@ def _parse_enum_section(self, enums_match):
             return
         for enum in enums:
             _, enum_match = q_enum.matches(enum)[0]
-            name = enum_match.get("name").text.decode(self.encoding)
+            name = enum_match.get("name").text.decode(
+                self.encoding, errors="backslashreplace"
+            )
             arg_nodes = enum_match.get("args")
             if arg_nodes is not None:
-                args = [arg.text.decode(self.encoding) for arg in arg_nodes]
+                args = [
+                    arg.text.decode(self.encoding, errors="backslashreplace")
+                    for arg in arg_nodes
+                ]
             else:
                 args = None
 
@@ -757,7 +802,7 @@ def _parse_event_section(self, events_match):
         if events is None:
             return
         for event in events:
-            name = event.text.decode(self.encoding)
+            name = event.text.decode(self.encoding, errors="backslashreplace")
 
             docstring = ""
             # look forward for docstring
@@ -813,16 +858,21 @@ def _parse_attributes(self, attrs_nodes):
         if attrs_nodes is not None:
             for attr_node in attrs_nodes:
                 _, attr_match = q_attributes.matches(attr_node)[0]
-                name = attr_match.get("name").text.decode(self.encoding)
+                name = attr_match.get("name").text.decode(
+                    self.encoding, errors="backslashreplace"
+                )
                 value_node = attr_match.get("value")
                 rhs_node = attr_match.get("rhs")
                 if rhs_node is not None:
                     if rhs_node.type == "cell":
                         attrs[name] = [
-                            vn.text.decode(self.encoding) for vn in value_node
+                            vn.text.decode(self.encoding, errors="backslashreplace")
+                            for vn in value_node
                         ]
                     else:
-                        attrs[name] = value_node[0].text.decode(self.encoding)
+                        attrs[name] = value_node[0].text.decode(
+                            self.encoding, errors="backslashreplace"
+                        )
                 else:
                     attrs[name] = None
 
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index cf55766..4172743 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -444,11 +444,6 @@ def matlabify(objname):
             # make a full path out of basedir and objname
             fullpath = os.path.join(MatObject.basedir, objname)  # objname fullpath
 
-        # Check if path should be ignored
-        for ignore in MatObject.sphinx_env.config.matlab_ignore_dirs:
-            if Path(fullpath).is_relative_to(MatObject.basedir, ignore):
-                return None
-
         logger.debug(
             f"[sphinxcontrib-matlabdomain] matlabify {package=}, {objname=}, {fullpath=}"
         )
@@ -994,7 +989,7 @@ def __bases__(self):
             if isinstance(entity, MatClass) or "@" in name:
                 class_entity_table[name] = entity
 
-        for base in self.bases:
+        for base in bases_.keys():
             if base in class_entity_table.keys():
                 bases_[base] = class_entity_table[base]
 
diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py
index bf83a5a..5764f9f 100644
--- a/sphinxcontrib/matlab.py
+++ b/sphinxcontrib/matlab.py
@@ -879,7 +879,6 @@ def setup(app):
     app.add_domain(MATLABDomain)
     # autodoc
     app.add_config_value("matlab_src_dir", None, "env")
-    app.add_config_value("matlab_ignore_dirs", [], "env")
     app.add_config_value("matlab_src_encoding", None, "env")
     app.add_config_value("matlab_keep_package_prefix", False, "env")
     app.add_config_value("matlab_show_property_default_value", False, "env")
diff --git a/tests/test_matlabify.py b/tests/test_matlabify.py
index 1e54fcc..d27cbe9 100644
--- a/tests/test_matlabify.py
+++ b/tests/test_matlabify.py
@@ -138,12 +138,19 @@ def test_classes(mod):
     assert isinstance(cls, doc.MatClass)
     assert cls.getter("__name__") == "ClassInheritHandle"
     assert cls.getter("__module__") == "test_data"
-    assert cls.bases == ["handle", "my.super.Class"]
+    assert cls.bases == [("handle",), ("my", "super", "Class")]
     assert cls.attrs == {}
     assert cls.properties == {
-        "x": {"attrs": {}, "default": None, "docstring": " a property", "specs": ""}
+        "x": {
+            "attrs": {},
+            "default": None,
+            "docstring": "a property",
+            "size": None,
+            "type": None,
+            "validators": None,
+        }
     }
-    assert cls.getter("__doc__") == " a handle class\n\n :param x: a variable\n"
+    assert cls.getter("__doc__") == "a handle class\n\n:param x: a variable"
 
 
 def test_abstract_class(mod):
@@ -154,39 +161,43 @@ def test_abstract_class(mod):
     assert abc.getter("__module__") == "test_data"
     assert "ClassInheritHandle" in abc.getter("__bases__")
     assert "ClassExample" in abc.getter("__bases__")
-    assert abc.bases == ["ClassInheritHandle", "ClassExample"]
-    assert abc.attrs == {"Abstract": True, "Sealed": True}
+    assert abc.bases == [("ClassInheritHandle",), ("ClassExample",)]
+    assert abc.attrs == {"Abstract": None, "Sealed": None}
     assert abc.properties == {
         "y": {
             "default": None,
-            "docstring": " y variable",
+            "docstring": "y variable",
             "attrs": {"GetAccess": "private", "SetAccess": "private"},
-            "specs": "",
+            "size": None,
+            "type": None,
+            "validators": None,
         },
         "version": {
             "default": "'0.1.1-beta'",
-            "docstring": " version",
-            "attrs": {"Constant": True},
-            "specs": "",
+            "docstring": "version",
+            "attrs": {"Constant": None},
+            "size": None,
+            "type": None,
+            "validators": None,
         },
     }
     assert (
         abc.getter("__doc__")
-        == " an abstract class\n\n :param y: a variable\n :type y: double\n"
+        == "an abstract class\n\n:param y: a variable\n:type y: double"
     )
     assert abc.getter("__doc__") == abc.docstring
 
     abc_y = abc.getter("y")
     assert isinstance(abc_y, doc.MatProperty)
     assert abc_y.default is None
-    assert abc_y.docstring == " y variable"
+    assert abc_y.docstring == "y variable"
     assert abc_y.attrs == {"SetAccess": "private", "GetAccess": "private"}
 
     abc_version = abc.getter("version")
     assert isinstance(abc_version, doc.MatProperty)
     assert abc_version.default == "'0.1.1-beta'"
-    assert abc_version.docstring == " version"
-    assert abc_version.attrs == {"Constant": True}
+    assert abc_version.docstring == "version"
+    assert abc_version.attrs == {"Constant": None}
 
 
 def test_class_method(mod):
@@ -195,7 +206,7 @@ def test_class_method(mod):
     assert cls_meth.getter("__name__") == "ClassExample"
     assert (
         cls_meth.docstring
-        == " test class methods\n\n :param a: the input to :class:`ClassExample`\n"
+        == "test class methods\n\n:param a: the input to :class:`ClassExample`"
     )
     constructor = cls_meth.getter("ClassExample")
     assert isinstance(constructor, doc.MatMethod)
@@ -206,37 +217,37 @@ def test_class_method(mod):
     # TODO: mymethod.args will contain ['obj', 'b'] if run standalone
     #       but if test_autodoc.py is run, the 'obj' is removed
     assert mymethod.args
-    assert mymethod.args[-1] == "b"
-    assert mymethod.retv == ["c"]
+    assert "b" in list(mymethod.args.keys())
+    assert list(mymethod.retv.keys()) == ["c"]
     assert (
         mymethod.docstring
-        == " a method in :class:`ClassExample`\n\n :param b: an input to :meth:`mymethod`\n"
+        == "a method in :class:`ClassExample`\n\n:param b: an input to :meth:`mymethod`"
     )
 
 
 def test_submodule_class(mod):
     cls = mod.getter("submodule.TestFibonacci")
     assert isinstance(cls, doc.MatClass)
-    assert cls.docstring == " Test of MATLAB unittest method attributes\n"
+    assert cls.docstring == "Test of MATLAB unittest method attributes"
     assert cls.attrs == {}
-    assert cls.bases == ["matlab.unittest.TestCase"]
+    assert cls.bases == [("matlab", "unittest", "TestCase")]
     assert "compareFirstThreeElementsToExpected" in cls.methods
     assert cls.module == "test_data.submodule"
     assert cls.properties == {}
     method = cls.getter("compareFirstThreeElementsToExpected")
     assert isinstance(method, doc.MatMethod)
     assert method.name == "compareFirstThreeElementsToExpected"
-    assert method.retv is None
-    assert method.args == ["tc"]
-    assert method.docstring == " Test case that compares first three elements\n"
-    assert method.attrs == {"Test": True}
+    assert method.retv == {}
+    assert list(method.args.keys()) == ["tc"]
+    assert method.docstring == "Test case that compares first three elements"
+    assert method.attrs == {"Test": None}
 
 
 def test_folder_class(mod):
     cls_mod = mod.getter("@ClassFolder")
     assert isinstance(cls_mod, doc.MatModule)
     cls = cls_mod.getter("ClassFolder")
-    assert cls.docstring == " A class in a folder\n"
+    assert cls.docstring == "A class in a folder"
     assert cls.attrs == {}
     assert cls.bases == []
     assert cls.module == "test_data.@ClassFolder"
@@ -244,8 +255,10 @@ def test_folder_class(mod):
         "p": {
             "attrs": {},
             "default": None,
-            "docstring": " a property of a class folder",
-            "specs": "",
+            "docstring": "a property of a class folder",
+            "size": None,
+            "type": None,
+            "validators": None,
         }
     }
 
@@ -254,18 +267,18 @@ def test_folder_class(mod):
     func = cls_mod.getter("a_static_func")
     assert isinstance(func, doc.MatFunction)
     assert func.name == "a_static_func"
-    assert func.args == ["args"]
-    assert func.retv == ["retv"]
-    assert func.docstring == " method in :class:`~test_data.@ClassFolder`\n"
+    assert list(func.args.keys()) == ["args"]
+    assert list(func.retv.keys()) == ["retv"]
+    assert func.docstring == "method in :class:`~test_data.@ClassFolder`"
     func = cls_mod.getter("classMethod")
     assert isinstance(func, doc.MatFunction)
     assert func.name == "classMethod"
-    assert func.args == ["obj", "varargin"]
-    assert func.retv == ["varargout"]
+    assert list(func.args.keys()) == ["obj", "varargin"]
+    assert list(func.retv.keys()) == ["varargout"]
     assert (
         func.docstring
-        == " CLASSMETHOD A function within a package\n\n :param obj: An instance of this class.\n"
-        " :param varargin: Variable input arguments.\n :returns: varargout\n"
+        == "CLASSMETHOD A function within a package\n\n:param obj: An instance of this class.\n"
+        ":param varargin: Variable input arguments.\n:returns: varargout"
     )
 
 
@@ -274,11 +287,11 @@ def test_function(mod):
     func = mod.getter("f_example")
     assert isinstance(func, doc.MatFunction)
     assert func.getter("__name__") == "f_example"
-    assert func.retv == ["o1", "o2", "o3"]
-    assert func.args == ["a1", "a2"]
+    assert list(func.retv.keys()) == ["o1", "o2", "o3"]
+    assert list(func.args.keys()) == ["a1", "a2"]
     assert (
         func.docstring
-        == " a fun function\n\n :param a1: the first input\n :param a2: another input\n :returns: ``[o1, o2, o3]`` some outputs\n"
+        == "a fun function\n\n:param a1: the first input\n:param a2: another input\n:returns: ``[o1, o2, o3]`` some outputs"
     )
 
 
@@ -289,7 +302,7 @@ def test_function_getter(mod):
     assert func.getter("__name__") == "f_example"
     assert (
         func.getter("__doc__")
-        == " a fun function\n\n :param a1: the first input\n :param a2: another input\n :returns: ``[o1, o2, o3]`` some outputs\n"
+        == "a fun function\n\n:param a1: the first input\n:param a2: another input\n:returns: ``[o1, o2, o3]`` some outputs"
     )
     assert func.getter("__module__") == "test_data"
 
@@ -299,11 +312,11 @@ def test_package_function(mod):
     func = mod.getter("f_example")
     assert isinstance(func, doc.MatFunction)
     assert func.getter("__name__") == "f_example"
-    assert func.retv == ["o1", "o2", "o3"]
-    assert func.args == ["a1", "a2"]
+    assert list(func.retv.keys()) == ["o1", "o2", "o3"]
+    assert list(func.args.keys()) == ["a1", "a2"]
     assert (
         func.docstring
-        == " a fun function\n\n :param a1: the first input\n :param a2: another input\n :returns: ``[o1, o2, o3]`` some outputs\n"
+        == "a fun function\n\n:param a1: the first input\n:param a2: another input\n:returns: ``[o1, o2, o3]`` some outputs"
     )
 
 
@@ -311,13 +324,13 @@ def test_class_with_get_method(mod):
     the_class = mod.getter("ClassWithGetMethod")
     assert isinstance(the_class, doc.MatClass)
     assert the_class.getter("__name__") == "ClassWithGetMethod"
-    assert the_class.docstring == " Class with a method named get\n"
+    assert the_class.docstring == "Class with a method named get"
     the_method = the_class.getter("get")
     assert isinstance(the_method, doc.MatMethod)
     assert the_method.getter("__name__") == "get"
-    assert the_method.retv == ["varargout"]
+    assert list(the_method.retv.keys()) == ["varargout"]
     assert the_method.docstring.startswith(
-        " Gets the numbers 1-n and fills in the outputs with them"
+        "Gets the numbers 1-n and fills in the outputs with them"
     )
 
 
diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
index 32b409b..cb30c24 100644
--- a/tests/test_parse_mfile.py
+++ b/tests/test_parse_mfile.py
@@ -433,7 +433,7 @@ def test_file_parsing_with_no_encoding_specified():
     mfile = os.path.join(DIRNAME, "test_data", "f_with_latin_1.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_latin_1", "test_data")
     assert obj.name == "f_with_latin_1"
-    assert obj.docstring == "Analyse de la r\ufffdponse \ufffd un cr\ufffdneau"
+    assert obj.docstring == r"Analyse de la r\xe9dponse \xe0 un cr\xe9dneau"
 
 
 def test_ClassWithBuiltinOverload():

From a2eae1baf3939dbdabbc80e0135cd0d2bd0dafae Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Mon, 19 Aug 2024 14:37:20 +0200
Subject: [PATCH 31/45] fixing the last of the tests

---
 sphinxcontrib/mat_tree_sitter_parser.py | 68 ++++++++++++++++++++-----
 tests/test_parse_mfile.py               | 13 ++---
 2 files changed, 62 insertions(+), 19 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 967b283..f77c5bd 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -4,7 +4,7 @@
 import re
 
 # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-rpath = "/home/anton/tools/matlabdomain/tests/test_data/PropTypeOld.m"
+rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithTrailingCommentAfterBases.m"
 # rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m"
 
 tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
@@ -194,12 +194,16 @@
 
 q_get_set = ML_LANG.query("""["get." "set."]""")
 
+q_line_continuation = ML_LANG.query("(line_continuation) @lc")
+
 
 re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M)
+re_trim_line = re.compile(r"^[ \t]*", flags=re.M)
 re_assign_remove = re.compile(r"^=[ \t]*")
 
 
 def tree_sitter_is_0_21():
+    """Check if tree-sitter is v0.21.* in order to use the correct language initialization and syntax."""
     if not hasattr(tree_sitter_is_0_21, "is_21"):
         tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
         tree_sitter_is_0_21.is_21 = tree_sitter_ver[1] == 21  # memoize
@@ -215,13 +219,33 @@ def get_row(point):
 
 
 def process_text_into_docstring(text, encoding):
+    """Take a text bytestring and decode it into a docstring."""
     docstring = text.decode(encoding, errors="backslashreplace")
     return re.sub(re_percent_remove, "", docstring)
 
 
-def process_default(text, encoding):
-    default = text.decode(encoding, errors="backslashreplace")
-    return re.sub(re_assign_remove, "", default)
+def process_default(node, encoding):
+    """Take the node defining a default and remove any line continuations before generating the default."""
+    text = node.text
+    to_keep = set(range(node.end_byte - node.start_byte))
+    lc_matches = q_line_continuation.matches(node)
+    for _, match in lc_matches:
+        # TODO: this copies a lot; perhaps there is a better option.
+        lc = match["lc"]
+        cut_start = lc.start_byte - node.start_byte
+        cut_end = lc.end_byte - node.start_byte
+        to_keep -= set(range(cut_start, cut_end))
+    # NOTE: hardcoded endianness is fine because for one byte this does not matter.
+    #       See python bikeshed on possible defaults for this here:
+    #       https://discuss.python.org/t/what-should-be-the-default-value-for-int-to-bytes-byteorder/10616
+    new_text = b"".join(
+        [byte.to_bytes(1, "big") for idx, byte in enumerate(text) if idx in to_keep]
+    )
+    # TODO We may want to do an in-order traversal of the parse here to generate a "nice" reformatted single line
+    #      however doing so sufficiently generically is likely a major undertaking.
+    default = new_text.decode(encoding, errors="backslashreplace")
+    default = re.sub(re_assign_remove, "", default)
+    return re.sub(re_trim_line, "", default)
 
 
 class MatScriptParser:
@@ -281,13 +305,21 @@ def __init__(self, root_node, encoding):
 
         # get docstring
         docstring_node = fun_match.get("docstring")
-        docstring = None
+        docstring = ""
         if docstring_node is not None:
             prev_sib = docstring_node.prev_named_sibling
             if get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1:
-                docstring = process_text_into_docstring(
-                    docstring_node.text, self.encoding
-                )
+                if get_row(docstring_node.start_point) == get_row(prev_sib.end_point):
+                    # docstring shares a line with the previous node: drop only its first line
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
+                    split_ds = docstring.split("\n")
+                    docstring = "\n".join(split_ds[1:]) if len(split_ds) > 1 else ""
+                else:
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
 
         if not docstring:
             docstring = None
@@ -343,7 +375,7 @@ def _parse_argument_section(self, argblock_node):
             # extract default
             default_node = arg_match.get("default")
             default = (
-                process_default(default_node.text, self.encoding)
+                process_default(default_node, self.encoding)
                 if default_node is not None
                 else None
             )
@@ -515,12 +547,22 @@ def __init__(self, root_node, encoding):
 
         # get docstring and check that it consecutive
         docstring_node = class_match.get("docstring")
+        docstring = ""
         if docstring_node is not None:
             prev_node = docstring_node.prev_sibling
             if get_row(docstring_node.start_point) - get_row(prev_node.end_point) <= 1:
-                self.docstring = process_text_into_docstring(
-                    docstring_node.text, self.encoding
-                )
+                if get_row(docstring_node.start_point) == get_row(prev_node.end_point):
+                    # docstring shares a line with the previous node: drop only its first line
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
+                    split_ds = docstring.split("\n")
+                    docstring = "\n".join(split_ds[1:]) if len(split_ds) > 1 else ""
+                else:
+                    docstring = process_text_into_docstring(
+                        docstring_node.text, self.encoding
+                    )
+        self.docstring = docstring
 
         prop_matches = q_properties.matches(self.cls)
         method_matches = q_methods.matches(self.cls)
@@ -606,7 +648,7 @@ def _parse_property_section(self, props_match):
             # extract default
             default_node = prop_match.get("default")
             default = (
-                process_default(default_node.text, self.encoding)
+                process_default(default_node, self.encoding)
                 if default_node is not None
                 else None
             )
diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
index cb30c24..48f746e 100644
--- a/tests/test_parse_mfile.py
+++ b/tests/test_parse_mfile.py
@@ -433,7 +433,7 @@ def test_file_parsing_with_no_encoding_specified():
     mfile = os.path.join(DIRNAME, "test_data", "f_with_latin_1.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "f_with_latin_1", "test_data")
     assert obj.name == "f_with_latin_1"
-    assert obj.docstring == r"Analyse de la r\xe9dponse \xe0 un cr\xe9dneau"
+    assert obj.docstring == r"Analyse de la r\xe9ponse \xe0 un cr\xe9neau"
 
 
 def test_ClassWithBuiltinOverload():
@@ -740,7 +740,7 @@ def test_ClassWithLongPropertyDocstrings():
     )
     assert obj.name == "ClassWithLongPropertyDocstrings"
     assert (
-        obj.properties["a"]["docstring"] == "This line is deleted"
+        obj.properties["a"]["docstring"] == "This line is deleted\n"
         "This line documents another property"
     )
     assert obj.properties["b"]["docstring"] == "Document this property"
@@ -755,7 +755,7 @@ def test_ClassWithLongPropertyTrailingEmptyDocstrings():
     )
     assert obj.name == "ClassWithLongPropertyTrailingEmptyDocstrings"
     assert (
-        obj.properties["a"]["docstring"] == "This line is deleted"
+        obj.properties["a"]["docstring"] == "This line is deleted\n"
         "This line documents another property"
     )
     assert obj.properties["b"]["docstring"] == "Document this property"
@@ -795,6 +795,7 @@ def test_ClassWithTrailingCommentAfterBases():
 
 
 def test_ClassWithEllipsisProperties():
+    # TODO change this when the functionality to "nicely" generate one line defaults exists
     mfile = os.path.join(TESTDATA_ROOT, "ClassWithEllipsisProperties.m")
     obj = mat_types.MatObject.parse_mfile(
         mfile, "ClassWithEllipsisProperties", "test_data"
@@ -805,15 +806,15 @@ def test_ClassWithEllipsisProperties():
     assert len(obj.methods) == 0
 
     assert obj.properties["A"]["docstring"] == "an expression with ellipsis"
-    assert obj.properties["A"]["default"] == "1+2+3+4+5"
+    assert obj.properties["A"]["default"] == "1 + 2 + 3 +             4 + 5"
     assert (
         obj.properties["B"]["docstring"]
         == "a cell array with ellipsis and other array notation"
     )
-    assert obj.properties["B"]["default"].startswith("{'hello','bye';")
+    assert obj.properties["B"]["default"].startswith("{'hello', 'bye';")
     assert obj.properties["B"]["default"].endswith("}")
     assert obj.properties["C"]["docstring"] == "using end inside array"
-    assert obj.properties["C"]["default"] == "ClassWithEllipsisProperties.B(2:end,1)"
+    assert obj.properties["C"]["default"] == "ClassWithEllipsisProperties.B(2:end, 1)"
     assert obj.properties["D"]["docstring"] == "String with line continuation"
     assert obj.properties["D"]["default"] == "'...'"
     assert obj.properties["E"]["docstring"] == "The string with spaces"

From 3189e286752134d57ea3adb42a1e520039cbc48d Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Mon, 19 Aug 2024 15:07:51 +0200
Subject: [PATCH 32/45] remove dead code

---
 sphinxcontrib/mat_parser.py |  88 ----------------------
 sphinxcontrib/mat_types.py  | 141 +-----------------------------------
 2 files changed, 1 insertion(+), 228 deletions(-)
 delete mode 100644 sphinxcontrib/mat_parser.py

diff --git a/sphinxcontrib/mat_parser.py b/sphinxcontrib/mat_parser.py
deleted file mode 100644
index 55db502..0000000
--- a/sphinxcontrib/mat_parser.py
+++ /dev/null
@@ -1,88 +0,0 @@
-"""
-    sphinxcontrib.mat_parser
-    ~~~~~~~~~~~~~~~~~~~~~~~~
-
-    Functions for parsing MatlabLexer output.
-
-    :copyright: Copyright 2023-2024 by the sphinxcontrib-matlabdomain team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-import re
-import sphinx.util
-
-logger = sphinx.util.logging.getLogger("matlab-domain")
-
-
-def remove_comment_header(code):
-    """
-    Removes the comment header (if there is one) and empty lines from the
-    top of the current read code.
-    :param code: Current code string.
-    :type code: str
-    :returns: Code string without comments above a function, class or
-            procedure/script.
-    """
-    # get the line number when the comment header ends (incl. empty lines)
-    ln_pos = 0
-    for line in code.splitlines(True):
-        if re.match(r"[ \t]*(%|\n)", line):
-            ln_pos += 1
-        else:
-            break
-
-    if ln_pos > 0:
-        # remove the header block and empty lines from the top of the code
-        try:
-            code = code.split("\n", ln_pos)[ln_pos:][0]
-        except IndexError:
-            # only header and empty lines.
-            code = ""
-
-    return code
-
-
-def remove_line_continuations(code):
-    """
-    Removes line continuations (...) from code as functions must be on a
-    single line
-    :param code:
-    :type code: str
-    :return:
-    """
-    # pat = r"('.*)(\.\.\.)(.*')"
-    # code = re.sub(pat, r"\g<1>\g<3>", code, flags=re.MULTILINE)
-
-    pat = r"^([^%'\"\n]*)(\.\.\..*\n)"
-    code = re.sub(pat, r"\g<1>", code, flags=re.MULTILINE)
-    return code
-
-
-def fix_function_signatures(code):
-    """
-    Transforms function signatures with line continuations to a function
-    on a single line with () appended. Required because pygments cannot
-    handle this situation correctly.
-
-    :param code:
-    :type code: str
-    :return: Code string with functions on single line
-    """
-    pat = r"""^[ \t]*function[ \t.\n]*  # keyword (function)
-                        (\[?[\w, \t.\n]*\]?)      # outputs: group(1)
-                        [ \t.\n]*=[ \t.\n]*       # punctuation (eq)
-                        (\w+)[ \t.\n]*            # name: group(2)
-                        \(?([\w, \t.\n]*)\)?"""  # args: group(3)
-    pat = re.compile(pat, re.X | re.MULTILINE)  # search start of every line
-
-    # replacement function
-    def repl(m):
-        retv = m.group(0)
-        # if no args and doesn't end with parentheses, append "()"
-        if not (m.group(3) or m.group(0).endswith("()")):
-            retv = retv.replace(m.group(2), m.group(2) + "()")
-        return retv
-
-    code = pat.sub(repl, code)  # search for functions and apply replacement
-
-    return code
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index 4172743..f85f4b8 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -16,7 +16,6 @@
 from pygments.token import Token
 from zipfile import ZipFile
 import xml.etree.ElementTree as ET
-import sphinxcontrib.mat_parser as mat_parser
 from sphinxcontrib.mat_tree_sitter_parser import (
     MatClassParser,
     MatFunctionParser,
@@ -47,14 +46,6 @@
     "MatApplication",
 ]
 
-# MATLAB keywords that increment keyword-end pair count
-MATLAB_KEYWORD_REQUIRES_END = list(
-    zip(
-        (Token.Keyword,) * 7,
-        ("arguments", "for", "if", "switch", "try", "while", "parfor"),
-    )
-)
-
 
 # MATLAB attribute type dictionaries.
 
@@ -115,12 +106,6 @@
     "TestTags": list,
 }
 
-
-MATLAB_FUNC_BRACES_BEGIN = tuple(zip((Token.Punctuation,) * 2, ("(", "{")))
-MATLAB_FUNC_BRACES_END = tuple(zip((Token.Punctuation,) * 2, (")", "}")))
-MATLAB_PROP_BRACES_BEGIN = tuple(zip((Token.Punctuation,) * 3, ("(", "{", "[")))
-MATLAB_PROP_BRACES_END = tuple(zip((Token.Punctuation,) * 3, (")", "}", "]")))
-
 # Dictionary containing all MATLAB entities that are found in `matlab_src_dir`.
 # The dictionary keys are both the full dotted path, relative to the root.
 # Further, "short names" are added. Example:
@@ -734,130 +719,6 @@ def getter(self, name, *defargs):
                 return entity
 
 
-class MatMixin(object):
-    """
-    Methods to comparing and manipulating tokens in :class:`MatFunction` and
-    :class:`MatClass`.
-    """
-
-    def _tk_eq(self, idx, token):
-        """
-        Returns ``True`` if token keys are the same and values are equal.
-
-        :param idx: Index of token in :class:`MatObject`.
-        :type idx: int
-        :param token: Comparison token.
-        :type token: tuple
-        """
-        return self.tokens[idx][0] is token[0] and self.tokens[idx][1] == token[1]
-
-    def _tk_ne(self, idx, token):
-        """
-        Returns ``True`` if token keys are not the same or values are not
-        equal.
-
-        :param idx: Index of token in :class:`MatObject`.
-        :type idx: int
-        :param token: Comparison token.
-        :type token: tuple
-        """
-        return self.tokens[idx][0] is not token[0] or self.tokens[idx][1] != token[1]
-
-    def _eotk(self, idx):
-        """
-        Returns ``True`` if end of tokens is reached.
-        """
-        return idx >= len(self.tokens)
-
-    def _blanks(self, idx):
-        """
-        Returns number of blank text tokens.
-
-        :param idx: Token index.
-        :type idx: int
-        """
-        # idx0 = idx  # original index
-        # while self._tk_eq(idx, (Token.Text, ' ')): idx += 1
-        # return idx - idx0  # blanks
-        return self._indent(idx)
-
-    def _whitespace(self, idx):
-        """
-        Returns number of whitespaces text tokens, including blanks, newline
-        and tabs.
-
-        :param idx: Token index.
-        :type idx: int
-        """
-        idx0 = idx  # original index
-        while (
-            self.tokens[idx][0] is Token.Text
-            or self.tokens[idx][0] is Token.Text.Whitespace
-        ) and self.tokens[idx][1] in [" ", "\n", "\t"]:
-            idx += 1
-        return idx - idx0  # whitespace
-
-    def _indent(self, idx):
-        """
-        Returns indentation tabs or spaces. No indentation is zero.
-
-        :param idx: Token index.
-        :type idx: int
-        """
-        idx0 = idx  # original index
-        while self.tokens[idx][0] is Token.Text and self.tokens[idx][1] in [" ", "\t"]:
-            idx += 1
-        return idx - idx0  # indentation
-
-    def _propspec(self, idx):
-        """
-        Returns number of "property" specification tokens
-
-        :param idx: Token index.
-        :type idx: int
-        """
-        idx0 = idx  # original index
-        while (
-            self._tk_eq(idx, (Token.Punctuation, "@"))
-            or self._tk_eq(idx, (Token.Punctuation, "("))
-            or self._tk_eq(idx, (Token.Punctuation, ")"))
-            or self._tk_eq(idx, (Token.Punctuation, ","))
-            or self._tk_eq(idx, (Token.Punctuation, ":"))
-            or self.tokens[idx][0] == Token.Literal.Number.Integer
-            or self._tk_eq(idx, (Token.Punctuation, "{"))
-            or self._tk_eq(idx, (Token.Punctuation, "}"))
-            or self._tk_eq(idx, (Token.Punctuation, "["))
-            or self._tk_eq(idx, (Token.Punctuation, "]"))
-            or self._tk_eq(idx, (Token.Punctuation, "."))
-            or self.tokens[idx][0] == Token.Literal.String
-            or self.tokens[idx][0] == Token.Name
-            or (self.tokens[idx][0] == Token.Text and self.tokens[idx][1] != "\n")
-        ):
-            idx += 1
-
-        count = idx - idx0  # property spec count.
-        propspec = "".join([content for _, content in self.tokens[idx0 : idx0 + count]])
-        propspec = propspec.strip()
-        return count, propspec
-
-    def _is_newline(self, idx):
-        """Returns true if the token at index is a newline"""
-        return (
-            self.tokens[idx][0] in (Token.Text, Token.Text.Whitespace)
-            and self.tokens[idx][1] == "\n"
-        )
-
-
-def skip_whitespace(tks):
-    """Eats whitespace from list of tokens"""
-    while tks and (
-        tks[-1][0] == Token.Text.Whitespace
-        or tks[-1][0] == Token.Text
-        and tks[-1][1] in [" ", "\t"]
-    ):
-        tks.pop()
-
-
 class MatFunction(MatObject):
     """
     A MATLAB function.
@@ -907,7 +768,7 @@ def getter(self, name, *defargs):
             super(MatFunction, self).getter(name, *defargs)
 
 
-class MatClass(MatMixin, MatObject):
+class MatClass(MatObject):
     """
     A MATLAB class definition.
 

From 86b0fa6da761d5e39b82d8ddc26d98dd66ccc8e3 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 5 Sep 2024 14:59:44 +0200
Subject: [PATCH 33/45] address PR comments made by @joeced

---
 sphinxcontrib/mat_tree_sitter_parser.py | 90 +++++++++++++++----------
 sphinxcontrib/mat_types.py              |  4 +-
 tests/test_matlabify.py                 | 12 ++--
 tests/test_parse_mfile.py               | 26 +++----
 4 files changed, 75 insertions(+), 57 deletions(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index f77c5bd..814cae8 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -3,9 +3,41 @@
 from tree_sitter import Language, Parser
 import re
 
-# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m"
-rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithTrailingCommentAfterBases.m"
-# rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m"
+# Attribute default dictionary used to give default values for e.g. `Abstract` or `Static` when used without
+# a right hand side i.e. `classdef (Abstract)` vs `classdef (Abstract=true)`
+# From:
+#  - http://www.mathworks.com/help/matlab/matlab_oop/class-attributes.html
+#  - https://mathworks.com/help/matlab/matlab_oop/property-attributes.html
+#  - https://mathworks.com/help/matlab/matlab_prog/define-property-attributes-1.htm
+#  - https://mathworks.com/help/matlab/matlab_oop/method-attributes.html
+#  - https://mathworks.com/help/matlab/ref/matlab.unittest.testcase-class.html
+MATLAB_ATTRIBUTE_DEFAULTS = {
+    "AbortSet": True,
+    "Abstract": True,
+    "ClassSetupParameter": True,
+    "Constant": True,
+    "ConstructOnLoad": True,
+    "Dependent": True,
+    "DiscreteState": True,
+    "GetObservable": True,
+    "HandleCompatible": True,
+    "Hidden": True,
+    "MethodSetupParameter": True,
+    "NonCopyable": True,
+    "Nontunable": True,
+    "PartialMatchPriority": True,
+    "Sealed": True,
+    "SetObservable": True,
+    "Static": True,
+    "Test": None,
+    "TestClassSetup": None,
+    "TestClassTeardown": None,
+    "TestMethodSetup": None,
+    "TestMethodTeardown": None,
+    "TestParameter": None,
+    "Transient": True,
+}
+
 
 tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
 if tree_sitter_ver[1] == 21:
@@ -499,12 +531,21 @@ def _parse_attributes(self, attrs_nodes):
                 name = attr_match.get("name").text.decode(
                     self.encoding, errors="backslashreplace"
                 )
+
                 value_node = attr_match.get("value")
-                attrs[name] = (
-                    value_node.text.decode(self.encoding, errors="backslashreplace")
-                    if value_node is not None
-                    else None
-                )
+                rhs_node = attr_match.get("rhs")
+                if rhs_node is not None:
+                    if rhs_node.type == "cell":
+                        attrs[name] = [
+                            vn.text.decode(self.encoding, errors="backslashreplace")
+                            for vn in value_node
+                        ]
+                    else:
+                        attrs[name] = value_node[0].text.decode(
+                            self.encoding, errors="backslashreplace"
+                        )
+                else:
+                    attrs[name] = MATLAB_ATTRIBUTE_DEFAULTS.get(name)
         return attrs
 
 
@@ -537,13 +578,11 @@ def __init__(self, root_node, encoding):
         if supers_nodes is not None:
             for super_node in supers_nodes:
                 _, super_match = q_supers.matches(super_node)[0]
-                super_cls = tuple(
-                    [
-                        sec.text.decode(self.encoding, errors="backslashreplace")
-                        for sec in super_match.get("secs")
-                    ]
-                )
-                self.supers.append(super_cls)
+                super_cls = [
+                    sec.text.decode(self.encoding, errors="backslashreplace")
+                    for sec in super_match.get("secs")
+                ]
+                self.supers.append(".".join(super_cls))
 
         # get docstring and check that it consecutive
         docstring_node = class_match.get("docstring")
@@ -916,25 +955,6 @@ def _parse_attributes(self, attrs_nodes):
                             self.encoding, errors="backslashreplace"
                         )
                 else:
-                    attrs[name] = None
+                    attrs[name] = MATLAB_ATTRIBUTE_DEFAULTS.get(name)
 
         return attrs
-
-
-if __name__ == "__main__":
-    tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")])
-    if tree_sitter_ver[1] == 21:
-        parser = Parser()
-        parser.set_language(ML_LANG)
-    else:
-        parser = Parser(ML_LANG)
-
-    with open(rpath, "rb") as f:
-        data = f.read()
-
-    tree = parser.parse(data)
-    class_parser = MatClassParser(tree.root_node, "utf-8")
-    # fun_parser = MatFunctionParser(tree.root_node)
-    import pdb
-
-    pdb.set_trace()
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index f85f4b8..f843537 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -842,9 +842,7 @@ def __doc__(self):
 
     @property
     def __bases__(self):
-        bases_ = dict.fromkeys(
-            [".".join(base) for base in self.bases]
-        )  # make copy of bases
+        bases_ = dict.fromkeys([base for base in self.bases])  # make copy of bases
         class_entity_table = {}
         for name, entity in entities_table.items():
             if isinstance(entity, MatClass) or "@" in name:
diff --git a/tests/test_matlabify.py b/tests/test_matlabify.py
index d27cbe9..25761a7 100644
--- a/tests/test_matlabify.py
+++ b/tests/test_matlabify.py
@@ -138,7 +138,7 @@ def test_classes(mod):
     assert isinstance(cls, doc.MatClass)
     assert cls.getter("__name__") == "ClassInheritHandle"
     assert cls.getter("__module__") == "test_data"
-    assert cls.bases == [("handle",), ("my", "super", "Class")]
+    assert cls.bases == ["handle", "my.super.Class"]
     assert cls.attrs == {}
     assert cls.properties == {
         "x": {
@@ -161,8 +161,8 @@ def test_abstract_class(mod):
     assert abc.getter("__module__") == "test_data"
     assert "ClassInheritHandle" in abc.getter("__bases__")
     assert "ClassExample" in abc.getter("__bases__")
-    assert abc.bases == [("ClassInheritHandle",), ("ClassExample",)]
-    assert abc.attrs == {"Abstract": None, "Sealed": None}
+    assert abc.bases == ["ClassInheritHandle", "ClassExample"]
+    assert abc.attrs == {"Abstract": True, "Sealed": True}
     assert abc.properties == {
         "y": {
             "default": None,
@@ -175,7 +175,7 @@ def test_abstract_class(mod):
         "version": {
             "default": "'0.1.1-beta'",
             "docstring": "version",
-            "attrs": {"Constant": None},
+            "attrs": {"Constant": True},
             "size": None,
             "type": None,
             "validators": None,
@@ -197,7 +197,7 @@ def test_abstract_class(mod):
     assert isinstance(abc_version, doc.MatProperty)
     assert abc_version.default == "'0.1.1-beta'"
     assert abc_version.docstring == "version"
-    assert abc_version.attrs == {"Constant": None}
+    assert abc_version.attrs == {"Constant": True}
 
 
 def test_class_method(mod):
@@ -230,7 +230,7 @@ def test_submodule_class(mod):
     assert isinstance(cls, doc.MatClass)
     assert cls.docstring == "Test of MATLAB unittest method attributes"
     assert cls.attrs == {}
-    assert cls.bases == [("matlab", "unittest", "TestCase")]
+    assert cls.bases == ["matlab.unittest.TestCase"]
     assert "compareFirstThreeElementsToExpected" in cls.methods
     assert cls.module == "test_data.submodule"
     assert cls.properties == {}
diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
index 48f746e..9ea6ea5 100644
--- a/tests/test_parse_mfile.py
+++ b/tests/test_parse_mfile.py
@@ -366,8 +366,8 @@ def test_ClassWithMethodAttributes():
     assert obj.methods["testProtected"].attrs == {"Access": "protected"}
     assert obj.methods["testPrivate1"].attrs == {"Access": "private"}
     assert obj.methods["testPrivate2"].attrs == {"Access": "'private'"}
-    assert obj.methods["testHidden"].attrs == {"Hidden": None}
-    assert obj.methods["testStatic"].attrs == {"Static": None}
+    assert obj.methods["testHidden"].attrs == {"Hidden": True}
+    assert obj.methods["testStatic"].attrs == {"Static": True}
     assert obj.methods["testFriend1"].attrs == {"Access": "?OtherClass"}
     assert obj.methods["testFriend2"].attrs == {
         "Access": ["?OtherClass", "?pack.OtherClass2"]
@@ -397,13 +397,13 @@ def test_ClassWithPropertyAttributes():
         "GetAccess": "private",
         "SetAccess": "private",
     }
-    assert obj.properties["TEST_CONSTANT"]["attrs"] == {"Constant": None}
+    assert obj.properties["TEST_CONSTANT"]["attrs"] == {"Constant": True}
     assert obj.properties["TEST_CONSTANT_PROTECTED"]["attrs"] == {
         "Access": "protected",
-        "Constant": None,
+        "Constant": True,
     }
-    assert obj.properties["testDependent"]["attrs"] == {"Dependent": None}
-    assert obj.properties["testHidden"]["attrs"] == {"Hidden": None}
+    assert obj.properties["testDependent"]["attrs"] == {"Dependent": True}
+    assert obj.properties["testHidden"]["attrs"] == {"Hidden": True}
 
 
 def test_ClassWithoutIndent():
@@ -564,7 +564,7 @@ def test_ClassWithAttributes():
     obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithAttributes", "test_data")
     assert isinstance(obj, mat_types.MatClass)
     assert obj.name == "ClassWithAttributes"
-    assert obj.attrs == {"Sealed": None}
+    assert obj.attrs == {"Sealed": True}
 
 
 # Fails when running with other test files. Warnings are already logged.
@@ -696,7 +696,7 @@ def test_ClassWithMethodsWithSpaces():
     assert (
         obj.docstring == "Class with methods that have space after the function name."
     )
-    assert obj.methods["static_method"].attrs == {"Static": None}
+    assert obj.methods["static_method"].attrs == {"Static": True}
 
 
 def test_ClassContainingParfor():
@@ -779,7 +779,7 @@ def test_ClassWithTrailingCommentAfterBases():
         mfile, "ClassWithTrailingCommentAfterBases", "test_data"
     )
     assert obj.name == "ClassWithTrailingCommentAfterBases"
-    assert obj.bases == [("handle",), ("my", "super", "Class")]
+    assert obj.bases == ["handle", "my.super.Class"]
     assert (
         obj.docstring
         == "test class methods\n\n:param a: the input to :class:`ClassWithTrailingCommentAfterBases`"
@@ -801,7 +801,7 @@ def test_ClassWithEllipsisProperties():
         mfile, "ClassWithEllipsisProperties", "test_data"
     )
     assert obj.name == "ClassWithEllipsisProperties"
-    assert obj.bases == [("handle",)]
+    assert obj.bases == ["handle"]
     assert obj.docstring == "stuff"
     assert len(obj.methods) == 0
 
@@ -846,7 +846,7 @@ def test_ClassWithTrailingSemicolons():
         obj.docstring
         == "Smoothing like it is performed withing Cxx >v7.0 (until v8.2 at least).\nUses constant 228p_12k frequency vector:"
     )
-    assert obj.bases == [("hgsetget",)]
+    assert obj.bases == ["hgsetget"]
     assert list(obj.methods.keys()) == [
         "ClassWithTrailingSemicolons",
         "CxxSmoothing",
@@ -896,7 +896,7 @@ def test_ClassWithNamedAsArguments():
     mfile = os.path.join(TESTDATA_ROOT, "arguments.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "arguments", "test_data")
     assert obj.name == "arguments"
-    assert obj.bases == [("handle",), ("matlab", "mixin", "Copyable")]
+    assert obj.bases == ["handle", "matlab.mixin.Copyable"]
     assert "value" in obj.properties
     meth = obj.methods["arguments"]
     assert meth.docstring == "Constructor for arguments"
@@ -919,7 +919,7 @@ def test_ClassWithTests():
     mfile = os.path.join(TESTDATA_ROOT, "ClassWithTests.m")
     obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithTests", "test_data")
     assert obj.name == "ClassWithTests"
-    assert obj.bases == [("matlab", "unittest", "TestCase")]
+    assert obj.bases == ["matlab.unittest.TestCase"]
     assert "testRunning" in obj.methods
     testRunning = obj.methods["testRunning"]
     assert testRunning.attrs["TestTags"] == ["'Unit'"]

From d3b8aceaf2640974bfc19040416a9b80f7009b6f Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 5 Sep 2024 15:12:08 +0200
Subject: [PATCH 34/45] fix

---
 sphinxcontrib/mat_tree_sitter_parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 814cae8..51420ae 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -221,6 +221,7 @@
     (source_file
         (comment)? @docstring
     )
+
     """
 )
 

From ae5c3579c73915a9a468af9dd5100501d5340247 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 5 Sep 2024 15:16:22 +0200
Subject: [PATCH 35/45] lint

---
 sphinxcontrib/mat_documenters.py        | 14 +++++++-------
 sphinxcontrib/mat_tree_sitter_parser.py |  1 -
 sphinxcontrib/mat_types.py              |  8 ++++++--
 sphinxcontrib/matlab.py                 |  7 +++----
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/sphinxcontrib/mat_documenters.py b/sphinxcontrib/mat_documenters.py
index 4c58cca..f9bdf4b 100644
--- a/sphinxcontrib/mat_documenters.py
+++ b/sphinxcontrib/mat_documenters.py
@@ -558,9 +558,9 @@ def member_is_friend_of(member, friends):
 
         def member_is_enum(member):
             return isinstance(member, MatEnumeration)
-        
+
         ret = []
-        
+
         # search for members in source code too
         namespace = ".".join(self.objpath)  # will be empty for modules
 
@@ -1242,7 +1242,9 @@ def document_members(self, all_members=False):
         other_names = [
             membername
             for (membername, member) in filtered_members
-            if not isinstance(member, MatMethod) and not isinstance(member, MatProperty) and not isinstance(member, MatEnumeration)
+            if not isinstance(member, MatMethod)
+            and not isinstance(member, MatProperty)
+            and not isinstance(member, MatEnumeration)
             # exclude parent modules with names matching members (as in Myclass.Myclass)
             and not (hasattr(member, "module") and member.name == member.module)
         ]
@@ -1295,12 +1297,10 @@ def document_members(self, all_members=False):
             self.document_member_section(
                 "Property Summary", non_properties, all_members
             )
-            
+
         # enumss
         if enum_names:
-            self.document_member_section(
-                "Enumeration Values", non_enums, all_members
-            )
+            self.document_member_section("Enumeration Values", non_enums, all_members)
 
         # methods
         if meth_names:
diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 51420ae..814cae8 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -221,7 +221,6 @@
     (source_file
         (comment)? @docstring
     )
-
     """
 )
 
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index f843537..7bfa218 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -877,7 +877,9 @@ def getter(self, name, *defargs):
         elif name == "__dict__":
             objdict = dict([(pn, self.getter(pn)) for pn in self.properties.keys()])
             objdict.update(self.methods)
-            objdict.update(dict([(en, self.getter(en)) for en in self.enumerations.keys()]))
+            objdict.update(
+                dict([(en, self.getter(en)) for en in self.enumerations.keys()])
+            )
             return objdict
         else:
             super(MatClass, self).getter(name, *defargs)
@@ -904,12 +906,13 @@ def __module__(self):
     def __doc__(self):
         return self.docstring
 
+
 class MatEnumeration(MatObject):
     def __init__(self, name, cls, attrs):
         super(MatEnumeration, self).__init__(name)
         self.cls = cls
         self.docstring = attrs["docstring"]
-        
+
     def ref_role(self):
         """Returns role to use for references to this object (e.g. when generating auto-links)"""
         return "enum"
@@ -922,6 +925,7 @@ def __module__(self):
     def __doc__(self):
         return self.docstring
 
+
 class MatMethod(MatFunction):
     def __init__(self, name, parsed_function, modname, cls):
         self.name = name
diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py
index 5764f9f..01755f3 100644
--- a/sphinxcontrib/matlab.py
+++ b/sphinxcontrib/matlab.py
@@ -346,14 +346,13 @@ def _object_hierarchy_parts(self, sig):
 
         This method must not be used outwith table of contents generation.
         """
-        parts = sig.attributes.get('module').split('.')
-        parts.append(sig.attributes.get('fullname'))
-        #import pdb;pdb.set_trace()
+        parts = sig.attributes.get("module").split(".")
+        parts.append(sig.attributes.get("fullname"))
         return tuple(parts)
 
     def _toc_entry_name(self, sig):
         # TODO respecting the configuration setting ``toc_object_entries_show_parents``
-        return sig.attributes.get('fullname')
+        return sig.attributes.get("fullname")
 
     def get_signature_prefix(self, sig):
         return self.objtype + " "

From 952af04c7a3c6eb36b2659d98262e825f15a573a Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 5 Sep 2024 15:22:11 +0200
Subject: [PATCH 36/45] fix typo in __all__

---
 sphinxcontrib/mat_types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index 7bfa218..ed08b19 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -38,7 +38,7 @@
     "MatFunction",
     "MatClass",
     "MatProperty",
-    "MatEnumerations",
+    "MatEnumeration",
     "MatMethod",
     "MatScript",
     "MatException",

From d4e64f24becfac11b6e3c003ac90678718e23dec Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 5 Sep 2024 16:01:24 +0200
Subject: [PATCH 37/45] remove Pygments dependencies

---
 setup.py                                |  3 +-
 sphinxcontrib/mat_tree_sitter_parser.py | 84 +++++++++++--------------
 tox.ini                                 |  4 +-
 3 files changed, 38 insertions(+), 53 deletions(-)

diff --git a/setup.py b/setup.py
index 5d459e6..a52fc4d 100644
--- a/setup.py
+++ b/setup.py
@@ -7,8 +7,7 @@
 
 requires = [
     "Sphinx>=4.0.0",
-    "Pygments>=2.0.1",
-    "tree-sitter-matlab @ git+https://github.com/apozharski/tree-sitter-matlab.git",
+    "tree-sitter-matlab>=1.0.2",
     "tree-sitter>=0.21.3",
 ]
 
diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 814cae8..ab21455 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -88,7 +88,7 @@
     (attributes
         [(attribute) @attrs _]+
     )?
-    [(property) @properties (old_property) @properties _]+
+    [(property) @properties  _]+
     ) @prop_block
 """
 )
@@ -127,6 +127,8 @@
          [[(spread_operator) (number)] @dims _]+
      )?
      (identifier)? @type
+     .
+     (identifier)? @size_type
      (validation_functions
          [[(identifier) (function_call)] @validation_functions _]+
      )?
@@ -138,9 +140,9 @@
 
 q_old_property = ML_LANG.query(
     """
-    (old_property name: (identifier) @name
+    (property name: (identifier) @name
      (identifier) @type
-     (old_property_type)? @size_type
+     (identifier)? @size_type
      (default_value)? @default
      (comment)? @docstring
     )
@@ -625,56 +627,42 @@ def _parse_property_section(self, props_match):
         attrs_nodes = props_match.get("attrs")
         attrs = self._parse_attributes(attrs_nodes)
         for prop in properties:
-            if prop.type == "property":
-                # match property to extract details
-                _, prop_match = q_property.matches(prop)[0]
-
-                # extract name (this is always available so no need for None check)
-                name = prop_match.get("name").text.decode(
-                    self.encoding, errors="backslashreplace"
-                )
+            # match property to extract details
+            _, prop_match = q_property.matches(prop)[0]
 
-                # extract dims list
-                dims_list = prop_match.get("dims")
-                dims = None
-                if dims_list is not None:
-                    dims = tuple(
-                        [
-                            dim.text.decode(self.encoding, errors="backslashreplace")
-                            for dim in dims_list
-                        ]
-                    )
+            # extract name (this is always available so no need for None check)
+            name = prop_match.get("name").text.decode(
+                self.encoding, errors="backslashreplace"
+            )
 
-                # extract validator functions
-                vf_list = prop_match.get("validator_functions")
-                vfs = None
-                if vf_list is not None:
-                    vfs = [
-                        vf.text.decode(self.encoding, errors="backslashreplace")
-                        for vf in vf_list
+            # extract dims list
+            size_type = prop_match.get("size_type")
+            dims_list = prop_match.get("dims")
+            dims = None
+            if dims_list is not None:
+                dims = tuple(
+                    [
+                        dim.text.decode(self.encoding, errors="backslashreplace")
+                        for dim in dims_list
                     ]
-            else:
-                # match property to extract details
-                _, prop_match = q_old_property.matches(prop)[0]
-
-                # extract name (this is always available so no need for None check)
-                name = prop_match.get("name").text.decode(
-                    self.encoding, errors="backslashreplace"
                 )
+            elif size_type is None:
+                dims = None
+            elif size_type.text == b"scalar":
+                dims = ("1", "1")
+            elif size_type.text == b"vector":
+                dims = (":", "1")
+            elif size_type.text == b"matrix":
+                dims = (":", ":")
 
-                # extract size type
-                size_type = prop_match.get("size_type")
-                if size_type is None:
-                    dims = None
-                elif size_type.text == b"scalar":
-                    dims = ("1", "1")
-                elif size_type.text == b"vector":
-                    dims = (":", "1")
-                elif size_type.text == b"matrix":
-                    dims = (":", ":")
-
-                # No validator functions
-                vfs = None
+            # extract validator functions
+            vf_list = prop_match.get("validator_functions")
+            vfs = None
+            if vf_list is not None:
+                vfs = [
+                    vf.text.decode(self.encoding, errors="backslashreplace")
+                    for vf in vf_list
+                ]
 
             # extract type
             type_node = prop_match.get("type")
diff --git a/tox.ini b/tox.ini
index 9744f88..deb1103 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py{38,39,310,311}-sphinx{45,53,60,latest}-pygments{213,latest}
+envlist = py{38,39,310,311}-sphinx{45,53,60,latest}
 
 
 [testenv]
@@ -18,8 +18,6 @@ deps =
     sphinx70: Sphinx>=7.0,<8.0
     sphinxlatest: Sphinx
     sphinxdev: https://github.com/sphinx-doc/sphinx/archive/refs/heads/master.zip
-    pygments213: Pygments>=2.0.1,<2.14.0
-    pygmentlatest: Pygments
 commands =
     pytest -vv {posargs} tests/
     sphinx-build -b html -d {envtmpdir}/doctrees tests/test_docs {envtmpdir}/html

From 7da1e6515a47067fdd67c3d1b451c9dde8525147 Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 5 Sep 2024 16:22:35 +0200
Subject: [PATCH 38/45] py-tree-sitter v0.23.0 has a breaking change for
 Query.match()

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index a52fc4d..32d6b09 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@
 requires = [
     "Sphinx>=4.0.0",
     "tree-sitter-matlab>=1.0.2",
-    "tree-sitter>=0.21.3",
+    "tree-sitter>=0.21.3,<0.23.0",
 ]
 
 setup(

From a48c23636d08b3540530e04fb76e6bfa0543e3ca Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Thu, 5 Sep 2024 16:56:52 +0200
Subject: [PATCH 39/45] remove duplicate entry in yaml

---
 .github/workflows/python-package.yml | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index ee7aae6..41775ab 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -54,28 +54,6 @@ jobs:
         tox -e "${{matrix.python-version}}-sphinx${{matrix.sphinx-version}}-pygments${{matrix.pygments-version}}"
 
 
-  test-sphinx-latest:
-    name: Run tests for Python ${{ matrix.python-version }}, Sphinx ${{ matrix.sphinx-version }},  Pygments ${{ matrix.pygments-version }}
-    timeout-minutes: 5
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.10", "3.11", "3.12"]
-        sphinx-version: ["dev"]
-        pygments-version: ["latest"]
-    steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-        architecture: x64
-    - name: Run with Tox
-      run: |
-        pip install tox==4.8.0
-        tox -e "${{matrix.python-version}}-sphinx${{matrix.sphinx-version}}-pygments${{matrix.pygments-version}}"
-
-
   test-sphinx-latest:
     name: Run tests for Python ${{ matrix.python-version }}, Sphinx ${{ matrix.sphinx-version }},  Pygments ${{ matrix.pygments-version }}
     timeout-minutes: 5

From 1dfa6c791224417672c68355520bd0bab079f703 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Fri, 6 Sep 2024 00:34:22 +0200
Subject: [PATCH 40/45] fix spec printing

---
 sphinxcontrib/mat_documenters.py        | 11 ++++++++++-
 sphinxcontrib/mat_tree_sitter_parser.py | 10 +++++-----
 sphinxcontrib/mat_types.py              |  4 +++-
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/sphinxcontrib/mat_documenters.py b/sphinxcontrib/mat_documenters.py
index f9bdf4b..4d5e51a 100644
--- a/sphinxcontrib/mat_documenters.py
+++ b/sphinxcontrib/mat_documenters.py
@@ -1475,7 +1475,16 @@ def add_directive_header(self, sig):
                     obj_default = " = " + obj_default
 
                 if self.env.config.matlab_show_property_specs:
-                    obj_default = self.object.specs + obj_default
+                    prop_spec = ""
+                    if self.object.size is not None:
+                        prop_spec = prop_spec + "(" + ",".join(self.object.size) + ")"
+                    if self.object.type is not None:
+                        prop_spec = prop_spec + " " + self.object.type
+                    if self.object.validators is not None:
+                        prop_spec = (
+                            prop_spec + " {" + ",".join(self.object.validators) + "}"
+                        )
+                    obj_default = prop_spec + obj_default
 
                 self.add_line("   :annotation: " + obj_default, "<autodoc>")
         elif self.options.annotation is SUPPRESS:
diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index ab21455..988ecc7 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -124,13 +124,13 @@
     """
     (property name: (identifier) @name
      (dimensions
-         [[(spread_operator) (number)] @dims _]+
+         [(spread_operator) @dims (number) @dims _]+
      )?
      (identifier)? @type
      .
      (identifier)? @size_type
      (validation_functions
-         [[(identifier) (function_call)] @validation_functions _]+
+         [(identifier) @validation_functions (function_call) @validation_functions _]+
      )?
      (default_value)? @default
      (comment)? @docstring
@@ -398,7 +398,7 @@ def _parse_argument_section(self, argblock_node):
             )
 
             # extract validator functions
-            vf_list = arg_match.get("validator_functions")
+            vf_list = arg_match.get("validation_functions")
             vfs = None
             if vf_list is not None:
                 vfs = [
@@ -629,7 +629,7 @@ def _parse_property_section(self, props_match):
         for prop in properties:
             # match property to extract details
             _, prop_match = q_property.matches(prop)[0]
-
+            print(prop.sexp())
             # extract name (this is always available so no need for None check)
             name = prop_match.get("name").text.decode(
                 self.encoding, errors="backslashreplace"
@@ -656,7 +656,7 @@ def _parse_property_section(self, props_match):
                 dims = (":", ":")
 
             # extract validator functions
-            vf_list = prop_match.get("validator_functions")
+            vf_list = prop_match.get("validation_functions")
             vfs = None
             if vf_list is not None:
                 vfs = [
diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py
index ed08b19..05aa151 100644
--- a/sphinxcontrib/mat_types.py
+++ b/sphinxcontrib/mat_types.py
@@ -892,7 +892,9 @@ def __init__(self, name, cls, attrs):
         self.attrs = attrs["attrs"]
         self.default = attrs["default"]
         self.docstring = attrs["docstring"]
-        self.specs = attrs["specs"]
+        self.size = attrs["size"]
+        self.type = attrs["type"]
+        self.validators = attrs["validators"]
 
     def ref_role(self):
         """Returns role to use for references to this object (e.g. when generating auto-links)"""

From d182b9c1c31072525b5e6533c70d4e1a263bb08c Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sun, 8 Sep 2024 09:06:48 +0200
Subject: [PATCH 41/45] minor fixes for arg block parsing and a test

---
 sphinxcontrib/mat_tree_sitter_parser.py       |  4 ++--
 tests/test_data/f_with_input_argument_block.m | 12 ++++++++++++
 tests/test_parse_mfile.py                     | 19 ++++++++++++++++++-
 3 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_data/f_with_input_argument_block.m

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 988ecc7..5090d1c 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -206,13 +206,13 @@
             )
         ]
      (dimensions
-         [[(spread_operator) (number)] @dims _]+
+         [(spread_operator) @dims (number) @dims _]+
      )?
      (identifier)? @type
      (validation_functions
          [[(identifier) (function_call)] @validation_functions _]+
      )?
-     (default_value (number))? @default
+     (default_value [(number) (identifier)])? @default
      (comment)? @docstring
     )
 """
diff --git a/tests/test_data/f_with_input_argument_block.m b/tests/test_data/f_with_input_argument_block.m
new file mode 100644
index 0000000..5b191c0
--- /dev/null
+++ b/tests/test_data/f_with_input_argument_block.m
@@ -0,0 +1,12 @@
+function [o1, o2, o3] = f_with_input_argument_block(a1, a2)
+    arguments
+        a1(1,1) double = 0 % the first input
+        a2(1,1) double = a1 % another input
+    end
+    o1 = a1; o2 = a2; o3 = a1 + a2;
+    for n = 1:3
+        o1 = o2;
+        o2 = o3;
+        o3 = o1 + o2;
+    end
+end
diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
index 9ea6ea5..3074697 100644
--- a/tests/test_parse_mfile.py
+++ b/tests/test_parse_mfile.py
@@ -612,7 +612,6 @@ def test_f_with_function_variable():
     assert obj.name == "f_with_function_variable"
     assert list(obj.retv.keys()) == ["obj"]
     assert list(obj.args.keys()) == ["the_functions", "~"]
-    print(obj.docstring)
 
 
 def test_ClassWithGetterSetter():
@@ -925,5 +924,23 @@ def test_ClassWithTests():
     assert testRunning.attrs["TestTags"] == ["'Unit'"]
 
 
+def test_f_with_input_argument_block():
+    mfile = os.path.join(DIRNAME, "test_data", "f_with_input_argument_block.m")
+    obj = mat_types.MatObject.parse_mfile(
+        mfile, "f_with_input_argument_block", "test_data"
+    )
+    assert obj.name == "f_with_input_argument_block"
+    assert list(obj.retv.keys()) == ["o1", "o2", "o3"]
+    assert list(obj.args.keys()) == ["a1", "a2"]
+
+    assert obj.args["a1"]["size"] == ("1", "1")
+    assert obj.args["a1"]["default"] == "0"
+    assert obj.args["a1"]["docstring"] == "the first input"
+
+    assert obj.args["a2"]["size"] == ("1", "1")
+    assert obj.args["a2"]["default"] == "a1"
+    assert obj.args["a2"]["docstring"] == "another input"
+
+
 if __name__ == "__main__":
     pytest.main([os.path.abspath(__file__)])

From aa078e11dae5f24198219b4cf9f697719c0db2a6 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sun, 8 Sep 2024 10:02:33 +0200
Subject: [PATCH 42/45] fix bug regarding output block argument parsing and add
 test

---
 sphinxcontrib/mat_tree_sitter_parser.py       | 24 +++---------------
 .../test_data/f_with_output_argument_block.m  | 13 ++++++++++
 tests/test_parse_mfile.py                     | 25 +++++++++++++++++++
 3 files changed, 41 insertions(+), 21 deletions(-)
 create mode 100644 tests/test_data/f_with_output_argument_block.m

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index 5090d1c..c735cbf 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -188,7 +188,7 @@
     (arguments_statement
     .
     (attributes
-        [(attribute) @attrs _]+
+        [(identifier) @attrs _]*
     )?
     .
     [(property) @args _]*
@@ -525,29 +525,11 @@ def _parse_argument_section(self, argblock_node):
                 pass
 
     def _parse_attributes(self, attrs_nodes):
-        # TOOD deduplicated this
         attrs = {}
         if attrs_nodes is not None:
             for attr_node in attrs_nodes:
-                _, attr_match = q_attributes.matches(attr_node)[0]
-                name = attr_match.get("name").text.decode(
-                    self.encoding, errors="backslashreplace"
-                )
-
-                value_node = attr_match.get("value")
-                rhs_node = attr_match.get("rhs")
-                if rhs_node is not None:
-                    if rhs_node.type == "cell":
-                        attrs[name] = [
-                            vn.text.decode(self.encoding, errors="backslashreplace")
-                            for vn in value_node
-                        ]
-                    else:
-                        attrs[name] = value_node[0].text.decode(
-                            self.encoding, errors="backslashreplace"
-                        )
-                else:
-                    attrs[name] = MATLAB_ATTRIBUTE_DEFAULTS.get(name)
+                name = attr_node.text.decode(self.encoding, errors="backslashreplace")
+                attrs[name] = None
         return attrs
 
 
diff --git a/tests/test_data/f_with_output_argument_block.m b/tests/test_data/f_with_output_argument_block.m
new file mode 100644
index 0000000..e063f7b
--- /dev/null
+++ b/tests/test_data/f_with_output_argument_block.m
@@ -0,0 +1,13 @@
+function [o1, o2, o3] = f_with_output_argument_block(a1, a2)
+    arguments(Output)
+        o1(1,1) double % Output one
+        o2(1,:) double % Another output
+        o3(1,1) double {mustBePositive} % A third output
+    end
+    o1 = a1; o2 = a2; o3 = a1 + a2;
+    for n = 1:3
+        o1 = o2;
+        o2 = o3;
+        o3 = o1 + o2;
+    end
+end
diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py
index 3074697..bfd3f78 100644
--- a/tests/test_parse_mfile.py
+++ b/tests/test_parse_mfile.py
@@ -935,12 +935,37 @@ def test_f_with_input_argument_block():
 
     assert obj.args["a1"]["size"] == ("1", "1")
     assert obj.args["a1"]["default"] == "0"
+    assert obj.args["a1"]["type"] == "double"
     assert obj.args["a1"]["docstring"] == "the first input"
 
     assert obj.args["a2"]["size"] == ("1", "1")
     assert obj.args["a2"]["default"] == "a1"
+    assert obj.args["a2"]["type"] == "double"
     assert obj.args["a2"]["docstring"] == "another input"
 
 
+def test_f_with_output_argument_block():
+    mfile = os.path.join(DIRNAME, "test_data", "f_with_output_argument_block.m")
+    obj = mat_types.MatObject.parse_mfile(
+        mfile, "f_with_output_argument_block", "test_data"
+    )
+    assert obj.name == "f_with_output_argument_block"
+    assert list(obj.retv.keys()) == ["o1", "o2", "o3"]
+    assert list(obj.args.keys()) == ["a1", "a2"]
+
+    assert obj.retv["o1"]["size"] == ("1", "1")
+    assert obj.retv["o1"]["type"] == "double"
+    assert obj.retv["o1"]["docstring"] == "Output one"
+
+    assert obj.retv["o2"]["size"] == ("1", ":")
+    assert obj.retv["o2"]["type"] == "double"
+    assert obj.retv["o2"]["docstring"] == "Another output"
+
+    assert obj.retv["o3"]["size"] == ("1", "1")
+    assert obj.retv["o3"]["type"] == "double"
+    assert obj.retv["o3"]["docstring"] == "A third output"
+    assert obj.retv["o3"]["validators"] == ["mustBePositive"]
+
+
 if __name__ == "__main__":
     pytest.main([os.path.abspath(__file__)])

From 4b3fb89068b9af8ff33f3736bab046d7904ff895 Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sun, 8 Sep 2024 10:06:25 +0200
Subject: [PATCH 43/45] remove print and fix test_matlabify

---
 sphinxcontrib/mat_tree_sitter_parser.py | 1 -
 tests/test_matlabify.py                 | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py
index c735cbf..4460691 100644
--- a/sphinxcontrib/mat_tree_sitter_parser.py
+++ b/sphinxcontrib/mat_tree_sitter_parser.py
@@ -611,7 +611,6 @@ def _parse_property_section(self, props_match):
         for prop in properties:
             # match property to extract details
             _, prop_match = q_property.matches(prop)[0]
-            print(prop.sexp())
             # extract name (this is always available so no need for None check)
             name = prop_match.get("name").text.decode(
                 self.encoding, errors="backslashreplace"
diff --git a/tests/test_matlabify.py b/tests/test_matlabify.py
index 25761a7..42ed9a8 100644
--- a/tests/test_matlabify.py
+++ b/tests/test_matlabify.py
@@ -99,6 +99,8 @@ def test_module(mod):
         "ClassWithEnumMethod",
         "ClassWithEventMethod",
         "f_with_function_variable",
+        "f_with_input_argument_block",
+        "f_with_output_argument_block",
         "ClassWithUndocumentedMembers",
         "ClassWithGetterSetter",
         "ClassWithDoubleQuotedString",

From 631466163a720f96d6d5fb30b943c438749a02bc Mon Sep 17 00:00:00 2001
From: Anton Pozharskiy <apozharski@gmail.com>
Date: Sun, 8 Sep 2024 10:10:08 +0200
Subject: [PATCH 44/45] remove textmate from dev-reqs

---
 dev-requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index c74a328..427369d 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -4,4 +4,3 @@ pytest-cov
 pre-commit
 defusedxml>=0.7.1
 sphinxcontrib-napoleon
-textmate-grammar-python

From 63781e398cdc26ff56ec00f33216cf7497bf4d4d Mon Sep 17 00:00:00 2001
From: Anton Edvinovich Pozharskiy <apozharski@gmail.com>
Date: Wed, 18 Sep 2024 08:56:02 +0200
Subject: [PATCH 45/45] fix typo

---
 sphinxcontrib/matlab.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py
index 01755f3..c95463a 100644
--- a/sphinxcontrib/matlab.py
+++ b/sphinxcontrib/matlab.py
@@ -344,7 +344,7 @@ def _object_hierarchy_parts(self, sig):
         table of contents, and can also be used within the
         :py:meth:`_toc_entry_name` method.
 
-        This method must not be used outwith table of contents generation.
+        This method must not be used outside of table of contents generation.
         """
         parts = sig.attributes.get("module").split(".")
         parts.append(sig.attributes.get("fullname"))