From c6d889f1b10417db44540747aa2ab1583b4762e5 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 18 Jul 2024 07:55:24 +0200 Subject: [PATCH 01/45] [WIP] Work on writing a parser using the textmate grammar backend. (#253) * Actually not only enums, quick and dirty start to using textmate parser * a little more boilerplate * property validator parsing * working function parsing without docstrings yet * start enum work * some enum parsing * working parsing for enumeration comments * add handling for block comments to enums * backport enum docstring parsing to properties * remove vestigial file * minor fixes + black --- dev-requirements.txt | 1 + sphinxcontrib/mat_textmate_parser.py | 476 +++++++++++++++++++++++++++ 2 files changed, 477 insertions(+) create mode 100644 sphinxcontrib/mat_textmate_parser.py diff --git a/dev-requirements.txt b/dev-requirements.txt index 427369d..c74a328 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -4,3 +4,4 @@ pytest-cov pre-commit defusedxml>=0.7.1 sphinxcontrib-napoleon +textmate-grammar-python diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py new file mode 100644 index 0000000..1ea1c8f --- /dev/null +++ b/sphinxcontrib/mat_textmate_parser.py @@ -0,0 +1,476 @@ +from textmate_grammar.parsers.matlab import MatlabParser + +rpath = "../tests/test_data/ClassWithPropertyValidators.m" + + +def find_first_child(curr, tok): + ind = [i for i in range(len(curr.children)) if curr.children[i].token == tok] + if not ind: + return None + return (curr.children[ind[0]], ind[0]) + + +class MatClassParser: + def __init__(self, path): + # DATA + self.name = "" + self.supers = [] + self.attrs = {} + self.docstring = "" + self.properties = {} + self.methods = {} + self.enumerations = {} + + # Maybe remove continuations as a crutch? currently parser is broken for continuations in attributes + # self.parser = MatlabParser(remove_line_continuations=True) + self.parser = MatlabParser() + self.parsed = self.parser.parse_file(path) + self.cls, _ = find_first_child(self.parsed, "meta.class.matlab") + if not self.cls: + raise Exception() # TODO better exception + self.clsdef, _ = find_first_child(self.cls, "meta.class.declaration.matlab") + self._parse_clsdef() + self._find_class_docstring() + + property_sections = self.cls.findall(tokens="meta.properties.matlab", depth=1) + method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1) + enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1) + + for section in property_sections: + self._parse_property_section(section[0]) + + for section in method_sections: + self._parse_method_section(section[0]) + + for section in enumeration_sections: + self._parse_enum_section(section[0]) + import pdb + + pdb.set_trace() + + def _find_class_docstring(self): + if self.cls.children[1].token == "comment.line.percentage.matlab": + self._docstring_lines() + elif self.cls.children[1].token == "comment.block.percentage.matlab": + self.docstring = ( + self.cls.children[1].content.strip()[2:-2].strip() + ) # [2,-2] strips out block comment delimiters + else: + print("found no docstring") + + def _docstring_lines(self): + idx = 1 + while self.cls.children[idx].token == "comment.line.percentage.matlab": + self.docstring += ( + self.cls.children[idx].content[1:] + "\n" + ) # [1:] strips out percent sign + idx += 1 + self.docstring = self.docstring.strip() + + def _parse_clsdef(self): + for child in self.clsdef.children: + child.print() + + # Parse Attributes TODO maybe there is a smarter way to do this? + idx = 0 + while self.clsdef.children[idx].token == "storage.modifier.class.matlab": + attr = self.clsdef.children[idx].content + val = None # TODO maybe do some typechecking here or we can assume that you give us valid Matlab + idx += 1 + if ( + self.clsdef.children[idx].token == "keyword.operator.assignment.matlab" + ): # pull out r.h.s + idx += 1 + val = self.clsdef.children[idx].content + idx += 1 + if ( + self.clsdef.children[idx].token + == "punctuation.separator.modifier.comma.matlab" + ): # skip commas + idx += 1 + self.attrs[attr] = val + + if ( + self.clsdef.children[idx].token == "punctuation.section.parens.end.matlab" + ): # Skip end of attrs + idx += 1 + + # name must be next + self.name = self.clsdef.children[idx].content + idx += 1 + + while idx < len( + self.clsdef.children + ): # No children we care about after this except inherited classes + if self.clsdef.children[idx].token == "meta.inherited-class.matlab": + super_cls_tok = self.clsdef.children[idx] + # collect superclass as a tuple + super_cls = tuple( + [ + child.content + for child in super_cls_tok.children + if not child.token.startswith("punctuation") + ] + ) + self.supers.append(super_cls) + idx += 1 + + def _parse_property_section(self, section): + # TODO parse property section attrs + idxs = [ + i + for i in range(len(section.children)) + if section.children[i].token == "meta.assignment.definition.property.matlab" + ] + for idx in idxs: + prop_tok = section.children[idx] + prop_name = prop_tok.begin[0].content + self.properties[prop_name] = {} # Create entry for property + self._parse_property_validation( + prop_name, prop_tok + ) # Parse property validation. + + # Get inline docstring + inline_docstring_gen = prop_tok.find( + tokens="comment.line.percentage.matlab", attribute="end" + ) + try: + inline_docstring_tok, _ = next(inline_docstring_gen) + inline_docstring = inline_docstring_tok.content[ + 1: + ] # strip leading % sign + except StopIteration: + inline_docstring = None + + # Walk backwards to get preceding docstring. + preceding_docstring = "" + walk_back_idx = idx - 1 + next_tok = prop_tok + while walk_back_idx >= 0: + walk_tok = section.children[walk_back_idx] + if self._is_empty_line_between_tok(walk_tok, next_tok): + # Once there is an empty line between consecutive tokens we are done. + break + + if ( + not preceding_docstring + and walk_tok.token == "comment.block.percentage.matlab" + ): + # block comment immediately preceding enum so we are done. + # TODO we might need to do some postprocessing here to handle indents gracefully + preceding_docstring = walk_tok.content.strip()[2:-2] + break + elif walk_tok.token == "comment.line.percentage.matlab": + preceding_docstring = ( + walk_tok.content[1:] + "\n" + preceding_docstring + ) # [1:] strips % + walk_back_idx -= 1 + next_tok = walk_tok + elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": + walk_back_idx -= 1 + # Dont update next_tok for whitespace + else: + break + + # Walk forwards to get following docstring or inline one. + following_docstring = "" + walk_fwd_idx = idx + 1 + prev_tok = prop_tok + while walk_fwd_idx < len(section.children): + walk_tok = section.children[walk_fwd_idx] + + if self._is_empty_line_between_tok(prev_tok, walk_tok): + # Once there is an empty line between consecutive tokens we are done. + break + + if ( + not following_docstring + and walk_tok.token == "comment.block.percentage.matlab" + ): + # block comment immediately following enum so we are done. + # TODO we might need to do some postprocessing here to handle indents gracefully + following_docstring = walk_tok.content.strip()[2:-2] + break + elif walk_tok.token == "comment.line.percentage.matlab": + following_docstring = ( + following_docstring + "\n" + walk_tok.content[1:] + ) # [1:] strips % + walk_fwd_idx += 1 + prev_tok = walk_tok + elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": + walk_fwd_idx += 1 + # Dont update prev_tok for whitespace + else: + break + + if preceding_docstring: + self.properties[prop_name]["docstring"] = preceding_docstring.strip() + elif inline_docstring: + self.properties[prop_name]["docstring"] = inline_docstring.strip() + elif following_docstring: + self.properties[prop_name]["docstring"] = following_docstring.strip() + else: + self.properties[prop_name]["docstring"] = None + + def _parse_property_validation(self, prop_name, prop): + """Parses property validation syntax""" + # First get the szize if found + size_gen = prop.find(tokens="meta.parens.size.matlab", depth=1) + try: # We have a size, therefore parse the comma separated list into tuple + size_tok, _ = next(size_gen) + size_elem_gen = size_tok.find( + tokens=[ + "constant.numeric.decimal.matlab", + "keyword.operator.vector.colon.matlab", + ], + depth=1, + ) + size = tuple([elem[0].content for elem in size_elem_gen]) + self.properties[prop_name]["size"] = size + except StopIteration: + pass + + # Now find the type if it exists + # TODO this should be mapped to known types (though perhaps as a postprocess) + type_gen = prop.find(tokens="storage.type.matlab", depth=1) + try: + self.properties[prop_name]["type"] = next(type_gen)[0].content + except StopIteration: + pass + + # Now find list of validators + validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1) + try: + import pdb + + pdb.set_trace() + validator_tok, _ = next(validator_gen) + validator_toks = validator_tok.findall( + tokens="variable.other.readwrite.matlab", depth=1 + ) # TODO Probably bug here in MATLAB-Language-grammar + self.properties[prop_name]["validators"] = [ + tok[0].content for tok in validator_toks + ] + except StopIteration: + pass + + def _parse_method_section(self, section): + # TODO parse property section attrs + idxs = [ + i + for i in range(len(section.children)) + if section.children[i].token == "meta.function.matlab" + ] + for idx in idxs: + meth_tok = section.children[idx] + self._parse_function(meth_tok) + # TODO walk forward and backward to get property docstring. + # TODO if we have mutliple possible docstrings what is given priority? + # TODO parse out property validations syntax + + def _parse_function(self, fun_tok): + """Parse Function definition""" + # First find the function name + name_gen = fun_tok.find(tokens="entity.name.function.matlab") + try: + name_tok, _ = next(name_gen) + fun_name = name_tok.content + except StopIteration: + # TODO correct error here + raise Exception("Couldn't find function name") + + # Find outputs and parameters + output_gen = fun_tok.find(tokens="variable.parameter.output.matlab") + param_gen = fun_tok.find(tokens="variable.parameter.input.matlab") + + self.methods[fun_name] = {} + self.methods[fun_name]["outputs"] = {} + self.methods[fun_name]["params"] = {} + + for out, _ in output_gen: + self.methods[fun_name]["outputs"][out.content] = {} + + for param, _ in param_gen: + self.methods[fun_name]["params"][param.content] = {} + + # find arguments blocks + for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): + self._parse_argument_section(fun_name, arg_section) + + def _parse_argument_section(self, fun_name, section): + modifiers = [ + mod.content + for mod, _ in section.find(tokens="storage.modifier.arguments.matlab") + ] + arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab") + for arg_def, _ in arg_def_gen: + arg_name = arg_def.begin[ + 0 + ].content # Get argument name that is being defined + self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers) + + def _parse_argument_validation(self, fun_name, arg_name, arg, modifiers): + # TODO This should be identical to propery validation I think. Refactor + # First get the size if found + section = "output" if "Output" in modifiers else "params" + size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) + try: # We have a size, therefore parse the comma separated list into tuple + size_tok, _ = next(size_gen) + size_elem_gen = size_tok.find( + tokens=[ + "constant.numeric.decimal.matlab", + "keyword.operator.vector.colon.matlab", + ], + depth=1, + ) + size = tuple([elem[0].content for elem in size_elem_gen]) + self.methods[fun_name][section][arg_name]["size"] = size + except StopIteration: + pass + + # Now find the type if it exists + # TODO this should be mapped to known types (though perhaps as a postprocess) + type_gen = arg.find(tokens="storage.type.matlab", depth=1) + try: + self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[ + 0 + ].content + except StopIteration: + pass + + # Now find list of validators + validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1) + try: + validator_tok, _ = next(validator_gen) + validator_toks = validator_tok.findall( + tokens="variable.other.readwrite.matlab", depth=1 + ) # TODO Probably bug here in MATLAB-Language-grammar + self.methods[fun_name][section][arg_name]["validators"] = [ + tok[0].content for tok in validator_toks + ] + except StopIteration: + pass + + def _parse_enum_section(self, section): + # TODO parse property section attrs + idxs = [ + i + for i in range(len(section.children)) + if section.children[i].token + == "meta.assignment.definition.enummember.matlab" + ] + for idx in idxs: + enum_tok = section.children[idx] + next_idx = idx + enum_name = enum_tok.children[0].content + self.enumerations[enum_name] = {} + if ( + section.children[idx + 1].token == "meta.parens.matlab" + ): # Parse out args TODO this should be part of enummember assignment definition + args = tuple( + [ + arg.content + for arg in section.children[idx + 1].children + if arg.token != "punctuation.separator.comma.matlab" + ] + ) + self.enumerations[enum_name]["args"] = args + next_idx += 1 + + # Walk backwards to get preceding docstring. + preceding_docstring = "" + walk_back_idx = idx - 1 + next_tok = enum_tok + while walk_back_idx >= 0: + walk_tok = section.children[walk_back_idx] + if self._is_empty_line_between_tok(walk_tok, next_tok): + # Once there is an empty line between consecutive tokens we are done. + break + + if ( + not preceding_docstring + and walk_tok.token == "comment.block.percentage.matlab" + ): + # block comment immediately preceding enum so we are done. + # TODO we might need to do some postprocessing here to handle indents gracefully + preceding_docstring = walk_tok.content.strip()[2:-2] + break + elif walk_tok.token == "comment.line.percentage.matlab": + preceding_docstring = ( + walk_tok.content[1:] + "\n" + preceding_docstring + ) # [1:] strips % + walk_back_idx -= 1 + next_tok = walk_tok + elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": + walk_back_idx -= 1 + # Dont update next_tok for whitespace + else: + break + + # Walk forwards to get following docstring or inline one. + inline_docstring = "" + following_docstring = "" + walk_fwd_idx = next_idx + 1 + prev_tok = section.children[next_idx] + while walk_fwd_idx < len(section.children): + walk_tok = section.children[walk_fwd_idx] + + if self._is_empty_line_between_tok(prev_tok, walk_tok): + # Once there is an empty line between consecutive tokens we are done. + break + + if ( + not following_docstring + and walk_tok.token == "comment.block.percentage.matlab" + ): + # block comment immediately following enum so we are done. + # TODO we might need to do some postprocessing here to handle indents gracefully + following_docstring = walk_tok.content.strip()[2:-2] + break + elif walk_tok.token == "comment.line.percentage.matlab": + # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit. + if self._toks_on_same_line(section.children[idx], walk_tok): + inline_docstring = walk_tok.content[1:] + break + + following_docstring = ( + following_docstring + "\n" + walk_tok.content[1:] + ) # [1:] strips % + walk_fwd_idx += 1 + prev_tok = walk_tok + elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": + walk_fwd_idx += 1 + # Dont update prev_tok for whitespace + else: + break + + if preceding_docstring: + self.enumerations[enum_name]["docstring"] = preceding_docstring.strip() + elif inline_docstring: + self.enumerations[enum_name]["docstring"] = inline_docstring.strip() + elif following_docstring: + self.enumerations[enum_name]["docstring"] = following_docstring.strip() + else: + self.enumerations[enum_name]["docstring"] = None + + def _toks_on_same_line(self, tok1, tok2): + """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results""" + line1 = self._get_last_line_of_tok(tok1) + line2 = self._get_first_line_of_tok(tok2) + return line1 == line2 + + def _is_empty_line_between_tok(self, tok1, tok2): + """Note: pass tokens in order they appear""" + line1 = self._get_last_line_of_tok(tok1) + line2 = self._get_first_line_of_tok(tok2) + return line2 - line1 > 1 + + def _get_first_line_of_tok(self, tok): + return min([loc[0] for loc in tok.characters.keys()]) + + def _get_last_line_of_tok(self, tok): + return max([loc[0] for loc in tok.characters.keys()]) + + +if __name__ == "__main__": + cls_parse = MatClassParser(rpath) From b7bc00649152aac5d6eb55534779bc96c7317c6e Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sun, 7 Jul 2024 20:35:19 +0200 Subject: [PATCH 02/45] Hack for object hierarchy --- sphinxcontrib/matlab.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py index 43b96ab..d609e33 100644 --- a/sphinxcontrib/matlab.py +++ b/sphinxcontrib/matlab.py @@ -336,6 +336,25 @@ class MatClasslike(MatObject): Description of a class-like object (classes, interfaces, exceptions). """ + def _object_hierarchy_parts(self, sig): + """ + Returns a tuple of strings, one entry for each part of the object's + hierarchy (e.g. ``('module', 'submodule', 'Class', 'method')``). The + returned tuple is used to properly nest children within parents in the + table of contents, and can also be used within the + :py:meth:`_toc_entry_name` method. + + This method must not be used outwith table of contents generation. + """ + parts = sig.attributes.get('module').split('.') + parts.append(sig.attributes.get('fullname')) + #import pdb;pdb.set_trace() + return tuple(parts) + + def _toc_entry_name(self, sig): + # TODO respecting the configuration setting ``toc_object_entries_show_parents`` + return sig.attributes.get('fullname') + def get_signature_prefix(self, sig): return self.objtype + " " From 4e60a764268bd8cd3ddc261d0c8ad8ccd1fe8cc1 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Tue, 9 Jul 2024 15:40:18 +0200 Subject: [PATCH 03/45] Initial hack to get enumerations to work --- sphinxcontrib/mat_documenters.py | 29 +++++- sphinxcontrib/mat_types.py | 160 ++++++++++++++++++++++++++++++- sphinxcontrib/matlab.py | 6 ++ 3 files changed, 190 insertions(+), 5 deletions(-) diff --git a/sphinxcontrib/mat_documenters.py b/sphinxcontrib/mat_documenters.py index b5cd00c..35fff8c 100644 --- a/sphinxcontrib/mat_documenters.py +++ b/sphinxcontrib/mat_documenters.py @@ -14,6 +14,7 @@ MatFunction, MatClass, MatProperty, + MatEnumeration, MatMethod, MatScript, MatException, @@ -555,8 +556,11 @@ def member_is_friend_of(member, friends): else: return False + def member_is_enum(member): + return isinstance(member, MatEnumeration) + ret = [] - + # search for members in source code too namespace = ".".join(self.objpath) # will be empty for modules @@ -637,7 +641,7 @@ def member_is_friend_of(member, friends): isattr = True else: # ignore undocumented members if :undoc-members: is not given - keep = has_doc or self.options.undoc_members + keep = has_doc or self.options.undoc_members or member_is_enum(member) # give the user a chance to decide whether this member # should be skipped @@ -656,7 +660,6 @@ def member_is_friend_of(member, friends): if keep: ret.append((membername, member, isattr)) - return ret def document_members(self, all_members=False): @@ -1229,11 +1232,17 @@ def document_members(self, all_members=False): for (membername, member) in filtered_members if isinstance(member, MatMethod) and member.name != member.cls.name ] + # create list of enums + enum_names = [ + membername + for (membername, member) in filtered_members + if isinstance(member, MatEnumeration) + ] # create list of other members other_names = [ membername for (membername, member) in filtered_members - if not isinstance(member, MatMethod) and not isinstance(member, MatProperty) + if not isinstance(member, MatMethod) and not isinstance(member, MatProperty) and not isinstance(member, MatEnumeration) # exclude parent modules with names matching members (as in Myclass.Myclass) and not (hasattr(member, "module") and member.name == member.module) ] @@ -1255,6 +1264,12 @@ def document_members(self, all_members=False): for (membername, member) in members if not isinstance(member, MatMethod) or member.name == member.cls.name ] + # create list of members that are not properties + non_enums = [ + membername + for (membername, member) in members + if not isinstance(member, MatEnumeration) + ] # create list of members that are not non-constructor methods non_other = [ membername @@ -1280,6 +1295,12 @@ def document_members(self, all_members=False): self.document_member_section( "Property Summary", non_properties, all_members ) + + # enumss + if enum_names: + self.document_member_section( + "Enumeration Values", non_enums, all_members + ) # methods if meth_names: diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index 964b012..f76a697 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -26,6 +26,7 @@ "MatFunction", "MatClass", "MatProperty", + "MatEnumerations", "MatMethod", "MatScript", "MatException", @@ -1068,6 +1069,8 @@ def __init__(self, name, modname, tokens): self.properties = {} #: dictionary of class methods self.methods = {} + #: dictionary of class enumerations + self.enumerations = {} #: remaining tokens after main class definition is parsed self.rem_tks = None # ===================================================================== @@ -1421,10 +1424,143 @@ def __init__(self, name, modname, tokens): "[sphinxcontrib-matlabdomain] ignoring 'enumeration' in 'classdef %s'.", self.name, ) + # no attributes for enums idx += 1 # Token.Keyword: "end" terminates events block while self._tk_ne(idx, (Token.Keyword, "end")): - idx += 1 + # skip whitespace + while self._whitespace(idx): + whitespace = self._whitespace(idx) + if whitespace: + idx += whitespace + else: + idx += 1 + + # ========================================================= + # long docstring before property + if self.tokens[idx][0] is Token.Comment: + # docstring + docstring = "" + + # Collect comment lines + while self.tokens[idx][0] is Token.Comment: + docstring += self.tokens[idx][1].lstrip("%") + idx += 1 + idx += self._blanks(idx) + + try: + # Check if end of line was reached + if self._is_newline(idx): + docstring += "\n" + idx += 1 + idx += self._blanks(idx) + + # Check if variable name is next + if self.tokens[idx][0] is Token.Name: + enum_name = self.tokens[idx][1] + self.enumerations[enum_name] = {} + self.enumerations[enum_name][ + "docstring" + ] = docstring + break + + # If there is an empty line at the end of + # the comment: discard it + elif self._is_newline(idx): + docstring = "" + idx += self._whitespace(idx) + break + + except IndexError: + # EOF reached, quit gracefully + break + + # with "%:" directive trumps docstring after property + if self.tokens[idx][0] is Token.Name: + enum_name = self.tokens[idx][1] + idx += 1 + # Initialize property if it was not already done + if enum_name not in self.enumerations.keys(): + self.enumerations[enum_name] = {} + + # skip size, class and functions specifiers + # TODO: parse args and do a postprocessing step. + idx += self._propspec(idx) + + if self._tk_eq(idx, (Token.Punctuation, ";")): + continue + + # This is because matlab allows comma separated list of enums + if self._tk_eq(idx, (Token.Punctuation, ",")): + continue + + # subtype of Name EG Name.Builtin used as Name + elif self.tokens[idx][0] in Token.Name.subtypes: + prop_name = self.tokens[idx][1] + logger.debug( + "[sphinxcontrib-matlabdomain] WARNING %s.%s.%s is a builtin name.", + self.module, + self.name, + prop_name, + ) + self.properties[prop_name] = {"attrs": attr_dict} + idx += 1 + + # skip size, class and functions specifiers + # TODO: Parse old and new style property extras + idx += self._propspec(idx) + + if self._tk_eq(idx, (Token.Punctuation, ";")): + continue + + elif self._tk_eq(idx, (Token.Keyword, "end")): + idx += 1 + break + # skip semicolon after property name, but no default + elif self._tk_eq(idx, (Token.Punctuation, ";")): + idx += 1 + # A comment might come after semi-colon + idx += self._blanks(idx) + if self._is_newline(idx): + idx += 1 + # Property definition is finished; add missing values + if "default" not in self.properties[prop_name].keys(): + self.properties[prop_name]["default"] = None + if "docstring" not in self.properties[prop_name].keys(): + self.properties[prop_name]["docstring"] = None + + continue + elif self.tokens[idx][0] is Token.Comment: + docstring = self.tokens[idx][1].lstrip("%") + docstring += "\n" + self.properties[prop_name]["docstring"] = docstring + idx += 1 + elif self.tokens[idx][0] is Token.Comment: + # Comments seperated with blank lines. + idx = idx - 1 + continue + else: + logger.warning( + "sphinxcontrib-matlabdomain] Expected enumeration in %s.%s - got %s", + self.module, + self.name, + str(self.tokens[idx]), + ) + return + idx += self._blanks(idx) # skip blanks + + # docstring + if "docstring" not in self.enumerations[enum_name].keys(): + docstring = {"docstring": None} + if self.tokens[idx][0] is Token.Comment: + docstring["docstring"] = self.tokens[idx][1].lstrip("%") + idx += 1 + self.enumerations[enum_name].update(docstring) + elif self.tokens[idx][0] is Token.Comment: + # skip this comment + idx += 1 + + idx += self._whitespace(idx) idx += 1 if self._tk_eq(idx, (Token.Punctuation, ";")): # Skip trailing semicolon after end. @@ -1603,11 +1739,16 @@ def getter(self, name, *defargs): return self.__bases__ elif name in self.properties: return MatProperty(name, self, self.properties[name]) + elif name in self.enumerations: + return MatEnumeration(name, self, self.enumerations[name]) elif name in self.methods: return self.methods[name] + elif name in self.enumerations: + return elif name == "__dict__": objdict = dict([(pn, self.getter(pn)) for pn in self.properties.keys()]) objdict.update(self.methods) + objdict.update(dict([(en, self.getter(en)) for en in self.enumerations.keys()])) return objdict else: super(MatClass, self).getter(name, *defargs) @@ -1634,6 +1775,23 @@ def __module__(self): def __doc__(self): return self.docstring +class MatEnumeration(MatObject): + def __init__(self, name, cls, attrs): + super(MatEnumeration, self).__init__(name) + self.cls = cls + self.docstring = attrs["docstring"] + + def ref_role(self): + """Returns role to use for references to this object (e.g. when generating auto-links)""" + return "enum" + + @property + def __module__(self): + return self.cls.module + + @property + def __doc__(self): + return self.docstring class MatMethod(MatFunction): def __init__(self, modname, tks, cls, attrs): diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py index d609e33..5764f9f 100644 --- a/sphinxcontrib/matlab.py +++ b/sphinxcontrib/matlab.py @@ -712,6 +712,7 @@ class MATLABDomain(Domain): "class": MatXRefRole(), "const": MatXRefRole(), "attr": MatXRefRole(), + "enum": MatXRefRole(), "meth": MatXRefRole(fix_parens=True), "mod": MatXRefRole(), "obj": MatXRefRole(), @@ -921,6 +922,11 @@ def setup(app): "mat", "autoattribute", mat_directives.MatlabAutodocDirective ) + app.registry.add_documenter("mat:enum", doc.MatAttributeDocumenter) + app.add_directive_to_domain( + "mat", "autoenum", mat_directives.MatlabAutodocDirective + ) + app.registry.add_documenter("mat:data", doc.MatDataDocumenter) app.add_directive_to_domain( "mat", "autodata", mat_directives.MatlabAutodocDirective From e5384dad3ceccb133c91eba1f53b626a183482a3 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Fri, 19 Jul 2024 11:44:18 +0200 Subject: [PATCH 04/45] better classdef parsing including changes to MATLAB-language-grammar prs #86, #88, and #90 --- sphinxcontrib/mat_textmate_parser.py | 137 ++++++++++++++++++--------- 1 file changed, 93 insertions(+), 44 deletions(-) diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py index 1ea1c8f..db24a5c 100644 --- a/sphinxcontrib/mat_textmate_parser.py +++ b/sphinxcontrib/mat_textmate_parser.py @@ -1,6 +1,6 @@ from textmate_grammar.parsers.matlab import MatlabParser -rpath = "../tests/test_data/ClassWithPropertyValidators.m" +rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" def find_first_child(curr, tok): @@ -49,71 +49,119 @@ def __init__(self, path): pdb.set_trace() def _find_class_docstring(self): - if self.cls.children[1].token == "comment.line.percentage.matlab": + try: + possible_comment_tok = self.cls.children[1] + except IndexError: + print("found no docstring") + return + + if possible_comment_tok.token == "comment.line.percentage.matlab": self._docstring_lines() - elif self.cls.children[1].token == "comment.block.percentage.matlab": - self.docstring = ( - self.cls.children[1].content.strip()[2:-2].strip() - ) # [2,-2] strips out block comment delimiters + elif possible_comment_tok.token == "comment.block.percentage.matlab": + self.docstring = possible_comment_tok.content.strip()[ + 2:-2 + ].strip() # [2,-2] strips out block comment delimiters else: print("found no docstring") def _docstring_lines(self): idx = 1 - while self.cls.children[idx].token == "comment.line.percentage.matlab": + cls_children = self.cls.children + + while ( + idx < len(cls_children) + and cls_children[idx].token == "comment.line.percentage.matlab" + ): self.docstring += ( - self.cls.children[idx].content[1:] + "\n" + cls_children[idx].content[1:] + "\n" ) # [1:] strips out percent sign idx += 1 self.docstring = self.docstring.strip() def _parse_clsdef(self): - for child in self.clsdef.children: - child.print() + # Try parsing attrs + attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab") + try: + attrs_tok, _ = next(attrs_tok_gen) + self._parse_class_attributes(attrs_tok) + except StopIteration: + pass + # Parse classname + classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab") + try: + classname_tok, _ = next(classname_tok_gen) + self.name = classname_tok.content + except StopIteration: + print("ClassName not found") # TODO this is probably fatal + + # Parse interited classes + parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab") + + for parent_class_tok, _ in parent_class_toks: + sections = parent_class_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.inherited-class.matlab", + ] + ) + super_cls = tuple([sec.content for sec, _ in sections]) + self.supers.append(super_cls) # Parse Attributes TODO maybe there is a smarter way to do this? idx = 0 while self.clsdef.children[idx].token == "storage.modifier.class.matlab": - attr = self.clsdef.children[idx].content + attr_tok = self.clsdef.children[idx] + attr = attr_tok.content val = None # TODO maybe do some typechecking here or we can assume that you give us valid Matlab idx += 1 - if ( - self.clsdef.children[idx].token == "keyword.operator.assignment.matlab" - ): # pull out r.h.s + if attr_tok.token == "keyword.operator.assignment.matlab": # pull out r.h.s idx += 1 val = self.clsdef.children[idx].content idx += 1 if ( - self.clsdef.children[idx].token - == "punctuation.separator.modifier.comma.matlab" + attr_tok.token == "punctuation.separator.modifier.comma.matlab" ): # skip commas idx += 1 self.attrs[attr] = val - if ( - self.clsdef.children[idx].token == "punctuation.section.parens.end.matlab" - ): # Skip end of attrs - idx += 1 - - # name must be next - self.name = self.clsdef.children[idx].content - idx += 1 - - while idx < len( - self.clsdef.children - ): # No children we care about after this except inherited classes - if self.clsdef.children[idx].token == "meta.inherited-class.matlab": - super_cls_tok = self.clsdef.children[idx] - # collect superclass as a tuple - super_cls = tuple( - [ - child.content - for child in super_cls_tok.children - if not child.token.startswith("punctuation") - ] - ) - self.supers.append(super_cls) - idx += 1 + def _parse_class_attributes(self, attrs_tok): + # walk down child list and parse manually + # TODO perhaps contribute a delimited list find to textmate-grammar-python + children = attrs_tok.children + idx = 0 + while idx < len(children): + child_tok = children[idx] + if child_tok.token == "storage.modifier.class.matlab": + attr = child_tok.content + val = None + idx += 1 # walk to next token + maybe_assign_tok = children[idx] + if maybe_assign_tok.token == "keyword.operator.assignment.matlab": + idx += 1 + rhs_tok = children[idx] # parse right hand side + if rhs_tok.token == "meta.cell.literal.matlab": + # A cell. For now just take the whole cell as value. + # TODO parse out the cell array of metaclass literals. + val = "{" + rhs_tok.content + "}" + idx += 1 + elif rhs_tok.token == "constant.language.boolean.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "keyword.operator.other.question.matlab": + idx += 1 + metaclass_tok = children[idx] + metaclass_components = metaclass_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.class.matlab", + ] + ) + val = tuple([comp.content for comp, _ in metaclass_components]) + else: + pass + self.attrs[attr] = val + else: # Comma or continuation therefore skip + idx += 1 def _parse_property_section(self, section): # TODO parse property section attrs @@ -241,12 +289,13 @@ def _parse_property_validation(self, prop_name, prop): # Now find list of validators validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1) try: - import pdb - - pdb.set_trace() validator_tok, _ = next(validator_gen) validator_toks = validator_tok.findall( - tokens="variable.other.readwrite.matlab", depth=1 + tokens=[ + "variable.other.readwrite.matlab", + "meta.function-call.parens.matlab", + ], + depth=1, ) # TODO Probably bug here in MATLAB-Language-grammar self.properties[prop_name]["validators"] = [ tok[0].content for tok in validator_toks From fee6d03dd46f0e802a182a298e7a16a9b255e6ab Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Mon, 22 Jul 2024 10:50:26 +0200 Subject: [PATCH 05/45] parse function docstring --- sphinxcontrib/mat_textmate_parser.py | 56 +++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 9 deletions(-) diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py index db24a5c..ae3f342 100644 --- a/sphinxcontrib/mat_textmate_parser.py +++ b/sphinxcontrib/mat_textmate_parser.py @@ -36,14 +36,14 @@ def __init__(self, path): method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1) enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1) - for section in property_sections: - self._parse_property_section(section[0]) + for section, _ in property_sections: + self._parse_property_section(section) - for section in method_sections: - self._parse_method_section(section[0]) + for section, _ in method_sections: + self._parse_method_section(section) - for section in enumeration_sections: - self._parse_enum_section(section[0]) + for section, _ in enumeration_sections: + self._parse_enum_section(section) import pdb pdb.set_trace() @@ -313,9 +313,6 @@ def _parse_method_section(self, section): for idx in idxs: meth_tok = section.children[idx] self._parse_function(meth_tok) - # TODO walk forward and backward to get property docstring. - # TODO if we have mutliple possible docstrings what is given priority? - # TODO parse out property validations syntax def _parse_function(self, fun_tok): """Parse Function definition""" @@ -343,9 +340,50 @@ def _parse_function(self, fun_tok): self.methods[fun_name]["params"][param.content] = {} # find arguments blocks + arg_section = None for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): self._parse_argument_section(fun_name, arg_section) + fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab") + try: + fun_decl_tok, _ = next(fun_decl_gen) + except StopIteration: + raise Exception( + "missing function declaration" + ) # This cant happen as we'd be missing a function name + + # Now parse for docstring + docstring = "" + comment_toks = fun_tok.findall( + tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"] + ) + last_tok = arg_section if arg_section is not None else fun_decl_tok + import pdb + + pdb.set_trace() + for comment_tok, _ in comment_toks: + if self._is_empty_line_between_tok(last_tok, comment_tok): + # If we have non-consecutive tokens quit right away. + break + elif ( + not docstring and comment_tok.token == "comment.block.percentage.matlab" + ): + # If we have no previous docstring lines and a comment block we take + # the comment block as the docstring and exit. + docstring = comment_tok.content.strip()[ + 2:-2 + ].strip() # [2,-2] strips out block comment delimiters + break + elif comment_tok.token == "comment.line.percentage.matlab": + # keep parsing comments + docstring += comment_tok.content[1:] + "\n" + else: + # we are done. + break + last_tok = comment_tok + + self.methods[fun_name]["docstring"] = docstring if docstring else None + def _parse_argument_section(self, fun_name, section): modifiers = [ mod.content From e75155f159c0fc0a2ee50186215b1c4c69e4c206 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Mon, 22 Jul 2024 11:19:30 +0200 Subject: [PATCH 06/45] extract function parser --- sphinxcontrib/mat_textmate_parser.py | 300 ++++++++++++++------------- 1 file changed, 152 insertions(+), 148 deletions(-) diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py index ae3f342..8eba0b1 100644 --- a/sphinxcontrib/mat_textmate_parser.py +++ b/sphinxcontrib/mat_textmate_parser.py @@ -10,6 +10,151 @@ def find_first_child(curr, tok): return (curr.children[ind[0]], ind[0]) +def _toks_on_same_line(tok1, tok2): + """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results""" + line1 = _get_last_line_of_tok(tok1) + line2 = _get_first_line_of_tok(tok2) + return line1 == line2 + + +def _is_empty_line_between_tok(tok1, tok2): + """Note: pass tokens in order they appear""" + line1 = _get_last_line_of_tok(tok1) + line2 = _get_first_line_of_tok(tok2) + return line2 - line1 > 1 + + +def _get_first_line_of_tok(tok): + return min([loc[0] for loc in tok.characters.keys()]) + + +def _get_last_line_of_tok(tok): + return max([loc[0] for loc in tok.characters.keys()]) + + +class MatFunctionParser: + def __init__(self, fun_tok): + """Parse Function definition""" + # First find the function name + name_gen = fun_tok.find(tokens="entity.name.function.matlab") + try: + name_tok, _ = next(name_gen) + self.name = name_tok.content + except StopIteration: + # TODO correct error here + raise Exception("Couldn't find function name") + + # Find outputs and parameters + output_gen = fun_tok.find(tokens="variable.parameter.output.matlab") + param_gen = fun_tok.find(tokens="variable.parameter.input.matlab") + + self.outputs = {} + self.params = {} + + for out, _ in output_gen: + self.outputs[out.content] = {} + + for param, _ in param_gen: + self.params[param.content] = {} + + # find arguments blocks + arg_section = None + for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): + self._parse_argument_section(arg_section) + + fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab") + try: + fun_decl_tok, _ = next(fun_decl_gen) + except StopIteration: + raise Exception( + "missing function declaration" + ) # This cant happen as we'd be missing a function name + + # Now parse for docstring + docstring = "" + comment_toks = fun_tok.findall( + tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"] + ) + last_tok = arg_section if arg_section is not None else fun_decl_tok + + for comment_tok, _ in comment_toks: + if _is_empty_line_between_tok(last_tok, comment_tok): + # If we have non-consecutive tokens quit right away. + break + elif ( + not docstring and comment_tok.token == "comment.block.percentage.matlab" + ): + # If we have no previous docstring lines and a comment block we take + # the comment block as the docstring and exit. + docstring = comment_tok.content.strip()[ + 2:-2 + ].strip() # [2,-2] strips out block comment delimiters + break + elif comment_tok.token == "comment.line.percentage.matlab": + # keep parsing comments + docstring += comment_tok.content[1:] + "\n" + else: + # we are done. + break + last_tok = comment_tok + + self.docstring = docstring if docstring else None + + def _parse_argument_section(self, section): + modifiers = [ + mod.content + for mod, _ in section.find(tokens="storage.modifier.arguments.matlab") + ] + arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab") + for arg_def, _ in arg_def_gen: + arg_name = arg_def.begin[ + 0 + ].content # Get argument name that is being defined + self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers) + + def _parse_argument_validation(self, arg_name, arg, modifiers): + # TODO This should be identical to propery validation I think. Refactor + # First get the size if found + section = "output" if "Output" in modifiers else "params" + size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) + try: # We have a size, therefore parse the comma separated list into tuple + size_tok, _ = next(size_gen) + size_elem_gen = size_tok.find( + tokens=[ + "constant.numeric.decimal.matlab", + "keyword.operator.vector.colon.matlab", + ], + depth=1, + ) + size = tuple([elem[0].content for elem in size_elem_gen]) + self.methods[fun_name][section][arg_name]["size"] = size + except StopIteration: + pass + + # Now find the type if it exists + # TODO this should be mapped to known types (though perhaps as a postprocess) + type_gen = arg.find(tokens="storage.type.matlab", depth=1) + try: + self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[ + 0 + ].content + except StopIteration: + pass + + # Now find list of validators + validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1) + try: + validator_tok, _ = next(validator_gen) + validator_toks = validator_tok.findall( + tokens="variable.other.readwrite.matlab", depth=1 + ) # TODO Probably bug here in MATLAB-Language-grammar + self.methods[fun_name][section][arg_name]["validators"] = [ + tok[0].content for tok in validator_toks + ] + except StopIteration: + pass + + class MatClassParser: def __init__(self, path): # DATA @@ -196,7 +341,7 @@ def _parse_property_section(self, section): next_tok = prop_tok while walk_back_idx >= 0: walk_tok = section.children[walk_back_idx] - if self._is_empty_line_between_tok(walk_tok, next_tok): + if _is_empty_line_between_tok(walk_tok, next_tok): # Once there is an empty line between consecutive tokens we are done. break @@ -227,7 +372,7 @@ def _parse_property_section(self, section): while walk_fwd_idx < len(section.children): walk_tok = section.children[walk_fwd_idx] - if self._is_empty_line_between_tok(prev_tok, walk_tok): + if _is_empty_line_between_tok(prev_tok, walk_tok): # Once there is an empty line between consecutive tokens we are done. break @@ -312,131 +457,8 @@ def _parse_method_section(self, section): ] for idx in idxs: meth_tok = section.children[idx] - self._parse_function(meth_tok) - - def _parse_function(self, fun_tok): - """Parse Function definition""" - # First find the function name - name_gen = fun_tok.find(tokens="entity.name.function.matlab") - try: - name_tok, _ = next(name_gen) - fun_name = name_tok.content - except StopIteration: - # TODO correct error here - raise Exception("Couldn't find function name") - - # Find outputs and parameters - output_gen = fun_tok.find(tokens="variable.parameter.output.matlab") - param_gen = fun_tok.find(tokens="variable.parameter.input.matlab") - - self.methods[fun_name] = {} - self.methods[fun_name]["outputs"] = {} - self.methods[fun_name]["params"] = {} - - for out, _ in output_gen: - self.methods[fun_name]["outputs"][out.content] = {} - - for param, _ in param_gen: - self.methods[fun_name]["params"][param.content] = {} - - # find arguments blocks - arg_section = None - for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): - self._parse_argument_section(fun_name, arg_section) - - fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab") - try: - fun_decl_tok, _ = next(fun_decl_gen) - except StopIteration: - raise Exception( - "missing function declaration" - ) # This cant happen as we'd be missing a function name - - # Now parse for docstring - docstring = "" - comment_toks = fun_tok.findall( - tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"] - ) - last_tok = arg_section if arg_section is not None else fun_decl_tok - import pdb - - pdb.set_trace() - for comment_tok, _ in comment_toks: - if self._is_empty_line_between_tok(last_tok, comment_tok): - # If we have non-consecutive tokens quit right away. - break - elif ( - not docstring and comment_tok.token == "comment.block.percentage.matlab" - ): - # If we have no previous docstring lines and a comment block we take - # the comment block as the docstring and exit. - docstring = comment_tok.content.strip()[ - 2:-2 - ].strip() # [2,-2] strips out block comment delimiters - break - elif comment_tok.token == "comment.line.percentage.matlab": - # keep parsing comments - docstring += comment_tok.content[1:] + "\n" - else: - # we are done. - break - last_tok = comment_tok - - self.methods[fun_name]["docstring"] = docstring if docstring else None - - def _parse_argument_section(self, fun_name, section): - modifiers = [ - mod.content - for mod, _ in section.find(tokens="storage.modifier.arguments.matlab") - ] - arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab") - for arg_def, _ in arg_def_gen: - arg_name = arg_def.begin[ - 0 - ].content # Get argument name that is being defined - self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers) - - def _parse_argument_validation(self, fun_name, arg_name, arg, modifiers): - # TODO This should be identical to propery validation I think. Refactor - # First get the size if found - section = "output" if "Output" in modifiers else "params" - size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) - try: # We have a size, therefore parse the comma separated list into tuple - size_tok, _ = next(size_gen) - size_elem_gen = size_tok.find( - tokens=[ - "constant.numeric.decimal.matlab", - "keyword.operator.vector.colon.matlab", - ], - depth=1, - ) - size = tuple([elem[0].content for elem in size_elem_gen]) - self.methods[fun_name][section][arg_name]["size"] = size - except StopIteration: - pass - - # Now find the type if it exists - # TODO this should be mapped to known types (though perhaps as a postprocess) - type_gen = arg.find(tokens="storage.type.matlab", depth=1) - try: - self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[ - 0 - ].content - except StopIteration: - pass - - # Now find list of validators - validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1) - try: - validator_tok, _ = next(validator_gen) - validator_toks = validator_tok.findall( - tokens="variable.other.readwrite.matlab", depth=1 - ) # TODO Probably bug here in MATLAB-Language-grammar - self.methods[fun_name][section][arg_name]["validators"] = [ - tok[0].content for tok in validator_toks - ] - except StopIteration: - pass + parsed_function = MatFunctionParser(meth_tok) + self.methods[parsed_function.name] = parsed_function def _parse_enum_section(self, section): # TODO parse property section attrs @@ -470,7 +492,7 @@ def _parse_enum_section(self, section): next_tok = enum_tok while walk_back_idx >= 0: walk_tok = section.children[walk_back_idx] - if self._is_empty_line_between_tok(walk_tok, next_tok): + if _is_empty_line_between_tok(walk_tok, next_tok): # Once there is an empty line between consecutive tokens we are done. break @@ -502,7 +524,7 @@ def _parse_enum_section(self, section): while walk_fwd_idx < len(section.children): walk_tok = section.children[walk_fwd_idx] - if self._is_empty_line_between_tok(prev_tok, walk_tok): + if _is_empty_line_between_tok(prev_tok, walk_tok): # Once there is an empty line between consecutive tokens we are done. break @@ -516,7 +538,7 @@ def _parse_enum_section(self, section): break elif walk_tok.token == "comment.line.percentage.matlab": # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit. - if self._toks_on_same_line(section.children[idx], walk_tok): + if _toks_on_same_line(section.children[idx], walk_tok): inline_docstring = walk_tok.content[1:] break @@ -540,24 +562,6 @@ def _parse_enum_section(self, section): else: self.enumerations[enum_name]["docstring"] = None - def _toks_on_same_line(self, tok1, tok2): - """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results""" - line1 = self._get_last_line_of_tok(tok1) - line2 = self._get_first_line_of_tok(tok2) - return line1 == line2 - - def _is_empty_line_between_tok(self, tok1, tok2): - """Note: pass tokens in order they appear""" - line1 = self._get_last_line_of_tok(tok1) - line2 = self._get_first_line_of_tok(tok2) - return line2 - line1 > 1 - - def _get_first_line_of_tok(self, tok): - return min([loc[0] for loc in tok.characters.keys()]) - - def _get_last_line_of_tok(self, tok): - return max([loc[0] for loc in tok.characters.keys()]) - if __name__ == "__main__": cls_parse = MatClassParser(rpath) From 597ab641e3eabe9d2e44248cd7bb6363ad5720f4 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Mon, 22 Jul 2024 16:49:16 +0200 Subject: [PATCH 07/45] initial work --- sphinxcontrib/mat_textmate_parser.py | 29 +- sphinxcontrib/mat_types.py | 745 +++------------------------ sphinxcontrib/matlab.py | 1 + 3 files changed, 70 insertions(+), 705 deletions(-) diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py index 8eba0b1..0320df2 100644 --- a/sphinxcontrib/mat_textmate_parser.py +++ b/sphinxcontrib/mat_textmate_parser.py @@ -1,6 +1,4 @@ -from textmate_grammar.parsers.matlab import MatlabParser - -rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" +rpath = "../../../syscop/software/nosnoc/src/NosnocIpoptCallback.m" def find_first_child(curr, tok): @@ -110,12 +108,12 @@ def _parse_argument_section(self, section): arg_name = arg_def.begin[ 0 ].content # Get argument name that is being defined - self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers) + self._parse_argument_validation(arg_name, arg_def, modifiers) def _parse_argument_validation(self, arg_name, arg, modifiers): # TODO This should be identical to propery validation I think. Refactor # First get the size if found - section = "output" if "Output" in modifiers else "params" + section = self.output if "Output" in modifiers else self.params size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) try: # We have a size, therefore parse the comma separated list into tuple size_tok, _ = next(size_gen) @@ -127,7 +125,7 @@ def _parse_argument_validation(self, arg_name, arg, modifiers): depth=1, ) size = tuple([elem[0].content for elem in size_elem_gen]) - self.methods[fun_name][section][arg_name]["size"] = size + section[arg_name]["size"] = size except StopIteration: pass @@ -135,9 +133,7 @@ def _parse_argument_validation(self, arg_name, arg, modifiers): # TODO this should be mapped to known types (though perhaps as a postprocess) type_gen = arg.find(tokens="storage.type.matlab", depth=1) try: - self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[ - 0 - ].content + section[arg_name]["type"] = next(type_gen)[0].content except StopIteration: pass @@ -148,15 +144,13 @@ def _parse_argument_validation(self, arg_name, arg, modifiers): validator_toks = validator_tok.findall( tokens="variable.other.readwrite.matlab", depth=1 ) # TODO Probably bug here in MATLAB-Language-grammar - self.methods[fun_name][section][arg_name]["validators"] = [ - tok[0].content for tok in validator_toks - ] + section[arg_name]["validators"] = [tok[0].content for tok in validator_toks] except StopIteration: pass class MatClassParser: - def __init__(self, path): + def __init__(self, tokens): # DATA self.name = "" self.supers = [] @@ -168,8 +162,7 @@ def __init__(self, path): # Maybe remove continuations as a crutch? currently parser is broken for continuations in attributes # self.parser = MatlabParser(remove_line_continuations=True) - self.parser = MatlabParser() - self.parsed = self.parser.parse_file(path) + self.parsed = tokens self.cls, _ = find_first_child(self.parsed, "meta.class.matlab") if not self.cls: raise Exception() # TODO better exception @@ -189,9 +182,6 @@ def __init__(self, path): for section, _ in enumeration_sections: self._parse_enum_section(section) - import pdb - - pdb.set_trace() def _find_class_docstring(self): try: @@ -474,7 +464,8 @@ def _parse_enum_section(self, section): enum_name = enum_tok.children[0].content self.enumerations[enum_name] = {} if ( - section.children[idx + 1].token == "meta.parens.matlab" + idx + 1 < len(section.children) + and section.children[idx + 1].token == "meta.parens.matlab" ): # Parse out args TODO this should be part of enummember assignment definition args = tuple( [ diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index f76a697..18a9b5a 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -17,6 +17,10 @@ from zipfile import ZipFile import xml.etree.ElementTree as ET import sphinxcontrib.mat_parser as mat_parser +from sphinxcontrib.mat_textmate_parser import MatClassParser, MatFunctionParser +from textmate_grammar.parsers.matlab import MatlabParser +import logging +from pathlib import Path logger = sphinx.util.logging.getLogger("matlab-domain") @@ -430,7 +434,9 @@ def matlabify(objname): # make a full path out of basedir and objname fullpath = os.path.join(MatObject.basedir, objname) # objname fullpath + import pdb + pdb.set_trace() logger.debug( f"[sphinxcontrib-matlabdomain] matlabify {package=}, {objname=}, {fullpath=}" ) @@ -495,40 +501,69 @@ def parse_mfile(mfile, name, path, encoding=None): full_code = code + print(mfile) + # remove the top comment header (if there is one) from the code string - code = mat_parser.remove_comment_header(code) - code = mat_parser.remove_line_continuations(code) - code = mat_parser.fix_function_signatures(code) + # code = mat_parser.remove_comment_header(code) + # code = mat_parser.remove_line_continuations(code) + # code = mat_parser.fix_function_signatures(code) + # TODO: This might not be necessary - tks = list(MatlabLexer().get_tokens(code)) + logging.getLogger("textmate_grammar").setLevel(logging.ERROR) + parser = MatlabParser() + toks = parser.parse_file(mfile) modname = path.replace(os.sep, ".") # module name # assume that functions and classes always start with a keyword def isFunction(token): - return token == (Token.Keyword, "function") + comments_and_functions = [ + "comment.block.percentage.matlab", + "comment.line.percentage.matlab", + "meta.function.matlab", + ] + return all( + [(child.token in comments_and_functions) for child in token.children] + ) def isClass(token): - return token == (Token.Keyword, "classdef") + tok_gen = token.find(tokens="meta.class.matlab", depth=1) + try: + tok, _ = next(tok_gen) + return True + except StopIteration: + return False - if isClass(tks[0]): + if isClass(toks): logger.debug( "[sphinxcontrib-matlabdomain] parsing classdef %s from %s.", name, modname, ) - return MatClass(name, modname, tks) - elif isFunction(tks[0]): + return MatClass(name, modname, toks) + elif isFunction(toks): logger.debug( "[sphinxcontrib-matlabdomain] parsing function %s from %s.", name, modname, ) - return MatFunction(name, modname, tks) + fun_tok_gen = toks.find(tokens="meta.function.matlab") + parsed_function = None + try: + fun_tok, _ = next(fun_tok_gen) + parsed_function = MatFunctionParser(fun_tok) + except StopIteration: + logger.warning( + "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. No function found.", + modname, + name, + ) + return MatFunction(name, modname, toks) else: + pass # it's a script file retoken with header comment - tks = list(MatlabLexer().get_tokens(full_code)) - return MatScript(name, modname, tks) + # tks = list(MatlabLexer().get_tokens(full_code)) + # return MatScript(name, modname, toks) return None @staticmethod @@ -846,177 +881,17 @@ class MatFunction(MatObject): def __init__(self, name, modname, tokens): super(MatFunction, self).__init__(name) + parsed_function = MatFunctionParser(tokens) #: Path of folder containing :class:`MatObject`. self.module = modname - #: List of tokens parsed from mfile by Pygments. - self.tokens = tokens #: docstring - self.docstring = "" + self.docstring = parsed_function.docstring #: output args - self.retv = None + self.retv = parsed_function.outputs #: input args - self.args = None + self.args = parsed_function.params #: remaining tokens after main function is parsed self.rem_tks = None - # ===================================================================== - # parse tokens - # XXX: Pygments always reads MATLAB function signature as: - # [(Token.Keyword, 'function'), # any whitespace is stripped - # (Token.Text.Whitesapce, ' '), # spaces and tabs are concatenated - # (Token.Text, '[o1, o2]'), # if there are outputs, they're all - # concatenated w/ or w/o brackets and any - # trailing whitespace - # (Token.Punctuation, '='), # possibly an equal sign - # (Token.Text.Whitesapce, ' '), # spaces and tabs are concatenated - # (Token.Name.Function, 'myfun'), # the name of the function - # (Token.Punctuation, '('), # opening parenthesis - # (Token.Text, 'a1, a2', # if there are args, they're concatenated - # (Token.Punctuation, ')'), # closing parenthesis - # (Token.Text.Whitesapce, '\n')] # all whitespace after args - # XXX: Pygments does not tolerate MATLAB continuation ellipsis! - tks = copy(self.tokens) # make a copy of tokens - tks.reverse() # reverse in place for faster popping, stacks are LiLo - try: - # ===================================================================== - # parse function signature - # function [output] = name(inputs) - # % docstring - # ===================================================================== - # Skip function token - already checked in MatObject.parse_mfile - tks.pop() - skip_whitespace(tks) - - # Check for return values - retv = tks.pop() - if retv[0] is Token.Text: - self.retv = [rv.strip() for rv in retv[1].strip("[ ]").split(",")] - if len(self.retv) == 1: - # check if return is empty - if not self.retv[0]: - self.retv = None - # check if return delimited by whitespace - elif " " in self.retv[0] or "\t" in self.retv[0]: - self.retv = [ - rv - for rv_tab in self.retv[0].split("\t") - for rv in rv_tab.split(" ") - ] - if tks.pop() != (Token.Punctuation, "="): - # Unlikely to end here. But never-the-less warn! - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. Expected '='.", - modname, - name, - ) - return - - skip_whitespace(tks) - elif retv[0] is Token.Name.Function: - tks.append(retv) - # ===================================================================== - # function name - func_name = tks.pop() - func_name = ( - func_name[0], - func_name[1].strip(" ()"), - ) # Strip () in case of dummy arg - if func_name != (Token.Name.Function, self.name): # @UndefinedVariable - if isinstance(self, MatMethod): - self.name = func_name[1] - else: - logger.warning( - "[sphinxcontrib-matlabdomain] Unexpected function name: '%s'. " - "Expected '%s' in module '%s'.", - func_name[1], - name, - modname, - ) - - # ===================================================================== - # input args - if tks.pop() == (Token.Punctuation, "("): - args = tks.pop() - if args[0] is Token.Text: - self.args = [ - arg.strip() for arg in args[1].split(",") - ] # no arguments given - elif args == (Token.Punctuation, ")"): - # put closing parenthesis back in stack - tks.append(args) - # check if function args parsed correctly - if tks.pop() != (Token.Punctuation, ")"): - # Unlikely to end here. But never-the-less warn! - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in {}.{}. Expected ')'.", - modname, - name, - ) - return - - skip_whitespace(tks) - # ===================================================================== - # docstring - try: - docstring = tks.pop() - except IndexError: - docstring = None - while docstring and docstring[0] is Token.Comment: - self.docstring += docstring[1].lstrip("%") - # Get newline if it exists and append to docstring - try: - wht = tks.pop() # We expect a newline - except IndexError: - break - if wht[0] in (Token.Text, Token.Text.Whitespace) and wht[1] == "\n": - self.docstring += "\n" - # Skip whitespace - try: - wht = tks.pop() # We expect a newline - except IndexError: - break - while wht in list(zip((Token.Text,) * 3, (" ", "\t"))): - try: - wht = tks.pop() - except IndexError: - break - docstring = wht # check if Token is Comment - - # Find the end of the function - used in `MatMethod`` to determine where a method ends. - if docstring is None: - return - kw = docstring # last token - lastkw = 0 # set last keyword placeholder - kw_end = 1 # count function keyword - while kw_end > 0: - # increment keyword-end pairs count - if kw in MATLAB_KEYWORD_REQUIRES_END: - kw_end += 1 - # nested function definition - elif kw[0] is Token.Keyword and kw[1].strip() == "function": - kw_end += 1 - # decrement keyword-end pairs count but - # don't decrement `end` if used as index - elif kw == (Token.Keyword, "end") and not lastkw: - kw_end -= 1 - # save last punctuation - elif kw in MATLAB_FUNC_BRACES_BEGIN: - lastkw += 1 - elif kw in MATLAB_FUNC_BRACES_END: - lastkw -= 1 - try: - kw = tks.pop() - except IndexError: - break - tks.append(kw) # put last token back in list - except IndexError: - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. Check if valid MATLAB code.", - modname, - name, - ) - # if there are any tokens left save them - if len(tks) > 0: - self.rem_tks = tks # save extra tokens def ref_role(self): """Returns role to use for references to this object (e.g. when generating auto-links)""" @@ -1055,525 +930,23 @@ class MatClass(MatMixin, MatObject): def __init__(self, name, modname, tokens): super(MatClass, self).__init__(name) + parsed_class = MatClassParser(tokens) #: Path of folder containing :class:`MatObject`. self.module = modname - #: List of tokens parsed from mfile by Pygments. - self.tokens = tokens #: dictionary of class attributes - self.attrs = {} + self.attrs = parsed_class.attrs #: list of class superclasses - self.bases = [] + self.bases = parsed_class.supers #: docstring - self.docstring = "" + self.docstring = parsed_class.docstring #: dictionary of class properties - self.properties = {} + self.properties = parsed_class.properties #: dictionary of class methods - self.methods = {} - #: dictionary of class enumerations - self.enumerations = {} + self.methods = parsed_class.methods + #: + self.enumerations = parsed_class.enumerations #: remaining tokens after main class definition is parsed self.rem_tks = None - # ===================================================================== - # parse tokens - # TODO: use generator and next() instead of stepping index! - try: - # Skip classdef token - already checked in MatObject.parse_mfile - idx = 1 # token index - - # class "attributes" - self.attrs, idx = self.attributes(idx, MATLAB_CLASS_ATTRIBUTE_TYPES) - - # Check if self.name matches the name in the file. - idx += self._blanks(idx) - if not self.tokens[idx][1] == self.name: - logger.warning( - "[sphinxcontrib-matlabdomain] Unexpected class name: '%s'." - " Expected '%s' in '%s'.", - self.tokens[idx][1], - name, - modname, - ) - - idx += 1 - idx += self._blanks(idx) # skip blanks - # ===================================================================== - # super classes - if self._tk_eq(idx, (Token.Operator, "<")): - idx += 1 - # newline terminates superclasses - while not self._is_newline(idx): - idx += self._blanks(idx) # skip blanks - # concatenate base name - base_name = "" - while ( - not self._whitespace(idx) - and self.tokens[idx][0] is not Token.Comment - ): - base_name += self.tokens[idx][1] - idx += 1 - # If it's a newline, we are done parsing. - if not self._is_newline(idx): - idx += 1 - if base_name: - self.bases.append(base_name) - idx += self._blanks(idx) # skip blanks - # continue to next super class separated by & - if self._tk_eq(idx, (Token.Operator, "&")): - idx += 1 - idx += 1 # end of super classes - # newline terminates classdef signature - elif self._is_newline(idx): - idx += 1 # end of classdef signature - # ===================================================================== - # docstring - idx += self._indent(idx) # calculation indentation - # concatenate docstring - while self.tokens[idx][0] is Token.Comment: - self.docstring += self.tokens[idx][1].lstrip("%") - idx += 1 - # append newline to docstring - if self._is_newline(idx): - self.docstring += self.tokens[idx][1] - idx += 1 - # skip tab - indent = self._indent(idx) # calculation indentation - idx += indent - # ===================================================================== - # properties & methods blocks - # loop over code body searching for blocks until end of class - while self._tk_ne(idx, (Token.Keyword, "end")): - # skip comments and whitespace - while self._whitespace(idx) or self.tokens[idx][0] is Token.Comment: - whitespace = self._whitespace(idx) - if whitespace: - idx += whitespace - else: - idx += 1 - - # ================================================================= - # properties blocks - if self._tk_eq(idx, (Token.Keyword, "properties")): - prop_name = "" - idx += 1 - # property "attributes" - attr_dict, idx = self.attributes( - idx, MATLAB_PROPERTY_ATTRIBUTE_TYPES - ) - # Token.Keyword: "end" terminates properties & methods block - while self._tk_ne(idx, (Token.Keyword, "end")): - # skip whitespace - while self._whitespace(idx): - whitespace = self._whitespace(idx) - if whitespace: - idx += whitespace - else: - idx += 1 - - # ========================================================= - # long docstring before property - if self.tokens[idx][0] is Token.Comment: - # docstring - docstring = "" - - # Collect comment lines - while self.tokens[idx][0] is Token.Comment: - docstring += self.tokens[idx][1].lstrip("%") - idx += 1 - idx += self._blanks(idx) - - try: - # Check if end of line was reached - if self._is_newline(idx): - docstring += "\n" - idx += 1 - idx += self._blanks(idx) - - # Check if variable name is next - if self.tokens[idx][0] is Token.Name: - prop_name = self.tokens[idx][1] - self.properties[prop_name] = { - "attrs": attr_dict - } - self.properties[prop_name][ - "docstring" - ] = docstring - break - - # If there is an empty line at the end of - # the comment: discard it - elif self._is_newline(idx): - docstring = "" - idx += self._whitespace(idx) - break - - except IndexError: - # EOF reached, quit gracefully - break - - # with "%:" directive trumps docstring after property - isTokenName = self.tokens[idx][0] is Token.Name - isTokenNameSubtype = self.tokens[idx][0] in Token.Name.subtypes - if isTokenName or isTokenNameSubtype: - prop_name = self.tokens[idx][1] - idx += 1 - if isTokenNameSubtype: - logger.debug( - "[sphinxcontrib-matlabdomain] WARNING %s.%s.%s is a builtin name.", - self.module, - self.name, - prop_name, - ) - - # Initialize property if it was not already done - if prop_name not in self.properties.keys(): - self.properties[prop_name] = {"attrs": attr_dict} - - # Capture (dimensions) class {validators} as "specs" - # https://mathworks.com/help/matlab/matlab_oop/defining-properties.html - count, propspec = self._propspec(idx) - self.properties[prop_name]["specs"] = propspec - - idx = idx + count - if self._tk_eq(idx, (Token.Punctuation, ";")): - continue - - elif self._tk_eq(idx, (Token.Keyword, "end")): - idx += 1 - break - # skip semicolon after property name, but no default - elif self._tk_eq(idx, (Token.Punctuation, ";")): - idx += 1 - # A comment might come after semi-colon - idx += self._blanks(idx) - if self._is_newline(idx): - idx += 1 - # Property definition is finished; add missing values - if "default" not in self.properties[prop_name].keys(): - self.properties[prop_name]["default"] = None - if "docstring" not in self.properties[prop_name].keys(): - self.properties[prop_name]["docstring"] = None - - continue - elif self.tokens[idx][0] is Token.Comment: - docstring = self.tokens[idx][1].lstrip("%") - docstring += "\n" - self.properties[prop_name]["docstring"] = docstring - idx += 1 - elif self.tokens[idx][0] is Token.Comment: - # Comments seperated with blank lines. - idx = idx - 1 - continue - else: - logger.warning( - "sphinxcontrib-matlabdomain] Expected property in %s.%s - got %s", - self.module, - self.name, - str(self.tokens[idx]), - ) - return - idx += self._blanks(idx) # skip blanks - # ========================================================= - # defaults - default = {"default": None} - if self._tk_eq(idx, (Token.Punctuation, "=")): - idx += 1 - idx += self._blanks(idx) # skip blanks - # concatenate default value until newline or comment - default = "" - brace_count = 0 - # keep reading until newline or comment - # only if all punctuation pairs are closed - # and comment is **not** continuation ellipsis - while ( - ( - not self._is_newline(idx) - and self.tokens[idx][0] is not Token.Comment - ) - or brace_count > 0 - or ( - self.tokens[idx][0] is Token.Comment - and self.tokens[idx][1].startswith("...") - ) - ): - token = self.tokens[idx] - # default has an array spanning multiple lines - # keep track of braces - if token in MATLAB_PROP_BRACES_BEGIN: - brace_count += 1 - # look for end of array - elif token in MATLAB_PROP_BRACES_END: - brace_count -= 1 - # Pygments treats continuation ellipsis as comments - # text from ellipsis until newline is in token - elif token[0] is Token.Comment and token[1].startswith( - "..." - ): - idx += 1 # skip ellipsis comments - # include newline which should follow comment - if self._is_newline(idx): - default += "\n" - idx += 1 - continue - elif self._is_newline(idx - 1) and not self._is_newline( - idx - ): - idx += self._blanks(idx) - continue - elif token[0] is Token.Text and token[1] == " ": - # Skip spaces that are not in strings. - idx += 1 - continue - default += token[1] - idx += 1 - if self.tokens[idx][0] is not Token.Comment: - idx += 1 - if default: - default = {"default": default.rstrip("; ")} - - self.properties[prop_name].update(default) - # ========================================================= - # docstring - if "docstring" not in self.properties[prop_name].keys(): - docstring = {"docstring": None} - if self.tokens[idx][0] is Token.Comment: - docstring["docstring"] = self.tokens[idx][1].lstrip("%") - idx += 1 - self.properties[prop_name].update(docstring) - elif self.tokens[idx][0] is Token.Comment: - # skip this comment - idx += 1 - - idx += self._whitespace(idx) - idx += 1 - # ================================================================= - # method blocks - if self._tk_eq(idx, (Token.Keyword, "methods")): - idx += 1 - # method "attributes" - attr_dict, idx = self.attributes(idx, MATLAB_METHOD_ATTRIBUTE_TYPES) - # Token.Keyword: "end" terminates properties & methods block - while self._tk_ne(idx, (Token.Keyword, "end")): - # skip comments and whitespace - while ( - self._whitespace(idx) - or self.tokens[idx][0] is Token.Comment - ): - whitespace = self._whitespace(idx) - if whitespace: - idx += whitespace - else: - idx += 1 - # skip methods defined in other files - meth_tk = self.tokens[idx] - if ( - meth_tk[0] is Token.Name - or meth_tk[0] is Token.Name.Builtin - or meth_tk[0] is Token.Name.Function - or ( - meth_tk[0] is Token.Keyword - and meth_tk[1].strip() == "function" - and self.tokens[idx + 1][0] is Token.Name.Function - ) - or self._tk_eq(idx, (Token.Punctuation, "[")) - or self._tk_eq(idx, (Token.Punctuation, "]")) - or self._tk_eq(idx, (Token.Punctuation, "=")) - or self._tk_eq(idx, (Token.Punctuation, "(")) - or self._tk_eq(idx, (Token.Punctuation, ")")) - or self._tk_eq(idx, (Token.Punctuation, ";")) - or self._tk_eq(idx, (Token.Punctuation, ",")) - ): - logger.debug( - "[sphinxcontrib-matlabdomain] Skipping tokens for methods defined in separate files." - "Token #%d: %r", - idx, - self.tokens[idx], - ) - idx += 1 + self._whitespace(idx + 1) - elif self._tk_eq(idx, (Token.Keyword, "end")): - idx += 1 - break - else: - # find methods - meth = MatMethod( - self.module, self.tokens[idx:], self, attr_dict - ) - - # Detect getter/setter methods - these are not documented - isGetter = meth.name.startswith("get.") - isSetter = meth.name.startswith("set.") - if not (isGetter or isSetter): - # Add the parsed method to methods dictionary - self.methods[meth.name] = meth - - # Update idx with the number of parsed tokens. - idx += meth.skip_tokens() - idx += self._whitespace(idx) - idx += 1 - if self._tk_eq(idx, (Token.Keyword, "events")): - logger.debug( - "[sphinxcontrib-matlabdomain] ignoring 'events' in 'classdef %s.'", - self.name, - ) - idx += 1 - # Token.Keyword: "end" terminates events block - while self._tk_ne(idx, (Token.Keyword, "end")): - idx += 1 - idx += 1 - if self._tk_eq(idx, (Token.Name, "enumeration")): - logger.debug( - "[sphinxcontrib-matlabdomain] ignoring 'enumeration' in 'classdef %s'.", - self.name, - ) - # no attributes for enums - idx += 1 - # Token.Keyword: "end" terminates events block - while self._tk_ne(idx, (Token.Keyword, "end")): - # skip whitespace - while self._whitespace(idx): - whitespace = self._whitespace(idx) - if whitespace: - idx += whitespace - else: - idx += 1 - - # ========================================================= - # long docstring before property - if self.tokens[idx][0] is Token.Comment: - # docstring - docstring = "" - - # Collect comment lines - while self.tokens[idx][0] is Token.Comment: - docstring += self.tokens[idx][1].lstrip("%") - idx += 1 - idx += self._blanks(idx) - - try: - # Check if end of line was reached - if self._is_newline(idx): - docstring += "\n" - idx += 1 - idx += self._blanks(idx) - - # Check if variable name is next - if self.tokens[idx][0] is Token.Name: - enum_name = self.tokens[idx][1] - self.enumerations[enum_name] = {} - self.enumerations[enum_name][ - "docstring" - ] = docstring - break - - # If there is an empty line at the end of - # the comment: discard it - elif self._is_newline(idx): - docstring = "" - idx += self._whitespace(idx) - break - - except IndexError: - # EOF reached, quit gracefully - break - - # with "%:" directive trumps docstring after property - if self.tokens[idx][0] is Token.Name: - enum_name = self.tokens[idx][1] - idx += 1 - # Initialize property if it was not already done - if enum_name not in self.enumerations.keys(): - self.enumerations[enum_name] = {} - - # skip size, class and functions specifiers - # TODO: parse args and do a postprocessing step. - idx += self._propspec(idx) - - if self._tk_eq(idx, (Token.Punctuation, ";")): - continue - - # This is because matlab allows comma separated list of enums - if self._tk_eq(idx, (Token.Punctuation, ",")): - continue - - # subtype of Name EG Name.Builtin used as Name - elif self.tokens[idx][0] in Token.Name.subtypes: - prop_name = self.tokens[idx][1] - logger.debug( - "[sphinxcontrib-matlabdomain] WARNING %s.%s.%s is a builtin name.", - self.module, - self.name, - prop_name, - ) - self.properties[prop_name] = {"attrs": attr_dict} - idx += 1 - - # skip size, class and functions specifiers - # TODO: Parse old and new style property extras - idx += self._propspec(idx) - - if self._tk_eq(idx, (Token.Punctuation, ";")): - continue - - elif self._tk_eq(idx, (Token.Keyword, "end")): - idx += 1 - break - # skip semicolon after property name, but no default - elif self._tk_eq(idx, (Token.Punctuation, ";")): - idx += 1 - # A comment might come after semi-colon - idx += self._blanks(idx) - if self._is_newline(idx): - idx += 1 - # Property definition is finished; add missing values - if "default" not in self.properties[prop_name].keys(): - self.properties[prop_name]["default"] = None - if "docstring" not in self.properties[prop_name].keys(): - self.properties[prop_name]["docstring"] = None - - continue - elif self.tokens[idx][0] is Token.Comment: - docstring = self.tokens[idx][1].lstrip("%") - docstring += "\n" - self.properties[prop_name]["docstring"] = docstring - idx += 1 - elif self.tokens[idx][0] is Token.Comment: - # Comments seperated with blank lines. - idx = idx - 1 - continue - else: - logger.warning( - "sphinxcontrib-matlabdomain] Expected enumeration in %s.%s - got %s", - self.module, - self.name, - str(self.tokens[idx]), - ) - return - idx += self._blanks(idx) # skip blanks - - # docstring - if "docstring" not in self.enumerations[enum_name].keys(): - docstring = {"docstring": None} - if self.tokens[idx][0] is Token.Comment: - docstring["docstring"] = self.tokens[idx][1].lstrip("%") - idx += 1 - self.enumerations[enum_name].update(docstring) - elif self.tokens[idx][0] is Token.Comment: - # skip this comment - idx += 1 - - idx += self._whitespace(idx) - idx += 1 - if self._tk_eq(idx, (Token.Punctuation, ";")): - # Skip trailing semicolon after end. - idx += 1 - except IndexError: - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. " - "Check if valid MATLAB code.", - modname, - name, - ) - - self.rem_tks = idx # index of last token def ref_role(self): """Returns role to use for references to this object (e.g. when generating auto-links)""" diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py index 5764f9f..bf83a5a 100644 --- a/sphinxcontrib/matlab.py +++ b/sphinxcontrib/matlab.py @@ -879,6 +879,7 @@ def setup(app): app.add_domain(MATLABDomain) # autodoc app.add_config_value("matlab_src_dir", None, "env") + app.add_config_value("matlab_ignore_dirs", [], "env") app.add_config_value("matlab_src_encoding", None, "env") app.add_config_value("matlab_keep_package_prefix", False, "env") app.add_config_value("matlab_show_property_default_value", False, "env") From 54899fb49e24d7efe5c264485c5e08871df07c89 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Tue, 23 Jul 2024 14:32:37 +0200 Subject: [PATCH 08/45] finish integrating mat_textmate_parser with mat_types --- sphinxcontrib/mat_textmate_parser.py | 100 +++++++++++++++++++++++---- sphinxcontrib/mat_types.py | 22 +++--- 2 files changed, 98 insertions(+), 24 deletions(-) diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py index 0320df2..2b4a4e9 100644 --- a/sphinxcontrib/mat_textmate_parser.py +++ b/sphinxcontrib/mat_textmate_parser.py @@ -1,11 +1,15 @@ -rpath = "../../../syscop/software/nosnoc/src/NosnocIpoptCallback.m" +from textmate_grammar.parsers.matlab import MatlabParser +import re +rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -def find_first_child(curr, tok): - ind = [i for i in range(len(curr.children)) if curr.children[i].token == tok] + +def find_first_child(curr, tok, attr="children"): + tok_lst = getattr(curr, attr) + ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok] if not ind: - return None - return (curr.children[ind[0]], ind[0]) + return (None, None) + return (tok_lst[ind[0]], ind[0]) def _toks_on_same_line(tok1, tok2): @@ -48,6 +52,7 @@ def __init__(self, fun_tok): self.outputs = {} self.params = {} + self.attrs = {} for out, _ in output_gen: self.outputs[out.content] = {} @@ -160,8 +165,6 @@ def __init__(self, tokens): self.methods = {} self.enumerations = {} - # Maybe remove continuations as a crutch? currently parser is broken for continuations in attributes - # self.parser = MatlabParser(remove_line_continuations=True) self.parsed = tokens self.cls, _ = find_first_child(self.parsed, "meta.class.matlab") if not self.cls: @@ -187,7 +190,6 @@ def _find_class_docstring(self): try: possible_comment_tok = self.cls.children[1] except IndexError: - print("found no docstring") return if possible_comment_tok.token == "comment.line.percentage.matlab": @@ -197,7 +199,7 @@ def _find_class_docstring(self): 2:-2 ].strip() # [2,-2] strips out block comment delimiters else: - print("found no docstring") + pass def _docstring_lines(self): idx = 1 @@ -300,6 +302,7 @@ def _parse_class_attributes(self, attrs_tok): def _parse_property_section(self, section): # TODO parse property section attrs + attrs = self._parse_attributes(section) idxs = [ i for i in range(len(section.children)) @@ -308,11 +311,30 @@ def _parse_property_section(self, section): for idx in idxs: prop_tok = section.children[idx] prop_name = prop_tok.begin[0].content - self.properties[prop_name] = {} # Create entry for property + self.properties[prop_name] = {"attrs": attrs} # Create entry for property self._parse_property_validation( prop_name, prop_tok ) # Parse property validation. + # Try to find a default assignment: + default = None + _, assgn_idx = find_first_child( + prop_tok, "keyword.operator.assignment.matlab", attr="end" + ) + if assgn_idx is not None: + default = "" + assgn_idx += 1 # skip assignment + while assgn_idx < len(prop_tok.end): + tok = prop_tok.end[assgn_idx] + assgn_idx += 1 + if tok.token in [ + "comment.line.percentage.matlab", + "punctuation.terminator.semicolon.matlab", + ]: + break + default += tok.content + self.properties[prop_name]["default"] = default + # Get inline docstring inline_docstring_gen = prop_tok.find( tokens="comment.line.percentage.matlab", attribute="end" @@ -439,7 +461,7 @@ def _parse_property_validation(self, prop_name, prop): pass def _parse_method_section(self, section): - # TODO parse property section attrs + attrs = self._parse_attributes(section) idxs = [ i for i in range(len(section.children)) @@ -449,9 +471,9 @@ def _parse_method_section(self, section): meth_tok = section.children[idx] parsed_function = MatFunctionParser(meth_tok) self.methods[parsed_function.name] = parsed_function + self.methods[parsed_function.name].attrs = attrs def _parse_enum_section(self, section): - # TODO parse property section attrs idxs = [ i for i in range(len(section.children)) @@ -553,6 +575,58 @@ def _parse_enum_section(self, section): else: self.enumerations[enum_name]["docstring"] = None + def _parse_attributes(self, section): + # walk down child list and parse manually + children = section.begin + idx = 1 + attrs = {} + while idx < len(children): + child_tok = children[idx] + if re.match( + "storage.modifier.(properties|methods|events).matlab", child_tok.token + ): + attr = child_tok.content + val = None + idx += 1 # walk to next token + try: # however we may have walked off the end of the list in which case we exit + maybe_assign_tok = children[idx] + except: + attrs[attr] = val + return attrs + if maybe_assign_tok.token == "keyword.operator.assignment.matlab": + idx += 1 + rhs_tok = children[idx] # parse right hand side + if rhs_tok.token == "meta.cell.literal.matlab": + # A cell. For now just take the whole cell as value. + # TODO parse out the cell array of metaclass literals. + val = "{" + rhs_tok.content + "}" + idx += 1 + elif rhs_tok.token == "constant.language.boolean.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "storage.modifier.access.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "keyword.operator.other.question.matlab": + idx += 1 + metaclass_tok = children[idx] + metaclass_components = metaclass_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.class.matlab", + ] + ) + val = tuple([comp.content for comp, _ in metaclass_components]) + else: + pass + attrs[attr] = val + else: # Comma or continuation therefore skip + idx += 1 + + return attrs + if __name__ == "__main__": - cls_parse = MatClassParser(rpath) + parser = MatlabParser() + toks = parser.parse_file(rpath) + cls_parse = MatClassParser(toks) diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index 18a9b5a..cd7acb9 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -21,6 +21,8 @@ from textmate_grammar.parsers.matlab import MatlabParser import logging from pathlib import Path +import cProfile +import pstats logger = sphinx.util.logging.getLogger("matlab-domain") @@ -434,9 +436,12 @@ def matlabify(objname): # make a full path out of basedir and objname fullpath = os.path.join(MatObject.basedir, objname) # objname fullpath - import pdb - pdb.set_trace() + # Check if path should be ignored + for ignore in MatObject.sphinx_env.config.matlab_ignore_dirs: + if Path(fullpath).is_relative_to(MatObject.basedir, ignore): + return None + logger.debug( f"[sphinxcontrib-matlabdomain] matlabify {package=}, {objname=}, {fullpath=}" ) @@ -501,14 +506,7 @@ def parse_mfile(mfile, name, path, encoding=None): full_code = code - print(mfile) - - # remove the top comment header (if there is one) from the code string - # code = mat_parser.remove_comment_header(code) - # code = mat_parser.remove_line_continuations(code) - # code = mat_parser.fix_function_signatures(code) - # TODO: This might not be necessary - + # quiet the textmate grammar logger and parse the file logging.getLogger("textmate_grammar").setLevel(logging.ERROR) parser = MatlabParser() toks = parser.parse_file(mfile) @@ -1086,7 +1084,9 @@ def __doc__(self): @property def __bases__(self): - bases_ = dict.fromkeys(self.bases) # make copy of bases + bases_ = dict.fromkeys( + [".".join(base) for base in self.bases] + ) # make copy of bases class_entity_table = {} for name, entity in entities_table.items(): if isinstance(entity, MatClass) or "@" in name: From 3c8c18e5f1f3fd5de5f5e28d8a7c732d6503fd99 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Wed, 24 Jul 2024 17:45:51 +0200 Subject: [PATCH 09/45] [skip-ci] some minor changes --- sphinxcontrib/mat_textmate_parser.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py index 2b4a4e9..46236ce 100644 --- a/sphinxcontrib/mat_textmate_parser.py +++ b/sphinxcontrib/mat_textmate_parser.py @@ -1,7 +1,9 @@ from textmate_grammar.parsers.matlab import MatlabParser import re -rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" +# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" + +rpath = "/home/anton/tools/matlabdomain/tests/roots/test_autodoc/target/ClassExample.m" def find_first_child(curr, tok, attr="children"): @@ -186,6 +188,10 @@ def __init__(self, tokens): for section, _ in enumeration_sections: self._parse_enum_section(section) + import pdb + + pdb.set_trace() + def _find_class_docstring(self): try: possible_comment_tok = self.cls.children[1] @@ -272,7 +278,11 @@ def _parse_class_attributes(self, attrs_tok): attr = child_tok.content val = None idx += 1 # walk to next token - maybe_assign_tok = children[idx] + try: # however we may have walked off the end of the list in which case we exit + maybe_assign_tok = children[idx] + except: + self.attrs[attr] = val + break if maybe_assign_tok.token == "keyword.operator.assignment.matlab": idx += 1 rhs_tok = children[idx] # parse right hand side From bf1e6a26197888a714adcb0d5312571c71a0166d Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Wed, 24 Jul 2024 17:47:02 +0200 Subject: [PATCH 10/45] initial work on a tree sitter based parser --- sphinxcontrib/mat_tree_sitter_parser.py | 729 ++++++++++++++++++++++++ 1 file changed, 729 insertions(+) create mode 100644 sphinxcontrib/mat_tree_sitter_parser.py diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py new file mode 100644 index 0000000..e0c039e --- /dev/null +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -0,0 +1,729 @@ +import tree_sitter_matlab as tsml +from tree_sitter import Language, Parser +import re + +# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" + +ML_LANG = Language(tsml.language()) + +rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m" + +# QUERIES +q_classdef = ML_LANG.query( + """(class_definition + "classdef" + (attributes + [(attribute) @attrs _]+ + )? + (identifier) @name + (superclasses + [(property_name) @supers _]+ + )? + ) @class +""" +) + +q_attributes = ML_LANG.query("""(identifier) @name (_)? @value""") + +q_supers = ML_LANG.query("""[(identifier) @secs "."]+ """) + +q_properties = ML_LANG.query( + """(properties + (attributes)? @attrs + (property)* @properties + ) @prop_block +""" +) + +q_methods = ML_LANG.query( + """(methods + (attributes)? @attrs + (function_definition)* @methods + ) @meth_block +""" +) + +q_enumerations = ML_LANG.query( + """(enumeration + (enum)* @enums + ) @enum_block +""" +) + +q_events = ML_LANG.query( + """(events + (attributes)? @attrs + (identifier)* @events + ) @event_block +""" +) + + +def find_first_child(curr, tok, attr="children"): + tok_lst = getattr(curr, attr) + ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok] + if not ind: + return (None, None) + return (tok_lst[ind[0]], ind[0]) + + +def _toks_on_same_line(tok1, tok2): + """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results""" + line1 = _get_last_line_of_tok(tok1) + line2 = _get_first_line_of_tok(tok2) + return line1 == line2 + + +def _is_empty_line_between_tok(tok1, tok2): + """Note: pass tokens in order they appear""" + line1 = _get_last_line_of_tok(tok1) + line2 = _get_first_line_of_tok(tok2) + return line2 - line1 > 1 + + +def _get_first_line_of_tok(tok): + return min([loc[0] for loc in tok.characters.keys()]) + + +def _get_last_line_of_tok(tok): + return max([loc[0] for loc in tok.characters.keys()]) + + +class MatFunctionParser: + def __init__(self, fun_tok): + """Parse Function definition""" + # First find the function name + name_gen = fun_tok.find(tokens="entity.name.function.matlab") + try: + name_tok, _ = next(name_gen) + self.name = name_tok.content + except StopIteration: + # TODO correct error here + raise Exception("Couldn't find function name") + + # Find outputs and parameters + output_gen = fun_tok.find(tokens="variable.parameter.output.matlab") + param_gen = fun_tok.find(tokens="variable.parameter.input.matlab") + + self.outputs = {} + self.params = {} + self.attrs = {} + + for out, _ in output_gen: + self.outputs[out.content] = {} + + for param, _ in param_gen: + self.params[param.content] = {} + + # find arguments blocks + arg_section = None + for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): + self._parse_argument_section(arg_section) + + fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab") + try: + fun_decl_tok, _ = next(fun_decl_gen) + except StopIteration: + raise Exception( + "missing function declaration" + ) # This cant happen as we'd be missing a function name + + # Now parse for docstring + docstring = "" + comment_toks = fun_tok.findall( + tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"] + ) + last_tok = arg_section if arg_section is not None else fun_decl_tok + + for comment_tok, _ in comment_toks: + if _is_empty_line_between_tok(last_tok, comment_tok): + # If we have non-consecutive tokens quit right away. + break + elif ( + not docstring and comment_tok.token == "comment.block.percentage.matlab" + ): + # If we have no previous docstring lines and a comment block we take + # the comment block as the docstring and exit. + docstring = comment_tok.content.strip()[ + 2:-2 + ].strip() # [2,-2] strips out block comment delimiters + break + elif comment_tok.token == "comment.line.percentage.matlab": + # keep parsing comments + docstring += comment_tok.content[1:] + "\n" + else: + # we are done. + break + last_tok = comment_tok + + self.docstring = docstring if docstring else None + + def _parse_argument_section(self, section): + modifiers = [ + mod.content + for mod, _ in section.find(tokens="storage.modifier.arguments.matlab") + ] + arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab") + for arg_def, _ in arg_def_gen: + arg_name = arg_def.begin[ + 0 + ].content # Get argument name that is being defined + self._parse_argument_validation(arg_name, arg_def, modifiers) + + def _parse_argument_validation(self, arg_name, arg, modifiers): + # TODO This should be identical to propery validation I think. Refactor + # First get the size if found + section = self.output if "Output" in modifiers else self.params + size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) + try: # We have a size, therefore parse the comma separated list into tuple + size_tok, _ = next(size_gen) + size_elem_gen = size_tok.find( + tokens=[ + "constant.numeric.decimal.matlab", + "keyword.operator.vector.colon.matlab", + ], + depth=1, + ) + size = tuple([elem[0].content for elem in size_elem_gen]) + section[arg_name]["size"] = size + except StopIteration: + pass + + # Now find the type if it exists + # TODO this should be mapped to known types (though perhaps as a postprocess) + type_gen = arg.find(tokens="storage.type.matlab", depth=1) + try: + section[arg_name]["type"] = next(type_gen)[0].content + except StopIteration: + pass + + # Now find list of validators + validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1) + try: + validator_tok, _ = next(validator_gen) + validator_toks = validator_tok.findall( + tokens="variable.other.readwrite.matlab", depth=1 + ) # TODO Probably bug here in MATLAB-Language-grammar + section[arg_name]["validators"] = [tok[0].content for tok in validator_toks] + except StopIteration: + pass + + +class MatClassParser: + def __init__(self, tree): + # DATA + self.name = "" + self.supers = [] + self.attrs = {} + self.docstring = "" + self.properties = {} + self.methods = {} + self.enumerations = {} + + self.tree = tree + + # Parse class basics + class_matches = q_classdef.matches(tree.root_node) + _, class_match = class_matches[0] + self.cls = class_match.get("class") + self.name = class_match.get("name") + + import pdb + + pdb.set_trace() + # Parse class attrs and supers + attrs_node = class_match.get("attrs") + if attrs_node is not None: + attrs_matches = q_attributes.matches(attrs_node) + for _, match in attrs_matches: + name = match.get("name").text.decode("utf-8") + value_node = match.get("value") + self.attrs[name] = ( + value_node.text.decode("utf-8") if value_node is not None else None + ) + + supers_node = class_match.get("supers") + if supers_node is not None: + supers_matches = q_supers.matches(supers_node) + for _, match in supers_matches: + super_cls = tuple( + [sec.text.decode("utf-8") for sec in match.get("secs")] + ) + self.supers.append(super_cls) + + prop_matches = q_properties.matches(self.cls) + method_matches = q_methods.matches(self.cls) + enumeration_matches = q_enumerations.matches(self.cls) + events_matches = q_events.matches(self.cls) + + self._parse_clsdef() + self._find_class_docstring() + + property_sections = self.cls.findall(tokens="meta.properties.matlab", depth=1) + method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1) + enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1) + + for section, _ in property_sections: + self._parse_property_section(section) + + for section, _ in method_sections: + self._parse_method_section(section) + + for section, _ in enumeration_sections: + self._parse_enum_section(section) + + def _find_class_docstring(self): + try: + possible_comment_tok = self.cls.children[1] + except IndexError: + return + + if possible_comment_tok.token == "comment.line.percentage.matlab": + self._docstring_lines() + elif possible_comment_tok.token == "comment.block.percentage.matlab": + self.docstring = possible_comment_tok.content.strip()[ + 2:-2 + ].strip() # [2,-2] strips out block comment delimiters + else: + pass + + def _docstring_lines(self): + idx = 1 + cls_children = self.cls.children + + while ( + idx < len(cls_children) + and cls_children[idx].token == "comment.line.percentage.matlab" + ): + self.docstring += ( + cls_children[idx].content[1:] + "\n" + ) # [1:] strips out percent sign + idx += 1 + self.docstring = self.docstring.strip() + + def _parse_clsdef(self): + # Try parsing attrs + attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab") + try: + attrs_tok, _ = next(attrs_tok_gen) + self._parse_class_attributes(attrs_tok) + except StopIteration: + pass + + # Parse classname + classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab") + try: + classname_tok, _ = next(classname_tok_gen) + self.name = classname_tok.content + except StopIteration: + print("ClassName not found") # TODO this is probably fatal + + # Parse interited classes + parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab") + + for parent_class_tok, _ in parent_class_toks: + sections = parent_class_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.inherited-class.matlab", + ] + ) + super_cls = tuple([sec.content for sec, _ in sections]) + self.supers.append(super_cls) + # Parse Attributes TODO maybe there is a smarter way to do this? + idx = 0 + while self.clsdef.children[idx].token == "storage.modifier.class.matlab": + attr_tok = self.clsdef.children[idx] + attr = attr_tok.content + val = None # TODO maybe do some typechecking here or we can assume that you give us valid Matlab + idx += 1 + if attr_tok.token == "keyword.operator.assignment.matlab": # pull out r.h.s + idx += 1 + val = self.clsdef.children[idx].content + idx += 1 + if ( + attr_tok.token == "punctuation.separator.modifier.comma.matlab" + ): # skip commas + idx += 1 + self.attrs[attr] = val + + def _parse_class_attributes(self, attrs_tok): + # walk down child list and parse manually + # TODO perhaps contribute a delimited list find to textmate-grammar-python + children = attrs_tok.children + idx = 0 + while idx < len(children): + child_tok = children[idx] + if child_tok.token == "storage.modifier.class.matlab": + attr = child_tok.content + val = None + idx += 1 # walk to next token + try: # however we may have walked off the end of the list in which case we exit + maybe_assign_tok = children[idx] + except: + self.attrs[attr] = val + break + if maybe_assign_tok.token == "keyword.operator.assignment.matlab": + idx += 1 + rhs_tok = children[idx] # parse right hand side + if rhs_tok.token == "meta.cell.literal.matlab": + # A cell. For now just take the whole cell as value. + # TODO parse out the cell array of metaclass literals. + val = "{" + rhs_tok.content + "}" + idx += 1 + elif rhs_tok.token == "constant.language.boolean.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "keyword.operator.other.question.matlab": + idx += 1 + metaclass_tok = children[idx] + metaclass_components = metaclass_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.class.matlab", + ] + ) + val = tuple([comp.content for comp, _ in metaclass_components]) + else: + pass + self.attrs[attr] = val + else: # Comma or continuation therefore skip + idx += 1 + + def _parse_property_section(self, section): + # TODO parse property section attrs + attrs = self._parse_attributes(section) + idxs = [ + i + for i in range(len(section.children)) + if section.children[i].token == "meta.assignment.definition.property.matlab" + ] + for idx in idxs: + prop_tok = section.children[idx] + prop_name = prop_tok.begin[0].content + self.properties[prop_name] = {"attrs": attrs} # Create entry for property + self._parse_property_validation( + prop_name, prop_tok + ) # Parse property validation. + + # Try to find a default assignment: + default = None + _, assgn_idx = find_first_child( + prop_tok, "keyword.operator.assignment.matlab", attr="end" + ) + if assgn_idx is not None: + default = "" + assgn_idx += 1 # skip assignment + while assgn_idx < len(prop_tok.end): + tok = prop_tok.end[assgn_idx] + assgn_idx += 1 + if tok.token in [ + "comment.line.percentage.matlab", + "punctuation.terminator.semicolon.matlab", + ]: + break + default += tok.content + self.properties[prop_name]["default"] = default + + # Get inline docstring + inline_docstring_gen = prop_tok.find( + tokens="comment.line.percentage.matlab", attribute="end" + ) + try: + inline_docstring_tok, _ = next(inline_docstring_gen) + inline_docstring = inline_docstring_tok.content[ + 1: + ] # strip leading % sign + except StopIteration: + inline_docstring = None + + # Walk backwards to get preceding docstring. + preceding_docstring = "" + walk_back_idx = idx - 1 + next_tok = prop_tok + while walk_back_idx >= 0: + walk_tok = section.children[walk_back_idx] + if _is_empty_line_between_tok(walk_tok, next_tok): + # Once there is an empty line between consecutive tokens we are done. + break + + if ( + not preceding_docstring + and walk_tok.token == "comment.block.percentage.matlab" + ): + # block comment immediately preceding enum so we are done. + # TODO we might need to do some postprocessing here to handle indents gracefully + preceding_docstring = walk_tok.content.strip()[2:-2] + break + elif walk_tok.token == "comment.line.percentage.matlab": + preceding_docstring = ( + walk_tok.content[1:] + "\n" + preceding_docstring + ) # [1:] strips % + walk_back_idx -= 1 + next_tok = walk_tok + elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": + walk_back_idx -= 1 + # Dont update next_tok for whitespace + else: + break + + # Walk forwards to get following docstring or inline one. + following_docstring = "" + walk_fwd_idx = idx + 1 + prev_tok = prop_tok + while walk_fwd_idx < len(section.children): + walk_tok = section.children[walk_fwd_idx] + + if _is_empty_line_between_tok(prev_tok, walk_tok): + # Once there is an empty line between consecutive tokens we are done. + break + + if ( + not following_docstring + and walk_tok.token == "comment.block.percentage.matlab" + ): + # block comment immediately following enum so we are done. + # TODO we might need to do some postprocessing here to handle indents gracefully + following_docstring = walk_tok.content.strip()[2:-2] + break + elif walk_tok.token == "comment.line.percentage.matlab": + following_docstring = ( + following_docstring + "\n" + walk_tok.content[1:] + ) # [1:] strips % + walk_fwd_idx += 1 + prev_tok = walk_tok + elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": + walk_fwd_idx += 1 + # Dont update prev_tok for whitespace + else: + break + + if preceding_docstring: + self.properties[prop_name]["docstring"] = preceding_docstring.strip() + elif inline_docstring: + self.properties[prop_name]["docstring"] = inline_docstring.strip() + elif following_docstring: + self.properties[prop_name]["docstring"] = following_docstring.strip() + else: + self.properties[prop_name]["docstring"] = None + + def _parse_property_validation(self, prop_name, prop): + """Parses property validation syntax""" + # First get the szize if found + size_gen = prop.find(tokens="meta.parens.size.matlab", depth=1) + try: # We have a size, therefore parse the comma separated list into tuple + size_tok, _ = next(size_gen) + size_elem_gen = size_tok.find( + tokens=[ + "constant.numeric.decimal.matlab", + "keyword.operator.vector.colon.matlab", + ], + depth=1, + ) + size = tuple([elem[0].content for elem in size_elem_gen]) + self.properties[prop_name]["size"] = size + except StopIteration: + pass + + # Now find the type if it exists + # TODO this should be mapped to known types (though perhaps as a postprocess) + type_gen = prop.find(tokens="storage.type.matlab", depth=1) + try: + self.properties[prop_name]["type"] = next(type_gen)[0].content + except StopIteration: + pass + + # Now find list of validators + validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1) + try: + validator_tok, _ = next(validator_gen) + validator_toks = validator_tok.findall( + tokens=[ + "variable.other.readwrite.matlab", + "meta.function-call.parens.matlab", + ], + depth=1, + ) # TODO Probably bug here in MATLAB-Language-grammar + self.properties[prop_name]["validators"] = [ + tok[0].content for tok in validator_toks + ] + except StopIteration: + pass + + def _parse_method_section(self, section): + attrs = self._parse_attributes(section) + idxs = [ + i + for i in range(len(section.children)) + if section.children[i].token == "meta.function.matlab" + ] + for idx in idxs: + meth_tok = section.children[idx] + parsed_function = MatFunctionParser(meth_tok) + self.methods[parsed_function.name] = parsed_function + self.methods[parsed_function.name].attrs = attrs + + def _parse_enum_section(self, section): + idxs = [ + i + for i in range(len(section.children)) + if section.children[i].token + == "meta.assignment.definition.enummember.matlab" + ] + for idx in idxs: + enum_tok = section.children[idx] + next_idx = idx + enum_name = enum_tok.children[0].content + self.enumerations[enum_name] = {} + if ( + idx + 1 < len(section.children) + and section.children[idx + 1].token == "meta.parens.matlab" + ): # Parse out args TODO this should be part of enummember assignment definition + args = tuple( + [ + arg.content + for arg in section.children[idx + 1].children + if arg.token != "punctuation.separator.comma.matlab" + ] + ) + self.enumerations[enum_name]["args"] = args + next_idx += 1 + + # Walk backwards to get preceding docstring. + preceding_docstring = "" + walk_back_idx = idx - 1 + next_tok = enum_tok + while walk_back_idx >= 0: + walk_tok = section.children[walk_back_idx] + if _is_empty_line_between_tok(walk_tok, next_tok): + # Once there is an empty line between consecutive tokens we are done. + break + + if ( + not preceding_docstring + and walk_tok.token == "comment.block.percentage.matlab" + ): + # block comment immediately preceding enum so we are done. + # TODO we might need to do some postprocessing here to handle indents gracefully + preceding_docstring = walk_tok.content.strip()[2:-2] + break + elif walk_tok.token == "comment.line.percentage.matlab": + preceding_docstring = ( + walk_tok.content[1:] + "\n" + preceding_docstring + ) # [1:] strips % + walk_back_idx -= 1 + next_tok = walk_tok + elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": + walk_back_idx -= 1 + # Dont update next_tok for whitespace + else: + break + + # Walk forwards to get following docstring or inline one. + inline_docstring = "" + following_docstring = "" + walk_fwd_idx = next_idx + 1 + prev_tok = section.children[next_idx] + while walk_fwd_idx < len(section.children): + walk_tok = section.children[walk_fwd_idx] + + if _is_empty_line_between_tok(prev_tok, walk_tok): + # Once there is an empty line between consecutive tokens we are done. + break + + if ( + not following_docstring + and walk_tok.token == "comment.block.percentage.matlab" + ): + # block comment immediately following enum so we are done. + # TODO we might need to do some postprocessing here to handle indents gracefully + following_docstring = walk_tok.content.strip()[2:-2] + break + elif walk_tok.token == "comment.line.percentage.matlab": + # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit. + if _toks_on_same_line(section.children[idx], walk_tok): + inline_docstring = walk_tok.content[1:] + break + + following_docstring = ( + following_docstring + "\n" + walk_tok.content[1:] + ) # [1:] strips % + walk_fwd_idx += 1 + prev_tok = walk_tok + elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": + walk_fwd_idx += 1 + # Dont update prev_tok for whitespace + else: + break + + if preceding_docstring: + self.enumerations[enum_name]["docstring"] = preceding_docstring.strip() + elif inline_docstring: + self.enumerations[enum_name]["docstring"] = inline_docstring.strip() + elif following_docstring: + self.enumerations[enum_name]["docstring"] = following_docstring.strip() + else: + self.enumerations[enum_name]["docstring"] = None + + def _parse_attributes(self, section): + # walk down child list and parse manually + children = section.begin + idx = 1 + attrs = {} + while idx < len(children): + child_tok = children[idx] + if re.match( + "storage.modifier.(properties|methods|events).matlab", child_tok.token + ): + attr = child_tok.content + val = None + idx += 1 # walk to next token + try: # however we may have walked off the end of the list in which case we exit + maybe_assign_tok = children[idx] + except: + attrs[attr] = val + return attrs + if maybe_assign_tok.token == "keyword.operator.assignment.matlab": + idx += 1 + rhs_tok = children[idx] # parse right hand side + if rhs_tok.token == "meta.cell.literal.matlab": + # A cell. For now just take the whole cell as value. + # TODO parse out the cell array of metaclass literals. + val = "{" + rhs_tok.content + "}" + idx += 1 + elif rhs_tok.token == "constant.language.boolean.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "storage.modifier.access.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "keyword.operator.other.question.matlab": + idx += 1 + metaclass_tok = children[idx] + metaclass_components = metaclass_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.class.matlab", + ] + ) + val = tuple([comp.content for comp, _ in metaclass_components]) + else: + pass + attrs[attr] = val + else: # Comma or continuation therefore skip + idx += 1 + + return attrs + + +if __name__ == "__main__": + parser = Parser(ML_LANG) + + with open(rpath, "rb") as f: + data = f.read() + + tree = parser.parse(data) + class_parser = MatClassParser(tree) + import pdb + + pdb.set_trace() From 3090fa5d6df922086663b9473081c21ab1382820 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 25 Jul 2024 17:17:53 +0200 Subject: [PATCH 11/45] nearly finished with tree-sitter implementation --- sphinxcontrib/mat_tree_sitter_parser.py | 1078 ++++++++++------------- 1 file changed, 473 insertions(+), 605 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index e0c039e..98bdfa1 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -11,41 +11,52 @@ # QUERIES q_classdef = ML_LANG.query( """(class_definition + . "classdef" + . (attributes - [(attribute) @attrs _]+ + [(attribute) @attrs _]+ )? + . (identifier) @name + . (superclasses [(property_name) @supers _]+ )? + . + (comment)? @docstring ) @class """ ) -q_attributes = ML_LANG.query("""(identifier) @name (_)? @value""") +q_attributes = ML_LANG.query("""(attribute (identifier) @name (_)? @value)""") q_supers = ML_LANG.query("""[(identifier) @secs "."]+ """) q_properties = ML_LANG.query( """(properties - (attributes)? @attrs - (property)* @properties + . + (attributes + [(attribute) @attrs _]+ + )? + [(property) @properties _]* ) @prop_block """ ) q_methods = ML_LANG.query( """(methods - (attributes)? @attrs - (function_definition)* @methods + (attributes + [(attribute) @attrs _]+ + )? + [(function_definition) @methods _]* ) @meth_block """ ) q_enumerations = ML_LANG.query( """(enumeration - (enum)* @enums + [(enum) @enums _]* ) @enum_block """ ) @@ -58,155 +69,274 @@ """ ) +q_property = ML_LANG.query( + """ + (property name: (identifier) @name + (dimensions + [[(spread_operator) (number)] @dims _]+ + )? + (identifier)? @type + (validation_functions + [[(identifier) (function_call)] @validation_functions _]+ + )? + (default_value (number))? @default + (comment)? @docstring + ) +""" +) -def find_first_child(curr, tok, attr="children"): - tok_lst = getattr(curr, attr) - ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok] - if not ind: - return (None, None) - return (tok_lst[ind[0]], ind[0]) - +q_enum = ML_LANG.query( + """(enum + . + (identifier) @name + [(_) @args _]* + ) +""" +) -def _toks_on_same_line(tok1, tok2): - """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results""" - line1 = _get_last_line_of_tok(tok1) - line2 = _get_first_line_of_tok(tok2) - return line1 == line2 +q_fun = ML_LANG.query( + """(function_definition + . + (function_output + [ + (identifier) @outputs + (multioutput_variable + [(identifier) @outputs _]+ + ) + ] + )? + . + name: (identifier) @name + . + (function_arguments + [(identifier) @params _]* + )? + . + [(arguments_statement) @argblocks _]* + . + (comment)? @docstring + ) +""" +) +q_argblock = ML_LANG.query( + """ + (arguments_statement + . + (attributes + [(attribute) @attrs _]+ + )? + . + [(property) @args _]* + ) +""" +) -def _is_empty_line_between_tok(tok1, tok2): - """Note: pass tokens in order they appear""" - line1 = _get_last_line_of_tok(tok1) - line2 = _get_first_line_of_tok(tok2) - return line2 - line1 > 1 +q_arg = ML_LANG.query( + """ + (property name: + [ + (identifier) @name + (property_name + [(identifier) @name _]+ + ) + ] + (dimensions + [[(spread_operator) (number)] @dims _]+ + )? + (identifier)? @type + (validation_functions + [[(identifier) (function_call)] @validation_functions _]+ + )? + (default_value (number))? @default + (comment)? @docstring + ) +""" +) -def _get_first_line_of_tok(tok): - return min([loc[0] for loc in tok.characters.keys()]) +re_percent_remove = re.compile(r"^[ \t]*%", flags=re.M) -def _get_last_line_of_tok(tok): - return max([loc[0] for loc in tok.characters.keys()]) +def process_text_into_docstring(text): + docstring = text.decode("utf-8") + return re.sub(re_percent_remove, "", docstring) class MatFunctionParser: - def __init__(self, fun_tok): + def __init__(self, fun_node): """Parse Function definition""" - # First find the function name - name_gen = fun_tok.find(tokens="entity.name.function.matlab") - try: - name_tok, _ = next(name_gen) - self.name = name_tok.content - except StopIteration: - # TODO correct error here - raise Exception("Couldn't find function name") - - # Find outputs and parameters - output_gen = fun_tok.find(tokens="variable.parameter.output.matlab") - param_gen = fun_tok.find(tokens="variable.parameter.input.matlab") + _, fun_match = q_fun.matches(fun_node)[0] + self.name = fun_match.get("name").text.decode("utf-8") + # Get outputs (possibly more than one) self.outputs = {} - self.params = {} - self.attrs = {} + output_nodes = fun_match.get("outputs") + if output_nodes is not None: + outputs = [output.text.decode("utf-8") for output in output_nodes] + for output in outputs: + self.outputs[output] = {} - for out, _ in output_gen: - self.outputs[out.content] = {} - - for param, _ in param_gen: - self.params[param.content] = {} - - # find arguments blocks - arg_section = None - for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): - self._parse_argument_section(arg_section) - - fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab") - try: - fun_decl_tok, _ = next(fun_decl_gen) - except StopIteration: - raise Exception( - "missing function declaration" - ) # This cant happen as we'd be missing a function name - - # Now parse for docstring - docstring = "" - comment_toks = fun_tok.findall( - tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"] - ) - last_tok = arg_section if arg_section is not None else fun_decl_tok - - for comment_tok, _ in comment_toks: - if _is_empty_line_between_tok(last_tok, comment_tok): - # If we have non-consecutive tokens quit right away. - break - elif ( - not docstring and comment_tok.token == "comment.block.percentage.matlab" - ): - # If we have no previous docstring lines and a comment block we take - # the comment block as the docstring and exit. - docstring = comment_tok.content.strip()[ - 2:-2 - ].strip() # [2,-2] strips out block comment delimiters - break - elif comment_tok.token == "comment.line.percentage.matlab": - # keep parsing comments - docstring += comment_tok.content[1:] + "\n" - else: - # we are done. - break - last_tok = comment_tok + # Get parameters + self.params = {} + param_nodes = fun_match.get("params") + if output_nodes is not None: + params = [param.text.decode("utf-8") for param in param_nodes] + for param in params: + self.params[param] = {} + + # parse out info from argument blocks + argblock_nodes = fun_match.get("argblocks") + for argblock_node in argblock_nodes: + self._parse_argument_section(argblock_node) + + # + import pdb - self.docstring = docstring if docstring else None + pdb.set_trace() - def _parse_argument_section(self, section): - modifiers = [ - mod.content - for mod, _ in section.find(tokens="storage.modifier.arguments.matlab") - ] - arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab") - for arg_def, _ in arg_def_gen: - arg_name = arg_def.begin[ - 0 - ].content # Get argument name that is being defined - self._parse_argument_validation(arg_name, arg_def, modifiers) - - def _parse_argument_validation(self, arg_name, arg, modifiers): - # TODO This should be identical to propery validation I think. Refactor - # First get the size if found - section = self.output if "Output" in modifiers else self.params - size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) - try: # We have a size, therefore parse the comma separated list into tuple - size_tok, _ = next(size_gen) - size_elem_gen = size_tok.find( - tokens=[ - "constant.numeric.decimal.matlab", - "keyword.operator.vector.colon.matlab", - ], - depth=1, + def _parse_argument_section(self, argblock_node): + _, argblock_match = q_argblock.matches(argblock_node)[0] + attrs_nodes = argblock_match.get("attrs") + attrs = self._parse_attributes(attrs_nodes) + + arguments = argblock_match.get("args") + + # TODO this is almost identical to property parsing. + # might be a good idea to extract common code here. + for arg in arguments: + # match property to extract details + _, arg_match = q_arg.matches(arg)[0] + + # extract name (this is always available so no need for None check) + name = [name.text.decode("utf-8") for name in arg_match.get("name")] + + # extract dims list + dims_list = arg_match.get("dims") + dims = None + if dims_list is not None: + dims = tuple([dim.text.decode("utf-8") for dim in dims_list]) + + # extract type + type_node = arg_match.get("type") + typename = type_node.text.decode("utf-8") if type_node is not None else None + + # extract validator functions + vf_list = arg_match.get("validator_functions") + vfs = None + if vf_list is not None: + vfs = [vf.text.decode("utf-8") for vf in vf_list] + + # extract default + default_node = arg_match.get("default") + default = ( + default_node.text.decode("utf-8") if default_node is not None else None ) - size = tuple([elem[0].content for elem in size_elem_gen]) - section[arg_name]["size"] = size - except StopIteration: - pass - - # Now find the type if it exists - # TODO this should be mapped to known types (though perhaps as a postprocess) - type_gen = arg.find(tokens="storage.type.matlab", depth=1) - try: - section[arg_name]["type"] = next(type_gen)[0].content - except StopIteration: - pass - - # Now find list of validators - validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1) - try: - validator_tok, _ = next(validator_gen) - validator_toks = validator_tok.findall( - tokens="variable.other.readwrite.matlab", depth=1 - ) # TODO Probably bug here in MATLAB-Language-grammar - section[arg_name]["validators"] = [tok[0].content for tok in validator_toks] - except StopIteration: - pass + + # extract inline or following docstring if there is no semicolon + docstring_node = arg_match.get("docstring") + docstring = "" + if docstring_node is not None: + # tree-sitter-matlab combines inline comments with following + # comments which means this requires some relatively ugly + # processing, but worth it for the ease of the rest of it. + prev_sib = docstring_node.prev_named_sibling + if docstring_node.start_point.row == prev_sib.end_point.row: + # if the docstring is on the same line as the end of the definition only take the inline part + docstring = process_text_into_docstring(docstring_node.text) + docstring = docstring.split("\n")[0] + elif docstring_node.start_point.row - prev_sib.end_point.row <= 1: + # Otherwise take the whole docstring + docstring = process_text_into_docstring(docstring_node.text) + + # extract inline or following docstring if there _is_ a semicolon. + # this is only done if we didn't already find a docstring with the previous approach + next_node = arg.next_named_sibling + if next_node is None or docstring is not None: + # Nothing to be done. + pass + elif next_node.type == "comment": + if next_node.start_point.row == arg.end_point.row: + # if the docstring is on the same line as the end of the definition only take the inline part + docstring = process_text_into_docstring(next_node.text) + docstring = docstring.split("\n")[0] + elif next_node.start_point.row - arg.end_point.row <= 1: + # Otherwise take the whole docstring + docstring = process_text_into_docstring(next_node.text) + + # override docstring with prior if exists + prev_node = arg.prev_named_sibling + if prev_node is None: + # Nothing we can do, no previous comment + pass + elif prev_node.type == "comment": + # We have a previous comment if it ends on the previous + # line then we set the docstring. We also need to check + # if the first line of the comment is the same as a + # previous argument. + if arg.start_point.row - prev_node.end_point.row <= 1: + ds = process_text_into_docstring(prev_node.text) + prev_arg = prev_node.prev_named_sibling + if prev_arg is not None and prev_arg.type == "property": + if prev_node.start_point.row == prev_arg.end_point.row: + ds = "\n".join(ds.split("\n")[1:]) + if ds: + docstring = ds + else: + if arg.start_point.row - prev_node.end_point.row <= 1: + docstring = process_text_into_docstring(prev_node.text) + elif prev_node.type == "property": + # The previous argumentnode may have eaten our comment + # check for it a trailing comment. If it is not there + # then we stop looking. + prev_comment = prev_node.named_children[-1] + if prev_comment.type == "comment": + # we now need to check if prev_comment ends on the line + # before ours and trim the first line if it on the same + # line as prev property. + if arg.start_point.row - prev_comment.end_point.row <= 1: + ds = process_text_into_docstring(prev_comment.text) + if ( + prev_comment.start_point.row + == prev_comment.prev_named_sibling.end_point.row + ): + ds = "\n".join(ds.split("\n")[1:]) + if ds: + docstring = ds + # After all that if our docstring is empty then we have none + if docstring.strip() == "": + docstring == None + + # Here we trust that the person is giving us valid matlab. + if "Output" in attrs.keys(): + arg_loc = self.outputs + else: + arg_loc = self.params + if len(name) == 1: + arg_loc[name[0]] = { + "attrs": attrs, + "size": dims, + "type": typename, + "validators": vfs, + "default": default, + "docstring": docstring, + } + else: + # how to handle dotted args + pass + + def _parse_attributes(self, attrs_nodes): + # TOOD deduplicated this + attrs = {} + if attrs_nodes is not None: + for attr_node in attrs_nodes: + _, attr_match = q_attributes.matches(attr_node)[0] + name = attr_match.get("name").text.decode("utf-8") + value_node = attr_match.get("value") + attrs[name] = ( + value_node.text.decode("utf-8") if value_node is not None else None + ) + return attrs class MatClassParser: @@ -228,491 +358,229 @@ def __init__(self, tree): self.cls = class_match.get("class") self.name = class_match.get("name") - import pdb - - pdb.set_trace() # Parse class attrs and supers - attrs_node = class_match.get("attrs") - if attrs_node is not None: - attrs_matches = q_attributes.matches(attrs_node) - for _, match in attrs_matches: - name = match.get("name").text.decode("utf-8") - value_node = match.get("value") - self.attrs[name] = ( - value_node.text.decode("utf-8") if value_node is not None else None - ) + attrs_nodes = class_match.get("attrs") + self.attrs = self._parse_attributes(attrs_nodes) - supers_node = class_match.get("supers") - if supers_node is not None: - supers_matches = q_supers.matches(supers_node) - for _, match in supers_matches: + supers_nodes = class_match.get("supers") + if supers_nodes is not None: + for super_node in supers_nodes: + _, super_match = q_supers.matches(super_node)[0] super_cls = tuple( - [sec.text.decode("utf-8") for sec in match.get("secs")] + [sec.text.decode("utf-8") for sec in super_match.get("secs")] ) self.supers.append(super_cls) + # get docstring and check that it consecutive + docstring_node = class_match.get("docstring") + if docstring_node is not None: + prev_node = docstring_node.prev_sibling + if docstring_node.start_point.row - prev_node.end_point.row <= 1: + self.docstring = process_text_into_docstring(docstring_node.text) + prop_matches = q_properties.matches(self.cls) method_matches = q_methods.matches(self.cls) - enumeration_matches = q_enumerations.matches(self.cls) + enum_matches = q_enumerations.matches(self.cls) events_matches = q_events.matches(self.cls) - self._parse_clsdef() - self._find_class_docstring() - - property_sections = self.cls.findall(tokens="meta.properties.matlab", depth=1) - method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1) - enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1) - - for section, _ in property_sections: - self._parse_property_section(section) - - for section, _ in method_sections: - self._parse_method_section(section) - - for section, _ in enumeration_sections: - self._parse_enum_section(section) - - def _find_class_docstring(self): - try: - possible_comment_tok = self.cls.children[1] - except IndexError: - return - - if possible_comment_tok.token == "comment.line.percentage.matlab": - self._docstring_lines() - elif possible_comment_tok.token == "comment.block.percentage.matlab": - self.docstring = possible_comment_tok.content.strip()[ - 2:-2 - ].strip() # [2,-2] strips out block comment delimiters - else: - pass - - def _docstring_lines(self): - idx = 1 - cls_children = self.cls.children - - while ( - idx < len(cls_children) - and cls_children[idx].token == "comment.line.percentage.matlab" - ): - self.docstring += ( - cls_children[idx].content[1:] + "\n" - ) # [1:] strips out percent sign - idx += 1 - self.docstring = self.docstring.strip() - - def _parse_clsdef(self): - # Try parsing attrs - attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab") - try: - attrs_tok, _ = next(attrs_tok_gen) - self._parse_class_attributes(attrs_tok) - except StopIteration: - pass - - # Parse classname - classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab") - try: - classname_tok, _ = next(classname_tok_gen) - self.name = classname_tok.content - except StopIteration: - print("ClassName not found") # TODO this is probably fatal - - # Parse interited classes - parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab") - - for parent_class_tok, _ in parent_class_toks: - sections = parent_class_tok.findall( - tokens=[ - "entity.name.namespace.matlab", - "entity.other.inherited-class.matlab", - ] - ) - super_cls = tuple([sec.content for sec, _ in sections]) - self.supers.append(super_cls) - # Parse Attributes TODO maybe there is a smarter way to do this? - idx = 0 - while self.clsdef.children[idx].token == "storage.modifier.class.matlab": - attr_tok = self.clsdef.children[idx] - attr = attr_tok.content - val = None # TODO maybe do some typechecking here or we can assume that you give us valid Matlab - idx += 1 - if attr_tok.token == "keyword.operator.assignment.matlab": # pull out r.h.s - idx += 1 - val = self.clsdef.children[idx].content - idx += 1 - if ( - attr_tok.token == "punctuation.separator.modifier.comma.matlab" - ): # skip commas - idx += 1 - self.attrs[attr] = val - - def _parse_class_attributes(self, attrs_tok): - # walk down child list and parse manually - # TODO perhaps contribute a delimited list find to textmate-grammar-python - children = attrs_tok.children - idx = 0 - while idx < len(children): - child_tok = children[idx] - if child_tok.token == "storage.modifier.class.matlab": - attr = child_tok.content - val = None - idx += 1 # walk to next token - try: # however we may have walked off the end of the list in which case we exit - maybe_assign_tok = children[idx] - except: - self.attrs[attr] = val - break - if maybe_assign_tok.token == "keyword.operator.assignment.matlab": - idx += 1 - rhs_tok = children[idx] # parse right hand side - if rhs_tok.token == "meta.cell.literal.matlab": - # A cell. For now just take the whole cell as value. - # TODO parse out the cell array of metaclass literals. - val = "{" + rhs_tok.content + "}" - idx += 1 - elif rhs_tok.token == "constant.language.boolean.matlab": - val = rhs_tok.content - idx += 1 - elif rhs_tok.token == "keyword.operator.other.question.matlab": - idx += 1 - metaclass_tok = children[idx] - metaclass_components = metaclass_tok.findall( - tokens=[ - "entity.name.namespace.matlab", - "entity.other.class.matlab", - ] - ) - val = tuple([comp.content for comp, _ in metaclass_components]) - else: - pass - self.attrs[attr] = val - else: # Comma or continuation therefore skip - idx += 1 - - def _parse_property_section(self, section): - # TODO parse property section attrs - attrs = self._parse_attributes(section) - idxs = [ - i - for i in range(len(section.children)) - if section.children[i].token == "meta.assignment.definition.property.matlab" - ] - for idx in idxs: - prop_tok = section.children[idx] - prop_name = prop_tok.begin[0].content - self.properties[prop_name] = {"attrs": attrs} # Create entry for property - self._parse_property_validation( - prop_name, prop_tok - ) # Parse property validation. - - # Try to find a default assignment: - default = None - _, assgn_idx = find_first_child( - prop_tok, "keyword.operator.assignment.matlab", attr="end" - ) - if assgn_idx is not None: - default = "" - assgn_idx += 1 # skip assignment - while assgn_idx < len(prop_tok.end): - tok = prop_tok.end[assgn_idx] - assgn_idx += 1 - if tok.token in [ - "comment.line.percentage.matlab", - "punctuation.terminator.semicolon.matlab", - ]: - break - default += tok.content - self.properties[prop_name]["default"] = default - - # Get inline docstring - inline_docstring_gen = prop_tok.find( - tokens="comment.line.percentage.matlab", attribute="end" + for _, prop_match in prop_matches: + self._parse_property_section(prop_match) + for _, enum_match in enum_matches: + self._parse_enum_section(enum_match) + for _, method_match in method_matches: + self._parse_method_section(method_match) + import pdb + + pdb.set_trace() + + def _parse_property_section(self, props_match): + # extract property section attributes + attrs_nodes = props_match.get("attrs") + attrs = self._parse_attributes(attrs_nodes) + + properties = props_match.get("properties") + + for prop in properties: + # match property to extract details + _, prop_match = q_property.matches(prop)[0] + + # extract name (this is always available so no need for None check) + name = prop_match.get("name").text.decode("utf-8") + + # extract dims list + dims_list = prop_match.get("dims") + dims = None + if dims_list is not None: + dims = tuple([dim.text.decode("utf-8") for dim in dims_list]) + + # extract type + type_node = prop_match.get("type") + typename = type_node.text.decode("utf-8") if type_node is not None else None + + # extract validator functions + vf_list = prop_match.get("validator_functions") + vfs = None + if vf_list is not None: + vfs = [vf.text.decode("utf-8") for vf in vf_list] + + # extract default + default_node = prop_match.get("default") + default = ( + default_node.text.decode("utf-8") if default_node is not None else None ) - try: - inline_docstring_tok, _ = next(inline_docstring_gen) - inline_docstring = inline_docstring_tok.content[ - 1: - ] # strip leading % sign - except StopIteration: - inline_docstring = None - - # Walk backwards to get preceding docstring. - preceding_docstring = "" - walk_back_idx = idx - 1 - next_tok = prop_tok - while walk_back_idx >= 0: - walk_tok = section.children[walk_back_idx] - if _is_empty_line_between_tok(walk_tok, next_tok): - # Once there is an empty line between consecutive tokens we are done. - break - - if ( - not preceding_docstring - and walk_tok.token == "comment.block.percentage.matlab" - ): - # block comment immediately preceding enum so we are done. - # TODO we might need to do some postprocessing here to handle indents gracefully - preceding_docstring = walk_tok.content.strip()[2:-2] - break - elif walk_tok.token == "comment.line.percentage.matlab": - preceding_docstring = ( - walk_tok.content[1:] + "\n" + preceding_docstring - ) # [1:] strips % - walk_back_idx -= 1 - next_tok = walk_tok - elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": - walk_back_idx -= 1 - # Dont update next_tok for whitespace - else: - break - - # Walk forwards to get following docstring or inline one. - following_docstring = "" - walk_fwd_idx = idx + 1 - prev_tok = prop_tok - while walk_fwd_idx < len(section.children): - walk_tok = section.children[walk_fwd_idx] - - if _is_empty_line_between_tok(prev_tok, walk_tok): - # Once there is an empty line between consecutive tokens we are done. - break - - if ( - not following_docstring - and walk_tok.token == "comment.block.percentage.matlab" - ): - # block comment immediately following enum so we are done. - # TODO we might need to do some postprocessing here to handle indents gracefully - following_docstring = walk_tok.content.strip()[2:-2] - break - elif walk_tok.token == "comment.line.percentage.matlab": - following_docstring = ( - following_docstring + "\n" + walk_tok.content[1:] - ) # [1:] strips % - walk_fwd_idx += 1 - prev_tok = walk_tok - elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": - walk_fwd_idx += 1 - # Dont update prev_tok for whitespace + + # extract inline or following docstring if there is no semicolon + docstring_node = prop_match.get("docstring") + docstring = "" + if docstring_node is not None: + # tree-sitter-matlab combines inline comments with following + # comments which means this requires some relatively ugly + # processing, but worth it for the ease of the rest of it. + prev_sib = docstring_node.prev_named_sibling + if docstring_node.start_point.row == prev_sib.end_point.row: + # if the docstring is on the same line as the end of the definition only take the inline part + docstring = process_text_into_docstring(docstring_node.text) + docstring = docstring.split("\n")[0] + elif docstring_node.start_point.row - prev_sib.end_point.row <= 1: + # Otherwise take the whole docstring + docstring = process_text_into_docstring(docstring_node.text) + + # extract inline or following docstring if there _is_ a semicolon. + # this is only done if we didn't already find a docstring with the previous approach + next_node = prop.next_named_sibling + if next_node is None or docstring is not None: + # Nothing to be done. + pass + elif next_node.type == "comment": + if next_node.start_point.row == prop.end_point.row: + # if the docstring is on the same line as the end of the definition only take the inline part + docstring = process_text_into_docstring(next_node.text) + docstring = docstring.split("\n")[0] + elif next_node.start_point.row - prop.end_point.row <= 1: + # Otherwise take the whole docstring + docstring = process_text_into_docstring(next_node.text) + + # override docstring with prior if exists + prev_node = prop.prev_named_sibling + if prev_node is None: + # Nothing we can do, no previous comment + pass + elif prev_node.type == "comment": + # We have a previous comment if it ends on the previous + # line then we set the docstring. We also need to check + # if the first line of the comment is the same as a + # previous property. + if prop.start_point.row - prev_node.end_point.row <= 1: + ds = process_text_into_docstring(prev_node.text) + prev_prop = prev_node.prev_named_sibling + if prev_prop is not None and prev_prop.type == "property": + if prev_node.start_point.row == prev_prop.end_point.row: + ds = "\n".join(ds.split("\n")[1:]) + if ds: + docstring = ds else: - break - - if preceding_docstring: - self.properties[prop_name]["docstring"] = preceding_docstring.strip() - elif inline_docstring: - self.properties[prop_name]["docstring"] = inline_docstring.strip() - elif following_docstring: - self.properties[prop_name]["docstring"] = following_docstring.strip() - else: - self.properties[prop_name]["docstring"] = None - - def _parse_property_validation(self, prop_name, prop): - """Parses property validation syntax""" - # First get the szize if found - size_gen = prop.find(tokens="meta.parens.size.matlab", depth=1) - try: # We have a size, therefore parse the comma separated list into tuple - size_tok, _ = next(size_gen) - size_elem_gen = size_tok.find( - tokens=[ - "constant.numeric.decimal.matlab", - "keyword.operator.vector.colon.matlab", - ], - depth=1, - ) - size = tuple([elem[0].content for elem in size_elem_gen]) - self.properties[prop_name]["size"] = size - except StopIteration: - pass - - # Now find the type if it exists - # TODO this should be mapped to known types (though perhaps as a postprocess) - type_gen = prop.find(tokens="storage.type.matlab", depth=1) - try: - self.properties[prop_name]["type"] = next(type_gen)[0].content - except StopIteration: - pass - - # Now find list of validators - validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1) - try: - validator_tok, _ = next(validator_gen) - validator_toks = validator_tok.findall( - tokens=[ - "variable.other.readwrite.matlab", - "meta.function-call.parens.matlab", - ], - depth=1, - ) # TODO Probably bug here in MATLAB-Language-grammar - self.properties[prop_name]["validators"] = [ - tok[0].content for tok in validator_toks - ] - except StopIteration: - pass - - def _parse_method_section(self, section): - attrs = self._parse_attributes(section) - idxs = [ - i - for i in range(len(section.children)) - if section.children[i].token == "meta.function.matlab" - ] - for idx in idxs: - meth_tok = section.children[idx] - parsed_function = MatFunctionParser(meth_tok) + if prop.start_point.row - prev_node.end_point.row <= 1: + docstring = process_text_into_docstring(prev_node.text) + elif prev_node.type == "property": + # The previous property node may have eaten our comment + # check for it a trailing comment. If it is not there + # then we stop looking. + prev_comment = prev_node.named_children[-1] + if prev_comment.type == "comment": + # we now need to check if prev_comment ends on the line + # before ours and trim the first line if it on the same + # line as prev property. + if prop.start_point.row - prev_comment.end_point.row <= 1: + ds = process_text_into_docstring(prev_comment.text) + if ( + prev_comment.start_point.row + == prev_comment.prev_named_sibling.end_point.row + ): + ds = "\n".join(ds.split("\n")[1:]) + if ds: + docstring = ds + # After all that if our docstring is empty then we have none + if docstring.strip() == "": + docstring == None + + self.properties[name] = { + "attrs": attrs, + "size": dims, + "type": typename, + "validators": vfs, + "default": default, + "docstring": docstring, + } + + def _parse_method_section(self, methods_match): + attrs_nodes = methods_match.get("attrs") + attrs = self._parse_attributes(attrs_nodes) + methods = methods_match.get("methods") + for method in methods: + parsed_function = MatFunctionParser(method) self.methods[parsed_function.name] = parsed_function self.methods[parsed_function.name].attrs = attrs - def _parse_enum_section(self, section): - idxs = [ - i - for i in range(len(section.children)) - if section.children[i].token - == "meta.assignment.definition.enummember.matlab" - ] - for idx in idxs: - enum_tok = section.children[idx] - next_idx = idx - enum_name = enum_tok.children[0].content - self.enumerations[enum_name] = {} - if ( - idx + 1 < len(section.children) - and section.children[idx + 1].token == "meta.parens.matlab" - ): # Parse out args TODO this should be part of enummember assignment definition - args = tuple( - [ - arg.content - for arg in section.children[idx + 1].children - if arg.token != "punctuation.separator.comma.matlab" - ] - ) - self.enumerations[enum_name]["args"] = args - next_idx += 1 - - # Walk backwards to get preceding docstring. - preceding_docstring = "" - walk_back_idx = idx - 1 - next_tok = enum_tok - while walk_back_idx >= 0: - walk_tok = section.children[walk_back_idx] - if _is_empty_line_between_tok(walk_tok, next_tok): - # Once there is an empty line between consecutive tokens we are done. - break - - if ( - not preceding_docstring - and walk_tok.token == "comment.block.percentage.matlab" - ): - # block comment immediately preceding enum so we are done. - # TODO we might need to do some postprocessing here to handle indents gracefully - preceding_docstring = walk_tok.content.strip()[2:-2] - break - elif walk_tok.token == "comment.line.percentage.matlab": - preceding_docstring = ( - walk_tok.content[1:] + "\n" + preceding_docstring - ) # [1:] strips % - walk_back_idx -= 1 - next_tok = walk_tok - elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": - walk_back_idx -= 1 - # Dont update next_tok for whitespace - else: - break - - # Walk forwards to get following docstring or inline one. - inline_docstring = "" - following_docstring = "" - walk_fwd_idx = next_idx + 1 - prev_tok = section.children[next_idx] - while walk_fwd_idx < len(section.children): - walk_tok = section.children[walk_fwd_idx] - - if _is_empty_line_between_tok(prev_tok, walk_tok): - # Once there is an empty line between consecutive tokens we are done. - break - - if ( - not following_docstring - and walk_tok.token == "comment.block.percentage.matlab" - ): - # block comment immediately following enum so we are done. - # TODO we might need to do some postprocessing here to handle indents gracefully - following_docstring = walk_tok.content.strip()[2:-2] - break - elif walk_tok.token == "comment.line.percentage.matlab": - # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit. - if _toks_on_same_line(section.children[idx], walk_tok): - inline_docstring = walk_tok.content[1:] - break - - following_docstring = ( - following_docstring + "\n" + walk_tok.content[1:] - ) # [1:] strips % - walk_fwd_idx += 1 - prev_tok = walk_tok - elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": - walk_fwd_idx += 1 - # Dont update prev_tok for whitespace - else: - break - - if preceding_docstring: - self.enumerations[enum_name]["docstring"] = preceding_docstring.strip() - elif inline_docstring: - self.enumerations[enum_name]["docstring"] = inline_docstring.strip() - elif following_docstring: - self.enumerations[enum_name]["docstring"] = following_docstring.strip() + def _parse_enum_section(self, enums_match): + enums = enums_match.get("enums") + for enum in enums: + _, enum_match = q_enum.matches(enum)[0] + name = enum_match.get("name").text.decode("utf-8") + arg_nodes = enum_match.get("args") + if arg_nodes is not None: + args = [arg.text.decode("utf-8") for arg in arg_nodes] else: - self.enumerations[enum_name]["docstring"] = None + args = None + + docstring = "" + # look forward for docstring + next_node = enum.next_named_sibling + if next_node is not None and next_node.type == "comment": + if next_node.start_point.row == enum.end_point.row: + # if the docstring is on the same line as the end of the definition only take the inline part + docstring = process_text_into_docstring(next_node.text) + docstring = docstring.split("\n")[0] + elif next_node.start_point.row - enum.end_point.row <= 1: + # Otherwise take the whole docstring + docstring = process_text_into_docstring(next_node.text) + + # override docstring with prior if exists + prev_node = enum.prev_named_sibling + if prev_node is None: + # Nothing we can do, no previous comment + pass + elif prev_node.type == "comment": + # We have a previous comment if it ends on the previous + # line then we set the docstring. We also need to check + # if the first line of the comment is the same as a + # previous enum. + if enum.start_point.row - prev_node.end_point.row <= 1: + ds = process_text_into_docstring(prev_node.text) + prev_enum = prev_node.prev_named_sibling + if prev_enum is not None and prev_enum.type == "enum": + if prev_node.start_point.row == prev_enum.end_point.row: + ds = "\n".join(ds.split("\n")[1:]) + if ds: + docstring = ds + else: + if enum.start_point.row - prev_node.end_point.row <= 1: + docstring = process_text_into_docstring(prev_node.text) + # After all that if our docstring is empty then we have none + if docstring.strip() == "": + docstring == None - def _parse_attributes(self, section): - # walk down child list and parse manually - children = section.begin - idx = 1 - attrs = {} - while idx < len(children): - child_tok = children[idx] - if re.match( - "storage.modifier.(properties|methods|events).matlab", child_tok.token - ): - attr = child_tok.content - val = None - idx += 1 # walk to next token - try: # however we may have walked off the end of the list in which case we exit - maybe_assign_tok = children[idx] - except: - attrs[attr] = val - return attrs - if maybe_assign_tok.token == "keyword.operator.assignment.matlab": - idx += 1 - rhs_tok = children[idx] # parse right hand side - if rhs_tok.token == "meta.cell.literal.matlab": - # A cell. For now just take the whole cell as value. - # TODO parse out the cell array of metaclass literals. - val = "{" + rhs_tok.content + "}" - idx += 1 - elif rhs_tok.token == "constant.language.boolean.matlab": - val = rhs_tok.content - idx += 1 - elif rhs_tok.token == "storage.modifier.access.matlab": - val = rhs_tok.content - idx += 1 - elif rhs_tok.token == "keyword.operator.other.question.matlab": - idx += 1 - metaclass_tok = children[idx] - metaclass_components = metaclass_tok.findall( - tokens=[ - "entity.name.namespace.matlab", - "entity.other.class.matlab", - ] - ) - val = tuple([comp.content for comp, _ in metaclass_components]) - else: - pass - attrs[attr] = val - else: # Comma or continuation therefore skip - idx += 1 + self.enumerations[name] = {"args": args, "docstring": docstring} + def _parse_attributes(self, attrs_nodes): + attrs = {} + if attrs_nodes is not None: + for attr_node in attrs_nodes: + _, attr_match = q_attributes.matches(attr_node)[0] + name = attr_match.get("name").text.decode("utf-8") + value_node = attr_match.get("value") + attrs[name] = ( + value_node.text.decode("utf-8") if value_node is not None else None + ) return attrs From b0d603ee49984e30e304dd1173ff51ea6ef04284 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sat, 27 Jul 2024 10:57:10 +0200 Subject: [PATCH 12/45] everything but events working --- sphinxcontrib/mat_tree_sitter_parser.py | 48 ++++++++++++++----------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 98bdfa1..b277c8c 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -2,12 +2,11 @@ from tree_sitter import Language, Parser import re -# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" +rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" +# rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m" ML_LANG = Language(tsml.language()) -rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m" - # QUERIES q_classdef = ML_LANG.query( """(class_definition @@ -155,13 +154,16 @@ ) -re_percent_remove = re.compile(r"^[ \t]*%", flags=re.M) - +re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M) +re_assign_remove = re.compile(r"^=[ \t]*") def process_text_into_docstring(text): docstring = text.decode("utf-8") return re.sub(re_percent_remove, "", docstring) +def process_default(text): + default = text.decode("utf-8") + return re.sub(re_assign_remove, "", default) class MatFunctionParser: def __init__(self, fun_node): @@ -180,20 +182,29 @@ def __init__(self, fun_node): # Get parameters self.params = {} param_nodes = fun_match.get("params") - if output_nodes is not None: + if param_nodes is not None: params = [param.text.decode("utf-8") for param in param_nodes] for param in params: self.params[param] = {} # parse out info from argument blocks argblock_nodes = fun_match.get("argblocks") - for argblock_node in argblock_nodes: - self._parse_argument_section(argblock_node) + if argblock_nodes is not None: + for argblock_node in argblock_nodes: + self._parse_argument_section(argblock_node) - # - import pdb + # get docstring + docstring_node = fun_match.get("docstring") + docstring = None + if docstring_node is not None: + prev_sib = docstring_node.prev_named_sibling + if docstring_node.start_point.row - prev_sib.end_point.row <= 1: + docstring = process_text_into_docstring(docstring_node.text) - pdb.set_trace() + if not docstring: + docstring = None + self.docstring = docstring + def _parse_argument_section(self, argblock_node): _, argblock_match = q_argblock.matches(argblock_node)[0] @@ -230,7 +241,7 @@ def _parse_argument_section(self, argblock_node): # extract default default_node = arg_match.get("default") default = ( - default_node.text.decode("utf-8") if default_node is not None else None + process_default(default_node.text) if default_node is not None else None ) # extract inline or following docstring if there is no semicolon @@ -304,8 +315,8 @@ def _parse_argument_section(self, argblock_node): if ds: docstring = ds # After all that if our docstring is empty then we have none - if docstring.strip() == "": - docstring == None + if not docstring.strip(): + docstring = None # Here we trust that the person is giving us valid matlab. if "Output" in attrs.keys(): @@ -426,7 +437,7 @@ def _parse_property_section(self, props_match): # extract default default_node = prop_match.get("default") default = ( - default_node.text.decode("utf-8") if default_node is not None else None + process_default(default_node.text) if default_node is not None else None ) # extract inline or following docstring if there is no semicolon @@ -500,8 +511,8 @@ def _parse_property_section(self, props_match): if ds: docstring = ds # After all that if our docstring is empty then we have none - if docstring.strip() == "": - docstring == None + if not docstring.strip(): + docstring = None self.properties[name] = { "attrs": attrs, @@ -592,6 +603,3 @@ def _parse_attributes(self, attrs_nodes): tree = parser.parse(data) class_parser = MatClassParser(tree) - import pdb - - pdb.set_trace() From f12b0a47588bc82e261efcccae03ef06464f7333 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sat, 27 Jul 2024 11:39:32 +0200 Subject: [PATCH 13/45] working events --- sphinxcontrib/mat_tree_sitter_parser.py | 69 ++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index b277c8c..6e42dbb 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -2,8 +2,8 @@ from tree_sitter import Language, Parser import re -rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -# rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m" +#rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" +rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m" ML_LANG = Language(tsml.language()) @@ -38,7 +38,7 @@ (attributes [(attribute) @attrs _]+ )? - [(property) @properties _]* + [(property) @properties _]+ ) @prop_block """ ) @@ -48,22 +48,24 @@ (attributes [(attribute) @attrs _]+ )? - [(function_definition) @methods _]* + [(function_definition) @methods _]+ ) @meth_block """ ) q_enumerations = ML_LANG.query( """(enumeration - [(enum) @enums _]* + [(enum) @enums _]+ ) @enum_block """ ) q_events = ML_LANG.query( """(events - (attributes)? @attrs - (identifier)* @events + (attributes + [(attribute) @attrs _]+ + )? + (identifier)+ @events ) @event_block """ ) @@ -360,6 +362,7 @@ def __init__(self, tree): self.properties = {} self.methods = {} self.enumerations = {} + self.events = {} self.tree = tree @@ -392,7 +395,7 @@ def __init__(self, tree): prop_matches = q_properties.matches(self.cls) method_matches = q_methods.matches(self.cls) enum_matches = q_enumerations.matches(self.cls) - events_matches = q_events.matches(self.cls) + event_matches = q_events.matches(self.cls) for _, prop_match in prop_matches: self._parse_property_section(prop_match) @@ -400,6 +403,8 @@ def __init__(self, tree): self._parse_enum_section(enum_match) for _, method_match in method_matches: self._parse_method_section(method_match) + for _, event_match in event_matches: + self._parse_event_section(event_match) import pdb pdb.set_trace() @@ -582,6 +587,54 @@ def _parse_enum_section(self, enums_match): self.enumerations[name] = {"args": args, "docstring": docstring} + def _parse_event_section(self, events_match): + attrs_nodes = events_match.get("attrs") + attrs = self._parse_attributes(attrs_nodes) + events = events_match.get("events") + for event in events: + name = event.text.decode("utf-8") + + docstring = "" + # look forward for docstring + next_node = event.next_named_sibling + if next_node is not None and next_node.type == "comment": + if next_node.start_point.row == event.end_point.row: + # if the docstring is on the same line as the end of the definition only take the inline part + docstring = process_text_into_docstring(next_node.text) + docstring = docstring.split("\n")[0] + elif next_node.start_point.row - event.end_point.row <= 1: + # Otherwise take the whole docstring + docstring = process_text_into_docstring(next_node.text) + + # override docstring with prior if exists + prev_node = event.prev_named_sibling + if prev_node is None: + # Nothing we can do, no previous comment + pass + elif prev_node.type == "comment": + # We have a previous comment if it ends on the previous + # line then we set the docstring. We also need to check + # if the first line of the comment is the same as a + # previous event. + if event.start_point.row - prev_node.end_point.row <= 1: + ds = process_text_into_docstring(prev_node.text) + prev_event = prev_node.prev_named_sibling + if prev_event is not None and prev_event.type == "identifier": + if prev_node.start_point.row == prev_event.end_point.row: + ds = "\n".join(ds.split("\n")[1:]) + if ds: + docstring = ds + else: + if event.start_point.row - prev_node.end_point.row <= 1: + docstring = process_text_into_docstring(prev_node.text) + # After all that if our docstring is empty then we have none + if docstring.strip() == "": + docstring == None + + self.events[name] = {"attrs": attrs, "docstring": docstring} + + import pdb; pdb.set_trace() + def _parse_attributes(self, attrs_nodes): attrs = {} if attrs_nodes is not None: From 522e69016b9fb5355c004b886a57820985ca89cc Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sat, 27 Jul 2024 11:45:29 +0200 Subject: [PATCH 14/45] exit early if query returns for block with no elements --- sphinxcontrib/mat_tree_sitter_parser.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 6e42dbb..dc18a98 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -410,12 +410,12 @@ def __init__(self, tree): pdb.set_trace() def _parse_property_section(self, props_match): + properties = props_match.get("properties") + if properties is None: + return # extract property section attributes attrs_nodes = props_match.get("attrs") attrs = self._parse_attributes(attrs_nodes) - - properties = props_match.get("properties") - for prop in properties: # match property to extract details _, prop_match = q_property.matches(prop)[0] @@ -529,9 +529,11 @@ def _parse_property_section(self, props_match): } def _parse_method_section(self, methods_match): + methods = methods_match.get("methods") + if methods is None: + return attrs_nodes = methods_match.get("attrs") attrs = self._parse_attributes(attrs_nodes) - methods = methods_match.get("methods") for method in methods: parsed_function = MatFunctionParser(method) self.methods[parsed_function.name] = parsed_function @@ -539,6 +541,8 @@ def _parse_method_section(self, methods_match): def _parse_enum_section(self, enums_match): enums = enums_match.get("enums") + if enums is None: + return for enum in enums: _, enum_match = q_enum.matches(enum)[0] name = enum_match.get("name").text.decode("utf-8") @@ -591,6 +595,8 @@ def _parse_event_section(self, events_match): attrs_nodes = events_match.get("attrs") attrs = self._parse_attributes(attrs_nodes) events = events_match.get("events") + if events is None: + return for event in events: name = event.text.decode("utf-8") From 1c07f161de9d1135405cb16a296333f04af4aecd Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sat, 27 Jul 2024 16:29:22 +0200 Subject: [PATCH 15/45] integrating tree-sitter parser into mat_types --- sphinxcontrib/mat_tree_sitter_parser.py | 15 +++--- sphinxcontrib/mat_types.py | 61 ++++++++++--------------- 2 files changed, 30 insertions(+), 46 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index dc18a98..a75eb53 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -168,9 +168,9 @@ def process_default(text): return re.sub(re_assign_remove, "", default) class MatFunctionParser: - def __init__(self, fun_node): + def __init__(self, root_node): """Parse Function definition""" - _, fun_match = q_fun.matches(fun_node)[0] + _, fun_match = q_fun.matches(root_node)[0] self.name = fun_match.get("name").text.decode("utf-8") # Get outputs (possibly more than one) @@ -353,7 +353,7 @@ def _parse_attributes(self, attrs_nodes): class MatClassParser: - def __init__(self, tree): + def __init__(self, root_node): # DATA self.name = "" self.supers = [] @@ -364,10 +364,10 @@ def __init__(self, tree): self.enumerations = {} self.events = {} - self.tree = tree + self.root_node = root_node # Parse class basics - class_matches = q_classdef.matches(tree.root_node) + class_matches = q_classdef.matches(root_node) _, class_match = class_matches[0] self.cls = class_match.get("class") self.name = class_match.get("name") @@ -405,9 +405,6 @@ def __init__(self, tree): self._parse_method_section(method_match) for _, event_match in event_matches: self._parse_event_section(event_match) - import pdb - - pdb.set_trace() def _parse_property_section(self, props_match): properties = props_match.get("properties") @@ -661,4 +658,4 @@ def _parse_attributes(self, attrs_nodes): data = f.read() tree = parser.parse(data) - class_parser = MatClassParser(tree) + class_parser = MatClassParser(tree.root_node) diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index cd7acb9..824c0b5 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -17,8 +17,9 @@ from zipfile import ZipFile import xml.etree.ElementTree as ET import sphinxcontrib.mat_parser as mat_parser -from sphinxcontrib.mat_textmate_parser import MatClassParser, MatFunctionParser -from textmate_grammar.parsers.matlab import MatlabParser +from sphinxcontrib.mat_tree_sitter_parser import MatClassParser, MatFunctionParser, ML_LANG +import tree_sitter_matlab as tsml +from tree_sitter import Language, Parser import logging from pathlib import Path import cProfile @@ -501,62 +502,48 @@ def parse_mfile(mfile, name, path, encoding=None): # read mfile code if encoding is None: encoding = "utf-8" - with open(mfile, "r", encoding=encoding, errors="replace") as code_f: - code = code_f.read().replace("\r\n", "\n") + with open(mfile, "rb") as code_f: + code = code_f.read() full_code = code - # quiet the textmate grammar logger and parse the file - logging.getLogger("textmate_grammar").setLevel(logging.ERROR) - parser = MatlabParser() - toks = parser.parse_file(mfile) + # parse the file + parser = Parser(ML_LANG) + tree = parser.parse(code) modname = path.replace(os.sep, ".") # module name # assume that functions and classes always start with a keyword - def isFunction(token): - comments_and_functions = [ - "comment.block.percentage.matlab", - "comment.line.percentage.matlab", - "meta.function.matlab", - ] - return all( - [(child.token in comments_and_functions) for child in token.children] - ) + def isFunction(tree): + q_is_function = ML_LANG.query(r"""(source_file [(comment) "\n"]* (function_definition))""") + matches = q_is_function.matches(tree.root_node) + if matches: + return True + else: + return False - def isClass(token): - tok_gen = token.find(tokens="meta.class.matlab", depth=1) - try: - tok, _ = next(tok_gen) + def isClass(tree): + q_is_class = ML_LANG.query("(class_definition)") + matches = q_is_class.matches(tree.root_node) + if matches: return True - except StopIteration: + else: return False - if isClass(toks): + if isClass(tree): logger.debug( "[sphinxcontrib-matlabdomain] parsing classdef %s from %s.", name, modname, ) - return MatClass(name, modname, toks) - elif isFunction(toks): + return MatClass(name, modname, tree.root_node) + elif isFunction(tree): logger.debug( "[sphinxcontrib-matlabdomain] parsing function %s from %s.", name, modname, ) - fun_tok_gen = toks.find(tokens="meta.function.matlab") - parsed_function = None - try: - fun_tok, _ = next(fun_tok_gen) - parsed_function = MatFunctionParser(fun_tok) - except StopIteration: - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. No function found.", - modname, - name, - ) - return MatFunction(name, modname, toks) + return MatFunction(name, modname, tree.root_node) else: pass # it's a script file retoken with header comment From 9f3297a5b2efe69bdbfdc64a797bf79d0ecdd45b Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sat, 27 Jul 2024 17:14:13 +0200 Subject: [PATCH 16/45] fixing default value parsing --- sphinxcontrib/mat_tree_sitter_parser.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index a75eb53..8d7f8e8 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -2,8 +2,8 @@ from tree_sitter import Language, Parser import re -#rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m" +rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" +# rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m" ML_LANG = Language(tsml.language()) @@ -80,7 +80,7 @@ (validation_functions [[(identifier) (function_call)] @validation_functions _]+ )? - (default_value (number))? @default + (default_value)? @default (comment)? @docstring ) """ @@ -159,14 +159,17 @@ re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M) re_assign_remove = re.compile(r"^=[ \t]*") + def process_text_into_docstring(text): docstring = text.decode("utf-8") return re.sub(re_percent_remove, "", docstring) + def process_default(text): default = text.decode("utf-8") return re.sub(re_assign_remove, "", default) + class MatFunctionParser: def __init__(self, root_node): """Parse Function definition""" @@ -206,7 +209,6 @@ def __init__(self, root_node): if not docstring: docstring = None self.docstring = docstring - def _parse_argument_section(self, argblock_node): _, argblock_match = q_argblock.matches(argblock_node)[0] @@ -596,7 +598,7 @@ def _parse_event_section(self, events_match): return for event in events: name = event.text.decode("utf-8") - + docstring = "" # look forward for docstring next_node = event.next_named_sibling @@ -635,9 +637,11 @@ def _parse_event_section(self, events_match): docstring == None self.events[name] = {"attrs": attrs, "docstring": docstring} - - import pdb; pdb.set_trace() - + + import pdb + + pdb.set_trace() + def _parse_attributes(self, attrs_nodes): attrs = {} if attrs_nodes is not None: From c6d8f4a132fa35572124c07478f17ad18fc83e45 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Wed, 14 Aug 2024 10:44:54 +0200 Subject: [PATCH 17/45] some test fixes and requires tree-sitter --- setup.py | 7 +- sphinxcontrib/mat_tree_sitter_parser.py | 44 ++-- sphinxcontrib/mat_types.py | 12 +- tests/test_parse_mfile.py | 262 ++++++++++++------------ 4 files changed, 168 insertions(+), 157 deletions(-) diff --git a/setup.py b/setup.py index f568894..98fba9a 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,12 @@ with open("README.rst", "r") as f_readme: long_desc = f_readme.read() -requires = ["Sphinx>=4.0.0", "Pygments>=2.0.1"] +requires = [ + "Sphinx>=4.0.0", + "Pygments>=2.0.1", + "tree-sitter-matlab>=1.0.1", + "tree-sitter-python>=0.21.0", +] setup( name="sphinxcontrib-matlabdomain", diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 8d7f8e8..a71ebb5 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -2,8 +2,11 @@ from tree_sitter import Language, Parser import re -rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -# rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassTesting.m" +# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" +rpath = ( + "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_ellipsis_empty_output.m" +) +# rpath = "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_empty_output.m" ML_LANG = Language(tsml.language()) @@ -97,7 +100,7 @@ q_fun = ML_LANG.query( """(function_definition - . + _* (function_output [ (identifier) @outputs @@ -106,13 +109,13 @@ ) ] )? - . + _* name: (identifier) @name - . + _* (function_arguments [(identifier) @params _]* )? - . + _* [(arguments_statement) @argblocks _]* . (comment)? @docstring @@ -185,12 +188,12 @@ def __init__(self, root_node): self.outputs[output] = {} # Get parameters - self.params = {} - param_nodes = fun_match.get("params") - if param_nodes is not None: - params = [param.text.decode("utf-8") for param in param_nodes] - for param in params: - self.params[param] = {} + self.args = {} + arg_nodes = fun_match.get("params") + if arg_nodes is not None: + args = [arg.text.decode("utf-8") for arg in arg_nodes] + for arg in args: + self.args[arg] = {} # parse out info from argument blocks argblock_nodes = fun_match.get("argblocks") @@ -321,12 +324,14 @@ def _parse_argument_section(self, argblock_node): # After all that if our docstring is empty then we have none if not docstring.strip(): docstring = None + else: + pass # docstring = docstring.rstrip() # Here we trust that the person is giving us valid matlab. if "Output" in attrs.keys(): arg_loc = self.outputs else: - arg_loc = self.params + arg_loc = self.args if len(name) == 1: arg_loc[name[0]] = { "attrs": attrs, @@ -517,6 +522,8 @@ def _parse_property_section(self, props_match): # After all that if our docstring is empty then we have none if not docstring.strip(): docstring = None + else: + pass # docstring = docstring.rstrip() self.properties[name] = { "attrs": attrs, @@ -587,6 +594,8 @@ def _parse_enum_section(self, enums_match): # After all that if our docstring is empty then we have none if docstring.strip() == "": docstring == None + else: + pass # docstring = docstring.rstrip() self.enumerations[name] = {"args": args, "docstring": docstring} @@ -635,13 +644,11 @@ def _parse_event_section(self, events_match): # After all that if our docstring is empty then we have none if docstring.strip() == "": docstring == None + else: + pass # docstring = docstring.rstrip() self.events[name] = {"attrs": attrs, "docstring": docstring} - import pdb - - pdb.set_trace() - def _parse_attributes(self, attrs_nodes): attrs = {} if attrs_nodes is not None: @@ -662,4 +669,5 @@ def _parse_attributes(self, attrs_nodes): data = f.read() tree = parser.parse(data) - class_parser = MatClassParser(tree.root_node) + # class_parser = MatClassParser(tree.root_node) + fun_parser = MatFunctionParser(tree.root_node) diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index 824c0b5..0a12cf1 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -17,7 +17,11 @@ from zipfile import ZipFile import xml.etree.ElementTree as ET import sphinxcontrib.mat_parser as mat_parser -from sphinxcontrib.mat_tree_sitter_parser import MatClassParser, MatFunctionParser, ML_LANG +from sphinxcontrib.mat_tree_sitter_parser import ( + MatClassParser, + MatFunctionParser, + ML_LANG, +) import tree_sitter_matlab as tsml from tree_sitter import Language, Parser import logging @@ -515,7 +519,9 @@ def parse_mfile(mfile, name, path, encoding=None): # assume that functions and classes always start with a keyword def isFunction(tree): - q_is_function = ML_LANG.query(r"""(source_file [(comment) "\n"]* (function_definition))""") + q_is_function = ML_LANG.query( + r"""(source_file [(comment) "\n"]* (function_definition))""" + ) matches = q_is_function.matches(tree.root_node) if matches: return True @@ -874,7 +880,7 @@ def __init__(self, name, modname, tokens): #: output args self.retv = parsed_function.outputs #: input args - self.args = parsed_function.params + self.args = parsed_function.args #: remaining tokens after main function is parsed self.rem_tks = None diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py index 24b4c6a..b901409 100644 --- a/tests/test_parse_mfile.py +++ b/tests/test_parse_mfile.py @@ -24,15 +24,15 @@ def test_ClassExample(): assert obj.name == "ClassExample" assert ( obj.docstring - == " test class methods\n\n :param a: the input to :class:`ClassExample`\n" + == " test class methods\n\n:param a: the input to :class:`ClassExample`" ) mymethod = obj.methods["mymethod"] assert mymethod.name == "mymethod" - assert mymethod.retv == ["c"] - assert mymethod.args == ["obj", "b"] + assert list(mymethod.retv.keys()) == ["c"] + assert list(mymethod.args.keys()) == ["obj", "b"] assert ( mymethod.docstring - == " a method in :class:`ClassExample`\n\n :param b: an input to :meth:`mymethod`\n" + == "a method in :class:`ClassExample`\n\n:param b: an input to :meth:`mymethod`" ) @@ -40,71 +40,71 @@ def test_comment_after_docstring(): mfile = os.path.join(TESTDATA_SUB, "f_comment_after_docstring.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_comment_after_docstring", "") assert obj.name == "f_comment_after_docstring" - assert obj.retv == ["output"] - assert obj.args == ["input"] - assert obj.docstring == " Tests a function with comments after docstring\n" + assert list(obj.retv.keys()) == ["output"] + assert list(obj.args.keys()) == ["input"] + assert obj.docstring == "Tests a function with comments after docstring" def test_docstring_no_newline(): mfile = os.path.join(TESTDATA_SUB, "f_docstring_no_newline.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_docstring_no_newline", "") assert obj.name == "f_docstring_no_newline" - assert obj.retv == ["y"] - assert obj.args is None - assert obj.docstring == " Test a function without a newline after docstring\n" + assert list(obj.retv.keys()) == ["y"] + assert list(obj.args.keys()) == [] + assert obj.docstring == "Test a function without a newline after docstring" def test_ellipsis_after_equals(): mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_after_equals.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_after_equals", "") assert obj.name == "f_ellipsis_after_equals" - assert obj.retv == ["output"] - assert obj.args == ["arg"] - assert obj.docstring == " Tests a function with ellipsis after equals\n" + assert list(obj.retv.keys()) == ["output"] + assert list(obj.args.keys()) == ["arg"] + assert obj.docstring == "Tests a function with ellipsis after equals" def test_ellipsis_empty_output(): mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_empty_output.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_empty_output", "") assert obj.name == "f_ellipsis_empty_output" - assert obj.retv is None - assert obj.args == ["arg"] - assert obj.docstring == " Tests a function with ellipsis in the output\n" + assert list(obj.retv.keys()) == [] + assert list(obj.args.keys()) == ["arg"] + assert obj.docstring == "Tests a function with ellipsis in the output" def test_ellipsis_in_comment(): mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_in_comment.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_in_comment", "") assert obj.name == "f_ellipsis_in_comment" - assert obj.retv == ["y"] - assert obj.args == ["x"] - assert obj.docstring == " Tests a function with ellipsis in the comment ...\n" + assert list(obj.retv.keys()) == ["y"] + assert list(obj.args.keys()) == ["x"] + assert obj.docstring == "Tests a function with ellipsis in the comment ..." def test_ellipsis_in_output(): mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_in_output.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_in_output", "") assert obj.name == "f_ellipsis_in_output" - assert obj.retv == ["output", "with", "ellipsis"] - assert obj.args == ["arg"] - assert obj.docstring == " Tests a function with ellipsis in the output\n" + assert list(obj.retv.keys()) == ["output", "with", "ellipsis"] + assert list(obj.args.keys()) == ["arg"] + assert obj.docstring == "Tests a function with ellipsis in the output" def test_ellipsis_in_output_multiple(): mfile = os.path.join(TESTDATA_SUB, "f_ellipsis_in_output_multiple.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_ellipsis_in_output_multiple", "") assert obj.name == "f_ellipsis_in_output_multiple" - assert obj.retv == ["output", "with", "ellipsis"] - assert obj.args == ["arg"] - assert obj.docstring == " Tests a function with multiple ellipsis in the output\n" + assert list(obj.retv.keys()) == ["output", "with", "ellipsis"] + assert list(obj.args.keys()) == ["arg"] + assert obj.docstring == "Tests a function with multiple ellipsis in the output" def test_no_docstring(): mfile = os.path.join(TESTDATA_SUB, "f_no_docstring.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_no_docstring", "") assert obj.name == "f_no_docstring" - assert obj.retv == ["y"] - assert obj.args is None + assert list(obj.retv.keys()) == ["y"] + assert list(obj.args.keys()) == [] assert obj.docstring == "" @@ -112,36 +112,36 @@ def test_no_output(): mfile = os.path.join(TESTDATA_SUB, "f_no_output.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_no_output", "") assert obj.name == "f_no_output" - assert obj.retv is None - assert obj.args == ["arg"] - assert obj.docstring == " A function with no outputs\n" + assert list(obj.retv.keys()) == [] + assert list(obj.args.keys()) == ["arg"] + assert obj.docstring == "A function with no outputs" def test_no_input_parentheses(): mfile = os.path.join(TESTDATA_SUB, "f_no_input_parentheses.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_no_input_parentheses", "") assert obj.name == "f_no_input_parentheses" - assert obj.retv == ["y"] - assert obj.args is None - assert obj.docstring == " Tests a function without parentheses in input\n" + assert list(obj.retv.keys()) == ["y"] + assert list(obj.args.keys()) == [] + assert obj.docstring == "Tests a function without parentheses in input" def test_no_spaces(): mfile = os.path.join(TESTDATA_SUB, "f_no_spaces.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_no_spaces", "") assert obj.name == "f_no_spaces" - assert obj.retv == ["a", "b", "c"] - assert obj.args == ["x", "y", "z"] - assert obj.docstring == " Tests a function with no spaces in function signature\n" + assert list(obj.retv.keys()) == ["a", "b", "c"] + assert list(obj.args.keys()) == ["x", "y", "z"] + assert obj.docstring == "Tests a function with no spaces in function signature" def test_with_tabs(): mfile = os.path.join(TESTDATA_SUB, "f_with_tabs.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_with_tabs", "") assert obj.name == "f_with_tabs" - assert obj.retv == ["y"] - assert obj.args == ["x"] - assert obj.docstring == " A function with tabs\n" + assert list(obj.retv.keys()) == ["y"] + assert list(obj.args.keys()) == ["x"] + assert obj.docstring == "A function with tabs" def test_ClassWithEndOfLineComment(): @@ -178,15 +178,15 @@ def test_ClassWithFunctionArguments(): assert obj.name == "ClassWithFunctionArguments" assert ( obj.docstring - == " test class methods with function arguments\n\n :param a: the input to :class:`ClassWithFunctionArguments`\n" + == "test class methods with function arguments\n\n:param a: the input to :class:`ClassWithFunctionArguments`" ) mymethod = obj.methods["mymethod"] assert mymethod.name == "mymethod" - assert mymethod.retv == ["c"] - assert mymethod.args == ["obj", "b"] + assert list(mymethod.retv.keys()) == ["c"] + assert mymethod.args.keys() == ["obj", "b"] assert ( mymethod.docstring - == " a method in :class:`ClassWithFunctionArguments`\n\n :param b: an input to :meth:`mymethod`\n" + == "a method in :class:`ClassWithFunctionArguments`\n\n:param b: an input to :meth:`mymethod`" ) @@ -206,7 +206,7 @@ def test_no_input_no_output_no_parentheses(): assert obj.name == "f_no_input_no_output_no_parentheses" assert ( obj.docstring - == " Tests a function without parentheses in input and no return value\n" + == "Tests a function without parentheses in input and no return value" ) @@ -218,26 +218,26 @@ def test_no_input_no_parentheses_no_docstring(): mfile, "f_no_input_no_parentheses_no_docstring", "test_data" ) assert obj.name == "f_no_input_no_parentheses_no_docstring" - assert obj.retv == ["result"] - assert obj.args is None + assert list(obj.retv.keys()) == ["result"] + assert list(obj.args.keys()) == [] def test_ClassWithCommentHeader(): mfile = os.path.join(DIRNAME, "test_data", "ClassWithCommentHeader.m") obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithCommentHeader", "test_data") assert obj.name == "ClassWithCommentHeader" - assert obj.docstring == " A class with a comment header on the top.\n" + assert obj.docstring == "A class with a comment header on the top." method_get_tform = obj.methods["getTransformation"] assert method_get_tform.name == "getTransformation" - assert method_get_tform.retv == ["tform"] - assert method_get_tform.args == ["obj"] + assert list(method_get_tform.retv.keys()) == ["tform"] + assert list(method_get_tform.args.keys()) == ["obj"] def test_with_comment_header(): mfile = os.path.join(DIRNAME, "test_data", "f_with_comment_header.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_with_comment_header", "test_data") assert obj.name == "f_with_comment_header" - assert obj.docstring == " A simple function with a comment header on the top.\n" + assert obj.docstring == "A simple function with a comment header on the top." def test_script_with_comment_header(): @@ -247,7 +247,7 @@ def test_script_with_comment_header(): ) assert ( obj.docstring - == """ This is a Comment Header + == """This is a Comment Header Copyright (C) , by Some descriptions ... @@ -268,7 +268,7 @@ def test_script_with_comment_header_2(): ) assert ( obj.docstring - == """ This is a Comment Header + == """This is a Comment Header Copyright (C) , by Some descriptions ... @@ -289,7 +289,7 @@ def test_script_with_comment_header_3(): ) assert ( obj.docstring - == """ This is a Comment Header with empty lines above + == """This is a Comment Header with empty lines above and many line comments. """ @@ -303,7 +303,7 @@ def test_script_with_comment_header_4(): ) assert ( obj.docstring - == """ This is a Comment Header with a single instruction above + == """This is a Comment Header with a single instruction above and many line comments. """ @@ -356,14 +356,14 @@ def test_ClassWithMethodAttributes(): mfile, "ClassWithMethodAttributes", "test_data" ) assert obj.name == "ClassWithMethodAttributes" - assert obj.docstring == " Class with different method attributes\n" + assert obj.docstring == "Class with different method attributes" assert obj.methods["testNormal"].attrs == {} assert obj.methods["testPublic"].attrs == {"Access": "public"} assert obj.methods["testProtected"].attrs == {"Access": "protected"} assert obj.methods["testPrivate1"].attrs == {"Access": "private"} assert obj.methods["testPrivate2"].attrs == {"Access": "private"} - assert obj.methods["testHidden"].attrs == {"Hidden": True} - assert obj.methods["testStatic"].attrs == {"Static": True} + assert obj.methods["testHidden"].attrs == {"Hidden": None} + assert obj.methods["testStatic"].attrs == {"Static": None} assert obj.methods["testFriend1"].attrs == {"Access": "?OtherClass"} assert obj.methods["testFriend2"].attrs == { "Access": ["?OtherClass", "?pack.OtherClass2"] @@ -376,7 +376,7 @@ def test_ClassWithPropertyAttributes(): mfile, "ClassWithPropertyAttributes", "test_data" ) assert obj.name == "ClassWithPropertyAttributes" - assert obj.docstring == " Class with different property attributes\n" + assert obj.docstring == "Class with different property attributes" assert obj.properties["testNormal"]["attrs"] == {} assert obj.properties["testPublic"]["attrs"] == {"Access": "public"} assert obj.properties["testProtected"]["attrs"] == {"Access": "protected"} @@ -393,29 +393,27 @@ def test_ClassWithPropertyAttributes(): "GetAccess": "private", "SetAccess": "private", } - assert obj.properties["TEST_CONSTANT"]["attrs"] == {"Constant": True} + assert obj.properties["TEST_CONSTANT"]["attrs"] == {"Constant": None} assert obj.properties["TEST_CONSTANT_PROTECTED"]["attrs"] == { "Access": "protected", - "Constant": True, + "Constant": None, } - assert obj.properties["testDependent"]["attrs"] == {"Dependent": True} - assert obj.properties["testHidden"]["attrs"] == {"Hidden": True} + assert obj.properties["testDependent"]["attrs"] == {"Dependent": None} + assert obj.properties["testHidden"]["attrs"] == {"Hidden": None} def test_ClassWithoutIndent(): mfile = os.path.join(DIRNAME, "test_data", "ClassWithoutIndent.m") obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithoutIndent", "test_data") assert obj.name == "ClassWithoutIndent" - assert ( - obj.docstring == " First line is not indented\n Second line line is indented\n" - ) + assert obj.docstring == "First line is not indented\nSecond line line is indented" def test_f_with_utf8(): mfile = os.path.join(DIRNAME, "test_data", "f_with_utf8.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_with_utf8", "test_data") assert obj.name == "f_with_utf8" - assert obj.docstring == " Cambia ubicación de partículas.\n" + assert obj.docstring == "Cambia ubicación de partículas." def test_file_parsing_encoding_can_be_specified(): @@ -424,14 +422,14 @@ def test_file_parsing_encoding_can_be_specified(): mfile, "f_with_latin_1", "test_data", encoding="latin-1" ) assert obj.name == "f_with_latin_1" - assert obj.docstring == " Analyse de la réponse à un créneau\n" + assert obj.docstring == "Analyse de la réponse à un créneau" def test_file_parsing_with_no_encoding_specified(): mfile = os.path.join(DIRNAME, "test_data", "f_with_latin_1.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_with_latin_1", "test_data") assert obj.name == "f_with_latin_1" - assert obj.docstring == " Analyse de la r\ufffdponse \ufffd un cr\ufffdneau\n" + assert obj.docstring == "Analyse de la r\ufffdponse \ufffd un cr\ufffdneau" def test_ClassWithBuiltinOverload(): @@ -440,7 +438,7 @@ def test_ClassWithBuiltinOverload(): mfile, "ClassWithBuiltinOverload", "test_data" ) assert obj.name == "ClassWithBuiltinOverload" - assert obj.docstring == " Class that overloads a builtin\n" + assert obj.docstring == "Class that overloads a builtin" def test_ClassWithBuiltinProperties(): @@ -449,14 +447,14 @@ def test_ClassWithBuiltinProperties(): mfile, "ClassWithBuiltinProperties", "test_data" ) assert obj.name == "ClassWithBuiltinProperties" - assert obj.docstring == " Class with properties that overload a builtin\n" + assert obj.docstring == "Class with properties that overload a builtin" assert set(obj.properties) == set(["omega", "alpha", "gamma", "beta"]) - assert obj.properties["omega"]["docstring"] == " a property" - assert obj.properties["alpha"]["docstring"] == (" a property overloading a builtin") + assert obj.properties["omega"]["docstring"] == "a property" + assert obj.properties["alpha"]["docstring"] == ("a property overloading a builtin") assert obj.properties["gamma"]["docstring"] == ( - " a property overloading a builtin with validation" + "a property overloading a builtin with validation" ) - assert obj.properties["beta"]["docstring"] == (" another overloaded property") + assert obj.properties["beta"]["docstring"] == ("another overloaded property") # Fails when running with other test files. Warnings are already logged. @@ -473,7 +471,7 @@ def test_f_with_name_mismatch(caplog): "sphinx.matlab-domain", WARNING, '[sphinxcontrib-matlabdomain] Unexpected function name: "f_name_with_mismatch".' - ' Expected "f_with_name_mismatch" in module "test_data".', + ' Expected "f_with_name_mismatch"in module "test_data".', ), ] @@ -482,16 +480,16 @@ def test_f_with_dummy_argument(): mfile = os.path.join(DIRNAME, "test_data", "f_with_dummy_argument.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_with_dummy_argument", "") assert obj.name == "f_with_dummy_argument" - assert obj.retv == ["obj"] - assert obj.args == ["~", "name"] - assert obj.docstring == " Could be a callback, where first argument is ignored.\n" + assert list(obj.retv.keys()) == ["obj"] + assert list(obj.args.keys()) == ["~", "name"] + assert obj.docstring == "Could be a callback, where first argument is ignored." def test_f_with_string_ellipsis(): mfile = os.path.join(DIRNAME, "test_data", "f_with_string_ellipsis.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_with_string_ellipsis", "test_data") assert obj.name == "f_with_string_ellipsis" - assert obj.docstring == " A function with a string with ellipsis\n" + assert obj.docstring == "A function with a string with ellipsis" def test_ClassWithFunctionVariable(): @@ -500,7 +498,7 @@ def test_ClassWithFunctionVariable(): mfile, "ClassWithFunctionVariable", "test_data" ) assert obj.name == "ClassWithFunctionVariable" - assert obj.docstring == " This line contains functions!\n" + assert obj.docstring == "This line contains functions!" methods = set(obj.methods.keys()) assert methods == {"ClassWithFunctionVariable", "anotherMethodWithFunctions"} @@ -562,7 +560,7 @@ def test_ClassWithAttributes(): obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithAttributes", "test_data") assert isinstance(obj, mat_types.MatClass) assert obj.name == "ClassWithAttributes" - assert obj.attrs == {"Sealed": True} + assert obj.attrs == {"Sealed": None} # Fails when running with other test files. Warnings are already logged. @@ -608,8 +606,8 @@ def test_f_with_function_variable(): mfile, "f_with_function_variable", "test_data" ) assert obj.name == "f_with_function_variable" - assert obj.retv == ["obj"] - assert obj.args == ["the_functions", "~"] + assert list(obj.retv.keys()) == ["obj"] + assert list(obj.args.keys()) == ["the_functions", "~"] print(obj.docstring) @@ -620,12 +618,7 @@ def test_ClassWithGetterSetter(): assert obj.name == "ClassWithGetterSetter" assert list(obj.methods.keys()) == ["ClassWithGetterSetter"] assert obj.properties == { - "a": { - "docstring": " A nice property", - "attrs": {}, - "default": None, - "specs": "", - } + "a": {"docstring": "A nice property", "attrs": {}, "default": None} } @@ -657,9 +650,9 @@ def test_ClassWithDummyArguments(): assert obj.name == "ClassWithDummyArguments" assert set(obj.methods.keys()) == set(["someMethod1", "someMethod2"]) m1 = obj.methods["someMethod1"] - assert m1.args == ["obj", "argument"] + assert list(m1.args.keys()) == ["obj", "argument"] m2 = obj.methods["someMethod2"] - assert m2.args == ["~", "argument"] + assert list(m2.args.keys()) == ["~", "argument"] def test_ClassFolderClassdef(): @@ -669,9 +662,9 @@ def test_ClassFolderClassdef(): assert obj.name == "ClassFolder" assert set(obj.methods.keys()) == set(["ClassFolder", "method_inside_classdef"]) m1 = obj.methods["ClassFolder"] - assert m1.args == ["p"] + assert list(m1.args.keys()) == ["p"] m2 = obj.methods["method_inside_classdef"] - assert m2.args == ["obj", "a", "b"] + assert list(m2.args.keys()) == ["obj", "a", "b"] def test_ClassWithMethodsWithSpaces(): @@ -683,10 +676,9 @@ def test_ClassWithMethodsWithSpaces(): assert obj.name == "ClassWithMethodsWithSpaces" assert set(obj.methods.keys()) == set(["static_method"]) assert ( - obj.docstring - == " Class with methods that have space after the function name.\n" + obj.docstring == "Class with methods that have space after the function name." ) - assert obj.methods["static_method"].attrs == {"Static": True} + assert obj.methods["static_method"].attrs == {"Static": None} def test_ClassContainingParfor(): @@ -695,7 +687,7 @@ def test_ClassContainingParfor(): assert isinstance(obj, mat_types.MatClass) assert obj.name == "ClassContainingParfor" assert set(obj.methods.keys()) == set(["test"]) - assert obj.docstring == " Parfor is a keyword\n" + assert obj.docstring == "Parfor is a keyword" def test_ClassWithStringEllipsis(): @@ -704,7 +696,7 @@ def test_ClassWithStringEllipsis(): assert isinstance(obj, mat_types.MatClass) assert obj.name == "ClassWithStringEllipsis" assert set(obj.methods.keys()) == set(["test"]) - assert obj.docstring == " Contains ellipsis in string\n" + assert obj.docstring == "Contains ellipsis in string" def test_ClassLongProperty(): @@ -712,13 +704,13 @@ def test_ClassLongProperty(): obj = mat_types.MatObject.parse_mfile(mfile, "ClassLongProperty", "test_data") assert obj.name == "ClassLongProperty" assert ( - obj.docstring == " test class property with long docstring\n\n " - ":param a: the input to :class:`ClassExample`\n" + obj.docstring == "test class property with long docstring\n\n" + ":param a: the input to :class:`ClassExample`" ) - assert obj.properties["a"]["docstring"] == " short description" + assert obj.properties["a"]["docstring"] == "short description" assert ( - obj.properties["b"]["docstring"] == " A property with a long " - "documentation\n This is the second line\n And a third\n" + obj.properties["b"]["docstring"] == "A property with a long " + "documentation\nThis is the second line\nAnd a third" ) assert obj.properties["c"]["docstring"] is None @@ -730,10 +722,10 @@ def test_ClassWithLongPropertyDocstrings(): ) assert obj.name == "ClassWithLongPropertyDocstrings" assert ( - obj.properties["a"]["docstring"] == " This line is deleted\n" - " This line documents another property\n" + obj.properties["a"]["docstring"] == "This line is deleted" + "This line documents another property" ) - assert obj.properties["b"]["docstring"] == " Document this property\n" + assert obj.properties["b"]["docstring"] == "Document this property" def test_ClassWithLongPropertyTrailingEmptyDocstrings(): @@ -745,10 +737,10 @@ def test_ClassWithLongPropertyTrailingEmptyDocstrings(): ) assert obj.name == "ClassWithLongPropertyTrailingEmptyDocstrings" assert ( - obj.properties["a"]["docstring"] == " This line is deleted\n" - " This line documents another property\n" + obj.properties["a"]["docstring"] == "This line is deleted" + "This line documents another property" ) - assert obj.properties["b"]["docstring"] == " Document this property\n" + assert obj.properties["b"]["docstring"] == "Document this property" def test_ClassWithPropertyValidators(): @@ -757,10 +749,10 @@ def test_ClassWithPropertyValidators(): mfile, "ClassWithPropertyValidators", "test_data" ) assert obj.name == "ClassWithPropertyValidators" - assert obj.properties["Location"]["docstring"] == " The location\n" - assert obj.properties["Label"]["docstring"] == " The label\n" - assert obj.properties["State"]["docstring"] == " The state\n" - assert obj.properties["ReportLevel"]["docstring"] == " The report level\n" + assert obj.properties["Location"]["docstring"] == "The location" + assert obj.properties["Label"]["docstring"] == "The label" + assert obj.properties["State"]["docstring"] == "The state" + assert obj.properties["ReportLevel"]["docstring"] == "The report level" def test_ClassWithTrailingCommentAfterBases(): @@ -769,18 +761,18 @@ def test_ClassWithTrailingCommentAfterBases(): mfile, "ClassWithTrailingCommentAfterBases", "test_data" ) assert obj.name == "ClassWithTrailingCommentAfterBases" - assert obj.bases == ["handle", "my.super.Class"] + assert obj.bases == [("handle",), ("my", "super", "Class")] assert ( obj.docstring - == " test class methods\n\n :param a: the input to :class:`ClassWithTrailingCommentAfterBases`\n" + == "test class methods\n\n:param a: the input to :class:`ClassWithTrailingCommentAfterBases`" ) mymethod = obj.methods["mymethod"] assert mymethod.name == "mymethod" - assert mymethod.retv == ["c"] - assert mymethod.args == ["obj", "b"] + assert list(mymethod.retv.keys()) == ["c"] + assert list(mymethod.args.keys()) == ["obj", "b"] assert ( mymethod.docstring - == " a method in :class:`ClassWithTrailingCommentAfterBases`\n\n :param b: an input to :meth:`mymethod`\n" + == "a method in :class:`ClassWithTrailingCommentAfterBases`\n\n:param b: an input to :meth:`mymethod`" ) @@ -790,28 +782,28 @@ def test_ClassWithEllipsisProperties(): mfile, "ClassWithEllipsisProperties", "test_data" ) assert obj.name == "ClassWithEllipsisProperties" - assert obj.bases == ["handle"] - assert obj.docstring == " stuff\n" + assert obj.bases == [("handle",)] + assert obj.docstring == "stuff" assert len(obj.methods) == 0 - assert obj.properties["A"]["docstring"] == " an expression with ellipsis" + assert obj.properties["A"]["docstring"] == "an expression with ellipsis" assert obj.properties["A"]["default"] == "1+2+3+4+5" assert ( obj.properties["B"]["docstring"] - == " a cell array with ellipsis and other array notation" + == "a cell array with ellipsis and other array notation" ) assert obj.properties["B"]["default"].startswith("{'hello','bye';") assert obj.properties["B"]["default"].endswith("}") - assert obj.properties["C"]["docstring"] == " using end inside array" + assert obj.properties["C"]["docstring"] == "using end inside array" assert obj.properties["C"]["default"] == "ClassWithEllipsisProperties.B(2:end,1)" - assert obj.properties["D"]["docstring"] == " String with line continuation" + assert obj.properties["D"]["docstring"] == "String with line continuation" assert obj.properties["D"]["default"] == "'...'" - assert obj.properties["E"]["docstring"] == " The string with spaces" + assert obj.properties["E"]["docstring"] == "The string with spaces" assert obj.properties["E"]["default"] == "'some string with spaces'" # mymethod.docstring -# == " a method in :class:`ClassWithTrailingCommentAfterBases`\n\n :param b: an input to :meth:`mymethod`\n" +# == " a method in :class:`ClassWithTrailingCommentAfterBases`\n\n :param b: an input to :meth:`mymethod`" # ) @@ -833,9 +825,9 @@ def test_ClassWithTrailingSemicolons(): ) assert ( obj.docstring - == " Smoothing like it is performed withing Cxx >v7.0 (until v8.2 at least).\n Uses constant 228p_12k frequency vector:\n" + == "Smoothing like it is performed withing Cxx >v7.0 (until v8.2 at least).\nUses constant 228p_12k frequency vector:" ) - assert obj.bases == ["hgsetget"] + assert obj.bases == [("hgsetget",)] assert list(obj.methods.keys()) == [ "ClassWithTrailingSemicolons", "CxxSmoothing", @@ -863,7 +855,7 @@ def test_ClassWithSeperatedComments(): assert obj.bases == [] assert "prop" in obj.properties prop = obj.properties["prop"] - assert prop["docstring"] == " Another comment\n" + assert prop["docstring"] == "Another comment" def test_ClassWithKeywordsAsFieldnames(): @@ -878,19 +870,19 @@ def test_ClassWithKeywordsAsFieldnames(): assert "c" in obj.properties assert "calculate" in obj.methods meth = obj.methods["calculate"] - assert meth.docstring == " Returns the value of `d`\n" + assert meth.docstring == "Returns the value of `d`" def test_ClassWithNamedAsArguments(): mfile = os.path.join(TESTDATA_ROOT, "arguments.m") obj = mat_types.MatObject.parse_mfile(mfile, "arguments", "test_data") assert obj.name == "arguments" - assert obj.bases == ["handle", "matlab.mixin.Copyable"] + assert obj.bases == [("handle",), ("matlab", "mixin", "Copyable")] assert "value" in obj.properties meth = obj.methods["arguments"] - assert meth.docstring == " Constructor for arguments\n" + assert meth.docstring == "Constructor for arguments" meth = obj.methods["add"] - assert meth.docstring == " Add new argument\n" + assert meth.docstring == "Add new argument" def test_ClassWithPropertyCellValues(): @@ -908,10 +900,10 @@ def test_ClassWithTests(): mfile = os.path.join(TESTDATA_ROOT, "ClassWithTests.m") obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithTests", "test_data") assert obj.name == "ClassWithTests" - assert obj.bases == ["matlab.unittest.TestCase"] + assert obj.bases == [("matlab", "unittest", "TestCase")] assert "testRunning" in obj.methods testRunning = obj.methods["testRunning"] - assert testRunning.attrs["TestTags"] == ["'Unit'"] + assert testRunning.attrs["TestTags"] == ["{'Unit'}"] if __name__ == "__main__": From b349ea7fe33a780395fe4f150124b30e6f0ad399 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Wed, 14 Aug 2024 10:45:36 +0200 Subject: [PATCH 18/45] rm textmate parser on this branch --- sphinxcontrib/mat_textmate_parser.py | 642 --------------------------- 1 file changed, 642 deletions(-) delete mode 100644 sphinxcontrib/mat_textmate_parser.py diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py deleted file mode 100644 index 46236ce..0000000 --- a/sphinxcontrib/mat_textmate_parser.py +++ /dev/null @@ -1,642 +0,0 @@ -from textmate_grammar.parsers.matlab import MatlabParser -import re - -# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" - -rpath = "/home/anton/tools/matlabdomain/tests/roots/test_autodoc/target/ClassExample.m" - - -def find_first_child(curr, tok, attr="children"): - tok_lst = getattr(curr, attr) - ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok] - if not ind: - return (None, None) - return (tok_lst[ind[0]], ind[0]) - - -def _toks_on_same_line(tok1, tok2): - """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results""" - line1 = _get_last_line_of_tok(tok1) - line2 = _get_first_line_of_tok(tok2) - return line1 == line2 - - -def _is_empty_line_between_tok(tok1, tok2): - """Note: pass tokens in order they appear""" - line1 = _get_last_line_of_tok(tok1) - line2 = _get_first_line_of_tok(tok2) - return line2 - line1 > 1 - - -def _get_first_line_of_tok(tok): - return min([loc[0] for loc in tok.characters.keys()]) - - -def _get_last_line_of_tok(tok): - return max([loc[0] for loc in tok.characters.keys()]) - - -class MatFunctionParser: - def __init__(self, fun_tok): - """Parse Function definition""" - # First find the function name - name_gen = fun_tok.find(tokens="entity.name.function.matlab") - try: - name_tok, _ = next(name_gen) - self.name = name_tok.content - except StopIteration: - # TODO correct error here - raise Exception("Couldn't find function name") - - # Find outputs and parameters - output_gen = fun_tok.find(tokens="variable.parameter.output.matlab") - param_gen = fun_tok.find(tokens="variable.parameter.input.matlab") - - self.outputs = {} - self.params = {} - self.attrs = {} - - for out, _ in output_gen: - self.outputs[out.content] = {} - - for param, _ in param_gen: - self.params[param.content] = {} - - # find arguments blocks - arg_section = None - for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): - self._parse_argument_section(arg_section) - - fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab") - try: - fun_decl_tok, _ = next(fun_decl_gen) - except StopIteration: - raise Exception( - "missing function declaration" - ) # This cant happen as we'd be missing a function name - - # Now parse for docstring - docstring = "" - comment_toks = fun_tok.findall( - tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"] - ) - last_tok = arg_section if arg_section is not None else fun_decl_tok - - for comment_tok, _ in comment_toks: - if _is_empty_line_between_tok(last_tok, comment_tok): - # If we have non-consecutive tokens quit right away. - break - elif ( - not docstring and comment_tok.token == "comment.block.percentage.matlab" - ): - # If we have no previous docstring lines and a comment block we take - # the comment block as the docstring and exit. - docstring = comment_tok.content.strip()[ - 2:-2 - ].strip() # [2,-2] strips out block comment delimiters - break - elif comment_tok.token == "comment.line.percentage.matlab": - # keep parsing comments - docstring += comment_tok.content[1:] + "\n" - else: - # we are done. - break - last_tok = comment_tok - - self.docstring = docstring if docstring else None - - def _parse_argument_section(self, section): - modifiers = [ - mod.content - for mod, _ in section.find(tokens="storage.modifier.arguments.matlab") - ] - arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab") - for arg_def, _ in arg_def_gen: - arg_name = arg_def.begin[ - 0 - ].content # Get argument name that is being defined - self._parse_argument_validation(arg_name, arg_def, modifiers) - - def _parse_argument_validation(self, arg_name, arg, modifiers): - # TODO This should be identical to propery validation I think. Refactor - # First get the size if found - section = self.output if "Output" in modifiers else self.params - size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) - try: # We have a size, therefore parse the comma separated list into tuple - size_tok, _ = next(size_gen) - size_elem_gen = size_tok.find( - tokens=[ - "constant.numeric.decimal.matlab", - "keyword.operator.vector.colon.matlab", - ], - depth=1, - ) - size = tuple([elem[0].content for elem in size_elem_gen]) - section[arg_name]["size"] = size - except StopIteration: - pass - - # Now find the type if it exists - # TODO this should be mapped to known types (though perhaps as a postprocess) - type_gen = arg.find(tokens="storage.type.matlab", depth=1) - try: - section[arg_name]["type"] = next(type_gen)[0].content - except StopIteration: - pass - - # Now find list of validators - validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1) - try: - validator_tok, _ = next(validator_gen) - validator_toks = validator_tok.findall( - tokens="variable.other.readwrite.matlab", depth=1 - ) # TODO Probably bug here in MATLAB-Language-grammar - section[arg_name]["validators"] = [tok[0].content for tok in validator_toks] - except StopIteration: - pass - - -class MatClassParser: - def __init__(self, tokens): - # DATA - self.name = "" - self.supers = [] - self.attrs = {} - self.docstring = "" - self.properties = {} - self.methods = {} - self.enumerations = {} - - self.parsed = tokens - self.cls, _ = find_first_child(self.parsed, "meta.class.matlab") - if not self.cls: - raise Exception() # TODO better exception - self.clsdef, _ = find_first_child(self.cls, "meta.class.declaration.matlab") - self._parse_clsdef() - self._find_class_docstring() - - property_sections = self.cls.findall(tokens="meta.properties.matlab", depth=1) - method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1) - enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1) - - for section, _ in property_sections: - self._parse_property_section(section) - - for section, _ in method_sections: - self._parse_method_section(section) - - for section, _ in enumeration_sections: - self._parse_enum_section(section) - - import pdb - - pdb.set_trace() - - def _find_class_docstring(self): - try: - possible_comment_tok = self.cls.children[1] - except IndexError: - return - - if possible_comment_tok.token == "comment.line.percentage.matlab": - self._docstring_lines() - elif possible_comment_tok.token == "comment.block.percentage.matlab": - self.docstring = possible_comment_tok.content.strip()[ - 2:-2 - ].strip() # [2,-2] strips out block comment delimiters - else: - pass - - def _docstring_lines(self): - idx = 1 - cls_children = self.cls.children - - while ( - idx < len(cls_children) - and cls_children[idx].token == "comment.line.percentage.matlab" - ): - self.docstring += ( - cls_children[idx].content[1:] + "\n" - ) # [1:] strips out percent sign - idx += 1 - self.docstring = self.docstring.strip() - - def _parse_clsdef(self): - # Try parsing attrs - attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab") - try: - attrs_tok, _ = next(attrs_tok_gen) - self._parse_class_attributes(attrs_tok) - except StopIteration: - pass - - # Parse classname - classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab") - try: - classname_tok, _ = next(classname_tok_gen) - self.name = classname_tok.content - except StopIteration: - print("ClassName not found") # TODO this is probably fatal - - # Parse interited classes - parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab") - - for parent_class_tok, _ in parent_class_toks: - sections = parent_class_tok.findall( - tokens=[ - "entity.name.namespace.matlab", - "entity.other.inherited-class.matlab", - ] - ) - super_cls = tuple([sec.content for sec, _ in sections]) - self.supers.append(super_cls) - # Parse Attributes TODO maybe there is a smarter way to do this? - idx = 0 - while self.clsdef.children[idx].token == "storage.modifier.class.matlab": - attr_tok = self.clsdef.children[idx] - attr = attr_tok.content - val = None # TODO maybe do some typechecking here or we can assume that you give us valid Matlab - idx += 1 - if attr_tok.token == "keyword.operator.assignment.matlab": # pull out r.h.s - idx += 1 - val = self.clsdef.children[idx].content - idx += 1 - if ( - attr_tok.token == "punctuation.separator.modifier.comma.matlab" - ): # skip commas - idx += 1 - self.attrs[attr] = val - - def _parse_class_attributes(self, attrs_tok): - # walk down child list and parse manually - # TODO perhaps contribute a delimited list find to textmate-grammar-python - children = attrs_tok.children - idx = 0 - while idx < len(children): - child_tok = children[idx] - if child_tok.token == "storage.modifier.class.matlab": - attr = child_tok.content - val = None - idx += 1 # walk to next token - try: # however we may have walked off the end of the list in which case we exit - maybe_assign_tok = children[idx] - except: - self.attrs[attr] = val - break - if maybe_assign_tok.token == "keyword.operator.assignment.matlab": - idx += 1 - rhs_tok = children[idx] # parse right hand side - if rhs_tok.token == "meta.cell.literal.matlab": - # A cell. For now just take the whole cell as value. - # TODO parse out the cell array of metaclass literals. - val = "{" + rhs_tok.content + "}" - idx += 1 - elif rhs_tok.token == "constant.language.boolean.matlab": - val = rhs_tok.content - idx += 1 - elif rhs_tok.token == "keyword.operator.other.question.matlab": - idx += 1 - metaclass_tok = children[idx] - metaclass_components = metaclass_tok.findall( - tokens=[ - "entity.name.namespace.matlab", - "entity.other.class.matlab", - ] - ) - val = tuple([comp.content for comp, _ in metaclass_components]) - else: - pass - self.attrs[attr] = val - else: # Comma or continuation therefore skip - idx += 1 - - def _parse_property_section(self, section): - # TODO parse property section attrs - attrs = self._parse_attributes(section) - idxs = [ - i - for i in range(len(section.children)) - if section.children[i].token == "meta.assignment.definition.property.matlab" - ] - for idx in idxs: - prop_tok = section.children[idx] - prop_name = prop_tok.begin[0].content - self.properties[prop_name] = {"attrs": attrs} # Create entry for property - self._parse_property_validation( - prop_name, prop_tok - ) # Parse property validation. - - # Try to find a default assignment: - default = None - _, assgn_idx = find_first_child( - prop_tok, "keyword.operator.assignment.matlab", attr="end" - ) - if assgn_idx is not None: - default = "" - assgn_idx += 1 # skip assignment - while assgn_idx < len(prop_tok.end): - tok = prop_tok.end[assgn_idx] - assgn_idx += 1 - if tok.token in [ - "comment.line.percentage.matlab", - "punctuation.terminator.semicolon.matlab", - ]: - break - default += tok.content - self.properties[prop_name]["default"] = default - - # Get inline docstring - inline_docstring_gen = prop_tok.find( - tokens="comment.line.percentage.matlab", attribute="end" - ) - try: - inline_docstring_tok, _ = next(inline_docstring_gen) - inline_docstring = inline_docstring_tok.content[ - 1: - ] # strip leading % sign - except StopIteration: - inline_docstring = None - - # Walk backwards to get preceding docstring. - preceding_docstring = "" - walk_back_idx = idx - 1 - next_tok = prop_tok - while walk_back_idx >= 0: - walk_tok = section.children[walk_back_idx] - if _is_empty_line_between_tok(walk_tok, next_tok): - # Once there is an empty line between consecutive tokens we are done. - break - - if ( - not preceding_docstring - and walk_tok.token == "comment.block.percentage.matlab" - ): - # block comment immediately preceding enum so we are done. - # TODO we might need to do some postprocessing here to handle indents gracefully - preceding_docstring = walk_tok.content.strip()[2:-2] - break - elif walk_tok.token == "comment.line.percentage.matlab": - preceding_docstring = ( - walk_tok.content[1:] + "\n" + preceding_docstring - ) # [1:] strips % - walk_back_idx -= 1 - next_tok = walk_tok - elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": - walk_back_idx -= 1 - # Dont update next_tok for whitespace - else: - break - - # Walk forwards to get following docstring or inline one. - following_docstring = "" - walk_fwd_idx = idx + 1 - prev_tok = prop_tok - while walk_fwd_idx < len(section.children): - walk_tok = section.children[walk_fwd_idx] - - if _is_empty_line_between_tok(prev_tok, walk_tok): - # Once there is an empty line between consecutive tokens we are done. - break - - if ( - not following_docstring - and walk_tok.token == "comment.block.percentage.matlab" - ): - # block comment immediately following enum so we are done. - # TODO we might need to do some postprocessing here to handle indents gracefully - following_docstring = walk_tok.content.strip()[2:-2] - break - elif walk_tok.token == "comment.line.percentage.matlab": - following_docstring = ( - following_docstring + "\n" + walk_tok.content[1:] - ) # [1:] strips % - walk_fwd_idx += 1 - prev_tok = walk_tok - elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": - walk_fwd_idx += 1 - # Dont update prev_tok for whitespace - else: - break - - if preceding_docstring: - self.properties[prop_name]["docstring"] = preceding_docstring.strip() - elif inline_docstring: - self.properties[prop_name]["docstring"] = inline_docstring.strip() - elif following_docstring: - self.properties[prop_name]["docstring"] = following_docstring.strip() - else: - self.properties[prop_name]["docstring"] = None - - def _parse_property_validation(self, prop_name, prop): - """Parses property validation syntax""" - # First get the szize if found - size_gen = prop.find(tokens="meta.parens.size.matlab", depth=1) - try: # We have a size, therefore parse the comma separated list into tuple - size_tok, _ = next(size_gen) - size_elem_gen = size_tok.find( - tokens=[ - "constant.numeric.decimal.matlab", - "keyword.operator.vector.colon.matlab", - ], - depth=1, - ) - size = tuple([elem[0].content for elem in size_elem_gen]) - self.properties[prop_name]["size"] = size - except StopIteration: - pass - - # Now find the type if it exists - # TODO this should be mapped to known types (though perhaps as a postprocess) - type_gen = prop.find(tokens="storage.type.matlab", depth=1) - try: - self.properties[prop_name]["type"] = next(type_gen)[0].content - except StopIteration: - pass - - # Now find list of validators - validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1) - try: - validator_tok, _ = next(validator_gen) - validator_toks = validator_tok.findall( - tokens=[ - "variable.other.readwrite.matlab", - "meta.function-call.parens.matlab", - ], - depth=1, - ) # TODO Probably bug here in MATLAB-Language-grammar - self.properties[prop_name]["validators"] = [ - tok[0].content for tok in validator_toks - ] - except StopIteration: - pass - - def _parse_method_section(self, section): - attrs = self._parse_attributes(section) - idxs = [ - i - for i in range(len(section.children)) - if section.children[i].token == "meta.function.matlab" - ] - for idx in idxs: - meth_tok = section.children[idx] - parsed_function = MatFunctionParser(meth_tok) - self.methods[parsed_function.name] = parsed_function - self.methods[parsed_function.name].attrs = attrs - - def _parse_enum_section(self, section): - idxs = [ - i - for i in range(len(section.children)) - if section.children[i].token - == "meta.assignment.definition.enummember.matlab" - ] - for idx in idxs: - enum_tok = section.children[idx] - next_idx = idx - enum_name = enum_tok.children[0].content - self.enumerations[enum_name] = {} - if ( - idx + 1 < len(section.children) - and section.children[idx + 1].token == "meta.parens.matlab" - ): # Parse out args TODO this should be part of enummember assignment definition - args = tuple( - [ - arg.content - for arg in section.children[idx + 1].children - if arg.token != "punctuation.separator.comma.matlab" - ] - ) - self.enumerations[enum_name]["args"] = args - next_idx += 1 - - # Walk backwards to get preceding docstring. - preceding_docstring = "" - walk_back_idx = idx - 1 - next_tok = enum_tok - while walk_back_idx >= 0: - walk_tok = section.children[walk_back_idx] - if _is_empty_line_between_tok(walk_tok, next_tok): - # Once there is an empty line between consecutive tokens we are done. - break - - if ( - not preceding_docstring - and walk_tok.token == "comment.block.percentage.matlab" - ): - # block comment immediately preceding enum so we are done. - # TODO we might need to do some postprocessing here to handle indents gracefully - preceding_docstring = walk_tok.content.strip()[2:-2] - break - elif walk_tok.token == "comment.line.percentage.matlab": - preceding_docstring = ( - walk_tok.content[1:] + "\n" + preceding_docstring - ) # [1:] strips % - walk_back_idx -= 1 - next_tok = walk_tok - elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": - walk_back_idx -= 1 - # Dont update next_tok for whitespace - else: - break - - # Walk forwards to get following docstring or inline one. - inline_docstring = "" - following_docstring = "" - walk_fwd_idx = next_idx + 1 - prev_tok = section.children[next_idx] - while walk_fwd_idx < len(section.children): - walk_tok = section.children[walk_fwd_idx] - - if _is_empty_line_between_tok(prev_tok, walk_tok): - # Once there is an empty line between consecutive tokens we are done. - break - - if ( - not following_docstring - and walk_tok.token == "comment.block.percentage.matlab" - ): - # block comment immediately following enum so we are done. - # TODO we might need to do some postprocessing here to handle indents gracefully - following_docstring = walk_tok.content.strip()[2:-2] - break - elif walk_tok.token == "comment.line.percentage.matlab": - # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit. - if _toks_on_same_line(section.children[idx], walk_tok): - inline_docstring = walk_tok.content[1:] - break - - following_docstring = ( - following_docstring + "\n" + walk_tok.content[1:] - ) # [1:] strips % - walk_fwd_idx += 1 - prev_tok = walk_tok - elif walk_tok.token == "punctuation.whitespace.comment.leading.matlab": - walk_fwd_idx += 1 - # Dont update prev_tok for whitespace - else: - break - - if preceding_docstring: - self.enumerations[enum_name]["docstring"] = preceding_docstring.strip() - elif inline_docstring: - self.enumerations[enum_name]["docstring"] = inline_docstring.strip() - elif following_docstring: - self.enumerations[enum_name]["docstring"] = following_docstring.strip() - else: - self.enumerations[enum_name]["docstring"] = None - - def _parse_attributes(self, section): - # walk down child list and parse manually - children = section.begin - idx = 1 - attrs = {} - while idx < len(children): - child_tok = children[idx] - if re.match( - "storage.modifier.(properties|methods|events).matlab", child_tok.token - ): - attr = child_tok.content - val = None - idx += 1 # walk to next token - try: # however we may have walked off the end of the list in which case we exit - maybe_assign_tok = children[idx] - except: - attrs[attr] = val - return attrs - if maybe_assign_tok.token == "keyword.operator.assignment.matlab": - idx += 1 - rhs_tok = children[idx] # parse right hand side - if rhs_tok.token == "meta.cell.literal.matlab": - # A cell. For now just take the whole cell as value. - # TODO parse out the cell array of metaclass literals. - val = "{" + rhs_tok.content + "}" - idx += 1 - elif rhs_tok.token == "constant.language.boolean.matlab": - val = rhs_tok.content - idx += 1 - elif rhs_tok.token == "storage.modifier.access.matlab": - val = rhs_tok.content - idx += 1 - elif rhs_tok.token == "keyword.operator.other.question.matlab": - idx += 1 - metaclass_tok = children[idx] - metaclass_components = metaclass_tok.findall( - tokens=[ - "entity.name.namespace.matlab", - "entity.other.class.matlab", - ] - ) - val = tuple([comp.content for comp, _ in metaclass_components]) - else: - pass - attrs[attr] = val - else: # Comma or continuation therefore skip - idx += 1 - - return attrs - - -if __name__ == "__main__": - parser = MatlabParser() - toks = parser.parse_file(rpath) - cls_parse = MatClassParser(toks) From 4c9263222d076a07d8c7f66c8a044e8ece6aa492 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Wed, 14 Aug 2024 10:51:04 +0200 Subject: [PATCH 19/45] also install tree-sitter --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 98fba9a..38a154f 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ "Pygments>=2.0.1", "tree-sitter-matlab>=1.0.1", "tree-sitter-python>=0.21.0", + "tree-sitter>=0.21.0", ] setup( From 22f277945ca59d128099369184c801071f5dc0f7 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Wed, 14 Aug 2024 11:12:13 +0200 Subject: [PATCH 20/45] bump required tree-sitter --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 38a154f..7310620 100644 --- a/setup.py +++ b/setup.py @@ -9,8 +9,7 @@ "Sphinx>=4.0.0", "Pygments>=2.0.1", "tree-sitter-matlab>=1.0.1", - "tree-sitter-python>=0.21.0", - "tree-sitter>=0.21.0", + "tree-sitter>=0.22.3", ] setup( From c5b8d38ac6b436d136493b83ec9d383b3e041c72 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Wed, 14 Aug 2024 11:32:39 +0200 Subject: [PATCH 21/45] tree-sitter version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7310620..fda2092 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ "Sphinx>=4.0.0", "Pygments>=2.0.1", "tree-sitter-matlab>=1.0.1", - "tree-sitter>=0.22.3", + "tree-sitter>=0.22.0", ] setup( From 2c235f8d7fb6c6cde7f15df2d28a6aa51653a6f4 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Wed, 14 Aug 2024 13:03:38 +0200 Subject: [PATCH 22/45] dealing with tree-sitter version diffs to maintain py 3.8 compatibility --- sphinxcontrib/mat_tree_sitter_parser.py | 98 ++++++++++++++++--------- sphinxcontrib/mat_types.py | 8 +- 2 files changed, 70 insertions(+), 36 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index a71ebb5..afae186 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -1,3 +1,4 @@ +from importlib.metadata import version import tree_sitter_matlab as tsml from tree_sitter import Language, Parser import re @@ -8,7 +9,7 @@ ) # rpath = "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_empty_output.m" -ML_LANG = Language(tsml.language()) +ML_LANG = Language(tsml.language(), "matlab") # QUERIES q_classdef = ML_LANG.query( @@ -163,6 +164,21 @@ re_assign_remove = re.compile(r"^=[ \t]*") +def tree_sitter_is_0_21(): + if not hasattr(tree_sitter_is_0_21, "is_21"): + tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) + tree_sitter_is_0_21.is_21 = tree_sitter_ver[1] == 21 # memoize + return tree_sitter_is_0_21.is_21 + + +def get_row(point): + """Get row from point. This api changed from v0.21.3 to v0.22.0""" + if tree_sitter_is_0_21(): + return point[0] + else: + return point.row + + def process_text_into_docstring(text): docstring = text.decode("utf-8") return re.sub(re_percent_remove, "", docstring) @@ -206,7 +222,7 @@ def __init__(self, root_node): docstring = None if docstring_node is not None: prev_sib = docstring_node.prev_named_sibling - if docstring_node.start_point.row - prev_sib.end_point.row <= 1: + if get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1: docstring = process_text_into_docstring(docstring_node.text) if not docstring: @@ -259,11 +275,14 @@ def _parse_argument_section(self, argblock_node): # comments which means this requires some relatively ugly # processing, but worth it for the ease of the rest of it. prev_sib = docstring_node.prev_named_sibling - if docstring_node.start_point.row == prev_sib.end_point.row: + if get_row(docstring_node.start_point) == get_row(prev_sib.end_point): # if the docstring is on the same line as the end of the definition only take the inline part docstring = process_text_into_docstring(docstring_node.text) docstring = docstring.split("\n")[0] - elif docstring_node.start_point.row - prev_sib.end_point.row <= 1: + elif ( + get_row(docstring_node.start_point) - get_row(prev_sib.end_point) + <= 1 + ): # Otherwise take the whole docstring docstring = process_text_into_docstring(docstring_node.text) @@ -274,11 +293,11 @@ def _parse_argument_section(self, argblock_node): # Nothing to be done. pass elif next_node.type == "comment": - if next_node.start_point.row == arg.end_point.row: + if get_row(next_node.start_point) == get_row(arg.end_point): # if the docstring is on the same line as the end of the definition only take the inline part docstring = process_text_into_docstring(next_node.text) docstring = docstring.split("\n")[0] - elif next_node.start_point.row - arg.end_point.row <= 1: + elif get_row(next_node.start_point) - get_row(arg.end_point) <= 1: # Otherwise take the whole docstring docstring = process_text_into_docstring(next_node.text) @@ -292,16 +311,18 @@ def _parse_argument_section(self, argblock_node): # line then we set the docstring. We also need to check # if the first line of the comment is the same as a # previous argument. - if arg.start_point.row - prev_node.end_point.row <= 1: + if get_row(arg.start_point) - get_row(prev_node.end_point) <= 1: ds = process_text_into_docstring(prev_node.text) prev_arg = prev_node.prev_named_sibling if prev_arg is not None and prev_arg.type == "property": - if prev_node.start_point.row == prev_arg.end_point.row: + if get_row(prev_node.start_point) == get_row( + prev_arg.end_point + ): ds = "\n".join(ds.split("\n")[1:]) if ds: docstring = ds else: - if arg.start_point.row - prev_node.end_point.row <= 1: + if get_row(arg.start_point) - get_row(prev_node.end_point) <= 1: docstring = process_text_into_docstring(prev_node.text) elif prev_node.type == "property": # The previous argumentnode may have eaten our comment @@ -312,11 +333,10 @@ def _parse_argument_section(self, argblock_node): # we now need to check if prev_comment ends on the line # before ours and trim the first line if it on the same # line as prev property. - if arg.start_point.row - prev_comment.end_point.row <= 1: + if get_row(arg.start_point) - get_row(prev_comment.end_point) <= 1: ds = process_text_into_docstring(prev_comment.text) - if ( - prev_comment.start_point.row - == prev_comment.prev_named_sibling.end_point.row + if get_row(prev_comment.start_point) == get_row( + prev_comment.prev_named_sibling.end_point ): ds = "\n".join(ds.split("\n")[1:]) if ds: @@ -396,7 +416,7 @@ def __init__(self, root_node): docstring_node = class_match.get("docstring") if docstring_node is not None: prev_node = docstring_node.prev_sibling - if docstring_node.start_point.row - prev_node.end_point.row <= 1: + if get_row(docstring_node.start_point) - get_row(prev_node.end_point) <= 1: self.docstring = process_text_into_docstring(docstring_node.text) prop_matches = q_properties.matches(self.cls) @@ -457,11 +477,14 @@ def _parse_property_section(self, props_match): # comments which means this requires some relatively ugly # processing, but worth it for the ease of the rest of it. prev_sib = docstring_node.prev_named_sibling - if docstring_node.start_point.row == prev_sib.end_point.row: + if get_row(docstring_node.start_point) == get_row(prev_sib.end_point): # if the docstring is on the same line as the end of the definition only take the inline part docstring = process_text_into_docstring(docstring_node.text) docstring = docstring.split("\n")[0] - elif docstring_node.start_point.row - prev_sib.end_point.row <= 1: + elif ( + get_row(docstring_node.start_point) - get_row(prev_sib.end_point) + <= 1 + ): # Otherwise take the whole docstring docstring = process_text_into_docstring(docstring_node.text) @@ -472,11 +495,11 @@ def _parse_property_section(self, props_match): # Nothing to be done. pass elif next_node.type == "comment": - if next_node.start_point.row == prop.end_point.row: + if get_row(next_node.start_point) == get_row(prop.end_point): # if the docstring is on the same line as the end of the definition only take the inline part docstring = process_text_into_docstring(next_node.text) docstring = docstring.split("\n")[0] - elif next_node.start_point.row - prop.end_point.row <= 1: + elif get_row(next_node.start_point) - get_row(prop.end_point) <= 1: # Otherwise take the whole docstring docstring = process_text_into_docstring(next_node.text) @@ -490,16 +513,18 @@ def _parse_property_section(self, props_match): # line then we set the docstring. We also need to check # if the first line of the comment is the same as a # previous property. - if prop.start_point.row - prev_node.end_point.row <= 1: + if get_row(prop.start_point) - get_row(prev_node.end_point) <= 1: ds = process_text_into_docstring(prev_node.text) prev_prop = prev_node.prev_named_sibling if prev_prop is not None and prev_prop.type == "property": - if prev_node.start_point.row == prev_prop.end_point.row: + if get_row(prev_node.start_point) == get_row( + prev_prop.end_point + ): ds = "\n".join(ds.split("\n")[1:]) if ds: docstring = ds else: - if prop.start_point.row - prev_node.end_point.row <= 1: + if get_row(prop.start_point) - get_row(prev_node.end_point) <= 1: docstring = process_text_into_docstring(prev_node.text) elif prev_node.type == "property": # The previous property node may have eaten our comment @@ -510,11 +535,10 @@ def _parse_property_section(self, props_match): # we now need to check if prev_comment ends on the line # before ours and trim the first line if it on the same # line as prev property. - if prop.start_point.row - prev_comment.end_point.row <= 1: + if get_row(prop.start_point) - get_row(prev_comment.end_point) <= 1: ds = process_text_into_docstring(prev_comment.text) - if ( - prev_comment.start_point.row - == prev_comment.prev_named_sibling.end_point.row + if get_row(prev_comment.start_point) == get_row( + prev_comment.prev_named_sibling.end_point ): ds = "\n".join(ds.split("\n")[1:]) if ds: @@ -562,11 +586,11 @@ def _parse_enum_section(self, enums_match): # look forward for docstring next_node = enum.next_named_sibling if next_node is not None and next_node.type == "comment": - if next_node.start_point.row == enum.end_point.row: + if get_row(next_node.start_point) == get_row(enum.end_point): # if the docstring is on the same line as the end of the definition only take the inline part docstring = process_text_into_docstring(next_node.text) docstring = docstring.split("\n")[0] - elif next_node.start_point.row - enum.end_point.row <= 1: + elif get_row(next_node.start_point) - get_row(enum.end_point) <= 1: # Otherwise take the whole docstring docstring = process_text_into_docstring(next_node.text) @@ -580,16 +604,18 @@ def _parse_enum_section(self, enums_match): # line then we set the docstring. We also need to check # if the first line of the comment is the same as a # previous enum. - if enum.start_point.row - prev_node.end_point.row <= 1: + if get_row(enum.start_point) - get_row(prev_node.end_point) <= 1: ds = process_text_into_docstring(prev_node.text) prev_enum = prev_node.prev_named_sibling if prev_enum is not None and prev_enum.type == "enum": - if prev_node.start_point.row == prev_enum.end_point.row: + if get_row(prev_node.start_point) == get_row( + prev_enum.end_point + ): ds = "\n".join(ds.split("\n")[1:]) if ds: docstring = ds else: - if enum.start_point.row - prev_node.end_point.row <= 1: + if get_row(enum.start_point) - get_row(prev_node.end_point) <= 1: docstring = process_text_into_docstring(prev_node.text) # After all that if our docstring is empty then we have none if docstring.strip() == "": @@ -612,11 +638,11 @@ def _parse_event_section(self, events_match): # look forward for docstring next_node = event.next_named_sibling if next_node is not None and next_node.type == "comment": - if next_node.start_point.row == event.end_point.row: + if get_row(next_node.start_point) == get_row(event.end_point): # if the docstring is on the same line as the end of the definition only take the inline part docstring = process_text_into_docstring(next_node.text) docstring = docstring.split("\n")[0] - elif next_node.start_point.row - event.end_point.row <= 1: + elif get_row(next_node.start_point) - get_row(event.end_point) <= 1: # Otherwise take the whole docstring docstring = process_text_into_docstring(next_node.text) @@ -630,16 +656,18 @@ def _parse_event_section(self, events_match): # line then we set the docstring. We also need to check # if the first line of the comment is the same as a # previous event. - if event.start_point.row - prev_node.end_point.row <= 1: + if get_row(event.start_point) - get_row(prev_node.end_point) <= 1: ds = process_text_into_docstring(prev_node.text) prev_event = prev_node.prev_named_sibling if prev_event is not None and prev_event.type == "identifier": - if prev_node.start_point.row == prev_event.end_point.row: + if get_row(prev_node.start_point) == get_row( + prev_event.end_point + ): ds = "\n".join(ds.split("\n")[1:]) if ds: docstring = ds else: - if event.start_point.row - prev_node.end_point.row <= 1: + if get_row(event.start_point) - get_row(prev_node.end_point) <= 1: docstring = process_text_into_docstring(prev_node.text) # After all that if our docstring is empty then we have none if docstring.strip() == "": diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index 0a12cf1..5f5210e 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -28,6 +28,7 @@ from pathlib import Path import cProfile import pstats +from importlib.metadata import version logger = sphinx.util.logging.getLogger("matlab-domain") @@ -512,7 +513,12 @@ def parse_mfile(mfile, name, path, encoding=None): full_code = code # parse the file - parser = Parser(ML_LANG) + tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) + if tree_sitter_ver[1] == 21: + parser = Parser() + parser.set_language(ML_LANG) + else: + parser = Parser(ML_LANG) tree = parser.parse(code) modname = path.replace(os.sep, ".") # module name From 2f36c14c4c7e4eb701c9a95d1f01cbb698d2a0d9 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Wed, 14 Aug 2024 13:06:24 +0200 Subject: [PATCH 23/45] ML_LANG versions --- setup.py | 2 +- sphinxcontrib/mat_tree_sitter_parser.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index fda2092..78acc7e 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ "Sphinx>=4.0.0", "Pygments>=2.0.1", "tree-sitter-matlab>=1.0.1", - "tree-sitter>=0.22.0", + "tree-sitter>=0.21.3", ] setup( diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index afae186..0821de1 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -9,7 +9,11 @@ ) # rpath = "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_empty_output.m" -ML_LANG = Language(tsml.language(), "matlab") +tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) +if tree_sitter_ver[1] == 21: + ML_LANG = Language(tsml.language(), "matlab") +else: + ML_LANG = Language(tsml.language()) # QUERIES q_classdef = ML_LANG.query( From c55b5d127252733f934d1a7a166c0369389feb93 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Thu, 15 Aug 2024 15:30:40 +0200 Subject: [PATCH 24/45] a better attributes query --- sphinxcontrib/mat_tree_sitter_parser.py | 37 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 0821de1..579455e 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -4,10 +4,8 @@ import re # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -rpath = ( - "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_ellipsis_empty_output.m" -) -# rpath = "/home/anton/tools/matlabdomain/tests/test_data/submodule/f_empty_output.m" +rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithMethodAttributes.m" +# rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m" tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) if tree_sitter_ver[1] == 21: @@ -36,7 +34,17 @@ """ ) -q_attributes = ML_LANG.query("""(attribute (identifier) @name (_)? @value)""") +q_attributes = ML_LANG.query( + """(attribute + (identifier) @name + [ + (identifier) @value + (string) @value + (metaclass_operator) @value + (cell) @value + ]?) + """ +) q_supers = ML_LANG.query("""[(identifier) @secs "."]+ """) @@ -110,7 +118,7 @@ [ (identifier) @outputs (multioutput_variable - [(identifier) @outputs _]+ + [[(identifier) (ignored_argument)] @outputs _]+ ) ] )? @@ -118,7 +126,7 @@ name: (identifier) @name _* (function_arguments - [(identifier) @params _]* + [(identifier) @params (ignored_argument) @params _]* )? _* [(arguments_statement) @argblocks _]* @@ -685,6 +693,7 @@ def _parse_attributes(self, attrs_nodes): attrs = {} if attrs_nodes is not None: for attr_node in attrs_nodes: + print(attr_node.sexp()) _, attr_match = q_attributes.matches(attr_node)[0] name = attr_match.get("name").text.decode("utf-8") value_node = attr_match.get("value") @@ -695,11 +704,19 @@ def _parse_attributes(self, attrs_nodes): if __name__ == "__main__": - parser = Parser(ML_LANG) + tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) + if tree_sitter_ver[1] == 21: + parser = Parser() + parser.set_language(ML_LANG) + else: + parser = Parser(ML_LANG) with open(rpath, "rb") as f: data = f.read() tree = parser.parse(data) - # class_parser = MatClassParser(tree.root_node) - fun_parser = MatFunctionParser(tree.root_node) + class_parser = MatClassParser(tree.root_node) + # fun_parser = MatFunctionParser(tree.root_node) + import pdb + + pdb.set_trace() From 46c0e4167d494d9b0d1d68645b7767c4259701a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Cederberg?= Date: Wed, 24 Jul 2024 15:04:27 +0200 Subject: [PATCH 25/45] Ci: Test on Sphinx 8 / Dev. (#259) * CI: Testing for latest Sphinx (8.0) * CI: Fix helper class version checking. --- .github/workflows/python-package.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 41775ab..ee7aae6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -54,6 +54,28 @@ jobs: tox -e "${{matrix.python-version}}-sphinx${{matrix.sphinx-version}}-pygments${{matrix.pygments-version}}" + test-sphinx-latest: + name: Run tests for Python ${{ matrix.python-version }}, Sphinx ${{ matrix.sphinx-version }}, Pygments ${{ matrix.pygments-version }} + timeout-minutes: 5 + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + sphinx-version: ["dev"] + pygments-version: ["latest"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - name: Run with Tox + run: | + pip install tox==4.8.0 + tox -e "${{matrix.python-version}}-sphinx${{matrix.sphinx-version}}-pygments${{matrix.pygments-version}}" + + test-sphinx-latest: name: Run tests for Python ${{ matrix.python-version }}, Sphinx ${{ matrix.sphinx-version }}, Pygments ${{ matrix.pygments-version }} timeout-minutes: 5 From cf88ba4f5821c24fc9fcbe78bd28849e78b3f26c Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Thu, 15 Aug 2024 20:25:21 +0200 Subject: [PATCH 26/45] Fixing nearly all tests in test_parse_mfile --- sphinxcontrib/mat_tree_sitter_parser.py | 219 +++++++++++++++++------- sphinxcontrib/mat_types.py | 160 ++--------------- tests/test_parse_mfile.py | 80 +++++---- 3 files changed, 217 insertions(+), 242 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 579455e..43828e4 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -4,7 +4,7 @@ import re # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithMethodAttributes.m" +rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithGetterSetter.m" # rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m" tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) @@ -41,8 +41,10 @@ (identifier) @value (string) @value (metaclass_operator) @value - (cell) @value - ]?) + (cell (row [(metaclass_operator) @value _]*)) + (cell (row [(string) @value _]*)) + ]? @rhs + ) """ ) @@ -171,6 +173,16 @@ """ ) +q_script = ML_LANG.query( + """ + (source_file + (comment) @docstring + ) + """ +) + +q_get_set = ML_LANG.query("""["get." "set."]""") + re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M) re_assign_remove = re.compile(r"^=[ \t]*") @@ -191,35 +203,51 @@ def get_row(point): return point.row -def process_text_into_docstring(text): - docstring = text.decode("utf-8") +def process_text_into_docstring(text, encoding): + docstring = text.decode(encoding) return re.sub(re_percent_remove, "", docstring) -def process_default(text): - default = text.decode("utf-8") +def process_default(text, encoding): + default = text.decode(encoding) return re.sub(re_assign_remove, "", default) +class MatScriptParser: + def __init__(self, root_node, encoding): + """Parse m script""" + self.encoding = encoding + _, script_match = q_script.matches(root_node)[0] + docstring_node = script_match.get("docstring") + if docstring_node is not None: + self.docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) + else: + self.docstring = None + print(self.docstring) + + class MatFunctionParser: - def __init__(self, root_node): + def __init__(self, root_node, encoding): """Parse Function definition""" + self.encoding = encoding _, fun_match = q_fun.matches(root_node)[0] - self.name = fun_match.get("name").text.decode("utf-8") + self.name = fun_match.get("name").text.decode(self.encoding) # Get outputs (possibly more than one) - self.outputs = {} + self.retv = {} output_nodes = fun_match.get("outputs") if output_nodes is not None: - outputs = [output.text.decode("utf-8") for output in output_nodes] - for output in outputs: - self.outputs[output] = {} + retv = [output.text.decode(self.encoding) for output in output_nodes] + for output in retv: + self.retv[output] = {} # Get parameters self.args = {} arg_nodes = fun_match.get("params") if arg_nodes is not None: - args = [arg.text.decode("utf-8") for arg in arg_nodes] + args = [arg.text.decode(self.encoding) for arg in arg_nodes] for arg in args: self.args[arg] = {} @@ -235,7 +263,9 @@ def __init__(self, root_node): if docstring_node is not None: prev_sib = docstring_node.prev_named_sibling if get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1: - docstring = process_text_into_docstring(docstring_node.text) + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) if not docstring: docstring = None @@ -255,28 +285,32 @@ def _parse_argument_section(self, argblock_node): _, arg_match = q_arg.matches(arg)[0] # extract name (this is always available so no need for None check) - name = [name.text.decode("utf-8") for name in arg_match.get("name")] + name = [name.text.decode(self.encoding) for name in arg_match.get("name")] # extract dims list dims_list = arg_match.get("dims") dims = None if dims_list is not None: - dims = tuple([dim.text.decode("utf-8") for dim in dims_list]) + dims = tuple([dim.text.decode(self.encoding) for dim in dims_list]) # extract type type_node = arg_match.get("type") - typename = type_node.text.decode("utf-8") if type_node is not None else None + typename = ( + type_node.text.decode(self.encoding) if type_node is not None else None + ) # extract validator functions vf_list = arg_match.get("validator_functions") vfs = None if vf_list is not None: - vfs = [vf.text.decode("utf-8") for vf in vf_list] + vfs = [vf.text.decode(self.encoding) for vf in vf_list] # extract default default_node = arg_match.get("default") default = ( - process_default(default_node.text) if default_node is not None else None + process_default(default_node.text, self.encoding) + if default_node is not None + else None ) # extract inline or following docstring if there is no semicolon @@ -289,14 +323,18 @@ def _parse_argument_section(self, argblock_node): prev_sib = docstring_node.prev_named_sibling if get_row(docstring_node.start_point) == get_row(prev_sib.end_point): # if the docstring is on the same line as the end of the definition only take the inline part - docstring = process_text_into_docstring(docstring_node.text) + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) docstring = docstring.split("\n")[0] elif ( get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1 ): # Otherwise take the whole docstring - docstring = process_text_into_docstring(docstring_node.text) + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) # extract inline or following docstring if there _is_ a semicolon. # this is only done if we didn't already find a docstring with the previous approach @@ -307,11 +345,15 @@ def _parse_argument_section(self, argblock_node): elif next_node.type == "comment": if get_row(next_node.start_point) == get_row(arg.end_point): # if the docstring is on the same line as the end of the definition only take the inline part - docstring = process_text_into_docstring(next_node.text) + docstring = process_text_into_docstring( + next_node.text, self.encoding + ) docstring = docstring.split("\n")[0] elif get_row(next_node.start_point) - get_row(arg.end_point) <= 1: # Otherwise take the whole docstring - docstring = process_text_into_docstring(next_node.text) + docstring = process_text_into_docstring( + next_node.text, self.encoding + ) # override docstring with prior if exists prev_node = arg.prev_named_sibling @@ -324,7 +366,7 @@ def _parse_argument_section(self, argblock_node): # if the first line of the comment is the same as a # previous argument. if get_row(arg.start_point) - get_row(prev_node.end_point) <= 1: - ds = process_text_into_docstring(prev_node.text) + ds = process_text_into_docstring(prev_node.text, self.encoding) prev_arg = prev_node.prev_named_sibling if prev_arg is not None and prev_arg.type == "property": if get_row(prev_node.start_point) == get_row( @@ -335,7 +377,9 @@ def _parse_argument_section(self, argblock_node): docstring = ds else: if get_row(arg.start_point) - get_row(prev_node.end_point) <= 1: - docstring = process_text_into_docstring(prev_node.text) + docstring = process_text_into_docstring( + prev_node.text, self.encoding + ) elif prev_node.type == "property": # The previous argumentnode may have eaten our comment # check for it a trailing comment. If it is not there @@ -346,7 +390,9 @@ def _parse_argument_section(self, argblock_node): # before ours and trim the first line if it on the same # line as prev property. if get_row(arg.start_point) - get_row(prev_comment.end_point) <= 1: - ds = process_text_into_docstring(prev_comment.text) + ds = process_text_into_docstring( + prev_comment.text, self.encoding + ) if get_row(prev_comment.start_point) == get_row( prev_comment.prev_named_sibling.end_point ): @@ -361,7 +407,7 @@ def _parse_argument_section(self, argblock_node): # Here we trust that the person is giving us valid matlab. if "Output" in attrs.keys(): - arg_loc = self.outputs + arg_loc = self.retv else: arg_loc = self.args if len(name) == 1: @@ -383,17 +429,20 @@ def _parse_attributes(self, attrs_nodes): if attrs_nodes is not None: for attr_node in attrs_nodes: _, attr_match = q_attributes.matches(attr_node)[0] - name = attr_match.get("name").text.decode("utf-8") + name = attr_match.get("name").text.decode(self.encoding) value_node = attr_match.get("value") attrs[name] = ( - value_node.text.decode("utf-8") if value_node is not None else None + value_node.text.decode(self.encoding) + if value_node is not None + else None ) return attrs class MatClassParser: - def __init__(self, root_node): + def __init__(self, root_node, encoding): # DATA + self.encoding = encoding self.name = "" self.supers = [] self.attrs = {} @@ -420,7 +469,7 @@ def __init__(self, root_node): for super_node in supers_nodes: _, super_match = q_supers.matches(super_node)[0] super_cls = tuple( - [sec.text.decode("utf-8") for sec in super_match.get("secs")] + [sec.text.decode(self.encoding) for sec in super_match.get("secs")] ) self.supers.append(super_cls) @@ -429,7 +478,9 @@ def __init__(self, root_node): if docstring_node is not None: prev_node = docstring_node.prev_sibling if get_row(docstring_node.start_point) - get_row(prev_node.end_point) <= 1: - self.docstring = process_text_into_docstring(docstring_node.text) + self.docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) prop_matches = q_properties.matches(self.cls) method_matches = q_methods.matches(self.cls) @@ -457,28 +508,32 @@ def _parse_property_section(self, props_match): _, prop_match = q_property.matches(prop)[0] # extract name (this is always available so no need for None check) - name = prop_match.get("name").text.decode("utf-8") + name = prop_match.get("name").text.decode(self.encoding) # extract dims list dims_list = prop_match.get("dims") dims = None if dims_list is not None: - dims = tuple([dim.text.decode("utf-8") for dim in dims_list]) + dims = tuple([dim.text.decode(self.encoding) for dim in dims_list]) # extract type type_node = prop_match.get("type") - typename = type_node.text.decode("utf-8") if type_node is not None else None + typename = ( + type_node.text.decode(self.encoding) if type_node is not None else None + ) # extract validator functions vf_list = prop_match.get("validator_functions") vfs = None if vf_list is not None: - vfs = [vf.text.decode("utf-8") for vf in vf_list] + vfs = [vf.text.decode(self.encoding) for vf in vf_list] # extract default default_node = prop_match.get("default") default = ( - process_default(default_node.text) if default_node is not None else None + process_default(default_node.text, self.encoding) + if default_node is not None + else None ) # extract inline or following docstring if there is no semicolon @@ -491,29 +546,37 @@ def _parse_property_section(self, props_match): prev_sib = docstring_node.prev_named_sibling if get_row(docstring_node.start_point) == get_row(prev_sib.end_point): # if the docstring is on the same line as the end of the definition only take the inline part - docstring = process_text_into_docstring(docstring_node.text) + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) docstring = docstring.split("\n")[0] elif ( get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1 ): # Otherwise take the whole docstring - docstring = process_text_into_docstring(docstring_node.text) + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) # extract inline or following docstring if there _is_ a semicolon. # this is only done if we didn't already find a docstring with the previous approach next_node = prop.next_named_sibling - if next_node is None or docstring is not None: + if next_node is None or docstring != "": # Nothing to be done. pass elif next_node.type == "comment": if get_row(next_node.start_point) == get_row(prop.end_point): # if the docstring is on the same line as the end of the definition only take the inline part - docstring = process_text_into_docstring(next_node.text) + docstring = process_text_into_docstring( + next_node.text, self.encoding + ) docstring = docstring.split("\n")[0] elif get_row(next_node.start_point) - get_row(prop.end_point) <= 1: # Otherwise take the whole docstring - docstring = process_text_into_docstring(next_node.text) + docstring = process_text_into_docstring( + next_node.text, self.encoding + ) # override docstring with prior if exists prev_node = prop.prev_named_sibling @@ -526,18 +589,21 @@ def _parse_property_section(self, props_match): # if the first line of the comment is the same as a # previous property. if get_row(prop.start_point) - get_row(prev_node.end_point) <= 1: - ds = process_text_into_docstring(prev_node.text) + ds = process_text_into_docstring(prev_node.text, self.encoding) prev_prop = prev_node.prev_named_sibling if prev_prop is not None and prev_prop.type == "property": if get_row(prev_node.start_point) == get_row( prev_prop.end_point ): ds = "\n".join(ds.split("\n")[1:]) + if ds: docstring = ds else: if get_row(prop.start_point) - get_row(prev_node.end_point) <= 1: - docstring = process_text_into_docstring(prev_node.text) + docstring = process_text_into_docstring( + prev_node.text, self.encoding + ) elif prev_node.type == "property": # The previous property node may have eaten our comment # check for it a trailing comment. If it is not there @@ -548,7 +614,9 @@ def _parse_property_section(self, props_match): # before ours and trim the first line if it on the same # line as prev property. if get_row(prop.start_point) - get_row(prev_comment.end_point) <= 1: - ds = process_text_into_docstring(prev_comment.text) + ds = process_text_into_docstring( + prev_comment.text, self.encoding + ) if get_row(prev_comment.start_point) == get_row( prev_comment.prev_named_sibling.end_point ): @@ -577,7 +645,11 @@ def _parse_method_section(self, methods_match): attrs_nodes = methods_match.get("attrs") attrs = self._parse_attributes(attrs_nodes) for method in methods: - parsed_function = MatFunctionParser(method) + is_set_get = q_get_set.matches(method) + # Skip getter and setter + if len(is_set_get) > 0: + continue + parsed_function = MatFunctionParser(method, self.encoding) self.methods[parsed_function.name] = parsed_function self.methods[parsed_function.name].attrs = attrs @@ -587,10 +659,10 @@ def _parse_enum_section(self, enums_match): return for enum in enums: _, enum_match = q_enum.matches(enum)[0] - name = enum_match.get("name").text.decode("utf-8") + name = enum_match.get("name").text.decode(self.encoding) arg_nodes = enum_match.get("args") if arg_nodes is not None: - args = [arg.text.decode("utf-8") for arg in arg_nodes] + args = [arg.text.decode(self.encoding) for arg in arg_nodes] else: args = None @@ -600,11 +672,15 @@ def _parse_enum_section(self, enums_match): if next_node is not None and next_node.type == "comment": if get_row(next_node.start_point) == get_row(enum.end_point): # if the docstring is on the same line as the end of the definition only take the inline part - docstring = process_text_into_docstring(next_node.text) + docstring = process_text_into_docstring( + next_node.text, self.encoding + ) docstring = docstring.split("\n")[0] elif get_row(next_node.start_point) - get_row(enum.end_point) <= 1: # Otherwise take the whole docstring - docstring = process_text_into_docstring(next_node.text) + docstring = process_text_into_docstring( + next_node.text, self.encoding + ) # override docstring with prior if exists prev_node = enum.prev_named_sibling @@ -617,7 +693,7 @@ def _parse_enum_section(self, enums_match): # if the first line of the comment is the same as a # previous enum. if get_row(enum.start_point) - get_row(prev_node.end_point) <= 1: - ds = process_text_into_docstring(prev_node.text) + ds = process_text_into_docstring(prev_node.text, self.encoding) prev_enum = prev_node.prev_named_sibling if prev_enum is not None and prev_enum.type == "enum": if get_row(prev_node.start_point) == get_row( @@ -628,7 +704,9 @@ def _parse_enum_section(self, enums_match): docstring = ds else: if get_row(enum.start_point) - get_row(prev_node.end_point) <= 1: - docstring = process_text_into_docstring(prev_node.text) + docstring = process_text_into_docstring( + prev_node.text, self.encoding + ) # After all that if our docstring is empty then we have none if docstring.strip() == "": docstring == None @@ -644,7 +722,7 @@ def _parse_event_section(self, events_match): if events is None: return for event in events: - name = event.text.decode("utf-8") + name = event.text.decode(self.encoding) docstring = "" # look forward for docstring @@ -652,11 +730,15 @@ def _parse_event_section(self, events_match): if next_node is not None and next_node.type == "comment": if get_row(next_node.start_point) == get_row(event.end_point): # if the docstring is on the same line as the end of the definition only take the inline part - docstring = process_text_into_docstring(next_node.text) + docstring = process_text_into_docstring( + next_node.text, self.encoding + ) docstring = docstring.split("\n")[0] elif get_row(next_node.start_point) - get_row(event.end_point) <= 1: # Otherwise take the whole docstring - docstring = process_text_into_docstring(next_node.text) + docstring = process_text_into_docstring( + next_node.text, self.encoding + ) # override docstring with prior if exists prev_node = event.prev_named_sibling @@ -669,7 +751,7 @@ def _parse_event_section(self, events_match): # if the first line of the comment is the same as a # previous event. if get_row(event.start_point) - get_row(prev_node.end_point) <= 1: - ds = process_text_into_docstring(prev_node.text) + ds = process_text_into_docstring(prev_node.text, self.encoding) prev_event = prev_node.prev_named_sibling if prev_event is not None and prev_event.type == "identifier": if get_row(prev_node.start_point) == get_row( @@ -680,7 +762,9 @@ def _parse_event_section(self, events_match): docstring = ds else: if get_row(event.start_point) - get_row(prev_node.end_point) <= 1: - docstring = process_text_into_docstring(prev_node.text) + docstring = process_text_into_docstring( + prev_node.text, self.encoding + ) # After all that if our docstring is empty then we have none if docstring.strip() == "": docstring == None @@ -693,13 +777,20 @@ def _parse_attributes(self, attrs_nodes): attrs = {} if attrs_nodes is not None: for attr_node in attrs_nodes: - print(attr_node.sexp()) _, attr_match = q_attributes.matches(attr_node)[0] - name = attr_match.get("name").text.decode("utf-8") + name = attr_match.get("name").text.decode(self.encoding) value_node = attr_match.get("value") - attrs[name] = ( - value_node.text.decode("utf-8") if value_node is not None else None - ) + rhs_node = attr_match.get("rhs") + if rhs_node is not None: + if rhs_node.type == "cell": + attrs[name] = [ + vn.text.decode(self.encoding) for vn in value_node + ] + else: + attrs[name] = value_node[0].text.decode(self.encoding) + else: + attrs[name] = None + return attrs @@ -715,7 +806,7 @@ def _parse_attributes(self, attrs_nodes): data = f.read() tree = parser.parse(data) - class_parser = MatClassParser(tree.root_node) + class_parser = MatClassParser(tree.root_node, self.encoding) # fun_parser = MatFunctionParser(tree.root_node) import pdb diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index 5f5210e..cf9a8c2 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -20,6 +20,7 @@ from sphinxcontrib.mat_tree_sitter_parser import ( MatClassParser, MatFunctionParser, + MatScriptParser, ML_LANG, ) import tree_sitter_matlab as tsml @@ -548,19 +549,17 @@ def isClass(tree): name, modname, ) - return MatClass(name, modname, tree.root_node) + return MatClass(name, modname, tree.root_node, encoding) elif isFunction(tree): logger.debug( "[sphinxcontrib-matlabdomain] parsing function %s from %s.", name, modname, ) - return MatFunction(name, modname, tree.root_node) + return MatFunction(name, modname, tree.root_node, encoding) else: - pass - # it's a script file retoken with header comment - # tks = list(MatlabLexer().get_tokens(full_code)) - # return MatScript(name, modname, toks) + return MatScript(name, modname, tree.root_node, encoding) + return None @staticmethod @@ -876,15 +875,15 @@ class MatFunction(MatObject): :type tokens: list """ - def __init__(self, name, modname, tokens): + def __init__(self, name, modname, tokens, encoding): super(MatFunction, self).__init__(name) - parsed_function = MatFunctionParser(tokens) + parsed_function = MatFunctionParser(tokens, encoding) #: Path of folder containing :class:`MatObject`. self.module = modname #: docstring self.docstring = parsed_function.docstring #: output args - self.retv = parsed_function.outputs + self.retv = parsed_function.retv #: input args self.args = parsed_function.args #: remaining tokens after main function is parsed @@ -925,9 +924,9 @@ class MatClass(MatMixin, MatObject): :type tokens: list """ - def __init__(self, name, modname, tokens): + def __init__(self, name, modname, tokens, encoding): super(MatClass, self).__init__(name) - parsed_class = MatClassParser(tokens) + parsed_class = MatClassParser(tokens, encoding) #: Path of folder containing :class:`MatObject`. self.module = modname #: dictionary of class attributes @@ -974,105 +973,6 @@ def link(self, env, name=None): else: return f":class:`{target}`" - def attributes(self, idx, attr_types): - """ - Retrieve MATLAB class, property and method attributes. - """ - attr_dict = {} - idx += self._blanks(idx) # skip blanks - # class, property & method "attributes" start with parenthesis - if self._tk_eq(idx, (Token.Punctuation, "(")): - idx += 1 - # closing parenthesis terminates attributes - while self._tk_ne(idx, (Token.Punctuation, ")")): - idx += self._blanks(idx) # skip blanks - - k, attr_name = self.tokens[idx] # split token key, value - if k is Token.Name and attr_name in attr_types: - attr_dict[attr_name] = True # add attibute to dictionary - idx += 1 - elif k is Token.Name: - logger.warning( - "[sphinxcontrib-matlabdomain] Unexpected class attribute: '%s'. " - " In '%s.%s'.", - str(self.tokens[idx][1]), - self.module, - self.name, - ) - idx += 1 - - idx += self._blanks(idx) # skip blanks - - # Continue if attribute is assigned a boolean value - if self.tokens[idx][0] == Token.Name.Builtin: - idx += 1 - continue - - # continue to next attribute separated by commas - if self._tk_eq(idx, (Token.Punctuation, ",")): - idx += 1 - continue - # attribute values - elif self._tk_eq(idx, (Token.Punctuation, "=")): - idx += 1 - idx += self._blanks(idx) # skip blanks - k, attr_val = self.tokens[idx] # split token key, value - if k is Token.Name and attr_val in ["true", "false"]: - # logical value - if attr_val == "false": - attr_dict[attr_name] = False - idx += 1 - elif k is Token.Name or self._tk_eq(idx, (Token.Text, "?")): - # concatenate enumeration or meta class - enum_or_meta = self.tokens[idx][1] - idx += 1 - while ( - self._tk_ne(idx, (Token.Text, " ")) - and self._tk_ne(idx, (Token.Text, "\t")) - and self._tk_ne(idx, (Token.Punctuation, ",")) - and self._tk_ne(idx, (Token.Punctuation, ")")) - ): - enum_or_meta += self.tokens[idx][1] - idx += 1 - if self._tk_ne(idx, (Token.Punctuation, ")")): - idx += 1 - attr_dict[attr_name] = enum_or_meta - # cell array of values - elif self._tk_eq(idx, (Token.Punctuation, "{")): - idx += 1 - # closing curly braces terminate cell array - attr_dict[attr_name] = [] - while self._tk_ne(idx, (Token.Punctuation, "}")): - idx += self._blanks(idx) # skip blanks - # concatenate attr value string - attr_val = "" - # TODO: use _blanks or _indent instead - while self._tk_ne( - idx, (Token.Punctuation, ",") - ) and self._tk_ne(idx, (Token.Punctuation, "}")): - attr_val += self.tokens[idx][1] - idx += 1 - if self._tk_eq(idx, (Token.Punctuation, ",")): - idx += 1 - if attr_val: - attr_dict[attr_name].append(attr_val) - idx += 1 - elif ( - self.tokens[idx][0] == Token.Literal.String - and self.tokens[idx + 1][0] == Token.Literal.String - ): - # String - attr_val += self.tokens[idx][1] + self.tokens[idx + 1][1] - idx += 2 - attr_dict[attr_name] = attr_val.strip("'") - - idx += self._blanks(idx) # skip blanks - # continue to next attribute separated by commas - if self._tk_eq(idx, (Token.Punctuation, ",")): - idx += 1 - idx += 1 # end of class attributes - return attr_dict, idx - @property def __module__(self): return self.module @@ -1194,49 +1094,15 @@ def __doc__(self): class MatScript(MatObject): - def __init__(self, name, modname, tks): + def __init__(self, name, modname, tks, encoding): super(MatScript, self).__init__(name) + parsed_script = MatScriptParser(tks, encoding) #: Path of folder containing :class:`MatScript`. self.module = modname #: List of tokens parsed from mfile by Pygments. self.tokens = tks #: docstring - self.docstring = "" - #: remaining tokens after main function is parsed - self.rem_tks = None - - tks = copy(self.tokens) # make a copy of tokens - tks.reverse() # reverse in place for faster popping, stacks are LiLo - skip_whitespace(tks) - # ===================================================================== - # docstring - try: - docstring = tks.pop() - # Skip any statements before first documentation header - while docstring and docstring[0] is not Token.Comment: - docstring = tks.pop() - except IndexError: - docstring = None - while docstring and docstring[0] is Token.Comment: - self.docstring += docstring[1].lstrip("%") - # Get newline if it exists and append to docstring - try: - wht = tks.pop() # We expect a newline - except IndexError: - break - if wht[0] in (Token.Text, Token.Text.Whitespace) and wht[1] == "\n": - self.docstring += "\n" - # Skip whitespace - try: - wht = tks.pop() # We expect a newline - except IndexError: - break - while wht in list(zip((Token.Text,) * 3, (" ", "\t"))): - try: - wht = tks.pop() - except IndexError: - break - docstring = wht # check if Token is Comment + self.docstring = parsed_script.docstring @property def __doc__(self): diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py index b901409..123c2a6 100644 --- a/tests/test_parse_mfile.py +++ b/tests/test_parse_mfile.py @@ -24,7 +24,7 @@ def test_ClassExample(): assert obj.name == "ClassExample" assert ( obj.docstring - == " test class methods\n\n:param a: the input to :class:`ClassExample`" + == "test class methods\n\n:param a: the input to :class:`ClassExample`" ) mymethod = obj.methods["mymethod"] assert mymethod.name == "mymethod" @@ -105,7 +105,7 @@ def test_no_docstring(): assert obj.name == "f_no_docstring" assert list(obj.retv.keys()) == ["y"] assert list(obj.args.keys()) == [] - assert obj.docstring == "" + assert obj.docstring is None def test_no_output(): @@ -183,7 +183,7 @@ def test_ClassWithFunctionArguments(): mymethod = obj.methods["mymethod"] assert mymethod.name == "mymethod" assert list(mymethod.retv.keys()) == ["c"] - assert mymethod.args.keys() == ["obj", "b"] + assert list(mymethod.args.keys()) == ["obj", "b"] assert ( mymethod.docstring == "a method in :class:`ClassWithFunctionArguments`\n\n:param b: an input to :meth:`mymethod`" @@ -248,15 +248,14 @@ def test_script_with_comment_header(): assert ( obj.docstring == """This is a Comment Header - Copyright (C) , by - - Some descriptions ... +Copyright (C) , by - This header and all further comments above the first command line - of the script will be ignored by the documentation system. +Some descriptions ... - Lisence (GPL, BSD, etc.) +This header and all further comments above the first command line +of the script will be ignored by the documentation system. +Lisence (GPL, BSD, etc.) """ ) @@ -269,15 +268,14 @@ def test_script_with_comment_header_2(): assert ( obj.docstring == """This is a Comment Header - Copyright (C) , by +Copyright (C) , by - Some descriptions ... +Some descriptions ... - This header and all further comments above the first command line - of the script will be ignored by the documentation system. - - Lisence (GPL, BSD, etc.) +This header and all further comments above the first command line +of the script will be ignored by the documentation system. +Lisence (GPL, BSD, etc.) """ ) @@ -290,8 +288,7 @@ def test_script_with_comment_header_3(): assert ( obj.docstring == """This is a Comment Header with empty lines above - and many line comments. - +and many line comments. """ ) @@ -304,8 +301,7 @@ def test_script_with_comment_header_4(): assert ( obj.docstring == """This is a Comment Header with a single instruction above - and many line comments. - +and many line comments. """ ) @@ -320,26 +316,34 @@ def test_PropTypeOld(): "docstring": None, "attrs": {}, "default": "'none'", - "specs": "@char", - }, + "size": None, + "type": "char", + "validators": None, + }, # 'type': ['char'] "pos": { "docstring": None, "attrs": {}, "default": "zeros(3,1)", - "specs": "@double vector", - }, + "size": None, + "type": "vector", + "validators": None, + }, # 'type': ['double', 'vector'], "rotm": { "docstring": None, "attrs": {}, "default": "zeros(3,3)", - "specs": "@double matrix", - }, + "size": None, + "type": "matrix", + "validators": None, + }, # 'type': ['double', 'matrix'], "idx": { "docstring": None, "attrs": {}, "default": "0", - "specs": "@uint8 scalar", - }, + "size": None, + "type": "scalar", + "validators": None, + }, # 'type': ['uint8', 'scalar'], } @@ -361,7 +365,7 @@ def test_ClassWithMethodAttributes(): assert obj.methods["testPublic"].attrs == {"Access": "public"} assert obj.methods["testProtected"].attrs == {"Access": "protected"} assert obj.methods["testPrivate1"].attrs == {"Access": "private"} - assert obj.methods["testPrivate2"].attrs == {"Access": "private"} + assert obj.methods["testPrivate2"].attrs == {"Access": "'private'"} assert obj.methods["testHidden"].attrs == {"Hidden": None} assert obj.methods["testStatic"].attrs == {"Static": None} assert obj.methods["testFriend1"].attrs == {"Access": "?OtherClass"} @@ -618,7 +622,14 @@ def test_ClassWithGetterSetter(): assert obj.name == "ClassWithGetterSetter" assert list(obj.methods.keys()) == ["ClassWithGetterSetter"] assert obj.properties == { - "a": {"docstring": "A nice property", "attrs": {}, "default": None} + "a": { + "docstring": "A nice property", + "attrs": {}, + "default": None, + "size": None, + "type": None, + "validators": None, + } } @@ -631,7 +642,14 @@ def test_ClassWithDoubleQuotedString(): assert obj.name == "ClassWithDoubleQuotedString" assert set(obj.methods.keys()) == set(["ClassWithDoubleQuotedString", "method1"]) assert obj.properties == { - "Property1": {"docstring": None, "attrs": {}, "default": None, "specs": ""} + "Property1": { + "docstring": None, + "attrs": {}, + "default": None, + "size": None, + "type": None, + "validators": None, + } } @@ -903,7 +921,7 @@ def test_ClassWithTests(): assert obj.bases == [("matlab", "unittest", "TestCase")] assert "testRunning" in obj.methods testRunning = obj.methods["testRunning"] - assert testRunning.attrs["TestTags"] == ["{'Unit'}"] + assert testRunning.attrs["TestTags"] == ["'Unit'"] if __name__ == "__main__": From e12307bde0f7743c116677de9f7ed8cb89b9c537 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Thu, 15 Aug 2024 23:04:39 +0200 Subject: [PATCH 27/45] fix a _lot_ of autodoc --- sphinxcontrib/mat_documenters.py | 7 ++++--- sphinxcontrib/mat_types.py | 28 +++++++++++++++------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/sphinxcontrib/mat_documenters.py b/sphinxcontrib/mat_documenters.py index 35fff8c..4c58cca 100644 --- a/sphinxcontrib/mat_documenters.py +++ b/sphinxcontrib/mat_documenters.py @@ -1380,10 +1380,11 @@ def format_args(self): is_ctor = self.object.cls.name == self.object.name if self.object.args: - if self.object.args[0] in ("obj", "self") and not is_ctor: - return "(" + ", ".join(self.object.args[1:]) + ")" + arglist = list(self.object.args.keys()) + if arglist[0] in ("obj", "self") and not is_ctor: + return "(" + ", ".join(arglist[1:]) + ")" else: - return "(" + ", ".join(self.object.args) + ")" + return "(" + ", ".join(arglist) + ")" def document_members(self, all_members=False): pass diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index cf9a8c2..cf55766 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -938,7 +938,10 @@ def __init__(self, name, modname, tokens, encoding): #: dictionary of class properties self.properties = parsed_class.properties #: dictionary of class methods - self.methods = parsed_class.methods + self.methods = { + name: MatMethod(name, parsed_fun, modname, self) + for (name, parsed_fun) in parsed_class.methods.items() + } #: self.enumerations = parsed_class.enumerations #: remaining tokens after main class definition is parsed @@ -1066,24 +1069,23 @@ def __doc__(self): return self.docstring class MatMethod(MatFunction): - def __init__(self, modname, tks, cls, attrs): - # set name to None - super(MatMethod, self).__init__(None, modname, tks) + def __init__(self, name, parsed_function, modname, cls): + self.name = name + #: Path of folder containing :class:`MatObject`. + self.module = modname + #: docstring + self.docstring = parsed_function.docstring + #: output args + self.retv = parsed_function.retv + #: input args + self.args = parsed_function.args self.cls = cls - self.attrs = attrs + self.attrs = parsed_function.attrs def ref_role(self): """Returns role to use for references to this object (e.g. when generating auto-links)""" return "meth" - def skip_tokens(self): - # Number of tokens to skip in `MatClass` - num_rem_tks = len(self.rem_tks) - len_meth = len(self.tokens) - num_rem_tks - self.tokens = self.tokens[:-num_rem_tks] - self.rem_tks = None - return len_meth - @property def __module__(self): return self.module From f1ea4f588a334813a82e22bea66fb14e6bbede8c Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Fri, 16 Aug 2024 13:08:45 +0200 Subject: [PATCH 28/45] temporarily point to tree-sitter-matlab branch on apozharski fork --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 78acc7e..5d459e6 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ requires = [ "Sphinx>=4.0.0", "Pygments>=2.0.1", - "tree-sitter-matlab>=1.0.1", + "tree-sitter-matlab @ git+https://github.com/apozharski/tree-sitter-matlab.git", "tree-sitter>=0.21.3", ] From 64b2ce236d036ddd9fc7556026f57bba8857fb24 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Fri, 16 Aug 2024 15:05:35 +0200 Subject: [PATCH 29/45] fix old property syntax + update tests --- sphinxcontrib/mat_tree_sitter_parser.py | 71 ++++++++++++++++++------- tests/test_parse_mfile.py | 12 ++--- 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 43828e4..4ef6285 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -4,7 +4,7 @@ import re # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithGetterSetter.m" +rpath = "/home/anton/tools/matlabdomain/tests/test_data/PropTypeOld.m" # rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m" tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) @@ -56,7 +56,7 @@ (attributes [(attribute) @attrs _]+ )? - [(property) @properties _]+ + [(property) @properties (old_property) @properties _]+ ) @prop_block """ ) @@ -104,6 +104,17 @@ """ ) +q_old_property = ML_LANG.query( + """ + (old_property name: (identifier) @name + (identifier) @type + (old_property_type)? @size_type + (default_value)? @default + (comment)? @docstring + ) +""" +) + q_enum = ML_LANG.query( """(enum . @@ -504,17 +515,47 @@ def _parse_property_section(self, props_match): attrs_nodes = props_match.get("attrs") attrs = self._parse_attributes(attrs_nodes) for prop in properties: - # match property to extract details - _, prop_match = q_property.matches(prop)[0] + if prop.type == "property": + # match property to extract details + _, prop_match = q_property.matches(prop)[0] + + # extract name (this is always available so no need for None check) + name = prop_match.get("name").text.decode(self.encoding) + + # extract dims list + dims_list = prop_match.get("dims") + dims = None + if dims_list is not None: + dims = tuple([dim.text.decode(self.encoding) for dim in dims_list]) + + # extract validator functions + vf_list = prop_match.get("validator_functions") + vfs = None + if vf_list is not None: + vfs = [vf.text.decode(self.encoding) for vf in vf_list] + else: + # match property to extract details + _, prop_match = q_old_property.matches(prop)[0] - # extract name (this is always available so no need for None check) - name = prop_match.get("name").text.decode(self.encoding) + # extract name (this is always available so no need for None check) + name = prop_match.get("name").text.decode(self.encoding) - # extract dims list - dims_list = prop_match.get("dims") - dims = None - if dims_list is not None: - dims = tuple([dim.text.decode(self.encoding) for dim in dims_list]) + # extract size type + size_type = prop_match.get("size_type") + import pdb + + pdb.set_trace() + if size_type is None: + dims = None + elif size_type.text == b"scalar": + dims = ("1", "1") + elif size_type.text == b"vector": + dims = (":", "1") + elif size_type.text == b"matrix": + dims = (":", ":") + + # No validator functions + vfs = None # extract type type_node = prop_match.get("type") @@ -522,12 +563,6 @@ def _parse_property_section(self, props_match): type_node.text.decode(self.encoding) if type_node is not None else None ) - # extract validator functions - vf_list = prop_match.get("validator_functions") - vfs = None - if vf_list is not None: - vfs = [vf.text.decode(self.encoding) for vf in vf_list] - # extract default default_node = prop_match.get("default") default = ( @@ -806,7 +841,7 @@ def _parse_attributes(self, attrs_nodes): data = f.read() tree = parser.parse(data) - class_parser = MatClassParser(tree.root_node, self.encoding) + class_parser = MatClassParser(tree.root_node, "utf-8") # fun_parser = MatFunctionParser(tree.root_node) import pdb diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py index 123c2a6..32b409b 100644 --- a/tests/test_parse_mfile.py +++ b/tests/test_parse_mfile.py @@ -324,24 +324,24 @@ def test_PropTypeOld(): "docstring": None, "attrs": {}, "default": "zeros(3,1)", - "size": None, - "type": "vector", + "size": (":", "1"), + "type": "double", "validators": None, }, # 'type': ['double', 'vector'], "rotm": { "docstring": None, "attrs": {}, "default": "zeros(3,3)", - "size": None, - "type": "matrix", + "size": (":", ":"), + "type": "double", "validators": None, }, # 'type': ['double', 'matrix'], "idx": { "docstring": None, "attrs": {}, "default": "0", - "size": None, - "type": "scalar", + "size": ("1", "1"), + "type": "uint8", "validators": None, }, # 'type': ['uint8', 'scalar'], } From a892a14cd914cd5d9bbb7bec66b80288dba72416 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Fri, 16 Aug 2024 17:16:34 +0200 Subject: [PATCH 30/45] fixing test_autodoc and test_matlabify, only comment and line continuation issues remain --- sphinxcontrib/mat_tree_sitter_parser.py | 118 +++++++++++++++++------- sphinxcontrib/mat_types.py | 7 +- sphinxcontrib/matlab.py | 1 - tests/test_matlabify.py | 101 +++++++++++--------- tests/test_parse_mfile.py | 2 +- 5 files changed, 143 insertions(+), 86 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 4ef6285..967b283 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -187,7 +187,7 @@ q_script = ML_LANG.query( """ (source_file - (comment) @docstring + (comment)? @docstring ) """ ) @@ -215,12 +215,12 @@ def get_row(point): def process_text_into_docstring(text, encoding): - docstring = text.decode(encoding) + docstring = text.decode(encoding, errors="backslashreplace") return re.sub(re_percent_remove, "", docstring) def process_default(text, encoding): - default = text.decode(encoding) + default = text.decode(encoding, errors="backslashreplace") return re.sub(re_assign_remove, "", default) @@ -228,15 +228,18 @@ class MatScriptParser: def __init__(self, root_node, encoding): """Parse m script""" self.encoding = encoding - _, script_match = q_script.matches(root_node)[0] - docstring_node = script_match.get("docstring") - if docstring_node is not None: - self.docstring = process_text_into_docstring( - docstring_node.text, self.encoding - ) + script_matches = q_script.matches(root_node) + if script_matches: + _, script_match = q_script.matches(root_node)[0] + docstring_node = script_match.get("docstring") + if docstring_node is not None: + self.docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) + else: + self.docstring = None else: self.docstring = None - print(self.docstring) class MatFunctionParser: @@ -244,13 +247,18 @@ def __init__(self, root_node, encoding): """Parse Function definition""" self.encoding = encoding _, fun_match = q_fun.matches(root_node)[0] - self.name = fun_match.get("name").text.decode(self.encoding) + self.name = fun_match.get("name").text.decode( + self.encoding, errors="backslashreplace" + ) # Get outputs (possibly more than one) self.retv = {} output_nodes = fun_match.get("outputs") if output_nodes is not None: - retv = [output.text.decode(self.encoding) for output in output_nodes] + retv = [ + output.text.decode(self.encoding, errors="backslashreplace") + for output in output_nodes + ] for output in retv: self.retv[output] = {} @@ -258,7 +266,10 @@ def __init__(self, root_node, encoding): self.args = {} arg_nodes = fun_match.get("params") if arg_nodes is not None: - args = [arg.text.decode(self.encoding) for arg in arg_nodes] + args = [ + arg.text.decode(self.encoding, errors="backslashreplace") + for arg in arg_nodes + ] for arg in args: self.args[arg] = {} @@ -296,25 +307,38 @@ def _parse_argument_section(self, argblock_node): _, arg_match = q_arg.matches(arg)[0] # extract name (this is always available so no need for None check) - name = [name.text.decode(self.encoding) for name in arg_match.get("name")] + name = [ + name.text.decode(self.encoding, errors="backslashreplace") + for name in arg_match.get("name") + ] # extract dims list dims_list = arg_match.get("dims") dims = None if dims_list is not None: - dims = tuple([dim.text.decode(self.encoding) for dim in dims_list]) + dims = tuple( + [ + dim.text.decode(self.encoding, errors="backslashreplace") + for dim in dims_list + ] + ) # extract type type_node = arg_match.get("type") typename = ( - type_node.text.decode(self.encoding) if type_node is not None else None + type_node.text.decode(self.encoding, errors="backslashreplace") + if type_node is not None + else None ) # extract validator functions vf_list = arg_match.get("validator_functions") vfs = None if vf_list is not None: - vfs = [vf.text.decode(self.encoding) for vf in vf_list] + vfs = [ + vf.text.decode(self.encoding, errors="backslashreplace") + for vf in vf_list + ] # extract default default_node = arg_match.get("default") @@ -440,10 +464,12 @@ def _parse_attributes(self, attrs_nodes): if attrs_nodes is not None: for attr_node in attrs_nodes: _, attr_match = q_attributes.matches(attr_node)[0] - name = attr_match.get("name").text.decode(self.encoding) + name = attr_match.get("name").text.decode( + self.encoding, errors="backslashreplace" + ) value_node = attr_match.get("value") attrs[name] = ( - value_node.text.decode(self.encoding) + value_node.text.decode(self.encoding, errors="backslashreplace") if value_node is not None else None ) @@ -480,7 +506,10 @@ def __init__(self, root_node, encoding): for super_node in supers_nodes: _, super_match = q_supers.matches(super_node)[0] super_cls = tuple( - [sec.text.decode(self.encoding) for sec in super_match.get("secs")] + [ + sec.text.decode(self.encoding, errors="backslashreplace") + for sec in super_match.get("secs") + ] ) self.supers.append(super_cls) @@ -520,31 +549,40 @@ def _parse_property_section(self, props_match): _, prop_match = q_property.matches(prop)[0] # extract name (this is always available so no need for None check) - name = prop_match.get("name").text.decode(self.encoding) + name = prop_match.get("name").text.decode( + self.encoding, errors="backslashreplace" + ) # extract dims list dims_list = prop_match.get("dims") dims = None if dims_list is not None: - dims = tuple([dim.text.decode(self.encoding) for dim in dims_list]) + dims = tuple( + [ + dim.text.decode(self.encoding, errors="backslashreplace") + for dim in dims_list + ] + ) # extract validator functions vf_list = prop_match.get("validator_functions") vfs = None if vf_list is not None: - vfs = [vf.text.decode(self.encoding) for vf in vf_list] + vfs = [ + vf.text.decode(self.encoding, errors="backslashreplace") + for vf in vf_list + ] else: # match property to extract details _, prop_match = q_old_property.matches(prop)[0] # extract name (this is always available so no need for None check) - name = prop_match.get("name").text.decode(self.encoding) + name = prop_match.get("name").text.decode( + self.encoding, errors="backslashreplace" + ) # extract size type size_type = prop_match.get("size_type") - import pdb - - pdb.set_trace() if size_type is None: dims = None elif size_type.text == b"scalar": @@ -560,7 +598,9 @@ def _parse_property_section(self, props_match): # extract type type_node = prop_match.get("type") typename = ( - type_node.text.decode(self.encoding) if type_node is not None else None + type_node.text.decode(self.encoding, errors="backslashreplace") + if type_node is not None + else None ) # extract default @@ -694,10 +734,15 @@ def _parse_enum_section(self, enums_match): return for enum in enums: _, enum_match = q_enum.matches(enum)[0] - name = enum_match.get("name").text.decode(self.encoding) + name = enum_match.get("name").text.decode( + self.encoding, errors="backslashreplace" + ) arg_nodes = enum_match.get("args") if arg_nodes is not None: - args = [arg.text.decode(self.encoding) for arg in arg_nodes] + args = [ + arg.text.decode(self.encoding, errors="backslashreplace") + for arg in arg_nodes + ] else: args = None @@ -757,7 +802,7 @@ def _parse_event_section(self, events_match): if events is None: return for event in events: - name = event.text.decode(self.encoding) + name = event.text.decode(self.encoding, errors="backslashreplace") docstring = "" # look forward for docstring @@ -813,16 +858,21 @@ def _parse_attributes(self, attrs_nodes): if attrs_nodes is not None: for attr_node in attrs_nodes: _, attr_match = q_attributes.matches(attr_node)[0] - name = attr_match.get("name").text.decode(self.encoding) + name = attr_match.get("name").text.decode( + self.encoding, errors="backslashreplace" + ) value_node = attr_match.get("value") rhs_node = attr_match.get("rhs") if rhs_node is not None: if rhs_node.type == "cell": attrs[name] = [ - vn.text.decode(self.encoding) for vn in value_node + vn.text.decode(self.encoding, errors="backslashreplace") + for vn in value_node ] else: - attrs[name] = value_node[0].text.decode(self.encoding) + attrs[name] = value_node[0].text.decode( + self.encoding, errors="backslashreplace" + ) else: attrs[name] = None diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index cf55766..4172743 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -444,11 +444,6 @@ def matlabify(objname): # make a full path out of basedir and objname fullpath = os.path.join(MatObject.basedir, objname) # objname fullpath - # Check if path should be ignored - for ignore in MatObject.sphinx_env.config.matlab_ignore_dirs: - if Path(fullpath).is_relative_to(MatObject.basedir, ignore): - return None - logger.debug( f"[sphinxcontrib-matlabdomain] matlabify {package=}, {objname=}, {fullpath=}" ) @@ -994,7 +989,7 @@ def __bases__(self): if isinstance(entity, MatClass) or "@" in name: class_entity_table[name] = entity - for base in self.bases: + for base in bases_.keys(): if base in class_entity_table.keys(): bases_[base] = class_entity_table[base] diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py index bf83a5a..5764f9f 100644 --- a/sphinxcontrib/matlab.py +++ b/sphinxcontrib/matlab.py @@ -879,7 +879,6 @@ def setup(app): app.add_domain(MATLABDomain) # autodoc app.add_config_value("matlab_src_dir", None, "env") - app.add_config_value("matlab_ignore_dirs", [], "env") app.add_config_value("matlab_src_encoding", None, "env") app.add_config_value("matlab_keep_package_prefix", False, "env") app.add_config_value("matlab_show_property_default_value", False, "env") diff --git a/tests/test_matlabify.py b/tests/test_matlabify.py index 1e54fcc..d27cbe9 100644 --- a/tests/test_matlabify.py +++ b/tests/test_matlabify.py @@ -138,12 +138,19 @@ def test_classes(mod): assert isinstance(cls, doc.MatClass) assert cls.getter("__name__") == "ClassInheritHandle" assert cls.getter("__module__") == "test_data" - assert cls.bases == ["handle", "my.super.Class"] + assert cls.bases == [("handle",), ("my", "super", "Class")] assert cls.attrs == {} assert cls.properties == { - "x": {"attrs": {}, "default": None, "docstring": " a property", "specs": ""} + "x": { + "attrs": {}, + "default": None, + "docstring": "a property", + "size": None, + "type": None, + "validators": None, + } } - assert cls.getter("__doc__") == " a handle class\n\n :param x: a variable\n" + assert cls.getter("__doc__") == "a handle class\n\n:param x: a variable" def test_abstract_class(mod): @@ -154,39 +161,43 @@ def test_abstract_class(mod): assert abc.getter("__module__") == "test_data" assert "ClassInheritHandle" in abc.getter("__bases__") assert "ClassExample" in abc.getter("__bases__") - assert abc.bases == ["ClassInheritHandle", "ClassExample"] - assert abc.attrs == {"Abstract": True, "Sealed": True} + assert abc.bases == [("ClassInheritHandle",), ("ClassExample",)] + assert abc.attrs == {"Abstract": None, "Sealed": None} assert abc.properties == { "y": { "default": None, - "docstring": " y variable", + "docstring": "y variable", "attrs": {"GetAccess": "private", "SetAccess": "private"}, - "specs": "", + "size": None, + "type": None, + "validators": None, }, "version": { "default": "'0.1.1-beta'", - "docstring": " version", - "attrs": {"Constant": True}, - "specs": "", + "docstring": "version", + "attrs": {"Constant": None}, + "size": None, + "type": None, + "validators": None, }, } assert ( abc.getter("__doc__") - == " an abstract class\n\n :param y: a variable\n :type y: double\n" + == "an abstract class\n\n:param y: a variable\n:type y: double" ) assert abc.getter("__doc__") == abc.docstring abc_y = abc.getter("y") assert isinstance(abc_y, doc.MatProperty) assert abc_y.default is None - assert abc_y.docstring == " y variable" + assert abc_y.docstring == "y variable" assert abc_y.attrs == {"SetAccess": "private", "GetAccess": "private"} abc_version = abc.getter("version") assert isinstance(abc_version, doc.MatProperty) assert abc_version.default == "'0.1.1-beta'" - assert abc_version.docstring == " version" - assert abc_version.attrs == {"Constant": True} + assert abc_version.docstring == "version" + assert abc_version.attrs == {"Constant": None} def test_class_method(mod): @@ -195,7 +206,7 @@ def test_class_method(mod): assert cls_meth.getter("__name__") == "ClassExample" assert ( cls_meth.docstring - == " test class methods\n\n :param a: the input to :class:`ClassExample`\n" + == "test class methods\n\n:param a: the input to :class:`ClassExample`" ) constructor = cls_meth.getter("ClassExample") assert isinstance(constructor, doc.MatMethod) @@ -206,37 +217,37 @@ def test_class_method(mod): # TODO: mymethod.args will contain ['obj', 'b'] if run standalone # but if test_autodoc.py is run, the 'obj' is removed assert mymethod.args - assert mymethod.args[-1] == "b" - assert mymethod.retv == ["c"] + assert "b" in list(mymethod.args.keys()) + assert list(mymethod.retv.keys()) == ["c"] assert ( mymethod.docstring - == " a method in :class:`ClassExample`\n\n :param b: an input to :meth:`mymethod`\n" + == "a method in :class:`ClassExample`\n\n:param b: an input to :meth:`mymethod`" ) def test_submodule_class(mod): cls = mod.getter("submodule.TestFibonacci") assert isinstance(cls, doc.MatClass) - assert cls.docstring == " Test of MATLAB unittest method attributes\n" + assert cls.docstring == "Test of MATLAB unittest method attributes" assert cls.attrs == {} - assert cls.bases == ["matlab.unittest.TestCase"] + assert cls.bases == [("matlab", "unittest", "TestCase")] assert "compareFirstThreeElementsToExpected" in cls.methods assert cls.module == "test_data.submodule" assert cls.properties == {} method = cls.getter("compareFirstThreeElementsToExpected") assert isinstance(method, doc.MatMethod) assert method.name == "compareFirstThreeElementsToExpected" - assert method.retv is None - assert method.args == ["tc"] - assert method.docstring == " Test case that compares first three elements\n" - assert method.attrs == {"Test": True} + assert method.retv == {} + assert list(method.args.keys()) == ["tc"] + assert method.docstring == "Test case that compares first three elements" + assert method.attrs == {"Test": None} def test_folder_class(mod): cls_mod = mod.getter("@ClassFolder") assert isinstance(cls_mod, doc.MatModule) cls = cls_mod.getter("ClassFolder") - assert cls.docstring == " A class in a folder\n" + assert cls.docstring == "A class in a folder" assert cls.attrs == {} assert cls.bases == [] assert cls.module == "test_data.@ClassFolder" @@ -244,8 +255,10 @@ def test_folder_class(mod): "p": { "attrs": {}, "default": None, - "docstring": " a property of a class folder", - "specs": "", + "docstring": "a property of a class folder", + "size": None, + "type": None, + "validators": None, } } @@ -254,18 +267,18 @@ def test_folder_class(mod): func = cls_mod.getter("a_static_func") assert isinstance(func, doc.MatFunction) assert func.name == "a_static_func" - assert func.args == ["args"] - assert func.retv == ["retv"] - assert func.docstring == " method in :class:`~test_data.@ClassFolder`\n" + assert list(func.args.keys()) == ["args"] + assert list(func.retv.keys()) == ["retv"] + assert func.docstring == "method in :class:`~test_data.@ClassFolder`" func = cls_mod.getter("classMethod") assert isinstance(func, doc.MatFunction) assert func.name == "classMethod" - assert func.args == ["obj", "varargin"] - assert func.retv == ["varargout"] + assert list(func.args.keys()) == ["obj", "varargin"] + assert list(func.retv.keys()) == ["varargout"] assert ( func.docstring - == " CLASSMETHOD A function within a package\n\n :param obj: An instance of this class.\n" - " :param varargin: Variable input arguments.\n :returns: varargout\n" + == "CLASSMETHOD A function within a package\n\n:param obj: An instance of this class.\n" + ":param varargin: Variable input arguments.\n:returns: varargout" ) @@ -274,11 +287,11 @@ def test_function(mod): func = mod.getter("f_example") assert isinstance(func, doc.MatFunction) assert func.getter("__name__") == "f_example" - assert func.retv == ["o1", "o2", "o3"] - assert func.args == ["a1", "a2"] + assert list(func.retv.keys()) == ["o1", "o2", "o3"] + assert list(func.args.keys()) == ["a1", "a2"] assert ( func.docstring - == " a fun function\n\n :param a1: the first input\n :param a2: another input\n :returns: ``[o1, o2, o3]`` some outputs\n" + == "a fun function\n\n:param a1: the first input\n:param a2: another input\n:returns: ``[o1, o2, o3]`` some outputs" ) @@ -289,7 +302,7 @@ def test_function_getter(mod): assert func.getter("__name__") == "f_example" assert ( func.getter("__doc__") - == " a fun function\n\n :param a1: the first input\n :param a2: another input\n :returns: ``[o1, o2, o3]`` some outputs\n" + == "a fun function\n\n:param a1: the first input\n:param a2: another input\n:returns: ``[o1, o2, o3]`` some outputs" ) assert func.getter("__module__") == "test_data" @@ -299,11 +312,11 @@ def test_package_function(mod): func = mod.getter("f_example") assert isinstance(func, doc.MatFunction) assert func.getter("__name__") == "f_example" - assert func.retv == ["o1", "o2", "o3"] - assert func.args == ["a1", "a2"] + assert list(func.retv.keys()) == ["o1", "o2", "o3"] + assert list(func.args.keys()) == ["a1", "a2"] assert ( func.docstring - == " a fun function\n\n :param a1: the first input\n :param a2: another input\n :returns: ``[o1, o2, o3]`` some outputs\n" + == "a fun function\n\n:param a1: the first input\n:param a2: another input\n:returns: ``[o1, o2, o3]`` some outputs" ) @@ -311,13 +324,13 @@ def test_class_with_get_method(mod): the_class = mod.getter("ClassWithGetMethod") assert isinstance(the_class, doc.MatClass) assert the_class.getter("__name__") == "ClassWithGetMethod" - assert the_class.docstring == " Class with a method named get\n" + assert the_class.docstring == "Class with a method named get" the_method = the_class.getter("get") assert isinstance(the_method, doc.MatMethod) assert the_method.getter("__name__") == "get" - assert the_method.retv == ["varargout"] + assert list(the_method.retv.keys()) == ["varargout"] assert the_method.docstring.startswith( - " Gets the numbers 1-n and fills in the outputs with them" + "Gets the numbers 1-n and fills in the outputs with them" ) diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py index 32b409b..cb30c24 100644 --- a/tests/test_parse_mfile.py +++ b/tests/test_parse_mfile.py @@ -433,7 +433,7 @@ def test_file_parsing_with_no_encoding_specified(): mfile = os.path.join(DIRNAME, "test_data", "f_with_latin_1.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_with_latin_1", "test_data") assert obj.name == "f_with_latin_1" - assert obj.docstring == "Analyse de la r\ufffdponse \ufffd un cr\ufffdneau" + assert obj.docstring == r"Analyse de la r\xe9dponse \xe0 un cr\xe9dneau" def test_ClassWithBuiltinOverload(): From a2eae1baf3939dbdabbc80e0135cd0d2bd0dafae Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Mon, 19 Aug 2024 14:37:20 +0200 Subject: [PATCH 31/45] fixing the last of the tests --- sphinxcontrib/mat_tree_sitter_parser.py | 68 ++++++++++++++++++++----- tests/test_parse_mfile.py | 13 ++--- 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 967b283..f77c5bd 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -4,7 +4,7 @@ import re # rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -rpath = "/home/anton/tools/matlabdomain/tests/test_data/PropTypeOld.m" +rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithTrailingCommentAfterBases.m" # rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m" tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) @@ -194,12 +194,16 @@ q_get_set = ML_LANG.query("""["get." "set."]""") +q_line_continuation = ML_LANG.query("(line_continuation) @lc") + re_percent_remove = re.compile(r"^[ \t]*% ?", flags=re.M) +re_trim_line = re.compile(r"^[ \t]*", flags=re.M) re_assign_remove = re.compile(r"^=[ \t]*") def tree_sitter_is_0_21(): + """Check if tree-sitter is v0.21.* in order to use the correct language initialization and syntax.""" if not hasattr(tree_sitter_is_0_21, "is_21"): tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) tree_sitter_is_0_21.is_21 = tree_sitter_ver[1] == 21 # memoize @@ -215,13 +219,33 @@ def get_row(point): def process_text_into_docstring(text, encoding): + """Take a text bytestring and decode it into a docstring.""" docstring = text.decode(encoding, errors="backslashreplace") return re.sub(re_percent_remove, "", docstring) -def process_default(text, encoding): - default = text.decode(encoding, errors="backslashreplace") - return re.sub(re_assign_remove, "", default) +def process_default(node, encoding): + """Take the node defining a default and remove any line continuations before generating the default.""" + text = node.text + to_keep = set(range(node.end_byte - node.start_byte)) + lc_matches = q_line_continuation.matches(node) + for _, match in lc_matches: + # TODO this copies a lot perhaps there is a better option. + lc = match["lc"] + cut_start = lc.start_byte - node.start_byte + cut_end = lc.end_byte - node.start_byte + to_keep -= set(range(cut_start, cut_end)) + # NOTE: hardcoded endianess is fine because for one byte this does not matter. + # See python bikeshed on possible defaults for this here: + # https://discuss.python.org/t/what-should-be-the-default-value-for-int-to-bytes-byteorder/10616 + new_text = b"".join( + [byte.to_bytes(1, "big") for idx, byte in enumerate(text) if idx in to_keep] + ) + # TODO We may want to do an in-order traversal of the parse here to generate a "nice" reformatted single line + # however doing so sufficiently generically is likely a major undertaking. + default = new_text.decode(encoding, errors="backslashreplace") + default = re.sub(re_assign_remove, "", default) + return re.sub(re_trim_line, "", default) class MatScriptParser: @@ -281,13 +305,21 @@ def __init__(self, root_node, encoding): # get docstring docstring_node = fun_match.get("docstring") - docstring = None + docstring = "" if docstring_node is not None: prev_sib = docstring_node.prev_named_sibling if get_row(docstring_node.start_point) - get_row(prev_sib.end_point) <= 1: - docstring = process_text_into_docstring( - docstring_node.text, self.encoding - ) + if get_row(docstring_node.start_point) == get_row(prev_sib.end_point): + # if the docstring is on the same line as the end of the function drop it + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) + split_ds = docstring.split("\n") + docstring = "\n".join(split_ds[1:]) if len(split_ds) > 1 else "" + else: + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) if not docstring: docstring = None @@ -343,7 +375,7 @@ def _parse_argument_section(self, argblock_node): # extract default default_node = arg_match.get("default") default = ( - process_default(default_node.text, self.encoding) + process_default(default_node, self.encoding) if default_node is not None else None ) @@ -515,12 +547,22 @@ def __init__(self, root_node, encoding): # get docstring and check that it consecutive docstring_node = class_match.get("docstring") + docstring = "" if docstring_node is not None: prev_node = docstring_node.prev_sibling if get_row(docstring_node.start_point) - get_row(prev_node.end_point) <= 1: - self.docstring = process_text_into_docstring( - docstring_node.text, self.encoding - ) + if get_row(docstring_node.start_point) == get_row(prev_node.end_point): + # if the docstring is on the same line as the end of the classdef drop it + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) + split_ds = docstring.split("\n") + docstring = "\n".join(split_ds[1:]) if len(split_ds) > 1 else "" + else: + docstring = process_text_into_docstring( + docstring_node.text, self.encoding + ) + self.docstring = docstring prop_matches = q_properties.matches(self.cls) method_matches = q_methods.matches(self.cls) @@ -606,7 +648,7 @@ def _parse_property_section(self, props_match): # extract default default_node = prop_match.get("default") default = ( - process_default(default_node.text, self.encoding) + process_default(default_node, self.encoding) if default_node is not None else None ) diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py index cb30c24..48f746e 100644 --- a/tests/test_parse_mfile.py +++ b/tests/test_parse_mfile.py @@ -433,7 +433,7 @@ def test_file_parsing_with_no_encoding_specified(): mfile = os.path.join(DIRNAME, "test_data", "f_with_latin_1.m") obj = mat_types.MatObject.parse_mfile(mfile, "f_with_latin_1", "test_data") assert obj.name == "f_with_latin_1" - assert obj.docstring == r"Analyse de la r\xe9dponse \xe0 un cr\xe9dneau" + assert obj.docstring == r"Analyse de la r\xe9ponse \xe0 un cr\xe9neau" def test_ClassWithBuiltinOverload(): @@ -740,7 +740,7 @@ def test_ClassWithLongPropertyDocstrings(): ) assert obj.name == "ClassWithLongPropertyDocstrings" assert ( - obj.properties["a"]["docstring"] == "This line is deleted" + obj.properties["a"]["docstring"] == "This line is deleted\n" "This line documents another property" ) assert obj.properties["b"]["docstring"] == "Document this property" @@ -755,7 +755,7 @@ def test_ClassWithLongPropertyTrailingEmptyDocstrings(): ) assert obj.name == "ClassWithLongPropertyTrailingEmptyDocstrings" assert ( - obj.properties["a"]["docstring"] == "This line is deleted" + obj.properties["a"]["docstring"] == "This line is deleted\n" "This line documents another property" ) assert obj.properties["b"]["docstring"] == "Document this property" @@ -795,6 +795,7 @@ def test_ClassWithTrailingCommentAfterBases(): def test_ClassWithEllipsisProperties(): + # TODO change this when the functionality to "nicely" generate one line defaults exists mfile = os.path.join(TESTDATA_ROOT, "ClassWithEllipsisProperties.m") obj = mat_types.MatObject.parse_mfile( mfile, "ClassWithEllipsisProperties", "test_data" @@ -805,15 +806,15 @@ def test_ClassWithEllipsisProperties(): assert len(obj.methods) == 0 assert obj.properties["A"]["docstring"] == "an expression with ellipsis" - assert obj.properties["A"]["default"] == "1+2+3+4+5" + assert obj.properties["A"]["default"] == "1 + 2 + 3 + 4 + 5" assert ( obj.properties["B"]["docstring"] == "a cell array with ellipsis and other array notation" ) - assert obj.properties["B"]["default"].startswith("{'hello','bye';") + assert obj.properties["B"]["default"].startswith("{'hello', 'bye';") assert obj.properties["B"]["default"].endswith("}") assert obj.properties["C"]["docstring"] == "using end inside array" - assert obj.properties["C"]["default"] == "ClassWithEllipsisProperties.B(2:end,1)" + assert obj.properties["C"]["default"] == "ClassWithEllipsisProperties.B(2:end, 1)" assert obj.properties["D"]["docstring"] == "String with line continuation" assert obj.properties["D"]["default"] == "'...'" assert obj.properties["E"]["docstring"] == "The string with spaces" From 3189e286752134d57ea3adb42a1e520039cbc48d Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Mon, 19 Aug 2024 15:07:51 +0200 Subject: [PATCH 32/45] remove dead code --- sphinxcontrib/mat_parser.py | 88 ---------------------- sphinxcontrib/mat_types.py | 141 +----------------------------------- 2 files changed, 1 insertion(+), 228 deletions(-) delete mode 100644 sphinxcontrib/mat_parser.py diff --git a/sphinxcontrib/mat_parser.py b/sphinxcontrib/mat_parser.py deleted file mode 100644 index 55db502..0000000 --- a/sphinxcontrib/mat_parser.py +++ /dev/null @@ -1,88 +0,0 @@ -""" - sphinxcontrib.mat_parser - ~~~~~~~~~~~~~~~~~~~~~~~~ - - Functions for parsing MatlabLexer output. - - :copyright: Copyright 2023-2024 by the sphinxcontrib-matlabdomain team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -import re -import sphinx.util - -logger = sphinx.util.logging.getLogger("matlab-domain") - - -def remove_comment_header(code): - """ - Removes the comment header (if there is one) and empty lines from the - top of the current read code. - :param code: Current code string. - :type code: str - :returns: Code string without comments above a function, class or - procedure/script. - """ - # get the line number when the comment header ends (incl. empty lines) - ln_pos = 0 - for line in code.splitlines(True): - if re.match(r"[ \t]*(%|\n)", line): - ln_pos += 1 - else: - break - - if ln_pos > 0: - # remove the header block and empty lines from the top of the code - try: - code = code.split("\n", ln_pos)[ln_pos:][0] - except IndexError: - # only header and empty lines. - code = "" - - return code - - -def remove_line_continuations(code): - """ - Removes line continuations (...) from code as functions must be on a - single line - :param code: - :type code: str - :return: - """ - # pat = r"('.*)(\.\.\.)(.*')" - # code = re.sub(pat, r"\g<1>\g<3>", code, flags=re.MULTILINE) - - pat = r"^([^%'\"\n]*)(\.\.\..*\n)" - code = re.sub(pat, r"\g<1>", code, flags=re.MULTILINE) - return code - - -def fix_function_signatures(code): - """ - Transforms function signatures with line continuations to a function - on a single line with () appended. Required because pygments cannot - handle this situation correctly. - - :param code: - :type code: str - :return: Code string with functions on single line - """ - pat = r"""^[ \t]*function[ \t.\n]* # keyword (function) - (\[?[\w, \t.\n]*\]?) # outputs: group(1) - [ \t.\n]*=[ \t.\n]* # punctuation (eq) - (\w+)[ \t.\n]* # name: group(2) - \(?([\w, \t.\n]*)\)?""" # args: group(3) - pat = re.compile(pat, re.X | re.MULTILINE) # search start of every line - - # replacement function - def repl(m): - retv = m.group(0) - # if no args and doesn't end with parentheses, append "()" - if not (m.group(3) or m.group(0).endswith("()")): - retv = retv.replace(m.group(2), m.group(2) + "()") - return retv - - code = pat.sub(repl, code) # search for functions and apply replacement - - return code diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index 4172743..f85f4b8 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -16,7 +16,6 @@ from pygments.token import Token from zipfile import ZipFile import xml.etree.ElementTree as ET -import sphinxcontrib.mat_parser as mat_parser from sphinxcontrib.mat_tree_sitter_parser import ( MatClassParser, MatFunctionParser, @@ -47,14 +46,6 @@ "MatApplication", ] -# MATLAB keywords that increment keyword-end pair count -MATLAB_KEYWORD_REQUIRES_END = list( - zip( - (Token.Keyword,) * 7, - ("arguments", "for", "if", "switch", "try", "while", "parfor"), - ) -) - # MATLAB attribute type dictionaries. @@ -115,12 +106,6 @@ "TestTags": list, } - -MATLAB_FUNC_BRACES_BEGIN = tuple(zip((Token.Punctuation,) * 2, ("(", "{"))) -MATLAB_FUNC_BRACES_END = tuple(zip((Token.Punctuation,) * 2, (")", "}"))) -MATLAB_PROP_BRACES_BEGIN = tuple(zip((Token.Punctuation,) * 3, ("(", "{", "["))) -MATLAB_PROP_BRACES_END = tuple(zip((Token.Punctuation,) * 3, (")", "}", "]"))) - # Dictionary containing all MATLAB entities that are found in `matlab_src_dir`. # The dictionary keys are both the full dotted path, relative to the root. # Further, "short names" are added. Example: @@ -734,130 +719,6 @@ def getter(self, name, *defargs): return entity -class MatMixin(object): - """ - Methods to comparing and manipulating tokens in :class:`MatFunction` and - :class:`MatClass`. - """ - - def _tk_eq(self, idx, token): - """ - Returns ``True`` if token keys are the same and values are equal. - - :param idx: Index of token in :class:`MatObject`. - :type idx: int - :param token: Comparison token. - :type token: tuple - """ - return self.tokens[idx][0] is token[0] and self.tokens[idx][1] == token[1] - - def _tk_ne(self, idx, token): - """ - Returns ``True`` if token keys are not the same or values are not - equal. - - :param idx: Index of token in :class:`MatObject`. - :type idx: int - :param token: Comparison token. - :type token: tuple - """ - return self.tokens[idx][0] is not token[0] or self.tokens[idx][1] != token[1] - - def _eotk(self, idx): - """ - Returns ``True`` if end of tokens is reached. - """ - return idx >= len(self.tokens) - - def _blanks(self, idx): - """ - Returns number of blank text tokens. - - :param idx: Token index. - :type idx: int - """ - # idx0 = idx # original index - # while self._tk_eq(idx, (Token.Text, ' ')): idx += 1 - # return idx - idx0 # blanks - return self._indent(idx) - - def _whitespace(self, idx): - """ - Returns number of whitespaces text tokens, including blanks, newline - and tabs. - - :param idx: Token index. - :type idx: int - """ - idx0 = idx # original index - while ( - self.tokens[idx][0] is Token.Text - or self.tokens[idx][0] is Token.Text.Whitespace - ) and self.tokens[idx][1] in [" ", "\n", "\t"]: - idx += 1 - return idx - idx0 # whitespace - - def _indent(self, idx): - """ - Returns indentation tabs or spaces. No indentation is zero. - - :param idx: Token index. - :type idx: int - """ - idx0 = idx # original index - while self.tokens[idx][0] is Token.Text and self.tokens[idx][1] in [" ", "\t"]: - idx += 1 - return idx - idx0 # indentation - - def _propspec(self, idx): - """ - Returns number of "property" specification tokens - - :param idx: Token index. - :type idx: int - """ - idx0 = idx # original index - while ( - self._tk_eq(idx, (Token.Punctuation, "@")) - or self._tk_eq(idx, (Token.Punctuation, "(")) - or self._tk_eq(idx, (Token.Punctuation, ")")) - or self._tk_eq(idx, (Token.Punctuation, ",")) - or self._tk_eq(idx, (Token.Punctuation, ":")) - or self.tokens[idx][0] == Token.Literal.Number.Integer - or self._tk_eq(idx, (Token.Punctuation, "{")) - or self._tk_eq(idx, (Token.Punctuation, "}")) - or self._tk_eq(idx, (Token.Punctuation, "[")) - or self._tk_eq(idx, (Token.Punctuation, "]")) - or self._tk_eq(idx, (Token.Punctuation, ".")) - or self.tokens[idx][0] == Token.Literal.String - or self.tokens[idx][0] == Token.Name - or (self.tokens[idx][0] == Token.Text and self.tokens[idx][1] != "\n") - ): - idx += 1 - - count = idx - idx0 # property spec count. - propspec = "".join([content for _, content in self.tokens[idx0 : idx0 + count]]) - propspec = propspec.strip() - return count, propspec - - def _is_newline(self, idx): - """Returns true if the token at index is a newline""" - return ( - self.tokens[idx][0] in (Token.Text, Token.Text.Whitespace) - and self.tokens[idx][1] == "\n" - ) - - -def skip_whitespace(tks): - """Eats whitespace from list of tokens""" - while tks and ( - tks[-1][0] == Token.Text.Whitespace - or tks[-1][0] == Token.Text - and tks[-1][1] in [" ", "\t"] - ): - tks.pop() - - class MatFunction(MatObject): """ A MATLAB function. @@ -907,7 +768,7 @@ def getter(self, name, *defargs): super(MatFunction, self).getter(name, *defargs) -class MatClass(MatMixin, MatObject): +class MatClass(MatObject): """ A MATLAB class definition. From 86b0fa6da761d5e39b82d8ddc26d98dd66ccc8e3 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 5 Sep 2024 14:59:44 +0200 Subject: [PATCH 33/45] address PR comments made by @joeced --- sphinxcontrib/mat_tree_sitter_parser.py | 90 +++++++++++++++---------- sphinxcontrib/mat_types.py | 4 +- tests/test_matlabify.py | 12 ++-- tests/test_parse_mfile.py | 26 +++---- 4 files changed, 75 insertions(+), 57 deletions(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index f77c5bd..814cae8 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -3,9 +3,41 @@ from tree_sitter import Language, Parser import re -# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" -rpath = "/home/anton/tools/matlabdomain/tests/test_data/ClassWithTrailingCommentAfterBases.m" -# rpath = "/home/anton/tools/matlabdomain/tests/test_data/f_with_dummy_argument.m" +# Attribute default dictionary used to give default values for e.g. `Abstract` or `Static` when used without +# a right hand side i.e. `classdef (Abstract)` vs `classdef (Abstract=true)` +# From: +# - http://www.mathworks.com/help/matlab/matlab_oop/class-attributes.html +# - https://mathworks.com/help/matlab/matlab_oop/property-attributes.html +# - https://mathworks.com/help/matlab/matlab_prog/define-property-attributes-1.htm +# - https://mathworks.com/help/matlab/matlab_oop/method-attributes.html +# - https://mathworks.com/help/matlab/ref/matlab.unittest.testcase-class.html +MATLAB_ATTRIBUTE_DEFAULTS = { + "AbortSet": True, + "Abstract": True, + "ClassSetupParameter": True, + "Constant": True, + "ConstructOnLoad": True, + "Dependent": True, + "DiscreteState": True, + "GetObservable": True, + "HandleCompatible": True, + "Hidden": True, + "MethodSetupParameter": True, + "NonCopyable": True, + "Nontunable": True, + "PartialMatchPriority": True, + "Sealed": True, + "SetObservable": True, + "Static": True, + "Test": None, + "TestClassSetup": None, + "TestClassTeardown": None, + "TestMethodSetup": None, + "TestMethodTeardown": None, + "TestParameter": None, + "Transient": True, +} + tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) if tree_sitter_ver[1] == 21: @@ -499,12 +531,21 @@ def _parse_attributes(self, attrs_nodes): name = attr_match.get("name").text.decode( self.encoding, errors="backslashreplace" ) + value_node = attr_match.get("value") - attrs[name] = ( - value_node.text.decode(self.encoding, errors="backslashreplace") - if value_node is not None - else None - ) + rhs_node = attr_match.get("rhs") + if rhs_node is not None: + if rhs_node.type == "cell": + attrs[name] = [ + vn.text.decode(self.encoding, errors="backslashreplace") + for vn in value_node + ] + else: + attrs[name] = value_node[0].text.decode( + self.encoding, errors="backslashreplace" + ) + else: + attrs[name] = MATLAB_ATTRIBUTE_DEFAULTS.get(name) return attrs @@ -537,13 +578,11 @@ def __init__(self, root_node, encoding): if supers_nodes is not None: for super_node in supers_nodes: _, super_match = q_supers.matches(super_node)[0] - super_cls = tuple( - [ - sec.text.decode(self.encoding, errors="backslashreplace") - for sec in super_match.get("secs") - ] - ) - self.supers.append(super_cls) + super_cls = [ + sec.text.decode(self.encoding, errors="backslashreplace") + for sec in super_match.get("secs") + ] + self.supers.append(".".join(super_cls)) # get docstring and check that it consecutive docstring_node = class_match.get("docstring") @@ -916,25 +955,6 @@ def _parse_attributes(self, attrs_nodes): self.encoding, errors="backslashreplace" ) else: - attrs[name] = None + attrs[name] = MATLAB_ATTRIBUTE_DEFAULTS.get(name) return attrs - - -if __name__ == "__main__": - tree_sitter_ver = tuple([int(sec) for sec in version("tree_sitter").split(".")]) - if tree_sitter_ver[1] == 21: - parser = Parser() - parser.set_language(ML_LANG) - else: - parser = Parser(ML_LANG) - - with open(rpath, "rb") as f: - data = f.read() - - tree = parser.parse(data) - class_parser = MatClassParser(tree.root_node, "utf-8") - # fun_parser = MatFunctionParser(tree.root_node) - import pdb - - pdb.set_trace() diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index f85f4b8..f843537 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -842,9 +842,7 @@ def __doc__(self): @property def __bases__(self): - bases_ = dict.fromkeys( - [".".join(base) for base in self.bases] - ) # make copy of bases + bases_ = dict.fromkeys([base for base in self.bases]) # make copy of bases class_entity_table = {} for name, entity in entities_table.items(): if isinstance(entity, MatClass) or "@" in name: diff --git a/tests/test_matlabify.py b/tests/test_matlabify.py index d27cbe9..25761a7 100644 --- a/tests/test_matlabify.py +++ b/tests/test_matlabify.py @@ -138,7 +138,7 @@ def test_classes(mod): assert isinstance(cls, doc.MatClass) assert cls.getter("__name__") == "ClassInheritHandle" assert cls.getter("__module__") == "test_data" - assert cls.bases == [("handle",), ("my", "super", "Class")] + assert cls.bases == ["handle", "my.super.Class"] assert cls.attrs == {} assert cls.properties == { "x": { @@ -161,8 +161,8 @@ def test_abstract_class(mod): assert abc.getter("__module__") == "test_data" assert "ClassInheritHandle" in abc.getter("__bases__") assert "ClassExample" in abc.getter("__bases__") - assert abc.bases == [("ClassInheritHandle",), ("ClassExample",)] - assert abc.attrs == {"Abstract": None, "Sealed": None} + assert abc.bases == ["ClassInheritHandle", "ClassExample"] + assert abc.attrs == {"Abstract": True, "Sealed": True} assert abc.properties == { "y": { "default": None, @@ -175,7 +175,7 @@ def test_abstract_class(mod): "version": { "default": "'0.1.1-beta'", "docstring": "version", - "attrs": {"Constant": None}, + "attrs": {"Constant": True}, "size": None, "type": None, "validators": None, @@ -197,7 +197,7 @@ def test_abstract_class(mod): assert isinstance(abc_version, doc.MatProperty) assert abc_version.default == "'0.1.1-beta'" assert abc_version.docstring == "version" - assert abc_version.attrs == {"Constant": None} + assert abc_version.attrs == {"Constant": True} def test_class_method(mod): @@ -230,7 +230,7 @@ def test_submodule_class(mod): assert isinstance(cls, doc.MatClass) assert cls.docstring == "Test of MATLAB unittest method attributes" assert cls.attrs == {} - assert cls.bases == [("matlab", "unittest", "TestCase")] + assert cls.bases == ["matlab.unittest.TestCase"] assert "compareFirstThreeElementsToExpected" in cls.methods assert cls.module == "test_data.submodule" assert cls.properties == {} diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py index 48f746e..9ea6ea5 100644 --- a/tests/test_parse_mfile.py +++ b/tests/test_parse_mfile.py @@ -366,8 +366,8 @@ def test_ClassWithMethodAttributes(): assert obj.methods["testProtected"].attrs == {"Access": "protected"} assert obj.methods["testPrivate1"].attrs == {"Access": "private"} assert obj.methods["testPrivate2"].attrs == {"Access": "'private'"} - assert obj.methods["testHidden"].attrs == {"Hidden": None} - assert obj.methods["testStatic"].attrs == {"Static": None} + assert obj.methods["testHidden"].attrs == {"Hidden": True} + assert obj.methods["testStatic"].attrs == {"Static": True} assert obj.methods["testFriend1"].attrs == {"Access": "?OtherClass"} assert obj.methods["testFriend2"].attrs == { "Access": ["?OtherClass", "?pack.OtherClass2"] @@ -397,13 +397,13 @@ def test_ClassWithPropertyAttributes(): "GetAccess": "private", "SetAccess": "private", } - assert obj.properties["TEST_CONSTANT"]["attrs"] == {"Constant": None} + assert obj.properties["TEST_CONSTANT"]["attrs"] == {"Constant": True} assert obj.properties["TEST_CONSTANT_PROTECTED"]["attrs"] == { "Access": "protected", - "Constant": None, + "Constant": True, } - assert obj.properties["testDependent"]["attrs"] == {"Dependent": None} - assert obj.properties["testHidden"]["attrs"] == {"Hidden": None} + assert obj.properties["testDependent"]["attrs"] == {"Dependent": True} + assert obj.properties["testHidden"]["attrs"] == {"Hidden": True} def test_ClassWithoutIndent(): @@ -564,7 +564,7 @@ def test_ClassWithAttributes(): obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithAttributes", "test_data") assert isinstance(obj, mat_types.MatClass) assert obj.name == "ClassWithAttributes" - assert obj.attrs == {"Sealed": None} + assert obj.attrs == {"Sealed": True} # Fails when running with other test files. Warnings are already logged. @@ -696,7 +696,7 @@ def test_ClassWithMethodsWithSpaces(): assert ( obj.docstring == "Class with methods that have space after the function name." ) - assert obj.methods["static_method"].attrs == {"Static": None} + assert obj.methods["static_method"].attrs == {"Static": True} def test_ClassContainingParfor(): @@ -779,7 +779,7 @@ def test_ClassWithTrailingCommentAfterBases(): mfile, "ClassWithTrailingCommentAfterBases", "test_data" ) assert obj.name == "ClassWithTrailingCommentAfterBases" - assert obj.bases == [("handle",), ("my", "super", "Class")] + assert obj.bases == ["handle", "my.super.Class"] assert ( obj.docstring == "test class methods\n\n:param a: the input to :class:`ClassWithTrailingCommentAfterBases`" @@ -801,7 +801,7 @@ def test_ClassWithEllipsisProperties(): mfile, "ClassWithEllipsisProperties", "test_data" ) assert obj.name == "ClassWithEllipsisProperties" - assert obj.bases == [("handle",)] + assert obj.bases == ["handle"] assert obj.docstring == "stuff" assert len(obj.methods) == 0 @@ -846,7 +846,7 @@ def test_ClassWithTrailingSemicolons(): obj.docstring == "Smoothing like it is performed withing Cxx >v7.0 (until v8.2 at least).\nUses constant 228p_12k frequency vector:" ) - assert obj.bases == [("hgsetget",)] + assert obj.bases == ["hgsetget"] assert list(obj.methods.keys()) == [ "ClassWithTrailingSemicolons", "CxxSmoothing", @@ -896,7 +896,7 @@ def test_ClassWithNamedAsArguments(): mfile = os.path.join(TESTDATA_ROOT, "arguments.m") obj = mat_types.MatObject.parse_mfile(mfile, "arguments", "test_data") assert obj.name == "arguments" - assert obj.bases == [("handle",), ("matlab", "mixin", "Copyable")] + assert obj.bases == ["handle", "matlab.mixin.Copyable"] assert "value" in obj.properties meth = obj.methods["arguments"] assert meth.docstring == "Constructor for arguments" @@ -919,7 +919,7 @@ def test_ClassWithTests(): mfile = os.path.join(TESTDATA_ROOT, "ClassWithTests.m") obj = mat_types.MatObject.parse_mfile(mfile, "ClassWithTests", "test_data") assert obj.name == "ClassWithTests" - assert obj.bases == [("matlab", "unittest", "TestCase")] + assert obj.bases == ["matlab.unittest.TestCase"] assert "testRunning" in obj.methods testRunning = obj.methods["testRunning"] assert testRunning.attrs["TestTags"] == ["'Unit'"] From d3b8aceaf2640974bfc19040416a9b80f7009b6f Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 5 Sep 2024 15:12:08 +0200 Subject: [PATCH 34/45] fix --- sphinxcontrib/mat_tree_sitter_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 814cae8..51420ae 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -221,6 +221,7 @@ (source_file (comment)? @docstring ) + """ ) From ae5c3579c73915a9a468af9dd5100501d5340247 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 5 Sep 2024 15:16:22 +0200 Subject: [PATCH 35/45] lint --- sphinxcontrib/mat_documenters.py | 14 +++++++------- sphinxcontrib/mat_tree_sitter_parser.py | 1 - sphinxcontrib/mat_types.py | 8 ++++++-- sphinxcontrib/matlab.py | 7 +++---- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/sphinxcontrib/mat_documenters.py b/sphinxcontrib/mat_documenters.py index 4c58cca..f9bdf4b 100644 --- a/sphinxcontrib/mat_documenters.py +++ b/sphinxcontrib/mat_documenters.py @@ -558,9 +558,9 @@ def member_is_friend_of(member, friends): def member_is_enum(member): return isinstance(member, MatEnumeration) - + ret = [] - + # search for members in source code too namespace = ".".join(self.objpath) # will be empty for modules @@ -1242,7 +1242,9 @@ def document_members(self, all_members=False): other_names = [ membername for (membername, member) in filtered_members - if not isinstance(member, MatMethod) and not isinstance(member, MatProperty) and not isinstance(member, MatEnumeration) + if not isinstance(member, MatMethod) + and not isinstance(member, MatProperty) + and not isinstance(member, MatEnumeration) # exclude parent modules with names matching members (as in Myclass.Myclass) and not (hasattr(member, "module") and member.name == member.module) ] @@ -1295,12 +1297,10 @@ def document_members(self, all_members=False): self.document_member_section( "Property Summary", non_properties, all_members ) - + # enumss if enum_names: - self.document_member_section( - "Enumeration Values", non_enums, all_members - ) + self.document_member_section("Enumeration Values", non_enums, all_members) # methods if meth_names: diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 51420ae..814cae8 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -221,7 +221,6 @@ (source_file (comment)? @docstring ) - """ ) diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index f843537..7bfa218 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -877,7 +877,9 @@ def getter(self, name, *defargs): elif name == "__dict__": objdict = dict([(pn, self.getter(pn)) for pn in self.properties.keys()]) objdict.update(self.methods) - objdict.update(dict([(en, self.getter(en)) for en in self.enumerations.keys()])) + objdict.update( + dict([(en, self.getter(en)) for en in self.enumerations.keys()]) + ) return objdict else: super(MatClass, self).getter(name, *defargs) @@ -904,12 +906,13 @@ def __module__(self): def __doc__(self): return self.docstring + class MatEnumeration(MatObject): def __init__(self, name, cls, attrs): super(MatEnumeration, self).__init__(name) self.cls = cls self.docstring = attrs["docstring"] - + def ref_role(self): """Returns role to use for references to this object (e.g. when generating auto-links)""" return "enum" @@ -922,6 +925,7 @@ def __module__(self): def __doc__(self): return self.docstring + class MatMethod(MatFunction): def __init__(self, name, parsed_function, modname, cls): self.name = name diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py index 5764f9f..01755f3 100644 --- a/sphinxcontrib/matlab.py +++ b/sphinxcontrib/matlab.py @@ -346,14 +346,13 @@ def _object_hierarchy_parts(self, sig): This method must not be used outwith table of contents generation. """ - parts = sig.attributes.get('module').split('.') - parts.append(sig.attributes.get('fullname')) - #import pdb;pdb.set_trace() + parts = sig.attributes.get("module").split(".") + parts.append(sig.attributes.get("fullname")) return tuple(parts) def _toc_entry_name(self, sig): # TODO respecting the configuration setting ``toc_object_entries_show_parents`` - return sig.attributes.get('fullname') + return sig.attributes.get("fullname") def get_signature_prefix(self, sig): return self.objtype + " " From 952af04c7a3c6eb36b2659d98262e825f15a573a Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 5 Sep 2024 15:22:11 +0200 Subject: [PATCH 36/45] fix typo in __all__ --- sphinxcontrib/mat_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index 7bfa218..ed08b19 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -38,7 +38,7 @@ "MatFunction", "MatClass", "MatProperty", - "MatEnumerations", + "MatEnumeration", "MatMethod", "MatScript", "MatException", From d4e64f24becfac11b6e3c003ac90678718e23dec Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 5 Sep 2024 16:01:24 +0200 Subject: [PATCH 37/45] remove Pygments dependencies --- setup.py | 3 +- sphinxcontrib/mat_tree_sitter_parser.py | 84 +++++++++++-------------- tox.ini | 4 +- 3 files changed, 38 insertions(+), 53 deletions(-) diff --git a/setup.py b/setup.py index 5d459e6..a52fc4d 100644 --- a/setup.py +++ b/setup.py @@ -7,8 +7,7 @@ requires = [ "Sphinx>=4.0.0", - "Pygments>=2.0.1", - "tree-sitter-matlab @ git+https://github.com/apozharski/tree-sitter-matlab.git", + "tree-sitter-matlab>=1.0.2", "tree-sitter>=0.21.3", ] diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 814cae8..ab21455 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -88,7 +88,7 @@ (attributes [(attribute) @attrs _]+ )? - [(property) @properties (old_property) @properties _]+ + [(property) @properties _]+ ) @prop_block """ ) @@ -127,6 +127,8 @@ [[(spread_operator) (number)] @dims _]+ )? (identifier)? @type + . + (identifier)? @size_type (validation_functions [[(identifier) (function_call)] @validation_functions _]+ )? @@ -138,9 +140,9 @@ q_old_property = ML_LANG.query( """ - (old_property name: (identifier) @name + (property name: (identifier) @name (identifier) @type - (old_property_type)? @size_type + (identifier)? @size_type (default_value)? @default (comment)? @docstring ) @@ -625,56 +627,42 @@ def _parse_property_section(self, props_match): attrs_nodes = props_match.get("attrs") attrs = self._parse_attributes(attrs_nodes) for prop in properties: - if prop.type == "property": - # match property to extract details - _, prop_match = q_property.matches(prop)[0] - - # extract name (this is always available so no need for None check) - name = prop_match.get("name").text.decode( - self.encoding, errors="backslashreplace" - ) + # match property to extract details + _, prop_match = q_property.matches(prop)[0] - # extract dims list - dims_list = prop_match.get("dims") - dims = None - if dims_list is not None: - dims = tuple( - [ - dim.text.decode(self.encoding, errors="backslashreplace") - for dim in dims_list - ] - ) + # extract name (this is always available so no need for None check) + name = prop_match.get("name").text.decode( + self.encoding, errors="backslashreplace" + ) - # extract validator functions - vf_list = prop_match.get("validator_functions") - vfs = None - if vf_list is not None: - vfs = [ - vf.text.decode(self.encoding, errors="backslashreplace") - for vf in vf_list + # extract dims list + size_type = prop_match.get("size_type") + dims_list = prop_match.get("dims") + dims = None + if dims_list is not None: + dims = tuple( + [ + dim.text.decode(self.encoding, errors="backslashreplace") + for dim in dims_list ] - else: - # match property to extract details - _, prop_match = q_old_property.matches(prop)[0] - - # extract name (this is always available so no need for None check) - name = prop_match.get("name").text.decode( - self.encoding, errors="backslashreplace" ) + elif size_type is None: + dims = None + elif size_type.text == b"scalar": + dims = ("1", "1") + elif size_type.text == b"vector": + dims = (":", "1") + elif size_type.text == b"matrix": + dims = (":", ":") - # extract size type - size_type = prop_match.get("size_type") - if size_type is None: - dims = None - elif size_type.text == b"scalar": - dims = ("1", "1") - elif size_type.text == b"vector": - dims = (":", "1") - elif size_type.text == b"matrix": - dims = (":", ":") - - # No validator functions - vfs = None + # extract validator functions + vf_list = prop_match.get("validator_functions") + vfs = None + if vf_list is not None: + vfs = [ + vf.text.decode(self.encoding, errors="backslashreplace") + for vf in vf_list + ] # extract type type_node = prop_match.get("type") diff --git a/tox.ini b/tox.ini index 9744f88..deb1103 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{38,39,310,311}-sphinx{45,53,60,latest}-pygments{213,latest} +envlist = py{38,39,310,311}-sphinx{45,53,60,latest} [testenv] @@ -18,8 +18,6 @@ deps = sphinx70: Sphinx>=7.0,<8.0 sphinxlatest: Sphinx sphinxdev: https://github.com/sphinx-doc/sphinx/archive/refs/heads/master.zip - pygments213: Pygments>=2.0.1,<2.14.0 - pygmentlatest: Pygments commands = pytest -vv {posargs} tests/ sphinx-build -b html -d {envtmpdir}/doctrees tests/test_docs {envtmpdir}/html From 7da1e6515a47067fdd67c3d1b451c9dde8525147 Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 5 Sep 2024 16:22:35 +0200 Subject: [PATCH 38/45] py-tree-sitter v0.23.0 has a breaking change for Query.match() --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a52fc4d..32d6b09 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ requires = [ "Sphinx>=4.0.0", "tree-sitter-matlab>=1.0.2", - "tree-sitter>=0.21.3", + "tree-sitter>=0.21.3,<0.23.0", ] setup( From a48c23636d08b3540530e04fb76e6bfa0543e3ca Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Thu, 5 Sep 2024 16:56:52 +0200 Subject: [PATCH 39/45] remove duplicat entry in yaml --- .github/workflows/python-package.yml | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index ee7aae6..41775ab 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -54,28 +54,6 @@ jobs: tox -e "${{matrix.python-version}}-sphinx${{matrix.sphinx-version}}-pygments${{matrix.pygments-version}}" - test-sphinx-latest: - name: Run tests for Python ${{ matrix.python-version }}, Sphinx ${{ matrix.sphinx-version }}, Pygments ${{ matrix.pygments-version }} - timeout-minutes: 5 - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12"] - sphinx-version: ["dev"] - pygments-version: ["latest"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - name: Run with Tox - run: | - pip install tox==4.8.0 - tox -e "${{matrix.python-version}}-sphinx${{matrix.sphinx-version}}-pygments${{matrix.pygments-version}}" - - test-sphinx-latest: name: Run tests for Python ${{ matrix.python-version }}, Sphinx ${{ matrix.sphinx-version }}, Pygments ${{ matrix.pygments-version }} timeout-minutes: 5 From 1dfa6c791224417672c68355520bd0bab079f703 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Fri, 6 Sep 2024 00:34:22 +0200 Subject: [PATCH 40/45] fix spec printing --- sphinxcontrib/mat_documenters.py | 11 ++++++++++- sphinxcontrib/mat_tree_sitter_parser.py | 10 +++++----- sphinxcontrib/mat_types.py | 4 +++- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/sphinxcontrib/mat_documenters.py b/sphinxcontrib/mat_documenters.py index f9bdf4b..4d5e51a 100644 --- a/sphinxcontrib/mat_documenters.py +++ b/sphinxcontrib/mat_documenters.py @@ -1475,7 +1475,16 @@ def add_directive_header(self, sig): obj_default = " = " + obj_default if self.env.config.matlab_show_property_specs: - obj_default = self.object.specs + obj_default + prop_spec = "" + if self.object.size is not None: + prop_spec = prop_spec + "(" + ",".join(self.object.size) + ")" + if self.object.type is not None: + prop_spec = prop_spec + " " + self.object.type + if self.object.validators is not None: + prop_spec = ( + prop_spec + " {" + ",".join(self.object.validators) + "}" + ) + obj_default = prop_spec + obj_default self.add_line(" :annotation: " + obj_default, "") elif self.options.annotation is SUPPRESS: diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index ab21455..988ecc7 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -124,13 +124,13 @@ """ (property name: (identifier) @name (dimensions - [[(spread_operator) (number)] @dims _]+ + [(spread_operator) @dims (number) @dims _]+ )? (identifier)? @type . (identifier)? @size_type (validation_functions - [[(identifier) (function_call)] @validation_functions _]+ + [(identifier) @validation_functions (function_call) @validation_functions _]+ )? (default_value)? @default (comment)? @docstring @@ -398,7 +398,7 @@ def _parse_argument_section(self, argblock_node): ) # extract validator functions - vf_list = arg_match.get("validator_functions") + vf_list = arg_match.get("validation_functions") vfs = None if vf_list is not None: vfs = [ @@ -629,7 +629,7 @@ def _parse_property_section(self, props_match): for prop in properties: # match property to extract details _, prop_match = q_property.matches(prop)[0] - + print(prop.sexp()) # extract name (this is always available so no need for None check) name = prop_match.get("name").text.decode( self.encoding, errors="backslashreplace" @@ -656,7 +656,7 @@ def _parse_property_section(self, props_match): dims = (":", ":") # extract validator functions - vf_list = prop_match.get("validator_functions") + vf_list = prop_match.get("validation_functions") vfs = None if vf_list is not None: vfs = [ diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index ed08b19..05aa151 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -892,7 +892,9 @@ def __init__(self, name, cls, attrs): self.attrs = attrs["attrs"] self.default = attrs["default"] self.docstring = attrs["docstring"] - self.specs = attrs["specs"] + self.size = attrs["size"] + self.type = attrs["type"] + self.validators = attrs["validators"] def ref_role(self): """Returns role to use for references to this object (e.g. when generating auto-links)""" From d182b9c1c31072525b5e6533c70d4e1a263bb08c Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sun, 8 Sep 2024 09:06:48 +0200 Subject: [PATCH 41/45] minor fixes for arg block parsing and a test --- sphinxcontrib/mat_tree_sitter_parser.py | 4 ++-- tests/test_data/f_with_input_argument_block.m | 12 ++++++++++++ tests/test_parse_mfile.py | 19 ++++++++++++++++++- 3 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 tests/test_data/f_with_input_argument_block.m diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 988ecc7..5090d1c 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -206,13 +206,13 @@ ) ] (dimensions - [[(spread_operator) (number)] @dims _]+ + [(spread_operator) @dims (number) @dims _]+ )? (identifier)? @type (validation_functions [[(identifier) (function_call)] @validation_functions _]+ )? - (default_value (number))? @default + (default_value [(number) (identifier)])? @default (comment)? @docstring ) """ diff --git a/tests/test_data/f_with_input_argument_block.m b/tests/test_data/f_with_input_argument_block.m new file mode 100644 index 0000000..5b191c0 --- /dev/null +++ b/tests/test_data/f_with_input_argument_block.m @@ -0,0 +1,12 @@ +function [o1, o2, o3] = f_with_input_argument_block(a1, a2) + arguments + a1(1,1) double = 0 % the first input + a2(1,1) double = a1 % another input + end + o1 = a1; o2 = a2; o3 = a1 + a2; + for n = 1:3 + o1 = o2; + o2 = o3; + o3 = o1 + o2; + end +end diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py index 9ea6ea5..3074697 100644 --- a/tests/test_parse_mfile.py +++ b/tests/test_parse_mfile.py @@ -612,7 +612,6 @@ def test_f_with_function_variable(): assert obj.name == "f_with_function_variable" assert list(obj.retv.keys()) == ["obj"] assert list(obj.args.keys()) == ["the_functions", "~"] - print(obj.docstring) def test_ClassWithGetterSetter(): @@ -925,5 +924,23 @@ def test_ClassWithTests(): assert testRunning.attrs["TestTags"] == ["'Unit'"] +def test_f_with_input_argument_block(): + mfile = os.path.join(DIRNAME, "test_data", "f_with_input_argument_block.m") + obj = mat_types.MatObject.parse_mfile( + mfile, "f_with_input_argument_block", "test_data" + ) + assert obj.name == "f_with_input_argument_block" + assert list(obj.retv.keys()) == ["o1", "o2", "o3"] + assert list(obj.args.keys()) == ["a1", "a2"] + + assert obj.args["a1"]["size"] == ("1", "1") + assert obj.args["a1"]["default"] == "0" + assert obj.args["a1"]["docstring"] == "the first input" + + assert obj.args["a2"]["size"] == ("1", "1") + assert obj.args["a2"]["default"] == "a1" + assert obj.args["a2"]["docstring"] == "another input" + + if __name__ == "__main__": pytest.main([os.path.abspath(__file__)]) From aa078e11dae5f24198219b4cf9f697719c0db2a6 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sun, 8 Sep 2024 10:02:33 +0200 Subject: [PATCH 42/45] fix bug regarding output block argument parsing and add test --- sphinxcontrib/mat_tree_sitter_parser.py | 24 +++--------------- .../test_data/f_with_output_argument_block.m | 13 ++++++++++ tests/test_parse_mfile.py | 25 +++++++++++++++++++ 3 files changed, 41 insertions(+), 21 deletions(-) create mode 100644 tests/test_data/f_with_output_argument_block.m diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index 5090d1c..c735cbf 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -188,7 +188,7 @@ (arguments_statement . (attributes - [(attribute) @attrs _]+ + [(identifier) @attrs _]* )? . [(property) @args _]* @@ -525,29 +525,11 @@ def _parse_argument_section(self, argblock_node): pass def _parse_attributes(self, attrs_nodes): - # TOOD deduplicated this attrs = {} if attrs_nodes is not None: for attr_node in attrs_nodes: - _, attr_match = q_attributes.matches(attr_node)[0] - name = attr_match.get("name").text.decode( - self.encoding, errors="backslashreplace" - ) - - value_node = attr_match.get("value") - rhs_node = attr_match.get("rhs") - if rhs_node is not None: - if rhs_node.type == "cell": - attrs[name] = [ - vn.text.decode(self.encoding, errors="backslashreplace") - for vn in value_node - ] - else: - attrs[name] = value_node[0].text.decode( - self.encoding, errors="backslashreplace" - ) - else: - attrs[name] = MATLAB_ATTRIBUTE_DEFAULTS.get(name) + name = attr_node.text.decode(self.encoding, errors="backslashreplace") + attrs[name] = None return attrs diff --git a/tests/test_data/f_with_output_argument_block.m b/tests/test_data/f_with_output_argument_block.m new file mode 100644 index 0000000..e063f7b --- /dev/null +++ b/tests/test_data/f_with_output_argument_block.m @@ -0,0 +1,13 @@ +function [o1, o2, o3] = f_with_output_argument_block(a1, a2) + arguments(Output) + o1(1,1) double % Output one + o2(1,:) double % Another output + o3(1,1) double {mustBePositive} % A third output + end + o1 = a1; o2 = a2; o3 = a1 + a2; + for n = 1:3 + o1 = o2; + o2 = o3; + o3 = o1 + o2; + end +end diff --git a/tests/test_parse_mfile.py b/tests/test_parse_mfile.py index 3074697..bfd3f78 100644 --- a/tests/test_parse_mfile.py +++ b/tests/test_parse_mfile.py @@ -935,12 +935,37 @@ def test_f_with_input_argument_block(): assert obj.args["a1"]["size"] == ("1", "1") assert obj.args["a1"]["default"] == "0" + assert obj.args["a1"]["type"] == "double" assert obj.args["a1"]["docstring"] == "the first input" assert obj.args["a2"]["size"] == ("1", "1") assert obj.args["a2"]["default"] == "a1" + assert obj.args["a1"]["type"] == "double" assert obj.args["a2"]["docstring"] == "another input" +def test_f_with_output_argument_block(): + mfile = os.path.join(DIRNAME, "test_data", "f_with_output_argument_block.m") + obj = mat_types.MatObject.parse_mfile( + mfile, "f_with_output_argument_block", "test_data" + ) + assert obj.name == "f_with_output_argument_block" + assert list(obj.retv.keys()) == ["o1", "o2", "o3"] + assert list(obj.args.keys()) == ["a1", "a2"] + + assert obj.retv["o1"]["size"] == ("1", "1") + assert obj.retv["o1"]["type"] == "double" + assert obj.retv["o1"]["docstring"] == "Output one" + + assert obj.retv["o2"]["size"] == ("1", ":") + assert obj.retv["o2"]["type"] == "double" + assert obj.retv["o2"]["docstring"] == "Another output" + + assert obj.retv["o3"]["size"] == ("1", "1") + assert obj.retv["o3"]["type"] == "double" + assert obj.retv["o3"]["docstring"] == "A third output" + assert obj.retv["o3"]["validators"] == ["mustBePositive"] + + if __name__ == "__main__": pytest.main([os.path.abspath(__file__)]) From 4b3fb89068b9af8ff33f3736bab046d7904ff895 Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sun, 8 Sep 2024 10:06:25 +0200 Subject: [PATCH 43/45] remove print and fix test_matlabify --- sphinxcontrib/mat_tree_sitter_parser.py | 1 - tests/test_matlabify.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sphinxcontrib/mat_tree_sitter_parser.py b/sphinxcontrib/mat_tree_sitter_parser.py index c735cbf..4460691 100644 --- a/sphinxcontrib/mat_tree_sitter_parser.py +++ b/sphinxcontrib/mat_tree_sitter_parser.py @@ -611,7 +611,6 @@ def _parse_property_section(self, props_match): for prop in properties: # match property to extract details _, prop_match = q_property.matches(prop)[0] - print(prop.sexp()) # extract name (this is always available so no need for None check) name = prop_match.get("name").text.decode( self.encoding, errors="backslashreplace" diff --git a/tests/test_matlabify.py b/tests/test_matlabify.py index 25761a7..42ed9a8 100644 --- a/tests/test_matlabify.py +++ b/tests/test_matlabify.py @@ -99,6 +99,8 @@ def test_module(mod): "ClassWithEnumMethod", "ClassWithEventMethod", "f_with_function_variable", + "f_with_input_argument_block", + "f_with_output_argument_block", "ClassWithUndocumentedMembers", "ClassWithGetterSetter", "ClassWithDoubleQuotedString", From 631466163a720f96d6d5fb30b943c438749a02bc Mon Sep 17 00:00:00 2001 From: Anton Pozharskiy Date: Sun, 8 Sep 2024 10:10:08 +0200 Subject: [PATCH 44/45] remove textmate from dev-reqs --- dev-requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index c74a328..427369d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -4,4 +4,3 @@ pytest-cov pre-commit defusedxml>=0.7.1 sphinxcontrib-napoleon -textmate-grammar-python From 63781e398cdc26ff56ec00f33216cf7497bf4d4d Mon Sep 17 00:00:00 2001 From: Anton Edvinovich Pozharskiy Date: Wed, 18 Sep 2024 08:56:02 +0200 Subject: [PATCH 45/45] fix typo --- sphinxcontrib/matlab.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py index 01755f3..c95463a 100644 --- a/sphinxcontrib/matlab.py +++ b/sphinxcontrib/matlab.py @@ -344,7 +344,7 @@ def _object_hierarchy_parts(self, sig): table of contents, and can also be used within the :py:meth:`_toc_entry_name` method. - This method must not be used outwith table of contents generation. + This method must not be used without table of contents generation. """ parts = sig.attributes.get("module").split(".") parts.append(sig.attributes.get("fullname"))