diff --git a/sphinxcontrib/mat_textmate_parser.py b/sphinxcontrib/mat_textmate_parser.py index 1ea1c8f1..46236ce0 100644 --- a/sphinxcontrib/mat_textmate_parser.py +++ b/sphinxcontrib/mat_textmate_parser.py @@ -1,17 +1,163 @@ from textmate_grammar.parsers.matlab import MatlabParser +import re -rpath = "../tests/test_data/ClassWithPropertyValidators.m" +# rpath = "../../../syscop/software/nosnoc/+nosnoc/Options.m" +rpath = "/home/anton/tools/matlabdomain/tests/roots/test_autodoc/target/ClassExample.m" -def find_first_child(curr, tok): - ind = [i for i in range(len(curr.children)) if curr.children[i].token == tok] + +def find_first_child(curr, tok, attr="children"): + tok_lst = getattr(curr, attr) + ind = [i for i in range(len(tok_lst)) if tok_lst[i].token == tok] if not ind: - return None - return (curr.children[ind[0]], ind[0]) + return (None, None) + return (tok_lst[ind[0]], ind[0]) + + +def _toks_on_same_line(tok1, tok2): + """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results""" + line1 = _get_last_line_of_tok(tok1) + line2 = _get_first_line_of_tok(tok2) + return line1 == line2 + + +def _is_empty_line_between_tok(tok1, tok2): + """Note: pass tokens in order they appear""" + line1 = _get_last_line_of_tok(tok1) + line2 = _get_first_line_of_tok(tok2) + return line2 - line1 > 1 + + +def _get_first_line_of_tok(tok): + return min([loc[0] for loc in tok.characters.keys()]) + + +def _get_last_line_of_tok(tok): + return max([loc[0] for loc in tok.characters.keys()]) + + +class MatFunctionParser: + def __init__(self, fun_tok): + """Parse Function definition""" + # First find the function name + name_gen = fun_tok.find(tokens="entity.name.function.matlab") + try: + name_tok, _ = next(name_gen) + self.name = name_tok.content + except StopIteration: + # TODO correct error here + raise Exception("Couldn't find function name") + + # Find outputs and parameters + output_gen = 
fun_tok.find(tokens="variable.parameter.output.matlab") + param_gen = fun_tok.find(tokens="variable.parameter.input.matlab") + + self.outputs = {} + self.params = {} + self.attrs = {} + + for out, _ in output_gen: + self.outputs[out.content] = {} + + for param, _ in param_gen: + self.params[param.content] = {} + + # find arguments blocks + arg_section = None + for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): + self._parse_argument_section(arg_section) + + fun_decl_gen = fun_tok.find(tokens="meta.function.declaration.matlab") + try: + fun_decl_tok, _ = next(fun_decl_gen) + except StopIteration: + raise Exception( + "missing function declaration" + ) # This cant happen as we'd be missing a function name + + # Now parse for docstring + docstring = "" + comment_toks = fun_tok.findall( + tokens=["comment.line.percentage.matlab", "comment.block.percentage.matlab"] + ) + last_tok = arg_section if arg_section is not None else fun_decl_tok + + for comment_tok, _ in comment_toks: + if _is_empty_line_between_tok(last_tok, comment_tok): + # If we have non-consecutive tokens quit right away. + break + elif ( + not docstring and comment_tok.token == "comment.block.percentage.matlab" + ): + # If we have no previous docstring lines and a comment block we take + # the comment block as the docstring and exit. + docstring = comment_tok.content.strip()[ + 2:-2 + ].strip() # [2,-2] strips out block comment delimiters + break + elif comment_tok.token == "comment.line.percentage.matlab": + # keep parsing comments + docstring += comment_tok.content[1:] + "\n" + else: + # we are done. 
+ break + last_tok = comment_tok + + self.docstring = docstring if docstring else None + + def _parse_argument_section(self, section): + modifiers = [ + mod.content + for mod, _ in section.find(tokens="storage.modifier.arguments.matlab") + ] + arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab") + for arg_def, _ in arg_def_gen: + arg_name = arg_def.begin[ + 0 + ].content # Get argument name that is being defined + self._parse_argument_validation(arg_name, arg_def, modifiers) + + def _parse_argument_validation(self, arg_name, arg, modifiers): + # TODO This should be identical to propery validation I think. Refactor + # First get the size if found + section = self.output if "Output" in modifiers else self.params + size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) + try: # We have a size, therefore parse the comma separated list into tuple + size_tok, _ = next(size_gen) + size_elem_gen = size_tok.find( + tokens=[ + "constant.numeric.decimal.matlab", + "keyword.operator.vector.colon.matlab", + ], + depth=1, + ) + size = tuple([elem[0].content for elem in size_elem_gen]) + section[arg_name]["size"] = size + except StopIteration: + pass + + # Now find the type if it exists + # TODO this should be mapped to known types (though perhaps as a postprocess) + type_gen = arg.find(tokens="storage.type.matlab", depth=1) + try: + section[arg_name]["type"] = next(type_gen)[0].content + except StopIteration: + pass + + # Now find list of validators + validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1) + try: + validator_tok, _ = next(validator_gen) + validator_toks = validator_tok.findall( + tokens="variable.other.readwrite.matlab", depth=1 + ) # TODO Probably bug here in MATLAB-Language-grammar + section[arg_name]["validators"] = [tok[0].content for tok in validator_toks] + except StopIteration: + pass class MatClassParser: - def __init__(self, path): + def __init__(self, tokens): # DATA self.name = "" self.supers = [] @@ 
-21,10 +167,7 @@ def __init__(self, path): self.methods = {} self.enumerations = {} - # Maybe remove continuations as a crutch? currently parser is broken for continuations in attributes - # self.parser = MatlabParser(remove_line_continuations=True) - self.parser = MatlabParser() - self.parsed = self.parser.parse_file(path) + self.parsed = tokens self.cls, _ = find_first_child(self.parsed, "meta.class.matlab") if not self.cls: raise Exception() # TODO better exception @@ -36,87 +179,140 @@ def __init__(self, path): method_sections = self.cls.findall(tokens="meta.methods.matlab", depth=1) enumeration_sections = self.cls.findall(tokens="meta.enum.matlab", depth=1) - for section in property_sections: - self._parse_property_section(section[0]) + for section, _ in property_sections: + self._parse_property_section(section) + + for section, _ in method_sections: + self._parse_method_section(section) - for section in method_sections: - self._parse_method_section(section[0]) + for section, _ in enumeration_sections: + self._parse_enum_section(section) - for section in enumeration_sections: - self._parse_enum_section(section[0]) import pdb pdb.set_trace() def _find_class_docstring(self): - if self.cls.children[1].token == "comment.line.percentage.matlab": + try: + possible_comment_tok = self.cls.children[1] + except IndexError: + return + + if possible_comment_tok.token == "comment.line.percentage.matlab": self._docstring_lines() - elif self.cls.children[1].token == "comment.block.percentage.matlab": - self.docstring = ( - self.cls.children[1].content.strip()[2:-2].strip() - ) # [2,-2] strips out block comment delimiters + elif possible_comment_tok.token == "comment.block.percentage.matlab": + self.docstring = possible_comment_tok.content.strip()[ + 2:-2 + ].strip() # [2,-2] strips out block comment delimiters else: - print("found no docstring") + pass def _docstring_lines(self): idx = 1 - while self.cls.children[idx].token == "comment.line.percentage.matlab": + cls_children 
= self.cls.children + + while ( + idx < len(cls_children) + and cls_children[idx].token == "comment.line.percentage.matlab" + ): self.docstring += ( - self.cls.children[idx].content[1:] + "\n" + cls_children[idx].content[1:] + "\n" ) # [1:] strips out percent sign idx += 1 self.docstring = self.docstring.strip() def _parse_clsdef(self): - for child in self.clsdef.children: - child.print() + # Try parsing attrs + attrs_tok_gen = self.clsdef.find(tokens="storage.modifier.section.class.matlab") + try: + attrs_tok, _ = next(attrs_tok_gen) + self._parse_class_attributes(attrs_tok) + except StopIteration: + pass + # Parse classname + classname_tok_gen = self.clsdef.find(tokens="entity.name.type.class.matlab") + try: + classname_tok, _ = next(classname_tok_gen) + self.name = classname_tok.content + except StopIteration: + print("ClassName not found") # TODO this is probably fatal + + # Parse interited classes + parent_class_toks = self.clsdef.findall(tokens="meta.inherited-class.matlab") + + for parent_class_tok, _ in parent_class_toks: + sections = parent_class_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.inherited-class.matlab", + ] + ) + super_cls = tuple([sec.content for sec, _ in sections]) + self.supers.append(super_cls) # Parse Attributes TODO maybe there is a smarter way to do this? 
idx = 0 while self.clsdef.children[idx].token == "storage.modifier.class.matlab": - attr = self.clsdef.children[idx].content + attr_tok = self.clsdef.children[idx] + attr = attr_tok.content val = None # TODO maybe do some typechecking here or we can assume that you give us valid Matlab idx += 1 - if ( - self.clsdef.children[idx].token == "keyword.operator.assignment.matlab" - ): # pull out r.h.s + if attr_tok.token == "keyword.operator.assignment.matlab": # pull out r.h.s idx += 1 val = self.clsdef.children[idx].content idx += 1 if ( - self.clsdef.children[idx].token - == "punctuation.separator.modifier.comma.matlab" + attr_tok.token == "punctuation.separator.modifier.comma.matlab" ): # skip commas idx += 1 self.attrs[attr] = val - if ( - self.clsdef.children[idx].token == "punctuation.section.parens.end.matlab" - ): # Skip end of attrs - idx += 1 - - # name must be next - self.name = self.clsdef.children[idx].content - idx += 1 - - while idx < len( - self.clsdef.children - ): # No children we care about after this except inherited classes - if self.clsdef.children[idx].token == "meta.inherited-class.matlab": - super_cls_tok = self.clsdef.children[idx] - # collect superclass as a tuple - super_cls = tuple( - [ - child.content - for child in super_cls_tok.children - if not child.token.startswith("punctuation") - ] - ) - self.supers.append(super_cls) - idx += 1 + def _parse_class_attributes(self, attrs_tok): + # walk down child list and parse manually + # TODO perhaps contribute a delimited list find to textmate-grammar-python + children = attrs_tok.children + idx = 0 + while idx < len(children): + child_tok = children[idx] + if child_tok.token == "storage.modifier.class.matlab": + attr = child_tok.content + val = None + idx += 1 # walk to next token + try: # however we may have walked off the end of the list in which case we exit + maybe_assign_tok = children[idx] + except: + self.attrs[attr] = val + break + if maybe_assign_tok.token == 
"keyword.operator.assignment.matlab": + idx += 1 + rhs_tok = children[idx] # parse right hand side + if rhs_tok.token == "meta.cell.literal.matlab": + # A cell. For now just take the whole cell as value. + # TODO parse out the cell array of metaclass literals. + val = "{" + rhs_tok.content + "}" + idx += 1 + elif rhs_tok.token == "constant.language.boolean.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "keyword.operator.other.question.matlab": + idx += 1 + metaclass_tok = children[idx] + metaclass_components = metaclass_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.class.matlab", + ] + ) + val = tuple([comp.content for comp, _ in metaclass_components]) + else: + pass + self.attrs[attr] = val + else: # Comma or continuation therefore skip + idx += 1 def _parse_property_section(self, section): # TODO parse property section attrs + attrs = self._parse_attributes(section) idxs = [ i for i in range(len(section.children)) @@ -125,11 +321,30 @@ def _parse_property_section(self, section): for idx in idxs: prop_tok = section.children[idx] prop_name = prop_tok.begin[0].content - self.properties[prop_name] = {} # Create entry for property + self.properties[prop_name] = {"attrs": attrs} # Create entry for property self._parse_property_validation( prop_name, prop_tok ) # Parse property validation. 
+ # Try to find a default assignment: + default = None + _, assgn_idx = find_first_child( + prop_tok, "keyword.operator.assignment.matlab", attr="end" + ) + if assgn_idx is not None: + default = "" + assgn_idx += 1 # skip assignment + while assgn_idx < len(prop_tok.end): + tok = prop_tok.end[assgn_idx] + assgn_idx += 1 + if tok.token in [ + "comment.line.percentage.matlab", + "punctuation.terminator.semicolon.matlab", + ]: + break + default += tok.content + self.properties[prop_name]["default"] = default + # Get inline docstring inline_docstring_gen = prop_tok.find( tokens="comment.line.percentage.matlab", attribute="end" @@ -148,7 +363,7 @@ def _parse_property_section(self, section): next_tok = prop_tok while walk_back_idx >= 0: walk_tok = section.children[walk_back_idx] - if self._is_empty_line_between_tok(walk_tok, next_tok): + if _is_empty_line_between_tok(walk_tok, next_tok): # Once there is an empty line between consecutive tokens we are done. break @@ -179,7 +394,7 @@ def _parse_property_section(self, section): while walk_fwd_idx < len(section.children): walk_tok = section.children[walk_fwd_idx] - if self._is_empty_line_between_tok(prev_tok, walk_tok): + if _is_empty_line_between_tok(prev_tok, walk_tok): # Once there is an empty line between consecutive tokens we are done. 
break @@ -241,12 +456,13 @@ def _parse_property_validation(self, prop_name, prop): # Now find list of validators validator_gen = prop.find(tokens="meta.block.validation.matlab", depth=1) try: - import pdb - - pdb.set_trace() validator_tok, _ = next(validator_gen) validator_toks = validator_tok.findall( - tokens="variable.other.readwrite.matlab", depth=1 + tokens=[ + "variable.other.readwrite.matlab", + "meta.function-call.parens.matlab", + ], + depth=1, ) # TODO Probably bug here in MATLAB-Language-grammar self.properties[prop_name]["validators"] = [ tok[0].content for tok in validator_toks @@ -255,7 +471,7 @@ def _parse_property_validation(self, prop_name, prop): pass def _parse_method_section(self, section): - # TODO parse property section attrs + attrs = self._parse_attributes(section) idxs = [ i for i in range(len(section.children)) @@ -263,96 +479,11 @@ def _parse_method_section(self, section): ] for idx in idxs: meth_tok = section.children[idx] - self._parse_function(meth_tok) - # TODO walk forward and backward to get property docstring. - # TODO if we have mutliple possible docstrings what is given priority? 
- # TODO parse out property validations syntax - - def _parse_function(self, fun_tok): - """Parse Function definition""" - # First find the function name - name_gen = fun_tok.find(tokens="entity.name.function.matlab") - try: - name_tok, _ = next(name_gen) - fun_name = name_tok.content - except StopIteration: - # TODO correct error here - raise Exception("Couldn't find function name") - - # Find outputs and parameters - output_gen = fun_tok.find(tokens="variable.parameter.output.matlab") - param_gen = fun_tok.find(tokens="variable.parameter.input.matlab") - - self.methods[fun_name] = {} - self.methods[fun_name]["outputs"] = {} - self.methods[fun_name]["params"] = {} - - for out, _ in output_gen: - self.methods[fun_name]["outputs"][out.content] = {} - - for param, _ in param_gen: - self.methods[fun_name]["params"][param.content] = {} - - # find arguments blocks - for arg_section, _ in fun_tok.find(tokens="meta.arguments.matlab"): - self._parse_argument_section(fun_name, arg_section) - - def _parse_argument_section(self, fun_name, section): - modifiers = [ - mod.content - for mod, _ in section.find(tokens="storage.modifier.arguments.matlab") - ] - arg_def_gen = section.find(tokens="meta.assignment.definition.property.matlab") - for arg_def, _ in arg_def_gen: - arg_name = arg_def.begin[ - 0 - ].content # Get argument name that is being defined - self._parse_argument_validation(fun_name, arg_name, arg_def, modifiers) - - def _parse_argument_validation(self, fun_name, arg_name, arg, modifiers): - # TODO This should be identical to propery validation I think. 
Refactor - # First get the size if found - section = "output" if "Output" in modifiers else "params" - size_gen = arg.find(tokens="meta.parens.size.matlab", depth=1) - try: # We have a size, therefore parse the comma separated list into tuple - size_tok, _ = next(size_gen) - size_elem_gen = size_tok.find( - tokens=[ - "constant.numeric.decimal.matlab", - "keyword.operator.vector.colon.matlab", - ], - depth=1, - ) - size = tuple([elem[0].content for elem in size_elem_gen]) - self.methods[fun_name][section][arg_name]["size"] = size - except StopIteration: - pass - - # Now find the type if it exists - # TODO this should be mapped to known types (though perhaps as a postprocess) - type_gen = arg.find(tokens="storage.type.matlab", depth=1) - try: - self.methods[fun_name][section][arg_name]["type"] = next(type_gen)[ - 0 - ].content - except StopIteration: - pass - - # Now find list of validators - validator_gen = arg.find(tokens="meta.block.validation.matlab", depth=1) - try: - validator_tok, _ = next(validator_gen) - validator_toks = validator_tok.findall( - tokens="variable.other.readwrite.matlab", depth=1 - ) # TODO Probably bug here in MATLAB-Language-grammar - self.methods[fun_name][section][arg_name]["validators"] = [ - tok[0].content for tok in validator_toks - ] - except StopIteration: - pass + parsed_function = MatFunctionParser(meth_tok) + self.methods[parsed_function.name] = parsed_function + self.methods[parsed_function.name].attrs = attrs def _parse_enum_section(self, section): - # TODO parse property section attrs idxs = [ i for i in range(len(section.children)) @@ -365,7 +496,8 @@ def _parse_enum_section(self, section): enum_name = enum_tok.children[0].content self.enumerations[enum_name] = {} if ( - section.children[idx + 1].token == "meta.parens.matlab" + idx + 1 < len(section.children) + and section.children[idx + 1].token == "meta.parens.matlab" ): # Parse out args TODO this should be part of enummember assignment definition args = tuple( [ @@ -383,7 
+515,7 @@ def _parse_enum_section(self, section): next_tok = enum_tok while walk_back_idx >= 0: walk_tok = section.children[walk_back_idx] - if self._is_empty_line_between_tok(walk_tok, next_tok): + if _is_empty_line_between_tok(walk_tok, next_tok): # Once there is an empty line between consecutive tokens we are done. break @@ -415,7 +547,7 @@ def _parse_enum_section(self, section): while walk_fwd_idx < len(section.children): walk_tok = section.children[walk_fwd_idx] - if self._is_empty_line_between_tok(prev_tok, walk_tok): + if _is_empty_line_between_tok(prev_tok, walk_tok): # Once there is an empty line between consecutive tokens we are done. break @@ -429,7 +561,7 @@ def _parse_enum_section(self, section): break elif walk_tok.token == "comment.line.percentage.matlab": # In the case the comment is on the same line as the end of the enum declaration, take it as inline comment and exit. - if self._toks_on_same_line(section.children[idx], walk_tok): + if _toks_on_same_line(section.children[idx], walk_tok): inline_docstring = walk_tok.content[1:] break @@ -453,24 +585,58 @@ def _parse_enum_section(self, section): else: self.enumerations[enum_name]["docstring"] = None - def _toks_on_same_line(self, tok1, tok2): - """Note: pass the tokens in order they appear in case of multiline tokens, otherwise this may return incorrect results""" - line1 = self._get_last_line_of_tok(tok1) - line2 = self._get_first_line_of_tok(tok2) - return line1 == line2 - - def _is_empty_line_between_tok(self, tok1, tok2): - """Note: pass tokens in order they appear""" - line1 = self._get_last_line_of_tok(tok1) - line2 = self._get_first_line_of_tok(tok2) - return line2 - line1 > 1 - - def _get_first_line_of_tok(self, tok): - return min([loc[0] for loc in tok.characters.keys()]) + def _parse_attributes(self, section): + # walk down child list and parse manually + children = section.begin + idx = 1 + attrs = {} + while idx < len(children): + child_tok = children[idx] + if re.match( + 
"storage.modifier.(properties|methods|events).matlab", child_tok.token + ): + attr = child_tok.content + val = None + idx += 1 # walk to next token + try: # however we may have walked off the end of the list in which case we exit + maybe_assign_tok = children[idx] + except: + attrs[attr] = val + return attrs + if maybe_assign_tok.token == "keyword.operator.assignment.matlab": + idx += 1 + rhs_tok = children[idx] # parse right hand side + if rhs_tok.token == "meta.cell.literal.matlab": + # A cell. For now just take the whole cell as value. + # TODO parse out the cell array of metaclass literals. + val = "{" + rhs_tok.content + "}" + idx += 1 + elif rhs_tok.token == "constant.language.boolean.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "storage.modifier.access.matlab": + val = rhs_tok.content + idx += 1 + elif rhs_tok.token == "keyword.operator.other.question.matlab": + idx += 1 + metaclass_tok = children[idx] + metaclass_components = metaclass_tok.findall( + tokens=[ + "entity.name.namespace.matlab", + "entity.other.class.matlab", + ] + ) + val = tuple([comp.content for comp, _ in metaclass_components]) + else: + pass + attrs[attr] = val + else: # Comma or continuation therefore skip + idx += 1 - def _get_last_line_of_tok(self, tok): - return max([loc[0] for loc in tok.characters.keys()]) + return attrs if __name__ == "__main__": - cls_parse = MatClassParser(rpath) + parser = MatlabParser() + toks = parser.parse_file(rpath) + cls_parse = MatClassParser(toks) diff --git a/sphinxcontrib/mat_types.py b/sphinxcontrib/mat_types.py index 6513cfe6..18c71b26 100644 --- a/sphinxcontrib/mat_types.py +++ b/sphinxcontrib/mat_types.py @@ -17,6 +17,12 @@ from zipfile import ZipFile import xml.etree.ElementTree as ET import sphinxcontrib.mat_parser as mat_parser +from sphinxcontrib.mat_textmate_parser import MatClassParser, MatFunctionParser +from textmate_grammar.parsers.matlab import MatlabParser +import logging +from pathlib import Path +import cProfile 
+import pstats logger = sphinx.util.logging.getLogger("matlab-domain") @@ -430,6 +436,11 @@ def matlabify(objname): # make a full path out of basedir and objname fullpath = os.path.join(MatObject.basedir, objname) # objname fullpath + # Check if path should be ignored + for ignore in MatObject.sphinx_env.config.matlab_ignore_dirs: + if Path(fullpath).is_relative_to(MatObject.basedir, ignore): + return None + logger.debug( f"[sphinxcontrib-matlabdomain] matlabify {package=}, {objname=}, {fullpath=}" ) @@ -494,40 +505,62 @@ def parse_mfile(mfile, name, path, encoding=None): full_code = code - # remove the top comment header (if there is one) from the code string - code = mat_parser.remove_comment_header(code) - code = mat_parser.remove_line_continuations(code) - code = mat_parser.fix_function_signatures(code) - - tks = list(MatlabLexer().get_tokens(code)) + # quiet the textmate grammar logger and parse the file + logging.getLogger("textmate_grammar").setLevel(logging.ERROR) + parser = MatlabParser() + toks = parser.parse_file(mfile) modname = path.replace(os.sep, ".") # module name # assume that functions and classes always start with a keyword def isFunction(token): - return token == (Token.Keyword, "function") + comments_and_functions = [ + "comment.block.percentage.matlab", + "comment.line.percentage.matlab", + "meta.function.matlab", + ] + return all( + [(child.token in comments_and_functions) for child in token.children] + ) def isClass(token): - return token == (Token.Keyword, "classdef") + tok_gen = token.find(tokens="meta.class.matlab", depth=1) + try: + tok, _ = next(tok_gen) + return True + except StopIteration: + return False - if isClass(tks[0]): + if isClass(toks): logger.debug( "[sphinxcontrib-matlabdomain] parsing classdef %s from %s.", name, modname, ) - return MatClass(name, modname, tks) - elif isFunction(tks[0]): + return MatClass(name, modname, toks) + elif isFunction(toks): logger.debug( "[sphinxcontrib-matlabdomain] parsing function %s from 
%s.", name, modname, ) - return MatFunction(name, modname, tks) + fun_tok_gen = toks.find(tokens="meta.function.matlab") + parsed_function = None + try: + fun_tok, _ = next(fun_tok_gen) + parsed_function = MatFunctionParser(fun_tok) + except StopIteration: + logger.warning( + "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. No function found.", + modname, + name, + ) + return MatFunction(name, modname, toks) else: + pass # it's a script file retoken with header comment - tks = list(MatlabLexer().get_tokens(full_code)) - return MatScript(name, modname, tks) + # tks = list(MatlabLexer().get_tokens(full_code)) + # return MatScript(name, modname, toks) return None @staticmethod @@ -841,177 +874,17 @@ class MatFunction(MatObject): def __init__(self, name, modname, tokens): super(MatFunction, self).__init__(name) + parsed_function = MatFunctionParser(tokens) #: Path of folder containing :class:`MatObject`. self.module = modname - #: List of tokens parsed from mfile by Pygments. - self.tokens = tokens #: docstring - self.docstring = "" + self.docstring = parsed_function.docstring #: output args - self.retv = None + self.retv = parsed_function.outputs #: input args - self.args = None + self.args = parsed_function.params #: remaining tokens after main function is parsed self.rem_tks = None - # ===================================================================== - # parse tokens - # XXX: Pygments always reads MATLAB function signature as: - # [(Token.Keyword, 'function'), # any whitespace is stripped - # (Token.Text.Whitesapce, ' '), # spaces and tabs are concatenated - # (Token.Text, '[o1, o2]'), # if there are outputs, they're all - # concatenated w/ or w/o brackets and any - # trailing whitespace - # (Token.Punctuation, '='), # possibly an equal sign - # (Token.Text.Whitesapce, ' '), # spaces and tabs are concatenated - # (Token.Name.Function, 'myfun'), # the name of the function - # (Token.Punctuation, '('), # opening parenthesis - # (Token.Text, 'a1, a2', # if 
there are args, they're concatenated - # (Token.Punctuation, ')'), # closing parenthesis - # (Token.Text.Whitesapce, '\n')] # all whitespace after args - # XXX: Pygments does not tolerate MATLAB continuation ellipsis! - tks = copy(self.tokens) # make a copy of tokens - tks.reverse() # reverse in place for faster popping, stacks are LiLo - try: - # ===================================================================== - # parse function signature - # function [output] = name(inputs) - # % docstring - # ===================================================================== - # Skip function token - already checked in MatObject.parse_mfile - tks.pop() - skip_whitespace(tks) - - # Check for return values - retv = tks.pop() - if retv[0] is Token.Text: - self.retv = [rv.strip() for rv in retv[1].strip("[ ]").split(",")] - if len(self.retv) == 1: - # check if return is empty - if not self.retv[0]: - self.retv = None - # check if return delimited by whitespace - elif " " in self.retv[0] or "\t" in self.retv[0]: - self.retv = [ - rv - for rv_tab in self.retv[0].split("\t") - for rv in rv_tab.split(" ") - ] - if tks.pop() != (Token.Punctuation, "="): - # Unlikely to end here. But never-the-less warn! - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. Expected '='.", - modname, - name, - ) - return - - skip_whitespace(tks) - elif retv[0] is Token.Name.Function: - tks.append(retv) - # ===================================================================== - # function name - func_name = tks.pop() - func_name = ( - func_name[0], - func_name[1].strip(" ()"), - ) # Strip () in case of dummy arg - if func_name != (Token.Name.Function, self.name): # @UndefinedVariable - if isinstance(self, MatMethod): - self.name = func_name[1] - else: - logger.warning( - "[sphinxcontrib-matlabdomain] Unexpected function name: '%s'. 
" - "Expected '%s' in module '%s'.", - func_name[1], - name, - modname, - ) - - # ===================================================================== - # input args - if tks.pop() == (Token.Punctuation, "("): - args = tks.pop() - if args[0] is Token.Text: - self.args = [ - arg.strip() for arg in args[1].split(",") - ] # no arguments given - elif args == (Token.Punctuation, ")"): - # put closing parenthesis back in stack - tks.append(args) - # check if function args parsed correctly - if tks.pop() != (Token.Punctuation, ")"): - # Unlikely to end here. But never-the-less warn! - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in {}.{}. Expected ')'.", - modname, - name, - ) - return - - skip_whitespace(tks) - # ===================================================================== - # docstring - try: - docstring = tks.pop() - except IndexError: - docstring = None - while docstring and docstring[0] is Token.Comment: - self.docstring += docstring[1].lstrip("%") - # Get newline if it exists and append to docstring - try: - wht = tks.pop() # We expect a newline - except IndexError: - break - if wht[0] in (Token.Text, Token.Text.Whitespace) and wht[1] == "\n": - self.docstring += "\n" - # Skip whitespace - try: - wht = tks.pop() # We expect a newline - except IndexError: - break - while wht in list(zip((Token.Text,) * 3, (" ", "\t"))): - try: - wht = tks.pop() - except IndexError: - break - docstring = wht # check if Token is Comment - - # Find the end of the function - used in `MatMethod`` to determine where a method ends. 
- if docstring is None: - return - kw = docstring # last token - lastkw = 0 # set last keyword placeholder - kw_end = 1 # count function keyword - while kw_end > 0: - # increment keyword-end pairs count - if kw in MATLAB_KEYWORD_REQUIRES_END: - kw_end += 1 - # nested function definition - elif kw[0] is Token.Keyword and kw[1].strip() == "function": - kw_end += 1 - # decrement keyword-end pairs count but - # don't decrement `end` if used as index - elif kw == (Token.Keyword, "end") and not lastkw: - kw_end -= 1 - # save last punctuation - elif kw in MATLAB_FUNC_BRACES_BEGIN: - lastkw += 1 - elif kw in MATLAB_FUNC_BRACES_END: - lastkw -= 1 - try: - kw = tks.pop() - except IndexError: - break - tks.append(kw) # put last token back in list - except IndexError: - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. Check if valid MATLAB code.", - modname, - name, - ) - # if there are any tokens left save them - if len(tks) > 0: - self.rem_tks = tks # save extra tokens def ref_role(self): """Returns role to use for references to this object (e.g. when generating auto-links)""" @@ -1050,397 +923,23 @@ class MatClass(MatMixin, MatObject): def __init__(self, name, modname, tokens): super(MatClass, self).__init__(name) + parsed_class = MatClassParser(tokens) #: Path of folder containing :class:`MatObject`. self.module = modname - #: List of tokens parsed from mfile by Pygments. 
- self.tokens = tokens #: dictionary of class attributes - self.attrs = {} + self.attrs = parsed_class.attrs #: list of class superclasses - self.bases = [] + self.bases = parsed_class.supers #: docstring - self.docstring = "" + self.docstring = parsed_class.docstring #: dictionary of class properties - self.properties = {} + self.properties = parsed_class.properties #: dictionary of class methods - self.methods = {} + self.methods = parsed_class.methods + #: + self.enumerations = parsed_class.enumerations #: remaining tokens after main class definition is parsed self.rem_tks = None - # ===================================================================== - # parse tokens - # TODO: use generator and next() instead of stepping index! - try: - # Skip classdef token - already checked in MatObject.parse_mfile - idx = 1 # token index - - # class "attributes" - self.attrs, idx = self.attributes(idx, MATLAB_CLASS_ATTRIBUTE_TYPES) - - # Check if self.name matches the name in the file. - idx += self._blanks(idx) - if not self.tokens[idx][1] == self.name: - logger.warning( - "[sphinxcontrib-matlabdomain] Unexpected class name: '%s'." - " Expected '%s' in '%s'.", - self.tokens[idx][1], - name, - modname, - ) - - idx += 1 - idx += self._blanks(idx) # skip blanks - # ===================================================================== - # super classes - if self._tk_eq(idx, (Token.Operator, "<")): - idx += 1 - # newline terminates superclasses - while not self._is_newline(idx): - idx += self._blanks(idx) # skip blanks - # concatenate base name - base_name = "" - while ( - not self._whitespace(idx) - and self.tokens[idx][0] is not Token.Comment - ): - base_name += self.tokens[idx][1] - idx += 1 - # If it's a newline, we are done parsing. 
- if not self._is_newline(idx): - idx += 1 - if base_name: - self.bases.append(base_name) - idx += self._blanks(idx) # skip blanks - # continue to next super class separated by & - if self._tk_eq(idx, (Token.Operator, "&")): - idx += 1 - idx += 1 # end of super classes - # newline terminates classdef signature - elif self._is_newline(idx): - idx += 1 # end of classdef signature - # ===================================================================== - # docstring - idx += self._indent(idx) # calculation indentation - # concatenate docstring - while self.tokens[idx][0] is Token.Comment: - self.docstring += self.tokens[idx][1].lstrip("%") - idx += 1 - # append newline to docstring - if self._is_newline(idx): - self.docstring += self.tokens[idx][1] - idx += 1 - # skip tab - indent = self._indent(idx) # calculation indentation - idx += indent - # ===================================================================== - # properties & methods blocks - # loop over code body searching for blocks until end of class - while self._tk_ne(idx, (Token.Keyword, "end")): - # skip comments and whitespace - while self._whitespace(idx) or self.tokens[idx][0] is Token.Comment: - whitespace = self._whitespace(idx) - if whitespace: - idx += whitespace - else: - idx += 1 - - # ================================================================= - # properties blocks - if self._tk_eq(idx, (Token.Keyword, "properties")): - prop_name = "" - idx += 1 - # property "attributes" - attr_dict, idx = self.attributes( - idx, MATLAB_PROPERTY_ATTRIBUTE_TYPES - ) - # Token.Keyword: "end" terminates properties & methods block - while self._tk_ne(idx, (Token.Keyword, "end")): - # skip whitespace - while self._whitespace(idx): - whitespace = self._whitespace(idx) - if whitespace: - idx += whitespace - else: - idx += 1 - - # ========================================================= - # long docstring before property - if self.tokens[idx][0] is Token.Comment: - # docstring - docstring = "" - - # Collect 
comment lines - while self.tokens[idx][0] is Token.Comment: - docstring += self.tokens[idx][1].lstrip("%") - idx += 1 - idx += self._blanks(idx) - - try: - # Check if end of line was reached - if self._is_newline(idx): - docstring += "\n" - idx += 1 - idx += self._blanks(idx) - - # Check if variable name is next - if self.tokens[idx][0] is Token.Name: - prop_name = self.tokens[idx][1] - self.properties[prop_name] = { - "attrs": attr_dict - } - self.properties[prop_name][ - "docstring" - ] = docstring - break - - # If there is an empty line at the end of - # the comment: discard it - elif self._is_newline(idx): - docstring = "" - idx += self._whitespace(idx) - break - - except IndexError: - # EOF reached, quit gracefully - break - - # with "%:" directive trumps docstring after property - if self.tokens[idx][0] is Token.Name: - prop_name = self.tokens[idx][1] - idx += 1 - # Initialize property if it was not already done - if prop_name not in self.properties.keys(): - self.properties[prop_name] = {"attrs": attr_dict} - - # skip size, class and functions specifiers - # TODO: Parse old and new style property extras - idx += self._propspec(idx) - - if self._tk_eq(idx, (Token.Punctuation, ";")): - continue - - # subtype of Name EG Name.Builtin used as Name - elif self.tokens[idx][0] in Token.Name.subtypes: - prop_name = self.tokens[idx][1] - logger.debug( - "[sphinxcontrib-matlabdomain] WARNING %s.%s.%s is a builtin name.", - self.module, - self.name, - prop_name, - ) - self.properties[prop_name] = {"attrs": attr_dict} - idx += 1 - - # skip size, class and functions specifiers - # TODO: Parse old and new style property extras - idx += self._propspec(idx) - - if self._tk_eq(idx, (Token.Punctuation, ";")): - continue - - elif self._tk_eq(idx, (Token.Keyword, "end")): - idx += 1 - break - # skip semicolon after property name, but no default - elif self._tk_eq(idx, (Token.Punctuation, ";")): - idx += 1 - # A comment might come after semi-colon - idx += self._blanks(idx) - if 
self._is_newline(idx): - idx += 1 - # Property definition is finished; add missing values - if "default" not in self.properties[prop_name].keys(): - self.properties[prop_name]["default"] = None - if "docstring" not in self.properties[prop_name].keys(): - self.properties[prop_name]["docstring"] = None - - continue - elif self.tokens[idx][0] is Token.Comment: - docstring = self.tokens[idx][1].lstrip("%") - docstring += "\n" - self.properties[prop_name]["docstring"] = docstring - idx += 1 - elif self.tokens[idx][0] is Token.Comment: - # Comments seperated with blank lines. - idx = idx - 1 - continue - else: - logger.warning( - "sphinxcontrib-matlabdomain] Expected property in %s.%s - got %s", - self.module, - self.name, - str(self.tokens[idx]), - ) - return - idx += self._blanks(idx) # skip blanks - # ========================================================= - # defaults - default = {"default": None} - if self._tk_eq(idx, (Token.Punctuation, "=")): - idx += 1 - idx += self._blanks(idx) # skip blanks - # concatenate default value until newline or comment - default = "" - brace_count = 0 - # keep reading until newline or comment - # only if all punctuation pairs are closed - # and comment is **not** continuation ellipsis - while ( - ( - not self._is_newline(idx) - and self.tokens[idx][0] is not Token.Comment - ) - or brace_count > 0 - or ( - self.tokens[idx][0] is Token.Comment - and self.tokens[idx][1].startswith("...") - ) - ): - token = self.tokens[idx] - # default has an array spanning multiple lines - # keep track of braces - if token in MATLAB_PROP_BRACES_BEGIN: - brace_count += 1 - # look for end of array - elif token in MATLAB_PROP_BRACES_END: - brace_count -= 1 - # Pygments treats continuation ellipsis as comments - # text from ellipsis until newline is in token - elif token[0] is Token.Comment and token[1].startswith( - "..." 
- ): - idx += 1 # skip ellipsis comments - # include newline which should follow comment - if self._is_newline(idx): - default += "\n" - idx += 1 - continue - elif self._is_newline(idx - 1) and not self._is_newline( - idx - ): - idx += self._blanks(idx) - continue - elif token[0] is Token.Text and token[1] == " ": - # Skip spaces that are not in strings. - idx += 1 - continue - default += token[1] - idx += 1 - if self.tokens[idx][0] is not Token.Comment: - idx += 1 - if default: - default = {"default": default.rstrip("; ")} - - self.properties[prop_name].update(default) - # ========================================================= - # docstring - if "docstring" not in self.properties[prop_name].keys(): - docstring = {"docstring": None} - if self.tokens[idx][0] is Token.Comment: - docstring["docstring"] = self.tokens[idx][1].lstrip("%") - idx += 1 - self.properties[prop_name].update(docstring) - elif self.tokens[idx][0] is Token.Comment: - # skip this comment - idx += 1 - - idx += self._whitespace(idx) - idx += 1 - # ================================================================= - # method blocks - if self._tk_eq(idx, (Token.Keyword, "methods")): - idx += 1 - # method "attributes" - attr_dict, idx = self.attributes(idx, MATLAB_METHOD_ATTRIBUTE_TYPES) - # Token.Keyword: "end" terminates properties & methods block - while self._tk_ne(idx, (Token.Keyword, "end")): - # skip comments and whitespace - while ( - self._whitespace(idx) - or self.tokens[idx][0] is Token.Comment - ): - whitespace = self._whitespace(idx) - if whitespace: - idx += whitespace - else: - idx += 1 - # skip methods defined in other files - meth_tk = self.tokens[idx] - if ( - meth_tk[0] is Token.Name - or meth_tk[0] is Token.Name.Builtin - or meth_tk[0] is Token.Name.Function - or ( - meth_tk[0] is Token.Keyword - and meth_tk[1].strip() == "function" - and self.tokens[idx + 1][0] is Token.Name.Function - ) - or self._tk_eq(idx, (Token.Punctuation, "[")) - or self._tk_eq(idx, (Token.Punctuation, 
"]")) - or self._tk_eq(idx, (Token.Punctuation, "=")) - or self._tk_eq(idx, (Token.Punctuation, "(")) - or self._tk_eq(idx, (Token.Punctuation, ")")) - or self._tk_eq(idx, (Token.Punctuation, ";")) - or self._tk_eq(idx, (Token.Punctuation, ",")) - ): - logger.debug( - "[sphinxcontrib-matlabdomain] Skipping tokens for methods defined in separate files." - "Token #%d: %r", - idx, - self.tokens[idx], - ) - idx += 1 + self._whitespace(idx + 1) - elif self._tk_eq(idx, (Token.Keyword, "end")): - idx += 1 - break - else: - # find methods - meth = MatMethod( - self.module, self.tokens[idx:], self, attr_dict - ) - - # Detect getter/setter methods - these are not documented - isGetter = meth.name.startswith("get.") - isSetter = meth.name.startswith("set.") - if not (isGetter or isSetter): - # Add the parsed method to methods dictionary - self.methods[meth.name] = meth - - # Update idx with the number of parsed tokens. - idx += meth.skip_tokens() - idx += self._whitespace(idx) - idx += 1 - if self._tk_eq(idx, (Token.Keyword, "events")): - logger.debug( - "[sphinxcontrib-matlabdomain] ignoring 'events' in 'classdef %s.'", - self.name, - ) - idx += 1 - # Token.Keyword: "end" terminates events block - while self._tk_ne(idx, (Token.Keyword, "end")): - idx += 1 - idx += 1 - if self._tk_eq(idx, (Token.Name, "enumeration")): - logger.debug( - "[sphinxcontrib-matlabdomain] ignoring 'enumeration' in 'classdef %s'.", - self.name, - ) - idx += 1 - # Token.Keyword: "end" terminates events block - while self._tk_ne(idx, (Token.Keyword, "end")): - idx += 1 - idx += 1 - if self._tk_eq(idx, (Token.Punctuation, ";")): - # Skip trailing semicolon after end. - idx += 1 - except IndexError: - logger.warning( - "[sphinxcontrib-matlabdomain] Parsing failed in %s.%s. " - "Check if valid MATLAB code.", - modname, - name, - ) - - self.rem_tks = idx # index of last token def ref_role(self): """Returns role to use for references to this object (e.g. 
when generating auto-links)""" @@ -1580,7 +1079,9 @@ def __doc__(self): @property def __bases__(self): - bases_ = dict.fromkeys(self.bases) # make copy of bases + bases_ = dict.fromkeys( + [".".join(base) for base in self.bases] + ) # keys are the dotted base names ("."-joined parts of each base) class_entity_table = {} for name, entity in entities_table.items(): if isinstance(entity, MatClass) or "@" in name: diff --git a/sphinxcontrib/matlab.py b/sphinxcontrib/matlab.py index 83737648..73c19301 100644 --- a/sphinxcontrib/matlab.py +++ b/sphinxcontrib/matlab.py @@ -860,6 +860,7 @@ def setup(app): app.add_domain(MATLABDomain) # autodoc app.add_config_value("matlab_src_dir", None, "env") + app.add_config_value("matlab_ignore_dirs", [], "env") app.add_config_value("matlab_src_encoding", None, "env") app.add_config_value("matlab_keep_package_prefix", False, "env") app.add_config_value("matlab_show_property_default_value", False, "env")