"""Generate an HTML report comparing source docstrings with the Sphinx docs.

The script walks a Python source tree, records every class and function
definition it finds (and whether it has a docstring), then scans the built
Sphinx HTML for documented Python objects and writes a report that highlights
definitions which are missing from the rendered documentation.
"""
import ast
import os
from collections import defaultdict

# AST node types that are reported as "function".
_FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

# Sphinx renders each documented Python object as <dl class="py OBJTYPE">.
# Exceptions are plain classes in the source, so they must be collected too.
_SPHINX_OBJTYPES = ("class", "exception", "method", "function", "property")

_REPORT_HEAD = """\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Documentation Report</title>
<style>
body { font-family: sans-serif; }
table { border-collapse: collapse; margin-bottom: 1em; }
th, td { border: 1px solid #ccc; padding: 4px 8px; text-align: left; }
.highlight { background-color: #ffcccc; }
.highlight-low { background-color: #ffffcc; }
</style>
</head>
<body>
<h1>Documentation Report</h1>
<p>Classes and methods that start with a single underscore are
excluded from the report</p>
<p>Red rows have a docstring, but are not in Sphinx, yellow have neither</p>
"""

_REPORT_TAIL = "</body>\n</html>\n"

_TABLE_HEAD = """\
<table>
<tr>
<th>Name</th>
<th>Type</th>
<th>Has Docstring</th>
<th>In Sphinx</th>
</tr>
"""

_YES = "&#10003;"
_NO = "&#10007;"


def _is_nested_in_function(node):
    """Return True if any ancestor of *node* is a function definition.

    Relies on the ``parent`` attributes set by :class:`ParentNodeVisitor`.
    """
    ancestor = getattr(node, "parent", None)
    while ancestor is not None:
        if isinstance(ancestor, _FUNCTION_NODES):
            return True
        ancestor = getattr(ancestor, "parent", None)
    return False


class ParentNodeVisitor(ast.NodeVisitor):
    """Annotate every node below the visited root with a ``parent`` attribute.

    The root node itself is left without a ``parent`` attribute.
    """

    def __init__(self):
        self.parent = None

    def visit(self, node):
        """Recursively set ``child.parent`` for all descendants of *node*."""
        previous_parent = self.parent
        self.parent = node
        for child in ast.iter_child_nodes(node):
            child.parent = node
            self.visit(child)
        self.parent = previous_parent


class DocAnalyzer:
    """Cross-reference source definitions with objects documented by Sphinx.

    :param source_dir: Directory tree containing the ``.py`` files to scan.
    :param docs_dir: Directory tree containing the built Sphinx ``.html``.
    :param package_name: Public package name under which the definitions are
        documented. The module component of each source name is replaced by
        this value when looking it up in the Sphinx anchors, because the
        package re-exports its modules' contents with ``*`` imports.
    """

    def __init__(self, source_dir, docs_dir, package_name="tskit"):
        self.source_dir = source_dir
        self.docs_dir = docs_dir
        self.package_name = package_name
        # module name -> {"module.Qual.name": info dict}
        self.source_items = defaultdict(dict)
        # Anchor ids of every documented Python object found in the HTML.
        self.doc_items = set()

    def analyze_source(self):
        """Populate ``self.source_items`` from the ``.py`` files in source_dir.

        Classes and (async) functions are recorded; definitions nested inside
        a function body are skipped because they are local helpers that can
        never appear in the API documentation. A ``SyntaxError`` in a source
        file is propagated to the caller.
        """
        for root, dirs, files in os.walk(self.source_dir):
            # Sort in place so the walk (and therefore the report) is stable.
            dirs.sort()
            for file_name in sorted(files):
                if not file_name.endswith(".py"):
                    continue
                module_name = os.path.splitext(file_name)[0]
                path = os.path.join(root, file_name)
                # Bytes let ast.parse honour the coding cookie (UTF-8 default)
                # instead of depending on the platform's locale encoding.
                with open(path, "rb") as f:
                    tree = ast.parse(f.read(), filename=path)
                ParentNodeVisitor().visit(tree)
                for node in ast.walk(tree):
                    is_function = isinstance(node, _FUNCTION_NODES)
                    if not (is_function or isinstance(node, ast.ClassDef)):
                        continue
                    if _is_nested_in_function(node):
                        continue
                    item_name = node.name
                    parent_class = None
                    if is_function and isinstance(
                        getattr(node, "parent", None), ast.ClassDef
                    ):
                        parent_class = node.parent.name
                        item_name = f"{parent_class}.{node.name}"
                    full_name = f"{module_name}.{item_name}"
                    self.source_items[module_name][full_name] = {
                        "type": "function" if is_function else "class",
                        "internal": node.name.startswith("_"),
                        "has_docstring": ast.get_docstring(node) is not None,
                        "parent_class": parent_class,
                    }

    def analyze_docs(self):
        """Populate ``self.doc_items`` with anchor ids from the built HTML."""
        # Imported lazily so that the source-only analysis and the report
        # rendering can be used without BeautifulSoup installed.
        from bs4 import BeautifulSoup

        for root, dirs, files in os.walk(self.docs_dir):
            dirs.sort()
            for file_name in sorted(files):
                if not file_name.endswith(".html"):
                    continue
                path = os.path.join(root, file_name)
                with open(path, encoding="utf-8") as f:
                    soup = BeautifulSoup(f, "html.parser")
                for objtype in _SPHINX_OBJTYPES:
                    for definition in soup.find_all("dl", class_=f"py {objtype}"):
                        signature = definition.find(
                            "dt", class_="sig sig-object py"
                        )
                        # Entries rendered without an index target have no
                        # id attribute; skip them rather than crash.
                        anchor = None
                        if signature is not None:
                            anchor = signature.get("id")
                        if anchor:
                            self.doc_items.add(anchor)

    def generate_html_report(self):
        """Return the complete HTML report as a string."""
        parts = [_REPORT_HEAD]

        for module, items in self.source_items.items():
            parts.append(f'<div class="module">\n<h2>Module: {module}</h2>\n')

            # Separate classes (with their methods) from free functions.
            classes = {}
            functions = {}
            for full_name, info in items.items():
                if info["type"] == "class":
                    entry = classes.setdefault(
                        full_name, {"class_info": None, "methods": {}}
                    )
                    entry["class_info"] = info
                elif info["parent_class"] is None:
                    functions[full_name] = info
                else:
                    owner = f"{module}.{info['parent_class']}"
                    entry = classes.setdefault(
                        owner, {"class_info": None, "methods": {}}
                    )
                    entry["methods"][full_name] = info

            parts.append(self.generate_table("Isolated Functions", functions))

            for class_name, class_data in classes.items():
                short_class_name = class_name.rsplit(".", 1)[-1]
                # Test the class's own name, not the module-qualified one,
                # so private classes are excluded together with their methods.
                if short_class_name.startswith("_"):
                    continue
                class_info = class_data["class_info"]
                class_items = {
                    "class": {} if class_info is None else {class_name: class_info},
                    "methods": class_data["methods"],
                }
                parts.append(
                    self.generate_table(f"Class: {short_class_name}", class_items)
                )
            parts.append("</div>\n")

        parts.append(_REPORT_TAIL)
        return "".join(parts)

    def generate_table(self, title, items):
        """Return an HTML table titled *title* with one row per item.

        *items* is either a mapping of full name to info dict (a function
        table) or ``{"class": {...}, "methods": {...}}`` (a class table, with
        the class row first and its methods after it).
        """
        parts = [f"<h3>{title}</h3>\n", _TABLE_HEAD]

        if "class" in items:
            # This is a class table
            for name, info in items["class"].items():
                parts.append(self.generate_table_row(name, info))
            for name, info in items["methods"].items():
                parts.append(self.generate_table_row(name, info))
        else:
            # This is a function table
            for name, info in items.items():
                parts.append(self.generate_table_row(name, info))

        parts.append("</table>\n")
        return "".join(parts)

    def generate_table_row(self, name, info):
        """Return one table row for *name*, or ``""`` for private names.

        Rows are highlighted when the item is absent from the Sphinx output:
        ``highlight`` if it has a docstring, ``highlight-low`` if it has not.
        """
        short_name = name.split(".")[-1]
        if short_name.startswith("_"):
            return ""
        # The module component is replaced with the public package name
        # because the package exposes its contents through * imports.
        _, _, remainder = name.partition(".")
        lookup_name = f"{self.package_name}.{remainder}" if remainder else name
        in_sphinx = lookup_name in self.doc_items
        if in_sphinx:
            highlight = ""
        elif info["has_docstring"]:
            highlight = ' class="highlight"'
        else:
            highlight = ' class="highlight-low"'
        return (
            f"<tr{highlight}>"
            f"<td>{short_name}</td>"
            f"<td>{info['type']}</td>"
            f"<td>{_YES if info['has_docstring'] else _NO}</td>"
            f"<td>{_YES if in_sphinx else _NO}</td>"
            "</tr>\n"
        )

    def run(self):
        """Analyse the source and docs trees and return the HTML report."""
        self.analyze_source()
        self.analyze_docs()
        return self.generate_html_report()


def main():
    """Write the documentation report for the tskit source tree."""
    analyzer = DocAnalyzer("python/tskit", "docs/_build/html")
    report = analyzer.run()

    with open("documentation_report.html", "w", encoding="utf-8") as f:
        f.write(report)

    print("Report generated as documentation_report.html")


if __name__ == "__main__":
    main()