Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add NumberedHeadingsPreprocessor #2187

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -1634,6 +1634,7 @@ raw template
{%- endblock in_prompt -%}
"""


exporter_attr = AttrExporter()
output_attr, _ = exporter_attr.from_notebook_node(nb)
assert "raw template" in output_attr
Expand Down
2 changes: 2 additions & 0 deletions docs/source/api/preprocessors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ Converting text

.. autoclass:: HighlightMagicsPreprocessor

.. autoclass:: NumberedHeadingsPreprocessor

Metadata and header control
~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
1 change: 1 addition & 0 deletions nbconvert/exporters/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ class Exporter(LoggingConfigurable):
"nbconvert.preprocessors.ExtractOutputPreprocessor",
"nbconvert.preprocessors.ExtractAttachmentsPreprocessor",
"nbconvert.preprocessors.ClearMetadataPreprocessor",
"nbconvert.preprocessors.NumberedHeadingsPreprocessor",
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the preprocessor here because otherwise it did not run with `python -m nbconvert --to=html --NumberedHeadingsPreprocessor.enabled=true notebook.ipynb` (even though the option to enable it appeared in the output of `python -m nbconvert --help-all`).

],
help="""List of preprocessors available by default, by name, namespace,
instance, or type.""",
Expand Down
2 changes: 2 additions & 0 deletions nbconvert/preprocessors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .extractoutput import ExtractOutputPreprocessor
from .highlightmagics import HighlightMagicsPreprocessor
from .latex import LatexPreprocessor
from .numbered_headings import NumberedHeadingsPreprocessor
from .regexremove import RegexRemovePreprocessor
from .svg2pdf import SVG2PDFPreprocessor
from .tagremove import TagRemovePreprocessor
Expand All @@ -30,6 +31,7 @@
"ExtractOutputPreprocessor",
"HighlightMagicsPreprocessor",
"LatexPreprocessor",
"NumberedHeadingsPreprocessor",
"RegexRemovePreprocessor",
"SVG2PDFPreprocessor",
"TagRemovePreprocessor",
Expand Down
65 changes: 65 additions & 0 deletions nbconvert/preprocessors/numbered_headings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
Preprocessor that transforms markdown cells: insert numbering in front of headings
"""

from nbconvert.preprocessors.base import Preprocessor

# Feature-detect the mistune 3 API at import time. MISTUNE_V3 records whether
# the imports below succeeded; the names `mistune`, `BlockState` and
# `MarkdownRenderer` are only bound when it is True.
try:  # for Mistune >= 3.0
    import mistune
    from mistune.core import BlockState
    from mistune.renderers.markdown import MarkdownRenderer

    MISTUNE_V3 = True
except ImportError:  # older Mistune (2.x): the imports above are unavailable
    MISTUNE_V3 = False

# Message used when the preprocessor is requested without a suitable mistune.
WRONG_MISTUNE_VERSION_ERROR = "Error: NumberedHeadingsPreprocessor requires mistune >= 3"


class NumberedHeadingsPreprocessor(Preprocessor):
    """Pre-processor that will rewrite markdown headings to include numberings.

    Headings are numbered hierarchically ("1", "1.1", "1.1.1", ...). The
    counter is shared by all markdown cells of a notebook, so numbering
    continues from one cell to the next, and it is reset at the start of
    every notebook.

    The rewriting relies on the mistune 3 API (AST parser and markdown
    renderer). With an older mistune the preprocessor can still be
    constructed, but processing a markdown cell raises ``ImportError``.
    """

    def __init__(self, *args, **kwargs):
        """Create the markdown parser/renderer and initialise the counter.

        Construction deliberately never fails on an unsupported mistune
        version: the class is listed among the exporter's default
        preprocessors, so it may be instantiated even when it is disabled.
        The version check is deferred to ``preprocess_cell``.
        """
        super().__init__(*args, **kwargs)
        self.md_parser = None
        self.md_renderer = None
        if MISTUNE_V3:
            # renderer=None makes the parser return the token list (AST).
            self.md_parser = mistune.create_markdown(renderer=None)
            self.md_renderer = MarkdownRenderer()
        self.current_numbering = [0]

    def format_numbering(self):
        """Return a string representation of the current numbering"""
        return ".".join(str(n) for n in self.current_numbering)

    def _inc_current_numbering(self, level):
        """Advance the counter for a heading of the given 1-based level.

        Going deeper pads the counter with zeros for the skipped levels;
        going shallower drops the counters of the deeper levels.
        """
        depth = len(self.current_numbering)
        if level > depth:
            self.current_numbering = self.current_numbering + [0] * (level - depth)
        elif level < depth:
            self.current_numbering = self.current_numbering[:level]
        self.current_numbering[level - 1] += 1

    def preprocess(self, nb, resources):
        """Number the headings of a notebook, starting again from 1.

        Resetting here keeps a reused preprocessor instance from carrying
        the numbering of a previous notebook over into the next one.
        """
        self.current_numbering = [0]
        return super().preprocess(nb, resources)

    def preprocess_cell(self, cell, resources, index):
        """Rewrite all the headings in the cell if it is markdown.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process
        index : int
            Index of the cell being processed

        Returns
        -------
        The ``(cell, resources)`` pair. Non-markdown cells and markdown
        cells without headings are returned untouched.

        Raises
        ------
        ImportError
            If a markdown cell is processed without mistune >= 3.
        """
        if cell["cell_type"] != "markdown":
            return cell, resources
        if not MISTUNE_V3:
            raise ImportError(WRONG_MISTUNE_VERSION_ERROR)
        try:
            md_ast = self.md_parser(cell["source"])
            if isinstance(md_ast, str):
                # The AST parser is expected to return a token list; an
                # explicit check (unlike assert) also survives ``python -O``.
                raise TypeError("markdown parser returned a string instead of an AST")
            found_heading = False
            for element in md_ast:
                if element["type"] != "heading":
                    continue
                found_heading = True
                self._inc_current_numbering(element["attrs"]["level"])
                prefix = self.format_numbering() + " "
                children = element.setdefault("children", [])
                if children and children[0]["type"] == "text":
                    children[0]["raw"] = prefix + children[0]["raw"]
                else:
                    # The heading is empty or starts with inline markup
                    # (emphasis, link, code, ...): prepend a separate text
                    # node so that it still receives its number.
                    children.insert(0, {"type": "text", "raw": prefix})
            if found_heading:
                # Only re-render when something changed, so that cells
                # without headings keep their exact original source.
                cell["source"] = self.md_renderer(md_ast, BlockState())
        except Exception:
            # Best effort: a cell that cannot be processed is left as is.
            self.log.warning("Failed processing cell headings", exc_info=True)
        return cell, resources
106 changes: 106 additions & 0 deletions tests/preprocessors/test_numbered_headings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""
Module with tests for the Numbered Headings preprocessor.
"""

from nbformat import v4 as nbformat

from nbconvert.preprocessors.numbered_headings import NumberedHeadingsPreprocessor

from .base import PreprocessorTestsBase

# Fixtures come in pairs: MARKDOWN_n is the source of a markdown cell and
# MARKDOWN_n_POST is the text the test expects after preprocessing (both are
# compared after .strip()). The expected numbers continue from one cell to
# the next because the cells are processed in order within one notebook.

# First markdown cell: a top-level heading followed by a sub-heading.
MARKDOWN_1 = """
# Heading 1

## Sub-heading

some content
"""

MARKDOWN_1_POST = """
# 1 Heading 1

## 1.1 Sub-heading

some content
"""


# Second markdown cell: starts with a sub-heading that continues section 1,
# then opens a new top-level section with nested headings.
MARKDOWN_2 = """

## Second sub-heading

# Another main heading

## Sub-heading


some more content

### Third heading
"""

MARKDOWN_2_POST = """

## 1.2 Second sub-heading

# 2 Another main heading

## 2.1 Sub-heading

some more content

### 2.1.1 Third heading
"""

# Third markdown cell: '#' lines inside a fenced code block must not be
# treated as headings.
MARKDOWN_3 = """
# HEADING

```
# this is not a heading

## this neither
```
"""

MARKDOWN_3_POST = """
# 3 HEADING

```
# this is not a heading

## this neither
```
"""


class TestNumberedHeadings(PreprocessorTestsBase):
    """Tests for NumberedHeadingsPreprocessor."""

    def build_notebook(self):
        """Build a notebook that interleaves code cells with markdown cells."""
        return nbformat.new_notebook(
            cells=[
                nbformat.new_code_cell(source="$ e $", execution_count=1),
                nbformat.new_markdown_cell(source=MARKDOWN_1),
                nbformat.new_code_cell(source="$ e $", execution_count=1),
                nbformat.new_markdown_cell(source=MARKDOWN_2),
                nbformat.new_markdown_cell(source=MARKDOWN_3),
            ]
        )

    def build_preprocessor(self):
        """Return an enabled NumberedHeadingsPreprocessor instance."""
        numbering = NumberedHeadingsPreprocessor()
        numbering.enabled = True
        return numbering

    def test_constructor(self):
        """Can a NumberedHeadingsPreprocessor be constructed?"""
        self.build_preprocessor()

    def test_output(self):
        """Markdown cells come out with hierarchically numbered headings."""
        notebook = self.build_notebook()
        resources = self.build_resources()
        numbering = self.build_preprocessor()
        notebook, resources = numbering(notebook, resources)

        # (cell index, expected source) for every markdown cell, in order.
        expectations = (
            (1, MARKDOWN_1_POST),
            (3, MARKDOWN_2_POST),
            (4, MARKDOWN_3_POST),
        )
        for position, expected in expectations:
            assert notebook.cells[position].source.strip() == expected.strip()
Loading