From b9d633594aea7bb78457fa025723f1052fec61ae Mon Sep 17 00:00:00 2001
From: OleJoik <57186239+OleJoik@users.noreply.github.com>
Date: Thu, 13 Jun 2024 07:32:09 +0200
Subject: [PATCH] Add utilities to convert from html to htpy (#26)

* Add utilities to convert from html to htpy

* Refactor cli and python interface into html2htpy

* Minor refactor, use iterable instead of loop

* Refactor tests (hardcoded expected, no formatting)

* Undo unintentional formatting of __init__.py

* String content escaping

* simplify cli app, input from file or stdin

* Remove the import of html2htpy in init file

* Update import of html2htpy in test

* Bugfix: accept jinja style templates w/period

* Docs for html2htpy

* Removing select.select when reading stdin

* Update Formatting, -f auto/ruff/black/none from path

* Default shorthand syntax, --explicit -> kwargs id, class_

* Refactor, more descriptive function name

* fix failing test

* Avoid intermediate var in _get_formatter, immediate returns

* html -> HTML, HTPY -> htpy, python -> Python

* Get rid of __name__ == "__main__" in html2htpy

* Adding black as dev dependency

* Updates to docs after changes to --shorthand flag

* Update tests for html2htpy

* Ruff lint --fix and ruff format

* mypy lint

* Remove 'Self', use 'Any'. Python 3.10 compatible

* Ruff formatting

* -i: Flag to include import htpy elements in output

* Remove "expected formatting" from test

* Bugfix: correct handling of void elements without / in endtag

* Additional import options: notably --imports=h

* --no-shorthand instead of --explicit options

* Fix minor outdated info in docs

* Fix typo in docs

* Another typo in docs

* Remove rogue print()
---
 docs/html2htpy.md       | 207 ++++++++++++++++++++
 htpy/html2htpy.py       | 408 ++++++++++++++++++++++++++++++++++++++++
 mkdocs.yml              |   1 +
 pyproject.toml          |   4 +
 tests/test_html2htpy.py | 366 +++++++++++++++++++++++++++++++++++
 5 files changed, 986 insertions(+)
 create mode 100644 docs/html2htpy.md
 create mode 100644 htpy/html2htpy.py
 create mode 100644 tests/test_html2htpy.py
diff --git a/docs/html2htpy.md b/docs/html2htpy.md
new file mode 100644
index 0000000..61d1992
--- /dev/null
+++ b/docs/html2htpy.md
@@ -0,0 +1,207 @@
+
+# Convert HTML to htpy code
+
+Maybe you already have a bunch of HTML, or templates that you would like to migrate to htpy. 
+We've got you covered. The utility command `html2htpy` ships with `htpy`, and can be used to transform existing 
+HTML into Python code (htpy!).
+
+```
+$ html2htpy -h
+usage: html2htpy [-h] [-f {auto,ruff,black,none}] [-i {yes,h,no}] [--no-shorthand] [input]
+
+positional arguments:
+  input                 input HTML from file or stdin
+
+options:
+  -h, --help            show this help message and exit
+  -f {auto,ruff,black,none}, --format {auto,ruff,black,none}
+                        Select one of the following formatting options: auto, ruff, black or none
+  -i {yes,h,no}, --imports {yes,h,no}
+                        Output mode for imports of found htpy elements
+  --no-shorthand        Use explicit `id` and `class_` kwargs instead of the shorthand #id.class syntax
+```
+
+
+Let's say you have an existing HTML file:
+
+```html title="index.html"
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>htpy Recipes</title>
+</head>
+<body>
+    <div id="header">
+        <h1>Welcome to the cooking site</h1>
+        <p>Your go-to place for delicious recipes!</p>
+    </div>
+
+    <div id="recipe-of-the-day" class="section">
+        <h2>Recipe of the Day: <span class="highlight">Spaghetti Carbonara</span></h2>
+        <p>This classic Italian dish is quick and easy to make.</p>
+    </div>
+
+    <div id="footer">
+        <p>&copy; 2024 My Cooking Site. All rights reserved.</p>
+    </div>
+</body>
+</html>
+```
+
+Now, if you run the command, it outputs the corresponding Python code (htpy).
+
+```
+$  html2htpy index.html
+```
+
+```py
+from htpy import body, div, h1, h2, head, html, meta, p, span, title
+
+html(lang="en")[
+    head[
+        meta(charset="UTF-8"),
+        meta(name="viewport", content="width=device-width, initial-scale=1.0"),
+        title["htpy Recipes"],
+    ],
+    body[
+        div("#header")[
+            h1["Welcome to the cooking site"], p["Your go-to place for delicious recipes!"]
+        ],
+        div("#recipe-of-the-day.section")[
+            h2["Recipe of the Day: ", span(".highlight")["Spaghetti Carbonara"]],
+            p["This classic Italian dish is quick and easy to make."],
+        ],
+        div("#footer")[p["© 2024 My Cooking Site. All rights reserved."]],
+    ],
+]
+```
+
+## Piping input/stdin stream
+
+You can also pipe input to `html2htpy`, for example `cat demo.html | html2htpy`.
+
+This can be combined with other workflows in the way that you find most suitable. 
+For example, you might pipe from your clipboard to `html2htpy`, and optionally direct the output to a file.
+
+#### Linux 
+
+```
+xclip -o -selection clipboard | html2htpy > output.py
+```
+
+#### Mac 
+
+```
+pbpaste | html2htpy > output.py
+```
+
+#### Windows
+
+```
+powershell Get-Clipboard | html2htpy > output.py
+```
+
+
+## Formatting the output
+`html2htpy` can format the output Python code using `black` or `ruff`.
+Select the preferred formatter with the `-f`/`--format` flag. Options are `auto`, `ruff`, `black` and `none`.
+
+By default, the selection will be `auto`, formatting if it finds a formatter on the path, preferring `ruff` if it's available.
+If no formatters are available on path, the output will not be formatted.
+
+
+## Import options
+
+You have a couple of options regarding imports with the `-i`/`--imports` flag. 
+Options are `yes` (default), `h`, `no`. 
+
+#### Module import of htpy: `--imports=h`
+
+Some people prefer to `import htpy as h` instead of importing individual elements from htpy.
+If this is you, you can use the `--imports=h` option to get corresponding output when using `html2htpy`.
+
+```py title="$ html2htpy --imports=h example.html"
+import htpy as h
+
+h.section("#main-section.hero.is-link")[
+    h.p(".subtitle.is-3.is-spaced")["Welcome"]
+]
+```
+
+## Explicit id and class kwargs
+
+
+If you prefer the explicit `id="id", class_="class"` kwargs syntax over the default htpy shorthand `#id.class` syntax, you can get it by passing the `--no-shorthand` flag.
+
+```html title="example.html"
+<section id="main-section" class="hero is-link">
+    <p class="subtitle is-3 is-spaced">Welcome</p>
+</section>
+```
+
+#### Default shorthand yields `#id.class`
+```py title="$ html2htpy example.html"
+from htpy import p, section
+
+section("#main-section.hero.is-link")[
+    p(".subtitle.is-3.is-spaced")["Welcome"]
+]
+```
+
+#### No shorthand yields kwargs `id`, `class_`
+```py title="$ html2htpy --no-shorthand example.html"
+from htpy import p, section
+
+section(id="main-section", class_="hero is-link")[
+    p(class_="subtitle is-3 is-spaced")["Welcome"]
+]
+```
+
+
+## Template interpolation to f-strings
+
+`html2htpy` will try to convert template variables to pythonic f-strings:
+
+`template {{ variables }}` -> `f"template { variables }"`
+
+Note that other typical template syntax, such as loops `{% for x in y %}`, cannot be transformed this way, 
+so you will often have to clean up a bit after `html2htpy` is done with its thing.
+
+See the example below:
+
+```html title="jinja.html"
+<body>
+    <h1>{{ heading }}</h1>
+    <p>Welcome to our cooking site, {{ user.name }}!</p>
+
+    <h2>Recipe of the Day: {{ recipe.name }}</h2>
+    <p>{{ recipe.description }}</p>
+
+    <h3>Instructions:</h3>
+    <ol>
+        {% for step in recipe.steps %}
+        <li>{{ step }}</li>
+        {% endfor %}
+    </ol>
+</body>
+```
+
+```py title="$ html2htpy jinja.html"
+from htpy import body, h1, h2, h3, li, ol, p
+
+body[
+    h1[f"{ heading }"],
+    p[f"Welcome to our cooking site, { user.name }!"],
+    h2[f"Recipe of the Day: { recipe.name }"],
+    p[f"{ recipe.description }"],
+    h3["Instructions:"],
+    ol[
+        """        {% for step in recipe.steps %}        """,
+        li[f"{ step }"],
+        """        {% endfor %}    """,
+    ],
+]
+```
+
diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py
new file mode 100644
index 0000000..aba138a
--- /dev/null
+++ b/htpy/html2htpy.py
@@ -0,0 +1,408 @@
+import argparse
+import re
+import shutil
+import subprocess
+import sys
+from abc import ABC, abstractmethod
+from html.parser import HTMLParser
+from typing import Any, Literal
+
+__all__ = ["html2htpy"]
+
+_void_elements = [
+    "area",
+    "base",
+    "br",
+    "col",
+    "embed",
+    "hr",
+    "img",
+    "input",
+    "link",
+    "meta",
+    "param",
+    "source",
+    "track",
+    "wbr",
+]
+
+
+class Tag:
+    """A parsed HTML element that can be serialized to an htpy expression.
+
+    `html_type` is the tag name as written in the markup; `python_type` is the
+    same name with dashes replaced by underscores (a valid Python identifier).
+    `attrs` holds the `(name, value)` pairs from the parser and `children` the
+    nested tags and already quoted text nodes.
+    """
+
+    def __init__(
+        self,
+        type: str,
+        attrs: list[tuple[str, str | None]],
+        parent: Any | None = None,
+    ):
+        self.html_type = type
+        self.python_type = type.replace("-", "_")
+
+        self.attrs = attrs
+        self.parent = parent
+        self.children: list[Any | str] = []
+
+    @staticmethod
+    def _quote(value: str) -> str:
+        """Return `value` as a double-quoted Python string literal.
+
+        Backslashes, double quotes and newlines are escaped so that the
+        generated code stays valid for any attribute value.
+        """
+        # Backslashes first, so the escapes added afterwards stay untouched.
+        escaped = value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
+        return f'"{escaped}"'
+
+    def serialize(self, shorthand_id_class: bool, use_h_prefix: bool) -> str:
+        """Render this tag and all of its children as htpy source code.
+
+        With `shorthand_id_class` the `id` and `class` attributes are merged
+        into one `"#id.class"` positional argument, otherwise they are emitted
+        as `id=`/`class_=` keyword arguments like every other attribute.
+        With `use_h_prefix` each element name is prefixed with `h.`.
+
+        Raises an `Exception` if a shorthand `id` or `class` has no value.
+        """
+        # Split the attributes into shorthand ones and plain keyword arguments.
+        shorthand: dict[str, str | None] = {}
+        kwattrs: list[tuple[str, str | None]] = []
+
+        for key, val in self.attrs:
+            if shorthand_id_class and key in ("id", "class"):
+                shorthand[key] = val
+            else:
+                kwattrs.append((key, val))
+
+        args: list[str] = []
+
+        if shorthand:
+            arg0 = ""
+            if "id" in shorthand:
+                if shorthand["id"] is None:
+                    raise Exception("Id attribute cannot be none")
+
+                arg0 += "#" + shorthand["id"]
+
+            if "class" in shorthand:
+                if shorthand["class"] is None:
+                    raise Exception("Class attribute cannot be none")
+
+                # split() without arguments collapses repeated whitespace, so
+                # `class="a  b"` does not produce an empty `..` segment.
+                arg0 += "." + ".".join(shorthand["class"].split())
+
+            args.append(self._quote(arg0))
+
+        for key, val in kwattrs:
+            key = key.replace("-", "_")
+            if key in ("class", "for"):
+                # Python keywords get a trailing underscore.
+                key += "_"
+
+            # Attributes without a value (or with an empty one) become flags.
+            args.append(f"{key}=True" if not val else f"{key}={self._quote(val)}")
+
+        attrs = f"({','.join(args)})" if args else ""
+
+        # Text children are stored as ready-made string literals, tags recurse.
+        children = ""
+        if self.children:
+            rendered = [
+                c.serialize(shorthand_id_class, use_h_prefix) if isinstance(c, Tag) else str(c)
+                for c in self.children
+            ]
+            children = f"[{','.join(rendered)}]"
+
+        prefix = "h." if use_h_prefix else ""
+        return f"{prefix}{self.python_type}{attrs}{children}"
+
+
+class Formatter(ABC):
+    @abstractmethod
+    def format(self, s: str) -> str:
+        raise NotImplementedError()
+
+
+class BlackFormatter(Formatter):
+    def format(self, s: str) -> str:
+        result = subprocess.run(
+            ["black", "-q", "-"],
+            input=s.encode("utf8"),
+            stdout=subprocess.PIPE,
+        )
+        return result.stdout.decode("utf8")
+
+
+class RuffFormatter(Formatter):
+    def format(self, s: str) -> str:
+        result = subprocess.run(
+            ["ruff", "format", "-"],
+            input=s.encode("utf8"),
+            stdout=subprocess.PIPE,
+        )
+        return result.stdout.decode("utf8")
+
+
+class HTPYParser(HTMLParser):
+    def __init__(self) -> None:
+        self._collected: list[Tag | str] = []
+        self._current: Tag | None = None
+        super().__init__()
+
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        t = Tag(tag, attrs, parent=self._current)
+
+        if not self._current:
+            self._collected.append(t)
+        else:
+            self._current.children.append(t)
+
+        if tag not in _void_elements:
+            self._current = t
+
+    def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        t = Tag(tag, attrs, parent=self._current)
+
+        if not self._current:
+            self._collected.append(t)
+        else:
+            self._current.children.append(t)
+
+    def handle_endtag(self, tag: str) -> None:
+        if not self._current:
+            raise Exception(f"Error parsing html: Closing tag {tag} when not inside any other tag")
+
+        if not self._current.html_type == tag:
+            raise Exception(
+                f"Error parsing html: Closing tag {tag} does not match the "
+                f"currently open tag ({self._current.html_type})"
+            )
+
+        self._current = self._current.parent
+
+    def handle_data(self, data: str) -> None:
+        if not data.isspace():
+            stringified_data = _convert_data_to_string(data)
+
+            if self._current:
+                self._current.children.append(stringified_data)
+            else:
+                self._collected.append(stringified_data)
+
+    def serialize_python(
+        self,
+        shorthand_id_class: bool = False,
+        import_mode: Literal["yes", "h", "no"] = "yes",
+        formatter: Formatter | None = None,
+    ) -> str:
+        o = ""
+
+        use_h_prefix = False
+
+        if import_mode == "yes":
+            unique_tags: set[str] = set()
+
+            def _tags_from_children(parent: Tag) -> None:
+                for c in parent.children:
+                    if isinstance(c, Tag):
+                        unique_tags.add(c.python_type)
+                        _tags_from_children(c)
+
+            for t in self._collected:
+                if isinstance(t, Tag):
+                    unique_tags.add(t.python_type)
+                    _tags_from_children(t)
+
+            sorted_tags = list(unique_tags)
+            sorted_tags.sort()
+
+            o += f'from htpy import {", ".join(sorted_tags)}\n'
+
+        elif import_mode == "h":
+            o += "import htpy as h\n"
+            use_h_prefix = True
+
+        if len(self._collected) == 1:
+            o += _serialize(self._collected[0], shorthand_id_class, use_h_prefix)
+
+        else:
+            o += "["
+            for t in self._collected:
+                o += _serialize(t, shorthand_id_class, use_h_prefix) + ","
+            o = o[:-1] + "]"
+
+        if formatter:
+            return formatter.format(o)
+        else:
+            return o
+
+
+def html2htpy(
+    html: str,
+    shorthand_id_class: bool = True,
+    import_mode: Literal["yes", "h", "no"] = "yes",
+    formatter: Formatter | None = None,
+) -> str:
+    parser = HTPYParser()
+    parser.feed(html)
+
+    return parser.serialize_python(shorthand_id_class, import_mode, formatter)
+
+
+def _convert_data_to_string(data: str) -> str:
+    _data = str(data)
+
+    is_multiline = "\n" in _data
+
+    _data = _data.replace("\n", "")
+
+    # escape unescaped dblquote: " -> \"
+    _data = re.compile(r'(?<![\\])"').sub('\\"', _data)
+
+    template_string_pattern = re.compile(r"\{\{\s*[\w\.]+\s*\}\}")
+
+    has_jinja_pattern = re.search(template_string_pattern, _data)
+    if has_jinja_pattern:
+        # regex replaces these 3 cases:
+        # {{ var.xx }} -> { var.xx }
+        # { -> {{
+        # } -> }}
+        template_string_replace_pattern = re.compile(
+            r"(\{\{\s*[\w\.]+\s*\}\}|(?<![\{]){(?![\{])|(?<![\}])}(?![\}]))"
+        )
+
+        def replacer(match: re.Match[str]) -> str:
+            captured = match.group(1)
+
+            if captured.startswith("{{"):
+                return captured[1:-1]
+
+            if captured == "{":
+                return "{{"
+
+            return "}}"
+
+        _data = template_string_replace_pattern.sub(replacer, _data)
+        if is_multiline:
+            _data = '""' + _data + '""'
+
+        _data = 'f"' + _data + '"'
+    else:
+        if is_multiline:
+            _data = '""' + _data + '""'
+
+        _data = '"' + _data + '"'
+
+    return _data
+
+
+def _serialize(el: Tag | str, shorthand_id_class: bool, use_h_prefix: bool) -> str:
+    if isinstance(el, Tag):
+        return el.serialize(shorthand_id_class, use_h_prefix)
+    else:
+        return str(el)
+
+
+def _get_formatter(format: Literal["auto", "ruff", "black", "none"]) -> Formatter | None:
+    """Return the formatter selected by the `--format` option.
+
+    `ruff` and `black` must be installed when requested explicitly; otherwise
+    an error is printed to stderr and the process exits with status 1. `auto`
+    picks `ruff`, then `black`, whichever is found first on the path. `none`
+    (or `auto` without any installed formatter) returns `None`.
+    """
+    formatters: dict[str, type[Formatter]] = {
+        "ruff": RuffFormatter,
+        "black": BlackFormatter,
+    }
+
+    # An explicitly requested formatter has to be installed.
+    if format in formatters:
+        if _is_command_available(format):
+            return formatters[format]()
+
+        _printerr(f"Selected formatter ({format}) is not installed.")
+        _printerr("Please install it or select another formatter.")
+        _printerr("`html2htpy -h` for help")
+        sys.exit(1)
+
+    if format == "auto":
+        # dicts keep insertion order, so ruff is preferred over black.
+        for command, formatter_class in formatters.items():
+            if _is_command_available(command):
+                return formatter_class()
+
+    return None
+
+
+def _is_command_available(command: str) -> bool:
+    return shutil.which(command) is not None
+
+
+def main() -> None:
+    """Command line entry point for `html2htpy`.
+
+    Parses the arguments, reads HTML from the given file (or from stdin when
+    no file is given) and prints the converted htpy code to stdout. Exits with
+    status 1 if reading the input is interrupted.
+    """
+    parser = argparse.ArgumentParser(prog="html2htpy")
+
+    parser.add_argument(
+        "-f",
+        "--format",
+        choices=["auto", "ruff", "black", "none"],
+        default="auto",
+        help="Select one of the following formatting options: auto, ruff, black or none",
+    )
+    parser.add_argument(
+        "-i",
+        "--imports",
+        choices=["yes", "h", "no"],
+        help="Output mode for imports of found htpy elements",
+        default="yes",
+    )
+    parser.add_argument(
+        "--no-shorthand",
+        help="Use explicit `id` and `class_` kwargs instead of the shorthand #id.class syntax",
+        action="store_true",
+    )
+    parser.add_argument(
+        "input",
+        type=argparse.FileType("r"),
+        nargs="?",
+        default=sys.stdin,
+        help="input HTML from file or stdin",
+    )
+
+    args = parser.parse_args()
+
+    # argparse has already opened the input (falling back to stdin when no
+    # file is given), so a file and a stream are read in exactly the same way.
+    try:
+        html = args.input.read()
+    except KeyboardInterrupt:
+        # Raised when the read is interrupted (Ctrl-C).
+        _printerr(
+            "\nInterrupted",
+        )
+        sys.exit(1)
+
+    # The CLI flag is negative (--no-shorthand), the API argument is positive.
+    shorthand: bool = not args.no_shorthand
+    imports: Literal["yes", "h", "no"] = args.imports
+
+    formatter = _get_formatter(args.format)
+
+    print(html2htpy(html, shorthand, imports, formatter))
+
+
+def _printerr(value: str) -> None:
+    print(value, file=sys.stderr)
diff --git a/mkdocs.yml b/mkdocs.yml
index fd1a2f4..fb2f2ad 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -16,6 +16,7 @@ nav:
   - static-typing.md
   - django.md
   - streaming.md
+  - html2htpy.md
   - faq.md
   - references.md
 markdown_extensions:
diff --git a/pyproject.toml b/pyproject.toml
index 890873a..40f4b01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ optional-dependencies.dev = [
     "mypy",
     "pyright",
     "pytest",
+    "black",
     "ruff",
     "django",
     "django-stubs",
@@ -41,6 +42,9 @@ Repository = "https://github.com/pelme/htpy"
 Documentation = "https://htpy.dev"
 Issues = "https://github.com/pelme/htpy/issues"
 
+[project.scripts]
+html2htpy = "htpy.html2htpy:main"
+
 [build-system]
 requires = ["flit_core >=3.2,<4"]
 build-backend = "flit_core.buildapi"
diff --git a/tests/test_html2htpy.py b/tests/test_html2htpy.py
new file mode 100644
index 0000000..cfd0737
--- /dev/null
+++ b/tests/test_html2htpy.py
@@ -0,0 +1,366 @@
+import textwrap
+
+import pytest
+
+from htpy.html2htpy import BlackFormatter, RuffFormatter, html2htpy
+
+
+def test_convert_default_shorthand_id_and_class() -> None:
+    input = """
+        <div id="div-id" class="some-class other-class">
+          <p>This is a paragraph.</p>
+        </div>
+    """
+
+    actual = html2htpy(input, import_mode="no")
+    expected = 'div("#div-id.some-class.other-class")[p["This is a paragraph."]]'
+
+    assert actual == expected
+
+
+def test_convert_explicit_id_class_syntas() -> None:
+    input = """
+        <div id="div-id" class="some-class other-class">
+          <p>This is a paragraph.</p>
+        </div>
+    """
+
+    actual = html2htpy(input, shorthand_id_class=False, import_mode="no")
+    expected = 'div(id="div-id",class_="some-class other-class")[p["This is a paragraph."]]'
+
+    assert actual == expected
+
+
+nested_html = """
+    <div>
+      <p>This is a <span>nested</span> element.</p>
+      <p>Another <a href="#">nested <strong>tag</strong></a>.</p>
+    </div>
+"""
+
+
+def test_convert_nested_element_without_formatting() -> None:
+    actual = html2htpy(nested_html, formatter=None, import_mode="no")
+
+    expected = (
+        "div["
+        'p["This is a ",span["nested"]," element."],'
+        'p["Another ",a(href="#")["nested ",strong["tag"]],"."]'
+        "]"
+    )
+
+    assert actual == expected
+
+
+def test_convert_nested_element_ruff_formatting() -> None:
+    actual = html2htpy(nested_html, formatter=RuffFormatter(), import_mode="no")
+    assert actual == textwrap.dedent(
+        """\
+        div[
+            p["This is a ", span["nested"], " element."],
+            p["Another ", a(href="#")["nested ", strong["tag"]], "."],
+        ]
+        """
+    )
+
+
+def test_convert_nested_element_black_formatting() -> None:
+    actual = html2htpy(nested_html, formatter=BlackFormatter(), import_mode="no")
+    assert actual == textwrap.dedent(
+        """\
+        div[
+            p["This is a ", span["nested"], " element."],
+            p["Another ", a(href="#")["nested ", strong["tag"]], "."],
+        ]
+        """
+    )
+
+
+def test_convert_nested_element___import_mode_yes() -> None:
+    actual = html2htpy(nested_html, import_mode="yes")
+    assert actual == (
+        "from htpy import a, div, p, span, strong\n"
+        "div["
+        'p["This is a ",span["nested"]," element."],'
+        'p["Another ",a(href="#")["nested ",strong["tag"]],"."]'
+        "]"
+    )
+
+
+def test_convert_nested_element___import_mode_h() -> None:
+    actual = html2htpy(nested_html, import_mode="h")
+    assert actual == (
+        "import htpy as h\n"
+        "h.div["
+        'h.p["This is a ",h.span["nested"]," element."],'
+        'h.p["Another ",h.a(href="#")["nested ",h.strong["tag"]],"."]'
+        "]"
+    )
+
+
+def test_convert_custom_element_include_imports() -> None:
+    input = '<custom-element attribute="value">Custom content</custom-element>'
+    actual = html2htpy(input, import_mode="yes")
+
+    assert actual == (
+        "from htpy import custom_element\n" 'custom_element(attribute="value")["Custom content"]'
+    )
+
+
+def test_convert_self_closing_tags() -> None:
+    input = """
+        <img src="image.jpg" alt="An image" />
+        <br />
+        <input type="text" />
+    """
+
+    actual = html2htpy(input, import_mode="no")
+
+    assert actual == '[img(src="image.jpg",alt="An image"),br,input(type="text")]'
+
+
+def test_convert_attribute_with_special_characters() -> None:
+    input = """<img src="path/to/image.jpg" alt="A <test> & 'image'" />"""
+    actual = html2htpy(input, import_mode="no")
+    assert actual == """img(src="path/to/image.jpg",alt="A <test> & 'image'")"""
+
+
+def test_convert_ignores_comments() -> None:
+    input = """
+    <!-- This is a comment -->
+    <div>Content <!-- Another comment --> inside</div>
+    """
+    actual = html2htpy(input, import_mode="no")
+    assert actual == 'div["Content "," inside"]'
+
+
+def test_convert_special_characters() -> None:
+    input = """
+    <p>Special characters: &amp; &lt; &gt; &quot; &apos; &copy;</p>
+    """
+
+    actual = html2htpy(input, import_mode="no")
+    assert actual == 'p["Special characters: & < > \\" \' ©"]'
+
+
+def test_convert_f_string_escaping() -> None:
+    input = """
+        <p>{{ variable }} is "a" { paragraph }.</p>
+    """
+
+    actual = html2htpy(input, import_mode="no")
+    expected = r'p[f"{ variable } is \"a\" {{ paragraph }}."]'
+
+    assert actual == expected
+
+
+def test_convert_f_string_escaping_complex() -> None:
+    input = """
+    <body>
+      <h1>{{ heading }}</h1>
+      <p>Welcome to our cooking site, {{ user.name }}!</p>
+
+      <h2>Recipe of the Day: {{ recipe.name }}</h2>
+      <p>{{ recipe.description }}</p>
+
+      <h3>Instructions:</h3>
+      <ol>
+          {% for step in recipe.steps %}
+          <li>{{ step }}</li>
+          {% endfor %}
+      </ol>
+    </body>
+    """
+
+    actual = html2htpy(input, formatter=RuffFormatter(), import_mode="no")
+    expected = textwrap.dedent(
+        """\
+        body[
+            h1[f"{ heading }"],
+            p[f"Welcome to our cooking site, { user.name }!"],
+            h2[f"Recipe of the Day: { recipe.name }"],
+            p[f"{ recipe.description }"],
+            h3["Instructions:"],
+            ol[
+                \"\"\"          {% for step in recipe.steps %}          \"\"\",
+                li[f"{ step }"],
+                \"\"\"          {% endfor %}      \"\"\",
+            ],
+        ]
+    """
+    )
+
+    assert actual == expected
+
+
+def test_convert_script_tag() -> None:
+    input = """
+        <script type="text/javascript">alert('This is a script');</script>
+    """
+
+    actual = html2htpy(input, import_mode="no")
+    assert actual == """script(type="text/javascript")["alert('This is a script');"]"""
+
+
+def test_convert_style_tag() -> None:
+    input = """
+        <style>body { background-color: #fff; }</style>
+    """
+    actual = html2htpy(input, import_mode="no")
+    assert actual == """style["body { background-color: #fff; }"]"""
+
+
+def test_convert_html_doctype() -> None:
+    input = """
+        <!DOCTYPE html>
+        <html>
+        <head>
+          <title>Test Document</title>
+        </head>
+        <body>
+          <h1>Header</h1>
+          <p>Paragraph</p>
+        </body>
+        </html>
+    """
+
+    actual = html2htpy(input, import_mode="no")
+    expected = """html[head[title["Test Document"]],body[h1["Header"],p["Paragraph"]]]"""
+
+    assert actual == expected
+
+
+def test_convert_empty_elements() -> None:
+    input = """
+        <div></div>
+        <p></p>
+        <span></span>
+    """
+
+    actual = html2htpy(input, import_mode="no")
+    assert actual == "[div,p,span]"
+
+
+def test_convert_void_elements() -> None:
+    input = """
+      <div>
+        <div>
+          <input type="text" />
+        </div>
+
+        <div>
+          <input type="text">
+        </div>
+      </div>
+    """
+
+    actual = html2htpy(input, import_mode="no")
+    assert actual == 'div[div[input(type="text")],div[input(type="text")]]'
+
+
+def test_convert_custom_tag() -> None:
+    input = """
+        <custom-element attribute="value">Custom content</custom-element>
+    """
+
+    actual = html2htpy(input, import_mode="no")
+    assert actual == """custom_element(attribute="value")["Custom content"]"""
+
+
+def test_convert_malformed_html() -> None:
+    input = """
+        <div>
+          <p>Paragraph without closing tag
+          <div>Another div</p>
+        </div>
+    """
+
+    with pytest.raises(Exception) as e:
+        html2htpy(input)
+
+    assert "Closing tag p does not match the currently open tag (div)" in str(e.value)
+
+
+def test_convert_attributes_without_values() -> None:
+    input = """
+        <input type="checkbox" checked />
+        <option selected>Option</option>
+    """
+    actual = html2htpy(input, import_mode="no")
+    assert actual == """[input(type="checkbox",checked=True),option(selected=True)["Option"]]"""
+
+
+def test_convert_complex_section() -> None:
+    input = """
+        <section class="hero is-fullheight is-link">
+          <div class="hero-body">
+            <div class='container'>
+              <p class="subtitle is-3 is-spaced">Welcome</p>
+              <p class="title is-1 is-spaced">Student code: {{student_code}}</p>
+            </div>
+          </div>
+        </section>
+    """
+
+    actual = html2htpy(input, shorthand_id_class=False, import_mode="no")
+    expected = (
+        'section(class_="hero is-fullheight is-link")['
+        'div(class_="hero-body")['
+        'div(class_="container")['
+        'p(class_="subtitle is-3 is-spaced")["Welcome"],'
+        'p(class_="title is-1 is-spaced")[f"Student code: {student_code}"]'
+        "]"
+        "]"
+        "]"
+    )
+
+    assert actual == expected
+
+
+def test_convert_complex_svg() -> None:
+    path_d: str = (
+        "m16.862 4.487 1.687-1.688a1.875 1.875 0 1 1 2"
+        ".652 2.652L10.582 16.07a4.5 4.5 0 0 1-1.897 1"
+        ".13L6 18l.8-2.685a4.5 4.5 0 0 1 1.13-1.897l8."
+        "932-8.931Zm0 0L19.5 7.125M18 14v4.75A2.25 2.2"
+        "5 0 0 1 15.75 21H5.25A2.25 2.25 0 0 1 3 18.75"
+        "V8.25A2.25 2.25 0 0 1 5.25 6H10"
+    )
+
+    input = f"""
+        <svg
+          xmlns="http://www.w3.org/2000/svg"
+          fill="none" viewBox="0 0 24 24"
+          stroke-width="1.5"
+          stroke="currentColor"
+          class="w-6 h-6">
+            <path
+              stroke-linecap="round"
+              stroke-linejoin="round"
+              d="{path_d}"
+            />
+        </svg>
+    """
+
+    actual_output = html2htpy(input, formatter=BlackFormatter(), import_mode="no")
+
+    expected_output = textwrap.dedent(
+        f"""\
+            svg(
+                ".w-6.h-6",
+                xmlns="http://www.w3.org/2000/svg",
+                fill="none",
+                viewbox="0 0 24 24",
+                stroke_width="1.5",
+                stroke="currentColor",
+            )[
+                path(
+                    stroke_linecap="round",
+                    stroke_linejoin="round",
+                    d="{path_d}",
+                )
+            ]
+        """
+    )
+
+    assert expected_output == actual_output