diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py
index abea239..3bcc3de 100644
--- a/htpy/html2htpy.py
+++ b/htpy/html2htpy.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
import argparse
import keyword
import re
@@ -6,7 +8,7 @@
import sys
from abc import ABC, abstractmethod
from html.parser import HTMLParser
-from typing import Any, Literal
+from typing import Literal
__all__ = ["html2htpy"]
@@ -28,86 +30,136 @@
]
-class Tag:
- def __init__(
- self,
- type: str,
- attrs: list[tuple[str, str | None]],
- parent: Any | None = None,
- ):
- self.html_type = type
- self.python_type = type
- if "-" in self.python_type:
- self.python_type = self.python_type.replace("-", "_")
+def _quote(x: str) -> str:
+    """Return ``x`` as the source text of a Python string literal.
+
+    Double quotes are preferred. Single quotes are used when ``x`` contains a
+    double quote but no single quote, so the common cases need no escaping.
+    Backslashes and line breaks are always escaped, and when both quote
+    characters occur the double quotes are escaped, so the result is always
+    a valid literal that evaluates back to ``x``.
+    """
+    # Escape backslashes first so the escapes added afterwards are not doubled.
+    escaped = x.replace("\\", "\\\\").replace("\n", "\\n").replace("\r", "\\r")
+    if '"' not in escaped:
+        return f'"{escaped}"'
- self.attrs = attrs
- self.parent = parent
- self.children: list[Any | str] = []
+    if "'" not in escaped:
+        return f"'{escaped}'"
+    # Both quote characters are present: keep double quotes and escape them.
+    return '"' + escaped.replace('"', '\\"') + '"'
- def serialize(self, shorthand_id_class: bool, use_h_prefix: bool) -> str:
- _positional_attrs: dict[str, str | None] = {}
- _attrs = ""
- _kwattrs: list[tuple[str, str | None]] = []
- for key, value in self.attrs:
- if key in ("id", "class") and shorthand_id_class:
- _positional_attrs[key] = value
- else:
- _kwattrs.append((key, value))
+def _format_value(value: str | None) -> str:
+    """Render an attribute value as Python source text.
+
+    ``None`` is rendered as the literal ``True``; any string is rendered as a
+    quoted string literal via ``_quote``.
+    """
+    return "True" if value is None else _quote(value)
- if _positional_attrs or _kwattrs:
- _attrs += "("
- if _positional_attrs:
- arg0 = ""
- if "id" in _positional_attrs:
- if _positional_attrs["id"] is None:
- raise Exception("Id attribute cannot be none")
+def _format_id_class_shorthand_attrs(id_: str, class_: str) -> str:
+    """Build the quoted ``"#id.class1.class2"`` shorthand argument.
+
+    ``id_`` and ``class_`` are the raw attribute values; pass ``""`` for an
+    attribute that is absent. Class names are separated on any run of
+    whitespace. Returns ``""`` when there is neither an id nor a class.
+    """
+    # split() without a separator collapses repeated whitespace and ignores
+    # leading/trailing whitespace, so no empty class names (".a..b") appear.
+    classes = class_.split()
+    id_part = f"#{id_}" if id_ else ""
+    class_part = "".join(f".{name}" for name in classes)
+    result = id_part + class_part
- arg0 += "#" + _positional_attrs["id"]
+    if result:
+        return f'"{result}"'
- if "class" in _positional_attrs:
- if _positional_attrs["class"] is None:
- raise Exception("Class attribute cannot be none")
+    return ""
- classes = ".".join(_positional_attrs["class"].split(" "))
- arg0 += "." + classes
- _attrs += '"' + arg0 + '",'
+def _format_keyword_attrs(attrs: dict[str, str | None]) -> str:
+    """Render ``attrs`` as comma-separated ``key=value`` keyword arguments.
+
+    Keys are used as given; values are rendered with ``_format_value``.
+    Returns ``""`` when ``attrs`` is empty.
+    """
+    rendered = [f"{key}={_format_value(value)}" for key, value in attrs.items()]
- for key, value in _kwattrs:
- if "-" in key:
- key = key.replace("-", "_")
+    return ", ".join(rendered)
- if keyword.iskeyword(key):
- key += "_"
- if not value:
- _attrs += f"{key}=True,"
+def _format_dict_attrs(attrs: dict[str, str | None]) -> str:
+    """Render ``attrs`` as the source text of a Python dict literal.
+
+    Keys are quoted with ``_quote`` and values rendered with
+    ``_format_value``. Returns ``""`` (not ``"{}"``) when ``attrs`` is empty.
+    """
+    if not attrs:
+        return ""
+
+    entries = ", ".join(
+        f"{_quote(name)}: {_format_value(value)}" for name, value in attrs.items()
+    )
+    return "{" + entries + "}"
+
+
+def _format_attrs(attrs: dict[str, str | None], shorthand_id_class: bool) -> str:
+    """Render an element's attributes as a parenthesised argument list.
+
+    The arguments are emitted in this order: the optional ``"#id.class"``
+    shorthand string (only when ``shorthand_id_class`` is true), a dict
+    literal holding every attribute whose name cannot be written as a Python
+    keyword argument, and finally the keyword arguments. Dashes in attribute
+    names are turned into underscores for the keyword form.
+
+    Returns ``""`` when there is nothing to emit. ``attrs`` is not modified.
+    """
+    # Work on a copy so popping id/class never mutates the caller's dict.
+    remaining = dict(attrs)
+    keyword_attrs: dict[str, str | None] = {}
+    dict_attrs: dict[str, str | None] = {}
+
+    shorthand_id_class_str = ""
+    if shorthand_id_class:
+        shorthand_id_class_str = _format_id_class_shorthand_attrs(
+            remaining.pop("id", "") or "", remaining.pop("class", "") or ""
+        )
+
+    for key, value in remaining.items():
+        potential_keyword_key = key.replace("-", "_")
+        if potential_keyword_key.isidentifier():
+            if keyword.iskeyword(potential_keyword_key):
+                # Reserved words cannot be keyword-argument names; append "_".
+                keyword_attrs[potential_keyword_key + "_"] = value
             else:
- _attrs += f'{key}="{value}",'
+                keyword_attrs[potential_keyword_key] = value
+        else:
+            dict_attrs[key] = value
+
+    # The dict literal is a positional argument, and Python requires all
+    # positional arguments to come before keyword arguments in a call.
+    _attrs = ", ".join(
+        x
+        for x in [
+            shorthand_id_class_str,
+            _format_dict_attrs(dict_attrs),
+            _format_keyword_attrs(keyword_attrs),
+        ]
+        if x
+    )
- if _positional_attrs or _kwattrs:
- _attrs = _attrs[:-1] + ")"
+    if not _attrs:
+        return ""
- _children: str = ""
- if self.children:
- _children += "["
- for c in self.children:
- if isinstance(c, Tag):
- _children += c.serialize(shorthand_id_class, use_h_prefix)
- else:
- _children += str(c)
+    return f"({_attrs})"
- _children += ","
- _children = _children[:-1] + "]"
+def _format_element(python_element_name: str, use_h_prefix: bool) -> str:
+    """Return the element name, prefixed with ``h.`` when ``use_h_prefix`` is true."""
+    prefix = "h." if use_h_prefix else ""
+    return f"{prefix}{python_element_name}"
- if use_h_prefix:
- return f"h.{self.python_type}{_attrs}{_children}"
- return f"{self.python_type}{_attrs}{_children}"
+def _format_child(child: Tag | str, *, shorthand_id_class: bool, use_h_prefix: bool) -> str:
+    """Render one child node as source text.
+
+    A ``Tag`` is rendered through its ``serialize`` method with the given
+    options; anything else is converted with ``str``.
+    """
+    if not isinstance(child, Tag):
+        return str(child)
+    return child.serialize(shorthand_id_class=shorthand_id_class, use_h_prefix=use_h_prefix)
+
+
+def _format_children(
+    children: list[Tag | str], *, shorthand_id_class: bool, use_h_prefix: bool
+) -> str:
+    """Render ``children`` as a bracketed, comma-separated child list.
+
+    Each child is rendered with ``_format_child`` using the same options.
+    Returns ``""`` when there are no children.
+
+    The keyword is spelled ``use_h_prefix`` to match every other formatter
+    in this module; ``Tag.serialize`` below is its caller.
+    """
+    if not children:
+        return ""
+    return (
+        "["
+        + ", ".join(
+            _format_child(child, shorthand_id_class=shorthand_id_class, use_h_prefix=use_h_prefix)
+            for child in children
+        )
+        + "]"
+    )
+
+
+class Tag:
+    """A parsed HTML element: tag name, attributes, children and parent."""
+
+    def __init__(
+        self,
+        html_tag: str,
+        attrs: dict[str, str | None],
+        parent: Tag | None,
+    ):
+        self.html_tag = html_tag
+        self.attrs = attrs
+        # Child nodes in document order: nested tags or already-rendered text.
+        self.children: list[Tag | str] = []
+        self.parent = parent
+
+    @property
+    def python_element_name(self) -> str:
+        """The tag name with dashes replaced by underscores."""
+        return self.html_tag.replace("-", "_")
+
+    def serialize(self, *, shorthand_id_class: bool, use_h_prefix: bool) -> str:
+        """Return the source text for this element, its attributes and children."""
+        return (
+            _format_element(self.python_element_name, use_h_prefix)
+            # Pass a copy so attribute formatting cannot alter self.attrs.
+            + _format_attrs(dict(self.attrs), shorthand_id_class)
+            + _format_children(
+                self.children, shorthand_id_class=shorthand_id_class, use_h_prefix=use_h_prefix
+            )
+        )
class Formatter(ABC):
@@ -143,7 +195,7 @@ def __init__(self) -> None:
super().__init__()
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
- t = Tag(tag, attrs, parent=self._current)
+ t = Tag(tag, dict(attrs), parent=self._current)
if not self._current:
self._collected.append(t)
@@ -154,7 +206,7 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
self._current = t
def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
- t = Tag(tag, attrs, parent=self._current)
+ t = Tag(tag, dict(attrs), parent=self._current)
if not self._current:
self._collected.append(t)
@@ -165,10 +217,10 @@ def handle_endtag(self, tag: str) -> None:
if not self._current:
raise Exception(f"Error parsing html: Closing tag {tag} when not inside any other tag")
- if not self._current.html_type == tag:
+ if not self._current.html_tag == tag:
raise Exception(
f"Error parsing html: Closing tag {tag} does not match the "
- f"currently open tag ({self._current.html_type})"
+ f"currently open tag ({self._current.html_tag})"
)
self._current = self._current.parent
@@ -198,12 +250,12 @@ def serialize_python(
def _tags_from_children(parent: Tag) -> None:
for c in parent.children:
if isinstance(c, Tag):
- unique_tags.add(c.python_type)
+ unique_tags.add(c.python_element_name)
_tags_from_children(c)
for t in self._collected:
if isinstance(t, Tag):
- unique_tags.add(t.python_type)
+ unique_tags.add(t.python_element_name)
_tags_from_children(t)
sorted_tags = list(unique_tags)
@@ -291,7 +343,7 @@ def replacer(match: re.Match[str]) -> str:
def _serialize(el: Tag | str, shorthand_id_class: bool, use_h_prefix: bool) -> str:
+ # Tags are rendered through Tag.serialize with both options; anything else is converted with str().
if isinstance(el, Tag):
- return el.serialize(shorthand_id_class, use_h_prefix)
+ return el.serialize(shorthand_id_class=shorthand_id_class, use_h_prefix=use_h_prefix)
else:
return str(el)
diff --git a/tests/test_html2htpy.py b/tests/test_html2htpy.py
index 6c3762b..b810ec9 100644
--- a/tests/test_html2htpy.py
+++ b/tests/test_html2htpy.py
@@ -26,7 +26,7 @@ def test_convert_explicit_id_class_syntas() -> None:
"""
actual = html2htpy(input, shorthand_id_class=False, import_mode="no")
- expected = 'div(id="div-id",class_="some-class other-class")[p["This is a paragraph."]]'
+ expected = 'div(id="div-id", class_="some-class other-class")[p["This is a paragraph."]]'
assert actual == expected
@@ -44,8 +44,8 @@ def test_convert_nested_element_without_formatting() -> None:
expected = (
"div["
- 'p["This is a ",span["nested"]," element."],'
- 'p["Another ",a(href="#")["nested ",strong["tag"]],"."]'
+ 'p["This is a ", span["nested"], " element."], '
+ 'p["Another ", a(href="#")["nested ", strong["tag"]], "."]'
"]"
)
@@ -81,8 +81,8 @@ def test_convert_nested_element___import_mode_yes() -> None:
assert actual == (
"from htpy import a, div, p, span, strong\n"
"div["
- 'p["This is a ",span["nested"]," element."],'
- 'p["Another ",a(href="#")["nested ",strong["tag"]],"."]'
+ 'p["This is a ", span["nested"], " element."], '
+ 'p["Another ", a(href="#")["nested ", strong["tag"]], "."]'
"]"
)
@@ -92,8 +92,8 @@ def test_convert_nested_element___import_mode_h() -> None:
assert actual == (
"import htpy as h\n"
"h.div["
- 'h.p["This is a ",h.span["nested"]," element."],'
- 'h.p["Another ",h.a(href="#")["nested ",h.strong["tag"]],"."]'
+ 'h.p["This is a ", h.span["nested"], " element."], '
+ 'h.p["Another ", h.a(href="#")["nested ", h.strong["tag"]], "."]'
"]"
)
@@ -116,13 +116,13 @@ def test_convert_self_closing_tags() -> None:
actual = html2htpy(input, import_mode="no")
- assert actual == '[img(src="image.jpg",alt="An image"),br,input(type="text")]'
+ assert actual == '[img(src="image.jpg", alt="An image"),br,input(type="text")]'
def test_convert_attribute_with_special_characters() -> None:
+ # An ampersand and single quotes inside an attribute value must come through conversion unchanged.
input = """<img src="path/to/image.jpg" alt="A & 'image'" />"""
actual = html2htpy(input, import_mode="no")
- assert actual == """img(src="path/to/image.jpg",alt="A & 'image'")"""
+ assert actual == """img(src="path/to/image.jpg", alt="A & 'image'")"""
def test_convert_ignores_comments() -> None:
@@ -131,7 +131,7 @@ def test_convert_ignores_comments() -> None:
Content inside
"""
actual = html2htpy(input, import_mode="no")
- assert actual == 'div["Content "," inside"]'
+ assert actual == 'div["Content ", " inside"]'
def test_convert_special_characters() -> None:
@@ -225,7 +225,7 @@ def test_convert_html_doctype() -> None:
"""
actual = html2htpy(input, import_mode="no")
- expected = """html[head[title["Test Document"]],body[h1["Header"],p["Paragraph"]]]"""
+ expected = """html[head[title["Test Document"]], body[h1["Header"], p["Paragraph"]]]"""
assert actual == expected
@@ -255,7 +255,7 @@ def test_convert_void_elements() -> None:
"""
actual = html2htpy(input, import_mode="no")
- assert actual == 'div[div[input(type="text")],div[input(type="text")]]'
+ assert actual == 'div[div[input(type="text")], div[input(type="text")]]'
def test_convert_custom_tag() -> None:
@@ -287,7 +287,7 @@ def test_convert_attributes_without_values() -> None:
"""
actual = html2htpy(input, import_mode="no")
- assert actual == """[input(type="checkbox",checked=True),option(selected=True)["Option"]]"""
+ assert actual == """[input(type="checkbox", checked=True),option(selected=True)["Option"]]"""
def test_convert_complex_section() -> None:
@@ -307,7 +307,7 @@ def test_convert_complex_section() -> None:
'section(class_="hero is-fullheight is-link")['
'div(class_="hero-body")['
'div(class_="container")['
- 'p(class_="subtitle is-3 is-spaced")["Welcome"],'
+ 'p(class_="subtitle is-3 is-spaced")["Welcome"], '
'p(class_="title is-1 is-spaced")[f"Student code: {student_code}"]'
"]"
"]"
@@ -368,6 +368,17 @@ def test_convert_complex_svg() -> None:
def test_reserved_keyword_attributes() -> None:
+ # `class` and `del` are Python keywords, so they are expected with a trailing underscore.
actual = html2htpy('<img class="foo" del="x">', shorthand_id_class=False, import_mode="no")
- expected = 'img(class_="foo",del_="x")'
+ expected = 'img(class_="foo", del_="x")'
+
+ assert actual == expected
+
+
+def test_dict_attributes() -> None:
+    # Attribute names that are not valid Python identifiers are passed in a
+    # dict literal. That dict is a positional argument, so it has to appear
+    # before the keyword arguments for the generated call to be valid Python.
+    actual = html2htpy(
+        '<img src="bar.gif" @a-b="c" @d>',
+        shorthand_id_class=False,
+        import_mode="no",
+    )
+    expected = 'img({"@a-b": "c", "@d": True}, src="bar.gif")'
     assert actual == expected