diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index abea239..3bcc3de 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import argparse import keyword import re @@ -6,7 +8,7 @@ import sys from abc import ABC, abstractmethod from html.parser import HTMLParser -from typing import Any, Literal +from typing import Literal __all__ = ["html2htpy"] @@ -28,86 +30,136 @@ ] -class Tag: - def __init__( - self, - type: str, - attrs: list[tuple[str, str | None]], - parent: Any | None = None, - ): - self.html_type = type - self.python_type = type - if "-" in self.python_type: - self.python_type = self.python_type.replace("-", "_") +def _quote(x: str) -> str: + if '"' in x: + return f"'{x}'" - self.attrs = attrs - self.parent = parent - self.children: list[Any | str] = [] + return f'"{x}"' - def serialize(self, shorthand_id_class: bool, use_h_prefix: bool) -> str: - _positional_attrs: dict[str, str | None] = {} - _attrs = "" - _kwattrs: list[tuple[str, str | None]] = [] - for key, value in self.attrs: - if key in ("id", "class") and shorthand_id_class: - _positional_attrs[key] = value - else: - _kwattrs.append((key, value)) +def _format_value(value: str | None) -> str: + if value is None: + return "True" + + return _quote(value) - if _positional_attrs or _kwattrs: - _attrs += "(" - if _positional_attrs: - arg0 = "" - if "id" in _positional_attrs: - if _positional_attrs["id"] is None: - raise Exception("Id attribute cannot be none") +def _format_id_class_shorthand_attrs(id_: str, class_: str) -> str: + classes = class_.split(" ") if class_ else [] + result = (f"#{id_}" if id_ else "") + (("." + ".".join(classes)) if classes else "") - arg0 += "#" + _positional_attrs["id"] + if result: + return f'"{result}"' - if "class" in _positional_attrs: - if _positional_attrs["class"] is None: - raise Exception("Class attribute cannot be none") + return "" - classes = ".".join(_positional_attrs["class"].split(" ")) - arg0 += "." + classes - _attrs += '"' + arg0 + '",' +def _format_keyword_attrs(attrs: dict[str, str | None]) -> str: + if not attrs: + return "" - for key, value in _kwattrs: - if "-" in key: - key = key.replace("-", "_") + return ", ".join(f"{key}={_format_value(value)}" for key, value in attrs.items()) - if keyword.iskeyword(key): - key += "_" - if not value: - _attrs += f"{key}=True," +def _format_dict_attrs(attrs: dict[str, str | None]) -> str: + if not attrs: + return "" + return ( + "{" + + ", ".join(f"{_quote(key)}: {_format_value(value)}" for key, value in attrs.items()) + + "}" + ) + + +def _format_attrs(attrs: dict[str, str | None], shorthand_id_class: bool) -> str: + keyword_attrs: dict[str, str | None] = {} + dict_attrs: dict[str, str | None] = {} + + shorthand_id_class_str = ( + _format_id_class_shorthand_attrs(attrs.pop("id", "") or "", attrs.pop("class", "") or "") + if shorthand_id_class + else "" + ) + + for key, value in attrs.items(): + potential_keyword_key = key.replace("-", "_") + if potential_keyword_key.isidentifier(): + if keyword.iskeyword(potential_keyword_key): + keyword_attrs[potential_keyword_key + "_"] = value else: - _attrs += f'{key}="{value}",' + keyword_attrs[potential_keyword_key] = value + else: + dict_attrs[key] = value + + _attrs = ", ".join( + x + for x in [ + shorthand_id_class_str, + _format_keyword_attrs(keyword_attrs), + _format_dict_attrs(dict_attrs), + ] + if x + ) - if _positional_attrs or _kwattrs: - _attrs = _attrs[:-1] + ")" + if not _attrs: + return "" - _children: str = "" - if self.children: - _children += "[" - for c in self.children: - if isinstance(c, Tag): - _children += c.serialize(shorthand_id_class, use_h_prefix) - else: - _children += str(c) + return f"({_attrs})" - _children += "," - _children = _children[:-1] + "]" +def _format_element(python_element_name: str, use_h_prefix: bool) -> str: + if use_h_prefix: + return f"h.{python_element_name}" + return python_element_name - if use_h_prefix: - return f"h.{self.python_type}{_attrs}{_children}" - return f"{self.python_type}{_attrs}{_children}" +def _format_child(child: Tag | str, *, shorthand_id_class: bool, use_h_prefix: bool) -> str: + if isinstance(child, Tag): + return child.serialize(shorthand_id_class=shorthand_id_class, use_h_prefix=use_h_prefix) + else: + return str(child) + + +def _format_children( + children: list[Tag | str], *, shorthand_id_class: bool, use_u_prefix: bool +) -> str: + if not children: + return "" + return ( + "[" + + ", ".join( + _format_child(child, shorthand_id_class=shorthand_id_class, use_h_prefix=use_u_prefix) + for child in children + ) + + "]" + ) + + +class Tag: + def __init__( + self, + html_tag: str, + attrs: dict[str, str | None], + parent: Tag | None, + ): + self.html_tag = html_tag + self.attrs = attrs + self.children: list[Tag | str] = [] + self.parent = parent + + @property + def python_element_name(self) -> str: + return self.html_tag.replace("-", "_") + + def serialize(self, *, shorthand_id_class: bool, use_h_prefix: bool) -> str: + return ( + _format_element(self.python_element_name, use_h_prefix) + + _format_attrs(dict(self.attrs), shorthand_id_class) + + _format_children( + self.children, shorthand_id_class=shorthand_id_class, use_u_prefix=use_h_prefix + ) + ) class Formatter(ABC): @@ -143,7 +195,7 @@ def __init__(self) -> None: super().__init__() def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: - t = Tag(tag, attrs, parent=self._current) + t = Tag(tag, dict(attrs), parent=self._current) if not self._current: self._collected.append(t) @@ -154,7 +206,7 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None self._current = t def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: - t = Tag(tag, attrs, parent=self._current) + t = Tag(tag, dict(attrs), parent=self._current) if not self._current: self._collected.append(t) @@ -165,10 +217,10 @@ def handle_endtag(self, tag: str) -> None: if not self._current: raise Exception(f"Error parsing html: Closing tag {tag} when not inside any other tag") - if not self._current.html_type == tag: + if not self._current.html_tag == tag: raise Exception( f"Error parsing html: Closing tag {tag} does not match the " - f"currently open tag ({self._current.html_type})" + f"currently open tag ({self._current.html_tag})" ) self._current = self._current.parent @@ -198,12 +250,12 @@ def serialize_python( def _tags_from_children(parent: Tag) -> None: for c in parent.children: if isinstance(c, Tag): - unique_tags.add(c.python_type) + unique_tags.add(c.python_element_name) _tags_from_children(c) for t in self._collected: if isinstance(t, Tag): - unique_tags.add(t.python_type) + unique_tags.add(t.python_element_name) _tags_from_children(t) sorted_tags = list(unique_tags) @@ -291,7 +343,7 @@ def replacer(match: re.Match[str]) -> str: def _serialize(el: Tag | str, shorthand_id_class: bool, use_h_prefix: bool) -> str: if isinstance(el, Tag): - return el.serialize(shorthand_id_class, use_h_prefix) + return el.serialize(shorthand_id_class=shorthand_id_class, use_h_prefix=use_h_prefix) else: return str(el) diff --git a/tests/test_html2htpy.py b/tests/test_html2htpy.py index 6c3762b..b810ec9 100644 --- a/tests/test_html2htpy.py +++ b/tests/test_html2htpy.py @@ -26,7 +26,7 @@ def test_convert_explicit_id_class_syntas() -> None: """ actual = html2htpy(input, shorthand_id_class=False, import_mode="no") - expected = 'div(id="div-id",class_="some-class other-class")[p["This is a paragraph."]]' + expected = 'div(id="div-id", class_="some-class other-class")[p["This is a paragraph."]]' assert actual == expected @@ -44,8 +44,8 @@ def test_convert_nested_element_without_formatting() -> None: expected = ( "div[" - 'p["This is a ",span["nested"]," element."],' - 'p["Another ",a(href="#")["nested ",strong["tag"]],"."]' + 'p["This is a ", span["nested"], " element."], ' + 'p["Another ", a(href="#")["nested ", strong["tag"]], "."]' "]" ) @@ -81,8 +81,8 @@ def test_convert_nested_element___import_mode_yes() -> None: assert actual == ( "from htpy import a, div, p, span, strong\n" "div[" - 'p["This is a ",span["nested"]," element."],' - 'p["Another ",a(href="#")["nested ",strong["tag"]],"."]' + 'p["This is a ", span["nested"], " element."], ' + 'p["Another ", a(href="#")["nested ", strong["tag"]], "."]' "]" ) @@ -92,8 +92,8 @@ def test_convert_nested_element___import_mode_h() -> None: assert actual == ( "import htpy as h\n" "h.div[" - 'h.p["This is a ",h.span["nested"]," element."],' - 'h.p["Another ",h.a(href="#")["nested ",h.strong["tag"]],"."]' + 'h.p["This is a ", h.span["nested"], " element."], ' + 'h.p["Another ", h.a(href="#")["nested ", h.strong["tag"]], "."]' "]" ) @@ -116,13 +116,13 @@ def test_convert_self_closing_tags() -> None: actual = html2htpy(input, import_mode="no") - assert actual == '[img(src="image.jpg",alt="An image"),br,input(type="text")]' + assert actual == '[img(src="image.jpg", alt="An image"),br,input(type="text")]' def test_convert_attribute_with_special_characters() -> None: input = """A <test> & 'image'""" actual = html2htpy(input, import_mode="no") - assert actual == """img(src="path/to/image.jpg",alt="A & 'image'")""" + assert actual == """img(src="path/to/image.jpg", alt="A & 'image'")""" def test_convert_ignores_comments() -> None: @@ -131,7 +131,7 @@ def test_convert_ignores_comments() -> None:
Content inside
""" actual = html2htpy(input, import_mode="no") - assert actual == 'div["Content "," inside"]' + assert actual == 'div["Content ", " inside"]' def test_convert_special_characters() -> None: @@ -225,7 +225,7 @@ def test_convert_html_doctype() -> None: """ actual = html2htpy(input, import_mode="no") - expected = """html[head[title["Test Document"]],body[h1["Header"],p["Paragraph"]]]""" + expected = """html[head[title["Test Document"]], body[h1["Header"], p["Paragraph"]]]""" assert actual == expected @@ -255,7 +255,7 @@ def test_convert_void_elements() -> None: """ actual = html2htpy(input, import_mode="no") - assert actual == 'div[div[input(type="text")],div[input(type="text")]]' + assert actual == 'div[div[input(type="text")], div[input(type="text")]]' def test_convert_custom_tag() -> None: @@ -287,7 +287,7 @@ def test_convert_attributes_without_values() -> None: """ actual = html2htpy(input, import_mode="no") - assert actual == """[input(type="checkbox",checked=True),option(selected=True)["Option"]]""" + assert actual == """[input(type="checkbox", checked=True),option(selected=True)["Option"]]""" def test_convert_complex_section() -> None: @@ -307,7 +307,7 @@ def test_convert_complex_section() -> None: 'section(class_="hero is-fullheight is-link")[' 'div(class_="hero-body")[' 'div(class_="container")[' - 'p(class_="subtitle is-3 is-spaced")["Welcome"],' + 'p(class_="subtitle is-3 is-spaced")["Welcome"], ' 'p(class_="title is-1 is-spaced")[f"Student code: {student_code}"]' "]" "]" @@ -368,6 +368,17 @@ def test_convert_complex_svg() -> None: def test_reserved_keyword_attributes() -> None: actual = html2htpy('', shorthand_id_class=False, import_mode="no") - expected = 'img(class_="foo",del_="x")' + expected = 'img(class_="foo", del_="x")' + + assert actual == expected + + +def test_dict_attributes() -> None: + actual = html2htpy( + '', + shorthand_id_class=False, + import_mode="no", + ) + expected = 'img(src="bar.gif", {"@a-b": "c", "@d": True})' assert actual == expected