Skip to content

Commit

Permalink
Refactor html2htpy, handle dict attributes.
Browse files (browse the repository at this point in the history)
Closes #28.
  • Loading branch information
pelme committed Jun 25, 2024
1 parent 9f03963 commit 8ebc8e7
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 83 deletions.
188 changes: 120 additions & 68 deletions htpy/html2htpy.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import argparse
import keyword
import re
Expand All @@ -6,7 +8,7 @@
import sys
from abc import ABC, abstractmethod
from html.parser import HTMLParser
from typing import Any, Literal
from typing import Literal

__all__ = ["html2htpy"]

Expand All @@ -28,86 +30,136 @@
]


class Tag:
def __init__(
self,
type: str,
attrs: list[tuple[str, str | None]],
parent: Any | None = None,
):
self.html_type = type
self.python_type = type
if "-" in self.python_type:
self.python_type = self.python_type.replace("-", "_")
def _quote(x: str) -> str:
if '"' in x:
return f"'{x}'"

self.attrs = attrs
self.parent = parent
self.children: list[Any | str] = []
return f'"{x}"'

def serialize(self, shorthand_id_class: bool, use_h_prefix: bool) -> str:
_positional_attrs: dict[str, str | None] = {}
_attrs = ""
_kwattrs: list[tuple[str, str | None]] = []

for key, value in self.attrs:
if key in ("id", "class") and shorthand_id_class:
_positional_attrs[key] = value
else:
_kwattrs.append((key, value))
def _format_value(value: str | None) -> str:
if value is None:
return "True"

return _quote(value)

if _positional_attrs or _kwattrs:
_attrs += "("

if _positional_attrs:
arg0 = ""
if "id" in _positional_attrs:
if _positional_attrs["id"] is None:
raise Exception("Id attribute cannot be none")
def _format_id_class_shorthand_attrs(id_: str, class_: str) -> str:
classes = class_.split(" ") if class_ else []
result = (f"#{id_}" if id_ else "") + (("." + ".".join(classes)) if classes else "")

arg0 += "#" + _positional_attrs["id"]
if result:
return f'"{result}"'

if "class" in _positional_attrs:
if _positional_attrs["class"] is None:
raise Exception("Class attribute cannot be none")
return ""

classes = ".".join(_positional_attrs["class"].split(" "))
arg0 += "." + classes

_attrs += '"' + arg0 + '",'
def _format_keyword_attrs(attrs: dict[str, str | None]) -> str:
if not attrs:
return ""

for key, value in _kwattrs:
if "-" in key:
key = key.replace("-", "_")
return ", ".join(f"{key}={_format_value(value)}" for key, value in attrs.items())

if keyword.iskeyword(key):
key += "_"

if not value:
_attrs += f"{key}=True,"
def _format_dict_attrs(attrs: dict[str, str | None]) -> str:
if not attrs:
return ""

return (
"{"
+ ", ".join(f"{_quote(key)}: {_format_value(value)}" for key, value in attrs.items())
+ "}"
)


def _format_attrs(attrs: dict[str, str | None], shorthand_id_class: bool) -> str:
keyword_attrs: dict[str, str | None] = {}
dict_attrs: dict[str, str | None] = {}

shorthand_id_class_str = (
_format_id_class_shorthand_attrs(attrs.pop("id", "") or "", attrs.pop("class", "") or "")
if shorthand_id_class
else ""
)

for key, value in attrs.items():
potential_keyword_key = key.replace("-", "_")
if potential_keyword_key.isidentifier():
if keyword.iskeyword(potential_keyword_key):
keyword_attrs[potential_keyword_key + "_"] = value
else:
_attrs += f'{key}="{value}",'
keyword_attrs[potential_keyword_key] = value
else:
dict_attrs[key] = value

_attrs = ", ".join(
x
for x in [
shorthand_id_class_str,
_format_keyword_attrs(keyword_attrs),
_format_dict_attrs(dict_attrs),
]
if x
)

if _positional_attrs or _kwattrs:
_attrs = _attrs[:-1] + ")"
if not _attrs:
return ""

_children: str = ""
if self.children:
_children += "["
for c in self.children:
if isinstance(c, Tag):
_children += c.serialize(shorthand_id_class, use_h_prefix)
else:
_children += str(c)
return f"({_attrs})"

_children += ","

_children = _children[:-1] + "]"
def _format_element(python_element_name: str, use_h_prefix: bool) -> str:
if use_h_prefix:
return f"h.{python_element_name}"
return python_element_name

if use_h_prefix:
return f"h.{self.python_type}{_attrs}{_children}"

return f"{self.python_type}{_attrs}{_children}"
def _format_child(child: Tag | str, *, shorthand_id_class: bool, use_h_prefix: bool) -> str:
    """Render one child of an element as Python source text.

    Anything that is not a ``Tag`` is converted with ``str``; a ``Tag`` is
    serialized recursively with the same formatting options.
    """
    if not isinstance(child, Tag):
        return str(child)

    return child.serialize(shorthand_id_class=shorthand_id_class, use_h_prefix=use_h_prefix)


def _format_children(
children: list[Tag | str], *, shorthand_id_class: bool, use_u_prefix: bool
) -> str:
if not children:
return ""
return (
"["
+ ", ".join(
_format_child(child, shorthand_id_class=shorthand_id_class, use_h_prefix=use_u_prefix)
for child in children
)
+ "]"
)


class Tag:
    """An HTML element with its attributes, children and parent element."""

    def __init__(
        self,
        html_tag: str,
        attrs: dict[str, str | None],
        parent: Tag | None,
    ):
        self.html_tag = html_tag
        self.attrs = attrs
        self.parent = parent
        # Children are either nested tags or already-formatted strings.
        self.children: list[Tag | str] = []

    @property
    def python_element_name(self) -> str:
        """The tag name with every ``-`` replaced by ``_``."""
        return self.html_tag.replace("-", "_")

    def serialize(self, *, shorthand_id_class: bool, use_h_prefix: bool) -> str:
        """Return this element as source text: name, then attributes, then children."""
        element = _format_element(self.python_element_name, use_h_prefix)
        # A copy of the attributes is passed so the formatter never works on
        # this tag's own dict.
        attributes = _format_attrs(dict(self.attrs), shorthand_id_class)
        children = _format_children(
            self.children, shorthand_id_class=shorthand_id_class, use_u_prefix=use_h_prefix
        )
        return element + attributes + children


class Formatter(ABC):
Expand Down Expand Up @@ -143,7 +195,7 @@ def __init__(self) -> None:
super().__init__()

def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
t = Tag(tag, attrs, parent=self._current)
t = Tag(tag, dict(attrs), parent=self._current)

if not self._current:
self._collected.append(t)
Expand All @@ -154,7 +206,7 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
self._current = t

def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
t = Tag(tag, attrs, parent=self._current)
t = Tag(tag, dict(attrs), parent=self._current)

if not self._current:
self._collected.append(t)
Expand All @@ -165,10 +217,10 @@ def handle_endtag(self, tag: str) -> None:
if not self._current:
raise Exception(f"Error parsing html: Closing tag {tag} when not inside any other tag")

if not self._current.html_type == tag:
if not self._current.html_tag == tag:
raise Exception(
f"Error parsing html: Closing tag {tag} does not match the "
f"currently open tag ({self._current.html_type})"
f"currently open tag ({self._current.html_tag})"
)

self._current = self._current.parent
Expand Down Expand Up @@ -198,12 +250,12 @@ def serialize_python(
def _tags_from_children(parent: Tag) -> None:
for c in parent.children:
if isinstance(c, Tag):
unique_tags.add(c.python_type)
unique_tags.add(c.python_element_name)
_tags_from_children(c)

for t in self._collected:
if isinstance(t, Tag):
unique_tags.add(t.python_type)
unique_tags.add(t.python_element_name)
_tags_from_children(t)

sorted_tags = list(unique_tags)
Expand Down Expand Up @@ -291,7 +343,7 @@ def replacer(match: re.Match[str]) -> str:

def _serialize(el: Tag | str, shorthand_id_class: bool, use_h_prefix: bool) -> str:
if isinstance(el, Tag):
return el.serialize(shorthand_id_class, use_h_prefix)
return el.serialize(shorthand_id_class=shorthand_id_class, use_h_prefix=use_h_prefix)
else:
return str(el)

Expand Down
41 changes: 26 additions & 15 deletions tests/test_html2htpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_convert_explicit_id_class_syntas() -> None:
"""

actual = html2htpy(input, shorthand_id_class=False, import_mode="no")
expected = 'div(id="div-id",class_="some-class other-class")[p["This is a paragraph."]]'
expected = 'div(id="div-id", class_="some-class other-class")[p["This is a paragraph."]]'

assert actual == expected

Expand All @@ -44,8 +44,8 @@ def test_convert_nested_element_without_formatting() -> None:

expected = (
"div["
'p["This is a ",span["nested"]," element."],'
'p["Another ",a(href="#")["nested ",strong["tag"]],"."]'
'p["This is a ", span["nested"], " element."], '
'p["Another ", a(href="#")["nested ", strong["tag"]], "."]'
"]"
)

Expand Down Expand Up @@ -81,8 +81,8 @@ def test_convert_nested_element___import_mode_yes() -> None:
assert actual == (
"from htpy import a, div, p, span, strong\n"
"div["
'p["This is a ",span["nested"]," element."],'
'p["Another ",a(href="#")["nested ",strong["tag"]],"."]'
'p["This is a ", span["nested"], " element."], '
'p["Another ", a(href="#")["nested ", strong["tag"]], "."]'
"]"
)

Expand All @@ -92,8 +92,8 @@ def test_convert_nested_element___import_mode_h() -> None:
assert actual == (
"import htpy as h\n"
"h.div["
'h.p["This is a ",h.span["nested"]," element."],'
'h.p["Another ",h.a(href="#")["nested ",h.strong["tag"]],"."]'
'h.p["This is a ", h.span["nested"], " element."], '
'h.p["Another ", h.a(href="#")["nested ", h.strong["tag"]], "."]'
"]"
)

Expand All @@ -116,13 +116,13 @@ def test_convert_self_closing_tags() -> None:

actual = html2htpy(input, import_mode="no")

assert actual == '[img(src="image.jpg",alt="An image"),br,input(type="text")]'
assert actual == '[img(src="image.jpg", alt="An image"),br,input(type="text")]'


def test_convert_attribute_with_special_characters() -> None:
input = """<img src="path/to/image.jpg" alt="A <test> & 'image'" />"""
actual = html2htpy(input, import_mode="no")
assert actual == """img(src="path/to/image.jpg",alt="A <test> & 'image'")"""
assert actual == """img(src="path/to/image.jpg", alt="A <test> & 'image'")"""


def test_convert_ignores_comments() -> None:
Expand All @@ -131,7 +131,7 @@ def test_convert_ignores_comments() -> None:
<div>Content <!-- Another comment --> inside</div>
"""
actual = html2htpy(input, import_mode="no")
assert actual == 'div["Content "," inside"]'
assert actual == 'div["Content ", " inside"]'


def test_convert_special_characters() -> None:
Expand Down Expand Up @@ -225,7 +225,7 @@ def test_convert_html_doctype() -> None:
"""

actual = html2htpy(input, import_mode="no")
expected = """html[head[title["Test Document"]],body[h1["Header"],p["Paragraph"]]]"""
expected = """html[head[title["Test Document"]], body[h1["Header"], p["Paragraph"]]]"""

assert actual == expected

Expand Down Expand Up @@ -255,7 +255,7 @@ def test_convert_void_elements() -> None:
"""

actual = html2htpy(input, import_mode="no")
assert actual == 'div[div[input(type="text")],div[input(type="text")]]'
assert actual == 'div[div[input(type="text")], div[input(type="text")]]'


def test_convert_custom_tag() -> None:
Expand Down Expand Up @@ -287,7 +287,7 @@ def test_convert_attributes_without_values() -> None:
<option selected>Option</option>
"""
actual = html2htpy(input, import_mode="no")
assert actual == """[input(type="checkbox",checked=True),option(selected=True)["Option"]]"""
assert actual == """[input(type="checkbox", checked=True),option(selected=True)["Option"]]"""


def test_convert_complex_section() -> None:
Expand All @@ -307,7 +307,7 @@ def test_convert_complex_section() -> None:
'section(class_="hero is-fullheight is-link")['
'div(class_="hero-body")['
'div(class_="container")['
'p(class_="subtitle is-3 is-spaced")["Welcome"],'
'p(class_="subtitle is-3 is-spaced")["Welcome"], '
'p(class_="title is-1 is-spaced")[f"Student code: {student_code}"]'
"]"
"]"
Expand Down Expand Up @@ -368,6 +368,17 @@ def test_convert_complex_svg() -> None:

def test_reserved_keyword_attributes() -> None:
actual = html2htpy('<img class="foo" del="x">', shorthand_id_class=False, import_mode="no")
expected = 'img(class_="foo",del_="x")'
expected = 'img(class_="foo", del_="x")'

assert actual == expected


def test_dict_attributes() -> None:
    """Attribute names that are not valid Python identifiers end up in a dict argument."""
    html = '<img src="bar.gif" @a-b="c" @d>'

    converted = html2htpy(html, shorthand_id_class=False, import_mode="no")

    assert converted == 'img(src="bar.gif", {"@a-b": "c", "@d": True})'

0 comments on commit 8ebc8e7

Please sign in to comment.