From 22ef54843ff0c99760b27aee2f1cea5b23636471 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 00:17:41 +0200 Subject: [PATCH 01/35] Add utilities to convert from html to htpy --- htpy/cli/cli.py | 79 +++++++++++ htpy/utils/__init__.py | 1 + htpy/utils/html_to_htpy.py | 208 ++++++++++++++++++++++++++++ pyproject.toml | 6 + tests/test_convert_html.py | 271 +++++++++++++++++++++++++++++++++++++ 5 files changed, 565 insertions(+) create mode 100644 htpy/cli/cli.py create mode 100644 htpy/utils/__init__.py create mode 100644 htpy/utils/html_to_htpy.py create mode 100644 tests/test_convert_html.py diff --git a/htpy/cli/cli.py b/htpy/cli/cli.py new file mode 100644 index 0000000..d6f7dc5 --- /dev/null +++ b/htpy/cli/cli.py @@ -0,0 +1,79 @@ +import argparse +from dataclasses import dataclass + +from htpy.utils import html_to_htpy + + +@dataclass +class ConvertArgs: + shorthand: bool + format: bool + + +def main(): + global_parser = argparse.ArgumentParser(prog="htpy") + subparsers = global_parser.add_subparsers(title="commands", help="commands") + + convert_parser = subparsers.add_parser( + "convert", help="convert html to python (htpy)" + ) + convert_parser.add_argument( + "-s", + "--shorthand", + help="Use shorthand syntax for class and id attributes", + action="store_true", + ) + convert_parser.add_argument( + "-f", + "--format", + help="Format output code (requires black installed)", + action="store_true", + ) + + def _convert_html(args: ConvertArgs): + convert_html_cli(args.shorthand, args.format) + + convert_parser.set_defaults(func=_convert_html) + + args = global_parser.parse_args() + + args.func(args) + + +if __name__ == "__main__": + main() + + +def convert_html_cli(shorthand_id_class: bool, format: bool): + import time + + print("") + print(f"HTML to HTPY converter") + print(f"selected options: ") + print(f" format: {format}") + print(f" shorthand id class: {shorthand_id_class}") + print("\n>>>>>>>>>>>>>>>>>>") + print(">>> paste html >>>") + print(">>>>>>>>>>>>>>>>>>\n") + + collected_text = "" + input_starttime = None + + try: + while True: + user_input = input() + if not input_starttime: + input_starttime = time.time() + + collected_text += user_input + + if input_starttime + 0.1 < time.time(): + break + + output = html_to_htpy(collected_text, shorthand_id_class, format) + print("\n##############################################") + print("### serialized and formatted python (htpy) ###") + print("##############################################\n") + print(output) + except KeyboardInterrupt: + print("\nInterrupted") diff --git a/htpy/utils/__init__.py b/htpy/utils/__init__.py new file mode 100644 index 0000000..2a107d0 --- /dev/null +++ b/htpy/utils/__init__.py @@ -0,0 +1 @@ +from htpy.utils.html_to_htpy import * diff --git a/htpy/utils/html_to_htpy.py b/htpy/utils/html_to_htpy.py new file mode 100644 index 0000000..aa79789 --- /dev/null +++ b/htpy/utils/html_to_htpy.py @@ -0,0 +1,208 @@ +from html.parser import HTMLParser +import re +from typing import Self + +__all__ = ["html_to_htpy"] + + +class Tag: + def __init__( + self, + type: str, + attrs: list[tuple[str, str | None]], + parent: Self | None = None, + ): + self.type = type + self.attrs = attrs + self.parent = parent + self.children: list[Self | str] = [] + + def serialize(self, shorthand_id_class: bool = False): + _type = self.type + if "-" in _type: + _type = _type.replace("-", "_") + + _positional_attrs: dict[str, str | None] = {} + _attrs = "" + _kwattrs: list[tuple[str, str | None]] = [] + + for i in range(len(self.attrs)): + a = self.attrs[i] + key = a[0] + if key == "class": + if shorthand_id_class: + _positional_attrs[key] = self.attrs[i][1] + else: + _kwattrs.append(a) + + elif key == "id": + if shorthand_id_class: + _positional_attrs[key] = self.attrs[i][1] + else: + _kwattrs.append(a) + else: + _kwattrs.append(a) + + if _positional_attrs or _kwattrs: + _attrs += "(" + + if _positional_attrs: + arg0 = "" + if "id" in _positional_attrs: + if _positional_attrs["id"] == None: + raise Exception("Id attribute cannot be none") + + arg0 += "#" + _positional_attrs["id"] + + if "class" in _positional_attrs: + if _positional_attrs["class"] == None: + raise Exception("Class attribute cannot be none") + + classes = ".".join(_positional_attrs["class"].split(" ")) + arg0 += "." + classes + + _attrs += '"' + arg0 + '",' + + if _kwattrs: + for a in _kwattrs: + key = a[0] + if "-" in key: + key = key.replace("-", "_") + + if key == "class": + key = "class_" + elif key == "for": + key = "for_" + + val = a[1] + if not val: + _attrs += f"{key}=True," + + else: + _attrs += f'{key}="{val}",' + + if _positional_attrs or _kwattrs: + _attrs = _attrs[:-1] + ")" + + _children: str = "" + if self.children: + _children += "[" + for c in self.children: + if isinstance(c, Tag): + _children += c.serialize(shorthand_id_class=shorthand_id_class) + else: + _children += str(c) + + _children += "," + + _children = _children[:-1] + "]" + + return f"{_type}{_attrs}{_children}" + + +class HTPYParser(HTMLParser): + def __init__(self): + self._collected: list[Tag | str] = [] + self._current: Tag | None = None + super().__init__() + + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]): + t = Tag(tag, attrs, parent=self._current) + + if not self._current: + self._collected.append(t) + else: + self._current.children.append(t) + + self._current = t + + def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]): + t = Tag(tag, attrs, parent=self._current) + + if not self._current: + self._collected.append(t) + else: + self._current.children.append(t) + + def handle_endtag(self, tag: str): + if not self._current: + raise Exception( + f"Error parsing html: Closing tag {tag} when not inside any other tag" + ) + + if not self._current.type == tag: + raise Exception( + f"Error parsing html: Closing tag {tag} does not match the currently open tag ({self._current.type})" + ) + + self._current = self._current.parent + + def handle_data(self, data: str): + if not data.isspace(): + stringified_data = _convert_data_to_string(data) + + if self._current: + self._current.children.append(stringified_data) + else: + self._collected.append(stringified_data) + + def serialize_python(self, shorthand_id_class: bool = False, format: bool = False): + o = "" + + if len(self._collected) == 1: + o += _serialize(self._collected[0], shorthand_id_class) + + else: + o += "[" + for t in self._collected: + o += _serialize(t, shorthand_id_class) + "," + o = o[:-1] + "]" + + if format: + try: + import black + except: + raise Exception( + "Cannot import formatter. Please ensure black is installed." + ) + + return black.format_str( + o, mode=black.FileMode(line_length=80, magic_trailing_comma=False) + ) + else: + return o + + +def html_to_htpy(html: str, shorthand_id_class: bool = False, format: bool = False): + parser = HTPYParser() + parser.feed(html) + + return parser.serialize_python(shorthand_id_class, format) + + +def _convert_data_to_string(data: str): + _data = str(data) + escaped_text = _data.replace('"', '\\"') + + pattern = re.compile(r"\{\{\s*(\w+)\s*\}\}") + + has_jinja_pattern = re.search(pattern, _data) + if has_jinja_pattern: + + def replacer(match: re.Match[str]): + var_name = match.group(1) + return f"{{{var_name}}}" + + _data = pattern.sub(replacer, escaped_text) + _data = 'f"' + _data + '"' + else: + _data = '"' + _data + '"' + + return _data + + +def _serialize(el: Tag | str, shorthand_id_class: bool): + if isinstance(el, Tag): + return el.serialize(shorthand_id_class=shorthand_id_class) + else: + return str(el) diff --git a/pyproject.toml b/pyproject.toml index 890873a..9dd6c0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,9 @@ optional-dependencies.dev = [ "django-stubs", "jinja2", ] +optional-dependencies.extras = [ + "black" +] optional-dependencies.docs = [ "mkdocs-material==9.5.12", ] @@ -41,6 +44,9 @@ Repository = "https://github.com/pelme/htpy" Documentation = "https://htpy.dev" Issues = "https://github.com/pelme/htpy/issues" +[project.scripts] +htpy = "htpy.cli.cli:main" + [build-system] requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" diff --git a/tests/test_convert_html.py b/tests/test_convert_html.py new file mode 100644 index 0000000..6bfc9a4 --- /dev/null +++ b/tests/test_convert_html.py @@ -0,0 +1,271 @@ +import black +import pytest +from htpy.utils import html_to_htpy + + +def test_convert_shorthand_id_and_class(): + input = """ +
+

This is a paragraph.

+
+ """ + + actual = html_to_htpy(input, shorthand_id_class=True, format=True) + expected = _format( + """ + div("#div-id.some-class.other-class")[ + p["This is a paragraph."] + ] + """ + ) + + assert actual == expected + +def test_convert_nested_element(): + input = """ +
+

This is a nested element.

+

Another nested tag.

+
+ """ + + actual = html_to_htpy(input, format=True) + expected = _format( + """ + div[ + p["This is a ", span["nested"], " element."], + p["Another ", a(href="#")["nested ", strong["tag"]], "."] + ] + """ + ) + + assert actual == expected + +def test_convert_self_closing_tags(): + input = """ + An image +
+ + """ + + actual = html_to_htpy(input) + + assert actual == '[img(src="image.jpg",alt="An image"),br,input(type="text")]' + + +def test_convert_attribute_with_special_characters(): + input = """A <test> & 'image'""" + actual = html_to_htpy(input) + assert actual == """img(src="path/to/image.jpg",alt="A & 'image'")""" + + +def test_convert_ignores_comments(): + input = """ + +
Content inside
+ """ + actual = html_to_htpy(input) + assert actual == 'div["Content "," inside"]' + + +def test_convert_special_characters(): + input = """ +

Special characters: & < > " ' ©

+ """ + + actual = html_to_htpy(input) + assert actual == """p["Special characters: & < > " ' ©"]""" + + +def test_convert_script_style_tags(): + input = """ + + + """ + + actual = html_to_htpy(input, format=True) + assert actual == _format( + """[ + script(type="text/javascript")[ + "alert('This is a script');" + ], + style["body { background-color: #fff; }"], + ]""" + ) + + +def test_convert_html_doctype(): + input = """ + + + + Test Document + + +

Header

+

Paragraph

+ + + """ + + actual = html_to_htpy(input) + expected = ( + """html[head[title["Test Document"]],body[h1["Header"],p["Paragraph"]]]""" + ) + + assert actual == expected + + +def test_convert_empty_elements(): + input = """ +
+

+ + """ + + actual = html_to_htpy(input) + assert actual == "[div,p,span]" + + +def test_convert_custom_tag(): + input = """ + Custom content + """ + + actual = html_to_htpy(input) + assert actual == """custom_element(attribute="value")["Custom content"]""" + + +def test_convert_malformed_html(): + input = """ +
+

Paragraph without closing tag +

Another div

+
+ """ + + with pytest.raises(Exception) as e: + html_to_htpy(input) + + assert "Closing tag p does not match the currently open tag (div)" in str(e.value) + + +def test_convert_attributes_without_values(): + input = """ + + + """ + actual = html_to_htpy(input) + assert ( + actual + == """[input(type="checkbox",checked=True),option(selected=True)["Option"]]""" + ) + + +def test_convert_section_regular(): + input = """ + + """ + + actual = html_to_htpy(input, shorthand_id_class=False, format=True) + expected = _format( + """ + section(class_="hero is-fullheight is-link")[ + div(class_="hero-body")[ + div(class_="container")[ + p(class_="subtitle is-3 is-spaced")["Welcome"], + p(class_="title is-1 is-spaced")[f"Student code: {student_code}"] + ] + ] + ] + """ + ) + + assert actual == expected + +def test_convert_section_shorthand_id_class(): + input = """ + + """ + + actual = html_to_htpy(input, shorthand_id_class=True, format=True) + + assert actual == _format( + """ + section(".hero.is-fullheight.is-link")[ + div(".hero-body")[ + div(".container")[ + p(".subtitle.is-3.is-spaced")["Welcome"], + p(".title.is-1.is-spaced")[f"Student code: {student_code}"], + ] + ] + ] + """ + ) + + +def test_convert_nested_element_without_formatting(): + input = """ +
+

This is a nested element.

+

Another nested tag.

+
+ """ + + actual = html_to_htpy(input, format=False) + + expected = 'div[p["This is a ",span["nested"]," element."],p["Another ",a(href="#")["nested ",strong["tag"]],"."]]' + + assert actual == expected + + +def test_convert_html_to_htpy_svg(): + input = """ + + + + """ + + actual_output = html_to_htpy(input, format=True) + + expected_output = _format( + """ + svg( + xmlns="http://www.w3.org/2000/svg", + fill="none", + viewbox="0 0 24 24", + stroke_width="1.5", + stroke="currentColor", + class_="w-6 h-6" + )[ + path( + stroke_linecap="round", + stroke_linejoin="round", + d="m16.862 4.487 1.687-1.688a1.875 1.875 0 1 1 2.652 2.652L10.582 16.07a4.5 4.5 0 0 1-1.897 1.13L6 18l.8-2.685a4.5 4.5 0 0 1 1.13-1.897l8.932-8.931Zm0 0L19.5 7.125M18 14v4.75A2.25 2.25 0 0 1 15.75 21H5.25A2.25 2.25 0 0 1 3 18.75V8.25A2.25 2.25 0 0 1 5.25 6H10" + ) + ] + """ + ) + + assert expected_output == actual_output + + +def _format(s: str): + return black.format_str( + s, + mode=black.FileMode(line_length=80, magic_trailing_comma=False), + ) From 1c7335eb481af4a2c030909993755384f20a2e0b Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 17:42:38 +0200 Subject: [PATCH 02/35] Refactor cli and python interface into html2htpy --- htpy/__init__.py | 15 +++- htpy/cli/cli.py | 79 ------------------- htpy/{utils/html_to_htpy.py => html2htpy.py} | 79 ++++++++++++++++++- htpy/utils/__init__.py | 1 - pyproject.toml | 2 +- ...test_convert_html.py => test_html2htpy.py} | 37 +++++---- 6 files changed, 109 insertions(+), 104 deletions(-) delete mode 100644 htpy/cli/cli.py rename htpy/{utils/html_to_htpy.py => html2htpy.py} (75%) delete mode 100644 htpy/utils/__init__.py rename tests/{test_convert_html.py => test_html2htpy.py} (90%) diff --git a/htpy/__init__.py b/htpy/__init__.py index 5dc4d48..09812e1 100644 --- a/htpy/__init__.py +++ b/htpy/__init__.py @@ -10,6 +10,8 @@ from markupsafe import Markup as _Markup from markupsafe import escape as _escape +from .html2htpy import * + BaseElementSelf = TypeVar("BaseElementSelf", bound="BaseElement") ElementSelf = TypeVar("ElementSelf", bound="Element") @@ -95,7 +97,9 @@ def _generate_attrs(raw_attrs: dict[str, Attribute]) -> Iterable[tuple[str, Attr def _attrs_string(attrs: dict[str, Attribute]) -> str: - result = " ".join(k if v is True else f'{k}="{v}"' for k, v in _generate_attrs(attrs)) + result = " ".join( + k if v is True else f'{k}="{v}"' for k, v in _generate_attrs(attrs) + ) if not result: return "" @@ -147,7 +151,10 @@ def __str__(self) -> _Markup: @overload def __call__( - self: BaseElementSelf, id_class: str, attrs: dict[str, Attribute], **kwargs: Attribute + self: BaseElementSelf, + id_class: str, + attrs: dict[str, Attribute], + **kwargs: Attribute, ) -> BaseElementSelf: ... @overload def __call__( @@ -223,7 +230,9 @@ def __html__(self) -> str: ... _ClassNamesDict: TypeAlias = dict[str, bool] _ClassNames: TypeAlias = Iterable[str | None | bool | _ClassNamesDict] | _ClassNamesDict -Node: TypeAlias = None | str | BaseElement | _HasHtml | Iterable["Node"] | Callable[[], "Node"] +Node: TypeAlias = ( + None | str | BaseElement | _HasHtml | Iterable["Node"] | Callable[[], "Node"] +) Attribute: TypeAlias = None | bool | str | _HasHtml | _ClassNames diff --git a/htpy/cli/cli.py b/htpy/cli/cli.py deleted file mode 100644 index d6f7dc5..0000000 --- a/htpy/cli/cli.py +++ /dev/null @@ -1,79 +0,0 @@ -import argparse -from dataclasses import dataclass - -from htpy.utils import html_to_htpy - - -@dataclass -class ConvertArgs: - shorthand: bool - format: bool - - -def main(): - global_parser = argparse.ArgumentParser(prog="htpy") - subparsers = global_parser.add_subparsers(title="commands", help="commands") - - convert_parser = subparsers.add_parser( - "convert", help="convert html to python (htpy)" - ) - convert_parser.add_argument( - "-s", - "--shorthand", - help="Use shorthand syntax for class and id attributes", - action="store_true", - ) - convert_parser.add_argument( - "-f", - "--format", - help="Format output code (requires black installed)", - action="store_true", - ) - - def _convert_html(args: ConvertArgs): - convert_html_cli(args.shorthand, args.format) - - convert_parser.set_defaults(func=_convert_html) - - args = global_parser.parse_args() - - args.func(args) - - -if __name__ == "__main__": - main() - - -def convert_html_cli(shorthand_id_class: bool, format: bool): - import time - - print("") - print(f"HTML to HTPY converter") - print(f"selected options: ") - print(f" format: {format}") - print(f" shorthand id class: {shorthand_id_class}") - print("\n>>>>>>>>>>>>>>>>>>") - print(">>> paste html >>>") - print(">>>>>>>>>>>>>>>>>>\n") - - collected_text = "" - input_starttime = None - - try: - while True: - user_input = input() - if not input_starttime: - input_starttime = time.time() - - collected_text += user_input - - if input_starttime + 0.1 < time.time(): - break - - output = html_to_htpy(collected_text, shorthand_id_class, format) - print("\n##############################################") - print("### serialized and formatted python (htpy) ###") - print("##############################################\n") - print(output) - except KeyboardInterrupt: - print("\nInterrupted") diff --git a/htpy/utils/html_to_htpy.py b/htpy/html2htpy.py similarity index 75% rename from htpy/utils/html_to_htpy.py rename to htpy/html2htpy.py index aa79789..a0439f8 100644 --- a/htpy/utils/html_to_htpy.py +++ b/htpy/html2htpy.py @@ -1,8 +1,10 @@ -from html.parser import HTMLParser import re +import argparse +from dataclasses import dataclass from typing import Self +from html.parser import HTMLParser -__all__ = ["html_to_htpy"] +__all__ = ["html2htpy"] class Tag: @@ -173,7 +175,7 @@ def serialize_python(self, shorthand_id_class: bool = False, format: bool = Fals return o -def html_to_htpy(html: str, shorthand_id_class: bool = False, format: bool = False): +def html2htpy(html: str, shorthand_id_class: bool = False, format: bool = False): parser = HTPYParser() parser.feed(html) @@ -206,3 +208,74 @@ def _serialize(el: Tag | str, shorthand_id_class: bool): return el.serialize(shorthand_id_class=shorthand_id_class) else: return str(el) + + +@dataclass +class ConvertArgs: + shorthand: bool + format: bool + + +def main(): + parser = argparse.ArgumentParser(prog="html2htpy") + + parser.add_argument( + "-s", + "--shorthand", + help="Use shorthand syntax for class and id attributes", + action="store_true", + ) + parser.add_argument( + "-f", + "--format", + help="Format output code (requires black installed)", + action="store_true", + ) + + def _convert_html(args: ConvertArgs): + convert_html_cli(args.shorthand, args.format) + + parser.set_defaults(func=_convert_html) + + args = parser.parse_args() + + args.func(args) + + +def convert_html_cli(shorthand_id_class: bool, format: bool): + import time + + print("") + print(f"HTML to HTPY converter") + print(f"selected options: ") + print(f" format: {format}") + print(f" shorthand id class: {shorthand_id_class}") + print("\n>>>>>>>>>>>>>>>>>>") + print(">>> paste html >>>") + print(">>>>>>>>>>>>>>>>>>\n") + + collected_text = "" + input_starttime = None + + try: + while True: + user_input = input() + if not input_starttime: + input_starttime = time.time() + + collected_text += user_input + + if input_starttime + 0.1 < time.time(): + break + + output = html2htpy(collected_text, shorthand_id_class, format) + print("\n##############################################") + print("### serialized and formatted python (htpy) ###") + print("##############################################\n") + print(output) + except KeyboardInterrupt: + print("\nInterrupted") + + +if __name__ == "__main__": + main() diff --git a/htpy/utils/__init__.py b/htpy/utils/__init__.py deleted file mode 100644 index 2a107d0..0000000 --- a/htpy/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from htpy.utils.html_to_htpy import * diff --git a/pyproject.toml b/pyproject.toml index 9dd6c0d..390693b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ Documentation = "https://htpy.dev" Issues = "https://github.com/pelme/htpy/issues" [project.scripts] -htpy = "htpy.cli.cli:main" +html2htpy = "htpy.html2htpy:main" [build-system] requires = ["flit_core >=3.2,<4"] diff --git a/tests/test_convert_html.py b/tests/test_html2htpy.py similarity index 90% rename from tests/test_convert_html.py rename to tests/test_html2htpy.py index 6bfc9a4..ee02905 100644 --- a/tests/test_convert_html.py +++ b/tests/test_html2htpy.py @@ -1,6 +1,6 @@ import black import pytest -from htpy.utils import html_to_htpy +from htpy import html2htpy def test_convert_shorthand_id_and_class(): @@ -10,7 +10,7 @@ def test_convert_shorthand_id_and_class():
""" - actual = html_to_htpy(input, shorthand_id_class=True, format=True) + actual = html2htpy(input, shorthand_id_class=True, format=True) expected = _format( """ div("#div-id.some-class.other-class")[ @@ -21,6 +21,7 @@ def test_convert_shorthand_id_and_class(): assert actual == expected + def test_convert_nested_element(): input = """
@@ -29,7 +30,7 @@ def test_convert_nested_element():
""" - actual = html_to_htpy(input, format=True) + actual = html2htpy(input, format=True) expected = _format( """ div[ @@ -41,6 +42,7 @@ def test_convert_nested_element(): assert actual == expected + def test_convert_self_closing_tags(): input = """ An image @@ -48,14 +50,14 @@ def test_convert_self_closing_tags(): """ - actual = html_to_htpy(input) + actual = html2htpy(input) assert actual == '[img(src="image.jpg",alt="An image"),br,input(type="text")]' def test_convert_attribute_with_special_characters(): input = """A <test> & 'image'""" - actual = html_to_htpy(input) + actual = html2htpy(input) assert actual == """img(src="path/to/image.jpg",alt="A & 'image'")""" @@ -64,7 +66,7 @@ def test_convert_ignores_comments():
Content inside
""" - actual = html_to_htpy(input) + actual = html2htpy(input) assert actual == 'div["Content "," inside"]' @@ -73,7 +75,7 @@ def test_convert_special_characters():

Special characters: & < > " ' ©

""" - actual = html_to_htpy(input) + actual = html2htpy(input) assert actual == """p["Special characters: & < > " ' ©"]""" @@ -83,7 +85,7 @@ def test_convert_script_style_tags(): """ - actual = html_to_htpy(input, format=True) + actual = html2htpy(input, format=True) assert actual == _format( """[ script(type="text/javascript")[ @@ -108,7 +110,7 @@ def test_convert_html_doctype(): """ - actual = html_to_htpy(input) + actual = html2htpy(input) expected = ( """html[head[title["Test Document"]],body[h1["Header"],p["Paragraph"]]]""" ) @@ -123,7 +125,7 @@ def test_convert_empty_elements(): """ - actual = html_to_htpy(input) + actual = html2htpy(input) assert actual == "[div,p,span]" @@ -132,7 +134,7 @@ def test_convert_custom_tag(): Custom content """ - actual = html_to_htpy(input) + actual = html2htpy(input) assert actual == """custom_element(attribute="value")["Custom content"]""" @@ -145,7 +147,7 @@ def test_convert_malformed_html(): """ with pytest.raises(Exception) as e: - html_to_htpy(input) + html2htpy(input) assert "Closing tag p does not match the currently open tag (div)" in str(e.value) @@ -155,7 +157,7 @@ def test_convert_attributes_without_values(): """ - actual = html_to_htpy(input) + actual = html2htpy(input) assert ( actual == """[input(type="checkbox",checked=True),option(selected=True)["Option"]]""" @@ -174,7 +176,7 @@ def test_convert_section_regular(): """ - actual = html_to_htpy(input, shorthand_id_class=False, format=True) + actual = html2htpy(input, shorthand_id_class=False, format=True) expected = _format( """ section(class_="hero is-fullheight is-link")[ @@ -190,6 +192,7 @@ def test_convert_section_regular(): assert actual == expected + def test_convert_section_shorthand_id_class(): input = """ """ - actual = html_to_htpy(input, shorthand_id_class=True, format=True) + actual = html2htpy(input, shorthand_id_class=True, format=True) assert actual == _format( """ @@ -226,7 +229,7 @@ def test_convert_nested_element_without_formatting(): """ - actual = html_to_htpy(input, format=False) + actual = html2htpy(input, format=False) expected = 'div[p["This is a ",span["nested"]," element."],p["Another ",a(href="#")["nested ",strong["tag"]],"."]]' @@ -240,7 +243,7 @@ def test_convert_html_to_htpy_svg(): """ - actual_output = html_to_htpy(input, format=True) + actual_output = html2htpy(input, format=True) expected_output = _format( """ From 698b4bffe084f998390e2405385690451606616a Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 17:47:12 +0200 Subject: [PATCH 03/35] Minor refactor, use iterable instead of loop --- htpy/html2htpy.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index a0439f8..024683d 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -28,18 +28,17 @@ def serialize(self, shorthand_id_class: bool = False): _attrs = "" _kwattrs: list[tuple[str, str | None]] = [] - for i in range(len(self.attrs)): - a = self.attrs[i] + for a in self.attrs: key = a[0] if key == "class": if shorthand_id_class: - _positional_attrs[key] = self.attrs[i][1] + _positional_attrs[key] = a[1] else: _kwattrs.append(a) elif key == "id": if shorthand_id_class: - _positional_attrs[key] = self.attrs[i][1] + _positional_attrs[key] = a[1] else: _kwattrs.append(a) else: From b1c5aeb1f736badaea1ac766836e1b28400332de Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 18:29:28 +0200 Subject: [PATCH 04/35] Refactor tests (hardcoded expected, no formatting) --- tests/test_html2htpy.py | 61 ++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/tests/test_html2htpy.py b/tests/test_html2htpy.py index ee02905..d84ef43 100644 --- a/tests/test_html2htpy.py +++ b/tests/test_html2htpy.py @@ -1,4 +1,4 @@ -import black +import textwrap import pytest from htpy import html2htpy @@ -11,31 +11,25 @@ def test_convert_shorthand_id_and_class(): """ actual = html2htpy(input, shorthand_id_class=True, format=True) - expected = _format( - """ - div("#div-id.some-class.other-class")[ - p["This is a paragraph."] - ] - """ - ) + expected = 'div("#div-id.some-class.other-class")[p["This is a paragraph."]]\n' assert actual == expected def test_convert_nested_element(): input = """ -
-

This is a nested element.

-

Another nested tag.

-
+
+

This is a nested element.

+

Another nested tag.

+
""" actual = html2htpy(input, format=True) - expected = _format( - """ + expected = textwrap.dedent( + """\ div[ p["This is a ", span["nested"], " element."], - p["Another ", a(href="#")["nested ", strong["tag"]], "."] + p["Another ", a(href="#")["nested ", strong["tag"]], "."], ] """ ) @@ -86,13 +80,13 @@ def test_convert_script_style_tags(): """ actual = html2htpy(input, format=True) - assert actual == _format( - """[ - script(type="text/javascript")[ - "alert('This is a script');" - ], + assert actual == textwrap.dedent( + """\ + [ + script(type="text/javascript")["alert('This is a script');"], style["body { background-color: #fff; }"], - ]""" + ] + """ ) @@ -177,13 +171,13 @@ def test_convert_section_regular(): """ actual = html2htpy(input, shorthand_id_class=False, format=True) - expected = _format( - """ + expected = textwrap.dedent( + """\ section(class_="hero is-fullheight is-link")[ div(class_="hero-body")[ div(class_="container")[ p(class_="subtitle is-3 is-spaced")["Welcome"], - p(class_="title is-1 is-spaced")[f"Student code: {student_code}"] + p(class_="title is-1 is-spaced")[f"Student code: {student_code}"], ] ] ] @@ -207,8 +201,8 @@ def test_convert_section_shorthand_id_class(): actual = html2htpy(input, shorthand_id_class=True, format=True) - assert actual == _format( - """ + assert actual == textwrap.dedent( + """\ section(".hero.is-fullheight.is-link")[ div(".hero-body")[ div(".container")[ @@ -245,30 +239,23 @@ def test_convert_html_to_htpy_svg(): actual_output = html2htpy(input, format=True) - expected_output = _format( - """ + expected_output = textwrap.dedent( + """\ svg( xmlns="http://www.w3.org/2000/svg", fill="none", viewbox="0 0 24 24", stroke_width="1.5", stroke="currentColor", - class_="w-6 h-6" + class_="w-6 h-6", )[ path( stroke_linecap="round", stroke_linejoin="round", - d="m16.862 4.487 1.687-1.688a1.875 1.875 0 1 1 2.652 2.652L10.582 16.07a4.5 4.5 0 0 1-1.897 1.13L6 18l.8-2.685a4.5 4.5 0 0 1 1.13-1.897l8.932-8.931Zm0 0L19.5 7.125M18 14v4.75A2.25 2.25 0 0 1 15.75 21H5.25A2.25 2.25 0 0 1 3 18.75V8.25A2.25 2.25 0 0 1 5.25 6H10" + d="m16.862 4.487 1.687-1.688a1.875 1.875 0 1 1 2.652 2.652L10.582 16.07a4.5 4.5 0 0 1-1.897 1.13L6 18l.8-2.685a4.5 4.5 0 0 1 1.13-1.897l8.932-8.931Zm0 0L19.5 7.125M18 14v4.75A2.25 2.25 0 0 1 15.75 21H5.25A2.25 2.25 0 0 1 3 18.75V8.25A2.25 2.25 0 0 1 5.25 6H10", ) ] """ ) assert expected_output == actual_output - - -def _format(s: str): - return black.format_str( - s, - mode=black.FileMode(line_length=80, magic_trailing_comma=False), - ) From 3cf0827b253632e3a00c8e7733025dc5816622ad Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 18:36:23 +0200 Subject: [PATCH 05/35] Undo unintentional formatting of __init__.py --- htpy/__init__.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/htpy/__init__.py b/htpy/__init__.py index 09812e1..810e5d6 100644 --- a/htpy/__init__.py +++ b/htpy/__init__.py @@ -97,9 +97,7 @@ def _generate_attrs(raw_attrs: dict[str, Attribute]) -> Iterable[tuple[str, Attr def _attrs_string(attrs: dict[str, Attribute]) -> str: - result = " ".join( - k if v is True else f'{k}="{v}"' for k, v in _generate_attrs(attrs) - ) + result = " ".join(k if v is True else f'{k}="{v}"' for k, v in _generate_attrs(attrs)) if not result: return "" @@ -151,10 +149,7 @@ def __str__(self) -> _Markup: @overload def __call__( - self: BaseElementSelf, - id_class: str, - attrs: dict[str, Attribute], - **kwargs: Attribute, + self: BaseElementSelf, id_class: str, attrs: dict[str, Attribute], **kwargs: Attribute ) -> BaseElementSelf: ... @overload def __call__( @@ -230,9 +225,7 @@ def __html__(self) -> str: ... _ClassNamesDict: TypeAlias = dict[str, bool] _ClassNames: TypeAlias = Iterable[str | None | bool | _ClassNamesDict] | _ClassNamesDict -Node: TypeAlias = ( - None | str | BaseElement | _HasHtml | Iterable["Node"] | Callable[[], "Node"] -) +Node: TypeAlias = None | str | BaseElement | _HasHtml | Iterable["Node"] | Callable[[], "Node"] Attribute: TypeAlias = None | bool | str | _HasHtml | _ClassNames From 98f283ff478e6a31a39ff692a494cc6b5fc2d8e0 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 20:19:11 +0200 Subject: [PATCH 06/35] String content escaping --- htpy/html2htpy.py | 28 ++++++++++++++++++++++------ tests/test_html2htpy.py | 13 ++++++++++++- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index 024683d..58e7f42 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -183,18 +183,34 @@ def html2htpy(html: str, shorthand_id_class: bool = False, format: bool = False) def _convert_data_to_string(data: str): _data = str(data) - escaped_text = _data.replace('"', '\\"') - pattern = re.compile(r"\{\{\s*(\w+)\s*\}\}") + # escape unescaped dblquote: " -> \" + _data = re.compile(r'(? { var } + # { -> {{ + # } -> }} + template_string_replace_pattern = re.compile( + r"(\{\{\s*(\w+)\s*\}\}|(? " ' ©"]""" + assert actual == 'p["Special characters: & < > \\" \' ©"]' + + +def test_convert_f_string_escaping(): + input = """ +

{{ variable }} is "a" { paragraph }.

+ """ + + actual = html2htpy(input, format=False) + expected = r'p[f"{ variable } is \"a\" {{ paragraph }}."]' + + assert actual == expected def test_convert_script_style_tags(): From c4b71a01c0e629ccc1e7977d60674c111367c9db Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 21:53:03 +0200 Subject: [PATCH 07/35] simplify cli app, input from file or stdin --- htpy/html2htpy.py | 62 +++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index 58e7f42..79e4034 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -1,5 +1,7 @@ +import sys import re import argparse +import select from dataclasses import dataclass from typing import Self from html.parser import HTMLParser @@ -246,50 +248,36 @@ def main(): help="Format output code (requires black installed)", action="store_true", ) - - def _convert_html(args: ConvertArgs): - convert_html_cli(args.shorthand, args.format) - - parser.set_defaults(func=_convert_html) + parser.add_argument( + "input", + type=argparse.FileType("r"), + nargs="?", + default=sys.stdin, + help="input html from file or stdin", + ) args = parser.parse_args() - args.func(args) - - -def convert_html_cli(shorthand_id_class: bool, format: bool): - import time - - print("") - print(f"HTML to HTPY converter") - print(f"selected options: ") - print(f" format: {format}") - print(f" shorthand id class: {shorthand_id_class}") - print("\n>>>>>>>>>>>>>>>>>>") - print(">>> paste html >>>") - print(">>>>>>>>>>>>>>>>>>\n") - - collected_text = "" - input_starttime = None + if args.input == sys.stdin and select.select([sys.stdin], [], [], 0.1)[0]: + input = args.input.read() + elif args.input != sys.stdin: + input = args.input.read() + else: + _printerr( + "No input provided. Please supply an input file or stream.", + ) + _printerr("Example usage: `cat index.html | html2htpy`") + _printerr("`html2htpy -h` for help") + sys.exit(1) - try: - while True: - user_input = input() - if not input_starttime: - input_starttime = time.time() + shorthand: bool = args.shorthand + format: bool = args.format - collected_text += user_input + print(html2htpy(input, shorthand, format)) - if input_starttime + 0.1 < time.time(): - break - output = html2htpy(collected_text, shorthand_id_class, format) - print("\n##############################################") - print("### serialized and formatted python (htpy) ###") - print("##############################################\n") - print(output) - except KeyboardInterrupt: - print("\nInterrupted") +def _printerr(value: str): + print(value, file=sys.stderr) if __name__ == "__main__": From 158215b1f8450858dd0ab50e3d958918b6d8ec5b Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 22:20:49 +0200 Subject: [PATCH 08/35] Remove the import of html2htpy in init file --- htpy/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/htpy/__init__.py b/htpy/__init__.py index 810e5d6..5dc4d48 100644 --- a/htpy/__init__.py +++ b/htpy/__init__.py @@ -10,8 +10,6 @@ from markupsafe import Markup as _Markup from markupsafe import escape as _escape -from .html2htpy import * - BaseElementSelf = TypeVar("BaseElementSelf", bound="BaseElement") ElementSelf = TypeVar("ElementSelf", bound="Element") From bebe80ebce7ddbbb782fcc5eec1af0238fb92d88 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 23:07:46 +0200 Subject: [PATCH 09/35] Update import of html2htpy in test --- tests/test_html2htpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_html2htpy.py b/tests/test_html2htpy.py index 646d90b..1bfc9b3 100644 --- a/tests/test_html2htpy.py +++ b/tests/test_html2htpy.py @@ -1,6 +1,6 @@ import textwrap import pytest -from htpy import html2htpy +from htpy.html2htpy import html2htpy def test_convert_shorthand_id_and_class(): From 1f93e2ea2597db593e2797bb0cff3859be2f6505 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 23:51:29 +0200 Subject: [PATCH 10/35] Bugfix: accept jinja style templates w/period --- htpy/html2htpy.py | 16 +++++++++++++--- tests/test_html2htpy.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index 79e4034..f8cd0bc 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -186,19 +186,23 @@ def html2htpy(html: str, shorthand_id_class: bool = False, format: bool = False) def _convert_data_to_string(data: str): _data = str(data) + is_multiline = "\n" in _data + + _data = _data.replace("\n", "") + # escape unescaped dblquote: " -> \" _data = re.compile(r'(? { var } + # {{ var.xx }} -> { var.xx } # { -> {{ # } -> }} template_string_replace_pattern = re.compile( - r"(\{\{\s*(\w+)\s*\}\}|(? +

{{ heading }}

+

Welcome to our cooking site, {{ user.name }}!

+ +

Recipe of the Day: {{ recipe.name }}

+

{{ recipe.description }}

+ +

Instructions:

+
    + {% for step in recipe.steps %} +
  1. {{ step }}
  2. + {% endfor %} +
+ + """ + + actual = html2htpy(input, format=True) + expected = textwrap.dedent( + """\ + body[ + h1[f"{ heading }"], + p[f"Welcome to our cooking site, { user.name }!"], + h2[f"Recipe of the Day: { recipe.name }"], + p[f"{ recipe.description }"], + h3["Instructions:"], + ol[ + \"\"\" {% for step in recipe.steps %} \"\"\", + li[f"{ step }"], + \"\"\" {% endfor %} \"\"\", + ], + ] + """ + ) + + assert actual == expected + + def test_convert_script_style_tags(): input = """ From d1efa4b2c8f647fae74f2bd6ef8c305316dcc3ab Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Tue, 11 Jun 2024 23:57:54 +0200 Subject: [PATCH 11/35] Docs for html2htpy --- docs/html2htpy.md | 187 ++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 188 insertions(+) create mode 100644 docs/html2htpy.md diff --git a/docs/html2htpy.md b/docs/html2htpy.md new file mode 100644 index 0000000..8f69bfa --- /dev/null +++ b/docs/html2htpy.md @@ -0,0 +1,187 @@ + +# Convert HTML to HTPY + +Maybe you already have a bunch of html, or templates that you would like to migrate to htpy. +We got you covered. HTPY ships with a utility command `html2htpy` that can be used to transform existing +html into python code (htpy!). + +``` +$ html2htpy -h +usage: html2htpy [-h] [-s] [-f] [input] + +positional arguments: + input input html from file or stdin + +options: + -h, --help show this help message and exit + -s, --shorthand Use shorthand syntax for class and id attributes + -f, --format Format output code (requires black installed) +``` + + +Lets say you have an existing html file: + +```html title="index.html" + + + + + + HTPY Recipes + + + + +
+

Recipe of the Day: Spaghetti Carbonara

+

This classic Italian dish is quick and easy to make.

+
+ + + + +``` + +Now, if you run the command, it outputs the corresponding python code (htpy). + +``` +$ html2htpy -f index.html +``` + +```py +html(lang="en")[ + head[ + meta(charset="UTF-8"), + meta(name="viewport", content="width=device-width, initial-scale=1.0"), + title["HTPY Recipes"], + ], + body[ + div(id="header")[ + h1["Welcome to the cooking site"], + p["Your go-to place for delicious recipes!"], + ], + div(id="recipe-of-the-day", class_="section")[ + h2[ + "Recipe of the Day: ", + span(class_="highlight")["Spaghetti Carbonara"], + ], + p["This classic Italian dish is quick and easy to make."], + ], + div(id="footer")[p["© 2024 My Cooking Site. All rights reserved."]], + ], +] +``` + +## Piping input/stdin stream + +You can also pipe input to htpy, for example `cat demo.html | html2htpy`. + +This can be combinded with other workflows in the way that you find most suitable. +For example, you might pipe from your clipboard to htpy, and optionaly direct the output to a file. + +#### Linux + +``` +xclip -o -selection clipboard | html2htpy > output.py +``` + +#### Mac + +``` +pbpaste | html2htpy > output.py +``` + +#### Windows + +``` +powershell Get-Clipboard | html2htpy > output.py +``` + + +## Formatting the output +`html2htpy` can format the output python code using `black`. It needs to be available in your python environment +when you run `html2htpy` with the `-f`/`--format` flag. You might have it in your environment already, or you can install it +as part of the htpy extras: `pip install htpy[extras]`. + +By default, output code is not formatted. + + +## Shorthand syntax + +If you prefer the htpy "shorthand" syntax for the id and class properties, you can get it by passing the `-s`/`--shorthand` flag + + +```html title="shorthand.html" + +``` + +...becomes: + +```py +$ html2htpy -f -s example.html +section(".hero.is-fullheight.is-link")[ + div(".hero-body")[ + div(".container")[ + p(".subtitle.is-3.is-spaced")["Welcome"], + ] + ] +] +``` + +## Template interpolation to f-strings + +You might have some templates laying around after using jinja or some other templating language. + +`html2htpy` will try to convert the `template {{ variables }}`... + +...to pythonic f-strings: `f"template { variables }"` + +Note that other template template syntax, such as loops `{% for x in y %}` can not be transformed at +this time, so you will often have to clean up a bit after `html2htpy` is done with its thing. + +See the example below: + +```html title="jinja.html" + +

{{ heading }}

+

Welcome to our cooking site, {{ user.name }}!

+ +

Recipe of the Day: {{ recipe.name }}

+

{{ recipe.description }}

+ +

Instructions:

+
    + {% for step in recipe.steps %} +
  1. {{ step }}
  2. + {% endfor %} +
+ +``` + +```py +$ html2htpy -f -s jinja.html +body[ + h1[f"{ heading }"], + p[f"Welcome to our cooking site, { user.name }!"], + h2[f"Recipe of the Day: { recipe.name }"], + p[f"{ recipe.description }"], + h3["Instructions:"], + ol[ + """ {% for step in recipe.steps %} """, + li[f"{ step }"], + """ {% endfor %} """, + ], +] +``` + diff --git a/mkdocs.yml b/mkdocs.yml index fd1a2f4..fb2f2ad 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -16,6 +16,7 @@ nav: - static-typing.md - django.md - streaming.md + - html2htpy.md - faq.md - references.md markdown_extensions: From 731c97e5a62f5daae00e4ebb38cd06e89ac448c4 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Wed, 12 Jun 2024 08:49:53 +0200 Subject: [PATCH 12/35] Removing select.select when reading stdin --- htpy/html2htpy.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index f8cd0bc..020f9b1 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -1,7 +1,6 @@ import sys import re import argparse -import select from dataclasses import dataclass from typing import Self from html.parser import HTMLParser @@ -268,16 +267,22 @@ def main(): args = parser.parse_args() - if args.input == sys.stdin and select.select([sys.stdin], [], [], 0.1)[0]: - input = args.input.read() - elif args.input != sys.stdin: - input = args.input.read() - else: + try: + if args.input == sys.stdin: + input = args.input.read() + elif args.input != sys.stdin: + input = args.input.read() + else: + _printerr( + "No input provided. Please supply an input file or stream.", + ) + _printerr("Example usage: `cat index.html | html2htpy`") + _printerr("`html2htpy -h` for help") + sys.exit(1) + except KeyboardInterrupt: _printerr( - "No input provided. Please supply an input file or stream.", + "\nInterrupted", ) - _printerr("Example usage: `cat index.html | html2htpy`") - _printerr("`html2htpy -h` for help") sys.exit(1) shorthand: bool = args.shorthand From c60dd1e260a2523a6fd2b6e5bfce131c9374f322 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Wed, 12 Jun 2024 13:05:28 +0200 Subject: [PATCH 13/35] Update Formatting, -f auto/ruff/black/none from path --- docs/html2htpy.md | 23 ++++----- htpy/html2htpy.py | 105 ++++++++++++++++++++++++++++++++-------- pyproject.toml | 3 -- tests/test_html2htpy.py | 20 ++++---- 4 files changed, 108 insertions(+), 43 deletions(-) diff --git a/docs/html2htpy.md b/docs/html2htpy.md index 8f69bfa..75f55f0 100644 --- a/docs/html2htpy.md +++ b/docs/html2htpy.md @@ -7,15 +7,16 @@ html into python code (htpy!). ``` $ html2htpy -h -usage: html2htpy [-h] [-s] [-f] [input] +usage: html2htpy [-h] [-s] [-f {auto,ruff,black,none}] [input] positional arguments: - input input html from file or stdin + input input html from file or stdin options: - -h, --help show this help message and exit - -s, --shorthand Use shorthand syntax for class and id attributes - -f, --format Format output code (requires black installed) + -h, --help show this help message and exit + -s, --shorthand Use shorthand syntax for class and id attributes + -f {auto,ruff,black,none}, --format {auto,ruff,black,none} + Select one of the following formatting options: auto, ruff, black or none ``` @@ -50,7 +51,7 @@ Lets say you have an existing html file: Now, if you run the command, it outputs the corresponding python code (htpy). ``` -$ html2htpy -f index.html +$ html2htpy index.html ``` ```py @@ -104,11 +105,11 @@ powershell Get-Clipboard | html2htpy > output.py ## Formatting the output -`html2htpy` can format the output python code using `black`. It needs to be available in your python environment -when you run `html2htpy` with the `-f`/`--format` flag. You might have it in your environment already, or you can install it -as part of the htpy extras: `pip install htpy[extras]`. +`html2htpy` can format the output python code using `black` or `ruff`. +Select the preferred formatter with the `-f`/`--format` flag. Options are `auto`, `ruff`, `black` and `none`. -By default, output code is not formatted. +By default, the selection will be `auto`, formatting if it finds a formatter on path, prefering `ruff` if it's available. +If no formatters are available on path, the output not be formatted. ## Shorthand syntax @@ -170,7 +171,7 @@ See the example below: ``` ```py -$ html2htpy -f -s jinja.html +$ html2htpy -s jinja.html body[ h1[f"{ heading }"], p[f"Welcome to our cooking site, { user.name }!"], diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index 020f9b1..cb45508 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -1,8 +1,11 @@ +from abc import ABC, abstractmethod import sys import re +import subprocess import argparse +import shutil from dataclasses import dataclass -from typing import Self +from typing import Literal, Self from html.parser import HTMLParser __all__ = ["html2htpy"] @@ -102,6 +105,32 @@ def serialize(self, shorthand_id_class: bool = False): return f"{_type}{_attrs}{_children}" +class Formatter(ABC): + @abstractmethod + def format(self, s: str) -> str: + raise NotImplementedError() + + +class BlackFormatter(Formatter): + def format(self, s: str) -> str: + result = subprocess.run( + ["black", "-q", "-"], + input=s.encode("utf8"), + stdout=subprocess.PIPE, + ) + return result.stdout.decode("utf8") + + +class RuffFormatter(Formatter): + def format(self, s: str) -> str: + result = subprocess.run( + ["ruff", "format", "-"], + input=s.encode("utf8"), + stdout=subprocess.PIPE, + ) + return result.stdout.decode("utf8") + + class HTPYParser(HTMLParser): def __init__(self): self._collected: list[Tag | str] = [] @@ -148,7 +177,9 @@ def handle_data(self, data: str): else: self._collected.append(stringified_data) - def serialize_python(self, shorthand_id_class: bool = False, format: bool = False): + def serialize_python( + self, shorthand_id_class: bool = False, formatter: Formatter | None = None + ): o = "" if len(self._collected) == 1: @@ -160,26 +191,21 @@ def serialize_python(self, shorthand_id_class: bool = False, format: bool = Fals o += _serialize(t, shorthand_id_class) + "," o = o[:-1] + "]" - if format: - try: - import black - except: - raise Exception( - "Cannot import formatter. Please ensure black is installed." - ) - - return black.format_str( - o, mode=black.FileMode(line_length=80, magic_trailing_comma=False) - ) + if formatter: + return formatter.format(o) else: return o -def html2htpy(html: str, shorthand_id_class: bool = False, format: bool = False): +def html2htpy( + html: str, + shorthand_id_class: bool = False, + formatter: Formatter | None = None, +): parser = HTPYParser() parser.feed(html) - return parser.serialize_python(shorthand_id_class, format) + return parser.serialize_python(shorthand_id_class, formatter) def _convert_data_to_string(data: str): @@ -236,6 +262,45 @@ def _serialize(el: Tag | str, shorthand_id_class: bool): return str(el) +def _get_formatter( + format: Literal["auto", "ruff", "black", "none"] +) -> Formatter | None: + formatter: Formatter | None = None + if format == "ruff": + if _is_package_installed("ruff"): + formatter = RuffFormatter() + else: + _printerr( + "Selected formatter (ruff) is not installed.", + ) + _printerr("Please install it or select another formatter.") + _printerr("`html2htpy -h` for help") + sys.exit(1) + + if format == "black": + if _is_package_installed("black"): + formatter = BlackFormatter() + else: + _printerr( + "Selected formatter (black) is not installed.", + ) + _printerr("Please install it or select another formatter.") + _printerr("`html2htpy -h` for help") + sys.exit(1) + + elif format == "auto": + if _is_package_installed("ruff"): + formatter = RuffFormatter() + elif _is_package_installed("black"): + formatter = BlackFormatter() + + return formatter + + +def _is_package_installed(package_name: str): + return shutil.which(package_name) is not None + + @dataclass class ConvertArgs: shorthand: bool @@ -254,8 +319,9 @@ def main(): parser.add_argument( "-f", "--format", - help="Format output code (requires black installed)", - action="store_true", + choices=["auto", "ruff", "black", "none"], + default="auto", + help="Select one of the following formatting options: auto, ruff, black or none", ) parser.add_argument( "input", @@ -286,9 +352,10 @@ def main(): sys.exit(1) shorthand: bool = args.shorthand - format: bool = args.format - print(html2htpy(input, shorthand, format)) + formatter = _get_formatter(args.format) + + print(html2htpy(input, shorthand, formatter)) def _printerr(value: str): diff --git a/pyproject.toml b/pyproject.toml index 390693b..6203909 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,9 +31,6 @@ optional-dependencies.dev = [ "django-stubs", "jinja2", ] -optional-dependencies.extras = [ - "black" -] optional-dependencies.docs = [ "mkdocs-material==9.5.12", ] diff --git a/tests/test_html2htpy.py b/tests/test_html2htpy.py index c5e8c5f..de82e72 100644 --- a/tests/test_html2htpy.py +++ b/tests/test_html2htpy.py @@ -1,6 +1,6 @@ import textwrap import pytest -from htpy.html2htpy import html2htpy +from htpy.html2htpy import BlackFormatter, html2htpy def test_convert_shorthand_id_and_class(): @@ -10,7 +10,7 @@ def test_convert_shorthand_id_and_class(): """ - actual = html2htpy(input, shorthand_id_class=True, format=True) + actual = html2htpy(input, shorthand_id_class=True, formatter=BlackFormatter()) expected = 'div("#div-id.some-class.other-class")[p["This is a paragraph."]]\n' assert actual == expected @@ -24,7 +24,7 @@ def test_convert_nested_element(): """ - actual = html2htpy(input, format=True) + actual = html2htpy(input, formatter=BlackFormatter()) expected = textwrap.dedent( """\ div[ @@ -78,7 +78,7 @@ def test_convert_f_string_escaping():

{{ variable }} is "a" { paragraph }.

""" - actual = html2htpy(input, format=False) + actual = html2htpy(input) expected = r'p[f"{ variable } is \"a\" {{ paragraph }}."]' assert actual == expected @@ -102,7 +102,7 @@ def test_convert_f_string_escaping_complex(): """ - actual = html2htpy(input, format=True) + actual = html2htpy(input, formatter=BlackFormatter()) expected = textwrap.dedent( """\ body[ @@ -129,7 +129,7 @@ def test_convert_script_style_tags(): """ - actual = html2htpy(input, format=True) + actual = html2htpy(input, formatter=BlackFormatter()) assert actual == textwrap.dedent( """\ [ @@ -220,7 +220,7 @@ def test_convert_section_regular(): """ - actual = html2htpy(input, shorthand_id_class=False, format=True) + actual = html2htpy(input, shorthand_id_class=False, formatter=BlackFormatter()) expected = textwrap.dedent( """\ section(class_="hero is-fullheight is-link")[ @@ -249,7 +249,7 @@ def test_convert_section_shorthand_id_class(): """ - actual = html2htpy(input, shorthand_id_class=True, format=True) + actual = html2htpy(input, shorthand_id_class=True, formatter=BlackFormatter()) assert actual == textwrap.dedent( """\ @@ -273,7 +273,7 @@ def test_convert_nested_element_without_formatting(): """ - actual = html2htpy(input, format=False) + actual = html2htpy(input, formatter=None) expected = 'div[p["This is a ",span["nested"]," element."],p["Another ",a(href="#")["nested ",strong["tag"]],"."]]' @@ -287,7 +287,7 @@ def test_convert_html_to_htpy_svg(): """ - actual_output = html2htpy(input, format=True) + actual_output = html2htpy(input, formatter=BlackFormatter()) expected_output = textwrap.dedent( """\ From f91345fa4e4865b19265e9d18d6299c7a6c47f50 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Wed, 12 Jun 2024 18:28:31 +0200 Subject: [PATCH 14/35] Default shorthand syntax, --explicit -> kwargs id, class_ --- htpy/html2htpy.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index cb45508..a098178 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -4,7 +4,6 @@ import subprocess import argparse import shutil -from dataclasses import dataclass from typing import Literal, Self from html.parser import HTMLParser @@ -199,7 +198,7 @@ def serialize_python( def html2htpy( html: str, - shorthand_id_class: bool = False, + shorthand_id_class: bool = True, formatter: Formatter | None = None, ): parser = HTPYParser() @@ -301,19 +300,13 @@ def _is_package_installed(package_name: str): return shutil.which(package_name) is not None -@dataclass -class ConvertArgs: - shorthand: bool - format: bool - - def main(): parser = argparse.ArgumentParser(prog="html2htpy") parser.add_argument( - "-s", - "--shorthand", - help="Use shorthand syntax for class and id attributes", + "-e", + "--explicit", + help="Use explicit `id` and `class_` kwargs instead of the shorthand #id.class syntax", action="store_true", ) parser.add_argument( @@ -351,7 +344,7 @@ def main(): ) sys.exit(1) - shorthand: bool = args.shorthand + shorthand: bool = False if args.explicit else True formatter = _get_formatter(args.format) From 50c893f660627eae362a4051ff1dea498c8c3109 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Wed, 12 Jun 2024 18:30:39 +0200 Subject: [PATCH 15/35] Refactor, more descriptive function name --- htpy/html2htpy.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index a098178..366ec9f 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -266,7 +266,7 @@ def _get_formatter( ) -> Formatter | None: formatter: Formatter | None = None if format == "ruff": - if _is_package_installed("ruff"): + if _is_command_available("ruff"): formatter = RuffFormatter() else: _printerr( @@ -277,7 +277,7 @@ def _get_formatter( sys.exit(1) if format == "black": - if _is_package_installed("black"): + if _is_command_available("black"): formatter = BlackFormatter() else: _printerr( @@ -288,16 +288,16 @@ def _get_formatter( sys.exit(1) elif format == "auto": - if _is_package_installed("ruff"): + if _is_command_available("ruff"): formatter = RuffFormatter() - elif _is_package_installed("black"): + elif _is_command_available("black"): formatter = BlackFormatter() return formatter -def _is_package_installed(package_name: str): - return shutil.which(package_name) is not None +def _is_command_available(command: str): + return shutil.which(command) is not None def main(): From b6bd44ff6d3e0a2364baacafc63c5893e77e5071 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Wed, 12 Jun 2024 18:52:00 +0200 Subject: [PATCH 16/35] fix failing test --- tests/test_html2htpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_html2htpy.py b/tests/test_html2htpy.py index de82e72..afda8a7 100644 --- a/tests/test_html2htpy.py +++ b/tests/test_html2htpy.py @@ -292,12 +292,12 @@ def test_convert_html_to_htpy_svg(): expected_output = textwrap.dedent( """\ svg( + ".w-6.h-6", xmlns="http://www.w3.org/2000/svg", fill="none", viewbox="0 0 24 24", stroke_width="1.5", stroke="currentColor", - class_="w-6 h-6", )[ path( stroke_linecap="round", From 8784204ccefefbf91e3c52ee645ef634a802f6bb Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Wed, 12 Jun 2024 19:01:26 +0200 Subject: [PATCH 17/35] Avoid intermediate var in _get_formatter, immediate returns --- htpy/html2htpy.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/htpy/html2htpy.py b/htpy/html2htpy.py index 366ec9f..f9ec0e0 100644 --- a/htpy/html2htpy.py +++ b/htpy/html2htpy.py @@ -264,10 +264,9 @@ def _serialize(el: Tag | str, shorthand_id_class: bool): def _get_formatter( format: Literal["auto", "ruff", "black", "none"] ) -> Formatter | None: - formatter: Formatter | None = None if format == "ruff": if _is_command_available("ruff"): - formatter = RuffFormatter() + return RuffFormatter() else: _printerr( "Selected formatter (ruff) is not installed.", @@ -278,7 +277,7 @@ def _get_formatter( if format == "black": if _is_command_available("black"): - formatter = BlackFormatter() + return BlackFormatter() else: _printerr( "Selected formatter (black) is not installed.", @@ -289,11 +288,11 @@ def _get_formatter( elif format == "auto": if _is_command_available("ruff"): - formatter = RuffFormatter() + return RuffFormatter() elif _is_command_available("black"): - formatter = BlackFormatter() + return BlackFormatter() - return formatter + return None def _is_command_available(command: str): From 5d963a97d9326f2009809b1687aacafd55b8ea05 Mon Sep 17 00:00:00 2001 From: Ole-Jakob Olsen Date: Wed, 12 Jun 2024 19:12:51 +0200 Subject: [PATCH 18/35] html -> HTML, HTPY -> htpy, python -> Python --- docs/html2htpy.md | 20 ++++++++++---------- htpy/html2htpy.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/html2htpy.md b/docs/html2htpy.md index 75f55f0..ff6b330 100644 --- a/docs/html2htpy.md +++ b/docs/html2htpy.md @@ -1,16 +1,16 @@ -# Convert HTML to HTPY +# Convert HTML to htpy code -Maybe you already have a bunch of html, or templates that you would like to migrate to htpy. -We got you covered. HTPY ships with a utility command `html2htpy` that can be used to transform existing -html into python code (htpy!). +Maybe you already have a bunch of HTML, or templates that you would like to migrate to htpy. +We got you covered. The utility command `html2htpy` ships with htpy`, and can be used to transform existing +html into Python code (htpy!). ``` $ html2htpy -h usage: html2htpy [-h] [-s] [-f {auto,ruff,black,none}] [input] positional arguments: - input input html from file or stdin + input input HTML from file or stdin options: -h, --help show this help message and exit @@ -20,7 +20,7 @@ options: ``` -Lets say you have an existing html file: +Lets say you have an existing HTML file: ```html title="index.html" @@ -28,7 +28,7 @@ Lets say you have an existing html file: - HTPY Recipes + htpy Recipes