Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utilities to convert from html to htpy #26

Merged
merged 35 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
22ef548
Add utilities to convert from html to htpy
OleJoik Jun 10, 2024
1c7335e
Refactor cli and python interface into html2htpy
OleJoik Jun 11, 2024
698b4bf
Minor refactor, use iterable instead of loop
OleJoik Jun 11, 2024
b1c5aeb
Refactor tests (hardcoded expected, no formatting)
OleJoik Jun 11, 2024
3cf0827
Undo unintentional formatting of __init__.py
OleJoik Jun 11, 2024
98f283f
String content escaping
OleJoik Jun 11, 2024
c4b71a0
simplify cli app, input from file or stdin
OleJoik Jun 11, 2024
158215b
Remove the import of html2htpy in init file
OleJoik Jun 11, 2024
bebe80e
Update import of html2htpy in test
OleJoik Jun 11, 2024
1f93e2e
Bugfix: accept jinja style templates w/period
OleJoik Jun 11, 2024
d1efa4b
Docs for html2htpy
OleJoik Jun 11, 2024
731c97e
Removing select.select when reading stdin
OleJoik Jun 12, 2024
c60dd1e
Update Formatting, -f auto/ruff/black/none from path
OleJoik Jun 12, 2024
f91345f
Default shorthand syntax, --explicit -> kwargs id, class_
OleJoik Jun 12, 2024
50c893f
Refactor, more descriptive function name
OleJoik Jun 12, 2024
b6bd44f
fix failing test
OleJoik Jun 12, 2024
8784204
Avoid intermediate var in _get_formatter, immediate returns
OleJoik Jun 12, 2024
5d963a9
html -> HTML, HTPY -> htpy, python -> Python
OleJoik Jun 12, 2024
02af0f6
Get rid of __name__ == "__main__" in html2htpy
OleJoik Jun 12, 2024
32b5ca6
Adding black as dev dependency
OleJoik Jun 12, 2024
d278063
Updates to docs after changes to --shorthand flag
OleJoik Jun 12, 2024
9bc3b20
Update tests for html2htpy
OleJoik Jun 12, 2024
a5f799e
Ruff lint --fix and ruff format
OleJoik Jun 12, 2024
241e7ca
mypy lint
OleJoik Jun 12, 2024
26b94fa
Remove 'Self', use 'Any'. Python 3.10 compatible
OleJoik Jun 12, 2024
18355e6
Ruff formatting
OleJoik Jun 12, 2024
7c00ffa
-i: Flag to include import htpy elements in output
OleJoik Jun 12, 2024
b59ea12
Remove "expected formatting" from test
OleJoik Jun 12, 2024
be15515
Bugfix: correct handling of void elements without / in endtag
OleJoik Jun 12, 2024
0727a71
Additional import options: notably --imports=h
OleJoik Jun 12, 2024
5ac1e7a
--no-shorthand instead of --explicit options
OleJoik Jun 12, 2024
b0944d3
Fix minor outdated info in docs
OleJoik Jun 12, 2024
2f2cabc
Fix typo in docs
OleJoik Jun 12, 2024
543b80d
Another typo in docs
OleJoik Jun 12, 2024
d79f42e
Remove rogue print()
OleJoik Jun 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions htpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from markupsafe import Markup as _Markup
from markupsafe import escape as _escape

from .html2htpy import *

OleJoik marked this conversation as resolved.
Show resolved Hide resolved
BaseElementSelf = TypeVar("BaseElementSelf", bound="BaseElement")
ElementSelf = TypeVar("ElementSelf", bound="Element")

Expand Down
296 changes: 296 additions & 0 deletions htpy/html2htpy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
import re
import argparse
from dataclasses import dataclass
from typing import Self
from html.parser import HTMLParser

__all__ = ["html2htpy"]


class Tag:
def __init__(
self,
type: str,
attrs: list[tuple[str, str | None]],
parent: Self | None = None,
):
self.type = type
self.attrs = attrs
self.parent = parent
self.children: list[Self | str] = []

def serialize(self, shorthand_id_class: bool = False):
_type = self.type
if "-" in _type:
_type = _type.replace("-", "_")

_positional_attrs: dict[str, str | None] = {}
_attrs = ""
_kwattrs: list[tuple[str, str | None]] = []

for a in self.attrs:
key = a[0]
if key == "class":
if shorthand_id_class:
_positional_attrs[key] = a[1]
else:
_kwattrs.append(a)

elif key == "id":
if shorthand_id_class:
_positional_attrs[key] = a[1]
else:
_kwattrs.append(a)
else:
_kwattrs.append(a)

if _positional_attrs or _kwattrs:
_attrs += "("

if _positional_attrs:
arg0 = ""
if "id" in _positional_attrs:
if _positional_attrs["id"] == None:
raise Exception("Id attribute cannot be none")

arg0 += "#" + _positional_attrs["id"]

if "class" in _positional_attrs:
if _positional_attrs["class"] == None:
raise Exception("Class attribute cannot be none")

classes = ".".join(_positional_attrs["class"].split(" "))
arg0 += "." + classes

_attrs += '"' + arg0 + '",'

if _kwattrs:
for a in _kwattrs:
key = a[0]
if "-" in key:
key = key.replace("-", "_")

if key == "class":
key = "class_"
elif key == "for":
key = "for_"

val = a[1]
if not val:
_attrs += f"{key}=True,"

else:
_attrs += f'{key}="{val}",'

if _positional_attrs or _kwattrs:
_attrs = _attrs[:-1] + ")"

_children: str = ""
if self.children:
_children += "["
for c in self.children:
if isinstance(c, Tag):
_children += c.serialize(shorthand_id_class=shorthand_id_class)
else:
_children += str(c)

_children += ","

_children = _children[:-1] + "]"

return f"{_type}{_attrs}{_children}"


class HTPYParser(HTMLParser):
def __init__(self):
self._collected: list[Tag | str] = []
self._current: Tag | None = None
super().__init__()

def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]):
t = Tag(tag, attrs, parent=self._current)

if not self._current:
self._collected.append(t)
else:
self._current.children.append(t)

self._current = t

def handle_startendtag(self, tag: str, attrs: list[tuple[str, str | None]]):
t = Tag(tag, attrs, parent=self._current)

if not self._current:
self._collected.append(t)
else:
self._current.children.append(t)

def handle_endtag(self, tag: str):
if not self._current:
raise Exception(
f"Error parsing html: Closing tag {tag} when not inside any other tag"
)

if not self._current.type == tag:
raise Exception(
f"Error parsing html: Closing tag {tag} does not match the currently open tag ({self._current.type})"
)

self._current = self._current.parent

def handle_data(self, data: str):
if not data.isspace():
stringified_data = _convert_data_to_string(data)

if self._current:
self._current.children.append(stringified_data)
else:
self._collected.append(stringified_data)

def serialize_python(self, shorthand_id_class: bool = False, format: bool = False):
o = ""

if len(self._collected) == 1:
o += _serialize(self._collected[0], shorthand_id_class)

else:
o += "["
for t in self._collected:
o += _serialize(t, shorthand_id_class) + ","
o = o[:-1] + "]"

if format:
try:
import black
except:
raise Exception(
"Cannot import formatter. Please ensure black is installed."
)

return black.format_str(
o, mode=black.FileMode(line_length=80, magic_trailing_comma=False)
)
else:
return o


def html2htpy(html: str, shorthand_id_class: bool = False, format: bool = False):
    """Convert an HTML document or fragment to htpy source code.

    Args:
        html: The HTML text to convert.
        shorthand_id_class: Emit ``id``/``class`` as a ``"#id.class"``
            positional argument instead of keyword arguments.
        format: Format the generated code with ``black`` (must be installed).

    Returns:
        The generated Python source as a string.
    """
    parser = HTPYParser()
    parser.feed(html)
    # feed() may buffer incomplete trailing data (for example text ending in
    # an unterminated "&..."); close() forces it to be processed so that the
    # end of the input is not silently dropped.
    parser.close()

    return parser.serialize_python(shorthand_id_class, format)


def _convert_data_to_string(data: str):
_data = str(data)

# escape unescaped dblquote: " -> \"
_data = re.compile(r'(?<![\\])"').sub('\\"', _data)

template_string_pattern = re.compile(r"\{\{\s*(\w+)\s*\}\}")

has_jinja_pattern = re.search(template_string_pattern, _data)
if has_jinja_pattern:
# regex replaces these 3 cases:
# {{ var }} -> { var }
# { -> {{
# } -> }}
template_string_replace_pattern = re.compile(
r"(\{\{\s*(\w+)\s*\}\}|(?<![\{]){(?![\{])|(?<![\}])}(?![\}]))"
)

def replacer(match: re.Match[str]):
captured = match.group(1)

if captured.startswith("{{"):
return captured[1:-1]

if captured == "{":
return "{{"

return "}}"

_data = template_string_replace_pattern.sub(replacer, _data)
_data = 'f"' + _data + '"'
else:
_data = '"' + _data + '"'

return _data


def _serialize(el: Tag | str, shorthand_id_class: bool):
    """Return the htpy source for a parsed node.

    Text nodes are passed through as strings; ``Tag`` nodes are rendered via
    their own ``serialize`` method with the given shorthand setting.
    """
    if not isinstance(el, Tag):
        return str(el)

    return el.serialize(shorthand_id_class=shorthand_id_class)


@dataclass
class ConvertArgs:
    """Shape of the parsed command-line options read by ``main``."""

    # Value of -s/--shorthand: use shorthand syntax for class and id.
    shorthand: bool
    # Value of -f/--format: format the generated code (requires black).
    format: bool


def main():
    """Command-line entry point: parse the flags and start the converter."""
    cli = argparse.ArgumentParser(prog="html2htpy")

    cli.add_argument(
        "-s",
        "--shorthand",
        action="store_true",
        help="Use shorthand syntax for class and id attributes",
    )
    cli.add_argument(
        "-f",
        "--format",
        action="store_true",
        help="Format output code (requires black installed)",
    )

    options = cli.parse_args()

    convert_html_cli(options.shorthand, options.format)


def convert_html_cli(shorthand_id_class: bool, format: bool):
    """Interactively read pasted HTML from stdin and print the htpy code.

    Lines are read until one arrives more than 0.1 seconds after the first
    line (i.e. the user presses Enter again after pasting) or until stdin
    reaches end-of-file. Ctrl-C aborts without converting.

    Args:
        shorthand_id_class: Forwarded to ``html2htpy``.
        format: Forwarded to ``html2htpy``.
    """
    import time

    print("")
    print("HTML to HTPY converter")
    print("selected options: ")
    print(f" format: {format}")
    print(f" shorthand id class: {shorthand_id_class}")
    print("\n>>>>>>>>>>>>>>>>>>")
    print(">>> paste html >>>")
    print(">>>>>>>>>>>>>>>>>>\n")

    lines: list[str] = []
    first_line_at: float | None = None

    try:
        while True:
            try:
                line = input()
            except EOFError:
                # Piped or closed stdin: convert whatever has been read.
                break

            if first_line_at is None:
                first_line_at = time.monotonic()

            lines.append(line)

            # A paste arrives as a burst; a line that shows up noticeably
            # later is taken as the user's signal that the input is complete.
            if time.monotonic() - first_line_at > 0.1:
                break

        # input() strips the line terminator. Re-join with a space so that
        # tokens from adjacent lines (e.g. "<div" and 'class="x">') are not
        # glued together.
        collected_text = " ".join(lines)

        output = html2htpy(collected_text, shorthand_id_class, format)
        print("\n##############################################")
        print("### serialized and formatted python (htpy) ###")
        print("##############################################\n")
        print(output)
    except KeyboardInterrupt:
        print("\nInterrupted")


# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()
OleJoik marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ optional-dependencies.dev = [
"django-stubs",
"jinja2",
]
optional-dependencies.extras = [
"black"
]
optional-dependencies.docs = [
"mkdocs-material==9.5.12",
]
Expand All @@ -41,6 +44,9 @@ Repository = "https://github.com/pelme/htpy"
Documentation = "https://htpy.dev"
Issues = "https://github.com/pelme/htpy/issues"

[project.scripts]
html2htpy = "htpy.html2htpy:main"

[build-system]
requires = ["flit_core >=3.2,<4"]
build-backend = "flit_core.buildapi"
Expand Down
Loading
Loading