Skip to content

Commit

Permalink
Added some proper logic to the HTML being compressed. This fixes #1
Browse files Browse the repository at this point in the history
  • Loading branch information
mitsuhiko committed Sep 19, 2011
1 parent 5400d4c commit 3beb05a
Showing 1 changed file with 89 additions and 29 deletions.
118 changes: 89 additions & 29 deletions jinja2htmlcompress.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,62 +16,114 @@


# Matches one of two things inside template data:
#   * the start of a tag -- "<name" or "</name" plus any whitespace that
#     follows; group 1 is "/" for a closing tag (empty otherwise) and
#     group 2 is the tag name;
#   * the ">" that ends a tag plus any whitespace that follows (group 3).
# The pattern contains no ".", so DOTALL does not change what it matches.
# It is passed as a flag argument because an inline "(?s)" placed at the
# end of a pattern is refused by current versions of the re module.
_tag_re = re.compile(r'(?:<(/?)([a-zA-Z0-9_-]+)\s*|(>\s*))', re.S)
# Matches a run of spaces, tabs, carriage returns and newlines.
_ws_normalize_re = re.compile(r'[ \t\r\n]+')


class HTMLCompress(Extension):
isolated_tags = frozenset(['script', 'style', 'pre', 'textarea'])
class StreamProcessContext(object):
    """Mutable state shared while one token stream is being processed.

    Attributes:
        stream: the token stream handed to the constructor.
        token: the token currently being handled; ``None`` until a
            caller assigns one.
        stack: names of the HTML tags that are currently open, with the
            innermost tag last.
    """

    def __init__(self, stream):
        self.stream = stream
        self.token = None
        self.stack = []

    def fail(self, message):
        """Raise ``TemplateSyntaxError`` carrying *message*.

        The line number comes from ``self.token``.  When no token has
        been assigned yet, the stream's current token is used instead so
        that the caller gets the intended syntax error and not an
        ``AttributeError`` on ``None``.
        """
        token = self.token
        if token is None:
            token = self.stream.current
        raise TemplateSyntaxError(message, token.lineno,
                                  self.stream.name, self.stream.filename)


def _make_dict_from_listing(listing):
    """Flatten ``(keys, value)`` pairs into a single dictionary.

    Each key found in a pair's ``keys`` iterable is mapped to that
    pair's ``value`` (the same object for every key of the pair).  When
    a key appears more than once, the pair listed last wins.
    """
    return dict(
        (name, payload)
        for names, payload in listing
        for name in names
    )

def isolated(self, stack):

class HTMLCompress(Extension):
# Elements whose content is passed through untouched: while one of them
# is open, data is neither stripped nor whitespace-collapsed.  'pre' is
# listed because whitespace inside <pre> is significant.
isolated_elements = set(['script', 'style', 'noscript', 'textarea', 'pre'])
# Elements that never take a closing tag; they are not pushed on the
# open-tag stack because nothing would ever pop them again.
void_elements = set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img',
                     'input', 'link', 'meta', 'param', 'source', 'track',
                     'wbr'])
# Elements that count as "#block" in the breaking rules below.
block_elements = set(['div', 'p', 'form', 'ul', 'ol', 'li', 'table', 'tr',
                      'tbody', 'thead', 'tfoot', 'td', 'th', 'dl',
                      'dt', 'dd', 'blockquote', 'h1', 'h2', 'h3', 'h4',
                      'h5', 'h6', 'pre'])
# Maps an open tag to the set of tags whose opening implicitly closes
# it.  The pseudo entry '#block' stands for any member of block_elements.
breaking_rules = _make_dict_from_listing([
    (['p'], set(['#block'])),
    (['li'], set(['li'])),
    (['td', 'th'], set(['td', 'th', 'tr', 'tbody', 'thead', 'tfoot'])),
    (['tr'], set(['tr', 'tbody', 'thead', 'tfoot'])),
    (['thead', 'tbody', 'tfoot'], set(['thead', 'tbody', 'tfoot'])),
    (['dd', 'dt'], set(['dl', 'dt', 'dd']))
])

def is_isolated(self, stack):
    """Tell whether any open tag in *stack* is an isolated element.

    :param stack: sequence of currently open tag names.
    :return: ``True`` if at least one entry is in
             ``self.isolated_elements``, ``False`` otherwise (including
             for an empty stack).
    """
    # Only ``isolated_elements`` is consulted; the class no longer
    # defines the former ``isolated_tags`` attribute.
    return any(tag in self.isolated_elements for tag in stack)

def normalize(self, token, stack, stream):
def is_breaking(self, tag, other_tag):
    """Decide whether opening *tag* implicitly closes *other_tag*.

    Looks up the rule set registered for *other_tag* in
    ``self.breaking_rules``.  The result is truthy when *tag* is named
    in that set, or when the set contains ``'#block'`` and *tag* is one
    of ``self.block_elements``.  When there is no (or an empty) rule set
    the falsy lookup result itself is returned.
    """
    rules = self.breaking_rules.get(other_tag)
    if not rules:
        return rules
    if tag in rules:
        return True
    return '#block' in rules and tag in self.block_elements

def enter_tag(self, tag, ctx):
    """Record that *tag* has been opened.

    Every innermost open tag that *tag* implicitly closes (according to
    ``is_breaking``) is left first.  Afterwards *tag* is pushed onto
    ``ctx.stack`` unless it is one of ``self.void_elements``.
    """
    while ctx.stack:
        innermost = ctx.stack[-1]
        if not self.is_breaking(tag, innermost):
            break
        self.leave_tag(innermost, ctx)
    if tag in self.void_elements:
        return
    ctx.stack.append(tag)

def leave_tag(self, tag, ctx):
    """Record that *tag* has been closed.

    If *tag* is the innermost open tag it is simply popped.  Otherwise
    the stack is scanned from the innermost tag outwards: tags that have
    a breaking rule may be skipped over, and when *tag* is found it is
    removed together with everything opened inside it.  The scan gives
    up, leaving the stack unchanged, at the first tag that has no
    breaking rule.

    Calls ``ctx.fail`` when nothing is open at all.
    """
    if not ctx.stack:
        ctx.fail('Tried to leave "%s" but something closed '
                 'it already' % tag)
    if tag == ctx.stack[-1]:
        ctx.stack.pop()
        return
    for idx, other_tag in enumerate(reversed(ctx.stack)):
        if other_tag == tag:
            # Drop the match and everything above it, then stop: going
            # on would walk a stack that has just been shortened and
            # could pop more entries than remain.
            del ctx.stack[-(idx + 1):]
            return
        if not self.breaking_rules.get(other_tag):
            return

def normalize(self, ctx):
    """Return the value of ``ctx.token`` with whitespace compressed.

    The text is split at every tag start and tag end found by
    ``_tag_re``.  Text between those points is stripped and has each
    whitespace run collapsed to one space, unless an isolated element is
    open at that moment, in which case it is copied verbatim.  Tag
    starts are copied as matched and update ``ctx.stack`` through
    ``enter_tag`` / ``leave_tag``.

    :param ctx: processing context; ``ctx.token.value`` is read and
                ``ctx.stack`` is updated as tags open and close.
    :return: the compressed text.
    """
    pos = 0
    parts = []

    def write_data(value):
        # Isolation is checked per chunk because the open-tag stack
        # changes while the token is being walked.
        if not self.is_isolated(ctx.stack):
            value = _ws_normalize_re.sub(' ', value.strip())
        parts.append(value)

    source = ctx.token.value
    for match in _tag_re.finditer(source):
        closes, tag, sole = match.groups()
        write_data(source[pos:match.start()])
        if sole:
            # A lone ">" (plus trailing whitespace) ends a tag.
            write_data(sole)
        else:
            parts.append(match.group())
            if closes:
                self.leave_tag(tag, ctx)
            else:
                self.enter_tag(tag, ctx)
        pos = match.end()

    write_data(source[pos:])
    return u''.join(parts)

def filter_stream(self, stream):
    """Yield the tokens of *stream* with every data token compressed.

    Tokens whose type is not ``'data'`` are yielded unchanged.  Each
    data token is replaced by a new ``Token`` with the same line number
    whose value is the result of ``normalize``.  One context is shared
    by all tokens so the open-tag stack carries over from one data
    token to the next.
    """
    ctx = StreamProcessContext(stream)
    for token in stream:
        if token.type == 'data':
            ctx.token = token
            token = Token(token.lineno, 'data', self.normalize(ctx))
        yield token


class SelectiveHTMLCompress(HTMLCompress):

def filter_stream(self, stream):
def fail(msg):
raise TemplateSyntaxError(msg, stream.current.lineno,
stream.name, stream.filename)
stack = []
ctx = StreamProcessContext(stream)
strip_depth = 0
while 1:
if stream.current.type == 'block_begin':
Expand All @@ -83,14 +135,15 @@ def fail(msg):
else:
strip_depth -= 1
if strip_depth < 0:
fail('Unexpected tag endstrip')
ctx.fail('Unexpected tag endstrip')
stream.skip()
if stream.current.type != 'block_end':
fail('expected end of block, got %s' %
describe_token(stream.current))
ctx.fail('expected end of block, got %s' %
describe_token(stream.current))
stream.skip()
if strip_depth > 0 and stream.current.type == 'data':
value = self.normalize(stream.current, stack, stream)
ctx.token = stream.current
value = self.normalize(ctx)
yield Token(stream.current.lineno, 'data', value)
else:
yield stream.current
Expand All @@ -111,7 +164,8 @@ def test():
}
</script>
<body>
<li><a href="{{ href }}">{{ title }}</a></li>
<li><a href="{{ href }}">{{ title }}</a><br>Test Foo
<li><a href="{{ href }}">{{ title }}</a><img src=test.png>
</body>
</html>
''')
Expand All @@ -122,8 +176,14 @@ def test():
Normal <span> unchanged </span> stuff
{% strip %}Stripped <span class=foo > test </span>
<a href="foo"> test </a> {{ foo }}
{% endstrip %}
Normal <stuff> again {{ foo }} </stuff>
<p>
Foo<br>Bar
Baz
<p>
Moep <span>Test</span> Moep
</p>
{% endstrip %}
''')
print tmpl.render(foo=42)

Expand Down

0 comments on commit 3beb05a

Please sign in to comment.