Skip to content

Commit

Permalink
Merge pull request #16 from marrow/feature/line-number-mapping
Browse files Browse the repository at this point in the history
Capture and preserve source line numbers.

Now ready for application in #4 and #12.
  • Loading branch information
amcgregor committed Dec 6, 2015
2 parents 7d85993 + cf1c867 commit a1a1266
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 55 deletions.
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
language: python
sudo: false

branches:
except:
- /^feature/.*$/

python:
- pypy
- pypy3
Expand Down
34 changes: 33 additions & 1 deletion cinje/block/module.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
# encoding: utf-8

from ..util import py, Line
from __future__ import unicode_literals

from zlib import compress, decompress
from base64 import b64encode, b64decode
from pprint import pformat
from collections import deque

from ..util import py, Line, iterate


def red(numbers):
    """Pack a sequence of line numbers into a compact, ASCII-safe string.

    Each value is replaced by its difference from the value before it (the
    first one is measured against zero). Every difference is stored as a
    single byte, the bytes are zlib-compressed, and the result is base64
    encoded.

    Every difference has to fit in one byte (0-255); the ``chr`` / ``latin1``
    step raises for anything outside that range.

    Returns the base64 payload decoded to text.
    """
    previous = 0
    steps = []

    for current in numbers:
        steps.append(current - previous)
        previous = current

    # One latin1-encoded character per delta, i.e. exactly one byte each.
    raw = b''.join(chr(step).encode('latin1') for step in steps)
    packed = compress(raw)

    return b64encode(packed).decode('latin1')



class Module(object):
Expand Down Expand Up @@ -51,4 +72,15 @@ def __call__(self, context):
yield Line(0, '__tmpl__.extend(["' + '", "'.join(context.templates) + '"])')
context.templates = []

# Snapshot the line number mapping.
mapping = deque(context.mapping)
mapping.reverse()

yield Line(0, '')

if __debug__:
yield Line(0, '__mapping__ = [' + ','.join(str(i) for i in mapping) + ']')

yield Line(0, '__gzmapping__ = b"' + red(mapping).replace('"', '\"') + '"')

context.flag.remove('init')
146 changes: 94 additions & 52 deletions cinje/inline/text.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,47 @@
# encoding: utf-8

import ast # Tighten your belts...

from itertools import chain
from pprint import pformat

from ..util import chunk, Line, ensure_buffer
from ..util import iterate, chunk, Line, ensure_buffer


def gather(input):
    """Collect contiguous lines of text, preserving line numbers.

    Reads lines from ``input`` -- an object offering ``next()`` and
    ``push(line)`` -- for as long as their ``kind`` is ``'text'``, and yields
    one ``(number, value)`` pair per retained line. ``value`` is the line's
    text with trailing whitespace and trailing backslashes removed, followed
    by a newline unless the line is flagged as ``continued``.

    Blank lines ahead of the first non-blank line are skipped. Blank lines
    between two non-blank lines are kept. Blank lines after the last
    non-blank line are dropped. The first non-text line encountered is pushed
    back onto ``input`` so the caller can process it.

    If ``input`` is already exhausted, nothing is yielded.
    """
    try:
        line = input.next()
    except StopIteration:
        # Nothing left to read. A StopIteration escaping a generator body is
        # turned into RuntimeError under PEP 479, so finish cleanly instead.
        return

    lead = True  # True while still skipping leading blank lines.
    pending = []  # Blank lines held back until further content shows up.

    # Gather contiguous (uninterrupted) lines of template text.
    while line.kind == 'text':
        value = line.line.rstrip().rstrip('\\') + ('' if line.continued else '\n')

        if lead and line.stripped:
            yield line.number, value
            lead = False

        elif not lead:
            if line.stripped:
                # Content follows the held-back blanks, so they are interior: emit them first.
                for held in pending:
                    yield held

                pending = []
                yield line.number, value

            else:
                pending.append((line.number, value))

        try:
            line = input.next()
        except StopIteration:
            line = None
            break

    if line:
        input.push(line)  # Put the last line back, as it won't be a text line.


class Text(object):
Expand All @@ -17,79 +55,83 @@ def match(self, context, line):
return line.kind == 'text'

def __call__(self, context):
dirty = False # Used for conditional flushing.
prefix = '' # Prepend to the source line emitted.
suffix = '' # Append to the source line emitted.
input = context.input

line = input.next()
buffer = []

# Make sure we have a buffer to write to.
for i in ensure_buffer(context):
yield i

# Gather contiguous (uninterrupted) lines of template text.
while line.kind == 'text' or ( line.kind == 'comment' and line.stripped.startswith('#{') ):
buffer.append(line.line.rstrip().rstrip('\\') + ('' if line.continued else '\n'))
try:
line = input.next()
except StopIteration:
line = None
break

if line:
input.push(line) # Put the last line back, as it won't be a text line.
lines = gather(context.input)

# Eliminate trailing blank lines.
while buffer and not buffer[-1].strip():
del buffer[-1]
input.push(Line(0, ''))
def inner_chain():
for lineno, line in lines:
for inner_chunk in chunk(line):
yield lineno, inner_chunk

text = "".join(buffer)
chunks = inner_chain()

# Track that the buffer will have content moving forward. Used for conditional flushing.
if text:
context.flag.add('dirty')

# We now have a contiguous block of templated text. Split it up into expressions and wrap as appropriate.

chunks = list(chunk(text)) # Ugh; this breaks streaming, but...
single = len(chunks) == 1

if single:
PREFIX = '__ws('
else:
yield Line(0, '__w((') # Start a call to _buffer.extend()
PREFIX = ''

for token, part in chunks:
for first, last, index, total, (lineno, (token, chunk_)) in iterate(chunks):
prefix = ''
suffix = ''
scope = context.scope + 1
dirty = True

if first and last: # Optimize the single invocation case.
prefix = '__ws('
suffix = ')'
scope = context.scope

elif first:
yield Line(lineno, '__w((')

if not last:
suffix += ','

if token == 'text':
part = pformat(
part,
chunk_ = pformat(
chunk_,
indent = 0,
width = 120 - 4 * (context.scope + (0 if single else 1)),
# compact = True Python 3 only.
).replace("\n ", "\n" + "\t" * (context.scope + (0 if single else 1))).strip()
width = 120 - 4 * scope,
).replace("\n ", "\n").strip()

if part[0] == '(' and part[-1] == ')':
part = part[1:-1]
for line in iterate(chunk_.split('\n')):
value = line.value

if line.first and prefix:
value = prefix + value

if suffix:
value += suffix

yield Line(lineno, value, scope)

if last and not first:
yield Line(lineno, '))', scope - 1) # End the call to _buffer.extend()

yield Line(0, PREFIX + part + (')' if single else ','), (context.scope + (0 if single else 1)))
continue

elif token == 'format':
if token == 'format':
# We need to split the expression defining the format string from the values to pass when formatting.
# We want to allow any Python expression, so we'll need to piggyback on Python's own parser in order
# to exploit the currently available syntax. Apologies, this is probably the scariest thing in here.
split = -1

try:
ast.parse(part)
ast.parse(chunk_)
except SyntaxError as e: # We expect this, and catch it. It'll have exploded after the first expr.
split = part.rfind(' ', 0, e.offset)
split = chunk_.rfind(' ', 0, e.offset)

token = '_bless(' + part[:split].rstrip() + ').format'
part = part[split:].lstrip()
token = '_bless(' + chunk_[:split].rstrip() + ').format'
chunk_ = chunk_[split:].lstrip()

yield Line(lineno, prefix + token + '(' + chunk_ + ')' + suffix, scope)

yield Line(0, PREFIX + token + '(' + part + ')' + (')' if single else ','), (context.scope + (0 if single else 1)))
if last and not first:
yield Line(lineno, '))', scope - 1) # End the call to _buffer.extend()

if not single:
yield Line(0, '))', (context.scope + 1)) # End the call to _buffer.extend()
# Track that the buffer will have content moving forward.
if dirty and 'dirty' not in context.flag:
context.flag.add('dirty')
19 changes: 17 additions & 2 deletions cinje/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ def __init__(self, number, line, scope=None):
super(Line, self).__init__()

def process(self):
if self.stripped.startswith('#'):
if self.stripped.startswith('#') and not self.stripped.startswith('#{'):
self.kind = 'comment'
elif self.stripped.startswith(':'):
self.kind = 'code'
Expand Down Expand Up @@ -434,7 +434,7 @@ class Context(object):
This is the primary entry point for translation.
"""

__slots__ = ('input', 'scope', 'flag', '_handler', 'templates', 'handlers')
__slots__ = ('input', 'scope', 'flag', '_handler', 'templates', 'handlers', 'mapping')

def __init__(self, input):
self.input = Lines(input.decode('utf8') if isinstance(input, bytes) else input)
Expand All @@ -443,6 +443,7 @@ def __init__(self, input):
self._handler = []
self.handlers = []
self.templates = []
self.mapping = None

for translator in map(methodcaller('load'), iter_entry_points('cinje.translator')):
self.handlers.append(translator)
Expand All @@ -453,6 +454,7 @@ def __repr__(self):
def prepare(self):
"""Prepare the ordered list of transformers and reset context state to initial."""
self.scope = 0
self.mapping = deque([0])
self._handler = [i() for i in sorted(self.handlers, key=lambda handler: handler.priority)]

@property
Expand All @@ -463,7 +465,18 @@ def stream(self):
"""

if 'init' not in self.flag:
root = True
self.prepare()
else:
root = False

# Track which lines were generated in response to which lines of source code.
# The end result is that there is one entry here for every line emitted, each integer representing the source
# line number that triggered it. If any lines are returned with missing line numbers, they're inferred from
# the last entry already in the list.
# Fun fact: this list is backwards; we optimize by using a deque and appending to the left edge. This updates
# the head of a linked list; the whole thing needs to be reversed to make sense.
mapping = self.mapping

for line in self.input:
handler = self.classify(line)
Expand All @@ -476,6 +489,8 @@ def stream(self):
self.input.push(line) # Put it back so it can be consumed by the handler.

for line in handler(self): # This re-indents the code to match, if missing explicit scope.
if root: mapping.appendleft(line.number or mapping[0]) # Track source line number.

if line.scope is None:
line = line.clone(scope=self.scope)

Expand Down

0 comments on commit a1a1266

Please sign in to comment.