Merge pull request #16 from marrow/feature/line-number-mapping

Capture and preserve source line numbers. Now ready for application in #4 and #12.
marrow · Dec 6, 2015 · a1a1266 · a1a1266
2 parents 7d85993 + cf1c867
commit a1a1266
Show file tree

Hide file tree

Showing 4 changed files with 148 additions and 55 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,6 +1,10 @@
 language: python
 sudo: false
 
+branches:
+  except:
+    - /^feature/.*$/
+
 python:
   - pypy
   - pypy3

diff --git a/cinje/block/module.py b/cinje/block/module.py
@@ -1,6 +1,27 @@
 # encoding: utf-8
 
-from ..util import py, Line
+from __future__ import unicode_literals
+
+from zlib import compress, decompress
+from base64 import b64encode, b64decode
+from pprint import pformat
+from collections import deque
+
+from ..util import py, Line, iterate
+
+
+def red(numbers):
+	"""Delta-encode the given numbers, zlib-compress the result, and return it as base64 text."""
+
+	line = 0  # Previous value seen; the first delta is taken relative to zero.
+	deltas = []
+
+	for value in numbers:
+		deltas.append(value - line)  # Store the difference from the previous value, not the value itself.
+		line = value
+
+	return b64encode(compress(b''.join(chr(i).encode('latin1') for i in deltas))).decode('latin1')  # NOTE(review): each delta becomes one latin-1 byte, so a delta outside 0-255 raises here -- confirm inputs never produce one.
+
 
 
 class Module(object):
@@ -51,4 +72,15 @@ def __call__(self, context):
 			yield Line(0, '__tmpl__.extend(["' + '", "'.join(context.templates) + '"])')
 			context.templates = []
 
+		# Snapshot the line number mapping.
+		mapping = deque(context.mapping)
+		mapping.reverse()
+
+		yield Line(0, '')
+
+		if __debug__:
+			yield Line(0, '__mapping__ = [' + ','.join(str(i) for i in mapping) + ']')
+
+		yield Line(0, '__gzmapping__ = b"' + red(mapping).replace('"', '\"') + '"')
+
 		context.flag.remove('init')
diff --git a/cinje/inline/text.py b/cinje/inline/text.py
@@ -1,9 +1,47 @@
 # encoding: utf-8
 
 import ast  # Tighten your belts...
+
+from itertools import chain
 from pprint import pformat
 
-from ..util import chunk, Line, ensure_buffer
+from ..util import iterate, chunk, Line, ensure_buffer
+
+
+def gather(input):
+	"""Yield (line number, text) pairs for a contiguous run of text lines, dropping leading and trailing blank lines."""
+
+	line = input.next()  # NOTE(review): if input is already exhausted this raises StopIteration inside a generator (a RuntimeError under PEP 479) -- confirm callers guarantee a line.
+	lead = True  # True until the first non-blank text line has been yielded.
+	buffer = []  # Blank lines held back until another non-blank text line follows them.
+
+	# Gather contiguous (uninterrupted) lines of template text.
+	while line.kind == 'text':
+		value = line.line.rstrip().rstrip('\\') + ('' if line.continued else '\n')  # Trim trailing whitespace and backslashes; add a newline unless the line is continued.
+
+		if lead and line.stripped:  # First non-blank line: blank lines before it were skipped without being yielded.
+			yield line.number, value
+			lead = False
+
+		elif not lead:
+			if line.stripped:
+				for buf in buffer:  # Release the held blank lines now that more content follows them.
+					yield buf
+
+				buffer = []
+				yield line.number, value
+
+			else:
+				buffer.append((line.number, value))
+
+		try:
+			line = input.next()
+		except StopIteration:
+			line = None  # Input exhausted: there is no line to push back.
+			break
+
+	if line:  # Blank lines still held in the buffer at this point are never yielded.
+		input.push(line)  # Put the last line back, as it won't be a text line.
 
 
 class Text(object):
@@ -17,79 +55,83 @@ def match(self, context, line):
 		return line.kind == 'text'
 
 	def __call__(self, context):
+		dirty = False  # Used for conditional flushing.
+		prefix = ''  # Prepend to the source line emitted.
+		suffix = ''   # Append to the source line emitted.
 		input = context.input
 
-		line = input.next()
-		buffer = []
-
 		# Make sure we have a buffer to write to.
 		for i in ensure_buffer(context):
 			yield i
 
-		# Gather contiguous (uninterrupted) lines of template text.
-		while line.kind == 'text' or ( line.kind == 'comment' and line.stripped.startswith('#{') ):
-			buffer.append(line.line.rstrip().rstrip('\\') + ('' if line.continued else '\n'))
-			try:
-				line = input.next()
-			except StopIteration:
-				line = None
-				break
-
-		if line:
-			input.push(line)  # Put the last line back, as it won't be a text line.
+		lines = gather(context.input)
 
-		# Eliminate trailing blank lines.
-		while buffer and not buffer[-1].strip():
-			del buffer[-1]
-			input.push(Line(0, ''))
+		def inner_chain():
+			for lineno, line in lines:
+				for inner_chunk in chunk(line):
+					yield lineno, inner_chunk
 
-		text = "".join(buffer)
+		chunks = inner_chain()
 
-		# Track that the buffer will have content moving forward.  Used for conditional flushing.
-		if text:
-			context.flag.add('dirty')
-
-		# We now have a contiguous block of templated text.  Split it up into expressions and wrap as appropriate.
-
-		chunks = list(chunk(text))  # Ugh; this breaks streaming, but...
-		single = len(chunks) == 1
-
-		if single:
-			PREFIX = '__ws('
-		else:
-			yield Line(0, '__w((')  # Start a call to _buffer.extend()
-			PREFIX = ''
-
-		for token, part in chunks:
+		for first, last, index, total, (lineno, (token, chunk_)) in iterate(chunks):
+			prefix = ''
+			suffix = ''
+			scope = context.scope + 1
+			dirty = True
+
+			if first and last:  # Optimize the single invocation case.
+				prefix = '__ws('
+				suffix = ')'
+				scope = context.scope
+
+			elif first:
+				yield Line(lineno, '__w((')
+
+			if not last:
+				suffix += ','
+
 			if token == 'text':
-				part = pformat(
-						part,
+				chunk_ = pformat(
+						chunk_,
 						indent = 0,
-						width = 120 - 4 * (context.scope + (0 if single else 1)),
-						# compact = True  Python 3 only.
-					).replace("\n ", "\n" + "\t" * (context.scope + (0 if single else 1))).strip()
+						width = 120 - 4 * scope,
+					).replace("\n ", "\n").strip()
 
-				if part[0] == '(' and part[-1] == ')':
-					part = part[1:-1]
+				for line in iterate(chunk_.split('\n')):
+					value = line.value
+
+					if line.first and prefix:
+						value = prefix + value
+
+					if suffix:
+						value += suffix
+
+					yield Line(lineno, value, scope)
+
+				if last and not first:
+					yield Line(lineno, '))', scope - 1)  # End the call to _buffer.extend()
 
-				yield Line(0, PREFIX + part + (')' if single else ','), (context.scope + (0 if single else 1)))
 				continue
 
-			elif token == 'format':
+			if token == 'format':
 				# We need to split the expression defining the format string from the values to pass when formatting.
 				# We want to allow any Python expression, so we'll need to piggyback on Python's own parser in order
 				# to exploit the currently available syntax.  Apologies, this is probably the scariest thing in here.
 				split = -1
 
 				try:
-					ast.parse(part)
+					ast.parse(chunk_)
 				except SyntaxError as e:  # We expect this, and catch it.  It'll have exploded after the first expr.
-					split = part.rfind(' ', 0, e.offset)
+					split = chunk_.rfind(' ', 0, e.offset)
 
-				token = '_bless(' + part[:split].rstrip() + ').format'
-				part = part[split:].lstrip()
+				token = '_bless(' + chunk_[:split].rstrip() + ').format'
+				chunk_ = chunk_[split:].lstrip()
+
+			yield Line(lineno, prefix + token + '(' + chunk_ + ')' + suffix, scope)
 
-			yield Line(0, PREFIX + token + '(' + part + ')' + (')' if single else ','), (context.scope + (0 if single else 1)))
+			if last and not first:
+				yield Line(lineno, '))', scope - 1)  # End the call to _buffer.extend()
 
-		if not single:
-			yield Line(0, '))', (context.scope + 1))  # End the call to _buffer.extend()
+		# Track that the buffer will have content moving forward.
+		if dirty and 'dirty' not in context.flag:
+			context.flag.add('dirty')
diff --git a/cinje/util.py b/cinje/util.py
@@ -320,7 +320,7 @@ def __init__(self, number, line, scope=None):
 		super(Line, self).__init__()
 
 	def process(self):
-		if self.stripped.startswith('#'):
+		if self.stripped.startswith('#') and not self.stripped.startswith('#{'):
 			self.kind = 'comment'
 		elif self.stripped.startswith(':'):
 			self.kind = 'code'
@@ -434,7 +434,7 @@ class Context(object):
 	This is the primary entry point for translation.
 	"""
 
-	__slots__ = ('input', 'scope', 'flag', '_handler', 'templates', 'handlers')
+	__slots__ = ('input', 'scope', 'flag', '_handler', 'templates', 'handlers', 'mapping')
 
 	def __init__(self, input):
 		self.input = Lines(input.decode('utf8') if isinstance(input, bytes) else input)
@@ -443,6 +443,7 @@ def __init__(self, input):
 		self._handler = []
 		self.handlers = []
 		self.templates = []
+		self.mapping = None
 
 		for translator in map(methodcaller('load'), iter_entry_points('cinje.translator')):
 			self.handlers.append(translator)
@@ -453,6 +454,7 @@ def __repr__(self):
 	def prepare(self):
 		"""Prepare the ordered list of transformers and reset context state to initial."""
 		self.scope = 0
+		self.mapping = deque([0])
 		self._handler = [i() for i in sorted(self.handlers, key=lambda handler: handler.priority)]
 
 	@property
@@ -463,7 +465,18 @@ def stream(self):
 		"""
 
 		if 'init' not in self.flag:
+			root = True
 			self.prepare()
+		else:
+			root = False
+
+		# Track which lines were generated in response to which lines of source code.
+		# The end result is that there is one entry here for every line emitted, each integer representing the source
+		# line number that triggered it.  If any lines are returned with missing line numbers, they're inferred from
+		# the last entry already in the list.
+		# Fun fact: this list is backwards; we optimize by using a deque and appending to the left edge. This updates
+		# the head of a linked list; the whole thing needs to be reversed to make sense.
+		mapping = self.mapping
 
 		for line in self.input:
 			handler = self.classify(line)
@@ -476,6 +489,8 @@ def stream(self):
 			self.input.push(line)  # Put it back so it can be consumed by the handler.
 
 			for line in handler(self):  # This re-indents the code to match, if missing explicit scope.
+				if root: mapping.appendleft(line.number or mapping[0])  # Track source line number.
+
 				if line.scope is None:
 					line = line.clone(scope=self.scope)