Skip to content

Commit 91e8510

Browse files
committed
Track source position information in Streams
Prior to this change, the only positional information tracked while moving through the stream was the current index in the stream. We would like to start reporting richer positional information from the parser, specifically the position (in terms of row/column) in the source object. This change makes that possible by updating the ParserStream class to additionally track its current (zero-indexed) row and column while moving through the stream. We will persist this information into the syntax tree in the following commits.
1 parent 308e0fc commit 91e8510

File tree

2 files changed

+147
-0
lines changed

2 files changed

+147
-0
lines changed

fluent.syntax/fluent/syntax/stream.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ class ParserStream:
99
def __init__(self, string: str):
1010
self.string = string
1111
self.index = 0
12+
self.row_index = 0
13+
self.column_index = 0
1214
self.peek_offset = 0
15+
self._peek_row_offset = 0
16+
self._peek_column_offset = 0
1317

1418
def get(self, offset: int) -> Union[str, None]:
1519
try:
@@ -37,9 +41,19 @@ def current_peek(self) -> Union[str, None]:
3741

3842
def next(self) -> Union[str, None]:
3943
self.peek_offset = 0
44+
self._peek_row_offset = 0
45+
self._peek_column_offset = 0
46+
4047
# Skip over CRLF as if it was a single character.
4148
if self.get(self.index) == "\r" and self.get(self.index + 1) == "\n":
4249
self.index += 1
50+
# If the current character is a newline, move to the next row and reset the column to zero; otherwise advance the column.
51+
if self.get(self.index) == "\n":
52+
self.row_index += 1
53+
self.column_index = 0
54+
else:
55+
self.column_index += 1
56+
4357
self.index += 1
4458
return self.get(self.index)
4559

@@ -50,6 +64,13 @@ def peek(self) -> Union[str, None]:
5064
and self.get(self.index + self.peek_offset + 1) == "\n"
5165
):
5266
self.peek_offset += 1
67+
68+
if self.get(self.index + self.peek_offset) == "\n":
69+
self._peek_row_offset += 1
70+
self._peek_column_offset = 0
71+
else:
72+
self._peek_column_offset += 1
73+
5374
self.peek_offset += 1
5475
return self.get(self.index + self.peek_offset)
5576

@@ -58,7 +79,17 @@ def reset_peek(self, offset: int = 0) -> None:
5879

5980
def skip_to_peek(self) -> None:
6081
self.index += self.peek_offset
82+
self.row_index += self._peek_row_offset
83+
if self._peek_row_offset:
84+
# At least one newline was crossed while peeking, so the peek column offset
85+
# is already the column index counted from the last newline.
86+
self.column_index = self._peek_column_offset
87+
else:
88+
self.column_index += self._peek_column_offset
89+
6190
self.peek_offset = 0
91+
self._peek_row_offset = 0
92+
self._peek_column_offset = 0
6293

6394

6495
EOL = "\n"

fluent.syntax/tests/syntax/test_stream.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,20 +106,26 @@ def test_skip_to_peek(self):
106106
self.assertEqual("c", ps.current_peek)
107107
self.assertEqual(0, ps.peek_offset)
108108
self.assertEqual(2, ps.index)
109+
self.assertEqual(0, ps.row_index)
110+
self.assertEqual(2, ps.column_index)
109111

110112
ps.peek()
111113

112114
self.assertEqual("c", ps.current_char)
113115
self.assertEqual("d", ps.current_peek)
114116
self.assertEqual(1, ps.peek_offset)
115117
self.assertEqual(2, ps.index)
118+
self.assertEqual(0, ps.row_index)
119+
self.assertEqual(2, ps.column_index)
116120

117121
ps.next()
118122

119123
self.assertEqual("d", ps.current_char)
120124
self.assertEqual("d", ps.current_peek)
121125
self.assertEqual(0, ps.peek_offset)
122126
self.assertEqual(3, ps.index)
127+
self.assertEqual(0, ps.row_index)
128+
self.assertEqual(3, ps.column_index)
123129

124130
def test_reset_peek(self):
125131
ps = ParserStream("abcd")
@@ -159,3 +165,113 @@ def test_reset_peek(self):
159165

160166
self.assertEqual("d", ps.peek())
161167
self.assertEqual(None, ps.peek())
168+
169+
def test_next_with_newline(self):
170+
ps = ParserStream("ab\ncd\r\nef")
171+
172+
self.assertEqual("a", ps.current_char)
173+
self.assertEqual(0, ps.index)
174+
self.assertEqual(0, ps.row_index)
175+
self.assertEqual(0, ps.column_index)
176+
177+
self.assertEqual("b", ps.next())
178+
self.assertEqual("b", ps.current_char)
179+
self.assertEqual(1, ps.index)
180+
self.assertEqual(0, ps.row_index)
181+
self.assertEqual(1, ps.column_index)
182+
183+
184+
self.assertEqual("\n", ps.next())
185+
self.assertEqual("\n", ps.current_char)
186+
self.assertEqual(2, ps.index)
187+
self.assertEqual(0, ps.row_index)
188+
self.assertEqual(2, ps.column_index)
189+
190+
self.assertEqual("c", ps.next())
191+
self.assertEqual("c", ps.current_char)
192+
self.assertEqual(3, ps.index)
193+
self.assertEqual(1, ps.row_index)
194+
self.assertEqual(0, ps.column_index)
195+
196+
self.assertEqual("d", ps.next())
197+
self.assertEqual("d", ps.current_char)
198+
self.assertEqual(4, ps.index)
199+
self.assertEqual(1, ps.row_index)
200+
self.assertEqual(1, ps.column_index)
201+
202+
self.assertEqual("\r", ps.next())
203+
self.assertEqual("\n", ps.current_char)
204+
self.assertEqual(5, ps.index)
205+
self.assertEqual(1, ps.row_index)
206+
self.assertEqual(2, ps.column_index)
207+
208+
self.assertEqual("e", ps.next())
209+
self.assertEqual("e", ps.current_char)
210+
self.assertEqual(7, ps.index)
211+
self.assertEqual(2, ps.row_index)
212+
self.assertEqual(0, ps.column_index)
213+
214+
self.assertEqual("f", ps.next())
215+
self.assertEqual("f", ps.current_char)
216+
self.assertEqual(8, ps.index)
217+
self.assertEqual(2, ps.row_index)
218+
self.assertEqual(1, ps.column_index)
219+
220+
self.assertEqual(None, ps.next())
221+
self.assertEqual(None, ps.current_char)
222+
self.assertEqual(9, ps.index)
223+
self.assertEqual(2, ps.row_index)
224+
self.assertEqual(2, ps.column_index)
225+
226+
def test_skip_to_peek_with_newlines(self):
227+
ps = ParserStream("ab\ncd\r\nef")
228+
229+
ps.peek()
230+
ps.peek()
231+
ps.peek()
232+
233+
ps.skip_to_peek()
234+
235+
self.assertEqual("c", ps.current_char)
236+
self.assertEqual("c", ps.current_peek)
237+
self.assertEqual(0, ps.peek_offset)
238+
self.assertEqual(3, ps.index)
239+
self.assertEqual(1, ps.row_index)
240+
self.assertEqual(0, ps.column_index)
241+
242+
ps.peek()
243+
244+
self.assertEqual("c", ps.current_char)
245+
self.assertEqual("d", ps.current_peek)
246+
self.assertEqual(1, ps.peek_offset)
247+
self.assertEqual(3, ps.index)
248+
self.assertEqual(1, ps.row_index)
249+
self.assertEqual(0, ps.column_index)
250+
251+
ps.next()
252+
253+
self.assertEqual("d", ps.current_char)
254+
self.assertEqual("d", ps.current_peek)
255+
self.assertEqual(0, ps.peek_offset)
256+
self.assertEqual(4, ps.index)
257+
self.assertEqual(1, ps.row_index)
258+
self.assertEqual(1, ps.column_index)
259+
260+
ps.peek()
261+
ps.peek()
262+
263+
self.assertEqual("d", ps.current_char)
264+
self.assertEqual("e", ps.current_peek)
265+
self.assertEqual(3, ps.peek_offset)
266+
self.assertEqual(4, ps.index)
267+
self.assertEqual(1, ps.row_index)
268+
self.assertEqual(1, ps.column_index)
269+
270+
ps.skip_to_peek()
271+
272+
self.assertEqual("e", ps.current_char)
273+
self.assertEqual("e", ps.current_peek)
274+
self.assertEqual(0, ps.peek_offset)
275+
self.assertEqual(7, ps.index)
276+
self.assertEqual(2, ps.row_index)
277+
self.assertEqual(0, ps.column_index)

0 commit comments

Comments
 (0)