Skip to content

Commit

Permalink
gh-120343: Do not reset byte_col_offset_diff after multiline tokens (#120352)
Browse files Browse the repository at this point in the history

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
  • Loading branch information
lysnikolaou and blurb-it[bot] authored Jun 11, 2024
1 parent 32a0fab commit 1b62bce
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 1 deletion.
11 changes: 11 additions & 0 deletions Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,17 @@ def test_closing_parenthesis_from_different_line(self):
NAME 'x' (1, 3) (1, 4)
""")

def test_multiline_non_ascii_fstring(self):
# Regression test for gh-120343: tokenizes a triple-quoted f-string that
# spans two lines and contains non-ASCII characters, and checks that the
# FSTRING_END token following the multiline FSTRING_MIDDLE is reported at
# columns (2, 68)-(2, 71).
self.check_tokenize("""\
a = f'''
Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\
NAME 'a' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
FSTRING_START "f\'\'\'" (1, 4) (1, 8)
FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68)
FSTRING_END "\'\'\'" (2, 68) (2, 71)
""")

class GenerateTokensTest(TokenizeTest):
def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.
7 changes: 6 additions & 1 deletion Python/Python-tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ typedef struct
/* Needed to cache line for performance */
PyObject *last_line;
Py_ssize_t last_lineno;
Py_ssize_t last_end_lineno;
Py_ssize_t byte_col_offset_diff;
} tokenizeriterobject;

Expand Down Expand Up @@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
self->last_line = NULL;
self->byte_col_offset_diff = 0;
self->last_lineno = 0;
self->last_end_lineno = 0;

return (PyObject *)self;
}
Expand Down Expand Up @@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it)
Py_XDECREF(it->last_line);
line = PyUnicode_DecodeUTF8(line_start, size, "replace");
it->last_line = line;
it->byte_col_offset_diff = 0;
if (it->tok->lineno != it->last_end_lineno) {
it->byte_col_offset_diff = 0;
}
} else {
// Line hasn't changed so we reuse the cached one.
line = it->last_line;
Expand All @@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it)
Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
Py_ssize_t end_lineno = it->tok->lineno;
it->last_lineno = lineno;
it->last_end_lineno = end_lineno;

Py_ssize_t col_offset = -1;
Py_ssize_t end_col_offset = -1;
Expand Down

0 comments on commit 1b62bce

Please sign in to comment.