Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.12] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (GH-120352) #120356

Merged
merged 1 commit into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,17 @@ def test_closing_parenthesis_from_different_line(self):
NAME 'x' (1, 3) (1, 4)
""")

def test_multiline_non_ascii_fstring(self):
    # Regression test for gh-120343: the tokenizer cached a byte-vs-character
    # column-offset correction per line, but reset it whenever the line number
    # changed. A multiline token (here FSTRING_MIDDLE spanning lines 1-2)
    # containing non-ASCII characters advanced the line, so the correction was
    # wrongly discarded and FSTRING_END (and any later token on that line) got
    # a byte offset instead of a character offset. Expected positions below:
    # (2, 68)/(2, 71) are character columns — the non-ASCII letters
    # (ó, ą, ę, etc.) count as one column each, not one per UTF-8 byte.
    self.check_tokenize("""\
a = f'''
  Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    FSTRING_START "f\'\'\'"        (1, 4) (1, 8)
    FSTRING_MIDDLE '\\n  Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68)
    FSTRING_END "\'\'\'"         (2, 68) (2, 71)
    """)

class GenerateTokensTest(TokenizeTest):
def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.
7 changes: 6 additions & 1 deletion Python/Python-tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ typedef struct
/* Needed to cache line for performance */
PyObject *last_line;
Py_ssize_t last_lineno;
Py_ssize_t last_end_lineno;
Py_ssize_t byte_col_offset_diff;
} tokenizeriterobject;

Expand Down Expand Up @@ -76,6 +77,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
self->last_line = NULL;
self->byte_col_offset_diff = 0;
self->last_lineno = 0;
self->last_end_lineno = 0;

return (PyObject *)self;
}
Expand Down Expand Up @@ -226,7 +228,9 @@ tokenizeriter_next(tokenizeriterobject *it)
Py_XDECREF(it->last_line);
line = PyUnicode_DecodeUTF8(line_start, size, "replace");
it->last_line = line;
it->byte_col_offset_diff = 0;
if (it->tok->lineno != it->last_end_lineno) {
it->byte_col_offset_diff = 0;
}
} else {
// Line hasn't changed so we reuse the cached one.
line = it->last_line;
Expand All @@ -240,6 +244,7 @@ tokenizeriter_next(tokenizeriterobject *it)
Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
Py_ssize_t end_lineno = it->tok->lineno;
it->last_lineno = lineno;
it->last_end_lineno = end_lineno;

Py_ssize_t col_offset = -1;
Py_ssize_t end_col_offset = -1;
Expand Down
Loading