diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4428e8cea1964c..36dba71766cc20 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1199,6 +1199,17 @@ def test_closing_parenthesis_from_different_line(self): NAME 'x' (1, 3) (1, 4) """) + def test_multiline_non_ascii_fstring(self): + self.check_tokenize("""\ +a = f''' + Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\ + NAME 'a' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + FSTRING_START "f\'\'\'" (1, 4) (1, 8) + FSTRING_MIDDLE '\\n Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68) + FSTRING_END "\'\'\'" (2, 68) (2, 71) + """) + class GenerateTokensTest(TokenizeTest): def check_tokenize(self, s, expected): # Format the tokens in s in a table format. diff --git a/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst new file mode 100644 index 00000000000000..76714b0c394eef --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst @@ -0,0 +1 @@ +Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module. diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c index 09fad18b5b4df7..2591dae35736ba 100644 --- a/Python/Python-tokenize.c +++ b/Python/Python-tokenize.c @@ -36,6 +36,7 @@ typedef struct /* Needed to cache line for performance */ PyObject *last_line; Py_ssize_t last_lineno; + Py_ssize_t last_end_lineno; Py_ssize_t byte_col_offset_diff; } tokenizeriterobject; @@ -77,6 +78,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline, self->last_line = NULL; self->byte_col_offset_diff = 0; self->last_lineno = 0; + self->last_end_lineno = 0; return (PyObject *)self; } @@ -227,7 +229,9 @@ tokenizeriter_next(tokenizeriterobject *it) Py_XDECREF(it->last_line); line = PyUnicode_DecodeUTF8(line_start, size, "replace"); it->last_line = line; - it->byte_col_offset_diff = 0; + if (it->tok->lineno != it->last_end_lineno) { + it->byte_col_offset_diff = 0; + } } else { // Line hasn't changed so we reuse the cached one. line = it->last_line; @@ -241,6 +245,7 @@ tokenizeriter_next(tokenizeriterobject *it) Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno; Py_ssize_t end_lineno = it->tok->lineno; it->last_lineno = lineno; + it->last_end_lineno = end_lineno; Py_ssize_t col_offset = -1; Py_ssize_t end_col_offset = -1;