
Account for problems being fixed in Python 3.12 #110

Merged · 20 commits · Aug 31, 2023
1 change: 1 addition & 0 deletions .github/workflows/build-and-test.yml
@@ -20,6 +20,7 @@ jobs:
- 3.9
- '3.10'
- 3.11
- 3.12.0-rc.1
# As per https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#pypy list of versions
- pypy-2.7
- pypy-3.6
29 changes: 28 additions & 1 deletion asttokens/mark_tokens.py
@@ -361,7 +361,34 @@ def visit_joinedstr(self,
last_token, # type: util.Token
):
# type: (...) -> Tuple[util.Token, util.Token]
return self.handle_str(first_token, last_token)
if sys.version_info < (3, 12):
# Older versions don't tokenize the contents of f-strings
return self.handle_str(first_token, last_token)

last = first_token
while True:
if util.match_token(last, getattr(token, "FSTRING_START")):
# Python 3.12+ has tokens for the start (e.g. `f"`) and end (`"`)
# of the f-string. We can't just look for the next FSTRING_END
# because f-strings can be nested, e.g. f"{f'{x}'}", so we need
# to treat this like matching balanced parentheses.
count = 1
while count > 0:
last = self._code.next_token(last)
# mypy complains about token.FSTRING_START and token.FSTRING_END.
if util.match_token(last, getattr(token, "FSTRING_START")):
count += 1
elif util.match_token(last, getattr(token, "FSTRING_END")):
count -= 1
last_token = last
last = self._code.next_token(last_token)
elif util.match_token(last, token.STRING):
# Similar to handle_str, we also need to handle adjacent strings.
last_token = last
last = self._code.next_token(last_token)
else:
break
return (first_token, last_token)

def visit_bytes(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
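For reference, a minimal stdlib sketch (not part of this diff) of why the balanced counting above is needed: on 3.12+, each nesting level of an f-string contributes its own FSTRING_START/FSTRING_END pair, so stopping at the first FSTRING_END would end the outer string too early.

import io
import tokenize

# f"{f'{x}'}" nests one f-string inside another.
source = 'f"{f\'{x}\'}"\n'
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    print(tokenize.tok_name[tok.type], repr(tok.string))
# On 3.12+ this prints two FSTRING_START tokens before the first
# FSTRING_END; on earlier versions the whole literal is one STRING token.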
9 changes: 9 additions & 0 deletions asttokens/util.py
@@ -446,6 +446,10 @@ def annotate_fstring_nodes(tree):
Add a special attribute `_broken_positions` to nodes inside f-strings
if the lineno/col_offset cannot be trusted.
"""
if sys.version_info >= (3, 12):
# f-strings were weirdly implemented until https://peps.python.org/pep-0701/
# In Python 3.12, inner nodes have sensible positions.
return
for joinedstr in walk(tree):
if not isinstance(joinedstr, ast.JoinedStr):
continue
@@ -457,6 +461,11 @@ def annotate_fstring_nodes(tree):
if not fstring_positions_work():
for child in walk(part.value):
setattr(child, '_broken_positions', True)
if isinstance(child, ast.JoinedStr):
# Recursively handle this inner JoinedStr in the same way.
# While this is usually automatic for other nodes,
# the children of f-strings are explicitly excluded in iter_children_ast.
annotate_fstring_nodes(child)

if part.format_spec: # this is another JoinedStr
# Again, the standard positions span the full f-string.
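As a rough illustration of the comments above (a sketch, not part of the diff), the positions of expressions inside an f-string can be inspected directly; on 3.12+ (PEP 701) they point at the real source, while on older versions they are the untrusted values this annotation guards against.

import ast
import sys

source = 'x = f"{a} {b}"'
joined = ast.parse(source).body[0].value  # the JoinedStr node
for part in joined.values:
    if isinstance(part, ast.FormattedValue):
        # On 3.12+ col_offset locates the actual expression in `source`;
        # before 3.12 it may be wrong, hence `_broken_positions`.
        print(sys.version_info[:2], ast.dump(part.value), part.value.col_offset)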
1 change: 1 addition & 0 deletions setup.cfg
@@ -29,6 +29,7 @@ classifiers =
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy

9 changes: 8 additions & 1 deletion tests/test_astroid.py
@@ -35,5 +35,12 @@ def iter_fields(node):
@staticmethod
def create_asttokens(source):
builder = astroid.builder.AstroidBuilder()
tree = builder.string_build(source)
try:
tree = builder.string_build(source)
except AttributeError as e:
raise AstroidTreeException(str(e))
return ASTTokens(source, tree=tree)


class AstroidTreeException(Exception):
pass
25 changes: 14 additions & 11 deletions tests/test_mark_tokens.py
@@ -627,6 +627,8 @@ def test_sys_modules(self):
so it only tests all modules if the environment variable
ASTTOKENS_SLOW_TESTS has been set.
"""
from .test_astroid import AstroidTreeException

modules = list(sys.modules.values())
if not os.environ.get('ASTTOKENS_SLOW_TESTS'):
modules = modules[:20]
@@ -640,7 +642,7 @@

try:
filename = inspect.getsourcefile(module)
except TypeError:
except Exception: # some modules raise weird errors
continue

if not filename:
@@ -657,20 +659,21 @@
if self.is_astroid_test and (
# Astroid fails with a syntax error if a type comment is on its own line
re.search(r'^\s*# type: ', source, re.MULTILINE)
# Astroid can fail on this file, specifically raising an exception at this line of code:
# lambda node: node.name == "NamedTuple" and node.parent.name == "typing"
# with the error:
# AttributeError: 'If' object has no attribute 'name'
# See https://github.com/gristlabs/asttokens/runs/7602147792
# I think the code that causes the problem is:
# if sys.version_info >= (3, 11):
# NamedTuple = typing.NamedTuple
or filename.endswith("typing_extensions.py")
):
print('Skipping', filename)
continue

self.create_mark_checker(source)
try:
self.create_mark_checker(source)
except AstroidTreeException:
# Astroid sometimes fails with errors like:
# AttributeError: 'TreeRebuilder' object has no attribute 'visit_typealias'
# See https://github.com/gristlabs/asttokens/actions/runs/6015907789/job/16318767911?pr=110
# Should be fixed in the next astroid release:
# https://github.com/pylint-dev/pylint/issues/8782#issuecomment-1669967220
# Note that this exception is raised before asttokens is even involved,
# it's purely an astroid bug that we can safely ignore.
continue

if six.PY3:
def test_dict_merge(self):
24 changes: 23 additions & 1 deletion tests/test_tokenless.py
@@ -98,7 +98,7 @@ def check_node(self, atok, node):
ast_text = ast.get_source_segment(source, node, padded=padded)
atok_text = atok.get_text(node, padded=padded)
if ast_text:
if (
if sys.version_info < (3, 12) and (
ast_text.startswith("f") and isinstance(node, (ast.Str, ast.FormattedValue))
or is_fstring_format_spec(node)
or (not fstring_positions_work() and is_fstring_internal_node(node))
@@ -120,6 +120,28 @@ def test_lazy_asttext_astroid_errors(self):
with self.assertRaises(NotImplementedError):
ASTText(source, tree)

def test_nested_fstrings(self):
f1 = 'f"a {1+2} b {3+4} c"'
f2 = "f'd {" + f1 + "} e'"
f3 = "f'''{" + f2 + "}{" + f1 + "}'''"
f4 = 'f"""{' + f3 + '}"""'
s = 'f = ' + f4
atok = ASTText(s)
self.assertEqual(atok.get_text(atok.tree), s)
n4 = atok.tree.body[0].value
n3 = n4.values[0].value
n2 = n3.values[0].value
n1 = n2.values[1].value
self.assertEqual(atok.get_text(n4), f4)
if fstring_positions_work():
self.assertEqual(atok.get_text(n3), f3)
self.assertEqual(atok.get_text(n2), f2)
self.assertEqual(atok.get_text(n1), f1)
else:
self.assertEqual(atok.get_text(n3), '')
self.assertEqual(atok.get_text(n2), '')
self.assertEqual(atok.get_text(n1), '')


class TestFstringPositionsWork(unittest.TestCase):
def test_fstring_positions_work(self):
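As a companion to the test above (a hedged sketch assuming standard 3.12 behavior, not part of the diff), the stdlib alone shows the same effect: once inner positions work, ast.get_source_segment can recover a nested f-string's text.

import ast

source = 'f = f"d {f\'a {1+2} b\'} e"'
outer = ast.parse(source).body[0].value  # outermost JoinedStr
inner = outer.values[1].value            # nested f-string inside the {...}
# On 3.12+ this prints f'a {1+2} b'; on earlier versions the inner
# node's reported positions cannot be trusted, so the segment is
# wrong or unavailable.
print(ast.get_source_segment(source, inner))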
46 changes: 29 additions & 17 deletions tests/test_util.py
@@ -3,6 +3,7 @@

import ast
import io
import sys
import token
import unittest

@@ -122,27 +123,38 @@ def test_combine_tokens():
from asttokens.util import combine_tokens, patched_generate_tokens

text = "℘·2=1"
original_tokens = list(generate_tokens(io.StringIO(text).readline))[:4]
assert original_tokens == [
TokenInfo(ERRORTOKEN, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
TokenInfo(ERRORTOKEN, string='·', start=(1, 1), end=(1, 2), line='℘·2=1'),
TokenInfo(NUMBER, string='2', start=(1, 2), end=(1, 3), line='℘·2=1'),
TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:1]) == [
TokenInfo(NAME, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:2]) == [
TokenInfo(NAME, string='℘·', start=(1, 0), end=(1, 2), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:3]) == [
TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
]
original_tokens = []
for tok in generate_tokens(io.StringIO(text).readline):
original_tokens.append(tok)
if tok.type == OP:
break

assert list(patched_generate_tokens(iter(original_tokens))) == [
correct_tokens = [
TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'),
]
if sys.version_info >= (3, 12):
# The tokenizing bug was fixed in 3.12, so the original tokens are correct,
# rather than starting with false ERRORTOKENs.
assert original_tokens == correct_tokens
else:
assert original_tokens == [
TokenInfo(ERRORTOKEN, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
TokenInfo(ERRORTOKEN, string='·', start=(1, 1), end=(1, 2), line='℘·2=1'),
TokenInfo(NUMBER, string='2', start=(1, 2), end=(1, 3), line='℘·2=1'),
TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:1]) == [
TokenInfo(NAME, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:2]) == [
TokenInfo(NAME, string='℘·', start=(1, 0), end=(1, 2), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:3]) == [
TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
]

assert list(patched_generate_tokens(iter(original_tokens))) == correct_tokens
assert list(patched_generate_tokens(iter(original_tokens[:-1]))) == [
TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
]
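For context, the tokenizer difference the test branches on can be reproduced with the stdlib alone (a sketch, not part of the test suite): '℘' and '·' are valid identifier characters that the old regex-based tokenizer rejected.

import io
import sys
import token
import tokenize

# First token of "℘·2=1": ERRORTOKEN '℘' on 3.11 and earlier, but
# NAME '℘·2' on 3.12+, where the C tokenizer is used under the hood.
first = next(tokenize.generate_tokens(io.StringIO("℘·2=1").readline))
print(sys.version_info[:2], token.tok_name[first.type], repr(first.string))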