
Account for problems being fixed in Python 3.12 #110

Merged · 20 commits · Aug 31, 2023
1 change: 1 addition & 0 deletions .github/workflows/build-and-test.yml
@@ -20,6 +20,7 @@ jobs:
- 3.9
- '3.10'
- 3.11
- 3.12.0-rc.1
# As per https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#pypy list of versions
- pypy-2.7
- pypy-3.6
29 changes: 28 additions & 1 deletion asttokens/mark_tokens.py
@@ -361,7 +361,34 @@ def visit_joinedstr(self,
last_token, # type: util.Token
):
# type: (...) -> Tuple[util.Token, util.Token]
return self.handle_str(first_token, last_token)
if sys.version_info < (3, 12):
# Older versions don't tokenize the contents of f-strings
return self.handle_str(first_token, last_token)

last = first_token
while True:
if util.match_token(last, getattr(token, "FSTRING_START")):
# Python 3.12+ has tokens for the start (e.g. `f"`) and end (`"`)
# of the f-string. We can't just look for the next FSTRING_END
# because f-strings can be nested, e.g. f"{f'{x}'}", so we need
# to treat this like matching balanced parentheses.
count = 1
while count > 0:
last = self._code.next_token(last)
# mypy complains about token.FSTRING_START and token.FSTRING_END.
if util.match_token(last, getattr(token, "FSTRING_START")):
count += 1
elif util.match_token(last, getattr(token, "FSTRING_END")):
count -= 1
last_token = last
last = self._code.next_token(last_token)
elif util.match_token(last, token.STRING):
# Similar to handle_str, we also need to handle adjacent strings.
last_token = last
last = self._code.next_token(last_token)
else:
break
return (first_token, last_token)

def visit_bytes(self, node, first_token, last_token):
# type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token]
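For reference, a minimal stdlib sketch (not part of this diff) of why the balanced counting above is needed: on 3.12+, each nesting level of an f-string contributes its own FSTRING_START/FSTRING_END pair, so stopping at the first FSTRING_END would end the outer string too early.

import io
import tokenize

# f"{f'{x}'}" nests one f-string inside another.
source = 'f"{f\'{x}\'}"\n'
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    print(tokenize.tok_name[tok.type], repr(tok.string))
# On 3.12+ this prints two FSTRING_START tokens before the first
# FSTRING_END; on earlier versions the whole literal is one STRING token.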
9 changes: 9 additions & 0 deletions asttokens/util.py
@@ -446,6 +446,10 @@ def annotate_fstring_nodes(tree):
Add a special attribute `_broken_positions` to nodes inside f-strings
if the lineno/col_offset cannot be trusted.
"""
if sys.version_info >= (3, 12):
# f-strings were weirdly implemented until https://peps.python.org/pep-0701/
# In Python 3.12, inner nodes have sensible positions.
return
for joinedstr in walk(tree):
if not isinstance(joinedstr, ast.JoinedStr):
continue
@@ -457,6 +461,11 @@ def annotate_fstring_nodes(tree):
if not fstring_positions_work():
for child in walk(part.value):
setattr(child, '_broken_positions', True)
if isinstance(child, ast.JoinedStr):
# Recursively handle this inner JoinedStr in the same way.
# While this is usually automatic for other nodes,
# the children of f-strings are explicitly excluded in iter_children_ast.
annotate_fstring_nodes(child)

if part.format_spec: # this is another JoinedStr
# Again, the standard positions span the full f-string.
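As a rough illustration of the comments above (a sketch, not part of the diff), the positions of expressions inside an f-string can be inspected directly; on 3.12+ (PEP 701) they point at the real source, while on older versions they are the untrusted values this annotation guards against.

import ast
import sys

source = 'x = f"{a} {b}"'
joined = ast.parse(source).body[0].value  # the JoinedStr node
for part in joined.values:
    if isinstance(part, ast.FormattedValue):
        # On 3.12+ col_offset locates the actual expression in `source`;
        # before 3.12 it may be wrong, hence `_broken_positions`.
        print(sys.version_info[:2], ast.dump(part.value), part.value.col_offset)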
1 change: 1 addition & 0 deletions setup.cfg
@@ -29,6 +29,7 @@ classifiers =
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy

9 changes: 8 additions & 1 deletion tests/test_astroid.py
@@ -35,5 +35,12 @@ def iter_fields(node):
@staticmethod
def create_asttokens(source):
builder = astroid.builder.AstroidBuilder()
tree = builder.string_build(source)
try:
tree = builder.string_build(source)
except AttributeError as e:
raise AstroidTreeException(str(e))
return ASTTokens(source, tree=tree)


class AstroidTreeException(Exception):
pass
25 changes: 14 additions & 11 deletions tests/test_mark_tokens.py
@@ -627,6 +627,8 @@ def test_sys_modules(self):
so it only tests all modules if the environment variable
ASTTOKENS_SLOW_TESTS has been set.
"""
from .test_astroid import AstroidTreeException

modules = list(sys.modules.values())
if not os.environ.get('ASTTOKENS_SLOW_TESTS'):
modules = modules[:20]
@@ -640,7 +642,7 @@

try:
filename = inspect.getsourcefile(module)
except TypeError:
except Exception: # some modules raise weird errors
continue

if not filename:
@@ -657,20 +659,21 @@
if self.is_astroid_test and (
# Astroid fails with a syntax error if a type comment is on its own line
re.search(r'^\s*# type: ', source, re.MULTILINE)
# Astroid can fail on this file, specifically raising an exception at this line of code:
# lambda node: node.name == "NamedTuple" and node.parent.name == "typing"
# with the error:
# AttributeError: 'If' object has no attribute 'name'
# See https://github.com/gristlabs/asttokens/runs/7602147792
# I think the code that causes the problem is:
# if sys.version_info >= (3, 11):
# NamedTuple = typing.NamedTuple
or filename.endswith("typing_extensions.py")
):
print('Skipping', filename)
continue

self.create_mark_checker(source)
try:
self.create_mark_checker(source)
except AstroidTreeException:
# Astroid sometimes fails with errors like:
# AttributeError: 'TreeRebuilder' object has no attribute 'visit_typealias'
# See https://github.com/gristlabs/asttokens/actions/runs/6015907789/job/16318767911?pr=110
# Should be fixed in the next astroid release:
# https://github.com/pylint-dev/pylint/issues/8782#issuecomment-1669967220
# Note that this exception is raised before asttokens is even involved,
# it's purely an astroid bug that we can safely ignore.
continue

if six.PY3:
def test_dict_merge(self):
24 changes: 23 additions & 1 deletion tests/test_tokenless.py
@@ -98,7 +98,7 @@ def check_node(self, atok, node):
ast_text = ast.get_source_segment(source, node, padded=padded)
atok_text = atok.get_text(node, padded=padded)
if ast_text:
if (
if sys.version_info < (3, 12) and (
ast_text.startswith("f") and isinstance(node, (ast.Str, ast.FormattedValue))
or is_fstring_format_spec(node)
or (not fstring_positions_work() and is_fstring_internal_node(node))
@@ -120,6 +120,28 @@ def test_lazy_asttext_astroid_errors(self):
with self.assertRaises(NotImplementedError):
ASTText(source, tree)

def test_nested_fstrings(self):
f1 = 'f"a {1+2} b {3+4} c"'
f2 = "f'd {" + f1 + "} e'"
f3 = "f'''{" + f2 + "}{" + f1 + "}'''"
f4 = 'f"""{' + f3 + '}"""'
s = 'f = ' + f4
atok = ASTText(s)
self.assertEqual(atok.get_text(atok.tree), s)
n4 = atok.tree.body[0].value
n3 = n4.values[0].value
n2 = n3.values[0].value
n1 = n2.values[1].value
self.assertEqual(atok.get_text(n4), f4)
if fstring_positions_work():
self.assertEqual(atok.get_text(n3), f3)
self.assertEqual(atok.get_text(n2), f2)
self.assertEqual(atok.get_text(n1), f1)
else:
self.assertEqual(atok.get_text(n3), '')
self.assertEqual(atok.get_text(n2), '')
self.assertEqual(atok.get_text(n1), '')


class TestFstringPositionsWork(unittest.TestCase):
def test_fstring_positions_work(self):
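As a companion to the test above (a hedged sketch assuming standard 3.12 behavior, not part of the diff), the stdlib alone shows the same effect: once inner positions work, ast.get_source_segment can recover a nested f-string's text.

import ast

source = 'f = f"d {f\'a {1+2} b\'} e"'
outer = ast.parse(source).body[0].value  # outermost JoinedStr
inner = outer.values[1].value            # nested f-string inside the {...}
# On 3.12+ this prints f'a {1+2} b'; on earlier versions the inner
# node's reported positions cannot be trusted, so the segment is
# wrong or unavailable.
print(ast.get_source_segment(source, inner))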
46 changes: 29 additions & 17 deletions tests/test_util.py
@@ -3,6 +3,7 @@

import ast
import io
import sys
import token
import unittest

@@ -122,27 +123,38 @@ def test_combine_tokens():
from asttokens.util import combine_tokens, patched_generate_tokens

text = "℘·2=1"
original_tokens = list(generate_tokens(io.StringIO(text).readline))[:4]
assert original_tokens == [
TokenInfo(ERRORTOKEN, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
TokenInfo(ERRORTOKEN, string='·', start=(1, 1), end=(1, 2), line='℘·2=1'),
TokenInfo(NUMBER, string='2', start=(1, 2), end=(1, 3), line='℘·2=1'),
TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:1]) == [
TokenInfo(NAME, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:2]) == [
TokenInfo(NAME, string='℘·', start=(1, 0), end=(1, 2), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:3]) == [
TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
]
original_tokens = []
for tok in generate_tokens(io.StringIO(text).readline):
original_tokens.append(tok)
if tok.type == OP:
break

assert list(patched_generate_tokens(iter(original_tokens))) == [
correct_tokens = [
TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'),
]
if sys.version_info >= (3, 12):
# The tokenizing bug was fixed in 3.12, so the original tokens are correct,
# rather than starting with false ERRORTOKENs.
assert original_tokens == correct_tokens
else:
assert original_tokens == [
TokenInfo(ERRORTOKEN, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
TokenInfo(ERRORTOKEN, string='·', start=(1, 1), end=(1, 2), line='℘·2=1'),
TokenInfo(NUMBER, string='2', start=(1, 2), end=(1, 3), line='℘·2=1'),
TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:1]) == [
TokenInfo(NAME, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:2]) == [
TokenInfo(NAME, string='℘·', start=(1, 0), end=(1, 2), line='℘·2=1'),
]
assert combine_tokens(original_tokens[:3]) == [
TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
]

assert list(patched_generate_tokens(iter(original_tokens))) == correct_tokens
assert list(patched_generate_tokens(iter(original_tokens[:-1]))) == [
TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
]
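For context, the tokenizer difference the test branches on can be reproduced with the stdlib alone (a sketch, not part of the test suite): '℘' and '·' are valid identifier characters that the old regex-based tokenizer rejected.

import io
import sys
import token
import tokenize

# First token of "℘·2=1": ERRORTOKEN '℘' on 3.11 and earlier, but
# NAME '℘·2' on 3.12+, where the C tokenizer is used under the hood.
first = next(tokenize.generate_tokens(io.StringIO("℘·2=1").readline))
print(sys.version_info[:2], token.tok_name[first.type], repr(first.string))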