diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ecc69e5783a..5eb2a244ee5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,11 +6,24 @@ repos:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
-  # isort should run before black as black sometimes tweaks the isort output
+      - id: debug-statements
+      - id: mixed-line-ending
+  # This wants to go before isort & flake8
+  - repo: https://github.com/myint/autoflake
+    rev: "v1.4"
+    hooks:
+      - id: autoflake # isort should run before black as black sometimes tweaks the isort output
+        args: ["--in-place", "--ignore-init-module-imports"]
   - repo: https://github.com/PyCQA/isort
     rev: 5.10.1
     hooks:
       - id: isort
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v2.31.0
+    hooks:
+      - id: pyupgrade
+        args:
+          - "--py37-plus"
   # https://github.com/python/black#version-control-integration
   - repo: https://github.com/psf/black
     rev: 21.12b0
@@ -47,12 +60,3 @@ repos:
             typing-extensions==3.10.0.0,
             numpy,
           ]
-  # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
-  # - repo: https://github.com/asottile/pyupgrade
-  #   rev: v1.22.1
-  #   hooks:
-  #     - id: pyupgrade
-  #       args:
-  #         - "--py3-only"
-  #         # remove on f-strings in Py3.7
-  #         - "--keep-percent-format"
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 89040c6dc5b..8896dd62379 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -78,6 +78,8 @@ Internal Changes
 
 - Removed internal checks for ``pd.Panel`` (:issue:`6145`).
   By `Matthew Roeschke `_.
+- Add ``pyupgrade`` pre-commit hook (:pull:`6152`).
+  By `Maximilian Roos `_.
 
 .. _whats-new.0.20.2:
 
diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py
index 0a9ffcbda22..f03782321e7 100644
--- a/xarray/backends/plugins.py
+++ b/xarray/backends/plugins.py
@@ -168,10 +168,8 @@ def get_backend(engine):
         backend = engine()
     else:
         raise TypeError(
-            (
-                "engine must be a string or a subclass of "
-                f"xarray.backends.BackendEntrypoint: {engine}"
-            )
+            "engine must be a string or a subclass of "
+            f"xarray.backends.BackendEntrypoint: {engine}"
         )
 
     return backend
diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index 2db6d4e8097..6557590dbb8 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -160,7 +160,7 @@ def rollback(self, date):
         return date - type(self)()
 
     def __str__(self):
-        return "<{}: n={}>".format(type(self).__name__, self.n)
+        return f"<{type(self).__name__}: n={self.n}>"
 
     def __repr__(self):
         return str(self)
@@ -399,10 +399,10 @@ def __mul__(self, other):
         return type(self)(n=other * self.n, month=self.month)
 
     def rule_code(self):
-        return "{}-{}".format(self._freq, _MONTH_ABBREVIATIONS[self.month])
+        return f"{self._freq}-{_MONTH_ABBREVIATIONS[self.month]}"
 
     def __str__(self):
-        return "<{}: n={}, month={}>".format(type(self).__name__, self.n, self.month)
+        return f"<{type(self).__name__}: n={self.n}, month={self.month}>"
 
 
 class QuarterBegin(QuarterOffset):
@@ -485,10 +485,10 @@ def __mul__(self, other):
         return type(self)(n=other * self.n, month=self.month)
 
     def rule_code(self):
-        return "{}-{}".format(self._freq, _MONTH_ABBREVIATIONS[self.month])
+        return f"{self._freq}-{_MONTH_ABBREVIATIONS[self.month]}"
 
     def __str__(self):
-        return "<{}: n={}, month={}>".format(type(self).__name__, self.n, self.month)
+        return f"<{type(self).__name__}: n={self.n}, month={self.month}>"
 
 
 class YearBegin(YearOffset):
@@ -741,7 +741,7 @@ def _generate_linear_range(start, end, periods):
 
     total_seconds = (end - start).total_seconds()
     values = np.linspace(0.0, total_seconds, periods, endpoint=True)
-    units = "seconds since {}".format(format_cftime_datetime(start))
+    units = f"seconds since {format_cftime_datetime(start)}"
     calendar = start.calendar
     return cftime.num2date(
         values, units=units, calendar=calendar, only_use_cftime_datetimes=True
diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py
index aeffab0c2d7..e4b1e906160 100644
--- a/xarray/coding/strings.py
+++ b/xarray/coding/strings.py
@@ -18,7 +18,7 @@
 
 def create_vlen_dtype(element_type):
     if element_type not in (str, bytes):
-        raise TypeError("unsupported type for vlen_dtype: {!r}".format(element_type))
+        raise TypeError(f"unsupported type for vlen_dtype: {element_type!r}")
     # based on h5py.special_dtype
     return np.dtype("O", metadata={"element_type": element_type})
 
@@ -227,7 +227,7 @@ def shape(self):
         return self.array.shape[:-1]
 
     def __repr__(self):
-        return "{}({!r})".format(type(self).__name__, self.array)
+        return f"{type(self).__name__}({self.array!r})"
 
     def __getitem__(self, key):
         # require slicing the last dimension completely
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index c89b0c100cd..0eb8707f0cc 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -131,8 +131,8 @@ def _ensure_padded_year(ref_date):
     matches_start_digits = re.match(r"(\d+)(.*)", ref_date)
     if not matches_start_digits:
         raise ValueError(f"invalid reference date for time units: {ref_date}")
-    ref_year, everything_else = [s for s in matches_start_digits.groups()]
-    ref_date_padded = "{:04d}{}".format(int(ref_year), everything_else)
+    ref_year, everything_else = (s for s in matches_start_digits.groups())
+    ref_date_padded = f"{int(ref_year):04d}{everything_else}"
 
     warning_msg = (
         f"Ambiguous reference date string: {ref_date}. The first value is "
@@ -155,7 +155,7 @@ def _unpack_netcdf_time_units(units):
     if not matches:
         raise ValueError(f"invalid time units: {units}")
 
-    delta_units, ref_date = [s.strip() for s in matches.groups()]
+    delta_units, ref_date = (s.strip() for s in matches.groups())
     ref_date = _ensure_padded_year(ref_date)
 
     return delta_units, ref_date
@@ -545,7 +545,7 @@ def _should_cftime_be_used(source, target_calendar, use_cftime):
 def _cleanup_netcdf_time_units(units):
     delta, ref_date = _unpack_netcdf_time_units(units)
     try:
-        units = "{} since {}".format(delta, format_timestamp(ref_date))
+        units = f"{delta} since {format_timestamp(ref_date)}"
     except (OutOfBoundsDatetime, ValueError):
         # don't worry about reifying the units if they're out of bounds or
         # formatted badly
diff --git a/xarray/core/common.py b/xarray/core/common.py
index b5dc3bf0e20..039b03aec56 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -8,16 +8,11 @@
     TYPE_CHECKING,
     Any,
     Callable,
-    Dict,
     Hashable,
     Iterable,
     Iterator,
-    List,
     Mapping,
-    Optional,
-    Tuple,
     TypeVar,
-    Union,
     overload,
 )
 
@@ -164,9 +159,7 @@ def __iter__(self: Any) -> Iterator[Any]:
             raise TypeError("iteration over a 0-d array")
         return self._iter()
 
-    def get_axis_num(
-        self, dim: Union[Hashable, Iterable[Hashable]]
-    ) -> Union[int, Tuple[int, ...]]:
+    def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, ...]:
         """Return axis number(s) corresponding to dimension(s) in this array.
 
Parameters @@ -244,7 +237,7 @@ def __getattr__(self, name: str) -> Any: with suppress(KeyError): return source[name] raise AttributeError( - "{!r} object has no attribute {!r}".format(type(self).__name__, name) + f"{type(self).__name__!r} object has no attribute {name!r}" ) # This complicated two-method design boosts overall performance of simple operations @@ -284,37 +277,37 @@ def __setattr__(self, name: str, value: Any) -> None: "assignment (e.g., `ds['name'] = ...`) instead of assigning variables." ) from e - def __dir__(self) -> List[str]: + def __dir__(self) -> list[str]: """Provide method name lookup and completion. Only provide 'public' methods. """ - extra_attrs = set( + extra_attrs = { item for source in self._attr_sources for item in source if isinstance(item, str) - ) + } return sorted(set(dir(type(self))) | extra_attrs) - def _ipython_key_completions_(self) -> List[str]: + def _ipython_key_completions_(self) -> list[str]: """Provide method for the key-autocompletions in IPython. See http://ipython.readthedocs.io/en/stable/config/integrating.html#tab-completion For the details. """ - items = set( + items = { item for source in self._item_sources for item in source if isinstance(item, str) - ) + } return list(items) def get_squeeze_dims( xarray_obj, - dim: Union[Hashable, Iterable[Hashable], None] = None, - axis: Union[int, Iterable[int], None] = None, -) -> List[Hashable]: + dim: Hashable | Iterable[Hashable] | None = None, + axis: int | Iterable[int] | None = None, +) -> list[Hashable]: """Get a list of dimensions to squeeze out.""" if dim is not None and axis is not None: raise ValueError("cannot use both parameters `axis` and `dim`") @@ -346,15 +339,15 @@ def get_squeeze_dims( class DataWithCoords(AttrAccessMixin): """Shared base class for Dataset and DataArray.""" - _close: Optional[Callable[[], None]] + _close: Callable[[], None] | None __slots__ = ("_close",) def squeeze( self, - dim: Union[Hashable, Iterable[Hashable], None] = None, + dim: Hashable | Iterable[Hashable] | None = None, drop: bool = False, - axis: Union[int, Iterable[int], None] = None, + axis: int | Iterable[int] | None = None, ): """Return a new object with squeezed data. @@ -416,8 +409,8 @@ def get_index(self, key: Hashable) -> pd.Index: return pd.Index(range(self.sizes[key]), name=key) def _calc_assign_results( - self: C, kwargs: Mapping[Any, Union[T, Callable[[C], T]]] - ) -> Dict[Hashable, T]: + self: C, kwargs: Mapping[Any, T | Callable[[C], T]] + ) -> dict[Hashable, T]: return {k: v(self) if callable(v) else v for k, v in kwargs.items()} def assign_coords(self, coords=None, **coords_kwargs): @@ -535,7 +528,7 @@ def assign_attrs(self, *args, **kwargs): def pipe( self, - func: Union[Callable[..., T], Tuple[Callable[..., T], str]], + func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs, ) -> T: @@ -802,7 +795,7 @@ def groupby_bins( }, ) - def weighted(self: T_DataWithCoords, weights: "DataArray") -> Weighted[T_Xarray]: + def weighted(self: T_DataWithCoords, weights: DataArray) -> Weighted[T_Xarray]: """ Weighted operations. 
@@ -825,7 +818,7 @@ def rolling( self, dim: Mapping[Any, int] = None, min_periods: int = None, - center: Union[bool, Mapping[Any, bool]] = False, + center: bool | Mapping[Any, bool] = False, **window_kwargs: int, ): """ @@ -940,7 +933,7 @@ def coarsen( self, dim: Mapping[Any, int] = None, boundary: str = "exact", - side: Union[str, Mapping[Any, str]] = "left", + side: str | Mapping[Any, str] = "left", coord_func: str = "mean", **window_kwargs: int, ): @@ -1290,7 +1283,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False): return ops.where_method(self, cond, other) - def set_close(self, close: Optional[Callable[[], None]]) -> None: + def set_close(self, close: Callable[[], None] | None) -> None: """Register the function that releases any resources linked to this object. This method controls how xarray cleans up resources associated @@ -1523,20 +1516,20 @@ def __getitem__(self, value): @overload def full_like( - other: "Dataset", + other: Dataset, fill_value, - dtype: Union[DTypeLike, Mapping[Any, DTypeLike]] = None, -) -> "Dataset": + dtype: DTypeLike | Mapping[Any, DTypeLike] = None, +) -> Dataset: ... @overload -def full_like(other: "DataArray", fill_value, dtype: DTypeLike = None) -> "DataArray": +def full_like(other: DataArray, fill_value, dtype: DTypeLike = None) -> DataArray: ... @overload -def full_like(other: "Variable", fill_value, dtype: DTypeLike = None) -> "Variable": +def full_like(other: Variable, fill_value, dtype: DTypeLike = None) -> Variable: ... @@ -1815,9 +1808,9 @@ def ones_like(other, dtype: DTypeLike = None): def get_chunksizes( variables: Iterable[Variable], -) -> Mapping[Any, Tuple[int, ...]]: +) -> Mapping[Any, tuple[int, ...]]: - chunks: Dict[Any, Tuple[int, ...]] = {} + chunks: dict[Any, tuple[int, ...]] = {} for v in variables: if hasattr(v.data, "chunks"): for dim, c in v.chunksizes.items(): diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 5e6340feed2..7273d25253d 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -13,15 +13,10 @@ AbstractSet, Any, Callable, - Dict, Hashable, Iterable, - List, Mapping, - Optional, Sequence, - Tuple, - Union, ) import numpy as np @@ -197,7 +192,7 @@ def result_name(objects: list) -> Any: return name -def _get_coords_list(args) -> List[Coordinates]: +def _get_coords_list(args) -> list[Coordinates]: coords_list = [] for arg in args: try: @@ -214,7 +209,7 @@ def build_output_coords( signature: _UFuncSignature, exclude_dims: AbstractSet = frozenset(), combine_attrs: str = "override", -) -> "List[Dict[Any, Variable]]": +) -> list[dict[Any, Variable]]: """Build output coordinates for an operation. 
Parameters @@ -309,11 +304,11 @@ def apply_dataarray_vfunc( return out -def ordered_set_union(all_keys: List[Iterable]) -> Iterable: +def ordered_set_union(all_keys: list[Iterable]) -> Iterable: return {key: None for keys in all_keys for key in keys}.keys() -def ordered_set_intersection(all_keys: List[Iterable]) -> Iterable: +def ordered_set_intersection(all_keys: list[Iterable]) -> Iterable: intersection = set(all_keys[0]) for keys in all_keys[1:]: intersection.intersection_update(keys) @@ -331,7 +326,7 @@ def assert_and_return_exact_match(all_keys): return first_keys -_JOINERS: Dict[str, Callable] = { +_JOINERS: dict[str, Callable] = { "inner": ordered_set_intersection, "outer": ordered_set_union, "left": operator.itemgetter(0), @@ -340,17 +335,15 @@ def assert_and_return_exact_match(all_keys): } -def join_dict_keys( - objects: Iterable[Union[Mapping, Any]], how: str = "inner" -) -> Iterable: +def join_dict_keys(objects: Iterable[Mapping | Any], how: str = "inner") -> Iterable: joiner = _JOINERS[how] all_keys = [obj.keys() for obj in objects if hasattr(obj, "keys")] return joiner(all_keys) def collect_dict_values( - objects: Iterable[Union[Mapping, Any]], keys: Iterable, fill_value: object = None -) -> List[list]: + objects: Iterable[Mapping | Any], keys: Iterable, fill_value: object = None +) -> list[list]: return [ [obj.get(key, fill_value) if is_dict_like(obj) else obj for obj in objects] for key in keys @@ -368,9 +361,9 @@ def _as_variables_or_variable(arg): def _unpack_dict_tuples( - result_vars: Mapping[Any, Tuple[Variable, ...]], num_outputs: int -) -> Tuple[Dict[Hashable, Variable], ...]: - out: Tuple[Dict[Hashable, Variable], ...] = tuple({} for _ in range(num_outputs)) + result_vars: Mapping[Any, tuple[Variable, ...]], num_outputs: int +) -> tuple[dict[Hashable, Variable], ...]: + out: tuple[dict[Hashable, Variable], ...] = tuple({} for _ in range(num_outputs)) for name, values in result_vars.items(): for value, results_dict in zip(values, out): results_dict[name] = value @@ -398,7 +391,7 @@ def apply_dict_of_variables_vfunc( def _fast_dataset( - variables: Dict[Hashable, Variable], coord_variables: Mapping[Hashable, Variable] + variables: dict[Hashable, Variable], coord_variables: Mapping[Hashable, Variable] ) -> Dataset: """Create a dataset as quickly as possible. 
@@ -528,9 +521,9 @@ def apply_groupby_func(func, *args): def unified_dim_sizes( variables: Iterable[Variable], exclude_dims: AbstractSet = frozenset() -) -> Dict[Hashable, int]: +) -> dict[Hashable, int]: - dim_sizes: Dict[Hashable, int] = {} + dim_sizes: dict[Hashable, int] = {} for var in variables: if len(set(var.dims)) < len(var.dims): @@ -556,8 +549,8 @@ def unified_dim_sizes( def broadcast_compat_data( variable: Variable, - broadcast_dims: Tuple[Hashable, ...], - core_dims: Tuple[Hashable, ...], + broadcast_dims: tuple[Hashable, ...], + core_dims: tuple[Hashable, ...], ) -> Any: data = variable.data @@ -595,7 +588,7 @@ def broadcast_compat_data( data = duck_array_ops.transpose(data, order) if new_dims != reordered_dims: - key_parts: List[Optional[slice]] = [] + key_parts: list[slice | None] = [] for dim in new_dims: if dim in set_old_dims: key_parts.append(SLICE_NONE) @@ -810,19 +803,19 @@ def apply_ufunc( func: Callable, *args: Any, input_core_dims: Sequence[Sequence] = None, - output_core_dims: Optional[Sequence[Sequence]] = ((),), + output_core_dims: Sequence[Sequence] | None = ((),), exclude_dims: AbstractSet = frozenset(), vectorize: bool = False, join: str = "exact", dataset_join: str = "exact", dataset_fill_value: object = _NO_FILL_VALUE, - keep_attrs: Union[bool, str] = None, - kwargs: Mapping = None, + keep_attrs: bool | str | None = None, + kwargs: Mapping | None = None, dask: str = "forbidden", - output_dtypes: Sequence = None, - output_sizes: Mapping[Any, int] = None, + output_dtypes: Sequence | None = None, + output_sizes: Mapping[Any, int] | None = None, meta: Any = None, - dask_gufunc_kwargs: Dict[str, Any] = None, + dask_gufunc_kwargs: dict[str, Any] | None = None, ) -> Any: """Apply a vectorized function for unlabeled arrays on xarray objects. @@ -1375,8 +1368,8 @@ def _cov_corr(da_a, da_b, dim=None, ddof=0, method=None): def cross( - a: Union[DataArray, Variable], b: Union[DataArray, Variable], *, dim: Hashable -) -> Union[DataArray, Variable]: + a: DataArray | Variable, b: DataArray | Variable, *, dim: Hashable +) -> DataArray | Variable: """ Compute the cross product of two (arrays of) vectors. @@ -1926,7 +1919,7 @@ def _calc_idxminmax( return res -def unify_chunks(*objects: T_Xarray) -> Tuple[T_Xarray, ...]: +def unify_chunks(*objects: T_Xarray) -> tuple[T_Xarray, ...]: """ Given any number of Dataset and/or DataArray objects, returns new objects with unified chunk size along all chunked dimensions. 
diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 7ead1918e1a..4621e622d42 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,18 +1,6 @@ from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Dict, - Hashable, - Iterable, - List, - Literal, - Optional, - Set, - Tuple, - Union, - overload, -) +from typing import TYPE_CHECKING, Hashable, Iterable, Literal, overload import pandas as pd @@ -35,31 +23,31 @@ @overload def concat( - objs: Iterable["Dataset"], - dim: Hashable | "DataArray" | pd.Index, - data_vars: concat_options | List[Hashable] = "all", - coords: concat_options | List[Hashable] = "different", + objs: Iterable[Dataset], + dim: Hashable | DataArray | pd.Index, + data_vars: concat_options | list[Hashable] = "all", + coords: concat_options | list[Hashable] = "different", compat: compat_options = "equals", - positions: Optional[Iterable[int]] = None, + positions: Iterable[int] | None = None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "Dataset": +) -> Dataset: ... @overload def concat( - objs: Iterable["DataArray"], - dim: Hashable | "DataArray" | pd.Index, - data_vars: concat_options | List[Hashable] = "all", - coords: concat_options | List[Hashable] = "different", + objs: Iterable[DataArray], + dim: Hashable | DataArray | pd.Index, + data_vars: concat_options | list[Hashable] = "all", + coords: concat_options | list[Hashable] = "different", compat: compat_options = "equals", - positions: Optional[Iterable[int]] = None, + positions: Iterable[int] | None = None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "DataArray": +) -> DataArray: ... @@ -394,14 +382,14 @@ def process_subset_opt(opt, subset): # determine dimensional coordinate names and a dict mapping name to DataArray def _parse_datasets( - datasets: Iterable["Dataset"], -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, int], Set[Hashable], Set[Hashable]]: + datasets: Iterable[Dataset], +) -> tuple[dict[Hashable, Variable], dict[Hashable, int], set[Hashable], set[Hashable]]: - dims: Set[Hashable] = set() - all_coord_names: Set[Hashable] = set() - data_vars: Set[Hashable] = set() # list of data_vars - dim_coords: Dict[Hashable, Variable] = {} # maps dim name to variable - dims_sizes: Dict[Hashable, int] = {} # shared dimension sizes to expand variables + dims: set[Hashable] = set() + all_coord_names: set[Hashable] = set() + data_vars: set[Hashable] = set() # list of data_vars + dim_coords: dict[Hashable, Variable] = {} # maps dim name to variable + dims_sizes: dict[Hashable, int] = {} # shared dimension sizes to expand variables for ds in datasets: dims_sizes.update(ds.dims) @@ -421,16 +409,16 @@ def _parse_datasets( def _dataset_concat( - datasets: List["Dataset"], - dim: Union[str, "DataArray", pd.Index], - data_vars: Union[str, List[str]], - coords: Union[str, List[str]], + datasets: list[Dataset], + dim: str | DataArray | pd.Index, + data_vars: str | list[str], + coords: str | list[str], compat: str, - positions: Optional[Iterable[int]], + positions: Iterable[int] | None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "Dataset": +) -> Dataset: """ Concatenate a sequence of datasets along a new or existing dimension """ @@ -477,7 +465,7 @@ def _dataset_concat( result_vars = {} if variables_to_merge: - to_merge: Dict[Hashable, List[Variable]] = { + to_merge: dict[Hashable, list[Variable]] = { var: [] for var in variables_to_merge } @@ 
-552,16 +540,16 @@ def ensure_common_dims(vars): def _dataarray_concat( - arrays: Iterable["DataArray"], - dim: Union[str, "DataArray", pd.Index], - data_vars: Union[str, List[str]], - coords: Union[str, List[str]], + arrays: Iterable[DataArray], + dim: str | DataArray | pd.Index, + data_vars: str | list[str], + coords: str | list[str], compat: str, - positions: Optional[Iterable[int]], + positions: Iterable[int] | None, fill_value: object = dtypes.NA, join: str = "outer", combine_attrs: str = "override", -) -> "DataArray": +) -> DataArray: from .dataarray import DataArray arrays = list(arrays) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7f29d3b6320..81aaf5a50e0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6,16 +6,11 @@ TYPE_CHECKING, Any, Callable, - Dict, Hashable, Iterable, - List, Literal, Mapping, - Optional, Sequence, - Tuple, - Union, cast, ) @@ -93,7 +88,7 @@ def _infer_coords_and_dims( shape, coords, dims -) -> "Tuple[Dict[Any, Variable], Tuple[Hashable, ...]]": +) -> tuple[dict[Any, Variable], tuple[Hashable, ...]]: """All the logic for creating a new DataArray""" if ( @@ -131,7 +126,7 @@ def _infer_coords_and_dims( if not isinstance(d, str): raise TypeError(f"dimension {d} is not a string") - new_coords: Dict[Any, Variable] = {} + new_coords: dict[Any, Variable] = {} if utils.is_dict_like(coords): for k, v in coords.items(): @@ -192,10 +187,10 @@ def _check_data_shape(data, coords, dims): class _LocIndexer: __slots__ = ("data_array",) - def __init__(self, data_array: "DataArray"): + def __init__(self, data_array: DataArray): self.data_array = data_array - def __getitem__(self, key) -> "DataArray": + def __getitem__(self, key) -> DataArray: if not utils.is_dict_like(key): # expand the indexer so we can handle Ellipsis labels = indexing.expanded_indexer(key, self.data_array.ndim) @@ -341,11 +336,11 @@ class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): units: degC """ - _cache: Dict[str, Any] - _coords: Dict[Any, Variable] - _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, Index]] - _name: Optional[Hashable] + _cache: dict[str, Any] + _coords: dict[Any, Variable] + _close: Callable[[], None] | None + _indexes: dict[Hashable, Index] | None + _name: Hashable | None _variable: Variable __slots__ = ( @@ -369,12 +364,12 @@ class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): def __init__( self, data: Any = dtypes.NA, - coords: Union[Sequence[Tuple], Mapping[Any, Any], None] = None, - dims: Union[Hashable, Sequence[Hashable], None] = None, + coords: Sequence[tuple] | Mapping[Any, Any] | None = None, + dims: Hashable | Sequence[Hashable] | None = None, name: Hashable = None, attrs: Mapping = None, # internal parameters - indexes: Dict[Hashable, pd.Index] = None, + indexes: dict[Hashable, pd.Index] = None, fastpath: bool = False, ): if fastpath: @@ -425,7 +420,7 @@ def _replace( self: T_DataArray, variable: Variable = None, coords=None, - name: Union[Hashable, None, Default] = _default, + name: Hashable | None | Default = _default, indexes=None, ) -> T_DataArray: if variable is None: @@ -437,8 +432,8 @@ def _replace( return type(self)(variable, coords, name=name, fastpath=True, indexes=indexes) def _replace_maybe_drop_dims( - self, variable: Variable, name: Union[Hashable, None, Default] = _default - ) -> "DataArray": + self, variable: Variable, name: Hashable | None | Default = _default + ) -> DataArray: if variable.dims == self.dims and variable.shape == self.shape: coords = 
self._coords.copy() indexes = self._indexes @@ -464,7 +459,7 @@ def _replace_maybe_drop_dims( ) return self._replace(variable, coords, name, indexes=indexes) - def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> "DataArray": + def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> DataArray: if not len(indexes): return self coords = self._coords.copy() @@ -473,7 +468,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Any]) -> "DataArray": obj = self._replace(coords=coords) # switch from dimension to level names, if necessary - dim_names: Dict[Any, str] = {} + dim_names: dict[Any, str] = {} for dim, idx in indexes.items(): pd_idx = idx.to_pandas_index() if not isinstance(idx, pd.MultiIndex) and pd_idx.name != dim: @@ -486,8 +481,8 @@ def _to_temp_dataset(self) -> Dataset: return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) def _from_temp_dataset( - self, dataset: Dataset, name: Union[Hashable, None, Default] = _default - ) -> "DataArray": + self, dataset: Dataset, name: Hashable | None | Default = _default + ) -> DataArray: variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables indexes = dataset._indexes @@ -580,12 +575,12 @@ def to_dataset( return result @property - def name(self) -> Optional[Hashable]: + def name(self) -> Hashable | None: """The name of this array.""" return self._name @name.setter - def name(self, value: Optional[Hashable]) -> None: + def name(self, value: Hashable | None) -> None: self._name = value @property @@ -598,7 +593,7 @@ def dtype(self) -> np.dtype: return self.variable.dtype @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> tuple[int, ...]: return self.variable.shape @property @@ -687,7 +682,7 @@ def to_index(self) -> pd.Index: return self.variable.to_index() @property - def dims(self) -> Tuple[Hashable, ...]: + def dims(self) -> tuple[Hashable, ...]: """Tuple of dimension names associated with this array. Note that the type of this property is inconsistent with @@ -710,11 +705,11 @@ def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: return dict(zip(self.dims, key)) @property - def _level_coords(self) -> Dict[Hashable, Hashable]: + def _level_coords(self) -> dict[Hashable, Hashable]: """Return a mapping of all MultiIndex levels and their corresponding coordinate name. 
""" - level_coords: Dict[Hashable, Hashable] = {} + level_coords: dict[Hashable, Hashable] = {} for cname, var in self._coords.items(): if var.ndim == 1 and isinstance(var, IndexVariable): @@ -737,7 +732,7 @@ def _getitem_coord(self, key): return self._replace_maybe_drop_dims(var, name=key) - def __getitem__(self, key: Any) -> "DataArray": + def __getitem__(self, key: Any) -> DataArray: if isinstance(key, str): return self._getitem_coord(key) else: @@ -790,7 +785,7 @@ def loc(self) -> _LocIndexer: @property # Key type needs to be `Any` because of mypy#4167 - def attrs(self) -> Dict[Any, Any]: + def attrs(self) -> dict[Any, Any]: """Dictionary storing arbitrary metadata with this array.""" return self.variable.attrs @@ -800,7 +795,7 @@ def attrs(self, value: Mapping[Any, Any]) -> None: self.variable.attrs = value # type: ignore[assignment] @property - def encoding(self) -> Dict[Hashable, Any]: + def encoding(self) -> dict[Hashable, Any]: """Dictionary of format-specific settings for how this array should be serialized.""" return self.variable.encoding @@ -837,9 +832,9 @@ def coords(self) -> DataArrayCoordinates: def reset_coords( self, - names: Union[Iterable[Hashable], Hashable, None] = None, + names: Iterable[Hashable] | Hashable | None = None, drop: bool = False, - ) -> Union[None, "DataArray", Dataset]: + ) -> None | DataArray | Dataset: """Given names of coordinates, reset them to become variables. Parameters @@ -904,7 +899,7 @@ def _dask_finalize(results, name, func, *args, **kwargs): coords = ds._variables return DataArray(variable, coords, name=name, fastpath=True) - def load(self, **kwargs) -> "DataArray": + def load(self, **kwargs) -> DataArray: """Manually trigger loading of this array's data from disk or a remote source into memory and return this array. @@ -928,7 +923,7 @@ def load(self, **kwargs) -> "DataArray": self._coords = new._coords return self - def compute(self, **kwargs) -> "DataArray": + def compute(self, **kwargs) -> DataArray: """Manually trigger loading of this array's data from disk or a remote source into memory and return a new array. The original is left unaltered. @@ -950,7 +945,7 @@ def compute(self, **kwargs) -> "DataArray": new = self.copy(deep=False) return new.load(**kwargs) - def persist(self, **kwargs) -> "DataArray": + def persist(self, **kwargs) -> DataArray: """Trigger computation in constituent dask arrays This keeps them as dask arrays but encourages them to keep data in @@ -1045,10 +1040,10 @@ def copy(self: T_DataArray, deep: bool = True, data: Any = None) -> T_DataArray: indexes = {k: v.copy(deep=deep) for k, v in self._indexes.items()} return self._replace(variable, coords, indexes=indexes) - def __copy__(self) -> "DataArray": + def __copy__(self) -> DataArray: return self.copy(deep=False) - def __deepcopy__(self, memo=None) -> "DataArray": + def __deepcopy__(self, memo=None) -> DataArray: # memo does nothing but is required for compatibility with # copy.deepcopy return self.copy(deep=True) @@ -1058,7 +1053,7 @@ def __deepcopy__(self, memo=None) -> "DataArray": __hash__ = None # type: ignore[assignment] @property - def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: + def chunks(self) -> tuple[tuple[int, ...], ...] | None: """ Tuple of block lengths for this dataarray's data, in order of dimensions, or None if the underlying data is not a dask array. 
@@ -1072,7 +1067,7 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: return self.variable.chunks @property - def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataarray's data, or None if the underlying data is not a dask array. @@ -1092,17 +1087,17 @@ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: def chunk( self, - chunks: Union[ - int, - Literal["auto"], - Tuple[int, ...], - Tuple[Tuple[int, ...], ...], - Mapping[Any, Union[None, int, Tuple[int, ...]]], - ] = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) + chunks: ( + int + | Literal["auto"] + | tuple[int, ...] + | tuple[tuple[int, ...], ...] + | Mapping[Any, None | int | tuple[int, ...]] + ) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) name_prefix: str = "xarray-", token: str = None, lock: bool = False, - ) -> "DataArray": + ) -> DataArray: """Coerce this array's data into a dask arrays with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1144,7 +1139,7 @@ def isel( drop: bool = False, missing_dims: str = "raise", **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by integer indexing along the specified dimension(s). @@ -1228,7 +1223,7 @@ def sel( tolerance=None, drop: bool = False, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by selecting index labels along the specified dimension(s). @@ -1341,9 +1336,9 @@ def sel( def head( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by the the first `n` values along the specified dimension(s). Default `n` = 5 @@ -1358,9 +1353,9 @@ def head( def tail( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by the the last `n` values along the specified dimension(s). Default `n` = 5 @@ -1375,9 +1370,9 @@ def tail( def thin( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray whose data is given by each `n` value along the specified dimension(s). @@ -1391,8 +1386,8 @@ def thin( return self._from_temp_dataset(ds) def broadcast_like( - self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None - ) -> "DataArray": + self, other: DataArray | Dataset, exclude: Iterable[Hashable] | None = None + ) -> DataArray: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -1466,12 +1461,12 @@ def broadcast_like( def reindex_like( self, - other: Union["DataArray", Dataset], - method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + other: DataArray | Dataset, + method: str | None = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value=dtypes.NA, - ) -> "DataArray": + ) -> DataArray: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. 
The default fill value is NaN. @@ -1534,11 +1529,11 @@ def reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value=dtypes.NA, **indexers_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -1634,7 +1629,7 @@ def interp( assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, **coords_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Multidimensional interpolation of variables. Parameters @@ -1759,11 +1754,11 @@ def interp( def interp_like( self, - other: Union["DataArray", Dataset], + other: DataArray | Dataset, method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, - ) -> "DataArray": + ) -> DataArray: """Interpolate this object onto the coordinates of another object, filling out of range values with NaN. @@ -1815,9 +1810,9 @@ def interp_like( def rename( self, - new_name_or_name_dict: Union[Hashable, Mapping[Any, Hashable]] = None, + new_name_or_name_dict: Hashable | Mapping[Any, Hashable] = None, **names: Hashable, - ) -> "DataArray": + ) -> DataArray: """Returns a new DataArray with renamed coordinates or a new name. Parameters @@ -1854,7 +1849,7 @@ def rename( def swap_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs - ) -> "DataArray": + ) -> DataArray: """Returns a new DataArray with swapped dimensions. Parameters @@ -1911,10 +1906,10 @@ def swap_dims( def expand_dims( self, - dim: Union[None, Hashable, Sequence[Hashable], Mapping[Any, Any]] = None, + dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, axis=None, **dim_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new object with an additional axis (or axes) inserted at the corresponding position in the array shape. The new object is a view into the underlying array, not a copy. @@ -1963,10 +1958,10 @@ def expand_dims( def set_index( self, - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]] = None, + indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None, append: bool = False, - **indexes_kwargs: Union[Hashable, Sequence[Hashable]], - ) -> "DataArray": + **indexes_kwargs: Hashable | Sequence[Hashable], + ) -> DataArray: """Set DataArray (multi-)indexes using one or more existing coordinates. @@ -2020,9 +2015,9 @@ def set_index( def reset_index( self, - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], drop: bool = False, - ) -> "DataArray": + ) -> DataArray: """Reset the specified index(es) or multi-index level(s). Parameters @@ -2053,7 +2048,7 @@ def reorder_levels( self, dim_order: Mapping[Any, Sequence[int]] = None, **dim_order_kwargs: Sequence[int], - ) -> "DataArray": + ) -> DataArray: """Rearrange index levels using input order. Parameters @@ -2088,7 +2083,7 @@ def stack( self, dimensions: Mapping[Any, Sequence[Hashable]] = None, **dimensions_kwargs: Sequence[Hashable], - ) -> "DataArray": + ) -> DataArray: """ Stack any number of existing dimensions into a single new dimension. 
@@ -2144,10 +2139,10 @@ def stack( def unstack( self, - dim: Union[Hashable, Sequence[Hashable], None] = None, + dim: Hashable | Sequence[Hashable] | None = None, fill_value: Any = dtypes.NA, sparse: bool = False, - ) -> "DataArray": + ) -> DataArray: """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -2278,7 +2273,7 @@ def transpose( *dims: Hashable, transpose_coords: bool = True, missing_dims: str = "raise", - ) -> "DataArray": + ) -> DataArray: """Return a new DataArray object with transposed dimensions. Parameters @@ -2315,7 +2310,7 @@ def transpose( dims = tuple(utils.infix_dims(dims, self.dims, missing_dims)) variable = self.variable.transpose(*dims) if transpose_coords: - coords: Dict[Hashable, Variable] = {} + coords: dict[Hashable, Variable] = {} for name, coord in self.coords.items(): coord_dims = tuple(dim for dim in dims if dim in coord.dims) coords[name] = coord.variable.transpose(*coord_dims) @@ -2324,12 +2319,12 @@ def transpose( return self._replace(variable) @property - def T(self) -> "DataArray": + def T(self) -> DataArray: return self.transpose() def drop_vars( - self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "DataArray": + self, names: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> DataArray: """Returns an array with dropped variables. Parameters @@ -2356,7 +2351,7 @@ def drop( *, errors: str = "raise", **labels_kwargs, - ) -> "DataArray": + ) -> DataArray: """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged @@ -2375,7 +2370,7 @@ def drop_sel( *, errors: str = "raise", **labels_kwargs, - ) -> "DataArray": + ) -> DataArray: """Drop index labels from this DataArray. Parameters @@ -2422,9 +2417,7 @@ def drop_isel(self, indexers=None, **indexers_kwargs): dataset = dataset.drop_isel(indexers=indexers, **indexers_kwargs) return self._from_temp_dataset(dataset) - def dropna( - self, dim: Hashable, how: str = "any", thresh: int = None - ) -> "DataArray": + def dropna(self, dim: Hashable, how: str = "any", thresh: int = None) -> DataArray: """Returns a new array with dropped labels for missing values along the provided dimension. @@ -2446,7 +2439,7 @@ def dropna( ds = self._to_temp_dataset().dropna(dim, how=how, thresh=thresh) return self._from_temp_dataset(ds) - def fillna(self, value: Any) -> "DataArray": + def fillna(self, value: Any) -> DataArray: """Fill missing values in this object. This operation follows the normal broadcasting and alignment rules that @@ -2478,13 +2471,13 @@ def interpolate_na( dim: Hashable = None, method: str = "linear", limit: int = None, - use_coordinate: Union[bool, str] = True, - max_gap: Union[ - int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta - ] = None, + use_coordinate: bool | str = True, + max_gap: ( + int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta + ) = None, keep_attrs: bool = None, **kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Fill in NaNs by interpolating according to different methods. 
Parameters @@ -2589,7 +2582,7 @@ def interpolate_na( **kwargs, ) - def ffill(self, dim: Hashable, limit: int = None) -> "DataArray": + def ffill(self, dim: Hashable, limit: int = None) -> DataArray: """Fill NaN values by propogating values forward *Requires bottleneck.* @@ -2614,7 +2607,7 @@ def ffill(self, dim: Hashable, limit: int = None) -> "DataArray": return ffill(self, dim, limit=limit) - def bfill(self, dim: Hashable, limit: int = None) -> "DataArray": + def bfill(self, dim: Hashable, limit: int = None) -> DataArray: """Fill NaN values by propogating values backward *Requires bottleneck.* @@ -2639,7 +2632,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> "DataArray": return bfill(self, dim, limit=limit) - def combine_first(self, other: "DataArray") -> "DataArray": + def combine_first(self, other: DataArray) -> DataArray: """Combine two DataArray objects, with union of coordinates. This operation follows the normal broadcasting and alignment rules of @@ -2660,12 +2653,12 @@ def combine_first(self, other: "DataArray") -> "DataArray": def reduce( self, func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, + dim: None | Hashable | Sequence[Hashable] = None, + axis: None | int | Sequence[int] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Reduce this array by applying `func` along some dimension(s). Parameters @@ -2702,7 +2695,7 @@ def reduce( var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims, **kwargs) return self._replace_maybe_drop_dims(var) - def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: + def to_pandas(self) -> DataArray | pd.Series | pd.DataFrame: """Convert this array into a pandas object with the same shape. The type of the returned object depends on the number of DataArray @@ -2730,7 +2723,7 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: return constructor(self.values, *indexes) def to_dataframe( - self, name: Hashable = None, dim_order: List[Hashable] = None + self, name: Hashable = None, dim_order: list[Hashable] = None ) -> pd.DataFrame: """Convert this array and its coordinates into a tidy pandas.DataFrame. @@ -2819,7 +2812,7 @@ def to_masked_array(self, copy: bool = True) -> np.ma.MaskedArray: isnull = pd.isnull(values) return np.ma.MaskedArray(data=values, mask=isnull, copy=copy) - def to_netcdf(self, *args, **kwargs) -> Union[bytes, "Delayed", None]: + def to_netcdf(self, *args, **kwargs) -> bytes | Delayed | None: """Write DataArray contents to a netCDF file. All parameters are passed directly to :py:meth:`xarray.Dataset.to_netcdf`. @@ -2878,7 +2871,7 @@ def to_dict(self, data: bool = True) -> dict: return d @classmethod - def from_dict(cls, d: dict) -> "DataArray": + def from_dict(cls, d: dict) -> DataArray: """ Convert a dictionary into an xarray.DataArray @@ -2934,7 +2927,7 @@ def from_dict(cls, d: dict) -> "DataArray": return obj @classmethod - def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": + def from_series(cls, series: pd.Series, sparse: bool = False) -> DataArray: """Convert a pandas.Series into an xarray.DataArray. 
If the series's index is a MultiIndex, it will be expanded into a @@ -2956,33 +2949,33 @@ def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": result.name = series.name return result - def to_cdms2(self) -> "cdms2_Variable": + def to_cdms2(self) -> cdms2_Variable: """Convert this array into a cdms2.Variable""" from ..convert import to_cdms2 return to_cdms2(self) @classmethod - def from_cdms2(cls, variable: "cdms2_Variable") -> "DataArray": + def from_cdms2(cls, variable: cdms2_Variable) -> DataArray: """Convert a cdms2.Variable into an xarray.DataArray""" from ..convert import from_cdms2 return from_cdms2(variable) - def to_iris(self) -> "iris_Cube": + def to_iris(self) -> iris_Cube: """Convert this array into a iris.cube.Cube""" from ..convert import to_iris return to_iris(self) @classmethod - def from_iris(cls, cube: "iris_Cube") -> "DataArray": + def from_iris(cls, cube: iris_Cube) -> DataArray: """Convert a iris.cube.Cube into an xarray.DataArray""" from ..convert import from_iris return from_iris(cube) - def _all_compat(self, other: "DataArray", compat_str: str) -> bool: + def _all_compat(self, other: DataArray, compat_str: str) -> bool: """Helper function for equals, broadcast_equals, and identical""" def compat(x, y): @@ -2992,7 +2985,7 @@ def compat(x, y): self, other ) - def broadcast_equals(self, other: "DataArray") -> bool: + def broadcast_equals(self, other: DataArray) -> bool: """Two DataArrays are broadcast equal if they are equal after broadcasting them against each other such that they have the same dimensions. @@ -3007,7 +3000,7 @@ def broadcast_equals(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def equals(self, other: "DataArray") -> bool: + def equals(self, other: DataArray) -> bool: """True if two DataArrays have the same dimensions, coordinates and values; otherwise False. @@ -3027,7 +3020,7 @@ def equals(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def identical(self, other: "DataArray") -> bool: + def identical(self, other: DataArray) -> bool: """Like equals, but also checks the array name and attributes, and attributes on all coordinates. 
@@ -3041,7 +3034,7 @@ def identical(self, other: "DataArray") -> bool: except (TypeError, AttributeError): return False - def _result_name(self, other: Any = None) -> Optional[Hashable]: + def _result_name(self, other: Any = None) -> Hashable | None: # use the same naming heuristics as pandas: # https://github.com/ContinuumIO/blaze/issues/458#issuecomment-51936356 other_name = getattr(other, "name", _default) @@ -3050,7 +3043,7 @@ def _result_name(self, other: Any = None) -> Optional[Hashable]: else: return None - def __array_wrap__(self, obj, context=None) -> "DataArray": + def __array_wrap__(self, obj, context=None) -> DataArray: new_var = self.variable.__array_wrap__(obj, context) return self._replace(new_var) @@ -3124,7 +3117,7 @@ def _inplace_binary_op(self, other, f: Callable): ) from exc return self - def _copy_attrs_from(self, other: Union["DataArray", Dataset, Variable]) -> None: + def _copy_attrs_from(self, other: DataArray | Dataset | Variable) -> None: self.attrs = other.attrs plot = utils.UncachedAccessor(_PlotMethods) @@ -3162,7 +3155,7 @@ def _title_for_slice(self, truncate: int = 50) -> str: return title - def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> "DataArray": + def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> DataArray: """Calculate the n-th order discrete difference along given axis. Parameters @@ -3213,7 +3206,7 @@ def shift( shifts: Mapping[Any, int] = None, fill_value: Any = dtypes.NA, **shifts_kwargs: int, - ) -> "DataArray": + ) -> DataArray: """Shift this DataArray by an offset along one or more dimensions. Only the data is moved; coordinates stay in place. This is consistent @@ -3263,7 +3256,7 @@ def roll( shifts: Mapping[Hashable, int] = None, roll_coords: bool = False, **shifts_kwargs: int, - ) -> "DataArray": + ) -> DataArray: """Roll this array by an offset along one or more dimensions. Unlike shift, roll treats the given dimensions as periodic, so will not @@ -3308,16 +3301,16 @@ def roll( return self._from_temp_dataset(ds) @property - def real(self) -> "DataArray": + def real(self) -> DataArray: return self._replace(self.variable.real) @property - def imag(self) -> "DataArray": + def imag(self) -> DataArray: return self._replace(self.variable.imag) def dot( - self, other: "DataArray", dims: Union[Hashable, Sequence[Hashable], None] = None - ) -> "DataArray": + self, other: DataArray, dims: Hashable | Sequence[Hashable] | None = None + ) -> DataArray: """Perform dot product of two DataArrays along their shared dims. Equivalent to taking taking tensordot over all shared dims. @@ -3369,9 +3362,9 @@ def dot( def sortby( self, - variables: Union[Hashable, "DataArray", Sequence[Union[Hashable, "DataArray"]]], + variables: Hashable | DataArray | Sequence[Hashable | DataArray], ascending: bool = True, - ) -> "DataArray": + ) -> DataArray: """Sort object by labels or values (along an axis). Sorts the dataarray, either along specified dimensions, @@ -3434,11 +3427,11 @@ def sortby( def quantile( self, q: Any, - dim: Union[Hashable, Sequence[Hashable], None] = None, + dim: Hashable | Sequence[Hashable] | None = None, interpolation: str = "linear", keep_attrs: bool = None, skipna: bool = True, - ) -> "DataArray": + ) -> DataArray: """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -3525,7 +3518,7 @@ def quantile( def rank( self, dim: Hashable, pct: bool = False, keep_attrs: bool = None - ) -> "DataArray": + ) -> DataArray: """Ranks the data. 
Equal values are assigned a rank that is the average of the ranks that @@ -3566,7 +3559,7 @@ def rank( def differentiate( self, coord: Hashable, edge_order: int = 1, datetime_unit: str = None - ) -> "DataArray": + ) -> DataArray: """ Differentiate the array with the second order accurate central differences. @@ -3625,9 +3618,9 @@ def differentiate( def integrate( self, - coord: Union[Hashable, Sequence[Hashable]] = None, + coord: Hashable | Sequence[Hashable] = None, datetime_unit: str = None, - ) -> "DataArray": + ) -> DataArray: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -3679,9 +3672,9 @@ def integrate( def cumulative_integrate( self, - coord: Union[Hashable, Sequence[Hashable]] = None, + coord: Hashable | Sequence[Hashable] = None, datetime_unit: str = None, - ) -> "DataArray": + ) -> DataArray: """Integrate cumulatively along the given coordinate using the trapezoidal rule. .. note:: @@ -3739,7 +3732,7 @@ def cumulative_integrate( ds = self._to_temp_dataset().cumulative_integrate(coord, datetime_unit) return self._from_temp_dataset(ds) - def unify_chunks(self) -> "DataArray": + def unify_chunks(self) -> DataArray: """Unify chunk size along all chunked dimensions of this DataArray. Returns @@ -3757,8 +3750,8 @@ def map_blocks( self, func: Callable[..., T_Xarray], args: Sequence[Any] = (), - kwargs: Mapping[str, Any] = None, - template: Union["DataArray", "Dataset"] = None, + kwargs: Mapping[str, Any] | None = None, + template: DataArray | Dataset | None = None, ) -> T_Xarray: """ Apply a function to each block of this DataArray. @@ -3861,9 +3854,9 @@ def polyfit( self, dim: Hashable, deg: int, - skipna: bool = None, - rcond: float = None, - w: Union[Hashable, Any] = None, + skipna: bool | None = None, + rcond: float | None = None, + w: Hashable | Any | None = None, full: bool = False, cov: bool = False, ): @@ -3924,16 +3917,18 @@ def polyfit( def pad( self, - pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] | None = None, mode: str = "constant", - stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - constant_values: Union[ - int, Tuple[int, int], Mapping[Any, Tuple[int, int]] - ] = None, - end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - reflect_type: str = None, + stat_length: int + | tuple[int, int] + | Mapping[Any, tuple[int, int]] + | None = None, + constant_values: (int | tuple[int, int] | Mapping[Any, tuple[int, int]]) + | None = None, + end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, + reflect_type: str | None = None, **pad_width_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Pad this array along one or more dimensions. .. warning:: @@ -4092,7 +4087,7 @@ def idxmin( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "DataArray": + ) -> DataArray: """Return the coordinate label of the minimum value along a dimension. Returns a new `DataArray` named after the dimension with the values of @@ -4188,7 +4183,7 @@ def idxmax( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "DataArray": + ) -> DataArray: """Return the coordinate label of the maximum value along a dimension. 
Returns a new `DataArray` named after the dimension with the values of @@ -4280,11 +4275,11 @@ def idxmax( def argmin( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + ) -> DataArray | dict[Hashable, DataArray]: """Index or indices of the minimum of the DataArray over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of DataArrays, @@ -4383,11 +4378,11 @@ def argmin( def argmax( self, - dim: Union[Hashable, Sequence[Hashable]] = None, + dim: Hashable | Sequence[Hashable] = None, axis: int = None, keep_attrs: bool = None, skipna: bool = None, - ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + ) -> DataArray | dict[Hashable, DataArray]: """Index or indices of the maximum of the DataArray over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of DataArrays, @@ -4491,7 +4486,7 @@ def query( engine: str = None, missing_dims: str = "raise", **queries_kwargs: Any, - ) -> "DataArray": + ) -> DataArray: """Return a new data array indexed along the specified dimension(s), where the indexers are given as strings containing Python expressions to be evaluated against the values in the array. @@ -4561,14 +4556,14 @@ def query( def curvefit( self, - coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], + coords: str | DataArray | Iterable[str | DataArray], func: Callable[..., Any], - reduce_dims: Union[Hashable, Iterable[Hashable]] = None, + reduce_dims: Hashable | Iterable[Hashable] = None, skipna: bool = True, - p0: Dict[str, Any] = None, - bounds: Dict[str, Any] = None, + p0: dict[str, Any] = None, + bounds: dict[str, Any] = None, param_names: Sequence[str] = None, - kwargs: Dict[str, Any] = None, + kwargs: dict[str, Any] = None, ): """ Curve fitting optimization for arbitrary functions. @@ -4640,10 +4635,7 @@ def curvefit( def drop_duplicates( self, dim: Hashable, - keep: Union[ - str, - bool, - ] = "first", + keep: (str | bool) = "first", ): """Returns a new DataArray with duplicate dimension values removed. @@ -4669,10 +4661,10 @@ def convert_calendar( self, calendar: str, dim: str = "time", - align_on: Optional[str] = None, - missing: Optional[Any] = None, - use_cftime: Optional[bool] = None, - ) -> "DataArray": + align_on: str | None = None, + missing: Any | None = None, + use_cftime: bool | None = None, + ) -> DataArray: """Convert the DataArray to another calendar. Only converts the individual timestamps, does not modify any data except @@ -4790,9 +4782,9 @@ def convert_calendar( def interp_calendar( self, - target: Union[pd.DatetimeIndex, CFTimeIndex, "DataArray"], + target: pd.DatetimeIndex | CFTimeIndex | DataArray, dim: str = "time", - ) -> "DataArray": + ) -> DataArray: """Interpolates the DataArray to another calendar based on decimal year measure. 
Each timestamp in `source` and `target` are first converted to their decimal diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4e8001ca389..26ef95f64f9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import copy import datetime import inspect @@ -14,18 +16,12 @@ Callable, Collection, DefaultDict, - Dict, Hashable, Iterable, Iterator, - List, Mapping, MutableMapping, - Optional, Sequence, - Set, - Tuple, - Union, cast, overload, ) @@ -144,7 +140,7 @@ def _get_virtual_variable( variables, key: Hashable, level_vars: Mapping = None, dim_sizes: Mapping = None -) -> Tuple[Hashable, Hashable, Variable]: +) -> tuple[Hashable, Hashable, Variable]: """Get a virtual variable (e.g., 'time.year' or a MultiIndex level) from a dict of xarray.Variable objects (if possible) """ @@ -162,7 +158,7 @@ def _get_virtual_variable( raise KeyError(key) split_key = key.split(".", 1) - var_name: Optional[str] + var_name: str | None if len(split_key) == 2: ref_name, var_name = split_key elif len(split_key) == 1: @@ -190,13 +186,13 @@ def _get_virtual_variable( return ref_name, var_name, virtual_var -def calculate_dimensions(variables: Mapping[Any, Variable]) -> Dict[Hashable, int]: +def calculate_dimensions(variables: Mapping[Any, Variable]) -> dict[Hashable, int]: """Calculate the dimensions corresponding to a set of variables. Returns dictionary mapping from dimension names to sizes. Raises ValueError if any of the dimension sizes conflict. """ - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} last_used = {} scalar_vars = {k for k, v in variables.items() if not v.dims} for k, var in variables.items(): @@ -217,28 +213,28 @@ def calculate_dimensions(variables: Mapping[Any, Variable]) -> Dict[Hashable, in def merge_indexes( - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]], + indexes: Mapping[Any, Hashable | Sequence[Hashable]], variables: Mapping[Any, Variable], - coord_names: Set[Hashable], + coord_names: set[Hashable], append: bool = False, -) -> Tuple[Dict[Hashable, Variable], Set[Hashable]]: +) -> tuple[dict[Hashable, Variable], set[Hashable]]: """Merge variables into multi-indexes. Not public API. Used in Dataset and DataArray set_index methods. """ - vars_to_replace: Dict[Hashable, Variable] = {} - vars_to_remove: List[Hashable] = [] - dims_to_replace: Dict[Hashable, Hashable] = {} + vars_to_replace: dict[Hashable, Variable] = {} + vars_to_remove: list[Hashable] = [] + dims_to_replace: dict[Hashable, Hashable] = {} error_msg = "{} is not the name of an existing variable." for dim, var_names in indexes.items(): if isinstance(var_names, str) or not isinstance(var_names, Sequence): var_names = [var_names] - names: List[Hashable] = [] - codes: List[List[int]] = [] - levels: List[List[int]] = [] + names: list[Hashable] = [] + codes: list[list[int]] = [] + levels: list[list[int]] = [] current_index_variable = variables.get(dim) for n in var_names: @@ -301,12 +297,12 @@ def merge_indexes( def split_indexes( - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], variables: Mapping[Any, Variable], - coord_names: Set[Hashable], + coord_names: set[Hashable], level_coords: Mapping[Any, Hashable], drop: bool = False, -) -> Tuple[Dict[Hashable, Variable], Set[Hashable]]: +) -> tuple[dict[Hashable, Variable], set[Hashable]]: """Extract (multi-)indexes (levels) as variables. Not public API. 
Used in Dataset and DataArray reset_index @@ -315,7 +311,7 @@ def split_indexes( if isinstance(dims_or_levels, str) or not isinstance(dims_or_levels, Sequence): dims_or_levels = [dims_or_levels] - dim_levels: DefaultDict[Any, List[Hashable]] = defaultdict(list) + dim_levels: DefaultDict[Any, list[Hashable]] = defaultdict(list) dims = [] for k in dims_or_levels: if k in level_coords: @@ -324,7 +320,7 @@ def split_indexes( dims.append(k) vars_to_replace = {} - vars_to_create: Dict[Hashable, Variable] = {} + vars_to_create: dict[Hashable, Variable] = {} vars_to_remove = [] for d in dims: @@ -447,7 +443,7 @@ def _maybe_chunk( return var -def as_dataset(obj: Any) -> "Dataset": +def as_dataset(obj: Any) -> Dataset: """Cast the given object to a Dataset. Handles Datasets, DataArrays and dictionaries of variables. A new Dataset @@ -520,7 +516,7 @@ def _initialize_feasible(lb, ub): class DataVariables(Mapping[Any, "DataArray"]): __slots__ = ("_dataset",) - def __init__(self, dataset: "Dataset"): + def __init__(self, dataset: Dataset): self._dataset = dataset def __iter__(self) -> Iterator[Hashable]: @@ -536,7 +532,7 @@ def __len__(self) -> int: def __contains__(self, key: Hashable) -> bool: return key in self._dataset._variables and key not in self._dataset._coord_names - def __getitem__(self, key: Hashable) -> "DataArray": + def __getitem__(self, key: Hashable) -> DataArray: if key not in self._dataset._coord_names: return cast("DataArray", self._dataset[key]) raise KeyError(key) @@ -561,10 +557,10 @@ def _ipython_key_completions_(self): class _LocIndexer: __slots__ = ("dataset",) - def __init__(self, dataset: "Dataset"): + def __init__(self, dataset: Dataset): self.dataset = dataset - def __getitem__(self, key: Mapping[Any, Any]) -> "Dataset": + def __getitem__(self, key: Mapping[Any, Any]) -> Dataset: if not utils.is_dict_like(key): raise TypeError("can only lookup dictionaries from Dataset.loc") return self.dataset.sel(key) @@ -704,14 +700,14 @@ class Dataset(DataWithCoords, DatasetArithmetic, Mapping): description: Weather related data. 
""" - _attrs: Optional[Dict[Hashable, Any]] - _cache: Dict[str, Any] - _coord_names: Set[Hashable] - _dims: Dict[Hashable, int] - _encoding: Optional[Dict[Hashable, Any]] - _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, Index]] - _variables: Dict[Hashable, Variable] + _attrs: dict[Hashable, Any] | None + _cache: dict[str, Any] + _coord_names: set[Hashable] + _dims: dict[Hashable, int] + _encoding: dict[Hashable, Any] | None + _close: Callable[[], None] | None + _indexes: dict[Hashable, Index] | None + _variables: dict[Hashable, Variable] __slots__ = ( "_attrs", @@ -768,7 +764,7 @@ def __init__( self._indexes = indexes @classmethod - def load_store(cls, store, decoder=None) -> "Dataset": + def load_store(cls, store, decoder=None) -> Dataset: """Create a new dataset from the contents of a backends.*DataStore object """ @@ -791,7 +787,7 @@ def variables(self) -> Mapping[Hashable, Variable]: return Frozen(self._variables) @property - def attrs(self) -> Dict[Hashable, Any]: + def attrs(self) -> dict[Hashable, Any]: """Dictionary of global attributes on this dataset""" if self._attrs is None: self._attrs = {} @@ -802,7 +798,7 @@ def attrs(self, value: Mapping[Any, Any]) -> None: self._attrs = dict(value) @property - def encoding(self) -> Dict: + def encoding(self) -> dict: """Dictionary of global encoding attributes on this dataset""" if self._encoding is None: self._encoding = {} @@ -839,7 +835,7 @@ def sizes(self) -> Mapping[Hashable, int]: """ return self.dims - def load(self, **kwargs) -> "Dataset": + def load(self, **kwargs) -> Dataset: """Manually trigger loading and/or computation of this dataset's data from disk or a remote source into memory and return this dataset. Unlike compute, the original dataset is modified and returned. @@ -913,11 +909,11 @@ def __dask_layers__(self): import dask return sum( - [ + ( v.__dask_layers__() for v in self.variables.values() if dask.is_dask_collection(v) - ], + ), (), ) @@ -939,7 +935,7 @@ def __dask_postcompute__(self): def __dask_postpersist__(self): return self._dask_postpersist, () - def _dask_postcompute(self, results: "Iterable[Variable]") -> "Dataset": + def _dask_postcompute(self, results: Iterable[Variable]) -> Dataset: import dask variables = {} @@ -963,7 +959,7 @@ def _dask_postcompute(self, results: "Iterable[Variable]") -> "Dataset": def _dask_postpersist( self, dsk: Mapping, *, rename: Mapping[str, str] = None - ) -> "Dataset": + ) -> Dataset: from dask import is_dask_collection from dask.highlevelgraph import HighLevelGraph from dask.optimization import cull @@ -1012,7 +1008,7 @@ def _dask_postpersist( self._close, ) - def compute(self, **kwargs) -> "Dataset": + def compute(self, **kwargs) -> Dataset: """Manually trigger loading and/or computation of this dataset's data from disk or a remote source into memory and return a new dataset. Unlike load, the original dataset is left unaltered. 
@@ -1034,7 +1030,7 @@ def compute(self, **kwargs) -> "Dataset": new = self.copy(deep=False) return new.load(**kwargs) - def _persist_inplace(self, **kwargs) -> "Dataset": + def _persist_inplace(self, **kwargs) -> Dataset: """Persist all Dask arrays in memory""" # access .data to coerce everything to numpy or dask arrays lazy_data = { @@ -1051,7 +1047,7 @@ def _persist_inplace(self, **kwargs) -> "Dataset": return self - def persist(self, **kwargs) -> "Dataset": + def persist(self, **kwargs) -> Dataset: """Trigger computation, keeping data as dask arrays This operation can be used to trigger computation on underlying dask @@ -1075,14 +1071,14 @@ def persist(self, **kwargs) -> "Dataset": @classmethod def _construct_direct( cls, - variables: Dict[Any, Variable], - coord_names: Set[Hashable], - dims: Dict[Any, int] = None, - attrs: Dict = None, - indexes: Dict[Any, Index] = None, - encoding: Dict = None, + variables: dict[Any, Variable], + coord_names: set[Hashable], + dims: dict[Any, int] = None, + attrs: dict = None, + indexes: dict[Any, Index] = None, + encoding: dict = None, close: Callable[[], None] = None, - ) -> "Dataset": + ) -> Dataset: """Shortcut around __init__ for internal use when we want to skip costly validation """ @@ -1100,14 +1096,14 @@ def _construct_direct( def _replace( self, - variables: Dict[Hashable, Variable] = None, - coord_names: Set[Hashable] = None, - dims: Dict[Any, int] = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Hashable, Index], None, Default] = _default, - encoding: Union[dict, None, Default] = _default, + variables: dict[Hashable, Variable] = None, + coord_names: set[Hashable] = None, + dims: dict[Any, int] = None, + attrs: dict[Hashable, Any] | None | Default = _default, + indexes: dict[Hashable, Index] | None | Default = _default, + encoding: dict | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Fastpath constructor for internal use. Returns an object with optionally with replaced attributes. @@ -1150,12 +1146,12 @@ def _replace( def _replace_with_new_dims( self, - variables: Dict[Hashable, Variable], + variables: dict[Hashable, Variable], coord_names: set = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Hashable, Index], None, Default] = _default, + attrs: dict[Hashable, Any] | None | Default = _default, + indexes: dict[Hashable, Index] | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Replace variables with recalculated dimensions.""" dims = calculate_dimensions(variables) return self._replace( @@ -1164,12 +1160,12 @@ def _replace_with_new_dims( def _replace_vars_and_dims( self, - variables: Dict[Hashable, Variable], + variables: dict[Hashable, Variable], coord_names: set = None, - dims: Dict[Hashable, int] = None, - attrs: Union[Dict[Hashable, Any], None, Default] = _default, + dims: dict[Hashable, int] = None, + attrs: dict[Hashable, Any] | None | Default = _default, inplace: bool = False, - ) -> "Dataset": + ) -> Dataset: """Deprecated version of _replace_with_new_dims(). 
Unlike _replace_with_new_dims(), this method always recalculates @@ -1181,7 +1177,7 @@ def _replace_vars_and_dims( variables, coord_names, dims, attrs, indexes=None, inplace=inplace ) - def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": + def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> Dataset: if not indexes: return self @@ -1193,7 +1189,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": obj = self._replace(variables, indexes=new_indexes) # switch from dimension to level names, if necessary - dim_names: Dict[Hashable, str] = {} + dim_names: dict[Hashable, str] = {} for dim, idx in indexes.items(): pd_idx = idx.to_pandas_index() if not isinstance(pd_idx, pd.MultiIndex) and pd_idx.name != dim: @@ -1202,7 +1198,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": obj = obj.rename(dim_names) return obj - def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": + def copy(self, deep: bool = False, data: Mapping = None) -> Dataset: """Returns a copy of this dataset. If `deep=True`, a deep copy is made of each of the component variables. @@ -1327,7 +1323,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": return self._replace(variables, attrs=attrs) - def as_numpy(self: "Dataset") -> "Dataset": + def as_numpy(self: Dataset) -> Dataset: """ Coerces wrapped data and coordinates into numpy arrays, returning a Dataset. @@ -1340,11 +1336,11 @@ def as_numpy(self: "Dataset") -> "Dataset": return self._replace(variables=numpy_variables) @property - def _level_coords(self) -> Dict[str, Hashable]: + def _level_coords(self) -> dict[str, Hashable]: """Return a mapping of all MultiIndex levels and their corresponding coordinate name. """ - level_coords: Dict[str, Hashable] = {} + level_coords: dict[str, Hashable] = {} for name, index in self.xindexes.items(): # TODO: benbovy - flexible indexes: update when MultIndex has its own xarray class. pd_index = index.to_pandas_index() @@ -1354,13 +1350,13 @@ def _level_coords(self) -> Dict[str, Hashable]: level_coords.update({lname: dim for lname in level_names}) return level_coords - def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": + def _copy_listed(self, names: Iterable[Hashable]) -> Dataset: """Create a new Dataset with the listed variables from this dataset and the all relevant coordinates. Skips all validation. 
""" - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} coord_names = set() - indexes: Dict[Hashable, Index] = {} + indexes: dict[Hashable, Index] = {} for name in names: try: @@ -1394,7 +1390,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": return self._replace(variables, coord_names, dims, indexes=indexes) - def _construct_dataarray(self, name: Hashable) -> "DataArray": + def _construct_dataarray(self, name: Hashable) -> DataArray: """Construct a DataArray by indexing this dataset""" from .dataarray import DataArray @@ -1407,7 +1403,7 @@ def _construct_dataarray(self, name: Hashable) -> "DataArray": needed_dims = set(variable.dims) - coords: Dict[Hashable, Variable] = {} + coords: dict[Hashable, Variable] = {} # preserve ordering for k in self._variables: if k in self._coord_names and set(self.variables[k].dims) <= needed_dims: @@ -1420,10 +1416,10 @@ def _construct_dataarray(self, name: Hashable) -> "DataArray": return DataArray(variable, coords, name=name, indexes=indexes, fastpath=True) - def __copy__(self) -> "Dataset": + def __copy__(self) -> Dataset: return self.copy(deep=False) - def __deepcopy__(self, memo=None) -> "Dataset": + def __deepcopy__(self, memo=None) -> Dataset: # memo does nothing but is required for compatibility with # copy.deepcopy return self.copy(deep=True) @@ -1482,15 +1478,15 @@ def loc(self) -> _LocIndexer: # FIXME https://github.com/python/mypy/issues/7328 @overload - def __getitem__(self, key: Mapping) -> "Dataset": # type: ignore[misc] + def __getitem__(self, key: Mapping) -> Dataset: # type: ignore[misc] ... @overload - def __getitem__(self, key: Hashable) -> "DataArray": # type: ignore[misc] + def __getitem__(self, key: Hashable) -> DataArray: # type: ignore[misc] ... @overload - def __getitem__(self, key: Any) -> "Dataset": + def __getitem__(self, key: Any) -> Dataset: ... def __getitem__(self, key): @@ -1507,7 +1503,7 @@ def __getitem__(self, key): else: return self._copy_listed(key) - def __setitem__(self, key: Union[Hashable, List[Hashable], Mapping], value) -> None: + def __setitem__(self, key: Hashable | list[Hashable] | Mapping, value) -> None: """Add an array to this dataset. Multiple arrays can be added at the same time, in which case each of the following operations is applied to the respective value. @@ -1616,7 +1612,7 @@ def _setitem_check(self, key, value): f"Variable '{name}': dimension '{dim}' appears in new values " f"but not in the indexed original data" ) - dims = tuple([dim for dim in var_k.dims if dim in val.dims]) + dims = tuple(dim for dim in var_k.dims if dim in val.dims) if dims != val.dims: raise ValueError( f"Variable '{name}': dimension order differs between" @@ -1647,7 +1643,7 @@ def __delitem__(self, key: Hashable) -> None: # https://github.com/python/mypy/issues/4266 __hash__ = None # type: ignore[assignment] - def _all_compat(self, other: "Dataset", compat_str: str) -> bool: + def _all_compat(self, other: Dataset, compat_str: str) -> bool: """Helper function for equals and identical""" # some stores (e.g., scipy) do not seem to preserve order, so don't @@ -1659,7 +1655,7 @@ def compat(x: Variable, y: Variable) -> bool: self._variables, other._variables, compat=compat ) - def broadcast_equals(self, other: "Dataset") -> bool: + def broadcast_equals(self, other: Dataset) -> bool: """Two Datasets are broadcast equal if they are equal after broadcasting all variables against each other. 
@@ -1677,7 +1673,7 @@ def broadcast_equals(self, other: "Dataset") -> bool: except (TypeError, AttributeError): return False - def equals(self, other: "Dataset") -> bool: + def equals(self, other: Dataset) -> bool: """Two Datasets are equal if they have matching variables and coordinates, all of which are equal. @@ -1697,7 +1693,7 @@ def equals(self, other: "Dataset") -> bool: except (TypeError, AttributeError): return False - def identical(self, other: "Dataset") -> bool: + def identical(self, other: Dataset) -> bool: """Like equals, but also checks all dataset attributes and the attributes on all variables and coordinates. @@ -1746,7 +1742,7 @@ def data_vars(self) -> DataVariables: """Dictionary of DataArray objects corresponding to data variables""" return DataVariables(self) - def set_coords(self, names: "Union[Hashable, Iterable[Hashable]]") -> "Dataset": + def set_coords(self, names: Hashable | Iterable[Hashable]) -> Dataset: """Given names of one or more variables, set them as coordinates Parameters @@ -1777,9 +1773,9 @@ def set_coords(self, names: "Union[Hashable, Iterable[Hashable]]") -> "Dataset": def reset_coords( self, - names: "Union[Hashable, Iterable[Hashable], None]" = None, + names: Hashable | Iterable[Hashable] | None = None, drop: bool = False, - ) -> "Dataset": + ) -> Dataset: """Given names of coordinates, reset them to become variables Parameters @@ -1815,7 +1811,7 @@ def reset_coords( del obj._variables[name] return obj - def dump_to_store(self, store: "AbstractDataStore", **kwargs) -> None: + def dump_to_store(self, store: AbstractDataStore, **kwargs) -> None: """Store dataset contents to a backends.*DataStore object.""" from ..backends.api import dump_to_store @@ -1834,7 +1830,7 @@ def to_netcdf( unlimited_dims: Iterable[Hashable] = None, compute: bool = True, invalid_netcdf: bool = False, - ) -> Union[bytes, "Delayed", None]: + ) -> bytes | Delayed | None: """Write dataset contents to a netCDF file. Parameters @@ -1921,19 +1917,19 @@ def to_netcdf( def to_zarr( self, - store: Union[MutableMapping, str, PathLike] = None, - chunk_store: Union[MutableMapping, str, PathLike] = None, + store: MutableMapping | str | PathLike | None = None, + chunk_store: MutableMapping | str | PathLike | None = None, mode: str = None, synchronizer=None, group: str = None, encoding: Mapping = None, compute: bool = True, - consolidated: Optional[bool] = None, + consolidated: bool | None = None, append_dim: Hashable = None, region: Mapping[str, slice] = None, safe_chunks: bool = True, - storage_options: Dict[str, str] = None, - ) -> "ZarrStore": + storage_options: dict[str, str] = None, + ) -> ZarrStore: """Write dataset contents to a zarr group. Zarr chunks are determined in the following way: @@ -2101,7 +2097,7 @@ def info(self, buf=None) -> None: buf.write("\n".join(lines)) @property - def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: + def chunks(self) -> Mapping[Hashable, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataset's data, or None if the underlying data is not a dask array. @@ -2118,7 +2114,7 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: return get_chunksizes(self.variables.values()) @property - def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: + def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: """ Mapping from dimension names to block lengths for this dataset's data, or None if the underlying data is not a dask array. 
@@ -2136,15 +2132,13 @@ def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]: def chunk( self, - chunks: Union[ - int, - Literal["auto"], - Mapping[Any, Union[None, int, str, Tuple[int, ...]]], - ] = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) + chunks: ( + int | Literal["auto"] | Mapping[Any, None | int | str | tuple[int, ...]] + ) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) name_prefix: str = "xarray-", token: str = None, lock: bool = False, - ) -> "Dataset": + ) -> Dataset: """Coerce all arrays in this dataset into dask arrays with the given chunks. @@ -2203,7 +2197,7 @@ def chunk( def _validate_indexers( self, indexers: Mapping[Any, Any], missing_dims: str = "raise" - ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: + ) -> Iterator[tuple[Hashable, int | slice | np.ndarray | Variable]]: """Here we make sure + indexer has a valid keys + indexer is in a valid data type @@ -2247,7 +2241,7 @@ def _validate_indexers( def _validate_interp_indexers( self, indexers: Mapping[Any, Any] - ) -> Iterator[Tuple[Hashable, Variable]]: + ) -> Iterator[tuple[Hashable, Variable]]: """Variant of _validate_indexers to be used for interpolation""" for k, v in self._validate_indexers(indexers): if isinstance(v, Variable): @@ -2311,7 +2305,7 @@ def isel( drop: bool = False, missing_dims: str = "raise", **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed along the specified dimension(s). @@ -2366,7 +2360,7 @@ def isel( indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) variables = {} - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} coord_names = self._coord_names.copy() indexes = self._indexes.copy() if self._indexes is not None else None @@ -2403,13 +2397,13 @@ def _isel_fancy( *, drop: bool, missing_dims: str = "raise", - ) -> "Dataset": + ) -> Dataset: # Note: we need to preserve the original indexers variable in order to merge the # coords below indexers_list = list(self._validate_indexers(indexers, missing_dims)) - variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, Index] = {} + variables: dict[Hashable, Variable] = {} + indexes: dict[Hashable, Index] = {} for name, var in self.variables.items(): var_indexers = {k: v for k, v in indexers_list if k in var.dims} @@ -2446,7 +2440,7 @@ def sel( tolerance: Number = None, drop: bool = False, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed by tick labels along the specified dimension(s). @@ -2520,9 +2514,9 @@ def sel( def head( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with the first `n` values of each array for the specified dimension(s). @@ -2566,9 +2560,9 @@ def head( def tail( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with the last `n` values of each array for the specified dimension(s). 
@@ -2615,9 +2609,9 @@ def tail( def thin( self, - indexers: Union[Mapping[Any, int], int] = None, + indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Returns a new dataset with each array indexed along every `n`-th value for the specified dimension(s) @@ -2663,8 +2657,8 @@ def thin( return self.isel(indexers_slices) def broadcast_like( - self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None - ) -> "Dataset": + self, other: Dataset | DataArray, exclude: Iterable[Hashable] = None + ) -> Dataset: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -2688,12 +2682,12 @@ def broadcast_like( def reindex_like( self, - other: Union["Dataset", "DataArray"], + other: Dataset | DataArray, method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, - ) -> "Dataset": + ) -> Dataset: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -2755,11 +2749,11 @@ def reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Conform this object onto a new set of indexes, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -2969,12 +2963,12 @@ def _reindex( self, indexers: Mapping[Any, Any] = None, method: str = None, - tolerance: Union[Union[int, float], Iterable[Union[int, float]]] = None, + tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, sparse: bool = False, **indexers_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """ same to _reindex but support sparse option """ @@ -3007,7 +3001,7 @@ def interp( kwargs: Mapping[str, Any] = None, method_non_numeric: str = "nearest", **coords_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Multidimensional interpolation of Dataset. Parameters @@ -3189,8 +3183,8 @@ def _validate_interp_indexer(x, new_x): for k, (index, dest) in validated_indexers.items() } - variables: Dict[Hashable, Variable] = {} - to_reindex: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} + to_reindex: dict[Hashable, Variable] = {} for name, var in obj._variables.items(): if name in indexers: continue @@ -3255,12 +3249,12 @@ def _validate_interp_indexer(x, new_x): def interp_like( self, - other: Union["Dataset", "DataArray"], + other: Dataset | DataArray, method: str = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, method_non_numeric: str = "nearest", - ) -> "Dataset": + ) -> Dataset: """Interpolate this object onto the coordinates of another object, filling the out of range values with NaN. 
@@ -3306,8 +3300,8 @@ def interp_like( kwargs = {} coords = alignment.reindex_like_indexers(self, other) - numeric_coords: Dict[Hashable, pd.Index] = {} - object_coords: Dict[Hashable, pd.Index] = {} + numeric_coords: dict[Hashable, pd.Index] = {} + object_coords: dict[Hashable, pd.Index] = {} for k, v in coords.items(): if v.dtype.kind in "uifcMm": numeric_coords[k] = v @@ -3373,7 +3367,7 @@ def rename( self, name_dict: Mapping[Any, Hashable] = None, **names: Hashable, - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed variables and dimensions. Parameters @@ -3413,7 +3407,7 @@ def rename( def rename_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims: Hashable - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed dimensions only. Parameters @@ -3458,7 +3452,7 @@ def rename_dims( def rename_vars( self, name_dict: Mapping[Any, Hashable] = None, **names: Hashable - ) -> "Dataset": + ) -> Dataset: """Returns a new object with renamed variables including coordinates Parameters @@ -3496,7 +3490,7 @@ def rename_vars( def swap_dims( self, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs - ) -> "Dataset": + ) -> Dataset: """Returns a new object with swapped dimensions. Parameters @@ -3576,8 +3570,8 @@ def swap_dims( coord_names = self._coord_names.copy() coord_names.update({dim for dim in dims_dict.values() if dim in self.variables}) - variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, Index] = {} + variables: dict[Hashable, Variable] = {} + indexes: dict[Hashable, Index] = {} for k, v in self.variables.items(): dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) if k in result_dims: @@ -3602,10 +3596,10 @@ def swap_dims( def expand_dims( self, - dim: Union[None, Hashable, Sequence[Hashable], Mapping[Any, Any]] = None, - axis: Union[None, int, Sequence[int]] = None, + dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, + axis: None | int | Sequence[int] = None, **dim_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Return a new object with an additional axis (or axes) inserted at the corresponding position in the array shape. The new object is a view into the underlying array, not a copy. @@ -3675,7 +3669,7 @@ def expand_dims( " variable name.".format(dim=d) ) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} coord_names = self._coord_names.copy() # If dim is a dict, then ensure that the values are either integers # or iterables. @@ -3734,10 +3728,10 @@ def expand_dims( def set_index( self, - indexes: Mapping[Any, Union[Hashable, Sequence[Hashable]]] = None, + indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None, append: bool = False, - **indexes_kwargs: Union[Hashable, Sequence[Hashable]], - ) -> "Dataset": + **indexes_kwargs: Hashable | Sequence[Hashable], + ) -> Dataset: """Set Dataset (multi-)indexes using one or more existing coordinates or variables. @@ -3798,9 +3792,9 @@ def set_index( def reset_index( self, - dims_or_levels: Union[Hashable, Sequence[Hashable]], + dims_or_levels: Hashable | Sequence[Hashable], drop: bool = False, - ) -> "Dataset": + ) -> Dataset: """Reset the specified index(es) or multi-index level(s). Parameters @@ -3834,7 +3828,7 @@ def reorder_levels( self, dim_order: Mapping[Any, Sequence[int]] = None, **dim_order_kwargs: Sequence[int], - ) -> "Dataset": + ) -> Dataset: """Rearrange index levels using input order. 
Parameters @@ -3905,7 +3899,7 @@ def stack( self, dimensions: Mapping[Any, Sequence[Hashable]] = None, **dimensions_kwargs: Sequence[Hashable], - ) -> "Dataset": + ) -> Dataset: """ Stack any number of existing dimensions into a single new dimension. @@ -3945,7 +3939,7 @@ def to_stacked_array( sample_dims: Collection, variable_dim: Hashable = "variable", name: Hashable = None, - ) -> "DataArray": + ) -> DataArray: """Combine variables of differing dimensionality into a DataArray without broadcasting. @@ -4062,13 +4056,11 @@ def ensure_stackable(val): return data_array - def _unstack_once( - self, dim: Hashable, fill_value, sparse: bool = False - ) -> "Dataset": + def _unstack_once(self, dim: Hashable, fill_value, sparse: bool = False) -> Dataset: index = self.get_index(dim) index = remove_unused_levels_categories(index) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} indexes = {k: v for k, v in self.xindexes.items() if k != dim} for name, var in self.variables.items(): @@ -4096,9 +4088,7 @@ def _unstack_once( variables, coord_names=coord_names, indexes=indexes ) - def _unstack_full_reindex( - self, dim: Hashable, fill_value, sparse: bool - ) -> "Dataset": + def _unstack_full_reindex(self, dim: Hashable, fill_value, sparse: bool) -> Dataset: index = self.get_index(dim) index = remove_unused_levels_categories(index) full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) @@ -4114,7 +4104,7 @@ def _unstack_full_reindex( new_dim_names = index.names new_dim_sizes = [lev.size for lev in index.levels] - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} indexes = {k: v for k, v in self.xindexes.items() if k != dim} for name, var in obj.variables.items(): @@ -4138,10 +4128,10 @@ def _unstack_full_reindex( def unstack( self, - dim: Union[Hashable, Iterable[Hashable]] = None, + dim: Hashable | Iterable[Hashable] = None, fill_value: Any = dtypes.NA, sparse: bool = False, - ) -> "Dataset": + ) -> Dataset: """ Unstack existing dimensions corresponding to MultiIndexes into multiple new dimensions. @@ -4228,7 +4218,7 @@ def unstack( result = result._unstack_once(dim, fill_value, sparse) return result - def update(self, other: "CoercibleMapping") -> "Dataset": + def update(self, other: CoercibleMapping) -> Dataset: """Update this dataset's variables with those from another dataset. Just like :py:meth:`dict.update` this is a in-place operation. @@ -4269,13 +4259,13 @@ def update(self, other: "CoercibleMapping") -> "Dataset": def merge( self, - other: Union["CoercibleMapping", "DataArray"], - overwrite_vars: Union[Hashable, Iterable[Hashable]] = frozenset(), + other: CoercibleMapping | DataArray, + overwrite_vars: Hashable | Iterable[Hashable] = frozenset(), compat: str = "no_conflicts", join: str = "outer", fill_value: Any = dtypes.NA, combine_attrs: str = "override", - ) -> "Dataset": + ) -> Dataset: """Merge the arrays of two datasets into a single dataset. This method generally does not allow for overriding data, with the @@ -4366,8 +4356,8 @@ def _assert_all_in_dataset( ) def drop_vars( - self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "Dataset": + self, names: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> Dataset: """Drop variables from this dataset. 
Parameters @@ -4583,8 +4573,8 @@ def drop_isel(self, indexers=None, **indexers_kwargs): return ds def drop_dims( - self, drop_dims: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" - ) -> "Dataset": + self, drop_dims: Hashable | Iterable[Hashable], *, errors: str = "raise" + ) -> Dataset: """Drop dimensions and associated variables from this dataset. Parameters @@ -4624,7 +4614,7 @@ def transpose( self, *dims: Hashable, missing_dims: str = "raise", - ) -> "Dataset": + ) -> Dataset: """Return a new Dataset object with all array dimensions transposed. Although the order of dimensions on each array will change, the dataset @@ -4730,7 +4720,7 @@ def dropna( return self.isel({dim: mask}) - def fillna(self, value: Any) -> "Dataset": + def fillna(self, value: Any) -> Dataset: """Fill missing values in this object. This operation follows the normal broadcasting and alignment rules that @@ -4815,12 +4805,12 @@ def interpolate_na( dim: Hashable = None, method: str = "linear", limit: int = None, - use_coordinate: Union[bool, Hashable] = True, - max_gap: Union[ - int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta - ] = None, + use_coordinate: bool | Hashable = True, + max_gap: ( + int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta + ) = None, **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Fill in NaNs by interpolating according to different methods. Parameters @@ -4943,7 +4933,7 @@ def interpolate_na( ) return new - def ffill(self, dim: Hashable, limit: int = None) -> "Dataset": + def ffill(self, dim: Hashable, limit: int = None) -> Dataset: """Fill NaN values by propogating values forward *Requires bottleneck.* @@ -4969,7 +4959,7 @@ def ffill(self, dim: Hashable, limit: int = None) -> "Dataset": new = _apply_over_vars_with_dim(ffill, self, dim=dim, limit=limit) return new - def bfill(self, dim: Hashable, limit: int = None) -> "Dataset": + def bfill(self, dim: Hashable, limit: int = None) -> Dataset: """Fill NaN values by propogating values backward *Requires bottleneck.* @@ -4995,7 +4985,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> "Dataset": new = _apply_over_vars_with_dim(bfill, self, dim=dim, limit=limit) return new - def combine_first(self, other: "Dataset") -> "Dataset": + def combine_first(self, other: Dataset) -> Dataset: """Combine two Datasets, default to data_vars of self. The new coordinates follow the normal broadcasting and alignment rules @@ -5017,12 +5007,12 @@ def combine_first(self, other: "Dataset") -> "Dataset": def reduce( self, func: Callable, - dim: Union[Hashable, Iterable[Hashable]] = None, + dim: Hashable | Iterable[Hashable] = None, keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Reduce this dataset by applying `func` along some dimension(s). 
Parameters @@ -5075,7 +5065,7 @@ def reduce( if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - variables: Dict[Hashable, Variable] = {} + variables: dict[Hashable, Variable] = {} for name, var in self._variables.items(): reduce_dims = [d for d in var.dims if d in dims] if name in self.coords: @@ -5120,7 +5110,7 @@ def map( keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Apply a function to each variable in this dataset Parameters @@ -5180,7 +5170,7 @@ def apply( keep_attrs: bool = None, args: Iterable[Any] = (), **kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """ Backward compatible implementation of ``map`` @@ -5197,7 +5187,7 @@ def apply( def assign( self, variables: Mapping[Any, Any] = None, **variables_kwargs: Hashable - ) -> "Dataset": + ) -> Dataset: """Assign new data variables to a Dataset, returning a new object with all the original variables in addition to the new ones. @@ -5322,8 +5312,8 @@ def to_array(self, dim="variable", name=None): ) def _normalize_dim_order( - self, dim_order: List[Hashable] = None - ) -> Dict[Hashable, int]: + self, dim_order: list[Hashable] = None + ) -> dict[Hashable, int]: """ Check the validity of the provided dimensions if any and return the mapping between dimension name and their size. @@ -5351,7 +5341,7 @@ def _normalize_dim_order( return ordered_dims - def to_pandas(self) -> Union[pd.Series, pd.DataFrame]: + def to_pandas(self) -> pd.Series | pd.DataFrame: """Convert this dataset into a pandas object without changing the number of dimensions. The type of the returned object depends on the number of Dataset @@ -5381,7 +5371,7 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int]): index = self.coords.to_index([*ordered_dims]) return pd.DataFrame(dict(zip(columns, data)), index=index) - def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame: + def to_dataframe(self, dim_order: list[Hashable] = None) -> pd.DataFrame: """Convert this dataset into a pandas.DataFrame. Non-index variables in this dataset form the columns of the @@ -5413,7 +5403,7 @@ def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame: return self._to_dataframe(ordered_dims=ordered_dims) def _set_sparse_data_from_dataframe( - self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple + self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple ) -> None: from sparse import COO @@ -5445,7 +5435,7 @@ def _set_sparse_data_from_dataframe( self[name] = (dims, data) def _set_numpy_data_from_dataframe( - self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple + self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple ) -> None: if not isinstance(idx, pd.MultiIndex): for name, values in arrays: @@ -5482,7 +5472,7 @@ def _set_numpy_data_from_dataframe( self[name] = (dims, data) @classmethod - def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Dataset": + def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Dataset: """Convert a pandas.DataFrame into an xarray.Dataset Each column will be converted into an independent variable in the @@ -5918,7 +5908,7 @@ def shift( shifts: Mapping[Hashable, int] = None, fill_value: Any = dtypes.NA, **shifts_kwargs: int, - ) -> "Dataset": + ) -> Dataset: """Shift this dataset by an offset along one or more dimensions. 
@@ -5988,7 +5978,7 @@ def roll( shifts: Mapping[Hashable, int] = None, roll_coords: bool = False, **shifts_kwargs: int, - ) -> "Dataset": + ) -> Dataset: """Roll this dataset by an offset along one or more dimensions. Unlike shift, roll treats the given dimensions as periodic, so will not @@ -6056,7 +6046,7 @@ def roll( variables[k] = var if roll_coords: - indexes: Dict[Hashable, Index] = {} + indexes: dict[Hashable, Index] = {} idx: pd.Index for k, idx in self.xindexes.items(): (dim,) = self.variables[k].dims @@ -6408,9 +6398,9 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None): def integrate( self, - coord: Union[Hashable, Sequence[Hashable]], + coord: Hashable | Sequence[Hashable], datetime_unit: str = None, - ) -> "Dataset": + ) -> Dataset: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -6524,9 +6514,9 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): def cumulative_integrate( self, - coord: Union[Hashable, Sequence[Hashable]], + coord: Hashable | Sequence[Hashable], datetime_unit: str = None, - ) -> "Dataset": + ) -> Dataset: """Integrate along the given coordinate using the trapezoidal rule. .. note:: @@ -6702,7 +6692,7 @@ def filter_by_attrs(self, **kwargs): selection.append(var_name) return self[selection] - def unify_chunks(self) -> "Dataset": + def unify_chunks(self) -> Dataset: """Unify chunk size along all chunked dimensions of this Dataset. Returns @@ -6718,11 +6708,11 @@ def unify_chunks(self) -> "Dataset": def map_blocks( self, - func: "Callable[..., T_Xarray]", + func: Callable[..., T_Xarray], args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, - template: Union["DataArray", "Dataset"] = None, - ) -> "T_Xarray": + template: DataArray | Dataset | None = None, + ) -> T_Xarray: """ Apply a function to each block of this Dataset. @@ -6829,9 +6819,9 @@ def polyfit( deg: int, skipna: bool = None, rcond: float = None, - w: Union[Hashable, Any] = None, + w: Hashable | Any = None, full: bool = False, - cov: Union[bool, str] = False, + cov: bool | str = False, ): """ Least squares polynomial fit. 
@@ -6895,7 +6885,7 @@ def polyfit( skipna_da = skipna x = get_clean_interp_index(self, dim, strict=False) - xname = "{}_".format(self[dim].name) + xname = f"{self[dim].name}_" order = int(deg) + 1 lhs = np.vander(x, order) @@ -6912,7 +6902,7 @@ def polyfit( if w.ndim != 1: raise TypeError("Expected a 1-d array for weights.") if w.shape[0] != lhs.shape[0]: - raise TypeError("Expected w and {} to have the same length".format(dim)) + raise TypeError(f"Expected w and {dim} to have the same length") lhs *= w[:, np.newaxis] # Scaling @@ -6949,7 +6939,7 @@ def polyfit( skipna_da = bool(np.any(da.isnull())) dims_to_stack = [dimname for dimname in da.dims if dimname != dim] - stacked_coords: Dict[Hashable, DataArray] = {} + stacked_coords: dict[Hashable, DataArray] = {} if dims_to_stack: stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") rhs = da.transpose(dim, *dims_to_stack).stack( @@ -6975,7 +6965,7 @@ def polyfit( ) if isinstance(name, str): - name = "{}_".format(name) + name = f"{name}_" else: # Thus a ReprObject => polyfit was called on a DataArray name = "" @@ -7019,16 +7009,19 @@ def polyfit( def pad( self, - pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] = None, mode: str = "constant", - stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, - constant_values: Union[ - int, Tuple[int, int], Mapping[Any, Tuple[int, int]] - ] = None, - end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None, + stat_length: int + | tuple[int, int] + | Mapping[Any, tuple[int, int]] + | None = None, + constant_values: ( + int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None + ) = None, + end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, reflect_type: str = None, **pad_width_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Pad this dataset along one or more dimensions. .. warning:: @@ -7175,7 +7168,7 @@ def idxmin( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "Dataset": + ) -> Dataset: """Return the coordinate label of the minimum value along a dimension. Returns a new `Dataset` named after the dimension with the values of @@ -7272,7 +7265,7 @@ def idxmax( skipna: bool = None, fill_value: Any = dtypes.NA, keep_attrs: bool = None, - ) -> "Dataset": + ) -> Dataset: """Return the coordinate label of the maximum value along a dimension. Returns a new `Dataset` named after the dimension with the values of @@ -7485,7 +7478,7 @@ def query( engine: str = None, missing_dims: str = "raise", **queries_kwargs: Any, - ) -> "Dataset": + ) -> Dataset: """Return a new dataset with each array indexed along the specified dimension(s), where the indexers are given as strings containing Python expressions to be evaluated against the data variables in the @@ -7576,14 +7569,14 @@ def query( def curvefit( self, - coords: Union[Union[str, "DataArray"], Iterable[Union[str, "DataArray"]]], + coords: str | DataArray | Iterable[str | DataArray], func: Callable[..., Any], - reduce_dims: Union[Hashable, Iterable[Hashable]] = None, + reduce_dims: Hashable | Iterable[Hashable] = None, skipna: bool = True, - p0: Dict[str, Any] = None, - bounds: Dict[str, Any] = None, + p0: dict[str, Any] = None, + bounds: dict[str, Any] = None, param_names: Sequence[str] = None, - kwargs: Dict[str, Any] = None, + kwargs: dict[str, Any] = None, ): """ Curve fitting optimization for arbitrary functions. 
@@ -7753,10 +7746,10 @@ def convert_calendar( self, calendar: str, dim: str = "time", - align_on: Optional[str] = None, - missing: Optional[Any] = None, - use_cftime: Optional[bool] = None, - ) -> "Dataset": + align_on: str | None = None, + missing: Any | None = None, + use_cftime: bool | None = None, + ) -> Dataset: """Convert the Dataset to another calendar. Only converts the individual timestamps, does not modify any data except @@ -7874,9 +7867,9 @@ def convert_calendar( def interp_calendar( self, - target: Union[pd.DatetimeIndex, CFTimeIndex, "DataArray"], + target: pd.DatetimeIndex | CFTimeIndex | DataArray, dim: str = "time", - ) -> "Dataset": + ) -> Dataset: """Interpolates the Dataset to another calendar based on decimal year measure. Each timestamp in `source` and `target` are first converted to their decimal diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index c0633064231..2a9f8a27815 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -306,7 +306,7 @@ def summarize_variable( def _summarize_coord_multiindex(coord, col_width, marker): first_col = pretty_print(f" {marker} {coord.name} ", col_width) - return "{}({}) MultiIndex".format(first_col, str(coord.dims[0])) + return f"{first_col}({str(coord.dims[0])}) MultiIndex" def _summarize_coord_levels(coord, col_width, marker="-"): @@ -622,7 +622,7 @@ def array_repr(arr): def dataset_repr(ds): - summary = ["".format(type(ds).__name__)] + summary = [f""] col_width = _calculate_col_width(_get_col_items(ds.variables)) max_rows = OPTIONS["display_max_rows"] diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 072a932b943..36c252f276e 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -266,7 +266,7 @@ def _obj_repr(obj, header_components, sections): def array_repr(arr): dims = OrderedDict((k, v) for k, v in zip(arr.dims, arr.shape)) - obj_type = "xarray.{}".format(type(arr).__name__) + obj_type = f"xarray.{type(arr).__name__}" arr_name = f"'{arr.name}'" if getattr(arr, "name", None) else "" coord_names = list(arr.coords) if hasattr(arr, "coords") else [] @@ -287,7 +287,7 @@ def array_repr(arr): def dataset_repr(ds): - obj_type = "xarray.{}".format(type(ds).__name__) + obj_type = f"xarray.{type(ds).__name__}" header_components = [f"
<div class='xr-obj-type'>{escape(obj_type)}</div>
"] diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 1ded35264f4..844751f24bb 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -296,7 +296,7 @@ def from_variables(cls, variables: Mapping[Any, "Variable"]): if any([var.ndim != 1 for var in variables.values()]): raise ValueError("PandasMultiIndex only accepts 1-dimensional variables") - dims = set([var.dims for var in variables.values()]) + dims = {var.dims for var in variables.values()} if len(dims) != 1: raise ValueError( "unmatched dimensions for variables " diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c93d797266b..581572cd0e1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -579,7 +579,7 @@ def as_indexable(array): if hasattr(array, "__array_function__"): return NdArrayLikeIndexingAdapter(array) - raise TypeError("Invalid array type: {}".format(type(array))) + raise TypeError(f"Invalid array type: {type(array)}") def _outer_to_vectorized_indexer(key, shape): @@ -1051,7 +1051,7 @@ def create_mask(indexer, shape, data=None): mask = any(k == -1 for k in indexer.tuple) else: - raise TypeError("unexpected key type: {}".format(type(indexer))) + raise TypeError(f"unexpected key type: {type(indexer)}") return mask @@ -1149,7 +1149,7 @@ def _indexing_array_and_key(self, key): # https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#detailed-notes). key = key.tuple + (Ellipsis,) else: - raise TypeError("unexpected key type: {}".format(type(key))) + raise TypeError(f"unexpected key type: {type(key)}") return array, key diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 460e02ae10f..d5307678f89 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -4,15 +4,12 @@ TYPE_CHECKING, AbstractSet, Any, - Dict, Hashable, Iterable, - List, Mapping, NamedTuple, Optional, Sequence, - Set, Tuple, Union, ) @@ -66,12 +63,12 @@ def __init__(self, func): self.func = func -def broadcast_dimension_size(variables: List[Variable]) -> Dict[Hashable, int]: +def broadcast_dimension_size(variables: list[Variable]) -> dict[Hashable, int]: """Extract dimension sizes from a dictionary of variables. Raises ValueError if any dimensions have different sizes. """ - dims: Dict[Hashable, int] = {} + dims: dict[Hashable, int] = {} for var in variables: for dim, size in zip(var.dims, var.shape): if dim in dims and size != dims[dim]: @@ -89,7 +86,7 @@ class MergeError(ValueError): def unique_variable( name: Hashable, - variables: List[Variable], + variables: list[Variable], compat: str = "broadcast_equals", equals: bool = None, ) -> Variable: @@ -162,20 +159,18 @@ def unique_variable( def _assert_compat_valid(compat): if compat not in _VALID_COMPAT: - raise ValueError( - "compat={!r} invalid: must be {}".format(compat, set(_VALID_COMPAT)) - ) + raise ValueError(f"compat={compat!r} invalid: must be {set(_VALID_COMPAT)}") MergeElement = Tuple[Variable, Optional[Index]] def merge_collected( - grouped: Dict[Hashable, List[MergeElement]], + grouped: dict[Hashable, list[MergeElement]], prioritized: Mapping[Any, MergeElement] = None, compat: str = "minimal", combine_attrs="override", -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: +) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge dicts of variables, while resolving conflicts appropriately. 
Parameters @@ -196,8 +191,8 @@ def merge_collected( _assert_compat_valid(compat) - merged_vars: Dict[Hashable, Variable] = {} - merged_indexes: Dict[Hashable, Index] = {} + merged_vars: dict[Hashable, Variable] = {} + merged_indexes: dict[Hashable, Index] = {} for name, elements_list in grouped.items(): if name in prioritized: @@ -255,8 +250,8 @@ def merge_collected( def collect_variables_and_indexes( - list_of_mappings: List[DatasetLike], -) -> Dict[Hashable, List[MergeElement]]: + list_of_mappings: list[DatasetLike], +) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes from list of mappings of xarray objects. Mappings must either be Dataset objects, or have values of one of the @@ -269,7 +264,7 @@ def collect_variables_and_indexes( from .dataarray import DataArray from .dataset import Dataset - grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {} + grouped: dict[Hashable, list[tuple[Variable, Index | None]]] = {} def append(name, variable, index): values = grouped.setdefault(name, []) @@ -307,10 +302,10 @@ def append_all(variables, indexes): def collect_from_coordinates( - list_of_coords: "List[Coordinates]", -) -> Dict[Hashable, List[MergeElement]]: + list_of_coords: list[Coordinates], +) -> dict[Hashable, list[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" - grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {} + grouped: dict[Hashable, list[tuple[Variable, Index | None]]] = {} for coords in list_of_coords: variables = coords.variables @@ -322,11 +317,11 @@ def collect_from_coordinates( def merge_coordinates_without_align( - objects: "List[Coordinates]", + objects: list[Coordinates], prioritized: Mapping[Any, MergeElement] = None, exclude_dims: AbstractSet = frozenset(), combine_attrs: str = "override", -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: +) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: """Merge variables/indexes from coordinates without automatic alignments. This function is used for merging coordinate from pre-existing xarray @@ -335,7 +330,7 @@ def merge_coordinates_without_align( collected = collect_from_coordinates(objects) if exclude_dims: - filtered: Dict[Hashable, List[MergeElement]] = {} + filtered: dict[Hashable, list[MergeElement]] = {} for name, elements in collected.items(): new_elements = [ (variable, index) @@ -351,8 +346,8 @@ def merge_coordinates_without_align( def determine_coords( - list_of_mappings: Iterable["DatasetLike"], -) -> Tuple[Set[Hashable], Set[Hashable]]: + list_of_mappings: Iterable[DatasetLike], +) -> tuple[set[Hashable], set[Hashable]]: """Given a list of dicts with xarray object values, identify coordinates. Parameters @@ -370,8 +365,8 @@ def determine_coords( from .dataarray import DataArray from .dataset import Dataset - coord_names: Set[Hashable] = set() - noncoord_names: Set[Hashable] = set() + coord_names: set[Hashable] = set() + noncoord_names: set[Hashable] = set() for mapping in list_of_mappings: if isinstance(mapping, Dataset): @@ -388,7 +383,7 @@ def determine_coords( return coord_names, noncoord_names -def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["DatasetLike"]: +def coerce_pandas_values(objects: Iterable[CoercibleMapping]) -> list[DatasetLike]: """Convert pandas values found in a list of labeled objects. 
     Parameters
@@ -408,7 +403,7 @@ def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["Dataset
     out = []
     for obj in objects:
         if isinstance(obj, Dataset):
-            variables: "DatasetLike" = obj
+            variables: DatasetLike = obj
         else:
             variables = {}
             if isinstance(obj, PANDAS_TYPES):
@@ -422,8 +417,8 @@ def coerce_pandas_values(objects: Iterable["CoercibleMapping"]) -> List["Dataset
 
 
 def _get_priority_vars_and_indexes(
-    objects: List["DatasetLike"], priority_arg: Optional[int], compat: str = "equals"
-) -> Dict[Hashable, MergeElement]:
+    objects: list[DatasetLike], priority_arg: int | None, compat: str = "equals"
+) -> dict[Hashable, MergeElement]:
     """Extract the priority variable from a list of mappings.
 
     We need this method because in some cases the priority argument itself
@@ -448,20 +443,20 @@ def _get_priority_vars_and_indexes(
 
     collected = collect_variables_and_indexes([objects[priority_arg]])
     variables, indexes = merge_collected(collected, compat=compat)
-    grouped: Dict[Hashable, MergeElement] = {}
+    grouped: dict[Hashable, MergeElement] = {}
     for name, variable in variables.items():
         grouped[name] = (variable, indexes.get(name))
     return grouped
 
 
 def merge_coords(
-    objects: Iterable["CoercibleMapping"],
+    objects: Iterable[CoercibleMapping],
     compat: str = "minimal",
     join: str = "outer",
-    priority_arg: Optional[int] = None,
-    indexes: Optional[Mapping[Any, Index]] = None,
+    priority_arg: int | None = None,
+    indexes: Mapping[Any, Index] | None = None,
     fill_value: object = dtypes.NA,
-) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]:
+) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]:
     """Merge coordinate variables.
 
     See merge_core below for argument descriptions. This works similarly to
@@ -568,21 +563,21 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
 
 
 class _MergeResult(NamedTuple):
-    variables: Dict[Hashable, Variable]
-    coord_names: Set[Hashable]
-    dims: Dict[Hashable, int]
-    indexes: Dict[Hashable, pd.Index]
-    attrs: Dict[Hashable, Any]
+    variables: dict[Hashable, Variable]
+    coord_names: set[Hashable]
+    dims: dict[Hashable, int]
+    indexes: dict[Hashable, pd.Index]
+    attrs: dict[Hashable, Any]
 
 
 def merge_core(
-    objects: Iterable["CoercibleMapping"],
+    objects: Iterable[CoercibleMapping],
     compat: str = "broadcast_equals",
     join: str = "outer",
-    combine_attrs: Optional[str] = "override",
-    priority_arg: Optional[int] = None,
-    explicit_coords: Optional[Sequence] = None,
-    indexes: Optional[Mapping[Any, Any]] = None,
+    combine_attrs: str | None = "override",
+    priority_arg: int | None = None,
+    explicit_coords: Sequence | None = None,
+    indexes: Mapping[Any, Any] | None = None,
     fill_value: object = dtypes.NA,
 ) -> _MergeResult:
     """Core logic for merging labeled objects.
@@ -667,12 +662,12 @@ def merge_core(
 
 
 def merge(
-    objects: Iterable[Union["DataArray", "CoercibleMapping"]],
+    objects: Iterable[DataArray | CoercibleMapping],
     compat: str = "no_conflicts",
     join: str = "outer",
     fill_value: object = dtypes.NA,
     combine_attrs: str = "override",
-) -> "Dataset":
+) -> Dataset:
     """Merge any number of xarray objects into a single Dataset as variables.
 
     Parameters
@@ -913,9 +908,9 @@ def merge(
 
 
 def dataset_merge_method(
-    dataset: "Dataset",
-    other: "CoercibleMapping",
-    overwrite_vars: Union[Hashable, Iterable[Hashable]],
+    dataset: Dataset,
+    other: CoercibleMapping,
+    overwrite_vars: Hashable | Iterable[Hashable],
     compat: str,
     join: str,
     fill_value: Any,
@@ -938,8 +933,8 @@ def dataset_merge_method(
         objs = [dataset, other]
         priority_arg = 1
     else:
-        other_overwrite: Dict[Hashable, CoercibleValue] = {}
-        other_no_overwrite: Dict[Hashable, CoercibleValue] = {}
+        other_overwrite: dict[Hashable, CoercibleValue] = {}
+        other_no_overwrite: dict[Hashable, CoercibleValue] = {}
         for k, v in other.items():
             if k in overwrite_vars:
                 other_overwrite[k] = v
@@ -958,9 +953,7 @@ def dataset_merge_method(
     )
 
 
-def dataset_update_method(
-    dataset: "Dataset", other: "CoercibleMapping"
-) -> _MergeResult:
+def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeResult:
     """Guts of the Dataset.update method.
 
     This drops a duplicated coordinates from `other` if `other` is not an
diff --git a/xarray/core/missing.py b/xarray/core/missing.py
index acfbb032c23..2525272f719 100644
--- a/xarray/core/missing.py
+++ b/xarray/core/missing.py
@@ -721,7 +721,7 @@ def interp_func(var, x, new_x, method, kwargs):
 
         _, rechunked = da.unify_chunks(*args)
 
-        args = tuple([elem for pair in zip(rechunked, args[1::2]) for elem in pair])
+        args = tuple(elem for pair in zip(rechunked, args[1::2]) for elem in pair)
 
         new_x = rechunked[1 + (len(rechunked) - 1) // 2 :]
 
diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
index aad1d285377..3f6bb34a36e 100644
--- a/xarray/core/parallel.py
+++ b/xarray/core/parallel.py
@@ -8,14 +8,10 @@
     Any,
     Callable,
     DefaultDict,
-    Dict,
     Hashable,
     Iterable,
-    List,
     Mapping,
     Sequence,
-    Tuple,
-    Union,
 )
 
 import numpy as np
@@ -53,7 +49,7 @@ def assert_chunks_compatible(a: Dataset, b: Dataset):
 
 
 def check_result_variables(
-    result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str
+    result: DataArray | Dataset, expected: Mapping[str, Any], kind: str
 ):
 
     if kind == "coords":
@@ -126,7 +122,7 @@ def make_meta(obj):
 
 
 def infer_template(
-    func: Callable[..., T_Xarray], obj: Union[DataArray, Dataset], *args, **kwargs
+    func: Callable[..., T_Xarray], obj: DataArray | Dataset, *args, **kwargs
 ) -> T_Xarray:
     """Infer return object by running the function on meta objects."""
     meta_args = [make_meta(arg) for arg in (obj,) + args]
@@ -148,7 +144,7 @@ def infer_template(
     return template
 
 
-def make_dict(x: Union[DataArray, Dataset]) -> Dict[Hashable, Any]:
+def make_dict(x: DataArray | Dataset) -> dict[Hashable, Any]:
     """Map variable name to numpy(-like) data
     (Dataset.to_dict() is too complicated).
     """
@@ -167,10 +163,10 @@ def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping
 
 def map_blocks(
     func: Callable[..., T_Xarray],
-    obj: Union[DataArray, Dataset],
+    obj: DataArray | Dataset,
     args: Sequence[Any] = (),
     kwargs: Mapping[str, Any] = None,
-    template: Union[DataArray, Dataset] = None,
+    template: DataArray | Dataset | None = None,
 ) -> T_Xarray:
     """Apply a function to each block of a DataArray or Dataset.
 
@@ -271,7 +267,7 @@ def map_blocks(
 
     def _wrapper(
         func: Callable,
-        args: List,
+        args: list,
         kwargs: dict,
         arg_is_array: Iterable[bool],
         expected: dict,
@@ -415,8 +411,8 @@ def _wrapper(
     # for each variable in the dataset, which is the result of the
     # func applied to the values.
 
-    graph: Dict[Any, Any] = {}
-    new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict)
+    graph: dict[Any, Any] = {}
+    new_layers: DefaultDict[str, dict[Any, Any]] = collections.defaultdict(dict)
     gname = "{}-{}".format(
         dask.utils.funcname(func), dask.base.tokenize(npargs[0], args, kwargs)
     )
@@ -516,14 +512,14 @@ def subset_dataset_to_block(
     graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected)
 
     # mapping from variable name to dask graph key
-    var_key_map: Dict[Hashable, str] = {}
+    var_key_map: dict[Hashable, str] = {}
     for name, variable in template.variables.items():
         if name in indexes:
             continue
         gname_l = f"{name}-{gname}"
         var_key_map[name] = gname_l
 
-        key: Tuple[Any, ...] = (gname_l,)
+        key: tuple[Any, ...] = (gname_l,)
         for dim in variable.dims:
             if dim in chunk_index:
                 key += (chunk_index[dim],)
diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
index 0cac9f2b129..0bc07c1aaeb 100644
--- a/xarray/core/rolling.py
+++ b/xarray/core/rolling.py
@@ -175,7 +175,7 @@ def _mapping_to_list(
             return [arg]
         else:
             raise ValueError(
-                "Mapping argument is necessary for {}d-rolling.".format(len(self.dim))
+                f"Mapping argument is necessary for {len(self.dim)}d-rolling."
             )
 
     def _get_keep_attrs(self, keep_attrs):
@@ -803,7 +803,7 @@ def __repr__(self):
         """provide a nice str repr of our coarsen object"""
 
         attrs = [
-            "{k}->{v}".format(k=k, v=getattr(self, k))
+            f"{k}->{getattr(self, k)}"
             for k in self._attributes
             if getattr(self, k, None) is not None
         ]
diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py
index 7a8b0be9bd4..9fd097cd4dc 100644
--- a/xarray/core/rolling_exp.py
+++ b/xarray/core/rolling_exp.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Generic, Mapping, Union
+from typing import Any, Generic, Mapping
 
 import numpy as np
 from packaging.version import Version
@@ -101,7 +101,7 @@ class RollingExp(Generic[T_Xarray]):
     def __init__(
         self,
         obj: T_Xarray,
-        windows: Mapping[Any, Union[int, float]],
+        windows: Mapping[Any, int | float],
         window_type: str = "span",
     ):
         self.obj: T_Xarray = obj
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 68615eef74f..a9ea0acb267 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -468,7 +468,7 @@ def __contains__(self, key: object) -> bool:
         return key in self.mapping
 
     def __repr__(self) -> str:
-        return "{}({!r})".format(type(self).__name__, self.mapping)
+        return f"{type(self).__name__}({self.mapping!r})"
 
 
 def FrozenDict(*args, **kwargs) -> Frozen:
@@ -544,7 +544,7 @@ def update(self, values: Iterable[T]) -> None:
             self._d[v] = None
 
     def __repr__(self) -> str:
-        return "{}({!r})".format(type(self).__name__, list(self))
+        return f"{type(self).__name__}({list(self)!r})"
 
 
 class NdimSizeLenMixin:
@@ -592,7 +592,7 @@ def __getitem__(self: Any, key):
         return self.array[key]
 
     def __repr__(self: Any) -> str:
-        return "{}(array={!r})".format(type(self).__name__, self.array)
+        return f"{type(self).__name__}(array={self.array!r})"
 
 
 class ReprObject:
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index 58aeceed3b1..08af2e694df 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -6,18 +6,7 @@
 import warnings
 from collections import defaultdict
 from datetime import timedelta
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Hashable,
-    List,
-    Mapping,
-    Optional,
-    Sequence,
-    Tuple,
-    Union,
-)
+from typing import TYPE_CHECKING, Any, Hashable, Mapping, Sequence
 
 import numpy as np
 import pandas as pd
@@ -80,7 +69,7 @@ class MissingDimensionsError(ValueError):
 
 
 # TODO: move this to an xarray.exceptions module?
-def as_variable(obj, name=None) -> Union[Variable, IndexVariable]:
+def as_variable(obj, name=None) -> Variable | IndexVariable:
     """Convert an object into a Variable.
 
     Parameters
@@ -136,7 +125,7 @@ def as_variable(obj, name=None) -> Union[Variable, IndexVariable]:
     elif isinstance(obj, (pd.Index, IndexVariable)) and obj.name is not None:
         obj = Variable(obj.name, obj)
     elif isinstance(obj, (set, dict)):
-        raise TypeError("variable {!r} has invalid type {!r}".format(name, type(obj)))
+        raise TypeError(f"variable {name!r} has invalid type {type(obj)!r}")
     elif name is not None:
         data = as_compatible_data(obj)
         if data.ndim != 1:
@@ -865,7 +854,7 @@ def __setitem__(self, key, value):
         indexable[index_tuple] = value
 
     @property
-    def attrs(self) -> Dict[Hashable, Any]:
+    def attrs(self) -> dict[Hashable, Any]:
         """Dictionary of local attributes on this variable."""
         if self._attrs is None:
             self._attrs = {}
@@ -999,7 +988,7 @@ def __deepcopy__(self, memo=None):
     __hash__ = None  # type: ignore[assignment]
 
     @property
-    def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
+    def chunks(self) -> tuple[tuple[int, ...], ...] | None:
         """
         Tuple of block lengths for this dataarray's data, in order of dimensions, or None if
         the underlying data is not a dask array.
@@ -1013,7 +1002,7 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
         return getattr(self._data, "chunks", None)
 
     @property
-    def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
+    def chunksizes(self) -> Mapping[Any, tuple[int, ...]]:
         """
         Mapping from dimension names to block lengths for this variable's data, or None if
         the underlying data is not a dask array.
@@ -1282,7 +1271,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs):
 
     def _pad_options_dim_to_index(
         self,
-        pad_option: Mapping[Any, Union[int, Tuple[int, int]]],
+        pad_option: Mapping[Any, int | tuple[int, int]],
         fill_with_shape=False,
     ):
         if fill_with_shape:
@@ -1294,14 +1283,16 @@ def _pad_options_dim_to_index(
 
     def pad(
         self,
-        pad_width: Mapping[Any, Union[int, Tuple[int, int]]] = None,
+        pad_width: Mapping[Any, int | tuple[int, int]] | None = None,
         mode: str = "constant",
-        stat_length: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None,
-        constant_values: Union[
-            int, Tuple[int, int], Mapping[Any, Tuple[int, int]]
-        ] = None,
-        end_values: Union[int, Tuple[int, int], Mapping[Any, Tuple[int, int]]] = None,
-        reflect_type: str = None,
+        stat_length: int
+        | tuple[int, int]
+        | Mapping[Any, tuple[int, int]]
+        | None = None,
+        constant_values: (int | tuple[int, int] | Mapping[Any, tuple[int, int]])
+        | None = None,
+        end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None,
+        reflect_type: str | None = None,
         **pad_width_kwargs: Any,
     ):
         """
@@ -1438,7 +1429,7 @@ def transpose(
         self,
         *dims,
         missing_dims: str = "raise",
-    ) -> "Variable":
+    ) -> Variable:
         """Return a new Variable object with transposed dimensions.
 
         Parameters
@@ -1483,7 +1474,7 @@ def transpose(
         return self._replace(dims=dims, data=data)
 
     @property
-    def T(self) -> "Variable":
+    def T(self) -> Variable:
         return self.transpose()
 
     def set_dims(self, dims, shape=None):
@@ -1535,7 +1526,7 @@ def set_dims(self, dims, shape=None):
         )
         return expanded_var.transpose(*dims)
 
-    def _stack_once(self, dims: List[Hashable], new_dim: Hashable):
+    def _stack_once(self, dims: list[Hashable], new_dim: Hashable):
         if not set(dims) <= set(self.dims):
             raise ValueError(f"invalid existing dimensions: {dims}")
 
@@ -1593,7 +1584,7 @@ def stack(self, dimensions=None, **dimensions_kwargs):
 
     def _unstack_once_full(
         self, dims: Mapping[Any, int], old_dim: Hashable
-    ) -> "Variable":
+    ) -> Variable:
         """
         Unstacks the variable without needing an index.
 
@@ -1634,7 +1625,7 @@ def _unstack_once_full(
         dim: Hashable,
         fill_value=dtypes.NA,
         sparse: bool = False,
-    ) -> "Variable":
+    ) -> Variable:
         """
         Unstacks this variable given an index to unstack and the name of the
         dimension to which the index refers.
@@ -2109,9 +2100,7 @@ def rank(self, dim, pct=False):
                 "prior to calling this method."
            )
         elif not isinstance(data, np.ndarray):
-            raise TypeError(
-                "rank is not implemented for {} objects.".format(type(data))
-            )
+            raise TypeError(f"rank is not implemented for {type(data)} objects.")
 
         axis = self.get_axis_num(dim)
         func = bn.nanrankdata if self.dtype.kind == "f" else bn.rankdata
@@ -2455,11 +2444,11 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float):
     def _unravel_argminmax(
         self,
         argminmax: str,
-        dim: Union[Hashable, Sequence[Hashable], None],
-        axis: Union[int, None],
-        keep_attrs: Optional[bool],
-        skipna: Optional[bool],
-    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
+        dim: Hashable | Sequence[Hashable] | None,
+        axis: int | None,
+        keep_attrs: bool | None,
+        skipna: bool | None,
+    ) -> Variable | dict[Hashable, Variable]:
         """Apply argmin or argmax over one or more dimensions, returning the result as a
         dict of DataArray that can be passed directly to isel.
         """
@@ -2524,11 +2513,11 @@ def _unravel_argminmax(
 
     def argmin(
         self,
-        dim: Union[Hashable, Sequence[Hashable]] = None,
+        dim: Hashable | Sequence[Hashable] = None,
         axis: int = None,
         keep_attrs: bool = None,
         skipna: bool = None,
-    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
+    ) -> Variable | dict[Hashable, Variable]:
         """Index or indices of the minimum of the Variable over one or more dimensions.
         If a sequence is passed to 'dim', then result returned as dict of Variables,
         which can be passed directly to isel(). If a single str is passed to 'dim' then
@@ -2569,11 +2558,11 @@ def argmin(
 
     def argmax(
         self,
-        dim: Union[Hashable, Sequence[Hashable]] = None,
+        dim: Hashable | Sequence[Hashable] = None,
         axis: int = None,
         keep_attrs: bool = None,
         skipna: bool = None,
-    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
+    ) -> Variable | dict[Hashable, Variable]:
         """Index or indices of the maximum of the Variable over one or more dimensions.
         If a sequence is passed to 'dim', then result returned as dict of Variables,
         which can be passed directly to isel(). If a single str is passed to 'dim' then
@@ -2801,7 +2790,7 @@ def to_index(self):
             # set default names for multi-index unnamed levels so that
             # we can safely rename dimension / coordinate later
             valid_level_names = [
-                name or "{}_level_{}".format(self.dims[0], i)
+                name or f"{self.dims[0]}_level_{i}"
                 for i, name in enumerate(index.names)
             ]
             index = index.set_names(valid_level_names)
diff --git a/xarray/plot/dataset_plot.py b/xarray/plot/dataset_plot.py
index c1aedd570bc..527ae121dcf 100644
--- a/xarray/plot/dataset_plot.py
+++ b/xarray/plot/dataset_plot.py
@@ -591,9 +591,9 @@ def streamplot(ds, x, y, ax, u, v, **kwargs):
     if len(ds[y].dims) == 1:
         ydim = ds[y].dims[0]
     if xdim is not None and ydim is None:
-        ydim = set(ds[y].dims) - set([xdim])
+        ydim = set(ds[y].dims) - {xdim}
     if ydim is not None and xdim is None:
-        xdim = set(ds[x].dims) - set([ydim])
+        xdim = set(ds[x].dims) - {ydim}
 
     x, y, u, v = broadcast(ds[x], ds[y], ds[u], ds[v])
 
diff --git a/xarray/testing.py b/xarray/testing.py
index 40ca12852b9..4369b828daf 100644
--- a/xarray/testing.py
+++ b/xarray/testing.py
@@ -82,7 +82,7 @@ def assert_equal(a, b):
     elif isinstance(a, Dataset):
         assert a.equals(b), formatting.diff_dataset_repr(a, b, "equals")
     else:
-        raise TypeError("{} not supported by assertion comparison".format(type(a)))
+        raise TypeError(f"{type(a)} not supported by assertion comparison")
 
 
 @ensure_warnings
@@ -113,7 +113,7 @@ def assert_identical(a, b):
     elif isinstance(a, (Dataset, Variable)):
         assert a.identical(b), formatting.diff_dataset_repr(a, b, "identical")
     else:
-        raise TypeError("{} not supported by assertion comparison".format(type(a)))
+        raise TypeError(f"{type(a)} not supported by assertion comparison")
 
 
 @ensure_warnings
@@ -170,7 +170,7 @@ def compat_variable(a, b):
         )
         assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv)
     else:
-        raise TypeError("{} not supported by assertion comparison".format(type(a)))
+        raise TypeError(f"{type(a)} not supported by assertion comparison")
 
 
 def _format_message(x, y, err_msg, verbose):
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index bffac52e979..356335f47e6 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -743,9 +743,7 @@ def find_and_validate_array(obj):
                     elif isinstance(obj.array, pd.Index):
                         assert isinstance(obj, indexing.PandasIndexingAdapter)
                     else:
-                        raise TypeError(
-                            "{} is wrapped by {}".format(type(obj.array), type(obj))
-                        )
+                        raise TypeError(f"{type(obj.array)} is wrapped by {type(obj)}")
 
         for k, v in ds.variables.items():
             find_and_validate_array(v._data)
@@ -1195,7 +1193,7 @@ def test_multiindex_not_implemented(self):
 @contextlib.contextmanager
 def create_tmp_file(suffix=".nc", allow_cleanup_failure=False):
     temp_dir = tempfile.mkdtemp()
-    path = os.path.join(temp_dir, "temp-{}{}".format(next(_counter), suffix))
+    path = os.path.join(temp_dir, f"temp-{next(_counter)}{suffix}")
     try:
         yield path
     finally:
@@ -4222,8 +4220,8 @@ def create_tmp_geotiff(
         transform = from_origin(*transform_args)
     if additional_attrs is None:
         additional_attrs = {
-            "descriptions": tuple("d{}".format(n + 1) for n in range(nz)),
-            "units": tuple("u{}".format(n + 1) for n in range(nz)),
+            "descriptions": tuple(f"d{n + 1}" for n in range(nz)),
+            "units": tuple(f"u{n + 1}" for n in range(nz)),
         }
     with rasterio.open(
         tmp_file,
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index a8d06188844..8a37df62261 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -7,7 +7,6 @@
 
 from xarray import DataArray, Dataset, Variable, concat
 from xarray.core import dtypes, merge
-from xarray.core.concat import compat_options, concat_options
 
 from . import (
     InaccessibleArray,
diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py
index 4d1eee6363d..472192d3a9e 100644
--- a/xarray/tests/test_plugins.py
+++ b/xarray/tests/test_plugins.py
@@ -91,7 +91,7 @@ def test_backends_dict_from_pkg() -> None:
     entrypoints = [EntryPoint(name, value, group) for name, value, group in specs]
     engines = plugins.backends_dict_from_pkg(entrypoints)
     assert len(engines) == 2
-    assert engines.keys() == set(("engine1", "engine2"))
+    assert engines.keys() == {"engine1", "engine2"}
 
 
 def test_set_missing_parameters() -> None:
diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
index 3267af8b45b..3f69705e3f1 100644
--- a/xarray/tests/test_variable.py
+++ b/xarray/tests/test_variable.py
@@ -232,7 +232,7 @@ def __hash__(self):
                 return hash(self.item)
 
             def __repr__(self):
-                return "{}(item={!r})".format(type(self).__name__, self.item)
+                return f"{type(self).__name__}(item={self.item!r})"
 
         item = HashableItemWrapper((1, 2, 3))
         x = self.cls("x", [item])
diff --git a/xarray/ufuncs.py b/xarray/ufuncs.py
index 7f6eed55e9b..24907a158ef 100644
--- a/xarray/ufuncs.py
+++ b/xarray/ufuncs.py
@@ -53,9 +53,7 @@ def __call__(self, *args, **kwargs):
         new_args = args
         res = _UNDEFINED
         if len(args) > 2 or len(args) == 0:
-            raise TypeError(
-                "cannot handle {} arguments for {!r}".format(len(args), self._name)
-            )
+            raise TypeError(f"cannot handle {len(args)} arguments for {self._name!r}")
         elif len(args) == 1:
             if isinstance(args[0], _xarray_types):
                 res = args[0]._unary_op(self)