pandas-dev · mroeschke · Aug 1, 2025 · Aug 1, 2025
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -803,7 +803,7 @@ cdef class BaseMultiIndexCodesEngine:
         int_keys : 1-dimensional array of dtype uint64 or object
             Integers representing one combination each
         """
-        level_codes = list(target._recode_for_new_levels(self.levels))
+        level_codes = list(target._recode_for_new_levels(self.levels, copy=True))
         for i, codes in enumerate(level_codes):
             if self.levels[i].hasnans:
                 na_index = self.levels[i].isna().nonzero()[0][0]

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -670,13 +670,15 @@ def _from_inferred_categories(
         if known_categories:
             # Recode from observation order to dtype.categories order.
             categories = dtype.categories
-            codes = recode_for_categories(inferred_codes, cats, categories)
+            codes = recode_for_categories(inferred_codes, cats, categories, copy=False)
         elif not cats.is_monotonic_increasing:
             # Sort categories and recode for unknown categories.
             unsorted = cats.copy()
             categories = cats.sort_values()
 
-            codes = recode_for_categories(inferred_codes, unsorted, categories)
+            codes = recode_for_categories(
+                inferred_codes, unsorted, categories, copy=False
+            )
             dtype = CategoricalDtype(categories, ordered=False)
         else:
             dtype = CategoricalDtype(cats, ordered=False)
@@ -945,7 +947,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:
 
         super().__init__(self._ndarray, new_dtype)
 
-    def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
+    def _set_dtype(self, dtype: CategoricalDtype, *, copy: bool) -> Self:
         """
         Internal method for directly updating the CategoricalDtype
 
@@ -959,7 +961,7 @@ def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
         a (valid) instance of `CategoricalDtype`.
         """
         codes = recode_for_categories(
-            self.codes, self.categories, dtype.categories, copy
+            self.codes, self.categories, dtype.categories, copy=copy
         )
         return type(self)._simple_new(codes, dtype=dtype)
 
@@ -1154,7 +1156,7 @@ def set_categories(
             codes = cat._codes
         else:
             codes = recode_for_categories(
-                cat.codes, cat.categories, new_dtype.categories
+                cat.codes, cat.categories, new_dtype.categories, copy=False
             )
         NDArrayBacked.__init__(cat, codes, new_dtype)
         return cat
@@ -3006,7 +3008,7 @@ def _get_codes_for_values(
 
 
 def recode_for_categories(
-    codes: np.ndarray, old_categories, new_categories, copy: bool = True
+    codes: np.ndarray, old_categories, new_categories, *, copy: bool
 ) -> np.ndarray:
     """
     Convert a set of codes for to a new set of categories
@@ -3027,7 +3029,7 @@ def recode_for_categories(
     >>> old_cat = pd.Index(["b", "a", "c"])
     >>> new_cat = pd.Index(["a", "b"])
     >>> codes = np.array([0, 1, 1, 2])
-    >>> recode_for_categories(codes, old_cat, new_cat)
+    >>> recode_for_categories(codes, old_cat, new_cat, copy=True)
     array([ 1,  0,  0, -1], dtype=int8)
     """
     if len(old_categories) == 0:

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -318,7 +318,8 @@ def _maybe_unwrap(x):
             categories = categories.sort_values()
 
         new_codes = [
-            recode_for_categories(c.codes, c.categories, categories) for c in to_union
+            recode_for_categories(c.codes, c.categories, categories, copy=False)
+            for c in to_union
         ]
         new_codes = np.concatenate(new_codes)
     else:

diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py
@@ -53,7 +53,7 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
 
         # we recode according to the uniques
         categories = c.categories.take(take_codes)
-        codes = recode_for_categories(c.codes, c.categories, categories)
+        codes = recode_for_categories(c.codes, c.categories, categories, copy=False)
 
         # return a new categorical that maps our new codes
         # and categories

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -2675,7 +2675,7 @@ def _reorder_ilevels(self, order) -> MultiIndex:
         )
 
     def _recode_for_new_levels(
-        self, new_levels, copy: bool = True
+        self, new_levels, *, copy: bool
     ) -> Generator[np.ndarray]:
         if len(new_levels) > self.nlevels:
             raise AssertionError(

diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py
@@ -480,7 +480,7 @@ def test_recode_to_categories(self, codes, old, new, expected):
         expected = np.asanyarray(expected, dtype=np.int8)
         old = Index(old)
         new = Index(new)
-        result = recode_for_categories(codes, old, new)
+        result = recode_for_categories(codes, old, new, copy=True)
         tm.assert_numpy_array_equal(result, expected)
 
     def test_recode_to_categories_large(self):
@@ -489,5 +489,5 @@ def test_recode_to_categories_large(self):
         old = Index(codes)
         expected = np.arange(N - 1, -1, -1, dtype=np.int16)
         new = Index(expected)
-        result = recode_for_categories(codes, old, new)
+        result = recode_for_categories(codes, old, new, copy=True)
         tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py
@@ -49,12 +49,12 @@ def test_categories_match_up_to_permutation(self):
 
     def test_set_dtype_same(self):
         c = Categorical(["a", "b", "c"])
-        result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
+        result = c._set_dtype(CategoricalDtype(["a", "b", "c"]), copy=True)
         tm.assert_categorical_equal(result, c)
 
     def test_set_dtype_new_categories(self):
         c = Categorical(["a", "b", "c"])
-        result = c._set_dtype(CategoricalDtype(list("abcd")))
+        result = c._set_dtype(CategoricalDtype(list("abcd")), copy=True)
         tm.assert_numpy_array_equal(result.codes, c.codes)
         tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))
 
@@ -86,12 +86,12 @@ def test_set_dtype_new_categories(self):
     def test_set_dtype_many(self, values, categories, new_categories, ordered):
         c = Categorical(values, categories)
         expected = Categorical(values, new_categories, ordered)
-        result = c._set_dtype(expected.dtype)
+        result = c._set_dtype(expected.dtype, copy=True)
         tm.assert_categorical_equal(result, expected)
 
     def test_set_dtype_no_overlap(self):
         c = Categorical(["a", "b", "c"], ["d", "e"])
-        result = c._set_dtype(CategoricalDtype(["a", "b"]))
+        result = c._set_dtype(CategoricalDtype(["a", "b"]), copy=True)
         expected = Categorical([None, None, None], categories=["a", "b"])
         tm.assert_categorical_equal(result, expected)
 

diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py
@@ -52,7 +52,7 @@ def test_nan_handling(self):
 
     def test_set_dtype_nans(self):
         c = Categorical(["a", "b", np.nan])
-        result = c._set_dtype(CategoricalDtype(["a", "c"]))
+        result = c._set_dtype(CategoricalDtype(["a", "c"]), copy=True)
         tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))
 
     def test_set_item_nan(self):