Skip to content

REF: make copy keyword in recode_for_categories keyword only #62019

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ cdef class BaseMultiIndexCodesEngine:
int_keys : 1-dimensional array of dtype uint64 or object
Integers representing one combination each
"""
level_codes = list(target._recode_for_new_levels(self.levels))
level_codes = list(target._recode_for_new_levels(self.levels, copy=True))
for i, codes in enumerate(level_codes):
if self.levels[i].hasnans:
na_index = self.levels[i].isna().nonzero()[0][0]
Expand Down
16 changes: 9 additions & 7 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,13 +670,15 @@ def _from_inferred_categories(
if known_categories:
# Recode from observation order to dtype.categories order.
categories = dtype.categories
codes = recode_for_categories(inferred_codes, cats, categories)
codes = recode_for_categories(inferred_codes, cats, categories, copy=False)
elif not cats.is_monotonic_increasing:
# Sort categories and recode for unknown categories.
unsorted = cats.copy()
categories = cats.sort_values()

codes = recode_for_categories(inferred_codes, unsorted, categories)
codes = recode_for_categories(
inferred_codes, unsorted, categories, copy=False
)
dtype = CategoricalDtype(categories, ordered=False)
else:
dtype = CategoricalDtype(cats, ordered=False)
Expand Down Expand Up @@ -945,7 +947,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:

super().__init__(self._ndarray, new_dtype)

def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
def _set_dtype(self, dtype: CategoricalDtype, *, copy: bool) -> Self:
"""
Internal method for directly updating the CategoricalDtype

Expand All @@ -959,7 +961,7 @@ def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
a (valid) instance of `CategoricalDtype`.
"""
codes = recode_for_categories(
self.codes, self.categories, dtype.categories, copy
self.codes, self.categories, dtype.categories, copy=copy
)
return type(self)._simple_new(codes, dtype=dtype)

Expand Down Expand Up @@ -1154,7 +1156,7 @@ def set_categories(
codes = cat._codes
else:
codes = recode_for_categories(
cat.codes, cat.categories, new_dtype.categories
cat.codes, cat.categories, new_dtype.categories, copy=False
)
NDArrayBacked.__init__(cat, codes, new_dtype)
return cat
Expand Down Expand Up @@ -3006,7 +3008,7 @@ def _get_codes_for_values(


def recode_for_categories(
codes: np.ndarray, old_categories, new_categories, copy: bool = True
codes: np.ndarray, old_categories, new_categories, *, copy: bool
) -> np.ndarray:
"""
Convert a set of codes to a new set of categories
Expand All @@ -3027,7 +3029,7 @@ def recode_for_categories(
>>> old_cat = pd.Index(["b", "a", "c"])
>>> new_cat = pd.Index(["a", "b"])
>>> codes = np.array([0, 1, 1, 2])
>>> recode_for_categories(codes, old_cat, new_cat)
>>> recode_for_categories(codes, old_cat, new_cat, copy=True)
array([ 1, 0, 0, -1], dtype=int8)
"""
if len(old_categories) == 0:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ def _maybe_unwrap(x):
categories = categories.sort_values()

new_codes = [
recode_for_categories(c.codes, c.categories, categories) for c in to_union
recode_for_categories(c.codes, c.categories, categories, copy=False)
for c in to_union
]
new_codes = np.concatenate(new_codes)
else:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica

# we recode according to the uniques
categories = c.categories.take(take_codes)
codes = recode_for_categories(c.codes, c.categories, categories)
codes = recode_for_categories(c.codes, c.categories, categories, copy=False)

# return a new categorical that maps our new codes
# and categories
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2675,7 +2675,7 @@ def _reorder_ilevels(self, order) -> MultiIndex:
)

def _recode_for_new_levels(
self, new_levels, copy: bool = True
self, new_levels, *, copy: bool
) -> Generator[np.ndarray]:
if len(new_levels) > self.nlevels:
raise AssertionError(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/categorical/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def test_recode_to_categories(self, codes, old, new, expected):
expected = np.asanyarray(expected, dtype=np.int8)
old = Index(old)
new = Index(new)
result = recode_for_categories(codes, old, new)
result = recode_for_categories(codes, old, new, copy=True)
tm.assert_numpy_array_equal(result, expected)

def test_recode_to_categories_large(self):
Expand All @@ -489,5 +489,5 @@ def test_recode_to_categories_large(self):
old = Index(codes)
expected = np.arange(N - 1, -1, -1, dtype=np.int16)
new = Index(expected)
result = recode_for_categories(codes, old, new)
result = recode_for_categories(codes, old, new, copy=True)
tm.assert_numpy_array_equal(result, expected)
8 changes: 4 additions & 4 deletions pandas/tests/arrays/categorical/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ def test_categories_match_up_to_permutation(self):

def test_set_dtype_same(self):
c = Categorical(["a", "b", "c"])
result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
result = c._set_dtype(CategoricalDtype(["a", "b", "c"]), copy=True)
tm.assert_categorical_equal(result, c)

def test_set_dtype_new_categories(self):
c = Categorical(["a", "b", "c"])
result = c._set_dtype(CategoricalDtype(list("abcd")))
result = c._set_dtype(CategoricalDtype(list("abcd")), copy=True)
tm.assert_numpy_array_equal(result.codes, c.codes)
tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))

Expand Down Expand Up @@ -86,12 +86,12 @@ def test_set_dtype_new_categories(self):
def test_set_dtype_many(self, values, categories, new_categories, ordered):
c = Categorical(values, categories)
expected = Categorical(values, new_categories, ordered)
result = c._set_dtype(expected.dtype)
result = c._set_dtype(expected.dtype, copy=True)
tm.assert_categorical_equal(result, expected)

def test_set_dtype_no_overlap(self):
c = Categorical(["a", "b", "c"], ["d", "e"])
result = c._set_dtype(CategoricalDtype(["a", "b"]))
result = c._set_dtype(CategoricalDtype(["a", "b"]), copy=True)
expected = Categorical([None, None, None], categories=["a", "b"])
tm.assert_categorical_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/categorical/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_nan_handling(self):

def test_set_dtype_nans(self):
c = Categorical(["a", "b", np.nan])
result = c._set_dtype(CategoricalDtype(["a", "c"]))
result = c._set_dtype(CategoricalDtype(["a", "c"]), copy=True)
tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))

def test_set_item_nan(self):
Expand Down
Loading