diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index ad4a5db441b89..67fb075110f0d 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -390,7 +390,9 @@ def from_dummies( The default category is the implied category when a value has none of the listed categories specified with a one, i.e. if all dummies in a row are zero. Can be a single value for all variables or a dict directly mapping - the default categories to a prefix of a variable. + the default categories to a prefix of a variable. The default category + will be coerced to the dtype of ``data.columns`` if such coercion is + lossless; otherwise an error is raised. Returns ------- diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py index c7b7992a78232..dfb691c785404 100644 --- a/pandas/tests/reshape/test_from_dummies.py +++ b/pandas/tests/reshape/test_from_dummies.py @@ -333,9 +333,7 @@ def test_no_prefix_string_cats_default_category( ): dummies = DataFrame({"a": [1, 0, 0], "b": [0, 1, 0]}) result = from_dummies(dummies, default_category=default_category) - expected = DataFrame(expected) - if using_infer_string: - expected[""] = expected[""].astype("str") + expected = DataFrame(expected, dtype=dummies.columns.dtype) tm.assert_frame_equal(result, expected) @@ -449,3 +447,31 @@ def test_maintain_original_index(): result = from_dummies(df) expected = DataFrame({"": list("abca")}, index=list("abcd")) tm.assert_frame_equal(result, expected) + + +def test_int_columns_with_float_default(): + # https://github.com/pandas-dev/pandas/pull/60694 + df = DataFrame( + { + 3: [1, 0, 0], + 4: [0, 1, 0], + }, + ) + with pytest.raises(ValueError, match="Trying to coerce float values to integers"): + from_dummies(df, default_category=0.5) + + +def test_object_dtype_preserved(): + # https://github.com/pandas-dev/pandas/pull/60694 + # When the input columns have object dtype, the result should + # as well, even when infer_string is 
True. + df = DataFrame( + { + "x": [1, 0, 0], + "y": [0, 1, 0], + }, + ) + df.columns = df.columns.astype("object") + result = from_dummies(df, default_category="z") + expected = DataFrame({"": ["x", "y", "z"]}, dtype="object") + tm.assert_frame_equal(result, expected)