diff --git a/python/pyarrow/interchange/column.py b/python/pyarrow/interchange/column.py
index a9b89586165fa..eaf7834d5b563 100644
--- a/python/pyarrow/interchange/column.py
+++ b/python/pyarrow/interchange/column.py
@@ -312,7 +312,10 @@ def _dtype_from_arrowdtype(
             return kind, bit_width, f_string, Endianness.NATIVE
         elif pa.types.is_dictionary(dtype):
             kind = DtypeKind.CATEGORICAL
-            f_string = "L"
+            # Use the indices' own format string, not a hard-coded "L".
+            arr = self._col
+            indices_dtype = arr.indices.type
+            _, f_string = _PYARROW_KINDS.get(indices_dtype)
             return kind, bit_width, f_string, Endianness.NATIVE
         else:
             kind, f_string = _PYARROW_KINDS.get(dtype, (None, None))
diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py
index 7b2b8eb7208de..826089652bca6 100644
--- a/python/pyarrow/tests/interchange/test_interchange_spec.py
+++ b/python/pyarrow/tests/interchange/test_interchange_spec.py
@@ -266,3 +266,24 @@ def test_buffer(int, use_batch):
     for idx, truth in enumerate(arr):
         val = ctype.from_address(dataBuf.ptr + idx * (bitwidth // 8)).value
         assert val == truth, f"Buffer at index {idx} mismatch"
+
+
+@pytest.mark.parametrize(
+    "indices_type, bitwidth, f_string", [
+        (pa.int8(), 8, "c"),
+        (pa.int16(), 16, "s"),
+        (pa.int32(), 32, "i"),
+        (pa.int64(), 64, "l")
+    ]
+)
+def test_categorical_dtype(indices_type, bitwidth, f_string):
+    """Check a dictionary column's dtype kind, bit width and format."""
+    dict_type = pa.dictionary(indices_type, pa.string())
+    arr = pa.array(["a", "b", None, "d"], dict_type)
+    table = pa.table({'a': arr})
+
+    df = table.__dataframe__()
+    col = df.get_column(0)
+    assert col.dtype[0] == 23  # DtypeKind.CATEGORICAL
+    assert col.dtype[1] == bitwidth
+    assert col.dtype[2] == f_string