diff --git a/crates/polars-core/src/chunked_array/array/mod.rs b/crates/polars-core/src/chunked_array/array/mod.rs
index c82c73949fd9..291e51ec1532 100644
--- a/crates/polars-core/src/chunked_array/array/mod.rs
+++ b/crates/polars-core/src/chunked_array/array/mod.rs
@@ -29,6 +29,63 @@ impl ArrayChunked {
         fld.coerce(DataType::Array(Box::new(inner_dtype), width))
     }
 
+    /// Convert a non-logical [`ArrayChunked`] back into a logical [`ArrayChunked`] without casting.
+    ///
+    /// The inner values are converted with [`Series::from_physical_unchecked`]; the width and
+    /// the validity of every chunk are carried over unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned while converting the inner values to `to_inner_dtype`.
+    ///
+    /// # Safety
+    ///
+    /// Apart from a debug assertion that the current inner dtype is not logical, nothing is
+    /// validated: the caller must guarantee that the physical inner values are valid for
+    /// `to_inner_dtype`. Otherwise this can lead to invalid memory access in downstream code.
+    pub unsafe fn from_physical_unchecked(&self, to_inner_dtype: DataType) -> PolarsResult<Self> {
+        debug_assert!(!self.inner_dtype().is_logical());
+
+        // Collect the physical inner values of all chunks into a single `Series`.
+        let chunks = self
+            .downcast_iter()
+            .map(|chunk| chunk.values())
+            .cloned()
+            .collect();
+
+        let inner = unsafe {
+            Series::from_chunks_and_dtype_unchecked(PlSmallStr::EMPTY, chunks, self.inner_dtype())
+        };
+        let inner = unsafe { inner.from_physical_unchecked(&to_inner_dtype) }?;
+
+        // Rebuild every chunk around its converted values. NOTE(review): `zip` assumes the
+        // conversion returns exactly one value chunk per original chunk - confirm.
+        let chunks: Vec<_> = self
+            .downcast_iter()
+            .zip(inner.into_chunks())
+            .map(|(chunk, values)| {
+                FixedSizeListArray::new(
+                    ArrowDataType::FixedSizeList(
+                        Box::new(ArrowField::new(
+                            PlSmallStr::from_static("item"),
+                            values.dtype().clone(),
+                            true,
+                        )),
+                        self.width(),
+                    ),
+                    chunk.len(),
+                    values,
+                    chunk.validity().cloned(),
+                )
+                .to_boxed()
+            })
+            .collect();
+
+        let name = self.name().clone();
+        let dtype = DataType::Array(Box::new(to_inner_dtype), self.width());
+        Ok(unsafe { Self::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
+    }
+
     /// Get the inner values as `Series`
     pub fn get_inner(&self) -> Series {
         let chunks: Vec<_> = self.downcast_iter().map(|c| c.values().clone()).collect();
diff --git a/crates/polars-core/src/chunked_array/list/mod.rs b/crates/polars-core/src/chunked_array/list/mod.rs
index 7aff61172f04..8e3a67b348b7 100644
--- a/crates/polars-core/src/chunked_array/list/mod.rs
+++ b/crates/polars-core/src/chunked_array/list/mod.rs
@@ -36,6 +36,67 @@ impl ListChunked {
         fld.coerce(DataType::List(Box::new(inner_dtype)))
     }
 
+    /// Convert a non-logical [`ListChunked`] back into a logical [`ListChunked`] without casting.
+    ///
+    /// The inner values are converted with [`Series::from_physical_unchecked`]; the offsets and
+    /// the validity of every chunk are carried over unchanged.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned while converting the inner values to `to_inner_dtype`.
+    ///
+    /// # Safety
+    ///
+    /// Apart from a debug assertion that the current inner dtype is not logical, nothing is
+    /// validated: the caller must guarantee that the physical inner values are valid for
+    /// `to_inner_dtype`. Otherwise this can lead to invalid memory access in downstream code.
+    pub unsafe fn from_physical_unchecked(
+        &self,
+        to_inner_dtype: DataType,
+    ) -> PolarsResult<ListChunked> {
+        debug_assert!(!self.inner_dtype().is_logical());
+
+        // Collect the physical inner values of all chunks into a single `Series`.
+        let inner_chunks = self
+            .downcast_iter()
+            .map(|chunk| chunk.values())
+            .cloned()
+            .collect();
+
+        let inner = unsafe {
+            Series::from_chunks_and_dtype_unchecked(
+                PlSmallStr::EMPTY,
+                inner_chunks,
+                self.inner_dtype(),
+            )
+        };
+        let inner = unsafe { inner.from_physical_unchecked(&to_inner_dtype) }?;
+
+        // Rebuild every chunk around its converted values. NOTE(review): `zip` assumes the
+        // conversion returns exactly one value chunk per original chunk - confirm.
+        let chunks: Vec<_> = self
+            .downcast_iter()
+            .zip(inner.into_chunks())
+            .map(|(chunk, values)| {
+                LargeListArray::new(
+                    ArrowDataType::LargeList(Box::new(ArrowField::new(
+                        PlSmallStr::from_static("item"),
+                        values.dtype().clone(),
+                        true,
+                    ))),
+                    chunk.offsets().clone(),
+                    values,
+                    chunk.validity().cloned(),
+                )
+                .to_boxed()
+            })
+            .collect();
+
+        let name = self.name().clone();
+        let dtype = DataType::List(Box::new(to_inner_dtype));
+        Ok(unsafe { ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
+    }
+
     /// Get the inner values as [`Series`], ignoring the list offsets.
     pub fn get_inner(&self) -> Series {
         let chunks: Vec<_> = self.downcast_iter().map(|c| c.values().clone()).collect();
diff --git a/crates/polars-core/src/chunked_array/struct_/mod.rs b/crates/polars-core/src/chunked_array/struct_/mod.rs
index 7e45b6ad11ff..2dc07984d8af 100644
--- a/crates/polars-core/src/chunked_array/struct_/mod.rs
+++ b/crates/polars-core/src/chunked_array/struct_/mod.rs
@@ -139,6 +139,47 @@ impl StructChunked {
         constructor(name, length, new_fields.iter())
     }
 
+    /// Convert a non-logical [`StructChunked`] back into a logical [`StructChunked`] without casting.
+    ///
+    /// Each field is converted with [`Series::from_physical_unchecked`] to the dtype of the field
+    /// at the same position in `to_fields`; the result is then zipped with the outer validity of
+    /// `self`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned while converting a field or building the new struct.
+    ///
+    /// # Safety
+    ///
+    /// Apart from debug assertions on the field dtypes and the field count, nothing is validated:
+    /// the caller must guarantee that the physical values of every field are valid for the dtype
+    /// of the matching field in `to_fields`. Otherwise this can lead to invalid memory access in
+    /// downstream code.
+    pub unsafe fn from_physical_unchecked(
+        &self,
+        to_fields: &[Field],
+    ) -> PolarsResult<StructChunked> {
+        if cfg!(debug_assertions) {
+            // `zip` below would silently drop trailing fields on a length mismatch.
+            assert_eq!(self.struct_fields().len(), to_fields.len());
+            for f in self.struct_fields() {
+                assert!(!f.dtype().is_logical());
+            }
+        }
+
+        let length = self.len();
+        let fields = self
+            .fields_as_series()
+            .iter()
+            .zip(to_fields)
+            .map(|(f, to)| unsafe { f.from_physical_unchecked(to.dtype()) })
+            .collect::<PolarsResult<Vec<_>>>()?;
+
+        let mut out = StructChunked::from_series(self.name().clone(), length, fields.iter())?;
+        out.zip_outer_validity(self);
+        Ok(out)
+    }
+
     pub fn struct_fields(&self) -> &[Field] {
         let DataType::Struct(fields) = self.dtype() else {
             unreachable!()
diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs
index 753138166a5f..6767906dc4d2 100644
--- a/crates/polars-core/src/series/mod.rs
+++ b/crates/polars-core/src/series/mod.rs
@@ -547,34 +547,25 @@ impl Series {
             },
             (D::Int64, D::Time) => feature_gated!("dtype-time", Ok(self.clone().into_time())),
 
-            (D::List(_), D::List(to)) => Ok(self
-                .list()
-                .unwrap()
-                .apply_to_inner(&|inner| unsafe { inner.from_physical_unchecked(to) })?
-                .into_series()),
+            (D::List(_), D::List(to)) => unsafe {
+                self.list()
+                    .unwrap()
+                    .from_physical_unchecked(to.as_ref().clone())
+                    .map(|ca| ca.into_series())
+            },
             #[cfg(feature = "dtype-array")]
-            (D::Array(_, lw), D::Array(to, rw)) if lw == rw => Ok(self
-                .array()
-                .unwrap()
-                .apply_to_inner(&|inner| unsafe { inner.from_physical_unchecked(to) })?
-                .into_series()),
+            (D::Array(_, lw), D::Array(to, rw)) if lw == rw => unsafe {
+                self.array()
+                    .unwrap()
+                    .from_physical_unchecked(to.as_ref().clone())
+                    .map(|ca| ca.into_series())
+            },
             #[cfg(feature = "dtype-struct")]
-            (D::Struct(_), D::Struct(to)) => {
-                let slf = self.struct_().unwrap();
-
-                let length = slf.len();
-
-                let fields = slf
-                    .fields_as_series()
-                    .iter()
-                    .zip(to)
-                    .map(|(f, to)| unsafe { f.from_physical_unchecked(to.dtype()) })
-                    .collect::<PolarsResult<Vec<_>>>()?;
-
-                let mut out =
-                    StructChunked::from_series(slf.name().clone(), length, fields.iter())?;
-                out.zip_outer_validity(slf);
-                Ok(out.into_series())
+            (D::Struct(_), D::Struct(to)) => unsafe {
+                self.struct_()
+                    .unwrap()
+                    .from_physical_unchecked(to.as_slice())
+                    .map(|ca| ca.into_series())
             },
 
             _ => panic!("invalid from_physical({dtype:?}) for {:?}", self.dtype()),
@@ -1203,6 +1194,26 @@ mod test {
         let _ = ca.into_series();
     }
 
+    #[test]
+    #[cfg(feature = "dtype-date")]
+    fn roundtrip_list_logical_20311() {
+        let list = ListChunked::from_chunk_iter(
+            PlSmallStr::from_static("a"),
+            [ListArray::new(
+                ArrowDataType::LargeList(Box::new(ArrowField::new(
+                    PlSmallStr::from_static("item"),
+                    ArrowDataType::Int32,
+                    true,
+                ))),
+                unsafe { Offsets::new_unchecked(vec![0, 1]) }.into(),
+                PrimitiveArray::new(ArrowDataType::Int32, vec![1i32].into(), None).to_boxed(),
+                None,
+            )],
+        );
+        let list = unsafe { list.from_physical_unchecked(DataType::Date) }.unwrap();
+        assert_eq!(list.dtype(), &DataType::List(Box::new(DataType::Date)));
+    }
+
     #[test]
     #[cfg(feature = "dtype-struct")]
     fn new_series_from_empty_structs() {