From 204beba9202211fadbd8224085264f4c2f0a223f Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 14 Mar 2024 23:39:34 +0100 Subject: [PATCH 1/7] Extract object logic to separate function --- crates/polars-core/src/series/any_value.rs | 85 ++++++++++++---------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index b59eeac8a70a..b6f95adcbf0d 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -1,5 +1,7 @@ use std::fmt::Write; +#[cfg(feature = "object")] +use crate::chunked_array::object::registry::ObjectRegistry; use crate::prelude::*; use crate::utils::try_get_supertype; @@ -135,43 +137,6 @@ impl Series { } return StructChunked::new(name, &series_fields).map(|ca| ca.into_series()); }, - #[cfg(feature = "object")] - DataType::Object(_, registry) => { - match registry { - None => { - use crate::chunked_array::object::registry; - let converter = registry::get_object_converter(); - let mut builder = registry::get_object_builder(name, av.len()); - for av in av { - match av { - AnyValue::Object(val) => builder.append_value(val.as_any()), - AnyValue::Null => builder.append_null(), - _ => { - // This is needed because in python people can send mixed types. - // This only works if you set a global converter. - let any = converter(av.as_borrowed()); - builder.append_value(&*any) - }, - } - } - return Ok(builder.to_series()); - }, - Some(registry) => { - let mut builder = (*registry.builder_constructor)(name, av.len()); - for av in av { - match av { - AnyValue::Object(val) => builder.append_value(val.as_any()), - AnyValue::Null => builder.append_null(), - _ => { - polars_bail!(ComputeError: "expected object"); - }, - } - } - return Ok(builder.to_series()); - }, - } - }, - DataType::Null => Series::new_null(name, av.len()), #[cfg(feature = "dtype-categorical")] dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => { let ca = if let Some(single_av) = av.first() { @@ -191,6 +156,9 @@ impl Series { ca.cast(dt).unwrap() }, + #[cfg(feature = "object")] + DataType::Object(_, registry) => any_values_to_object(av, registry, name)?, + DataType::Null => Series::new_null(name, av.len()), dt => panic!("{dt:?} not supported"), }; s.rename(name); @@ -637,6 +605,49 @@ fn any_values_to_list( Ok(out) } +#[cfg(feature = "object")] +fn any_values_to_object( + values: &[AnyValue], + registry: &Option>, + name: &str, +) -> PolarsResult { + let mut builder = match registry { + None => { + use crate::chunked_array::object::registry; + let converter = registry::get_object_converter(); + let mut builder = registry::get_object_builder(name, values.len()); + for av in values { + match av { + AnyValue::Object(val) => builder.append_value(val.as_any()), + AnyValue::Null => builder.append_null(), + _ => { + // This is needed because in Python users can send mixed types. + // This only works if you set a global converter. + let any = converter(av.as_borrowed()); + builder.append_value(&*any) + }, + } + } + builder + }, + Some(registry) => { + let mut builder = (*registry.builder_constructor)(name, values.len()); + for av in values { + match av { + AnyValue::Object(val) => builder.append_value(val.as_any()), + AnyValue::Null => builder.append_null(), + _ => { + polars_bail!(ComputeError: "expected object"); + }, + } + } + builder + }, + }; + + Ok(builder.to_series()) +} + fn invalid_value_error(dtype: &DataType, value: &AnyValue) -> PolarsError { polars_err!( SchemaMismatch: From 183e780b60f79a95915690dde75b948cecb4974b Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 14 Mar 2024 23:41:14 +0100 Subject: [PATCH 2/7] Switch methods --- crates/polars-core/src/series/any_value.rs | 120 ++++++++++----------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index b6f95adcbf0d..7a4614289859 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -13,6 +13,66 @@ impl<'a, T: AsRef<[AnyValue<'a>]>> NamedFrom]> for Series { } impl Series { + /// Construct a new [`Series`] from a slice of AnyValues. + /// + /// The data type of the resulting Series is determined by the `values` + /// and the `strict` parameter: + /// - If `strict` is `true`, the data type is equal to the data type of the + /// first non-null value. If any other non-null values do not match this + /// data type, an error is raised. + /// - If `strict` is `false`, the data type is the supertype of the `values`. + /// An error is returned if no supertype can be determined. + /// **WARNING**: A full pass over the values is required to determine the supertype. + /// - If no values were passed, the resulting data type is `Null`. + pub fn from_any_values(name: &str, values: &[AnyValue], strict: bool) -> PolarsResult { + fn get_first_non_null_dtype(values: &[AnyValue]) -> DataType { + let mut all_flat_null = true; + let first_non_null = values.iter().find(|av| { + if !av.is_null() { + all_flat_null = false + }; + !av.is_nested_null() + }); + match first_non_null { + Some(av) => av.dtype(), + None => { + if all_flat_null { + DataType::Null + } else { + // Second pass to check for the nested null value that + // toggled `all_flat_null` to false, e.g. a List(Null) + let first_nested_null = values.iter().find(|av| !av.is_null()).unwrap(); + first_nested_null.dtype() + } + }, + } + } + fn get_any_values_supertype(values: &[AnyValue]) -> PolarsResult { + let mut supertype = DataType::Null; + let mut dtypes = PlHashSet::::new(); + for av in values { + if dtypes.insert(av.dtype()) { + supertype = try_get_supertype(&supertype, &av.dtype()).map_err(|_| { + polars_err!( + SchemaMismatch: + "failed to infer supertype of values; partial supertype is {:?}, found value of type {:?}: {}", + supertype, av.dtype(), av + ) + } + )?; + } + } + Ok(supertype) + } + + let dtype = if strict { + get_first_non_null_dtype(values) + } else { + get_any_values_supertype(values)? + }; + Self::from_any_values_and_dtype(name, values, &dtype, strict) + } + /// Construct a new [`Series`]` with the given `dtype` from a slice of AnyValues. pub fn from_any_values_and_dtype( name: &str, @@ -164,66 +224,6 @@ impl Series { s.rename(name); Ok(s) } - - /// Construct a new [`Series`] from a slice of AnyValues. - /// - /// The data type of the resulting Series is determined by the `values` - /// and the `strict` parameter: - /// - If `strict` is `true`, the data type is equal to the data type of the - /// first non-null value. If any other non-null values do not match this - /// data type, an error is raised. - /// - If `strict` is `false`, the data type is the supertype of the `values`. - /// An error is returned if no supertype can be determined. - /// **WARNING**: A full pass over the values is required to determine the supertype. - /// - If no values were passed, the resulting data type is `Null`. - pub fn from_any_values(name: &str, values: &[AnyValue], strict: bool) -> PolarsResult { - fn get_first_non_null_dtype(values: &[AnyValue]) -> DataType { - let mut all_flat_null = true; - let first_non_null = values.iter().find(|av| { - if !av.is_null() { - all_flat_null = false - }; - !av.is_nested_null() - }); - match first_non_null { - Some(av) => av.dtype(), - None => { - if all_flat_null { - DataType::Null - } else { - // Second pass to check for the nested null value that - // toggled `all_flat_null` to false, e.g. a List(Null) - let first_nested_null = values.iter().find(|av| !av.is_null()).unwrap(); - first_nested_null.dtype() - } - }, - } - } - fn get_any_values_supertype(values: &[AnyValue]) -> PolarsResult { - let mut supertype = DataType::Null; - let mut dtypes = PlHashSet::::new(); - for av in values { - if dtypes.insert(av.dtype()) { - supertype = try_get_supertype(&supertype, &av.dtype()).map_err(|_| { - polars_err!( - SchemaMismatch: - "failed to infer supertype of values; partial supertype is {:?}, found value of type {:?}: {}", - supertype, av.dtype(), av - ) - } - )?; - } - } - Ok(supertype) - } - - let dtype = if strict { - get_first_non_null_dtype(values) - } else { - get_any_values_supertype(values)? - }; - Self::from_any_values_and_dtype(name, values, &dtype, strict) - } } fn any_values_to_primitive_nonstrict(values: &[AnyValue]) -> ChunkedArray { From 0c25d6bce3ae421efa53a7782b7e5fa13ae176f4 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Thu, 14 Mar 2024 23:54:21 +0100 Subject: [PATCH 3/7] Extract struct logic --- crates/polars-core/src/series/any_value.rs | 151 +++++++++++---------- 1 file changed, 77 insertions(+), 74 deletions(-) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index 7a4614289859..c39a707611bc 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -123,80 +123,6 @@ impl Series { DataType::Array(inner, size) => any_values_to_array(av, inner, strict, *size)? .into_series() .cast(&DataType::Array(inner.clone(), *size))?, - #[cfg(feature = "dtype-struct")] - DataType::Struct(dtype_fields) => { - // fast path for empty structs - if dtype_fields.is_empty() { - return Ok(StructChunked::full_null(name, av.len()).into_series()); - } - // the physical series fields of the struct - let mut series_fields = Vec::with_capacity(dtype_fields.len()); - for (i, field) in dtype_fields.iter().enumerate() { - let mut field_avs = Vec::with_capacity(av.len()); - - for av in av.iter() { - match av { - AnyValue::StructOwned(payload) => { - // TODO: optimize - let av_fields = &payload.1; - let av_values = &payload.0; - - let mut append_by_search = || { - // search for the name - let mut pushed = false; - for (av_fld, av_val) in av_fields.iter().zip(av_values) { - if av_fld.name == field.name { - field_avs.push(av_val.clone()); - pushed = true; - break; - } - } - if !pushed { - field_avs.push(AnyValue::Null) - } - }; - - // all fields are available in this single value - // we can use the index to get value - if dtype_fields.len() == av_fields.len() { - let mut search = false; - for (l, r) in dtype_fields.iter().zip(av_fields.iter()) { - if l.name() != r.name() { - search = true; - } - } - if search { - append_by_search() - } else { - let av_val = - av_values.get(i).cloned().unwrap_or(AnyValue::Null); - field_avs.push(av_val) - } - } - // not all fields are available, we search the proper field - else { - // search for the name - append_by_search() - } - }, - _ => field_avs.push(AnyValue::Null), - } - } - // if the inferred dtype is null, we let auto inference work - let s = if matches!(field.dtype, DataType::Null) { - Series::new(field.name(), &field_avs) - } else { - Series::from_any_values_and_dtype( - field.name(), - &field_avs, - &field.dtype, - strict, - )? - }; - series_fields.push(s) - } - return StructChunked::new(name, &series_fields).map(|ca| ca.into_series()); - }, #[cfg(feature = "dtype-categorical")] dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => { let ca = if let Some(single_av) = av.first() { @@ -216,6 +142,8 @@ impl Series { ca.cast(dt).unwrap() }, + #[cfg(feature = "dtype-struct")] + DataType::Struct(dtype_fields) => any_values_to_struct(av, dtype_fields, name, strict)?, #[cfg(feature = "object")] DataType::Object(_, registry) => any_values_to_object(av, registry, name)?, DataType::Null => Series::new_null(name, av.len()), @@ -605,6 +533,81 @@ fn any_values_to_list( Ok(out) } +#[cfg(feature = "dtype-struct")] +fn any_values_to_struct( + values: &[AnyValue], + fields: &[Field], + name: &str, + strict: bool, +) -> PolarsResult { + // Fast path for empty structs. + if fields.is_empty() { + return Ok(StructChunked::full_null(name, values.len()).into_series()); + } + + // The physical series fields of the struct. + let mut series_fields = Vec::with_capacity(fields.len()); + for (i, field) in fields.iter().enumerate() { + let mut field_avs = Vec::with_capacity(values.len()); + + for av in values.iter() { + match av { + AnyValue::StructOwned(payload) => { + // TODO: Optimize. + let av_fields = &payload.1; + let av_values = &payload.0; + + let mut append_by_search = || { + // Search for the name. + let mut pushed = false; + for (av_fld, av_val) in av_fields.iter().zip(av_values) { + if av_fld.name == field.name { + field_avs.push(av_val.clone()); + pushed = true; + break; + } + } + if !pushed { + field_avs.push(AnyValue::Null) + } + }; + + // All fields are available in this single value. + // We can use the index to get value. + if fields.len() == av_fields.len() { + let mut search = false; + for (l, r) in fields.iter().zip(av_fields.iter()) { + if l.name() != r.name() { + search = true; + } + } + if search { + append_by_search() + } else { + let av_val = av_values.get(i).cloned().unwrap_or(AnyValue::Null); + field_avs.push(av_val) + } + } + // Not all fields are available, we search the proper field. + else { + // Search for the name. + append_by_search() + } + }, + _ => field_avs.push(AnyValue::Null), + } + } + // If the inferred dtype is null, we let auto inference work. + let s = if matches!(field.dtype, DataType::Null) { + Series::new(field.name(), &field_avs) + } else { + Series::from_any_values_and_dtype(field.name(), &field_avs, &field.dtype, strict)? + }; + series_fields.push(s) + } + StructChunked::new(name, &series_fields).map(|ca| ca.into_series()) +} + #[cfg(feature = "object")] fn any_values_to_object( values: &[AnyValue], From d7d86e50f5626a9ea1c86922e8fbcbd86ca601a7 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 15 Mar 2024 00:10:24 +0100 Subject: [PATCH 4/7] Extract categorical --- crates/polars-core/src/series/any_value.rs | 181 +++++++++++---------- 1 file changed, 96 insertions(+), 85 deletions(-) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index c39a707611bc..e53b0a44ce4f 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -76,77 +76,64 @@ impl Series { /// Construct a new [`Series`]` with the given `dtype` from a slice of AnyValues. pub fn from_any_values_and_dtype( name: &str, - av: &[AnyValue], + values: &[AnyValue], dtype: &DataType, strict: bool, ) -> PolarsResult { let mut s = match dtype { #[cfg(feature = "dtype-i8")] - DataType::Int8 => any_values_to_integer::(av, strict)?.into_series(), + DataType::Int8 => any_values_to_integer::(values, strict)?.into_series(), #[cfg(feature = "dtype-i16")] - DataType::Int16 => any_values_to_integer::(av, strict)?.into_series(), - DataType::Int32 => any_values_to_integer::(av, strict)?.into_series(), - DataType::Int64 => any_values_to_integer::(av, strict)?.into_series(), + DataType::Int16 => any_values_to_integer::(values, strict)?.into_series(), + DataType::Int32 => any_values_to_integer::(values, strict)?.into_series(), + DataType::Int64 => any_values_to_integer::(values, strict)?.into_series(), #[cfg(feature = "dtype-u8")] - DataType::UInt8 => any_values_to_integer::(av, strict)?.into_series(), + DataType::UInt8 => any_values_to_integer::(values, strict)?.into_series(), #[cfg(feature = "dtype-u16")] - DataType::UInt16 => any_values_to_integer::(av, strict)?.into_series(), - DataType::UInt32 => any_values_to_integer::(av, strict)?.into_series(), - DataType::UInt64 => any_values_to_integer::(av, strict)?.into_series(), - DataType::Float32 => any_values_to_f32(av, strict)?.into_series(), - DataType::Float64 => any_values_to_f64(av, strict)?.into_series(), - DataType::String => any_values_to_string(av, strict)?.into_series(), - DataType::Binary => any_values_to_binary(av, strict)?.into_series(), - DataType::Boolean => any_values_to_bool(av, strict)?.into_series(), + DataType::UInt16 => any_values_to_integer::(values, strict)?.into_series(), + DataType::UInt32 => any_values_to_integer::(values, strict)?.into_series(), + DataType::UInt64 => any_values_to_integer::(values, strict)?.into_series(), + DataType::Float32 => any_values_to_f32(values, strict)?.into_series(), + DataType::Float64 => any_values_to_f64(values, strict)?.into_series(), + DataType::Boolean => any_values_to_bool(values, strict)?.into_series(), + DataType::String => any_values_to_string(values, strict)?.into_series(), + DataType::Binary => any_values_to_binary(values, strict)?.into_series(), #[cfg(feature = "dtype-date")] - DataType::Date => any_values_to_primitive_nonstrict::(av) + DataType::Date => any_values_to_primitive_nonstrict::(values) .into_date() .into_series(), #[cfg(feature = "dtype-datetime")] - DataType::Datetime(tu, tz) => any_values_to_primitive_nonstrict::(av) + DataType::Datetime(tu, tz) => any_values_to_primitive_nonstrict::(values) .into_datetime(*tu, (*tz).clone()) .into_series(), #[cfg(feature = "dtype-time")] - DataType::Time => any_values_to_primitive_nonstrict::(av) + DataType::Time => any_values_to_primitive_nonstrict::(values) .into_time() .into_series(), #[cfg(feature = "dtype-duration")] - DataType::Duration(tu) => any_values_to_primitive_nonstrict::(av) + DataType::Duration(tu) => any_values_to_primitive_nonstrict::(values) .into_duration(*tu) .into_series(), + #[cfg(feature = "dtype-categorical")] + dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => { + any_values_to_categorical(values, dt, strict)? + }, #[cfg(feature = "dtype-decimal")] DataType::Decimal(precision, scale) => { - any_values_to_decimal(av, *precision, *scale)?.into_series() + any_values_to_decimal(values, *precision, *scale)?.into_series() }, - DataType::List(inner) => any_values_to_list(av, inner, strict)?.into_series(), + DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(), #[cfg(feature = "dtype-array")] - DataType::Array(inner, size) => any_values_to_array(av, inner, strict, *size)? + DataType::Array(inner, size) => any_values_to_array(values, inner, strict, *size)? .into_series() .cast(&DataType::Array(inner.clone(), *size))?, - #[cfg(feature = "dtype-categorical")] - dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => { - let ca = if let Some(single_av) = av.first() { - match single_av { - AnyValue::String(_) | AnyValue::StringOwned(_) | AnyValue::Null => { - any_values_to_string(av, strict)? - }, - _ => polars_bail!( - ComputeError: - "categorical dtype with any-values of dtype {} not supported", - single_av.dtype() - ), - } - } else { - StringChunked::full("", "", 0) - }; - - ca.cast(dt).unwrap() - }, #[cfg(feature = "dtype-struct")] - DataType::Struct(dtype_fields) => any_values_to_struct(av, dtype_fields, name, strict)?, + DataType::Struct(dtype_fields) => { + any_values_to_struct(values, dtype_fields, name, strict)? + }, #[cfg(feature = "object")] - DataType::Object(_, registry) => any_values_to_object(av, registry, name)?, - DataType::Null => Series::new_null(name, av.len()), + DataType::Object(_, registry) => any_values_to_object(values, registry, name)?, + DataType::Null => Series::new_null(name, values.len()), dt => panic!("{dt:?} not supported"), }; s.rename(name); @@ -330,6 +317,30 @@ fn any_values_to_binary(values: &[AnyValue], strict: bool) -> PolarsResult PolarsResult { + let ca = if let Some(single_av) = values.first() { + match single_av { + AnyValue::String(_) | AnyValue::StringOwned(_) | AnyValue::Null => { + any_values_to_string(values, strict)? + }, + _ => polars_bail!( + ComputeError: + "categorical dtype with any-values of dtype {} not supported", + single_av.dtype() + ), + } + } else { + StringChunked::full("", "", 0) + }; + + ca.cast(dtype) +} + #[cfg(feature = "dtype-decimal")] fn any_values_to_decimal( avs: &[AnyValue], @@ -397,52 +408,40 @@ fn any_values_to_decimal( builder.finish().into_decimal(precision, scale) } -#[cfg(feature = "dtype-array")] -fn any_values_to_array( +fn any_values_to_list( avs: &[AnyValue], inner_type: &DataType, strict: bool, - width: usize, -) -> PolarsResult { - fn to_arr(s: &Series) -> Option { - if s.chunks().len() > 1 { - let s = s.rechunk(); - Some(s.chunks()[0].clone()) - } else { - Some(s.chunks()[0].clone()) - } - } - - let target_dtype = DataType::Array(Box::new(inner_type.clone()), width); +) -> PolarsResult { + let target_dtype = DataType::List(Box::new(inner_type.clone())); // this is handled downstream. The builder will choose the first non null type let mut valid = true; #[allow(unused_mut)] - let mut out: ArrayChunked = if inner_type == &DataType::Null { + let mut out: ListChunked = if inner_type == &DataType::Null { avs.iter() .map(|av| match av { - AnyValue::List(b) | AnyValue::Array(b, _) => to_arr(b), + AnyValue::List(b) => Some(b.clone()), AnyValue::Null => None, _ => { valid = false; None }, }) - .collect_ca_with_dtype("", target_dtype.clone()) + .collect_trusted() } // make sure that wrongly inferred AnyValues don't deviate from the datatype else { avs.iter() .map(|av| match av { - AnyValue::List(b) | AnyValue::Array(b, _) => { + AnyValue::List(b) => { if b.dtype() == inner_type { - to_arr(b) + Some(b.clone()) } else { - let s = match b.cast(inner_type) { - Ok(out) => out, - Err(_) => Series::full_null(b.name(), b.len(), inner_type), - }; - to_arr(&s) + match b.cast(inner_type) { + Ok(out) => Some(out), + Err(_) => Some(Series::full_null(b.name(), b.len(), inner_type)), + } } }, AnyValue::Null => None, @@ -451,16 +450,12 @@ fn any_values_to_array( None }, }) - .collect_ca_with_dtype("", target_dtype.clone()) + .collect_trusted() }; if strict && !valid { polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", target_dtype); } - polars_ensure!( - out.width() == width, - SchemaMismatch: "got mixed size array widths where width {} was expected", width - ); // Ensure the logical type is correct for nested types #[cfg(feature = "dtype-struct")] @@ -473,40 +468,52 @@ fn any_values_to_array( Ok(out) } -fn any_values_to_list( +#[cfg(feature = "dtype-array")] +fn any_values_to_array( avs: &[AnyValue], inner_type: &DataType, strict: bool, -) -> PolarsResult { - let target_dtype = DataType::List(Box::new(inner_type.clone())); + width: usize, +) -> PolarsResult { + fn to_arr(s: &Series) -> Option { + if s.chunks().len() > 1 { + let s = s.rechunk(); + Some(s.chunks()[0].clone()) + } else { + Some(s.chunks()[0].clone()) + } + } + + let target_dtype = DataType::Array(Box::new(inner_type.clone()), width); // this is handled downstream. The builder will choose the first non null type let mut valid = true; #[allow(unused_mut)] - let mut out: ListChunked = if inner_type == &DataType::Null { + let mut out: ArrayChunked = if inner_type == &DataType::Null { avs.iter() .map(|av| match av { - AnyValue::List(b) => Some(b.clone()), + AnyValue::List(b) | AnyValue::Array(b, _) => to_arr(b), AnyValue::Null => None, _ => { valid = false; None }, }) - .collect_trusted() + .collect_ca_with_dtype("", target_dtype.clone()) } // make sure that wrongly inferred AnyValues don't deviate from the datatype else { avs.iter() .map(|av| match av { - AnyValue::List(b) => { + AnyValue::List(b) | AnyValue::Array(b, _) => { if b.dtype() == inner_type { - Some(b.clone()) + to_arr(b) } else { - match b.cast(inner_type) { - Ok(out) => Some(out), - Err(_) => Some(Series::full_null(b.name(), b.len(), inner_type)), - } + let s = match b.cast(inner_type) { + Ok(out) => out, + Err(_) => Series::full_null(b.name(), b.len(), inner_type), + }; + to_arr(&s) } }, AnyValue::Null => None, @@ -515,12 +522,16 @@ fn any_values_to_list( None }, }) - .collect_trusted() + .collect_ca_with_dtype("", target_dtype.clone()) }; if strict && !valid { polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", target_dtype); } + polars_ensure!( + out.width() == width, + SchemaMismatch: "got mixed size array widths where width {} was expected", width + ); // Ensure the logical type is correct for nested types #[cfg(feature = "dtype-struct")] From 31c37cdce23176fa155a9a2894185f5824e25368 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 15 Mar 2024 00:30:44 +0100 Subject: [PATCH 5/7] Remove name param --- crates/polars-core/src/series/any_value.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index e53b0a44ce4f..d8b5747c52eb 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -74,6 +74,10 @@ impl Series { } /// Construct a new [`Series`]` with the given `dtype` from a slice of AnyValues. + /// + /// If `strict` is `true`, an error is raised if the values do not match the given + /// data type. If `strict` is `false`, values that do not match the given data type + /// are cast. If casting is not possible, the values are set to null instead.` pub fn from_any_values_and_dtype( name: &str, values: &[AnyValue], @@ -128,11 +132,9 @@ impl Series { .into_series() .cast(&DataType::Array(inner.clone(), *size))?, #[cfg(feature = "dtype-struct")] - DataType::Struct(dtype_fields) => { - any_values_to_struct(values, dtype_fields, name, strict)? - }, + DataType::Struct(fields) => any_values_to_struct(values, fields, strict)?, #[cfg(feature = "object")] - DataType::Object(_, registry) => any_values_to_object(values, registry, name)?, + DataType::Object(_, registry) => any_values_to_object(values, registry)?, DataType::Null => Series::new_null(name, values.len()), dt => panic!("{dt:?} not supported"), }; @@ -548,12 +550,11 @@ fn any_values_to_array( fn any_values_to_struct( values: &[AnyValue], fields: &[Field], - name: &str, strict: bool, ) -> PolarsResult { // Fast path for empty structs. if fields.is_empty() { - return Ok(StructChunked::full_null(name, values.len()).into_series()); + return Ok(StructChunked::full_null("", values.len()).into_series()); } // The physical series fields of the struct. @@ -616,20 +617,19 @@ fn any_values_to_struct( }; series_fields.push(s) } - StructChunked::new(name, &series_fields).map(|ca| ca.into_series()) + StructChunked::new("", &series_fields).map(|ca| ca.into_series()) } #[cfg(feature = "object")] fn any_values_to_object( values: &[AnyValue], registry: &Option>, - name: &str, ) -> PolarsResult { let mut builder = match registry { None => { use crate::chunked_array::object::registry; let converter = registry::get_object_converter(); - let mut builder = registry::get_object_builder(name, values.len()); + let mut builder = registry::get_object_builder("", values.len()); for av in values { match av { AnyValue::Object(val) => builder.append_value(val.as_any()), @@ -645,7 +645,7 @@ fn any_values_to_object( builder }, Some(registry) => { - let mut builder = (*registry.builder_constructor)(name, values.len()); + let mut builder = (*registry.builder_constructor)("", values.len()); for av in values { match av { AnyValue::Object(val) => builder.append_value(val.as_any()), From e51de4fa673999da31640dbca927fc06ae4cfada Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 15 Mar 2024 00:37:43 +0100 Subject: [PATCH 6/7] Change panic to InvalidOperation error --- crates/polars-core/src/series/any_value.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index d8b5747c52eb..49a844554120 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -75,7 +75,7 @@ impl Series { /// Construct a new [`Series`]` with the given `dtype` from a slice of AnyValues. /// - /// If `strict` is `true`, an error is raised if the values do not match the given + /// If `strict` is `true`, an error is returned if the values do not match the given /// data type. If `strict` is `false`, values that do not match the given data type /// are cast. If casting is not possible, the values are set to null instead.` pub fn from_any_values_and_dtype( @@ -136,7 +136,12 @@ impl Series { #[cfg(feature = "object")] DataType::Object(_, registry) => any_values_to_object(values, registry)?, DataType::Null => Series::new_null(name, values.len()), - dt => panic!("{dt:?} not supported"), + dt @ (DataType::BinaryOffset | DataType::Unknown) => { + polars_bail!( + InvalidOperation: + "constructing a Series with data type {dt:?} from AnyValues is not supported" + ) + }, }; s.rename(name); Ok(s) From b9829c48c711e356bd5b6f0c82ddbf4bd0b9f2fa Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 15 Mar 2024 00:52:11 +0100 Subject: [PATCH 7/7] Wildcard --- crates/polars-core/src/series/any_value.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-core/src/series/any_value.rs b/crates/polars-core/src/series/any_value.rs index 49a844554120..280a66cae564 100644 --- a/crates/polars-core/src/series/any_value.rs +++ b/crates/polars-core/src/series/any_value.rs @@ -136,7 +136,7 @@ impl Series { #[cfg(feature = "object")] DataType::Object(_, registry) => any_values_to_object(values, registry)?, DataType::Null => Series::new_null(name, values.len()), - dt @ (DataType::BinaryOffset | DataType::Unknown) => { + dt => { polars_bail!( InvalidOperation: "constructing a Series with data type {dt:?} from AnyValues is not supported"