@@ -906,15 +906,53 @@ def test_df_to_pandas_batches(scalars_dfs):
906
906
assert_pandas_df_equal (pd .concat (filtered_batches ), pd_result )
907
907
908
908
909
@pytest.mark.parametrize(
    ("literal", "expected_dtype"),
    (
        pytest.param(
            2,
            dtypes.INT_DTYPE,
            id="INT64",
        ),
        # ====================================================================
        # NULL values
        #
        # These are regression tests for b/428999884. It needs to be possible to
        # set a column to NULL with a desired type (not just the pandas default
        # of float64).
        # ====================================================================
        pytest.param(None, dtypes.FLOAT_DTYPE, id="NULL-None"),
        pytest.param(
            pa.scalar(None, type=pa.int64()),
            dtypes.INT_DTYPE,
            # Each case needs its own id so it can be selected and reported
            # unambiguously; this one is the typed INT64 NULL.
            id="NULL-pyarrow-INT64",
        ),
        pytest.param(
            pa.scalar(None, type=pa.timestamp("us", tz="UTC")),
            dtypes.TIMESTAMP_DTYPE,
            id="NULL-pyarrow-TIMESTAMP",
        ),
        pytest.param(
            pa.scalar(None, type=pa.timestamp("us")),
            dtypes.DATETIME_DTYPE,
            id="NULL-pyarrow-DATETIME",
        ),
    ),
)
def test_assign_new_column_w_literal(scalars_dfs, literal, expected_dtype):
    """Assigning a scalar literal as a new column matches pandas.

    The literal is assigned as ``new_col`` on both the BigQuery DataFrames
    frame and the pandas frame. The pandas column is then cast to
    ``expected_dtype`` before the two results are compared.

    Args:
        scalars_dfs: Fixture unpacked as ``(scalars_df, scalars_pandas_df)``.
        literal: The scalar to assign; may be a Python value or a
            ``pyarrow.Scalar`` (including typed NULLs).
        expected_dtype: The dtype the pandas ``new_col`` is cast to before
            comparison.
    """
    scalars_df, scalars_pandas_df = scalars_dfs
    df = scalars_df.assign(new_col=literal)
    bf_result = df.to_pandas()

    new_col_pd = literal
    if isinstance(literal, pa.Scalar):
        # PyArrow integer scalars aren't yet supported in pandas Int64Dtype.
        new_col_pd = literal.as_py()

    # Pandas might not pick the same dtype as BigFrames, but it should at least
    # be castable to it.
    pd_result = scalars_pandas_df.assign(new_col=new_col_pd)
    pd_result["new_col"] = pd_result["new_col"].astype(expected_dtype)

    assert_pandas_df_equal(bf_result, pd_result)
920
958
0 commit comments