@@ -136,6 +136,14 @@ impl ExecutionPlan for IcebergCommitExec {
136136 vec ! [ & self . input]
137137 }
138138
139+ fn required_input_distribution ( & self ) -> Vec < datafusion:: physical_plan:: Distribution > {
140+ vec ! [ datafusion:: physical_plan:: Distribution :: SinglePartition ; self . children( ) . len( ) ]
141+ }
142+
143+ fn benefits_from_input_partitioning ( & self ) -> Vec < bool > {
144+ vec ! [ false ]
145+ }
146+
139147 fn with_new_children (
140148 self : Arc < Self > ,
141149 children : Vec < Arc < dyn ExecutionPlan > > ,
@@ -262,14 +270,16 @@ mod tests {
262270 use std:: fmt;
263271 use std:: sync:: Arc ;
264272
265- use datafusion:: arrow:: array:: { ArrayRef , RecordBatch , StringArray } ;
273+ use datafusion:: arrow:: array:: { ArrayRef , Int32Array , RecordBatch , StringArray , UInt64Array } ;
266274 use datafusion:: arrow:: datatypes:: { DataType , Field , Schema as ArrowSchema } ;
275+ use datafusion:: datasource:: MemTable ;
267276 use datafusion:: execution:: context:: TaskContext ;
268277 use datafusion:: physical_expr:: { EquivalenceProperties , Partitioning } ;
269278 use datafusion:: physical_plan:: common:: collect;
270279 use datafusion:: physical_plan:: execution_plan:: Boundedness ;
271280 use datafusion:: physical_plan:: stream:: RecordBatchStreamAdapter ;
272281 use datafusion:: physical_plan:: { DisplayAs , DisplayFormatType , ExecutionPlan , PlanProperties } ;
282+ use datafusion:: prelude:: * ;
273283 use futures:: StreamExt ;
274284 use iceberg:: memory:: { MEMORY_CATALOG_WAREHOUSE , MemoryCatalogBuilder } ;
275285 use iceberg:: spec:: {
@@ -280,6 +290,7 @@ mod tests {
280290
281291 use super :: * ;
282292 use crate :: physical_plan:: DATA_FILES_COL_NAME ;
293+ use crate :: table:: IcebergTableProvider ;
283294
284295 // A mock execution plan that returns record batches with serialized data files
285296 #[ derive( Debug ) ]
@@ -510,4 +521,102 @@ mod tests {
510521
511522 Ok ( ( ) )
512523 }
524+
525+ #[ tokio:: test]
526+ async fn test_datafusion_execution_partitioned_source ( ) -> Result < ( ) , Box < dyn std:: error:: Error > >
527+ {
528+ let catalog = Arc :: new (
529+ MemoryCatalogBuilder :: default ( )
530+ . load (
531+ "memory" ,
532+ HashMap :: from ( [ (
533+ MEMORY_CATALOG_WAREHOUSE . to_string ( ) ,
534+ "memory://root" . to_string ( ) ,
535+ ) ] ) ,
536+ )
537+ . await ?,
538+ ) ;
539+
540+ let namespace = NamespaceIdent :: new ( "test_namespace" . to_string ( ) ) ;
541+ catalog. create_namespace ( & namespace, HashMap :: new ( ) ) . await ?;
542+
543+ let schema = Schema :: builder ( )
544+ . with_schema_id ( 1 )
545+ . with_fields ( vec ! [
546+ NestedField :: required( 1 , "id" , Type :: Primitive ( PrimitiveType :: Int ) ) . into( ) ,
547+ NestedField :: required( 2 , "name" , Type :: Primitive ( PrimitiveType :: String ) ) . into( ) ,
548+ ] )
549+ . build ( ) ?;
550+
551+ let table_name = "test_table" ;
552+ let table_creation = TableCreation :: builder ( )
553+ . name ( table_name. to_string ( ) )
554+ . schema ( schema)
555+ . location ( "memory://root/test_table" . to_string ( ) )
556+ . properties ( HashMap :: new ( ) )
557+ . build ( ) ;
558+ let _ = catalog. create_table ( & namespace, table_creation) . await ?;
559+
560+ let arrow_schema = Arc :: new ( ArrowSchema :: new ( vec ! [
561+ Field :: new( "id" , DataType :: Int32 , false ) ,
562+ Field :: new( "name" , DataType :: Utf8 , false ) ,
563+ ] ) ) ;
564+
565+ let batches: Vec < RecordBatch > = ( 1 ..4 )
566+ . map ( |idx| {
567+ RecordBatch :: try_new ( arrow_schema. clone ( ) , vec ! [
568+ Arc :: new( Int32Array :: from( vec![ idx] ) ) as ArrayRef ,
569+ Arc :: new( StringArray :: from( vec![ format!( "Name{idx}" ) ] ) ) as ArrayRef ,
570+ ] )
571+ } )
572+ . collect :: < Result < _ , _ > > ( ) ?;
573+
574+ // Create DataFusion context with specific partition configuration
575+ let mut config = SessionConfig :: new ( ) ;
576+ config = config. set_usize ( "datafusion.execution.target_partitions" , 8 ) ;
577+ let ctx = SessionContext :: new_with_config ( config) ;
578+
579+ // Create multiple partitions - each batch becomes a separate partition
580+ let partitions: Vec < Vec < RecordBatch > > =
581+ batches. into_iter ( ) . map ( |batch| vec ! [ batch] ) . collect ( ) ;
582+ let source_table = Arc :: new ( MemTable :: try_new ( Arc :: clone ( & arrow_schema) , partitions) ?) ;
583+ ctx. register_table ( "source_table" , source_table) ?;
584+
585+ let iceberg_table_provider = IcebergTableProvider :: try_new (
586+ catalog. clone ( ) ,
587+ namespace. clone ( ) ,
588+ table_name. to_string ( ) ,
589+ )
590+ . await ?;
591+ ctx. register_table ( "iceberg_table" , Arc :: new ( iceberg_table_provider) ) ?;
592+
593+ let insert_plan = ctx
594+ . sql ( "INSERT INTO iceberg_table SELECT * FROM source_table" )
595+ . await ?;
596+
597+ let physical_plan = insert_plan. create_physical_plan ( ) . await ?;
598+
599+ let actual_plan = format ! (
600+ "{}" ,
601+ datafusion:: physical_plan:: displayable( physical_plan. as_ref( ) ) . indent( false )
602+ ) ;
603+
604+ println ! ( "Physical plan:\n {actual_plan}" ) ;
605+
606+ let expected_plan = "\
607+ IcebergCommitExec: table=test_namespace.test_table
608+ CoalescePartitionsExec
609+ IcebergWriteExec: table=test_namespace.test_table
610+ DataSourceExec: partitions=3, partition_sizes=[1, 1, 1]" ;
611+
612+ assert_eq ! (
613+ actual_plan. trim( ) ,
614+ expected_plan. trim( ) ,
615+ "Physical plan does not match expected\n \n Expected:\n {}\n \n Actual:\n {}" ,
616+ expected_plan. trim( ) ,
617+ actual_plan. trim( )
618+ ) ;
619+
620+ Ok ( ( ) )
621+ }
513622}
0 commit comments