@@ -21,14 +21,24 @@ use std::sync::Arc;
2121use std:: sync:: atomic:: AtomicU64 ;
2222
2323use crate :: Result ;
24- use crate :: spec:: { DataFileFormat , TableMetadata } ;
24+ use crate :: spec:: { DataFileFormat , PartitionKey , TableMetadata } ;
2525
2626/// `LocationGenerator` used to generate the location of data file.
2727pub trait LocationGenerator : Clone + Send + ' static {
28- /// Generate an absolute path for the given file name.
29- /// e.g.
30- /// For file name "part-00000.parquet", the generated location maybe "/table/data/part-00000.parquet"
31- fn generate_location ( & self , file_name : & str ) -> String ;
28+ /// Generate an absolute path for the given file name that includes the partition path.
29+ ///
30+ /// # Arguments
31+ ///
32+ /// * `partition_key` - The partition key of the file. If None, generate a non-partitioned path.
33+ /// * `file_name` - The name of the file
34+ ///
35+ /// # Returns
36+ ///
37+ /// An absolute path that includes the partition path, e.g.,
38+ /// "/table/data/id=1/name=alice/part-00000.parquet"
39+ /// or non-partitioned path:
40+ /// "/table/data/part-00000.parquet"
41+ fn generate_location ( & self , partition_key : Option < & PartitionKey > , file_name : & str ) -> String ;
3242}
3343
3444const WRITE_DATA_LOCATION : & str = "write.data.path" ;
@@ -39,29 +49,38 @@ const DEFAULT_DATA_DIR: &str = "/data";
/// `DefaultLocationGenerator` used to generate the data dir location of data file.
/// The location is generated based on the table location and the data location in table properties.
pub struct DefaultLocationGenerator {
    /// Directory under which data files are placed; every generated location
    /// starts with this prefix.
    data_location: String,
}
4454
4555impl DefaultLocationGenerator {
4656 /// Create a new `DefaultLocationGenerator`.
4757 pub fn new ( table_metadata : TableMetadata ) -> Result < Self > {
4858 let table_location = table_metadata. location ( ) ;
4959 let prop = table_metadata. properties ( ) ;
50- let data_location = prop
60+ let configured_data_location = prop
5161 . get ( WRITE_DATA_LOCATION )
5262 . or ( prop. get ( WRITE_FOLDER_STORAGE_LOCATION ) ) ;
53- let dir_path = if let Some ( data_location) = data_location {
63+ let data_location = if let Some ( data_location) = configured_data_location {
5464 data_location. clone ( )
5565 } else {
5666 format ! ( "{}{}" , table_location, DEFAULT_DATA_DIR )
5767 } ;
58- Ok ( Self { dir_path } )
68+ Ok ( Self { data_location } )
5969 }
6070}
6171
6272impl LocationGenerator for DefaultLocationGenerator {
63- fn generate_location ( & self , file_name : & str ) -> String {
64- format ! ( "{}/{}" , self . dir_path, file_name)
73+ fn generate_location ( & self , partition_key : Option < & PartitionKey > , file_name : & str ) -> String {
74+ if PartitionKey :: is_effectively_none ( partition_key) {
75+ format ! ( "{}/{}" , self . data_location, file_name)
76+ } else {
77+ format ! (
78+ "{}/{}/{}" ,
79+ self . data_location,
80+ partition_key. unwrap( ) . to_path( ) ,
81+ file_name
82+ )
83+ }
6584 }
6685}
6786
@@ -115,11 +134,15 @@ impl FileNameGenerator for DefaultFileNameGenerator {
115134#[ cfg( test) ]
116135pub ( crate ) mod test {
117136 use std:: collections:: HashMap ;
137+ use std:: sync:: Arc ;
118138
119139 use uuid:: Uuid ;
120140
121141 use super :: LocationGenerator ;
122- use crate :: spec:: { FormatVersion , PartitionSpec , StructType , TableMetadata } ;
142+ use crate :: spec:: {
143+ FormatVersion , Literal , NestedField , PartitionKey , PartitionSpec , PrimitiveType , Schema ,
144+ Struct , StructType , TableMetadata , Transform , Type ,
145+ } ;
123146 use crate :: writer:: file_writer:: location_generator:: {
124147 FileNameGenerator , WRITE_DATA_LOCATION , WRITE_FOLDER_STORAGE_LOCATION ,
125148 } ;
@@ -136,8 +159,17 @@ pub(crate) mod test {
136159 }
137160
138161 impl LocationGenerator for MockLocationGenerator {
139- fn generate_location ( & self , file_name : & str ) -> String {
140- format ! ( "{}/{}" , self . root, file_name)
162+ fn generate_location ( & self , partition : Option < & PartitionKey > , file_name : & str ) -> String {
163+ if PartitionKey :: is_effectively_none ( partition) {
164+ format ! ( "{}/{}" , self . root, file_name)
165+ } else {
166+ format ! (
167+ "{}/{}/{}" ,
168+ self . root,
169+ partition. unwrap( ) . to_path( ) ,
170+ file_name
171+ )
172+ }
141173 }
142174 }
143175
@@ -169,7 +201,7 @@ pub(crate) mod test {
169201 encryption_keys : HashMap :: new ( ) ,
170202 } ;
171203
172- let file_name_genertaor = super :: DefaultFileNameGenerator :: new (
204+ let file_name_generator = super :: DefaultFileNameGenerator :: new (
173205 "part" . to_string ( ) ,
174206 Some ( "test" . to_string ( ) ) ,
175207 crate :: spec:: DataFileFormat :: Parquet ,
@@ -179,7 +211,7 @@ pub(crate) mod test {
179211 let location_generator =
180212 super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ;
181213 let location =
182- location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ;
214+ location_generator. generate_location ( None , & file_name_generator . generate_file_name ( ) ) ;
183215 assert_eq ! ( location, "s3://data.db/table/data/part-00000-test.parquet" ) ;
184216
185217 // test custom data location
@@ -190,7 +222,7 @@ pub(crate) mod test {
190222 let location_generator =
191223 super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ;
192224 let location =
193- location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ;
225+ location_generator. generate_location ( None , & file_name_generator . generate_file_name ( ) ) ;
194226 assert_eq ! (
195227 location,
196228 "s3://data.db/table/data_1/part-00001-test.parquet"
@@ -203,7 +235,7 @@ pub(crate) mod test {
203235 let location_generator =
204236 super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ;
205237 let location =
206- location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ;
238+ location_generator. generate_location ( None , & file_name_generator . generate_file_name ( ) ) ;
207239 assert_eq ! (
208240 location,
209241 "s3://data.db/table/data_2/part-00002-test.parquet"
@@ -217,7 +249,79 @@ pub(crate) mod test {
217249 let location_generator =
218250 super :: DefaultLocationGenerator :: new ( table_metadata. clone ( ) ) . unwrap ( ) ;
219251 let location =
220- location_generator. generate_location ( & file_name_genertaor . generate_file_name ( ) ) ;
252+ location_generator. generate_location ( None , & file_name_generator . generate_file_name ( ) ) ;
221253 assert_eq ! ( location, "s3://data.db/data_3/part-00003-test.parquet" ) ;
222254 }
255+
/// Checks that both the mock and the default location generators insert the
/// partition path (`id=42/name=alice`) between the data directory and the file name.
#[test]
fn test_location_generate_with_partition() {
    // Create a schema with two fields: id (int) and name (string)
    let schema = Arc::new(
        Schema::builder()
            .with_schema_id(1)
            .with_fields(vec![
                NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
                NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
            ])
            .build()
            .unwrap(),
    );

    // Create a partition spec with both fields
    let partition_spec = PartitionSpec::builder(schema.clone())
        .add_partition_field("id", "id", Transform::Identity)
        .unwrap()
        .add_partition_field("name", "name", Transform::Identity)
        .unwrap()
        .build()
        .unwrap();

    // Create partition data with values
    let partition_data =
        Struct::from_iter([Some(Literal::int(42)), Some(Literal::string("alice"))]);

    // Create a partition key
    let partition_key = PartitionKey::new(partition_spec, schema, partition_data);

    // Test with MockLocationGenerator
    let mock_location_gen = MockLocationGenerator::new("/base/path".to_string());
    let file_name = "data-00000.parquet";
    let location = mock_location_gen.generate_location(Some(&partition_key), file_name);
    assert_eq!(location, "/base/path/id=42/name=alice/data-00000.parquet");

    // Create a table metadata for DefaultLocationGenerator
    let table_metadata = TableMetadata {
        format_version: FormatVersion::V2,
        table_uuid: Uuid::parse_str("fb072c92-a02b-11e9-ae9c-1bb7bc9eca94").unwrap(),
        location: "s3://data.db/table".to_string(),
        last_updated_ms: 1515100955770,
        last_column_id: 2,
        schemas: HashMap::new(),
        current_schema_id: 1,
        partition_specs: HashMap::new(),
        default_spec: PartitionSpec::unpartition_spec().into(),
        default_partition_type: StructType::new(vec![]),
        last_partition_id: 1000,
        default_sort_order_id: 0,
        sort_orders: HashMap::from_iter(vec![]),
        snapshots: HashMap::default(),
        current_snapshot_id: None,
        last_sequence_number: 1,
        properties: HashMap::new(),
        snapshot_log: Vec::new(),
        metadata_log: vec![],
        refs: HashMap::new(),
        statistics: HashMap::new(),
        partition_statistics: HashMap::new(),
        encryption_keys: HashMap::new(),
    };

    // Test with DefaultLocationGenerator; no data-location property is set, so the
    // table location plus the default data directory is used as the base.
    let default_location_gen = super::DefaultLocationGenerator::new(table_metadata).unwrap();
    let location = default_location_gen.generate_location(Some(&partition_key), file_name);
    assert_eq!(
        location,
        "s3://data.db/table/data/id=42/name=alice/data-00000.parquet"
    );
}
223327}
0 commit comments