@@ -23,8 +23,7 @@ use aws_sdk_s3::error::SdkError;
2323use aws_sdk_s3:: operation:: create_bucket:: CreateBucketError ;
2424use aws_sdk_s3:: operation:: get_object:: GetObjectOutput ;
2525use aws_sdk_s3:: primitives:: ByteStream ;
26- use aws_sdk_s3:: types:: CompletedMultipartUpload ;
27- use aws_sdk_s3:: types:: CompletedPart ;
26+ use aws_sdk_s3:: types:: { CompletedMultipartUpload , CompletedPart , Delete , ObjectIdentifier } ;
2827use aws_smithy_types:: byte_stream:: Length ;
2928use bytes:: Bytes ;
3029use chroma_config:: registry:: Registry ;
@@ -43,6 +42,12 @@ use std::time::Duration;
4342use tokio:: io:: AsyncReadExt ;
4443use tracing:: Instrument ;
4544
45+ #[ derive( Clone , Debug , Default ) ]
46+ pub struct DeletedObjects {
47+ pub deleted : Vec < String > ,
48+ pub errors : Vec < StorageError > ,
49+ }
50+
4651#[ derive( Clone ) ]
4752pub struct S3Storage {
4853 pub ( super ) bucket : String ,
@@ -659,6 +664,67 @@ impl S3Storage {
659664 }
660665 }
661666
667+ #[ tracing:: instrument( skip( self , keys) , level = "trace" ) ]
668+ pub async fn delete_many < S : AsRef < str > + std:: fmt:: Debug , I : IntoIterator < Item = S > > (
669+ & self ,
670+ keys : I ,
671+ ) -> Result < DeletedObjects , StorageError > {
672+ tracing:: trace!( "Deleting objects from S3" ) ;
673+ let mut objects = vec ! [ ] ;
674+ for key in keys {
675+ objects. push (
676+ ObjectIdentifier :: builder ( )
677+ . key ( key. as_ref ( ) )
678+ . build ( )
679+ . map_err ( |err| StorageError :: Generic {
680+ source : Arc :: new ( err) ,
681+ } ) ?,
682+ ) ;
683+ }
684+ let delete = Delete :: builder ( )
685+ . set_objects ( Some ( objects) )
686+ . build ( )
687+ . map_err ( |err| StorageError :: Generic {
688+ source : Arc :: new ( err) ,
689+ } ) ?;
690+
691+ let req = self
692+ . client
693+ . delete_objects ( )
694+ . bucket ( & self . bucket )
695+ . delete ( delete) ;
696+
697+ match req. send ( ) . await {
698+ Ok ( resp) => {
699+ let mut out = DeletedObjects :: default ( ) ;
700+ for deleted in resp. deleted ( ) {
701+ if let Some ( key) = deleted. key . clone ( ) {
702+ out. deleted . push ( key) ;
703+ }
704+ }
705+ for error in resp. errors ( ) {
706+ out. errors . push ( if Some ( "NoSuchKey" ) == error. code ( ) {
707+ StorageError :: NotFound {
708+ path : error. key . clone ( ) . unwrap_or ( String :: new ( ) ) ,
709+ source : Arc :: new ( StorageError :: Message {
710+ message : format ! ( "{error:#?}" ) ,
711+ } ) ,
712+ }
713+ } else {
714+ StorageError :: Message {
715+ message : format ! ( "{error:#?}" ) ,
716+ }
717+ } ) ;
718+ }
719+ tracing:: trace!( "Successfully deleted objects from S3" ) ;
720+ Ok ( out)
721+ }
722+ Err ( e) => Err ( StorageError :: Generic {
723+ source : Arc :: new ( e) ,
724+ } ) ,
725+ }
726+ }
727+
662728 #[ tracing:: instrument( skip( self ) , level = "trace" ) ]
663729 pub async fn rename ( & self , src_key : & str , dst_key : & str ) -> Result < ( ) , StorageError > {
664730 // S3 doesn't have a native rename operation, so we need to copy and delete
@@ -1214,4 +1280,50 @@ mod tests {
12141280 assert_eq ! ( format!( "test/{:02x}" , i) , * result, "index = {}" , i) ;
12151281 }
12161282 }
1283+
1284+ #[ tokio:: test]
1285+ async fn test_k8s_integration_delete_many ( ) {
1286+ let storage = setup_with_bucket ( 1024 * 1024 * 8 , 1024 * 1024 * 8 ) . await ;
1287+
1288+ // Create every other file (0, 2, 4, 6, 8, 10, 12, 14)
1289+ let mut created_files = Vec :: new ( ) ;
1290+ for i in ( 0 ..16 ) . step_by ( 2 ) {
1291+ let key = format ! ( "test/{:02x}" , i) ;
1292+ storage
1293+ . oneshot_upload (
1294+ & key,
1295+ 0 ,
1296+ |_| Box :: pin ( ready ( Ok ( ByteStream :: from ( Bytes :: new ( ) ) ) ) ) as _ ,
1297+ PutOptions {
1298+ if_not_exists : true ,
1299+ if_match : None ,
1300+ priority : StorageRequestPriority :: P0 ,
1301+ } ,
1302+ )
1303+ . await
1304+ . unwrap ( ) ;
1305+ created_files. push ( key) ;
1306+ }
1307+
1308+ // Try to delete all files (0-15), including ones that don't exist
1309+ let mut all_keys = Vec :: new ( ) ;
1310+ for i in 0 ..16 {
1311+ all_keys. push ( format ! ( "test/{:02x}" , i) ) ;
1312+ }
1313+ let all_key_refs: Vec < & str > = all_keys. iter ( ) . map ( |s| s. as_str ( ) ) . collect ( ) ;
1314+
1315+ let delete_result = storage. delete_many ( & all_key_refs) . await . unwrap ( ) ;
1316+
1317+ // Verify that every created file appears in the deleted files
1318+ for created_file in & created_files {
1319+ assert ! (
1320+ delete_result. deleted. contains( created_file) ,
1321+ "Created file {} should be in deleted files" ,
1322+ created_file
1323+ ) ;
1324+ }
1325+
1326+ eprintln ! ( "Successfully deleted: {:#?}" , delete_result. deleted) ;
1327+ eprintln ! ( "Errors for non-existent files: {:#?}" , delete_result. errors) ;
1328+ }
12171329}
0 commit comments