diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 304ba032b416a..a45ed720b9ce3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -684,9 +684,8 @@ public void initialize(URI name, Configuration originalConf) s3ExpressStore = isS3ExpressStore(bucket, endpoint); // should the delete also purge uploads? - // happens if explicitly enabled, or if the store is S3Express storage. dirOperationsPurgeUploads = conf.getBoolean(DIRECTORY_OPERATIONS_PURGE_UPLOADS, - s3ExpressStore); + DIRECTORY_OPERATIONS_PURGE_UPLOADS_DEFAULT); this.isMultipartUploadEnabled = conf.getBoolean(MULTIPART_UPLOADS_ENABLED, DEFAULT_MULTIPART_UPLOAD_ENABLED); diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index 6520e0dc02620..151ee5bd8a465 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -1218,10 +1218,29 @@ java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multi This can happen when all outstanding uploads have been aborted, including the active ones. -If the bucket has a lifecycle policy of deleting multipart uploads, make sure -that the expiry time of the deletion is greater than that required for all open -writes to complete the write, -*and for all jobs using the S3A committers to commit their work.* +When working with S3A committers and multipart uploads (MPUs), consider these important guidelines: + +1. 
**Bucket Lifecycle Policies:** + - Check whether your bucket has a lifecycle policy for deleting multipart uploads + - If it does, set the deletion expiry time long enough to: + - Complete all open write operations + - Allow S3A committers to finish their commit process + +2. **Directory Operations and MPUs:** + - Setting `fs.s3a.directory.operations.purge.uploads=true` will abort all pending MPUs before directory cleanup + - For jobs using S3A committers: + - Set `fs.s3a.directory.operations.purge.uploads=false` when directories need to be overwritten before job completion + - This prevents active uploads from being accidentally aborted during the commit phase + + +### S3 Express Store directory object not getting deleted + +When working with S3 Express store buckets (unlike standard S3 buckets), note the following when deleting a directory object: + +1. Set `fs.s3a.directory.operations.purge.uploads=true` if you need to delete a directory object that has pending multipart uploads (MPUs). + +2. This setting ensures that all pending MPUs are aborted before the directory object is deleted, which is a requirement specific to S3 Express store buckets. + ### Application hangs after reading a number of files