Skip to content

Commit

Permalink
Fix corruption due to a data race when S3 initialization is called concurrently
Browse files Browse the repository at this point in the history
  • Loading branch information
TangSiyang2001 committed Feb 24, 2024
1 parent 3486dec commit 32a024a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 5 deletions.
12 changes: 7 additions & 5 deletions cpp/src/arrow/filesystem/s3fs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2898,12 +2898,13 @@ struct AwsInstance {
if (is_finalized_.load()) {
return Status::Invalid("Attempt to initialize S3 after it has been finalized");
}
if (!is_initialized_.exchange(true)) {
// Not already initialized
bool newly_initialized = false;
std::call_once(initialize_flag_, [&]() {
DoInitialize(options);
return true;
}
return false;
is_initialized_.exchange(true);
newly_initialized = true;
});
return newly_initialized;
}

/// Report whether this instance has been initialized and not yet finalized.
///
/// The finalized flag is consulted first; once it is set the result is
/// false regardless of the initialized flag.
bool IsInitialized() {
  if (is_finalized_.load()) {
    return false;
  }
  return is_initialized_.load();
}
Expand Down Expand Up @@ -2979,6 +2980,7 @@ struct AwsInstance {
Aws::SDKOptions aws_options_;
std::atomic<bool> is_initialized_;
std::atomic<bool> is_finalized_;
std::once_flag initialize_flag_;
};

AwsInstance* GetAwsInstance() {
Expand Down
19 changes: 19 additions & 0 deletions python/pyarrow/tests/test_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1904,3 +1904,22 @@ def test_s3_finalize_region_resolver():
resolve_s3_region('voltrondata-labs-datasets')
"""
subprocess.check_call([sys.executable, "-c", code])

@pytest.mark.s3
def test_concurrent_fs_init():
    """Exercise S3 initialization from several threads at once.

    Four threads call ``FileSystem.from_uri`` on an S3 URI concurrently,
    are joined, and then ``finalize_s3`` is called. The test passes when
    the child interpreter exits with status 0.
    """
    # The snippet runs in a child interpreter, mirroring
    # test_s3_finalize_region_resolver, which also calls check_call on an
    # inline snippet. The leading ``if 1:`` lets the snippet body stay
    # indented inside the string literal.
    code = """if 1:
        import threading
        import pytest
        from pyarrow.fs import (FileSystem, S3FileSystem,
                                ensure_s3_initialized, finalize_s3)
        threads = []
        fn = lambda: FileSystem.from_uri('s3://mf-nwp-models/README.txt')
        for i in range(4):
            thread = threading.Thread(target=fn)
            threads.append(thread)
            thread.start()
        for thread in threads:
            thread.join()
        finalize_s3()
        """
    # Actually run the snippet; without this call the test would pass
    # without exercising anything.
    subprocess.check_call([sys.executable, "-c", code])

0 comments on commit 32a024a

Please sign in to comment.