From 7d6173185bd6b54bcfbc6e1ae098645b9cba2821 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Mon, 22 Jul 2024 21:03:32 -0400 Subject: [PATCH] Correctly handle the case with trailing slash in the service URL The service URL could technically contain a resource. E.g., ``` https://foo.example.com/prefix ``` or simply ``` https://foo.example.com/ ``` In this case, the code logic didn't handle bucket names correctly, generating invalid URLs for directory listings and stat's. This fixes said logic and adds some simple regression test coverage. --- src/S3Commands.cc | 14 +++++++++++++- test/s3_tests.cc | 29 +++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/S3Commands.cc b/src/S3Commands.cc index bb28928..37b3b49 100644 --- a/src/S3Commands.cc +++ b/src/S3Commands.cc @@ -100,10 +100,22 @@ bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, if (m_style == "path") { host = substring(url, hostStartIdx, resourceStartIdx); - path = substring(url, resourceStartIdx) + "/" + bucket + "/" + object; + auto resourcePrefix = substring(url, resourceStartIdx); + if (resourcePrefix[resourcePrefix.size() - 1] == '/') { + resourcePrefix = + substring(resourcePrefix, 0, resourcePrefix.size() - 1); + } + if (bucket.empty()) { + path = resourcePrefix + object; + bucket_path = resourcePrefix + object.substr(0, object.find('/')); + } else { + path = resourcePrefix + "/" + bucket + "/" + object; + bucket_path = resourcePrefix + "/" + bucket; + } } else { host = bucket + "." + substring(url, hostStartIdx, resourceStartIdx); path = substring(url, resourceStartIdx) + object; + bucket_path = "/"; } return true; diff --git a/test/s3_tests.cc b/test/s3_tests.cc index 0711c3b..4c346fc 100644 --- a/test/s3_tests.cc +++ b/test/s3_tests.cc @@ -162,6 +162,23 @@ s3.end } }; +// Regression test for when the service_url ends in a `/` +class FileSystemS3PathBucketSlash : public FileSystemFixtureBase { + protected: + virtual std::string GetConfig() override { + return R"( +s3.begin +s3.path_name /test +s3.service_name s3.amazonaws.com +s3.region us-east-1 +s3.bucket_name genome-browser +s3.service_url https://s3.us-east-1.amazonaws.com/ +s3.url_style path +s3.end +)"; + } +}; + void TestDirectoryContents(S3FileSystem &fs, const std::string &dirname) { std::unique_ptr dir(fs.newDir()); ASSERT_TRUE(dir); @@ -279,6 +296,18 @@ TEST_F(FileSystemS3PathNoBucket, List) { TestDirectoryContents(fs, "/test/genome-browser/cells/tabula-sapiens/"); } +TEST_F(FileSystemS3PathBucketSlash, Stat) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + struct stat buff; + auto rv = fs.Stat("/test/cells/tabula-sapiens/cellbrowser.json.bak", &buff); + ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; +} + +TEST_F(FileSystemS3PathBucketSlash, List) { + S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); + TestDirectoryContents(fs, "/test/cells/tabula-sapiens"); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS();