From d55510ab92684fb74f8f055cc03f0694c93c2eba Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sun, 15 Dec 2024 11:10:35 +0000 Subject: [PATCH] Only allow query parameters we know can be part of a sas token --- cpp/src/arrow/filesystem/azurefs.cc | 22 ++++++++++++++++++---- cpp/src/arrow/filesystem/azurefs_test.cc | 10 ++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 297edd480b1c7..cf238647a3368 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -106,6 +106,18 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) { std::string tenant_id; std::string client_id; std::string client_secret; + + // These query parameters are the union of the following docs: + // https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas#specify-the-account-sas-parameters + // https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas#construct-a-service-sas + // (excluding parameters for table storage only) + // https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas#construct-a-user-delegation-sas + constexpr std::array sas_token_query_parameters = { + "sv", "ss", "sr", "st", "se", "sp", "si", "sip", "spr", + "skoid", "sktid", "srt", "skt", "ske", "skv", "sks", "saoid", "suoid", + "scid", "sdd", "ses", "sig", "rscc", "rscd", "rsce", "rscl", "rsct", + }; + ARROW_ASSIGN_OR_RAISE(const auto options_items, uri.query_items()); for (const auto& kv : options_items) { if (kv.first == "blob_storage_authority") { @@ -147,11 +159,13 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) { } else if (kv.first == "background_writes") { ARROW_ASSIGN_OR_RAISE(background_writes, ::arrow::internal::ParseBoolean(kv.second)); - } else { - // Assume these are part of a SAS token. Its not ideal to make such an assumption - // but given that a SAS token is a complex set of URI parameters, that could be - // tricky to exhaustively list I think its the best option. + } else if (std::find(sas_token_query_parameters.begin(), + sas_token_query_parameters.end(), + kv.first) != sas_token_query_parameters.end()) { credential_kind = CredentialKind::kSASToken; + } else { + return Status::Invalid( + "Unexpected query parameter in Azure Blob File System URI: '", kv.first, "'"); } } diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 7abbca7d1d81b..5d649e5084b35 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -744,6 +744,13 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(options.dfs_storage_authority, ".dfs.local"); } + void TestFromUriInvalidQueryParameter() { + ASSERT_RAISES(Invalid, AzureOptions::FromUri( + "abfs://file_system@account.dfs.core.windows.net/dir/file?" + "unknown=invalid", + nullptr)); + } + void TestMakeBlobServiceClientInvalidAccountName() { AzureOptions options; ASSERT_RAISES_WITH_MESSAGE( @@ -809,6 +816,9 @@ TEST_F(TestAzureOptions, FromUriBlobStorageAuthority) { TestFromUriBlobStorageAuthority(); } TEST_F(TestAzureOptions, FromUriDfsStorageAuthority) { TestFromUriDfsStorageAuthority(); } +TEST_F(TestAzureOptions, FromUriInvalidQueryParameter) { + TestFromUriInvalidQueryParameter(); +} TEST_F(TestAzureOptions, MakeBlobServiceClientInvalidAccountName) { TestMakeBlobServiceClientInvalidAccountName(); }