From e61cbb74dad41c0c68c365d7968002cec7eda919 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Tue, 15 Oct 2024 12:03:33 -0500 Subject: [PATCH 01/10] Replace use of `curl_easy_perform` with a multi-handle engine --- CMakeLists.txt | 4 +- src/CurlUtil.cc | 320 ++++++++++++++++++++++++++++++++++++++++++++ src/CurlUtil.hh | 67 ++++++++++ src/CurlWorker.hh | 52 +++++++ src/HTTPCommands.cc | 273 +++++++++++++++++++++---------------- src/HTTPCommands.hh | 69 ++++++++-- src/HTTPFile.cc | 8 +- src/S3Commands.cc | 10 +- src/S3Commands.hh | 4 +- src/S3File.cc | 11 +- test/CMakeLists.txt | 2 + test/s3_tests.cc | 3 + 12 files changed, 679 insertions(+), 144 deletions(-) create mode 100644 src/CurlUtil.cc create mode 100644 src/CurlUtil.hh create mode 100644 src/CurlWorker.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 1238fa4..4578acf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,8 +57,8 @@ endif() include_directories(${XROOTD_INCLUDES} ${CURL_INCLUDE_DIRS} ${LIBCRYPTO_INCLUDE_DIRS}) -add_library(XrdS3 SHARED src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) -add_library(XrdHTTPServer SHARED src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdS3 SHARED src/CurlUtil.cc src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) +add_library(XrdHTTPServer SHARED src/CurlUtil.cc src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) target_link_libraries(XrdS3 -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES} tinyxml2::tinyxml2 Threads::Threads) target_link_libraries(XrdHTTPServer -ldl ${XROOTD_UTILS_LIB} ${XROOTD_SERVER_LIB} ${CURL_LIBRARIES} ${LIBCRYPTO_LIBRARIES} Threads::Threads) diff --git a/src/CurlUtil.cc b/src/CurlUtil.cc new file mode 100644 index 0000000..9e23265 --- /dev/null +++ b/src/CurlUtil.cc @@ -0,0 +1,320 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +#include "CurlUtil.hh" +#include "CurlWorker.hh" +#include "HTTPCommands.hh" +#include "logging.hh" + +#include +#include + +#include +#include + +#include +#include +#include +#include + +using namespace XrdHTTPServer; + +thread_local std::vector HandlerQueue::m_handles; + +HandlerQueue::HandlerQueue() { + int filedes[2]; + auto result = pipe(filedes); + if (result == -1) { + throw std::runtime_error(strerror(errno)); + } + m_read_fd = filedes[0]; + m_write_fd = filedes[1]; +}; + +namespace { + +// Simple debug function for getting information from libcurl; to enable, you +// need to recompile with GetHandle(true); +int dump_header(CURL *handle, curl_infotype type, char *data, size_t size, + void *clientp) { + (void)handle; + (void)clientp; + + switch (type) { + case CURLINFO_HEADER_OUT: + printf("Header > %s\n", std::string(data, size).c_str()); + break; + default: + printf("Info: %s", std::string(data, size).c_str()); + break; + } + return 0; +} + +} // namespace + +CURL *GetHandle(bool verbose) { + auto result = curl_easy_init(); + if (result == nullptr) { + return result; + } + + curl_easy_setopt(result, CURLOPT_USERAGENT, "xrootd-s3/devel"); + curl_easy_setopt(result, CURLOPT_DEBUGFUNCTION, dump_header); + if (verbose) + curl_easy_setopt(result, CURLOPT_VERBOSE, 1L); + + curl_easy_setopt(result, CURLOPT_BUFFERSIZE, 32 * 1024); + + return result; +} + +CURL *HandlerQueue::GetHandle() { + if (m_handles.size()) { + auto result = m_handles.back(); + m_handles.pop_back(); + return result; + } + + return ::GetHandle(false); +} + +void HandlerQueue::RecycleHandle(CURL *curl) { m_handles.push_back(curl); } + +void HandlerQueue::Produce(HTTPRequest *handler) { + std::unique_lock lk{m_mutex}; + m_cv.wait(lk, [&] { return m_ops.size() < m_max_pending_ops; }); + + m_ops.push_back(handler); + char ready[] = "1"; + while (true) { + auto result = write(m_write_fd, ready, 1); + if (result == -1) { + if (errno == EINTR) { + continue; + } + throw std::runtime_error(strerror(errno)); + } + break; + } + + lk.unlock(); + m_cv.notify_one(); +} + +HTTPRequest *HandlerQueue::Consume() { + std::unique_lock lk(m_mutex); + m_cv.wait(lk, [&] { return m_ops.size() > 0; }); + + auto result = std::move(m_ops.front()); + m_ops.pop_front(); + + char ready[1]; + while (true) { + auto result = read(m_read_fd, ready, 1); + if (result == -1) { + if (errno == EINTR) { + continue; + } + throw std::runtime_error(strerror(errno)); + } + break; + } + + lk.unlock(); + m_cv.notify_one(); + + return result; +} + +HTTPRequest *HandlerQueue::TryConsume() { + std::unique_lock lk(m_mutex); + if (m_ops.size() == 0) { + return nullptr; + } + + auto result = std::move(m_ops.front()); + m_ops.pop_front(); + + char ready[1]; + while (true) { + auto result = read(m_read_fd, ready, 1); + if (result == -1) { + if (errno == EINTR) { + continue; + } + throw std::runtime_error(strerror(errno)); + } + break; + } + + lk.unlock(); + m_cv.notify_one(); + + return result; +} + +void CurlWorker::RunStatic(CurlWorker *myself) { + try { + myself->Run(); + } catch (...) { + myself->m_logger.Log(LogMask::Debug, "CurlWorker::RunStatic", + "Curl worker got an exception"); + } +} + +void CurlWorker::Run() { + // Create a copy of the shared_ptr here. Otherwise, when the main thread's + // destructors run, there won't be any other live references to the + // shared_ptr, triggering cleanup of the condition variable. Because we + // purposely don't shutdown the worker threads, those threads may be waiting + // on the condition variable; destroying a condition variable while a thread + // is waiting on it is undefined behavior. + auto queue_ref = m_queue; + auto &queue = *queue_ref.get(); + m_logger.Log(LogMask::Debug, "CurlWorker::Run", "Started a curl worker"); + + CURLM *multi_handle = curl_multi_init(); + if (multi_handle == nullptr) { + throw std::runtime_error("Failed to create curl multi-handle"); + } + + int running_handles = 0; + time_t last_marker = time(NULL); + CURLMcode mres = CURLM_OK; + + std::vector waitfds; + waitfds.resize(1); + waitfds[0].fd = queue.PollFD(); + waitfds[0].events = CURL_WAIT_POLLIN; + waitfds[0].revents = 0; + + while (true) { + while (running_handles < static_cast(m_max_ops)) { + auto op = + running_handles == 0 ? queue.Consume() : queue.TryConsume(); + if (!op) { + break; + } + auto curl = queue.GetHandle(); + if (curl == nullptr) { + m_logger.Log(LogMask::Debug, "CurlWorker", + "Unable to allocate a curl handle"); + op->Fail("E_NOMEM", "Unable to get allocate a curl handle"); + continue; + } + try { + if (!op->SetupHandle(curl)) { + op->Fail(op->getErrorCode(), op->getErrorMessage()); + } + } catch (...) { + m_logger.Log(LogMask::Debug, "CurlWorker", + "Unable to setup the curl handle"); + op->Fail("E_NOMEM", + "Failed to setup the curl handle for the operation"); + continue; + } + m_op_map[curl] = op; + auto mres = curl_multi_add_handle(multi_handle, curl); + if (mres != CURLM_OK) { + if (m_logger.getMsgMask() & LogMask::Debug) { + std::stringstream ss; + ss << "Unable to add operation to the curl multi-handle: " + << curl_multi_strerror(mres); + m_logger.Log(LogMask::Debug, "CurlWorker", + ss.str().c_str()); + } + op->Fail("E_CURL_LIB", + "Unable to add operation to the curl multi-handle"); + continue; + } + running_handles += 1; + } + + // Maintain the periodic reporting of thread activity + time_t now = time(NULL); + time_t next_marker = last_marker + m_marker_period; + if (now >= next_marker) { + if (m_logger.getMsgMask() & LogMask::Debug) { + std::stringstream ss; + ss << "Curl worker thread " << getpid() << " is running " + << running_handles << "operations"; + m_logger.Log(LogMask::Debug, "CurlWorker", ss.str().c_str()); + } + last_marker = now; + } + + mres = curl_multi_wait(multi_handle, &waitfds[0], waitfds.size(), 50, + nullptr); + if (mres != CURLM_OK) { + if (m_logger.getMsgMask() & LogMask::Warning) { + std::stringstream ss; + ss << "Failed to wait on multi-handle: " << mres; + m_logger.Log(LogMask::Warning, "CurlWorker", ss.str().c_str()); + } + } + + // Do maintenance on the multi-handle + int still_running; + auto mres = curl_multi_perform(multi_handle, &still_running); + if (mres == CURLM_CALL_MULTI_PERFORM) { + continue; + } else if (mres != CURLM_OK) { + if (m_logger.getMsgMask() & LogMask::Warning) { + std::stringstream ss; + ss << "Failed to perform multi-handle operation: " << mres; + m_logger.Log(LogMask::Warning, "CurlWorker", ss.str().c_str()); + } + break; + } + + CURLMsg *msg; + do { + int msgq = 0; + msg = curl_multi_info_read(multi_handle, &msgq); + if (msg && (msg->msg == CURLMSG_DONE)) { + auto iter = m_op_map.find(msg->easy_handle); + if (iter == m_op_map.end()) { + m_logger.Log(LogMask::Error, "CurlWorker", + "Logic error: got a callback for an entry " + "that doesn't exist"); + mres = CURLM_BAD_EASY_HANDLE; + break; + } + auto &op = iter->second; + auto res = msg->data.result; + op->ProcessCurlResult(iter->first, res); + op->ReleaseHandle(iter->first); + running_handles -= 1; + curl_multi_remove_handle(multi_handle, iter->first); + if (res == CURLE_OK) { + // If the handle was successful, then we can recycle it. + queue.RecycleHandle(iter->first); + } else { + curl_easy_cleanup(iter->first); + m_op_map.erase(iter); + } + } + } while (msg); + } + + for (auto &map_entry : m_op_map) { + map_entry.second->Fail("E_CURL_LIB", curl_multi_strerror(mres)); + } + m_op_map.clear(); +} diff --git a/src/CurlUtil.hh b/src/CurlUtil.hh new file mode 100644 index 0000000..d97fbe9 --- /dev/null +++ b/src/CurlUtil.hh @@ -0,0 +1,67 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +#pragma once + +#include +#include +#include +#include +#include +#include + +// Forward dec'ls +typedef void CURL; +struct curl_slist; + +class HTTPRequest; + +// Returns a newly-created curl handle (no internal caching) +CURL *GetHandle(bool verbose); + +/** + * HandlerQueue is a deque of curl operations that need + * to be performed. The object is thread safe and can + * be waited on via poll(). + * + * The fact that it's poll'able is necessary because the + * multi-curl driver thread is based on polling FD's + */ +class HandlerQueue { +public: + HandlerQueue(); + + void Produce(HTTPRequest *handler); + + HTTPRequest *Consume(); + HTTPRequest *TryConsume(); + + int PollFD() const {return m_read_fd;} + + CURL *GetHandle(); + void RecycleHandle(CURL *); + +private: + std::deque m_ops; + thread_local static std::vector m_handles; + std::condition_variable m_cv; + std::mutex m_mutex; + const static unsigned m_max_pending_ops{20}; + int m_read_fd{-1}; + int m_write_fd{-1}; +}; diff --git a/src/CurlWorker.hh b/src/CurlWorker.hh new file mode 100644 index 0000000..928dd11 --- /dev/null +++ b/src/CurlWorker.hh @@ -0,0 +1,52 @@ +/*************************************************************** + * + * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may + * obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************/ + +#pragma once + +#include +#include + +typedef void CURL; + +class XrdSysError; + +class HTTPRequest; +class HandlerQueue; + +class CurlWorker { +public: + CurlWorker(std::shared_ptr queue, XrdSysError &logger) : + m_queue(queue), + m_logger(logger) + {} + + CurlWorker(const CurlWorker &) = delete; + + void Run(); + static void RunStatic(CurlWorker *myself); + static unsigned GetPollThreads() {return m_workers;} + +private: + std::shared_ptr m_queue; + std::unordered_map m_op_map; + XrdSysError &m_logger; + + const static unsigned m_workers{5}; + const static unsigned m_max_ops{20}; + const static unsigned m_marker_period{5}; +}; diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index cdbe22d..d29f46c 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -24,11 +24,14 @@ #include #include #include +#include #include #include #include +#include "CurlUtil.hh" +#include "CurlWorker.hh" #include "HTTPCommands.hh" #include "logging.hh" #include "shortfile.hh" @@ -36,6 +39,13 @@ using namespace XrdHTTPServer; +std::shared_ptr HTTPRequest::m_queue = + std::make_unique(); +bool HTTPRequest::m_workers_initialized = false; +std::vector HTTPRequest::m_workers; + +namespace { + // // "This function gets called by libcurl as soon as there is data received // that needs to be saved. The size of the data pointed to by ptr is size @@ -59,14 +69,16 @@ size_t appendToString(const void *ptr, size_t size, size_t nmemb, void *str) { return (size * nmemb); } +} // namespace + HTTPRequest::~HTTPRequest() {} #define SET_CURL_SECURITY_OPTION(A, B, C) \ { \ CURLcode rv##B = curl_easy_setopt(A, B, C); \ if (rv##B != CURLE_OK) { \ - this->errorCode = "E_CURL_LIB"; \ - this->errorMessage = "curl_easy_setopt( " #B " ) failed."; \ + errorCode = "E_CURL_LIB"; \ + errorMessage = "curl_easy_setopt( " #B " ) failed."; \ return false; \ } \ } @@ -83,9 +95,9 @@ bool HTTPRequest::parseProtocol(const std::string &url, std::string &protocol) { } bool HTTPRequest::SendHTTPRequest(const std::string &payload) { - if ((protocol != "http") && (protocol != "https")) { - this->errorCode = "E_INVALID_SERVICE_URL"; - this->errorMessage = "Service URL not of a known protocol (http[s])."; + if ((m_protocol != "http") && (m_protocol != "https")) { + errorCode = "E_INVALID_SERVICE_URL"; + errorMessage = "Service URL not of a known protocol (http[s])."; m_log.Log(LogMask::Warning, "HTTPRequest::SendHTTPRequest", "Host URL '", hostUrl.c_str(), "' not of a known protocol (http[s])."); @@ -100,7 +112,7 @@ bool HTTPRequest::SendHTTPRequest(const std::string &payload) { // by default for "PUT", which we really don't want. headers["Transfer-Encoding"] = ""; - return sendPreparedRequest(protocol, hostUrl, payload); + return sendPreparedRequest(hostUrl, payload); } static void dump(const char *text, FILE *stream, unsigned char *ptr, @@ -208,31 +220,64 @@ size_t read_callback(char *buffer, size_t size, size_t n, void *v) { return request; } -bool HTTPRequest::sendPreparedRequest(const std::string &protocol, - const std::string &uri, +bool HTTPRequest::sendPreparedRequest(const std::string &uri, const std::string &payload) { + m_uri = uri; + m_payload = payload; - m_log.Log(XrdHTTPServer::Debug, "SendRequest", "Sending HTTP request", - uri.c_str()); + m_queue->Produce(this); + std::unique_lock lk(m_mtx); + m_cv.wait(lk, [&] { return m_result_ready; }); - std::unique_ptr curl( - curl_easy_init(), &curl_easy_cleanup); + return errorCode.empty(); +} - if (curl.get() == NULL) { - this->errorCode = "E_CURL_LIB"; - this->errorMessage = "curl_easy_init() failed."; +bool HTTPRequest::ReleaseHandle(CURL *curl) { + if (curl == nullptr) + return false; + // Note: Any option that's conditionally set in `HTTPRequest::SetupHandle` + // must be restored to the original state here. + // + // Only changing back the things we explicitly set is a conscious decision + // here versus using `curl_easy_reset`; we are trying to avoid whacking + // all the configuration of the handle. + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, nullptr); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, nullptr); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, nullptr); + curl_easy_setopt(curl, CURLOPT_OPENSOCKETFUNCTION, nullptr); + curl_easy_setopt(curl, CURLOPT_OPENSOCKETDATA, nullptr); + curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, nullptr); + curl_easy_setopt(curl, CURLOPT_SOCKOPTDATA, nullptr); + curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, nullptr); + curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L); + curl_easy_setopt(curl, CURLOPT_NOBODY, 0); + curl_easy_setopt(curl, CURLOPT_POST, 0); + curl_easy_setopt(curl, CURLOPT_UPLOAD, 0); + curl_easy_setopt(curl, CURLOPT_HEADER, 0); + curl_easy_setopt(curl, CURLOPT_SSLCERT, nullptr); + curl_easy_setopt(curl, CURLOPT_SSLKEY, nullptr); + + return true; +} + +bool HTTPRequest::SetupHandle(CURL *curl) { + m_log.Log(XrdHTTPServer::Debug, "SetupHandle", "Sending HTTP request", + m_uri.c_str()); + + if (curl == nullptr) { + errorCode = "E_CURL_LIB"; + errorMessage = "curl_easy_init() failed."; return false; } - char errorBuffer[CURL_ERROR_SIZE]; - auto rv = curl_easy_setopt(curl.get(), CURLOPT_ERRORBUFFER, errorBuffer); + auto rv = curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, m_errorBuffer); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_ERRORBUFFER ) failed."; return false; } - rv = curl_easy_setopt(curl.get(), CURLOPT_URL, uri.c_str()); + rv = curl_easy_setopt(curl, CURLOPT_URL, m_uri.c_str()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_URL ) failed."; @@ -240,7 +285,7 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, } if (httpVerb == "HEAD") { - rv = curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1); + rv = curl_easy_setopt(curl, CURLOPT_NOBODY, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_HEAD ) failed."; @@ -249,14 +294,14 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, } if (httpVerb == "POST") { - rv = curl_easy_setopt(curl.get(), CURLOPT_POST, 1); + rv = curl_easy_setopt(curl, CURLOPT_POST, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_POST ) failed."; return false; } - rv = curl_easy_setopt(curl.get(), CURLOPT_POSTFIELDS, payload.c_str()); + rv = curl_easy_setopt(curl, CURLOPT_POSTFIELDS, m_payload.c_str()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = @@ -266,7 +311,7 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, } if (httpVerb == "PUT") { - rv = curl_easy_setopt(curl.get(), CURLOPT_UPLOAD, 1); + rv = curl_easy_setopt(curl, CURLOPT_UPLOAD, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_UPLOAD ) failed."; @@ -276,17 +321,16 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, // Our HTTPRequest instance should have a pointer to the payload data // and the offset of the data Here, we tell curl_easy_setopt to use the // read_callback function to read the data from the payload - this->callback_payload = std::unique_ptr( - new HTTPRequest::Payload{&payload, 0}); - rv = curl_easy_setopt(curl.get(), CURLOPT_READDATA, - callback_payload.get()); + m_callback_payload = std::unique_ptr( + new HTTPRequest::Payload{&m_payload, 0}); + rv = curl_easy_setopt(curl, CURLOPT_READDATA, m_callback_payload.get()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_READDATA ) failed."; return false; } - rv = curl_easy_setopt(curl.get(), CURLOPT_READFUNCTION, read_callback); + rv = curl_easy_setopt(curl, CURLOPT_READFUNCTION, read_callback); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = @@ -295,7 +339,7 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, } } - rv = curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1); + rv = curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_NOPROGRESS ) failed."; @@ -303,7 +347,7 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, } if (includeResponseHeader) { - rv = curl_easy_setopt(curl.get(), CURLOPT_HEADER, 1); + rv = curl_easy_setopt(curl, CURLOPT_HEADER, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_HEADER ) failed."; @@ -311,7 +355,7 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, } } - rv = curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, &appendToString); + rv = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &appendToString); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = @@ -319,14 +363,14 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, return false; } - rv = curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &this->resultString); + rv = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &m_result); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_WRITEDATA ) failed."; return false; } - if (curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK) { + if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_FOLLOWLOCATION ) failed."; @@ -336,53 +380,36 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, // // Set security options. // - SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_SSL_VERIFYPEER, 1); - SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_SSL_VERIFYHOST, 2); - - // NB: Contrary to libcurl's manual, it doesn't strdup() strings passed - // to it, so they MUST remain in scope until after we call - // curl_easy_cleanup(). Otherwise, curl_perform() will fail with - // a completely bogus error, number 60, claiming that there's a - // 'problem with the SSL CA cert'. + SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSL_VERIFYPEER, 1); + SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSL_VERIFYHOST, 2); + std::string CAFile = ""; std::string CAPath = ""; - - char *x509_ca_dir = getenv("X509_CERT_DIR"); - if (x509_ca_dir != NULL) { - CAPath = x509_ca_dir; - } - - char *x509_ca_file = getenv("X509_CERT_FILE"); - if (x509_ca_file != NULL) { - CAFile = x509_ca_file; - } - - if (!CAPath.empty()) { - SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_CAPATH, CAPath.c_str()); + auto x509_ca_dir = getenv("X509_CERT_DIR"); + if (x509_ca_dir != nullptr && x509_ca_dir[0] != '\0') { + SET_CURL_SECURITY_OPTION(curl, CURLOPT_CAPATH, x509_ca_dir); } - if (!CAFile.empty()) { - SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_CAINFO, CAFile.c_str()); - } - - if (setenv("OPENSSL_ALLOW_PROXY", "1", 0) != 0) { + auto x509_ca_file = getenv("X509_CERT_FILE"); + if (x509_ca_file != nullptr) { + SET_CURL_SECURITY_OPTION(curl, CURLOPT_CAINFO, x509_ca_file); } // // Configure for x.509 operation. // - if (protocol == "x509" && requiresSignature) { - const std::string *accessKeyFilePtr = this->getAccessKey(); - const std::string *secretKeyFilePtr = this->getSecretKey(); + if (m_protocol == "x509" && requiresSignature) { + auto accessKeyFilePtr = getAccessKey(); + auto secretKeyFilePtr = getSecretKey(); if (accessKeyFilePtr && secretKeyFilePtr) { - SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_SSLKEYTYPE, "PEM"); - SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_SSLKEY, + SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSLKEYTYPE, "PEM"); + SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSLKEY, *secretKeyFilePtr->c_str()); - SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_SSLCERTTYPE, "PEM"); - SET_CURL_SECURITY_OPTION(curl.get(), CURLOPT_SSLCERT, + SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSLCERTTYPE, "PEM"); + SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSLCERT, *accessKeyFilePtr->c_str()); } } @@ -399,98 +426,103 @@ bool HTTPRequest::sendPreparedRequest(const std::string &protocol, } } } - { - const auto iter = headers.find("User-Agent"); - if (iter == headers.end()) { - headers["User-Agent"] = "xrootd-http/devel"; - } - } std::string headerPair; - struct curl_slist *header_slist = NULL; + m_header_list.reset(); for (auto i = headers.begin(); i != headers.end(); ++i) { formatstr(headerPair, "%s: %s", i->first.c_str(), i->second.c_str()); - header_slist = curl_slist_append(header_slist, headerPair.c_str()); - if (header_slist == NULL) { + auto tmp_headers = + curl_slist_append(m_header_list.get(), headerPair.c_str()); + if (tmp_headers == nullptr) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_slist_append() failed."; return false; } + m_header_list.release(); + m_header_list.reset(tmp_headers); } - rv = curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, header_slist); + rv = curl_easy_setopt(curl, CURLOPT_HTTPHEADER, m_header_list.get()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_HTTPHEADER ) failed."; - if (header_slist) { - curl_slist_free_all(header_slist); - } return false; } if (m_log.getMsgMask() & LogMask::Dump) { - rv = curl_easy_setopt(curl.get(), CURLOPT_DEBUGFUNCTION, debugCallback); - rv = curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L); + rv = curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, debugCallback); + rv = curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); } -retry: - rv = curl_easy_perform(curl.get()); + return true; +} - if (rv != 0) { +bool HTTPRequest::Fail(const std::string &ecode, const std::string &emsg) { + errorCode = ecode; + errorMessage = emsg; + + Notify(); + return true; +} - this->errorCode = "E_CURL_IO"; +void HTTPRequest::Notify() { + std::lock_guard lk(m_mtx); + m_result_ready = true; + m_cv.notify_one(); +} + +HTTPRequest::CurlResult HTTPRequest::ProcessCurlResult(CURL *curl, + CURLcode rv) { + + auto cleaner = [&](void *) { Notify(); }; + auto unique = std::unique_ptr((void *)1, cleaner); + + if (rv != 0) { + errorCode = "E_CURL_IO"; std::ostringstream error; - error << "curl_easy_perform() failed (" << rv << "): '" - << curl_easy_strerror(rv) << "'."; - this->errorMessage = error.str(); - if (header_slist) { - curl_slist_free_all(header_slist); - } + error << "curl failed (" << rv << "): '" << curl_easy_strerror(rv) + << "'."; + errorMessage = error.str(); - return false; + return CurlResult::Fail; } responseCode = 0; - rv = curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &responseCode); + rv = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &responseCode); if (rv != CURLE_OK) { // So we contacted the server but it returned such gibberish that // CURL couldn't identify the response code. Let's assume that's // bad news. Since we're already terminally failing the request, // don't bother to check if this was our last chance at retrying. - this->errorCode = "E_CURL_LIB"; - this->errorMessage = "curl_easy_getinfo() failed."; - if (header_slist) { - curl_slist_free_all(header_slist); - } + errorCode = "E_CURL_LIB"; + errorMessage = "curl_easy_getinfo() failed."; - return false; + return CurlResult::Fail; } if (responseCode == 503 && - (resultString.find("RequestLimitExceeded") != - std::string::npos)) { - resultString.clear(); - goto retry; - } - - if (header_slist) { - curl_slist_free_all(header_slist); + (m_result.find("RequestLimitExceeded") != + std::string::npos) && + m_retry_count == 0) { + m_result.clear(); + m_retry_count++; + return CurlResult::Retry; } - if (responseCode != this->expectedResponseCode) { - formatstr(this->errorCode, + if (responseCode != expectedResponseCode) { + formatstr(errorCode, "E_HTTP_RESPONSE_NOT_EXPECTED (response %lu != expected %lu)", - responseCode, this->expectedResponseCode); - this->errorMessage = resultString; - if (this->errorMessage.empty()) { + responseCode, expectedResponseCode); + errorMessage = m_result; + if (errorMessage.empty()) { formatstr( - this->errorMessage, + errorMessage, "HTTP response was %lu, not %lu, and no body was returned.", - responseCode, this->expectedResponseCode); + responseCode, expectedResponseCode); } - return false; + return CurlResult::Fail; } - return true; + return CurlResult::Ok; } // --------------------------------------------------------------------------- @@ -510,7 +542,16 @@ bool HTTPUpload::SendRequest(const std::string &payload, off_t offset, return SendHTTPRequest(payload); } -void HTTPRequest::init() { +void HTTPRequest::Init(XrdSysError &log) { + if (!m_workers_initialized) { + for (unsigned idx = 0; idx < CurlWorker::GetPollThreads(); idx++) { + m_workers.push_back(new CurlWorker(m_queue, log)); + std::thread t(CurlWorker::RunStatic, m_workers.back()); + t.detach(); + } + m_workers_initialized = true; + } + CURLcode rv = curl_global_init(CURL_GLOBAL_ALL); if (rv != 0) { throw std::runtime_error("libcurl failed to initialize"); diff --git a/src/HTTPCommands.hh b/src/HTTPCommands.hh index cd8bb42..520c8fe 100644 --- a/src/HTTPCommands.hh +++ b/src/HTTPCommands.hh @@ -20,21 +20,31 @@ #include "TokenFile.hh" +#include #include #include +#include #include +#include + +#include class XrdSysError; +class HandlerQueue; +class CurlWorker; class HTTPRequest { + friend class CurlWorker; + public: HTTPRequest(const std::string &hostUrl, XrdSysError &log, const TokenFile *token) - : hostUrl(hostUrl), m_log(log), m_token(token) { + : hostUrl(hostUrl), m_header_list(nullptr, &curl_slist_free_all), + m_log(log), m_token(token) { // Parse the URL and populate // What to do if the function returns false? // TODO: Figure out best way to deal with this - if (!parseProtocol(hostUrl, protocol)) { + if (!parseProtocol(hostUrl, m_protocol)) { errorCode = "E_INVALID_HOST_URL"; errorMessage = "Failed to parse protocol from host/service URL."; } @@ -51,7 +61,7 @@ class HTTPRequest { unsigned long getResponseCode() const { return responseCode; } const std::string &getErrorCode() const { return errorCode; } const std::string &getErrorMessage() const { return errorMessage; } - const std::string &getResultString() const { return resultString; } + const std::string &getResultString() const { return m_result; } // Currently only used in PUTS, but potentially useful elsewhere struct Payload { @@ -63,19 +73,19 @@ class HTTPRequest { // // Should be called at least once per application from a non-threaded // context. - static void init(); + static void Init(XrdSysError &); protected: - bool sendPreparedRequest(const std::string &protocol, - const std::string &uri, + bool sendPreparedRequest(const std::string &uri, const std::string &payload); + const std::string &getProtocol() { return m_protocol; } + typedef std::map AttributeValueMap; AttributeValueMap query_parameters; AttributeValueMap headers; std::string hostUrl; - std::string protocol; bool requiresSignature{false}; struct timespec signatureTime; @@ -83,18 +93,57 @@ class HTTPRequest { std::string errorMessage; std::string errorCode; - std::string resultString; + std::string m_result; unsigned long responseCode{0}; unsigned long expectedResponseCode = 200; bool includeResponseHeader{false}; std::string httpVerb{"POST"}; - std::unique_ptr callback_payload; + std::unique_ptr m_callback_payload; + + std::unique_ptr + m_header_list; // Headers associated with the request XrdSysError &m_log; private: - const TokenFile *m_token; + enum class CurlResult { Ok, Fail, Retry }; + + void Notify(); // Notify the main request thread the request has been + // processed by a worker + virtual bool SetupHandle( + CURL *curl); // Configure the curl handle to be used by a given request. + CurlResult ProcessCurlResult( + CURL *curl, + CURLcode rv); // Process a curl command that ran to completion. + bool + Fail(const std::string &ecode, + const std::string &emsg); // Record a failure occurring for the request + // (curl request did not complete) + bool ReleaseHandle( + CURL *curl); // Cleanup any resources associated with the curl handle + + const TokenFile *m_token{nullptr}; + + // The following members manage the work queue and workers. + static bool + m_workers_initialized; // The global state of the worker initialization. + static std::shared_ptr + m_queue; // Global queue for all HTTP requests to be processed. + static std::vector + m_workers; // Set of all the curl worker threads. + + // The following variables manage the state of the request. + std::mutex + m_mtx; // Mutex guarding the results from the curl worker's callback + std::condition_variable m_cv; // Condition variable to notify the curl + // worker completed the callback + bool m_result_ready{false}; // Flag indicating the results data is ready. + std::string m_protocol; + std::string m_uri; // URL to request from libcurl + std::string m_payload; + char m_errorBuffer[CURL_ERROR_SIZE]; // Static error buffer for libcurl + unsigned m_retry_count{0}; }; class HTTPUpload : public HTTPRequest { diff --git a/src/HTTPFile.cc b/src/HTTPFile.cc index e15e9c7..3bc6eea 100644 --- a/src/HTTPFile.cc +++ b/src/HTTPFile.cc @@ -274,7 +274,7 @@ extern "C" { XrdOss *XrdOssAddStorageSystem2(XrdOss *curr_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms, XrdOucEnv *envP) { - XrdSysError log(Logger, "s3_"); + XrdSysError log(Logger, "httpserver_"); log.Emsg("Initialize", "HTTP filesystem cannot be stacked with other filesystems"); @@ -288,16 +288,16 @@ XrdOss *XrdOssAddStorageSystem2(XrdOss *curr_oss, XrdSysLogger *Logger, XrdOss *XrdOssGetStorageSystem2(XrdOss *native_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms, XrdOucEnv *envP) { - XrdSysError log(Logger, "httpserver_"); + auto log = new XrdSysError(Logger, "httpserver_"); envP->Export("XRDXROOTD_NOPOSC", "1"); try { - HTTPRequest::init(); + HTTPRequest::Init(*log); g_http_oss = new HTTPFileSystem(Logger, config_fn, envP); return g_http_oss; } catch (std::runtime_error &re) { - log.Emsg("Initialize", "Encountered a runtime failure", re.what()); + log->Emsg("Initialize", "Encountered a runtime failure", re.what()); return nullptr; } } diff --git a/src/S3Commands.cc b/src/S3Commands.cc index 7cdcb24..587994b 100644 --- a/src/S3Commands.cc +++ b/src/S3Commands.cc @@ -410,7 +410,7 @@ bool AmazonRequest::createV4Signature(const std::string &payload, bool AmazonRequest::sendV4Request(const std::string &payload, bool sendContentSHA) { - if ((protocol != "http") && (protocol != "https")) { + if ((getProtocol() != "http") && (getProtocol() != "https")) { this->errorCode = "E_INVALID_SERVICE_URL"; this->errorMessage = "Service URL not of a known protocol (http[s])."; return false; @@ -438,7 +438,7 @@ bool AmazonRequest::sendV4Request(const std::string &payload, if (!canonicalQueryString.empty()) { url += "?" + canonicalQueryString; } - return sendPreparedRequest(protocol, url, payload); + return sendPreparedRequest(url, payload); } // It's stated in the API documentation that you can upload to any region @@ -563,7 +563,7 @@ bool AmazonS3List::SendRequest(const std::string &continuationToken) { httpVerb = "GET"; // Operation is on the bucket itself; alter the URL to remove the object - hostUrl = protocol + "://" + host + bucketPath; + hostUrl = getProtocol() + "://" + host + bucketPath; return SendS3Request(""); } @@ -571,7 +571,7 @@ bool AmazonS3List::SendRequest(const std::string &continuationToken) { bool AmazonS3CreateMultipartUpload::Results(std::string &uploadId, std::string &errMsg) { tinyxml2::XMLDocument doc; - auto err = doc.Parse(resultString.c_str()); + auto err = doc.Parse(getResultString().c_str()); if (err != tinyxml2::XML_SUCCESS) { errMsg = doc.ErrorStr(); return false; @@ -614,7 +614,7 @@ bool AmazonS3List::Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg) { tinyxml2::XMLDocument doc; - auto err = doc.Parse(resultString.c_str()); + auto err = doc.Parse(m_result.c_str()); if (err != tinyxml2::XML_SUCCESS) { errMsg = doc.ErrorStr(); return false; diff --git a/src/S3Commands.hh b/src/S3Commands.hh index 71e1a5c..56a5a29 100644 --- a/src/S3Commands.hh +++ b/src/S3Commands.hh @@ -64,7 +64,7 @@ class AmazonRequest : public HTTPRequest { // requests, and // --> "https://my-url.com:443/my-bucket/my-object" for path style // requests. - hostUrl = protocol + "://" + host + canonicalURI; + hostUrl = getProtocol() + "://" + host + canonicalURI; // If we can, set the region based on the host. size_t secondDot = host.find(".", 2 + 1); @@ -83,7 +83,7 @@ class AmazonRequest : public HTTPRequest { virtual bool SendRequest(); virtual bool SendS3Request(const std::string &payload); - static void init() { HTTPRequest::init(); } + static void Init(XrdSysError &log) { HTTPRequest::Init(log); } protected: bool sendV4Request(const std::string &payload, bool sendContentSHA = false); diff --git a/src/S3File.cc b/src/S3File.cc index b92318c..d78828e 100644 --- a/src/S3File.cc +++ b/src/S3File.cc @@ -17,6 +17,7 @@ ***************************************************************/ #include "S3File.hh" +#include "CurlWorker.hh" #include "S3Commands.hh" #include "S3FileSystem.hh" #include "logging.hh" @@ -51,10 +52,10 @@ S3File::S3File(XrdSysError &log, S3FileSystem *oss) write_buffer(""), partNumber(1) {} int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { - if (Oflag && O_CREAT) { + if (Oflag & O_CREAT) { m_log.Log(LogMask::Info, "File opened for creation: ", path); } - if (Oflag && O_APPEND) { + if (Oflag & O_APPEND) { m_log.Log(LogMask::Info, "File opened for append: ", path); } @@ -294,16 +295,16 @@ XrdOss *XrdOssAddStorageSystem2(XrdOss *curr_oss, XrdSysLogger *Logger, XrdOss *XrdOssGetStorageSystem2(XrdOss *native_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms, XrdOucEnv *envP) { - XrdSysError log(Logger, "s3_"); + auto log = new XrdSysError(Logger, "s3_"); envP->Export("XRDXROOTD_NOPOSC", "1"); try { - AmazonRequest::init(); + AmazonRequest::Init(*log); g_s3_oss = new S3FileSystem(Logger, config_fn, envP); return g_s3_oss; } catch (std::runtime_error &re) { - log.Emsg("Initialize", "Encountered a runtime failure", re.what()); + log->Emsg("Initialize", "Encountered a runtime failure", re.what()); return nullptr; } } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0c68eb2..24e26c8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,5 +1,6 @@ add_executable( s3-gtest s3_tests.cc ../src/AWSv4-impl.cc + ../src/CurlUtil.cc ../src/logging.cc ../src/S3AccessInfo.cc ../src/S3Directory.cc @@ -13,6 +14,7 @@ add_executable( s3-gtest s3_tests.cc ) add_executable( http-gtest http_tests.cc + ../src/CurlUtil.cc ../src/HTTPFile.cc ../src/HTTPFileSystem.cc ../src/HTTPCommands.cc diff --git a/test/s3_tests.cc b/test/s3_tests.cc index 4c346fc..099b334 100644 --- a/test/s3_tests.cc +++ b/test/s3_tests.cc @@ -309,6 +309,9 @@ TEST_F(FileSystemS3PathBucketSlash, List) { } int main(int argc, char **argv) { + auto logger = new XrdSysLogger(2, 0); + auto log = new XrdSysError(logger, "curl_"); + AmazonRequest::Init(*log); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } From a81fd194a0aabc525a3797349c67e7d6f1558b32 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Mon, 21 Oct 2024 08:57:07 +0200 Subject: [PATCH 02/10] Switch to using xrootd.org debian builds We now depend on XRootD 5.7.0 or later -- that means we'll need upstream builds. --- .github/workflows/test.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e436560..c7f87d5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,8 +19,8 @@ jobs: build: strategy: matrix: - external-gtest: [ YES, NO ] - os: [ ubuntu-latest, ubuntu-22.04 ] + external-gtest: [ YES ] + os: [ ubuntu-24.04 ] runs-on: ${{ matrix.os }} name: Build with external_gtest=${{ matrix.external-gtest }} on ${{ matrix.os }} @@ -31,6 +31,8 @@ jobs: submodules: recursive - name: install deps run: | + sudo curl -L https://xrootd.web.cern.ch/repo/RPM-GPG-KEY.txt -o /etc/apt/trusted.gpg.d/xrootd.asc + sudo /bin/sh -c 'echo "deb https://xrootd.web.cern.ch/ubuntu noble stable" >> /etc/apt/sources.list.d/xrootd.list' sudo apt update && sudo apt-get install -y cmake libcurl4-openssl-dev libcurl4 pkg-config libssl-dev xrootd-server libxrootd-dev libxrootd-server-dev libgtest-dev - name: Create Build Environment From eac4e674a35ca6ea765248ab3a15f50737a7bdf6 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Mon, 21 Oct 2024 08:24:45 +0200 Subject: [PATCH 03/10] Switch to modern config file parsing framework Start using the new XrdOucGatherConf-style syntax to process S3/HTTP server configurations. Used while otherwise debugging the branch. --- src/HTTPFileSystem.cc | 40 +++++++++++++++------------------------- src/S3FileSystem.cc | 37 ++++++++++--------------------------- src/logging.cc | 4 ++-- src/logging.hh | 4 ++-- 4 files changed, 29 insertions(+), 56 deletions(-) diff --git a/src/HTTPFileSystem.cc b/src/HTTPFileSystem.cc index 186147b..7a5cbad 100644 --- a/src/HTTPFileSystem.cc +++ b/src/HTTPFileSystem.cc @@ -22,7 +22,7 @@ #include "logging.hh" #include -#include +#include #include #include @@ -75,33 +75,32 @@ bool HTTPFileSystem::handle_required_config(const std::string &name_from_config, bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { XrdOucEnv myEnv; - XrdOucStream Config(&m_log, getenv("XRDINSTANCE"), &myEnv, "=====> "); - - int cfgFD = open(configfn, O_RDONLY, 0); - if (cfgFD < 0) { - m_log.Emsg("Config", errno, "open config file", configfn); + XrdOucGatherConf httpserver_conf("httpserver.", &m_log); + int result; + if ((result = httpserver_conf.Gather(configfn, + XrdOucGatherConf::full_lines)) < 0) { + m_log.Emsg("Config", -result, "parsing config file", configfn); return false; } - char *temporary; - std::string value; std::string attribute; std::string token_file; - Config.Attach(cfgFD); - while ((temporary = Config.GetMyFirstWord())) { - attribute = temporary; - if (attribute == "httpserver.trace") { - if (!XrdHTTPServer::ConfigLog(Config, m_log)) { + + m_log.setMsgMask(0); + + while (httpserver_conf.GetLine()) { + auto attribute = httpserver_conf.GetToken(); + if (!strcmp(attribute, "httpserver.trace")) { + if (!XrdHTTPServer::ConfigLog(httpserver_conf, m_log)) { m_log.Emsg("Config", "Failed to configure the log level"); } continue; } - temporary = Config.GetWord(); - if (!temporary) { + auto value = httpserver_conf.GetToken(); + if (!value) { continue; } - value = temporary; if (!handle_required_config(attribute, "httpserver.host_name", value, http_host_name) || @@ -113,7 +112,6 @@ bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { value, m_storage_prefix) || !handle_required_config(attribute, "httpserver.token_file", value, token_file)) { - Config.Close(); return false; } } @@ -135,14 +133,6 @@ bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { m_token = std::move(TokenFile(token_file, &m_log)); } - int retc = Config.LastError(); - if (retc) { - m_log.Emsg("Config", -retc, "read config file", configfn); - Config.Close(); - return false; - } - - Config.Close(); return true; } diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 72fb6fe..01cd042 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -25,7 +25,7 @@ #include "stl_string_utils.hh" #include -#include +#include #include #include @@ -64,31 +64,30 @@ bool S3FileSystem::handle_required_config(const char *desired_name, bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { XrdOucEnv myEnv; - XrdOucStream Config(&m_log, getenv("XRDINSTANCE"), &myEnv, "=====> "); - - int cfgFD = open(configfn, O_RDONLY, 0); - if (cfgFD < 0) { - m_log.Emsg("Config", errno, "open config file", configfn); + XrdOucGatherConf s3server_conf("s3.", &m_log); + int result; + if ((result = s3server_conf.Gather(configfn, + XrdOucGatherConf::full_lines)) < 0) { + m_log.Emsg("Config", -result, "parsing config file", configfn); return false; } char *temporary; std::string value; std::string attribute; - Config.Attach(cfgFD); std::shared_ptr newAccessInfo(new S3AccessInfo()); std::string exposedPath; m_log.setMsgMask(0); - while ((temporary = Config.GetMyFirstWord())) { - attribute = temporary; + while ((temporary = s3server_conf.GetLine())) { + attribute = s3server_conf.GetToken(); if (attribute == "s3.trace") { - if (!XrdHTTPServer::ConfigLog(Config, m_log)) { + if (!XrdHTTPServer::ConfigLog(s3server_conf, m_log)) { m_log.Emsg("Config", "Failed to configure the log level"); } continue; } - temporary = Config.GetWord(); + temporary = s3server_conf.GetToken(); if (attribute == "s3.end") { m_s3_access_map[exposedPath] = newAccessInfo; if (newAccessInfo->getS3ServiceName().empty()) { @@ -124,35 +123,27 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { value = temporary; if (!handle_required_config("s3.path_name", value)) { - Config.Close(); return false; } if (!handle_required_config("s3.bucket_name", value)) { - Config.Close(); return false; } if (!handle_required_config("s3.service_name", value)) { - Config.Close(); return false; } if (!handle_required_config("s3.region", value)) { - Config.Close(); return false; } if (!handle_required_config("s3.service_url", value)) { - Config.Close(); return false; } if (!handle_required_config("s3.access_key_file", value)) { - Config.Close(); return false; } if (!handle_required_config("s3.secret_key_file", value)) { - Config.Close(); return false; } if (!handle_required_config("s3.url_style", value)) { - Config.Close(); return false; } @@ -195,14 +186,6 @@ bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { return false; } - int retc = Config.LastError(); - if (retc) { - m_log.Emsg("Config", -retc, "read config file", configfn); - Config.Close(); - return false; - } - - Config.Close(); return true; } diff --git a/src/logging.cc b/src/logging.cc index b8307f6..db1492b 100644 --- a/src/logging.cc +++ b/src/logging.cc @@ -18,7 +18,7 @@ #include "logging.hh" -#include +#include #include #include @@ -55,7 +55,7 @@ std::string XrdHTTPServer::LogMaskToString(int mask) { return ss.str(); } -bool XrdHTTPServer::ConfigLog(XrdOucStream &conf, XrdSysError &log) { +bool XrdHTTPServer::ConfigLog(XrdOucGatherConf &conf, XrdSysError &log) { std::string map_filename; char *val = nullptr; if (!(val = conf.GetToken())) { diff --git a/src/logging.hh b/src/logging.hh index 349798e..0384ce2 100644 --- a/src/logging.hh +++ b/src/logging.hh @@ -20,7 +20,7 @@ #include -class XrdOucStream; +class XrdOucGatherConf; class XrdSysError; namespace XrdHTTPServer { @@ -40,6 +40,6 @@ std::string LogMaskToString(int mask); // Given an xrootd configuration object that matched on httpserver.trace, parse // the remainder of the line and configure the logger appropriately. -bool ConfigLog(XrdOucStream &conf, XrdSysError &log); +bool ConfigLog(XrdOucGatherConf &conf, XrdSysError &log); } // namespace XrdHTTPServer From 4fc85cff1e18641376486fb3bdf56c62f8a034f3 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Mon, 21 Oct 2024 08:30:15 +0200 Subject: [PATCH 04/10] Handle the case where no token is provided -- not an error. --- src/HTTPCommands.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index d29f46c..ab18452 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -418,11 +418,14 @@ bool HTTPRequest::SetupHandle(CURL *curl) { const auto iter = headers.find("Authorization"); if (iter == headers.end()) { std::string token; - if (m_token->Get(token) && !token.empty()) { - headers["Authorization"] = "Bearer " + token; + if (m_token->Get(token)) { + if (!token.empty()) { + headers["Authorization"] = "Bearer " + token; + } } else { errorCode = "E_TOKEN"; errorMessage = "failed to load authorization token from file"; + return false; } } } From f59f8c5b9ac7d9b4749eed501b5b7a631f1a3bb9 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Mon, 21 Oct 2024 08:34:37 +0200 Subject: [PATCH 05/10] Add test fixtures and integration test for HTTP This adds a new test fixture that launches an xrootd daemon for use during the HTTP unit tests. --- test/CMakeLists.txt | 41 +++++++- test/http_tests.cc | 76 +++++++++++++- test/xrdhttp-setup.sh | 215 +++++++++++++++++++++++++++++++++++++++ test/xrdhttp-teardown.sh | 28 +++++ test/xrdhttp-test.sh | 41 ++++++++ 5 files changed, 399 insertions(+), 2 deletions(-) create mode 100755 test/xrdhttp-setup.sh create mode 100755 test/xrdhttp-teardown.sh create mode 100755 test/xrdhttp-test.sh diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 24e26c8..5c66143 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -49,5 +49,44 @@ add_test( NAME http-unit COMMAND - ${CMAKE_CURRENT_BINARY_DIR}/http-gtest + ${CMAKE_CURRENT_BINARY_DIR}/http-gtest "${CMAKE_BINARY_DIR}/tests/basic/setup.sh" +) + +set_tests_properties(http-unit + PROPERTIES + FIXTURES_REQUIRED HTTP::basic +) + +###################################### +# Integration tests. +###################################### +add_test(NAME HTTP::basic::setup + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/xrdhttp-setup.sh" basic) + +set_tests_properties(HTTP::basic::setup + PROPERTIES + FIXTURES_SETUP HTTP::basic + ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR};SOURCE_DIR=${CMAKE_SOURCE_DIR}" +) + +add_test(NAME HTTP::basic::teardown + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/xrdhttp-teardown.sh" basic) + +set_tests_properties(HTTP::basic::teardown + PROPERTIES + FIXTURES_CLEANUP HTTP::basic + ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" +) + +add_test(NAME HTTP::basic::test + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/xrdhttp-test.sh" basic) + +list(APPEND BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/basic/server.log) +list(APPEND BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/basic/client.log) + +set_tests_properties(HTTP::basic::test + PROPERTIES + FIXTURES_REQUIRED HTTP::basic + ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" + ATTACHED_FILES_ON_FAIL "${BASIC_TEST_LOGS}" ) diff --git a/test/http_tests.cc b/test/http_tests.cc index e665592..4429f53 100644 --- a/test/http_tests.cc +++ b/test/http_tests.cc @@ -17,15 +17,75 @@ ***************************************************************/ #include "../src/HTTPCommands.hh" +#include "../src/HTTPFileSystem.hh" +#include #include #include #include +#include +#include +#include +#include + +std::string g_ca_file; +std::string g_config_file; +std::string g_url; + +void parseEnvFile(const std::string &fname) { + std::ifstream fh(fname); + if (!fh.is_open()) { + std::cerr << "Failed to open env file: " << strerror(errno); + exit(1); + } + std::string line; + while (std::getline(fh, line)) { + auto idx = line.find("="); + if (idx == std::string::npos) { + continue; + } + auto key = line.substr(0, idx); + auto val = line.substr(idx + 1); + if (key == "X509_CA_FILE") { + g_ca_file = val; + setenv("X509_CERT_FILE", g_ca_file.c_str(), 1); + } else if (key == "XROOTD_URL") { + g_url = val; + } else if (key == "XROOTD_CFG") { + g_config_file = val; + } + } +} + +TEST(TestHTTPFile, TestXfer) { + XrdSysLogger log; + + HTTPFileSystem fs(&log, g_config_file.c_str(), nullptr); + + struct stat si; + auto rc = fs.Stat("/hello_world.txt", &si); + ASSERT_EQ(rc, 0); + ASSERT_EQ(si.st_size, 13); + + auto fh = fs.newFile(); + XrdOucEnv env; + rc = fh->Open("/hello_world.txt", O_RDONLY, 0700, env); + ASSERT_EQ(rc, 0); + + char buf[12]; + auto res = fh->Read(buf, 0, 12); + ASSERT_EQ(res, 12); + + ASSERT_EQ(memcmp(buf, "Hello, World", 12), 0); + + ASSERT_EQ(fh->Close(), 0); +} + class TestHTTPRequest : public HTTPRequest { public: XrdSysLogger log{}; - XrdSysError err{&log, "TestS3CommandsLog"}; + XrdSysError err{&log, "TestHTTPR3equest"}; TestHTTPRequest(const std::string &url) : HTTPRequest(url, err, nullptr) {} }; @@ -46,5 +106,19 @@ TEST(TestHTTPParseProtocol, Test1) { int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); + + if (argc != 2) { + printf("Usage: %s test_env_file", argv[0]); + return 1; + } + setenv("XRDINSTANCE", "xrootd", 1); + std::cout << "Running HTTP test with environment file " << argv[1] + << std::endl; + parseEnvFile(argv[1]); + + auto logger = new XrdSysLogger(2, 0); + auto log = new XrdSysError(logger, "curl_"); + HTTPRequest::Init(*log); + return RUN_ALL_TESTS(); } diff --git a/test/xrdhttp-setup.sh b/test/xrdhttp-setup.sh new file mode 100755 index 0000000..62861f0 --- /dev/null +++ b/test/xrdhttp-setup.sh @@ -0,0 +1,215 @@ +#!/bin/sh + +TEST_NAME=$1 + +if [ -z "$BINARY_DIR" ]; then + echo "\$BINARY_DIR environment variable is not set; cannot run test" + exit 1 +fi +if [ ! -d "$BINARY_DIR" ]; then + echo "$BINARY_DIR is not a directory; cannot run test" + exit 1 +fi +if [ -z "$SOURCE_DIR" ]; then + echo "\$SOURCE_DIR environment variable is not set; cannot run test" + exit 1 +fi +if [ ! -d "$SOURCE_DIR" ]; then + echo "\$SOURCE_DIR environment variable is not set; cannot run test" + exit 1 +fi + +echo "Setting up HTTP server for $TEST_NAME test" + +XROOTD_BIN="$(command -v xrootd)" + +if [ -z "XROOTD_BIN" ]; then + echo "xrootd binary not found; cannot run unit test" + exit 1 +fi + +mkdir -p "$BINARY_DIR/tests/$TEST_NAME" +RUNDIR=$(mktemp -d -p "$BINARY_DIR/tests/$TEST_NAME" test_run.XXXXXXXX) + +if [ ! -d "$RUNDIR" ]; then + echo "Failed to create test run directory; cannot run xrootd" + exit 1 +fi + +echo "Using $RUNDIR as the test run's home directory." +cd "$RUNDIR" + +export XROOTD_CONFIGDIR="$RUNDIR/xrootd-config" +mkdir -p "$XROOTD_CONFIGDIR/ca" + +echo > "$BINARY_DIR/tests/$TEST_NAME/server.log" + +# Create the TLS credentials for the test +openssl genrsa -out "$XROOTD_CONFIGDIR/tlscakey.pem" 4096 >> "$BINARY_DIR/tests/$TEST_NAME/server.log" +touch "$XROOTD_CONFIGDIR/ca/index.txt" +echo '01' > "$XROOTD_CONFIGDIR/ca/serial.txt" + +cat > "$XROOTD_CONFIGDIR/tlsca.ini" <> "$BINARY_DIR/tests/$TEST_NAME/server.log" +if [ "$?" -ne 0 ]; then + echo "Failed to generate CA request" + exit 1 +fi + +# Create the host certificate request +openssl genrsa -out "$XROOTD_CONFIGDIR/tls.key" 4096 >> "$BINARY_DIR/tests/$TEST_NAME/server.log" +openssl req -new -key "$XROOTD_CONFIGDIR/tls.key" -config "$XROOTD_CONFIGDIR/tlsca.ini" -out "$XROOTD_CONFIGDIR/tls.csr" -outform PEM -subj "/CN=$(hostname)" 0<&- >> "$BINARY_DIR/tests/$TEST_NAME/server.log" +if [ "$?" -ne 0 ]; then + echo "Failed to generate host certificate request" + exit 1 +fi + +openssl ca -config "$XROOTD_CONFIGDIR/tlsca.ini" -batch -policy signing_policy -extensions cert_extensions -out "$XROOTD_CONFIGDIR/tls.crt" -infiles "$XROOTD_CONFIGDIR/tls.csr" 0<&- 2>> "$BINARY_DIR/tests/$TEST_NAME/server.log" +if [ "$?" -ne 0 ]; then + echo "Failed to sign host certificate request" + exit 1 +fi + + +# Create xrootd configuration and runtime directory structure +XROOTD_EXPORTDIR="$RUNDIR/xrootd-export" +mkdir -p "$XROOTD_EXPORTDIR" + +# XRootD has strict length limits on the admin path location. +# Therefore, we also create a directory in /tmp. +XROOTD_RUNDIR=$(mktemp -d -p /tmp xrootd_test.XXXXXXXX) + +export XROOTD_CONFIG="$XROOTD_CONFIGDIR/xrootd.cfg" +cat > "$XROOTD_CONFIG" < $XROOTD_CONFIGDIR/authdb < "$XROOTD_EXPORTDIR/hello_world.txt" + +# Launch XRootD daemon. +"$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- >>"$BINARY_DIR/tests/$TEST_NAME/server.log" 2>>"$BINARY_DIR/tests/$TEST_NAME/server.log" & +XROOTD_PID=$! +echo "xrootd daemon PID: $XROOTD_PID" + +echo "XRootD logs are available at $BINARY_DIR/tests/$TEST_NAME/server.log" + +# Build environment file for remainder of tests +XROOTD_URL=$(grep "Xrd_ProtLoad: enabling port" "$BINARY_DIR/tests/$TEST_NAME/server.log" | grep 'for protocol XrdHttp' | awk '{print $7}') +IDX=0 +while [ -z "$XROOTD_URL" ]; do + sleep 1 + XROOTD_URL=$(grep "Xrd_ProtLoad: enabling port" "$BINARY_DIR/tests/$TEST_NAME/server.log" | grep 'for protocol XrdHttp' | awk '{print $7}') + IDX=$(($IDX+1)) + if [ $IDX -gt 1 ]; then + echo "Waiting for xrootd to start ($IDX seconds so far) ..." + fi + if [ $IDX -eq 10 ]; then + echo "xrootd failed to start - failing" + exit 1 + fi +done +XROOTD_URL="https://$(hostname):$XROOTD_URL/" +echo "xrootd started at $XROOTD_URL" + +XROOTD_HTTPSERVER_CONFIG="$XROOTD_CONFIGDIR/xrootd-httpserver.cfg" +cat > "$XROOTD_HTTPSERVER_CONFIG" < "$BINARY_DIR/tests/$TEST_NAME/setup.sh" < "$BINARY_DIR/tests/$TEST_NAME/client.log") +CURL_EXIT=$? + +if [ $CURL_EXIT -ne 0 ]; then + echo "Download of hello-world text failed" + exit 1 +fi + +if [ "$CONTENTS" != "Hello, World" ]; then + echo "Downloaded hello-world text is incorrect: $CONTENTS" + exit 1 +fi + +echo "Running $TEST_NAME - missing object" + +HTTP_CODE=$(curl --cacert $X509_CA_FILE --output /dev/null -v --write-out '%{http_code}' "$XROOTD_URL/missing.txt" 2>> "$BINARY_DIR/tests/$TEST_NAME/client.log") +if [ "$HTTP_CODE" -ne 404 ]; then + echo "Expected HTTP code is 404; actual was $HTTP_CODE" + exit 1 +fi From 9206647bb80e57222ea943a3dd3637c2889d2be2 Mon Sep 17 00:00:00 2001 From: "Nicholas J. Kisseberth" Date: Fri, 18 Oct 2024 14:20:22 -0700 Subject: [PATCH 06/10] Enhancements so that 'debug' can spit out headers without having to also log data --- src/HTTPCommands.cc | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index ab18452..4e8f9d0 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -159,6 +159,27 @@ int debugCallback(CURL *handle, curl_infotype ci, char *data, size_t size, (void)handle; /* prevent compiler warning */ (void)clientp; + switch (ci) { + case CURLINFO_TEXT: + fputs("== Info: ", stderr); + fwrite(data, size, 1, stderr); + default: /* in case a new one is introduced to shock us */ + return 0; + + case CURLINFO_HEADER_OUT: + text = "=> Send header"; + dump_plain(text, stderr, (unsigned char *)data, size); + break; + } + return 0; +} + +int debugAndDumpCallback(CURL *handle, curl_infotype ci, char *data, + size_t size, void *clientp) { + const char *text; + (void)handle; /* prevent compiler warning */ + (void)clientp; + switch (ci) { case CURLINFO_TEXT: fputs("== Info: ", stderr); @@ -187,7 +208,6 @@ int debugCallback(CURL *handle, curl_infotype ci, char *data, size_t size, break; } dump(text, stderr, (unsigned char *)data, size); - return 0; } @@ -450,10 +470,15 @@ bool HTTPRequest::SetupHandle(CURL *curl) { this->errorMessage = "curl_easy_setopt( CURLOPT_HTTPHEADER ) failed."; return false; } - if (m_log.getMsgMask() & LogMask::Dump) { + if (m_log.getMsgMask() & LogMask::Debug) { rv = curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, debugCallback); rv = curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); } + if (m_log.getMsgMask() & LogMask::Dump) { + rv = + curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, debugAndDumpCallback); + rv = curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); + } return true; } From f43595dcd1e95487a476e35298cab0ea667a58b9 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Mon, 21 Oct 2024 21:06:39 +0200 Subject: [PATCH 07/10] Do not assume buffer from curl is null-terminated Print a fixed size of the buffer, not a string. curl doesn't necessarily null-terminate. --- src/HTTPCommands.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/HTTPCommands.cc b/src/HTTPCommands.cc index 4e8f9d0..de6a6fa 100644 --- a/src/HTTPCommands.cc +++ b/src/HTTPCommands.cc @@ -150,7 +150,8 @@ static void dump_plain(const char *text, FILE *stream, unsigned char *ptr, size_t size) { fprintf(stream, "%s, %10.10ld bytes (0x%8.8lx)\n", text, (long)size, (long)size); - fprintf(stream, "%s\n", ptr); + fwrite(ptr, 1, size, stream); + fputs("\n", stream); } int debugCallback(CURL *handle, curl_infotype ci, char *data, size_t size, From 2a59d212d019b81e55bea43c41ca7fc0e1b6bc6f Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Mon, 21 Oct 2024 21:24:36 +0200 Subject: [PATCH 08/10] Add integration test for S3 backends with minio. --- .github/workflows/test.yml | 4 + test/CMakeLists.txt | 34 +++++ test/s3-setup.sh | 277 +++++++++++++++++++++++++++++++++++++ test/s3-teardown.sh | 32 +++++ test/s3-test.sh | 46 ++++++ 5 files changed, 393 insertions(+) create mode 100755 test/s3-setup.sh create mode 100755 test/s3-teardown.sh create mode 100755 test/s3-test.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c7f87d5..9c91ab8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,6 +34,10 @@ jobs: sudo curl -L https://xrootd.web.cern.ch/repo/RPM-GPG-KEY.txt -o /etc/apt/trusted.gpg.d/xrootd.asc sudo /bin/sh -c 'echo "deb https://xrootd.web.cern.ch/ubuntu noble stable" >> /etc/apt/sources.list.d/xrootd.list' sudo apt update && sudo apt-get install -y cmake libcurl4-openssl-dev libcurl4 pkg-config libssl-dev xrootd-server libxrootd-dev libxrootd-server-dev libgtest-dev + sudo curl -L https://dl.min.io/server/minio/release/linux-amd64/minio -o /usr/local/bin/minio + sudo chmod +x /usr/local/bin/minio + sudo curl -L https://dl.min.io/client/mc/release/linux-amd64/mc -o /usr/local/bin/mc + sudo chmod +x /usr/local/bin/mc - name: Create Build Environment # Some projects don't allow in-source building, so create a separate build directory diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5c66143..6279637 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -90,3 +90,37 @@ set_tests_properties(HTTP::basic::test ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" ATTACHED_FILES_ON_FAIL "${BASIC_TEST_LOGS}" ) + +#### +# Start of S3 tests +#### +add_test(NAME S3::s3_basic::setup + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-setup.sh" s3_basic) + +set_tests_properties(S3::s3_basic::setup + PROPERTIES + FIXTURES_SETUP S3::s3_basic + ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR};SOURCE_DIR=${CMAKE_SOURCE_DIR}" +) + +add_test(NAME S3::s3_basic::teardown + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-teardown.sh" s3_basic) + +set_tests_properties(S3::s3_basic::teardown + PROPERTIES + FIXTURES_CLEANUP S3::s3_basic + ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" +) + +add_test(NAME S3::s3_basic::test + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-test.sh" s3_basic) + +list(APPEND S3_BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/s3_basic/server.log) +list(APPEND S3_BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/s3_basic/client.log) + +set_tests_properties(S3::s3_basic::test + PROPERTIES + FIXTURES_REQUIRED S3::s3_basic + ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" + ATTACHED_FILES_ON_FAIL "${S3_BASIC_TEST_LOGS}" +) diff --git a/test/s3-setup.sh b/test/s3-setup.sh new file mode 100755 index 0000000..5e274b2 --- /dev/null +++ b/test/s3-setup.sh @@ -0,0 +1,277 @@ +#!/bin/sh + +TEST_NAME=$1 + +if [ -z "$BINARY_DIR" ]; then + echo "\$BINARY_DIR environment variable is not set; cannot run test" + exit 1 +fi +if [ ! -d "$BINARY_DIR" ]; then + echo "$BINARY_DIR is not a directory; cannot run test" + exit 1 +fi +if [ -z "$SOURCE_DIR" ]; then + echo "\$SOURCE_DIR environment variable is not set; cannot run test" + exit 1 +fi +if [ ! -d "$SOURCE_DIR" ]; then + echo "\$SOURCE_DIR environment variable is not set; cannot run test" + exit 1 +fi + +echo "Setting up S3 server for $TEST_NAME test" + +MINIO_BIN="$(command -v minio)" +if [ -z "$MINIO_BIN" ]; then + echo "minio binary not found; cannot run unit test" + exit 1 +fi + +MC_BIN="$(command -v mc)" +if [ -z "$MC_BIN" ]; then + echo "mc binary not found; cannot run unit test" + exit 1 +fi + +XROOTD_BIN="$(command -v xrootd)" +if [ -z "XROOTD_BIN" ]; then + echo "xrootd binary not found; cannot run unit test" + exit 1 +fi + +mkdir -p "$BINARY_DIR/tests/$TEST_NAME" +RUNDIR=$(mktemp -d -p "$BINARY_DIR/tests/$TEST_NAME" test_run.XXXXXXXX) + +if [ ! -d "$RUNDIR" ]; then + echo "Failed to create test run directory; cannot run minio" + exit 1 +fi + +echo "Using $RUNDIR as the test run's home directory." +cd "$RUNDIR" + +MINIO_DATADIR="$RUNDIR/minio-data" +MINIO_CLIENTDIR="$RUNDIR/minio-client" +MINIO_CERTSDIR="$RUNDIR/minio-certs" +XROOTD_CONFIGDIR="$RUNDIR/xrootd-config" +mkdir -p "$XROOTD_CONFIGDIR" +XROOTD_RUNDIR=$(mktemp -d -p /tmp xrootd_test.XXXXXXXX) + +mkdir -p "$MINIO_DATADIR" +mkdir -p "$MINIO_CERTSDIR/ca" +mkdir -p "$MINIO_CERTSDIR/CAs" +mkdir -p "$MINIO_CLIENTDIR" + +echo > "$BINARY_DIR/tests/$TEST_NAME/server.log" + +# Create the TLS credentials for the test +openssl genrsa -out "$MINIO_CERTSDIR/tlscakey.pem" 4096 >> "$BINARY_DIR/tests/$TEST_NAME/server.log" +touch "$MINIO_CERTSDIR/ca/index.txt" +echo '01' > "$MINIO_CERTSDIR/ca/serial.txt" + +cat > "$MINIO_CERTSDIR/tlsca.ini" <> "$BINARY_DIR/tests/$TEST_NAME/server.log" +if [ "$?" -ne 0 ]; then + echo "Failed to generate CA request" + exit 1 +fi + +# Create the host certificate request +openssl genrsa -out "$MINIO_CERTSDIR/private.key" 4096 >> "$BINARY_DIR/tests/$TEST_NAME/server.log" +openssl req -new -key "$MINIO_CERTSDIR/private.key" -config "$MINIO_CERTSDIR/tlsca.ini" -out "$MINIO_CERTSDIR/public.csr" -outform PEM -subj "/CN=$(hostname)" 0<&- >> "$BINARY_DIR/tests/$TEST_NAME/server.log" +if [ "$?" -ne 0 ]; then + echo "Failed to generate host certificate request" + exit 1 +fi + +openssl ca -config "$MINIO_CERTSDIR/tlsca.ini" -batch -policy signing_policy -extensions cert_extensions -out "$MINIO_CERTSDIR/public.crt" -infiles "$MINIO_CERTSDIR/public.csr" 0<&- 2>> "$BINARY_DIR/tests/$TEST_NAME/server.log" +if [ "$?" -ne 0 ]; then + echo "Failed to sign host certificate request" + exit 1 +fi + +# Set the minio root credentials: + +export MINIO_ROOT_USER=minioadmin +export MINIO_ROOT_PASSWORD=QXDEiQxQw8qY +MINIO_USER=miniouser +MINIO_PASSWORD=2Z303QCzRI7s + +# Launch minio +"$MINIO_BIN" --certs-dir "$MINIO_CERTSDIR" server --address "$(hostname):0" "$MINIO_DATADIR" 0<&- >"$BINARY_DIR/tests/$TEST_NAME/server.log" 2>&1 & +MINIO_PID=$! +echo "minio daemon PID: $MINIO_PID" +sleep 1 +MINIO_URL=$(grep "API: " "$BINARY_DIR/tests/$TEST_NAME/server.log" | tr ':' ' ' | awk '{print $NF}' | tail -n 1) +IDX=0 +while [ -z "$MINIO_URL" ]; do + sleep 1 + MINIO_URL=$(grep "API: " "$BINARY_DIR/tests/$TEST_NAME/server.log" | tr ':' ' ' | awk '{print $NF}' | tail -n 1) + IDX=$(($IDX+1)) + if [ $IDX -gt 1 ]; then + echo "Waiting for minio to start ($IDX seconds so far) ..." + fi + if [ $IDX -eq 10 ]; then + echo "minio failed to start - failing" + exit 1 + fi +done +MINIO_URL=https://$(hostname):$MINIO_URL +echo "Minio API server started on $MINIO_URL" + +cat > "$BINARY_DIR/tests/$TEST_NAME/setup.sh" < "$RUNDIR/hello_world.txt" +"$MC_BIN" --insecure --config-dir "$MINIO_CLIENTDIR" cp "$RUNDIR/hello_world.txt" userminio/test-bucket/hello_world.txt + +#### +# Starting XRootD config with S3 backend +#### + +export XROOTD_CONFIG="$XROOTD_CONFIGDIR/xrootd.cfg" +cat > "$XROOTD_CONFIG" < $XROOTD_CONFIGDIR/authdb < $XROOTD_CONFIGDIR/access_key +echo "$MINIO_PASSWORD" > $XROOTD_CONFIGDIR/secret_key + +export X509_CERT_FILE=$MINIO_CERTSDIR/CAs/tlsca.pem +"$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- 2>/dev/null >/dev/null & +XROOTD_PID=$! +echo "xrootd daemon PID: $XROOTD_PID" + +XROOTD_URL=$(grep "Xrd_ProtLoad: enabling port" "$BINARY_DIR/tests/$TEST_NAME/server.log" | grep 'for protocol XrdHttp' | awk '{print $7}') +IDX=0 +while [ -z "$XROOTD_URL" ]; do + sleep 1 + XROOTD_URL=$(grep "Xrd_ProtLoad: enabling port" "$BINARY_DIR/tests/$TEST_NAME/server.log" | grep 'for protocol XrdHttp' | awk '{print $7}') + IDX=$(($IDX+1)) + if [ $IDX -gt 1 ]; then + echo "Waiting for xrootd to start ($IDX seconds so far) ..." + fi + if [ $IDX -eq 10 ]; then + echo "xrootd failed to start - failing" + exit 1 + fi +done +XROOTD_URL="https://$(hostname):$XROOTD_URL/" +echo "xrootd started at $XROOTD_URL" + +cat >> "$BINARY_DIR/tests/$TEST_NAME/setup.sh" < "$BINARY_DIR/tests/$TEST_NAME/client.log") +CURL_EXIT=$? + +if [ $CURL_EXIT -ne 0 ]; then + echo "Download of hello-world text failed" + exit 1 +fi + +if [ "$CONTENTS" != "Hello, World" ]; then + echo "Downloaded hello-world text is incorrect: $CONTENTS" + exit 1 +fi + +echo "Running $TEST_NAME - missing object" + +HTTP_CODE=$(curl --cacert $X509_CA_FILE --output /dev/null -v --write-out '%{http_code}' "$XROOTD_URL/test/missing.txt" 2>> "$BINARY_DIR/tests/$TEST_NAME/client.log") +if [ "$HTTP_CODE" -ne 404 ]; then + echo "Expected HTTP code is 404; actual was $HTTP_CODE" + exit 1 +fi From 1a1126c5b4039676132845c6f34d973941dadf39 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Tue, 22 Oct 2024 11:47:41 +0200 Subject: [PATCH 09/10] Add stress test for S3 and allow enabling of valgrind --- test/CMakeLists.txt | 10 ++++++++ test/s3-setup.sh | 13 ++++++++-- test/s3-stress-test.sh | 56 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 2 deletions(-) create mode 100755 test/s3-stress-test.sh diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6279637..21d5c8a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -115,6 +115,9 @@ set_tests_properties(S3::s3_basic::teardown add_test(NAME S3::s3_basic::test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-test.sh" s3_basic) +add_test(NAME S3::s3_basic::stress_test + COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-stress-test.sh" s3_basic) + list(APPEND S3_BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/s3_basic/server.log) list(APPEND S3_BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/s3_basic/client.log) @@ -124,3 +127,10 @@ set_tests_properties(S3::s3_basic::test ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" ATTACHED_FILES_ON_FAIL "${S3_BASIC_TEST_LOGS}" ) + +set_tests_properties(S3::s3_basic::stress_test + PROPERTIES + FIXTURES_REQUIRED S3::s3_basic + ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" + ATTACHED_FILES_ON_FAIL "${S3_BASIC_TEST_LOGS}" +) diff --git a/test/s3-setup.sh b/test/s3-setup.sh index 5e274b2..1665ad2 100755 --- a/test/s3-setup.sh +++ b/test/s3-setup.sh @@ -2,6 +2,11 @@ TEST_NAME=$1 +VALGRIND=0 +if [ "$2" = "valgrind" ]; then + VALGRIND=1 +fi + if [ -z "$BINARY_DIR" ]; then echo "\$BINARY_DIR environment variable is not set; cannot run test" exit 1 @@ -250,7 +255,11 @@ echo "$MINIO_USER" > $XROOTD_CONFIGDIR/access_key echo "$MINIO_PASSWORD" > $XROOTD_CONFIGDIR/secret_key export X509_CERT_FILE=$MINIO_CERTSDIR/CAs/tlsca.pem -"$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- 2>/dev/null >/dev/null & +if [ "$VALGRIND" -eq 1 ]; then + valgrind --leak-check=full --track-origins=yes "$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- 2>>"$BINARY_DIR/tests/$TEST_NAME/server.log" >>"$BINARY_DIR/tests/$TEST_NAME/server.log" & +else + "$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- 2>>"$BINARY_DIR/tests/$TEST_NAME/server.log" >>"$BINARY_DIR/tests/$TEST_NAME/server.log" & +fi XROOTD_PID=$! echo "xrootd daemon PID: $XROOTD_PID" @@ -263,7 +272,7 @@ while [ -z "$XROOTD_URL" ]; do if [ $IDX -gt 1 ]; then echo "Waiting for xrootd to start ($IDX seconds so far) ..." fi - if [ $IDX -eq 10 ]; then + if [ $IDX -eq 20 ]; then echo "xrootd failed to start - failing" exit 1 fi diff --git a/test/s3-stress-test.sh b/test/s3-stress-test.sh new file mode 100755 index 0000000..7076a79 --- /dev/null +++ b/test/s3-stress-test.sh @@ -0,0 +1,56 @@ +#!/bin/sh + +TEST_NAME=$1 + +if [ -z "$BINARY_DIR" ]; then + echo "\$BINARY_DIR environment variable is not set; cannot run test" + exit 1 +fi +if [ ! -d "$BINARY_DIR" ]; then + echo "$BINARY_DIR is not a directory; cannot run test" + exit 1 +fi + +echo "Running $TEST_NAME - simple download" + +if [ ! -f "$BINARY_DIR/tests/$TEST_NAME/setup.sh" ]; then + echo "Test environment file $BINARY_DIR/tests/$TEST_NAME/setup.sh does not exist - cannot run test" + exit 1 +fi +. "$BINARY_DIR/tests/$TEST_NAME/setup.sh" + +if [ -z "$XROOTD_URL" ]; then + echo "XRootD URL is not set; cannot test" + exit 1 +fi + +IDX=1 +while [ $IDX -le 100 ]; do + IDX=$(($IDX+1)) + + curl --cacert $X509_CA_FILE -v --fail "$XROOTD_URL/test/hello_world.txt" 2> "$BINARY_DIR/tests/$TEST_NAME/client-$IDX.log" > "$BINARY_DIR/tests/$TEST_NAME/client-$IDX.out" & + export CURL_${IDX}_PID=$! + +done + +IDX=1 +while [ $IDX -le 100 ]; do + IDX=$(($IDX+1)) + + CURL_NAME="CURL_${IDX}_PID" + eval CURL_NAME='\$CURL_${IDX}_PID' + eval CURL_PID=$CURL_NAME + wait $CURL_PID + CURL_EXIT=$? + + if [ $CURL_EXIT -ne 0 ]; then + echo "Download of hello-world text failed for worker $IDX" + exit 1 + fi + + CONTENTS=$(cat "$BINARY_DIR/tests/$TEST_NAME/client-$IDX.out") + if [ "$CONTENTS" != "Hello, World" ]; then + echo "Downloaded hello-world text for worker $IDX is incorrect: $CONTENTS" + exit 1 + fi +done From d6fd8533a897099ed92ced927a15f51a50d2c5a8 Mon Sep 17 00:00:00 2001 From: Brian Bockelman Date: Tue, 22 Oct 2024 11:54:18 +0200 Subject: [PATCH 10/10] Fully initialize the struct in the Stat response When providing a successful response to a Stat request, initialize all the fields inside the struct. This ensures everything is zeroed out when it is later used. (Use of uninitialized data was reported by valgrind) --- src/HTTPFile.cc | 2 ++ src/S3File.cc | 1 + src/S3FileSystem.cc | 3 +++ 3 files changed, 6 insertions(+) diff --git a/src/HTTPFile.cc b/src/HTTPFile.cc index 3bc6eea..0506de7 100644 --- a/src/HTTPFile.cc +++ b/src/HTTPFile.cc @@ -145,6 +145,7 @@ ssize_t HTTPFile::Read(void *buffer, off_t offset, size_t size) { int HTTPFile::Fstat(struct stat *buff) { if (m_stat) { + memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0600 | S_IFREG; buff->st_nlink = 1; buff->st_uid = 1; @@ -232,6 +233,7 @@ int HTTPFile::Fstat(struct stat *buff) { } if (buff) { + memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0600 | S_IFREG; buff->st_nlink = 1; buff->st_uid = 1; diff --git a/src/S3File.cc b/src/S3File.cc index d78828e..6ad133e 100644 --- a/src/S3File.cc +++ b/src/S3File.cc @@ -172,6 +172,7 @@ int S3File::Fstat(struct stat *buff) { current_newline = next_newline; } + memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0600 | S_IFREG; buff->st_nlink = 1; buff->st_uid = 1; diff --git a/src/S3FileSystem.cc b/src/S3FileSystem.cc index 01cd042..d8d10d1 100644 --- a/src/S3FileSystem.cc +++ b/src/S3FileSystem.cc @@ -270,6 +270,7 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, } if (object.empty()) { + memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0700 | S_IFDIR; buff->st_nlink = 0; buff->st_uid = 1; @@ -291,6 +292,7 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, } } if (foundObj) { + memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0600 | S_IFREG; buff->st_nlink = 1; buff->st_uid = buff->st_gid = 1; @@ -313,6 +315,7 @@ int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, return -ENOENT; } + memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0700 | S_IFDIR; buff->st_nlink = 0; buff->st_uid = 1;