From 3e98332c5b9c293d3d0baf36626ac39605c01623 Mon Sep 17 00:00:00 2001 From: dentinyhao Date: Sat, 27 Sep 2025 00:28:30 +0000 Subject: [PATCH 1/2] Check libcurl return value --- src/httpfs_curl_client.cpp | 102 +++++++++++++++++-------------------- src/s3fs.cpp | 2 +- 2 files changed, 49 insertions(+), 55 deletions(-) diff --git a/src/httpfs_curl_client.cpp b/src/httpfs_curl_client.cpp index 6de3c85..c350214 100644 --- a/src/httpfs_curl_client.cpp +++ b/src/httpfs_curl_client.cpp @@ -7,9 +7,7 @@ #include #include "duckdb/common/exception/http_exception.hpp" -#ifndef EMSCRIPTEN -#include "httpfs_curl_client.hpp" -#endif +#define CHECK_CURL_OK(expr) D_ASSERT((expr) == CURLE_OK) namespace duckdb { @@ -91,11 +89,11 @@ CURLHandle::CURLHandle(const string &token, const string &cert_path) { throw InternalException("Failed to initialize curl"); } if (!token.empty()) { - curl_easy_setopt(curl, CURLOPT_XOAUTH2_BEARER, token.c_str()); - curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BEARER); + CHECK_CURL_OK(curl_easy_setopt(curl, CURLOPT_XOAUTH2_BEARER, token.c_str())); + CHECK_CURL_OK(curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BEARER)); } if (!cert_path.empty()) { - curl_easy_setopt(curl, CURLOPT_CAINFO, cert_path.c_str()); + CHECK_CURL_OK(curl_easy_setopt(curl, CURLOPT_CAINFO, cert_path.c_str())); } } @@ -110,7 +108,7 @@ struct RequestInfo { std::vector header_collection; }; -static idx_t httpfs_client_count = 0; +static std::atomic httpfs_client_count {0}; class HTTPFSCurlClient : public HTTPClient { public: @@ -129,45 +127,48 @@ class HTTPFSCurlClient : public HTTPClient { // set curl options // follow redirects - curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L)); // Curl re-uses connections by default if (!http_params.keep_alive) { - curl_easy_setopt(*curl, CURLOPT_FORBID_REUSE, 1L); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_FORBID_REUSE, 1L)); } if (http_params.enable_curl_server_cert_verification) { - curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYPEER, 1L); // Verify the cert - curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYHOST, 2L); // Verify that the cert matches the hostname + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYPEER, 1L)); // Verify the cert + CHECK_CURL_OK( + curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYHOST, 2L)); // Verify that the cert matches the hostname } else { - curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYPEER, 0L); // Override default, don't verify the cert - curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYHOST, - 0L); // Override default, don't verify that the cert matches the hostname + CHECK_CURL_OK( + curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYPEER, 0L)); // Override default, don't verify the cert + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_SSL_VERIFYHOST, + 0L)); // Override default, don't verify that the cert matches the hostname } // set read timeout - curl_easy_setopt(*curl, CURLOPT_TIMEOUT, http_params.timeout); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_TIMEOUT, http_params.timeout)); // set connection timeout - curl_easy_setopt(*curl, CURLOPT_CONNECTTIMEOUT, http_params.timeout); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_CONNECTTIMEOUT, http_params.timeout)); // accept content as-is (i.e no decompressing) - curl_easy_setopt(*curl, CURLOPT_ACCEPT_ENCODING, "identity"); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_ACCEPT_ENCODING, "identity")); // follow redirects - curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L)); // define the header callback - curl_easy_setopt(*curl, CURLOPT_HEADERFUNCTION, RequestHeaderCallback); - curl_easy_setopt(*curl, CURLOPT_HEADERDATA, &request_info->header_collection); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_HEADERFUNCTION, RequestHeaderCallback)); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_HEADERDATA, &request_info->header_collection)); // define the write data callback (for get requests) - curl_easy_setopt(*curl, CURLOPT_WRITEFUNCTION, RequestWriteCallback); - curl_easy_setopt(*curl, CURLOPT_WRITEDATA, &request_info->body); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_WRITEFUNCTION, RequestWriteCallback)); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_WRITEDATA, &request_info->body)); if (!http_params.http_proxy.empty()) { - curl_easy_setopt(*curl, CURLOPT_PROXY, - StringUtil::Format("%s:%s", http_params.http_proxy, http_params.http_proxy_port).c_str()); + CHECK_CURL_OK(curl_easy_setopt( + *curl, CURLOPT_PROXY, + StringUtil::Format("%s:%s", http_params.http_proxy, http_params.http_proxy_port).c_str())); if (!http_params.http_proxy_username.empty()) { - curl_easy_setopt(*curl, CURLOPT_PROXYUSERNAME, http_params.http_proxy_username.c_str()); - curl_easy_setopt(*curl, CURLOPT_PROXYPASSWORD, http_params.http_proxy_password.c_str()); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_PROXYUSERNAME, http_params.http_proxy_username.c_str())); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_PROXYPASSWORD, http_params.http_proxy_password.c_str())); } } } @@ -191,9 +192,9 @@ class HTTPFSCurlClient : public HTTPClient { CURLcode res; { // If the same handle served a HEAD request, we must set NOBODY back to 0L to request content again - curl_easy_setopt(*curl, CURLOPT_NOBODY, 0L); - curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); - curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_NOBODY, 0L)); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str())); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr)); res = curl->Execute(); } @@ -237,15 +238,15 @@ class HTTPFSCurlClient : public HTTPClient { CURLcode res; { - curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str())); // Perform PUT - curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "PUT"); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "PUT")); // Include PUT body - curl_easy_setopt(*curl, CURLOPT_POSTFIELDS, const_char_ptr_cast(info.buffer_in)); - curl_easy_setopt(*curl, CURLOPT_POSTFIELDSIZE, info.buffer_in_len); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_POSTFIELDS, const_char_ptr_cast(info.buffer_in))); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_POSTFIELDSIZE, info.buffer_in_len)); // Apply headers - curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr)); res = curl->Execute(); } @@ -271,14 +272,14 @@ class HTTPFSCurlClient : public HTTPClient { CURLcode res; { // Set URL - curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str())); // Perform HEAD request instead of GET - curl_easy_setopt(*curl, CURLOPT_NOBODY, 1L); - curl_easy_setopt(*curl, CURLOPT_HTTPGET, 0L); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_NOBODY, 1L)); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_HTTPGET, 0L)); // Add headers if any - curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr)); // Execute HEAD request res = curl->Execute(); @@ -304,16 +305,16 @@ class HTTPFSCurlClient : public HTTPClient { CURLcode res; { // Set URL - curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str())); // Set DELETE request method - curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "DELETE"); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_CUSTOMREQUEST, "DELETE")); // Follow redirects - curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_FOLLOWLOCATION, 1L)); // Add headers if any - curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr)); // Execute DELETE request res = curl->Execute(); @@ -342,15 +343,15 @@ class HTTPFSCurlClient : public HTTPClient { CURLcode res; { - curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str()); - curl_easy_setopt(*curl, CURLOPT_POST, 1L); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_URL, request_info->url.c_str())); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_POST, 1L)); // Set POST body - curl_easy_setopt(*curl, CURLOPT_POSTFIELDS, const_char_ptr_cast(info.buffer_in)); - curl_easy_setopt(*curl, CURLOPT_POSTFIELDSIZE, info.buffer_in_len); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_POSTFIELDS, const_char_ptr_cast(info.buffer_in))); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_POSTFIELDSIZE, info.buffer_in_len)); // Add headers if any - curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr); + CHECK_CURL_OK(curl_easy_setopt(*curl, CURLOPT_HTTPHEADER, curl_headers ? curl_headers.headers : nullptr)); // Execute POST request res = curl->Execute(); @@ -430,13 +431,7 @@ class HTTPFSCurlClient : public HTTPClient { optional_ptr state; unique_ptr request_info; - static std::mutex &GetRefLock() { - static std::mutex mtx; - return mtx; - } - static void InitCurlGlobal() { - GetRefLock(); if (httpfs_client_count == 0) { curl_global_init(CURL_GLOBAL_DEFAULT); } @@ -446,7 +441,6 @@ class HTTPFSCurlClient : public HTTPClient { static void DestroyCurlGlobal() { // TODO: when to call curl_global_cleanup() // calling it on client destruction causes SSL errors when verification is on (due to many requests). - // GetRefLock(); // if (httpfs_client_count == 0) { // throw InternalException("Destroying Httpfs client that did not initialize CURL"); // } diff --git a/src/s3fs.cpp b/src/s3fs.cpp index 0c50bff..2a92f22 100644 --- a/src/s3fs.cpp +++ b/src/s3fs.cpp @@ -89,7 +89,7 @@ static HTTPHeaders create_s3_header(string url, string query, string host, strin if (use_sse_kms) { signed_headers += ";x-amz-server-side-encryption;x-amz-server-side-encryption-aws-kms-key-id"; } - auto canonical_request = method + "\n" + S3FileSystem::UrlEncode(url) + "\n" + query; + auto canonical_request = method + "\n" + S3FileSystem::UrlEncode(url) + "\n" + query; if (content_type.length() > 0) { canonical_request += "\ncontent-type:" + content_type; } From 574039f10cadb981ef1aaafa7cad368a4926e796 Mon Sep 17 00:00:00 2001 From: dentinyhao Date: Sat, 27 Sep 2025 00:43:18 +0000 Subject: [PATCH 2/2] fix header inclusion --- src/httpfs_curl_client.cpp | 4 ++++ src/include/httpfs_curl_client.hpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/httpfs_curl_client.cpp b/src/httpfs_curl_client.cpp index c350214..bd8d26b 100644 --- a/src/httpfs_curl_client.cpp +++ b/src/httpfs_curl_client.cpp @@ -7,6 +7,10 @@ #include #include "duckdb/common/exception/http_exception.hpp" +#ifndef EMSCRIPTEN +#include "httpfs_curl_client.hpp" +#endif + #define CHECK_CURL_OK(expr) D_ASSERT((expr) == CURLE_OK) namespace duckdb { diff --git a/src/include/httpfs_curl_client.hpp b/src/include/httpfs_curl_client.hpp index d165912..2f0fca8 100644 --- a/src/include/httpfs_curl_client.hpp +++ b/src/include/httpfs_curl_client.hpp @@ -2,13 +2,13 @@ #include "duckdb/common/http_util.hpp" +#include namespace duckdb { class HTTPLogger; class FileOpener; struct FileOpenerInfo; class HTTPState; -#include class CURLHandle { public: CURLHandle(const string &token, const string &cert_path);