From 90988fe2721567cd4e8e129b5d6b9a10b6b48637 Mon Sep 17 00:00:00 2001 From: Travis Downs Date: Sun, 29 Sep 2024 00:12:29 -0300 Subject: [PATCH] allow setting buffer sizes on server_socket We add two options to set the recv and send (SO_RCVBUF, ...) buffer sizes on a listening socket (server_socket). This is mostly useful to propagate said sizes to all sockets returned by accept(). It is already possible to set the socket option directly on the connected socket after it is returned by accept() but experimentally this results in a socket with the specified buffer size but whose receive window will not be advertised to the client beyond the default (64K for current typical kernel defaults). So you get only some of the benefit of the larger buffer. Setting the buffer size on the listening socket, however, is mentioned as the correct approach in tcp(7) and does not suffer from the same limitation. A test is included which checks that the mechanism, including the inheritance, works. Closes #2458 (cherry picked from commit 4cb7f8eb1bccf033b25bb982b98558f5bbf0cafc) --- include/seastar/net/api.hh | 22 ++++++++++- src/core/reactor.cc | 9 +++++ tests/unit/socket_test.cc | 79 +++++++++++++++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 3 deletions(-) diff --git a/include/seastar/net/api.hh b/include/seastar/net/api.hh index 2d8d0d9d61d..ff51148e370 100644 --- a/include/seastar/net/api.hh +++ b/include/seastar/net/api.hh @@ -396,6 +396,11 @@ public: /// @} +/// Options for creating a listening socket. +/// +/// WARNING: these options currently only have an effect when using +/// the POSIX stack: all options are ignored on the native stack as they +/// are not implemented there. 
struct listen_options { bool reuse_address = false; server_socket::load_balancing_algorithm lba = server_socket::load_balancing_algorithm::default_; @@ -403,6 +408,19 @@ struct listen_options { int listen_backlog = 100; unsigned fixed_cpu = 0u; std::optional unix_domain_socket_permissions; + + /// If set, the SO_SNDBUF size will be set to the given value on the listening socket + /// via setsockopt. This buffer size is inherited by the sockets returned by + /// accept and is the preferred way to set the buffer size for these sockets since + /// setting it directly on the already-accepted socket is ineffective (see TCP(7)). + std::optional so_sndbuf; + + /// If set, the SO_RCVBUF size will be set to the given value on the listening socket + /// via setsockopt. This buffer size is inherited by the sockets returned by + /// accept and is the preferred way to set the buffer size for these sockets since + /// setting it directly on the already-accepted socket is ineffective (see TCP(7)). + std::optional so_rcvbuf; + void set_fixed_cpu(unsigned cpu) { lba = server_socket::load_balancing_algorithm::fixed; fixed_cpu = cpu; @@ -457,8 +475,8 @@ public: return false; } - /** - * Returns available network interfaces. This represents a + /** + * Returns available network interfaces. This represents a * snapshot of interfaces available at call time, hence the * return by value. 
*/ diff --git a/src/core/reactor.cc b/src/core/reactor.cc index 97e15772b84..37a45440640 100644 --- a/src/core/reactor.cc +++ b/src/core/reactor.cc @@ -1660,6 +1660,15 @@ reactor::posix_listen(socket_address sa, listen_options opts) { if (opts.reuse_address) { fd.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1); } + + if (opts.so_sndbuf) { + fd.setsockopt(SOL_SOCKET, SO_SNDBUF, *opts.so_sndbuf); + } + + if (opts.so_rcvbuf) { + fd.setsockopt(SOL_SOCKET, SO_RCVBUF, *opts.so_rcvbuf); + } + if (_reuseport && !sa.is_af_unix()) fd.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1); diff --git a/tests/unit/socket_test.cc b/tests/unit/socket_test.cc index 5ebbac1852c..e3ce8e1557a 100644 --- a/tests/unit/socket_test.cc +++ b/tests/unit/socket_test.cc @@ -27,13 +27,17 @@ #include #include #include +#include #include #include #include #include - +#include #include +#include +#include + using namespace seastar; future<> handle_connection(connected_socket s) { @@ -258,3 +262,76 @@ SEASTAR_TEST_CASE(socket_connect_abort_test) { when_all(std::move(cf), std::move(check), std::move(abort)).get(); }); } + +SEASTAR_THREAD_TEST_CASE(socket_bufsize) { + + // Test that setting the send and recv buffer sizes on the listening + // socket is propagated to the socket returned by accept(). 
+ + auto buf_size = [](std::optional snd_size, std::optional rcv_size) { + listen_options lo{ + .reuse_address = true, + .lba = server_socket::load_balancing_algorithm::fixed, + .so_sndbuf = snd_size, + .so_rcvbuf = rcv_size + }; + + ipv4_addr addr("127.0.0.1", 1234); + server_socket ss = seastar::listen(addr, lo); + connected_socket client = connect(addr).get(); + connected_socket server = ss.accept().get().connection; + + auto sockopt = [&](int option) { + int val{}; + int ret = server.get_sockopt(SOL_SOCKET, option, &val, sizeof(val)); + BOOST_REQUIRE_EQUAL(ret, 0); + return val; + }; + + int send = sockopt(SO_SNDBUF); + int recv = sockopt(SO_RCVBUF); + + ss.abort_accept(); + client.shutdown_output(); + server.shutdown_output(); + + + return std::make_tuple(send, recv); + }; + + constexpr int small_size = 8192, big_size = 128 * 1024; + + // we pass different sizes for send and recv to catch any copy/paste + // style bugs + auto [send_small, recv_small] = buf_size(small_size, small_size * 2); + auto [send_big, recv_big] = buf_size(big_size, big_size * 2); + + // Setting socket buffer sizes isn't an exact science: the kernel does + // some rounding, and also (currently) doubles the requested size and + // also applies some limits. So as a basic check, assert simply that the + // explicit small buffer ends up smaller than the explicit big buffer, + // and that both results are at least as large as the requested amount. + // The latter condition could plausibly fail if the OS clamped the size + // at a small amount, but this is unlikely for the chosen buffer sizes. 
+ + BOOST_CHECK_LT(send_small, send_big); + BOOST_CHECK_LT(recv_small, recv_big); + + BOOST_CHECK_GE(send_small, small_size); + BOOST_CHECK_GE(send_big, big_size); + + BOOST_CHECK_GE(recv_small, small_size * 2); + BOOST_CHECK_GE(recv_big, big_size * 2); + + // not much to check here with "default" sizes, but let's at least call it + // and check that we get a reasonable answer + auto [send_default, recv_default] = buf_size({}, {}); + + BOOST_CHECK_GE(send_default, 4096); + BOOST_CHECK_GE(recv_default, 4096); + + // we don't really know the default socket size and it can vary by kernel + // config, but 20 MB should be enough for everyone. + BOOST_CHECK_LT(send_default, 20'000'000); + BOOST_CHECK_LT(recv_default, 20'000'000); +}