diff --git a/src/core/ddsi/CMakeLists.txt b/src/core/ddsi/CMakeLists.txt index f5a6fd82f2..40c61c2fcb 100644 --- a/src/core/ddsi/CMakeLists.txt +++ b/src/core/ddsi/CMakeLists.txt @@ -74,7 +74,6 @@ set(srcs_ddsi q_qosmatch.c q_radmin.c q_receive.c - q_sockwaitset.c q_thread.c q_transmit.c q_inverse_uint32_set.c @@ -152,7 +151,6 @@ set(hdrs_private_ddsi q_radmin.h q_receive.h q_rtps.h - q_sockwaitset.h q_thread.h q_transmit.h q_inverse_uint32_set.h @@ -160,6 +158,7 @@ set(hdrs_private_ddsi q_whc.h q_xevent.h q_xmsg.h + q_receive.h sysdeps.h) if(ENABLE_LIFESPAN) diff --git a/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h b/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h index 2e8ce0f4eb..70ba623848 100644 --- a/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h +++ b/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h @@ -22,11 +22,11 @@ #include "dds/ddsrt/sync.h" #include "dds/ddsrt/fibheap.h" #include "dds/ddsrt/avl.h" +#include "dds/ddsrt/event.h" #include "dds/ddsi/ddsi_plist.h" #include "dds/ddsi/ddsi_ownip.h" #include "dds/ddsi/q_protocol.h" -#include "dds/ddsi/q_sockwaitset.h" #include "dds/ddsi/q_config.h" #if defined (__cplusplus) @@ -80,7 +80,7 @@ struct recv_thread_arg { struct ddsi_tran_conn *conn; } single; struct { - os_sockWaitset ws; + ddsrt_loop_t loop; } many; } u; }; diff --git a/src/core/ddsi/include/dds/ddsi/ddsi_tran.h b/src/core/ddsi/include/dds/ddsi/ddsi_tran.h index 6239af3fc5..3b89d676f9 100644 --- a/src/core/ddsi/include/dds/ddsi/ddsi_tran.h +++ b/src/core/ddsi/include/dds/ddsi/ddsi_tran.h @@ -14,6 +14,7 @@ /* DDSI Transport module */ +#include "dds/ddsrt/event.h" #include "dds/ddsrt/ifaddrs.h" #include "dds/ddsrt/atomics.h" #include "dds/ddsi/ddsi_locator.h" @@ -50,6 +51,7 @@ typedef ssize_t (*ddsi_tran_write_fn_t) (ddsi_tran_conn_t, const ddsi_locator_t typedef int (*ddsi_tran_locator_fn_t) (ddsi_tran_factory_t, ddsi_tran_base_t, ddsi_locator_t *); typedef bool (*ddsi_tran_supports_fn_t) (const struct ddsi_tran_factory *, int32_t); typedef 
ddsrt_socket_t (*ddsi_tran_handle_fn_t) (ddsi_tran_base_t); +typedef ddsrt_event_t *(*ddsi_tran_event_fn_t) (ddsi_tran_base_t); typedef int (*ddsi_tran_listen_fn_t) (ddsi_tran_listener_t); typedef void (*ddsi_tran_free_fn_t) (ddsi_tran_factory_t); typedef void (*ddsi_tran_peer_locator_fn_t) (ddsi_tran_conn_t, ddsi_locator_t *); @@ -104,6 +106,7 @@ struct ddsi_tran_base /* Functions */ ddsi_tran_handle_fn_t m_handle_fn; + ddsi_tran_event_fn_t m_event_fn; }; struct ddsi_tran_conn @@ -262,9 +265,15 @@ void ddsi_tran_free (ddsi_tran_base_t base); DDS_INLINE_EXPORT inline ddsrt_socket_t ddsi_tran_handle (ddsi_tran_base_t base) { return base->m_handle_fn (base); } +DDS_INLINE_EXPORT inline ddsrt_event_t *ddsi_tran_event (ddsi_tran_base_t base) { + return base->m_event_fn (base); +} DDS_INLINE_EXPORT inline ddsrt_socket_t ddsi_conn_handle (ddsi_tran_conn_t conn) { return conn->m_base.m_handle_fn (&conn->m_base); } +DDS_INLINE_EXPORT inline ddsrt_event_t *ddsi_conn_event (ddsi_tran_conn_t conn) { + return conn->m_base.m_event_fn (&conn->m_base); +} DDS_INLINE_EXPORT inline uint32_t ddsi_conn_type (const struct ddsi_tran_conn *conn) { return conn->m_base.m_trantype; } @@ -326,6 +335,9 @@ DDS_INLINE_EXPORT inline int ddsi_listener_listen (ddsi_tran_listener_t listener DDS_INLINE_EXPORT inline ddsi_tran_conn_t ddsi_listener_accept (ddsi_tran_listener_t listener) { return listener->m_accept_fn (listener); } +DDS_INLINE_EXPORT inline ddsrt_event_t *ddsi_listener_event (ddsi_tran_listener_t listener) { + return listener->m_base.m_event_fn (&listener->m_base); +} void ddsi_listener_unblock (ddsi_tran_listener_t listener); void ddsi_listener_free (ddsi_tran_listener_t listener); diff --git a/src/core/ddsi/include/dds/ddsi/q_sockwaitset.h b/src/core/ddsi/include/dds/ddsi/q_sockwaitset.h deleted file mode 100644 index 3842d7f253..0000000000 --- a/src/core/ddsi/include/dds/ddsi/q_sockwaitset.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright(c) 2006 to 2018 ADLINK Technology Limited 
and others - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License v. 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License - * v. 1.0 which is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause - */ -#ifndef Q_SOCKWAITSET_H -#define Q_SOCKWAITSET_H - -#if defined (__cplusplus) -extern "C" { -#endif - -typedef struct os_sockWaitset * os_sockWaitset; -typedef struct os_sockWaitsetCtx * os_sockWaitsetCtx; -struct ddsi_tran_conn; - -/* - Allocates a new connection waitset. The waitset is thread-safe in - that multiple threads may add and remove connections from the wait set - or trigger it. However only a single thread may process events from - the wait set using the Wait and NextEvent functions in a single handling - loop. -*/ -os_sockWaitset os_sockWaitsetNew (void); - -/* - Frees the waitset WS. Any connections associated with it will - be closed. -*/ -void os_sockWaitsetFree (os_sockWaitset ws); - -/* - Triggers the waitset, from any thread. It is level - triggered, when called while no thread is waiting in - os_sockWaitsetWait the trigger will cause an (early) wakeup on the - next call to os_sockWaitsetWait. Returns DDS_RETCODE_OK if - successfully triggered, DDS_RETCODE_BAD_PARAMETER if an error occurs. - - Triggering a waitset may require resources and they may be counted. - Do not trigger a waitset arbitrarily often without ensuring - os_sockWaitsetWait is called often enough to let it release any - resources used. - - Shared state updates preceding os_sockWaitsetTrigger are visible - following os_sockWaitsetWait. -*/ -void os_sockWaitsetTrigger (os_sockWaitset ws); - -/* - A connection may be associated with only one waitset at any time, and - may be added to the waitset only once. Failure to comply with this - restriction results in undefined behaviour. 
- - Closing a connection associated with a waitset is handled gracefully: no - operations will signal errors because of it. - - Returns < 0 on error, 0 if already present, 1 if added -*/ -int os_sockWaitsetAdd (os_sockWaitset ws, struct ddsi_tran_conn * conn); - -/* - Drops all connections from the waitset from index onwards. Index - 0 corresponds to the first connection added to the waitset, index 1 to - the second, etc. Behaviour is undefined when called after a successful wait - but before all events had been enumerated. -*/ -void os_sockWaitsetPurge (os_sockWaitset ws, unsigned index); - -/* - Waits until some of the connections in WS have data to be read. - - Returns a new wait set context if one or more connections have data to read. - However, the return may be spurious (NULL) (i.e., no events) - - If a context is returned it must be enumerated before os_sockWaitsetWait - may be called again. - - Shared state updates preceding os_sockWaitsetTrigger are visible - following os_sockWaitsetWait. -*/ -os_sockWaitsetCtx os_sockWaitsetWait (os_sockWaitset ws); - -/* - Returns the index of the next triggered connection in the - waitset contect ctx, or -1 if the set of available events has been - exhausted. Index 0 is the first connection added to the waitset, index - 1 the second, &c. - - Following a call to os_sockWaitsetWait on waitset that returned - a context, one MUST enumerate all available events before - os_sockWaitsetWait may be called again. - - If the return value is >= 0, *conn contains the connection on which - data is available. 
-*/ -int os_sockWaitsetNextEvent (os_sockWaitsetCtx ctx, struct ddsi_tran_conn ** conn); - -/* Remove connection */ -void os_sockWaitsetRemove (os_sockWaitset ws, struct ddsi_tran_conn * conn); - -#if defined (__cplusplus) -} -#endif -#endif /* Q_SOCKWAITSET_H */ diff --git a/src/core/ddsi/src/ddsi_raweth.c b/src/core/ddsi/src/ddsi_raweth.c index 03d5eaec98..893c501bc2 100644 --- a/src/core/ddsi/src/ddsi_raweth.c +++ b/src/core/ddsi/src/ddsi_raweth.c @@ -21,6 +21,8 @@ #include "dds/ddsrt/heap.h" #include "dds/ddsrt/log.h" #include "dds/ddsrt/sockets.h" +#include "dds/ddsi/q_entity.h" +#include "q_receive.h" #if defined(__linux) && !LWIP_SOCKET #include @@ -33,7 +35,7 @@ typedef struct ddsi_raweth_conn { struct ddsi_tran_conn m_base; - ddsrt_socket_t m_sock; + ddsrt_event_t m_event; int m_ifindex; } *ddsi_raweth_conn_t; @@ -72,7 +74,7 @@ static ssize_t ddsi_raweth_conn_read (ddsi_tran_conn_t conn, unsigned char * buf msghdr.msg_iovlen = 1; do { - rc = ddsrt_recvmsg(((ddsi_raweth_conn_t) conn)->m_sock, &msghdr, 0, &ret); + rc = ddsrt_recvmsg(((ddsi_raweth_conn_t) conn)->m_event.source.socket.socketfd, &msghdr, 0, &ret); } while (rc == DDS_RETCODE_INTERRUPTED); if (ret > 0) @@ -103,7 +105,7 @@ static ssize_t ddsi_raweth_conn_read (ddsi_tran_conn_t conn, unsigned char * buf rc != DDS_RETCODE_BAD_PARAMETER && rc != DDS_RETCODE_NO_CONNECTION) { - DDS_CERROR(&conn->m_base.gv->logconfig, "UDP recvmsg sock %d: ret %d retcode %d\n", (int) ((ddsi_raweth_conn_t) conn)->m_sock, (int) ret, rc); + DDS_CERROR(&conn->m_base.gv->logconfig, "UDP recvmsg sock %d: ret %d retcode %d\n", (int) ((ddsi_raweth_conn_t) conn)->m_event.source.socket.socketfd, (int) ret, rc); } return ret; } @@ -134,7 +136,7 @@ static ssize_t ddsi_raweth_conn_write (ddsi_tran_conn_t conn, const ddsi_locator sendflags |= MSG_NOSIGNAL; #endif do { - rc = ddsrt_sendmsg (uc->m_sock, &msg, sendflags, &ret); + rc = ddsrt_sendmsg (uc->m_event.source.socket.socketfd, &msg, sendflags, &ret); } while ((rc == 
DDS_RETCODE_INTERRUPTED) || (rc == DDS_RETCODE_TRY_AGAIN) || (rc == DDS_RETCODE_NOT_ALLOWED && retry-- > 0)); @@ -148,9 +150,36 @@ static ssize_t ddsi_raweth_conn_write (ddsi_tran_conn_t conn, const ddsi_locator return (rc == DDS_RETCODE_OK ? ret : -1); } +static dds_return_t ddsi_raweth_read_callback(ddsrt_event_t *event, uint32_t flags, const void *data, void *user_data) +{ + struct ddsi_raweth_conn *conn = (ddsi_raweth_conn_t)((uintptr_t)event - offsetof(struct ddsi_raweth_conn, m_event)); + struct recv_thread *recv = user_data; + + if (!(flags & DDSRT_READ)) + return DDS_RETCODE_OK; + + (void) data; + assert (conn); + assert (recv); + + const ddsi_guid_prefix_t *guid_prefix; + if (event->user_data) + guid_prefix = (ddsi_guid_prefix_t *)&((struct participant *)event->user_data)->e.guid.prefix; /* participant hangs off the event; the callback's user_data is the recv_thread */ + else + guid_prefix = NULL; + + do_packet (recv->ts, recv->arg.gv, (ddsi_tran_conn_t)conn, guid_prefix, recv->arg.rbpool); + return DDS_RETCODE_OK; +} + static ddsrt_socket_t ddsi_raweth_conn_handle (ddsi_tran_base_t base) { - return ((ddsi_raweth_conn_t) base)->m_sock; + return ((ddsi_raweth_conn_t) base)->m_event.source.socket.socketfd; +} + +static ddsrt_event_t *ddsi_raweth_conn_event (ddsi_tran_base_t base) +{ + return &((ddsi_raweth_conn_t) base)->m_event; } static bool ddsi_raweth_supports (const struct ddsi_tran_factory *fact, int32_t kind) @@ -164,7 +193,7 @@ static int ddsi_raweth_conn_locator (ddsi_tran_factory_t fact, ddsi_tran_base_t ddsi_raweth_conn_t uc = (ddsi_raweth_conn_t) base; int ret = -1; (void) fact; - if (uc->m_sock != DDSRT_INVALID_SOCKET) + if (uc->m_event.source.socket.socketfd != DDSRT_INVALID_SOCKET) { loc->kind = NN_LOCATOR_KIND_RAWETH; loc->port = uc->m_base.m_base.m_port; @@ -219,19 +248,24 @@ static dds_return_t ddsi_raweth_create_conn (ddsi_tran_conn_t *conn_out, ddsi_tr } memset (uc, 0, sizeof (*uc)); - uc->m_sock = sock; + uc->m_event.flags = DDSRT_READ; + uc->m_event.loop = NULL; + uc->m_event.callback = ddsi_raweth_read_callback; + 
uc->m_event.user_data = NULL; + uc->m_event.source.socket.socketfd = sock; uc->m_ifindex = addr.sll_ifindex; ddsi_factory_conn_init (fact, intf, &uc->m_base); uc->m_base.m_base.m_port = port; uc->m_base.m_base.m_trantype = DDSI_TRAN_CONN; uc->m_base.m_base.m_multicast = mcast; uc->m_base.m_base.m_handle_fn = ddsi_raweth_conn_handle; + uc->m_base.m_base.m_event_fn = ddsi_raweth_conn_event; uc->m_base.m_locator_fn = ddsi_raweth_conn_locator; uc->m_base.m_read_fn = ddsi_raweth_conn_read; uc->m_base.m_write_fn = ddsi_raweth_conn_write; uc->m_base.m_disable_multiplexing_fn = 0; - DDS_CTRACE (&fact->gv->logconfig, "ddsi_raweth_create_conn %s socket %d port %u\n", mcast ? "multicast" : "unicast", uc->m_sock, uc->m_base.m_base.m_port); + DDS_CTRACE (&fact->gv->logconfig, "ddsi_raweth_create_conn %s socket %d port %u\n", mcast ? "multicast" : "unicast", uc->m_event.source.socket.socketfd, uc->m_base.m_base.m_port); *conn_out = &uc->m_base; return DDS_RETCODE_OK; } @@ -265,7 +299,7 @@ static int ddsi_raweth_join_mc (ddsi_tran_conn_t conn, const ddsi_locator_t *src { ddsi_raweth_conn_t uc = (ddsi_raweth_conn_t) conn; (void)srcloc; - return joinleave_asm_mcgroup(uc->m_sock, 1, mcloc, interf); + return joinleave_asm_mcgroup(uc->m_event.source.socket.socketfd, 1, mcloc, interf); } } @@ -277,7 +311,7 @@ static int ddsi_raweth_leave_mc (ddsi_tran_conn_t conn, const ddsi_locator_t *sr { ddsi_raweth_conn_t uc = (ddsi_raweth_conn_t) conn; (void)srcloc; - return joinleave_asm_mcgroup(uc->m_sock, 0, mcloc, interf); + return joinleave_asm_mcgroup(uc->m_event.source.socket.socketfd, 0, mcloc, interf); } } @@ -287,9 +321,9 @@ static void ddsi_raweth_release_conn (ddsi_tran_conn_t conn) DDS_CTRACE (&conn->m_base.gv->logconfig, "ddsi_raweth_release_conn %s socket %d port %d\n", conn->m_base.m_multicast ? 
"multicast" : "unicast", - uc->m_sock, + uc->m_event.source.socket.socketfd, uc->m_base.m_base.m_port); - ddsrt_close (uc->m_sock); + ddsrt_close (uc->m_event.source.socket.socketfd); ddsrt_free (conn); } diff --git a/src/core/ddsi/src/ddsi_tcp.c b/src/core/ddsi/src/ddsi_tcp.c index bd901c7915..e4cf5545e7 100644 --- a/src/core/ddsi/src/ddsi_tcp.c +++ b/src/core/ddsi/src/ddsi_tcp.c @@ -27,6 +27,7 @@ #include "dds/ddsi/q_entity.h" #include "dds/ddsi/ddsi_domaingv.h" #include "dds/ddsi/ddsi_ssl.h" +#include "q_receive.h" #define INVALID_PORT (~0u) @@ -53,8 +54,8 @@ typedef struct ddsi_tcp_conn { struct ddsi_tran_conn m_base; union addr m_peer_addr; uint32_t m_peer_port; - ddsrt_mutex_t m_mutex; - ddsrt_socket_t m_sock; + ddsrt_mutex_t m_mutex; // FIXME: is this really necessary? + ddsrt_event_t m_event; #ifdef DDS_HAS_SSL SSL * m_ssl; #endif @@ -62,7 +63,7 @@ typedef struct ddsi_tcp_conn { typedef struct ddsi_tcp_listener { struct ddsi_tran_listener m_base; - ddsrt_socket_t m_sock; + ddsrt_event_t m_event; #ifdef DDS_HAS_SSL BIO * m_bio; #endif @@ -143,6 +144,11 @@ static void ddsi_tcp_cache_dump (void) } */ +static inline ddsrt_socket_t ddsi_tcp_conn_socket (const ddsi_tcp_conn_t conn) +{ + return conn->m_event.source.socket.socketfd; +} + static uint16_t get_socket_port (struct ddsi_domaingv const * const gv, ddsrt_socket_t socket) { union addr addr; @@ -160,7 +166,8 @@ static uint16_t get_socket_port (struct ddsi_domaingv const * const gv, ddsrt_so static void ddsi_tcp_conn_set_socket (ddsi_tcp_conn_t conn, ddsrt_socket_t sock) { struct ddsi_domaingv const * const gv = conn->m_base.m_base.gv; - conn->m_sock = sock; + // other event properties are set later on + conn->m_event.source.socket.socketfd = sock; conn->m_base.m_base.m_port = (sock == DDSRT_INVALID_SOCKET) ? 
INVALID_PORT : get_socket_port (gv, sock); } @@ -207,6 +214,9 @@ static dds_return_t ddsi_tcp_sock_new (struct ddsi_tran_factory_tcp * const fact goto fail; } + /* Make the socket non-blocking up front so it can be polled */ + (void)ddsrt_setsocknonblocking (*sock, true); + /* If we're binding to a port number, allow others to bind to the same port */ if (port && (rc = ddsrt_setsockreuse (*sock, true)) != DDS_RETCODE_OK) { if (rc != DDS_RETCODE_UNSUPPORTED) { @@ -257,6 +267,33 @@ static void ddsi_tcp_node_free (void * ptr) ddsrt_free (node); } +static dds_return_t ddsi_tcp_read_callback (ddsrt_event_t *event, uint32_t flags, const void *data, void *user_data) +{ + struct ddsi_tcp_conn *conn = (struct ddsi_tcp_conn *)((uintptr_t)event - offsetof(struct ddsi_tcp_conn, m_event)); + struct recv_thread *recv = user_data; + + if (!(flags & DDSRT_READ)) + return DDS_RETCODE_OK; + + (void) data; + assert (conn && !conn->m_base.m_connless); + assert (recv); + + const ddsi_guid_prefix_t *guid_prefix; + if (event->user_data) + guid_prefix = (ddsi_guid_prefix_t *)&((struct participant *)event->user_data)->e.guid.prefix; + else + guid_prefix = NULL; + + if (do_packet (recv->ts, recv->arg.gv, (ddsi_tran_conn_t)conn, guid_prefix, recv->arg.rbpool)) + return DDS_RETCODE_OK; + assert(!conn->m_base.m_connless); + /* Do not cleanup connection to avoid possible race condition should write + operation try to remove it simultaneously. Timed event thread should + notice eventually and cleanup for us. 
*/ + return DDS_RETCODE_OK; +} + static void ddsi_tcp_conn_connect (ddsi_tcp_conn_t conn, const ddsrt_msghdr_t * msg) { struct ddsi_tran_factory_tcp * const fact = (struct ddsi_tran_factory_tcp *) conn->m_base.m_factory; @@ -275,7 +312,7 @@ static void ddsi_tcp_conn_connect (ddsi_tcp_conn_t conn, const ddsrt_msghdr_t * do { ret = ddsrt_connect(sock, msg->msg_name, msg->msg_namelen); } while (ret == DDS_RETCODE_INTERRUPTED); - if (ret != DDS_RETCODE_OK) + if (ret != DDS_RETCODE_OK && ret != DDS_RETCODE_IN_PROGRESS) goto fail_w_socket; ddsi_tcp_conn_set_socket (conn, sock); @@ -285,7 +322,6 @@ static void ddsi_tcp_conn_connect (ddsi_tcp_conn_t conn, const ddsrt_msghdr_t * conn->m_ssl = (fact->ddsi_tcp_ssl_plugin.connect) (conn->m_base.m_base.gv, sock); if (conn->m_ssl == NULL) { - ddsi_tcp_conn_set_socket (conn, DDSRT_INVALID_SOCKET); goto fail_w_socket; } } @@ -296,15 +332,29 @@ static void ddsi_tcp_conn_connect (ddsi_tcp_conn_t conn, const ddsrt_msghdr_t * /* Also may need to receive on connection so add to waitset */ - (void)ddsrt_setsocknonblocking(conn->m_sock, true); + (void)ddsrt_setsocknonblocking(ddsi_tcp_conn_socket(conn), true); assert (conn->m_base.m_base.gv->n_recv_threads > 0); assert (conn->m_base.m_base.gv->recv_threads[0].arg.mode == RTM_MANY); - os_sockWaitsetAdd (conn->m_base.m_base.gv->recv_threads[0].arg.u.many.ws, &conn->m_base); - os_sockWaitsetTrigger (conn->m_base.m_base.gv->recv_threads[0].arg.u.many.ws); + + conn->m_event.flags = DDSRT_READ; + conn->m_event.loop = NULL; + conn->m_event.callback = &ddsi_tcp_read_callback; + conn->m_event.user_data = NULL; + ret = ddsrt_add_event (&conn->m_base.m_base.gv->recv_threads[0].arg.u.many.loop, &conn->m_event); + if (ret != DDS_RETCODE_OK) + { + goto fail_w_ssl; + } + ddsrt_trigger_loop (&conn->m_base.m_base.gv->recv_threads[0].arg.u.many.loop); return; +fail_w_ssl: +#ifdef DDS_HAS_SSL + if (fact->ddsi_tcp_ssl_plugin.ssl_free) (fact->ddsi_tcp_ssl_plugin.ssl_free) (conn->m_ssl); /* hook is optional, as in ddsi_tcp_conn_delete */ +#endif fail_w_socket: + ddsi_tcp_conn_set_socket (conn, 
DDSRT_INVALID_SOCKET); ddsi_tcp_sock_free (gv, sock, NULL); } @@ -344,7 +394,7 @@ static void ddsi_tcp_cache_add (struct ddsi_tran_factory_tcp *fact, ddsi_tcp_con } sockaddr_to_string_with_port(buff, sizeof(buff), &conn->m_peer_addr.a); - GVLOG (DDS_LC_TCP, "tcp cache %s %s socket %"PRIdSOCK" to %s\n", action, conn->m_base.m_server ? "server" : "client", conn->m_sock, buff); + GVLOG (DDS_LC_TCP, "tcp cache %s %s socket %"PRIdSOCK" to %s\n", action, conn->m_base.m_server ? "server" : "client", ddsi_tcp_conn_socket(conn), buff); } static void ddsi_tcp_cache_remove (ddsi_tcp_conn_t conn) @@ -360,7 +410,7 @@ static void ddsi_tcp_cache_remove (ddsi_tcp_conn_t conn) if (node) { sockaddr_to_string_with_port(buff, sizeof(buff), &conn->m_peer_addr.a); - GVLOG (DDS_LC_TCP, "tcp cache removed socket %"PRIdSOCK" to %s\n", conn->m_sock, buff); + GVLOG (DDS_LC_TCP, "tcp cache removed socket %"PRIdSOCK" to %s\n", ddsi_tcp_conn_socket(conn), buff); ddsrt_avl_delete_dpath (&ddsi_tcp_treedef, &fact->ddsi_tcp_cache_g, node, &path); ddsi_tcp_node_free (node); } @@ -414,7 +464,7 @@ static ssize_t ddsi_tcp_conn_read_plain (ddsi_tcp_conn_t tcp, void * buf, size_t ssize_t rcvd = -1; assert(rc != NULL); - *rc = ddsrt_recv(tcp->m_sock, buf, len, 0, &rcvd); + *rc = ddsrt_recv(ddsi_tcp_conn_socket(tcp), buf, len, 0, &rcvd); return (*rc == DDS_RETCODE_OK ? 
rcvd : -1); } @@ -498,7 +548,7 @@ static ssize_t ddsi_tcp_conn_read (ddsi_tran_conn_t conn, unsigned char *buf, si } else if (n == 0) { - GVLOG (DDS_LC_TCP, "tcp read: sock %"PRIdSOCK" closed-by-peer\n", tcp->m_sock); + GVLOG (DDS_LC_TCP, "tcp read: sock %"PRIdSOCK" closed-by-peer\n", ddsi_tcp_conn_socket(tcp)); break; } else @@ -510,19 +560,22 @@ static ssize_t ddsi_tcp_conn_read (ddsi_tran_conn_t conn, unsigned char *buf, si if (allow_spurious && pos == 0) return 0; const int64_t timeout = gv->config.tcp_read_timeout; - if (ddsi_tcp_select (gv, tcp->m_sock, true, pos, timeout) == false) + if (ddsi_tcp_select (gv, ddsi_tcp_conn_socket(tcp), true, pos, timeout) == false) break; } else { - GVLOG (DDS_LC_TCP, "tcp read: sock %"PRIdSOCK" error %"PRId32"\n", tcp->m_sock, rc); + GVLOG (DDS_LC_TCP, "tcp read: sock %"PRIdSOCK" error %"PRId32"\n", ddsi_tcp_conn_socket(tcp), rc); break; } } } } +#if 0 + /* Remove connection from write path instead to avoid race conditions. */ ddsi_tcp_cache_remove (tcp); +#endif return -1; } @@ -534,7 +587,7 @@ static ssize_t ddsi_tcp_conn_write_plain (ddsi_tcp_conn_t conn, const void * buf #ifdef MSG_NOSIGNAL sendflags |= MSG_NOSIGNAL; #endif - *rc = ddsrt_send(conn->m_sock, buf, len, sendflags, &sent); + *rc = ddsrt_send(ddsi_tcp_conn_socket(conn), buf, len, sendflags, &sent); return (*rc == DDS_RETCODE_OK ? 
sent : -1); } @@ -570,14 +623,14 @@ static ssize_t ddsi_tcp_block_write (ssize_t (*wr) (ddsi_tcp_conn_t, const void if (rc == DDS_RETCODE_TRY_AGAIN) { const int64_t timeout = gv->config.tcp_write_timeout; - if (ddsi_tcp_select (gv, conn->m_sock, false, pos, timeout) == false) + if (ddsi_tcp_select (gv, ddsi_tcp_conn_socket(conn), false, pos, timeout) == false) { break; } } else { - GVLOG (DDS_LC_TCP, "tcp write: sock %"PRIdSOCK" error %"PRId32"\n", conn->m_sock, rc); + GVLOG (DDS_LC_TCP, "tcp write: sock %"PRIdSOCK" error %"PRId32"\n", ddsi_tcp_conn_socket(conn), rc); break; } } @@ -639,13 +692,13 @@ static ssize_t ddsi_tcp_conn_write (ddsi_tran_conn_t base, const ddsi_locator_t ddsrt_mutex_lock (&conn->m_mutex); - /* If not connected attempt to conect */ + /* If not connected attempt to connect */ - if (conn->m_sock == DDSRT_INVALID_SOCKET) + if (ddsi_tcp_conn_socket(conn) == DDSRT_INVALID_SOCKET) { assert (!conn->m_base.m_server); ddsi_tcp_conn_connect (conn, &msg); - if (conn->m_sock == DDSRT_INVALID_SOCKET) + if (ddsi_tcp_conn_socket(conn) == DDSRT_INVALID_SOCKET) { ddsrt_mutex_unlock (&conn->m_mutex); return -1; @@ -657,7 +710,7 @@ static ssize_t ddsi_tcp_conn_write (ddsi_tran_conn_t base, const ddsi_locator_t if (!connect && ((flags & DDSI_TRAN_ON_CONNECT) != 0)) { - GVLOG (DDS_LC_TCP, "tcp write: sock %"PRIdSOCK" message filtered\n", conn->m_sock); + GVLOG (DDS_LC_TCP, "tcp write: sock %"PRIdSOCK" message filtered\n", ddsi_tcp_conn_socket(conn)); ddsrt_mutex_unlock (&conn->m_mutex); return (ssize_t) len; } @@ -699,7 +752,7 @@ static ssize_t ddsi_tcp_conn_write (ddsi_tran_conn_t base, const ddsi_locator_t msg.msg_namelen = 0; do { - rc = ddsrt_sendmsg (conn->m_sock, &msg, sendflags, &ret); + rc = ddsrt_sendmsg (ddsi_tcp_conn_socket(conn), &msg, sendflags, &ret); } while (rc == DDS_RETCODE_INTERRUPTED); if (ret == -1) @@ -716,11 +769,11 @@ static ssize_t ddsi_tcp_conn_write (ddsi_tran_conn_t base, const ddsi_locator_t { case DDS_RETCODE_NO_CONNECTION: case 
DDS_RETCODE_ILLEGAL_OPERATION: - GVLOG (DDS_LC_TCP, "tcp write: sock %"PRIdSOCK" DDS_RETCODE_NO_CONNECTION\n", conn->m_sock); + GVLOG (DDS_LC_TCP, "tcp write: sock %"PRIdSOCK" DDS_RETCODE_NO_CONNECTION\n", ddsi_tcp_conn_socket(conn)); break; default: - if (! conn->m_base.m_closed && (conn->m_sock != DDSRT_INVALID_SOCKET)) - GVWARNING ("tcp write failed on socket %"PRIdSOCK" with errno %"PRId32"\n", conn->m_sock, rc); + if (! conn->m_base.m_closed && (ddsi_tcp_conn_socket(conn) != DDSRT_INVALID_SOCKET)) + GVWARNING ("tcp write failed on socket %"PRIdSOCK" with errno %"PRId32"\n", ddsi_tcp_conn_socket(conn), rc); break; } } @@ -729,7 +782,7 @@ static ssize_t ddsi_tcp_conn_write (ddsi_tran_conn_t base, const ddsi_locator_t { if (ret == 0) { - GVLOG (DDS_LC_TCP, "tcp write: sock %"PRIdSOCK" eof\n", conn->m_sock); + GVLOG (DDS_LC_TCP, "tcp write: sock %"PRIdSOCK" eof\n", ddsi_tcp_conn_socket(conn)); } piecewise = (ret > 0 && (size_t) ret < len); } @@ -781,7 +834,12 @@ static ssize_t ddsi_tcp_conn_write (ddsi_tran_conn_t base, const ddsi_locator_t static ddsrt_socket_t ddsi_tcp_conn_handle (ddsi_tran_base_t base) { - return ((ddsi_tcp_conn_t) base)->m_sock; + return ddsi_tcp_conn_socket((ddsi_tcp_conn_t)base); +} + +static ddsrt_event_t *ddsi_tcp_conn_event (ddsi_tran_base_t base) +{ + return &((ddsi_tcp_conn_t) base)->m_event; } ddsrt_attribute_no_sanitize (("thread")) @@ -815,12 +873,12 @@ static int ddsi_tcp_listen (ddsi_tran_listener_t listener) struct ddsi_tran_factory_tcp * const fact = (struct ddsi_tran_factory_tcp *) listener->m_factory; #endif ddsi_tcp_listener_t tl = (ddsi_tcp_listener_t) listener; - int ret = listen (tl->m_sock, 4); + int ret = listen (tl->m_event.source.socket.socketfd, 4); #ifdef DDS_HAS_SSL if ((ret == 0) && fact->ddsi_tcp_ssl_plugin.listen) { - tl->m_bio = (fact->ddsi_tcp_ssl_plugin.listen) (tl->m_sock); + tl->m_bio = (fact->ddsi_tcp_ssl_plugin.listen) (tl->m_event.source.socket.socketfd); } #endif @@ -856,7 +914,7 @@ static 
ddsi_tran_conn_t ddsi_tcp_accept (ddsi_tran_listener_t listener) else #endif { - rc = ddsrt_accept(tl->m_sock, NULL, NULL, &sock); + rc = ddsrt_accept(tl->m_event.source.socket.socketfd, NULL, NULL, &sock); } if (!ddsrt_atomic_ld32(&gv->rtps_keepgoing)) { @@ -867,25 +925,33 @@ static ddsi_tran_conn_t ddsi_tcp_accept (ddsi_tran_listener_t listener) if (sock == DDSRT_INVALID_SOCKET) { - (void)ddsrt_getsockname (tl->m_sock, &addr.a, &addrlen); + (void)ddsrt_getsockname (tl->m_event.source.socket.socketfd, &addr.a, &addrlen); sockaddr_to_string_with_port(buff, sizeof(buff), &addr.a); - GVLOG ((rc == DDS_RETCODE_OK) ? DDS_LC_ERROR : DDS_LC_FATAL, "tcp accept failed on socket %"PRIdSOCK" at %s retcode %"PRId32"\n", tl->m_sock, buff, rc); + GVLOG ((rc == DDS_RETCODE_OK) ? DDS_LC_ERROR : DDS_LC_FATAL, "tcp accept failed on socket %"PRIdSOCK" at %s retcode %"PRId32"\n", tl->m_event.source.socket.socketfd, buff, rc); } else if (getpeername (sock, &addr.a, &addrlen) == -1) { - GVWARNING ("tcp accepted new socket %"PRIdSOCK" on socket %"PRIdSOCK" but no peer address, errno %"PRId32"\n", sock, tl->m_sock, rc); + GVWARNING ("tcp accepted new socket %"PRIdSOCK" on socket %"PRIdSOCK" but no peer address, errno %"PRId32"\n", sock, tl->m_event.source.socket.socketfd, rc); ddsrt_close (sock); } else { sockaddr_to_string_with_port(buff, sizeof(buff), &addr.a); - GVLOG (DDS_LC_TCP, "tcp accept new socket %"PRIdSOCK" on socket %"PRIdSOCK" from %s\n", sock, tl->m_sock, buff); + GVLOG (DDS_LC_TCP, "tcp accept new socket %"PRIdSOCK" on socket %"PRIdSOCK" from %s\n", sock, tl->m_event.source.socket.socketfd, buff); (void)ddsrt_setsocknonblocking (sock, true); tcp = ddsi_tcp_new_conn (fact, NULL, sock, true, &addr.a); #ifdef DDS_HAS_SSL tcp->m_ssl = ssl; #endif + tcp->m_event.flags = DDSRT_READ; + tcp->m_event.loop = NULL; + tcp->m_event.callback = &ddsi_tcp_read_callback; + if (gv->config.many_sockets_mode != DDSI_MSM_MANY_UNICAST) + tcp->m_event.user_data = NULL; + else /* Propagate 
participant if connection belongs to a participant */ + tcp->m_event.user_data = tl->m_event.user_data; + tcp->m_base.m_listener = listener; tcp->m_base.m_conn = listener->m_connections; listener->m_connections = &tcp->m_base; @@ -895,13 +961,22 @@ static ddsi_tran_conn_t ddsi_tcp_accept (ddsi_tran_listener_t listener) ddsrt_mutex_lock (&fact->ddsi_tcp_cache_lock_g); ddsi_tcp_cache_add (fact, tcp, NULL); ddsrt_mutex_unlock (&fact->ddsi_tcp_cache_lock_g); + + /* Register connection with event loop */ + (void)ddsrt_add_event((ddsrt_loop_t *)&gv->recv_threads[0].arg.u.many.loop, &tcp->m_event); + /* No need to trigger event loop, connection is added automatically */ } return tcp ? &tcp->m_base : NULL; } static ddsrt_socket_t ddsi_tcp_listener_handle (ddsi_tran_base_t base) { - return ((ddsi_tcp_listener_t) base)->m_sock; + return ((ddsi_tcp_listener_t) base)->m_event.source.socket.socketfd; +} + +static ddsrt_event_t *ddsi_tcp_listener_event (ddsi_tran_base_t base) +{ + return &((ddsi_tcp_listener_t) base)->m_event; } /* @@ -920,7 +995,7 @@ static void ddsi_tcp_conn_peer_locator (ddsi_tran_conn_t conn, ddsi_locator_t * struct ddsi_domaingv const * const gv = conn->m_base.gv; char buff[DDSI_LOCSTRLEN]; ddsi_tcp_conn_t tc = (ddsi_tcp_conn_t) conn; - assert (tc->m_sock != DDSRT_INVALID_SOCKET); + assert (tc->m_event.source.socket.socketfd != DDSRT_INVALID_SOCKET); addr_to_loc (loc, &tc->m_peer_addr); ddsi_locator_to_string(buff, sizeof(buff), loc); GVLOG (DDS_LC_TCP, "(tcp EP:%s)", buff); @@ -931,6 +1006,7 @@ static void ddsi_tcp_base_init (const struct ddsi_tran_factory_tcp *fact, const ddsi_factory_conn_init (&fact->fact, interf, base); base->m_base.m_trantype = DDSI_TRAN_CONN; base->m_base.m_handle_fn = ddsi_tcp_conn_handle; + base->m_base.m_event_fn = ddsi_tcp_conn_event; base->m_read_fn = ddsi_tcp_conn_read; base->m_write_fn = ddsi_tcp_conn_write; base->m_peer_locator_fn = ddsi_tcp_conn_peer_locator; @@ -945,7 +1021,7 @@ static ddsi_tcp_conn_t ddsi_tcp_new_conn 
(struct ddsi_tran_factory_tcp *fact, co memset (conn, 0, sizeof (*conn)); ddsi_tcp_base_init (fact, interf, &conn->m_base); ddsrt_mutex_init (&conn->m_mutex); - conn->m_sock = DDSRT_INVALID_SOCKET; + conn->m_event.source.socket.socketfd = DDSRT_INVALID_SOCKET; (void)memcpy(&conn->m_peer_addr, peer, (size_t)ddsrt_sockaddr_get_size(peer)); conn->m_peer_port = ddsrt_sockaddr_get_port (peer); conn->m_base.m_server = server; @@ -955,6 +1031,23 @@ static ddsi_tcp_conn_t ddsi_tcp_new_conn (struct ddsi_tran_factory_tcp *fact, co return conn; } +static dds_return_t ddsi_tcp_accept_callback (ddsrt_event_t *event, uint32_t flags, const void *data, void *user_data) +{ + struct ddsi_tcp_listener *listener; + + (void) flags; + (void) data; + (void) user_data; + + assert (event); + listener = (struct ddsi_tcp_listener *)((uintptr_t)event - offsetof(struct ddsi_tcp_listener, m_event)); + assert (listener); + + /* Accept connection from listener */ + (void)ddsi_listener_accept ((ddsi_tran_listener_t)listener); + return DDS_RETCODE_OK; +} + static dds_return_t ddsi_tcp_create_listener (ddsi_tran_listener_t *listener_out, ddsi_tran_factory_t fact, uint32_t port, const struct ddsi_tran_qos *qos) { struct ddsi_tran_factory_tcp * const fact_tcp = (struct ddsi_tran_factory_tcp *) fact; @@ -981,7 +1074,11 @@ static dds_return_t ddsi_tcp_create_listener (ddsi_tran_listener_t *listener_out ddsi_tcp_listener_t tl = ddsrt_malloc (sizeof (*tl)); memset (tl, 0, sizeof (*tl)); - tl->m_sock = sock; + tl->m_event.flags = DDSRT_READ; + tl->m_event.loop = NULL; + tl->m_event.callback = &ddsi_tcp_accept_callback; + tl->m_event.user_data = NULL; + tl->m_event.source.socket.socketfd = sock; tl->m_base.m_base.gv = fact->gv; tl->m_base.m_listen_fn = ddsi_tcp_listen; @@ -991,6 +1088,7 @@ static dds_return_t ddsi_tcp_create_listener (ddsi_tran_listener_t *listener_out tl->m_base.m_base.m_port = get_socket_port (gv, sock); tl->m_base.m_base.m_trantype = DDSI_TRAN_LISTENER; tl->m_base.m_base.m_handle_fn = 
ddsi_tcp_listener_handle; + tl->m_base.m_base.m_event_fn = ddsi_tcp_listener_event; tl->m_base.m_locator_fn = ddsi_tcp_locator; *listener_out = &tl->m_base; return DDS_RETCODE_OK; @@ -1002,7 +1100,7 @@ static void ddsi_tcp_conn_delete (ddsi_tcp_conn_t conn) struct ddsi_domaingv const * const gv = fact->fact.gv; char buff[DDSI_LOCSTRLEN]; sockaddr_to_string_with_port(buff, sizeof(buff), &conn->m_peer_addr.a); - GVLOG (DDS_LC_TCP, "tcp free %s connection on socket %"PRIdSOCK" to %s\n", conn->m_base.m_server ? "server" : "client", conn->m_sock, buff); + GVLOG (DDS_LC_TCP, "tcp free %s connection on socket %"PRIdSOCK" to %s\n", conn->m_base.m_server ? "server" : "client", conn->m_event.source.socket.socketfd, buff); #ifdef DDS_HAS_SSL if (fact->ddsi_tcp_ssl_plugin.ssl_free) @@ -1012,7 +1110,7 @@ static void ddsi_tcp_conn_delete (ddsi_tcp_conn_t conn) else #endif { - ddsi_tcp_sock_free (gv, conn->m_sock, "connection"); + ddsi_tcp_sock_free (gv, conn->m_event.source.socket.socketfd, "connection"); } ddsrt_mutex_destroy (&conn->m_mutex); ddsrt_free (conn); @@ -1028,8 +1126,8 @@ static void ddsi_tcp_close_conn (ddsi_tran_conn_t tc) ddsi_xlocator_t loc; ddsi_tcp_conn_t conn = (ddsi_tcp_conn_t) tc; sockaddr_to_string_with_port(buff, sizeof(buff), &conn->m_peer_addr.a); - GVLOG (DDS_LC_TCP, "tcp close %s connection on socket %"PRIdSOCK" to %s\n", conn->m_base.m_server ? "server" : "client", conn->m_sock, buff); - (void) shutdown (conn->m_sock, 2); + GVLOG (DDS_LC_TCP, "tcp close %s connection on socket %"PRIdSOCK" to %s\n", conn->m_base.m_server ? 
"server" : "client", conn->m_event.source.socket.socketfd, buff); + (void) shutdown (conn->m_event.source.socket.socketfd, 2); ddsi_ipaddr_to_loc(&loc.c, &conn->m_peer_addr.a, addrfam_to_locator_kind(conn->m_peer_addr.a.sa_family)); loc.c.port = conn->m_peer_port; loc.conn = tc; @@ -1046,55 +1144,6 @@ static void ddsi_tcp_release_conn (ddsi_tran_conn_t conn) } } -static void ddsi_tcp_unblock_listener (ddsi_tran_listener_t listener) -{ - struct ddsi_tran_factory_tcp * const fact_tcp = (struct ddsi_tran_factory_tcp *) listener->m_factory; - struct ddsi_domaingv const * const gv = fact_tcp->fact.gv; - ddsi_tcp_listener_t tl = (ddsi_tcp_listener_t) listener; - ddsrt_socket_t sock; - dds_return_t ret; - - /* Connect to own listener socket to wake listener from blocking 'accept()' */ - if (ddsi_tcp_sock_new (fact_tcp, &sock, 0) != DDS_RETCODE_OK) - goto fail; - - union addr addr; - socklen_t addrlen = sizeof (addr); - if ((ret = ddsrt_getsockname (tl->m_sock, &addr.a, &addrlen)) != DDS_RETCODE_OK) - { - GVWARNING ("tcp failed to get listener address error %"PRId32"\n", ret); - goto fail_w_socket; - } - switch (addr.a.sa_family) - { - case AF_INET: - if (addr.a4.sin_addr.s_addr == htonl (INADDR_ANY)) - addr.a4.sin_addr.s_addr = htonl (INADDR_LOOPBACK); - break; -#if DDSRT_HAVE_IPV6 - case AF_INET6: - if (memcmp (&addr.a6.sin6_addr, &ddsrt_in6addr_any, sizeof (addr.a6.sin6_addr)) == 0) - addr.a6.sin6_addr = ddsrt_in6addr_loopback; - break; -#endif - } - - do { - ret = ddsrt_connect (sock, &addr.a, ddsrt_sockaddr_get_size (&addr.a)); - } while (ret == DDS_RETCODE_INTERRUPTED); - if (ret != DDS_RETCODE_OK) - { - char buff[DDSI_LOCSTRLEN]; - sockaddr_to_string_with_port (buff, sizeof (buff), &addr.a); - GVWARNING ("tcp failed to connect to own listener (%s) error %"PRId32"\n", buff, ret); - } - -fail_w_socket: - ddsi_tcp_sock_free (gv, sock, NULL); -fail: - return; -} - static void ddsi_tcp_release_listener (ddsi_tran_listener_t listener) { ddsi_tcp_listener_t tl = 
(ddsi_tcp_listener_t) listener; @@ -1106,7 +1155,7 @@ static void ddsi_tcp_release_listener (ddsi_tran_listener_t listener) (fact->ddsi_tcp_ssl_plugin.bio_vfree) (tl->m_bio); } #endif - ddsi_tcp_sock_free (gv, tl->m_sock, "listener"); + ddsi_tcp_sock_free (gv, tl->m_event.source.socket.socketfd, "listener"); ddsrt_free (tl); } @@ -1224,7 +1273,7 @@ int ddsi_tcp_init (struct ddsi_domaingv *gv) fact->fact.m_create_conn_fn = ddsi_tcp_create_conn; fact->fact.m_release_conn_fn = ddsi_tcp_release_conn; fact->fact.m_close_conn_fn = ddsi_tcp_close_conn; - fact->fact.m_unblock_listener_fn = ddsi_tcp_unblock_listener; + fact->fact.m_unblock_listener_fn = 0; fact->fact.m_release_listener_fn = ddsi_tcp_release_listener; fact->fact.m_free_fn = ddsi_tcp_release_factory; fact->fact.m_locator_from_string_fn = ddsi_tcp_address_from_string; diff --git a/src/core/ddsi/src/ddsi_tran.c b/src/core/ddsi/src/ddsi_tran.c index ff10f0c395..976096162a 100644 --- a/src/core/ddsi/src/ddsi_tran.c +++ b/src/core/ddsi/src/ddsi_tran.c @@ -30,12 +30,15 @@ DDS_EXPORT extern inline bool ddsi_factory_supports (const struct ddsi_tran_fact DDS_EXPORT extern inline int ddsi_is_valid_port (const struct ddsi_tran_factory *factory, uint32_t port); DDS_EXPORT extern inline uint32_t ddsi_receive_buffer_size (const struct ddsi_tran_factory *factory); DDS_EXPORT extern inline ddsrt_socket_t ddsi_conn_handle (ddsi_tran_conn_t conn); +DDS_EXPORT extern inline ddsrt_event_t *ddsi_conn_event (ddsi_tran_conn_t conn); DDS_EXPORT extern inline int ddsi_conn_locator (ddsi_tran_conn_t conn, ddsi_locator_t * loc); DDS_EXPORT extern inline ddsrt_socket_t ddsi_tran_handle (ddsi_tran_base_t base); +DDS_EXPORT extern inline ddsrt_event_t *ddsi_tran_event (ddsi_tran_base_t base); DDS_EXPORT extern inline dds_return_t ddsi_factory_create_conn (ddsi_tran_conn_t *conn, ddsi_tran_factory_t factory, uint32_t port, const struct ddsi_tran_qos *qos); DDS_EXPORT extern inline int ddsi_listener_locator (ddsi_tran_listener_t listener, 
ddsi_locator_t * loc); DDS_EXPORT extern inline int ddsi_listener_listen (ddsi_tran_listener_t listener); DDS_EXPORT extern inline ddsi_tran_conn_t ddsi_listener_accept (ddsi_tran_listener_t listener); +DDS_EXPORT extern inline ddsrt_event_t *ddsi_listener_event (ddsi_tran_listener_t listener); DDS_EXPORT extern inline ssize_t ddsi_conn_read (ddsi_tran_conn_t conn, unsigned char * buf, size_t len, bool allow_spurious, ddsi_locator_t *srcloc); DDS_EXPORT extern inline ssize_t ddsi_conn_write (ddsi_tran_conn_t conn, const ddsi_locator_t *dst, size_t niov, const ddsrt_iovec_t *iov, uint32_t flags); @@ -140,7 +143,7 @@ void ddsi_conn_free (ddsi_tran_conn_t conn) switch (conn->m_base.gv->recv_threads[i].arg.mode) { case RTM_MANY: - os_sockWaitsetRemove (conn->m_base.gv->recv_threads[i].arg.u.many.ws, conn); + ddsrt_delete_event (&conn->m_base.gv->recv_threads[i].arg.u.many.loop, ddsi_conn_event (conn)); break; case RTM_SINGLE: if (conn->m_base.gv->recv_threads[i].arg.u.single.conn == conn) @@ -213,7 +216,6 @@ void ddsi_tran_free (ddsi_tran_base_t base) } else { - ddsi_listener_unblock ((ddsi_tran_listener_t) base); ddsi_listener_free ((ddsi_tran_listener_t) base); } } diff --git a/src/core/ddsi/src/ddsi_udp.c b/src/core/ddsi/src/ddsi_udp.c index be94750b43..a2003dcf51 100644 --- a/src/core/ddsi/src/ddsi_udp.c +++ b/src/core/ddsi/src/ddsi_udp.c @@ -25,8 +25,10 @@ #include "dds/ddsi/ddsi_mcgroup.h" #include "dds/ddsi/q_config.h" #include "dds/ddsi/q_log.h" +#include "dds/ddsi/q_entity.h" #include "dds/ddsi/q_pcap.h" #include "dds/ddsi/ddsi_domaingv.h" +#include "q_receive.h" union addr { struct sockaddr_storage x; @@ -39,8 +41,8 @@ union addr { typedef struct ddsi_udp_conn { struct ddsi_tran_conn m_base; - ddsrt_socket_t m_sock; -#if defined _WIN32 + ddsrt_event_t m_event; +#if _WIN32 WSAEVENT m_sockEvent; #endif int m_diffserv; @@ -55,6 +57,11 @@ typedef struct ddsi_udp_tran_factory { ddsrt_atomic_uint32_t receive_buf_size; } *ddsi_udp_tran_factory_t; +static inline 
ddsrt_socket_t ddsi_udp_conn_socket(ddsi_udp_conn_t conn) +{ + return conn->m_event.source.socket.socketfd; +} + static void addr_to_loc (const struct ddsi_tran_factory *tran, ddsi_locator_t *dst, const union addr *src) { (void) tran; @@ -89,7 +96,7 @@ static ssize_t ddsi_udp_conn_read (ddsi_tran_conn_t conn_cmn, unsigned char * bu #endif do { - rc = ddsrt_recvmsg (conn->m_sock, &msghdr, 0, &ret); + rc = ddsrt_recvmsg (ddsi_udp_conn_socket(conn), &msghdr, 0, &ret); } while (rc == DDS_RETCODE_INTERRUPTED); if (ret > 0) @@ -101,7 +108,7 @@ static ssize_t ddsi_udp_conn_read (ddsi_tran_conn_t conn_cmn, unsigned char * bu { union addr dest; socklen_t dest_len = sizeof (dest); - if (ddsrt_getsockname (conn->m_sock, &dest.a, &dest_len) != DDS_RETCODE_OK) + if (ddsrt_getsockname (ddsi_udp_conn_socket(conn), &dest.a, &dest_len) != DDS_RETCODE_OK) memset (&dest, 0, sizeof (dest)); write_pcap_received (gv, ddsrt_time_wallclock (), &src.x, &dest.x, buf, (size_t) ret); } @@ -123,7 +130,7 @@ static ssize_t ddsi_udp_conn_read (ddsi_tran_conn_t conn_cmn, unsigned char * bu } else if (rc != DDS_RETCODE_BAD_PARAMETER && rc != DDS_RETCODE_NO_CONNECTION) { - GVERROR ("UDP recvmsg sock %d: ret %d retcode %"PRId32"\n", (int) conn->m_sock, (int) ret, rc); + GVERROR ("UDP recvmsg sock %d: ret %d retcode %"PRId32"\n", (int) ddsi_udp_conn_socket(conn), (int) ret, rc); ret = -1; } return ret; @@ -166,13 +173,13 @@ static ssize_t ddsi_udp_conn_write (ddsi_tran_conn_t conn_cmn, const ddsi_locato sendflags |= MSG_NOSIGNAL; #endif do { - rc = ddsrt_sendmsg (conn->m_sock, &msg, sendflags, &ret); + rc = ddsrt_sendmsg (ddsi_udp_conn_socket(conn), &msg, sendflags, &ret); #if defined _WIN32 && !defined WINCE if (rc == DDS_RETCODE_TRY_AGAIN) { WSANETWORKEVENTS ev; WaitForSingleObject (conn->m_sockEvent, INFINITE); - WSAEnumNetworkEvents (conn->m_sock, conn->m_sockEvent, &ev); + WSAEnumNetworkEvents (ddsi_udp_conn_socket(conn), conn->m_sockEvent, &ev); } #endif } while (rc == DDS_RETCODE_INTERRUPTED || 
rc == DDS_RETCODE_TRY_AGAIN || (rc == DDS_RETCODE_NOT_ALLOWED && retry-- > 0)); @@ -180,7 +187,7 @@ static ssize_t ddsi_udp_conn_write (ddsi_tran_conn_t conn_cmn, const ddsi_locato { union addr sa; socklen_t alen = sizeof (sa); - if (ddsrt_getsockname (conn->m_sock, &sa.a, &alen) != DDS_RETCODE_OK) + if (ddsrt_getsockname (ddsi_udp_conn_socket(conn), &sa.a, &alen) != DDS_RETCODE_OK) memset(&sa, 0, sizeof(sa)); write_pcap_sent (gv, ddsrt_time_wallclock (), &sa.x, &msg, (size_t) ret); } @@ -192,14 +199,36 @@ static ssize_t ddsi_udp_conn_write (ddsi_tran_conn_t conn_cmn, const ddsi_locato return (rc == DDS_RETCODE_OK) ? ret : -1; } +static dds_return_t ddsi_udp_read_callback (ddsrt_event_t *event, uint32_t flags, const void *data, void *user_data) +{ + struct ddsi_udp_conn *conn = (struct ddsi_udp_conn *)((uintptr_t)event - offsetof(struct ddsi_udp_conn, m_event)); + struct recv_thread *recv = user_data; + + if (!(flags & DDSRT_READ)) + return DDS_RETCODE_OK; + + (void) data; + assert (conn && conn->m_base.m_connless); + assert (recv); + + const ddsi_guid_prefix_t *guid_prefix; + if (event->user_data) + guid_prefix = (ddsi_guid_prefix_t *)&((struct participant *)event->user_data)->e.guid.prefix; + else + guid_prefix = NULL; + + do_packet (recv->ts, recv->arg.gv, (ddsi_tran_conn_t)conn, guid_prefix, recv->arg.rbpool); + return DDS_RETCODE_OK; +} + static void ddsi_udp_disable_multiplexing (ddsi_tran_conn_t conn_cmn) { #if defined _WIN32 && !defined WINCE ddsi_udp_conn_t conn = (ddsi_udp_conn_t) conn_cmn; uint32_t zero = 0; DWORD dummy; - WSAEventSelect (conn->m_sock, 0, 0); - WSAIoctl (conn->m_sock, FIONBIO, &zero,sizeof(zero), NULL,0, &dummy, NULL,NULL); + WSAEventSelect (ddsi_udp_conn_socket(conn), 0, 0); + WSAIoctl (ddsi_udp_conn_socket(conn), FIONBIO, &zero,sizeof(zero), NULL,0, &dummy, NULL,NULL); #else (void) conn_cmn; #endif @@ -208,7 +237,13 @@ static void ddsi_udp_disable_multiplexing (ddsi_tran_conn_t conn_cmn) static ddsrt_socket_t ddsi_udp_conn_handle 
(ddsi_tran_base_t conn_cmn) { ddsi_udp_conn_t conn = (ddsi_udp_conn_t) conn_cmn; - return conn->m_sock; + return ddsi_udp_conn_socket(conn); +} + +static ddsrt_event_t *ddsi_udp_conn_event (ddsi_tran_base_t conn_cmn) +{ + ddsi_udp_conn_t conn = (ddsi_udp_conn_t) conn_cmn; + return &conn->m_event; } static bool ddsi_udp_supports (const struct ddsi_tran_factory *fact_cmn, int32_t kind) @@ -222,7 +257,7 @@ static int ddsi_udp_conn_locator (ddsi_tran_factory_t fact_cmn, ddsi_tran_base_t struct ddsi_udp_tran_factory const * const fact = (const struct ddsi_udp_tran_factory *) fact_cmn; ddsi_udp_conn_t conn = (ddsi_udp_conn_t) conn_cmn; int ret = -1; - if (conn->m_sock != DDSRT_INVALID_SOCKET) + if (ddsi_udp_conn_socket(conn) != DDSRT_INVALID_SOCKET) { loc->kind = fact->m_kind; loc->port = conn->m_base.m_base.m_port; @@ -553,11 +588,15 @@ static dds_return_t ddsi_udp_create_conn (ddsi_tran_conn_t *conn_out, ddsi_tran_ ddsi_udp_conn_t conn = ddsrt_malloc (sizeof (*conn)); memset (conn, 0, sizeof (*conn)); - conn->m_sock = sock; + conn->m_event.flags = DDSRT_READ; + conn->m_event.loop = NULL; + conn->m_event.callback = ddsi_udp_read_callback; + conn->m_event.user_data = NULL; + conn->m_event.source.socket.socketfd = sock; conn->m_diffserv = qos->m_diffserv; #if defined _WIN32 && !defined WINCE conn->m_sockEvent = WSACreateEvent (); - WSAEventSelect (conn->m_sock, conn->m_sockEvent, FD_WRITE); + WSAEventSelect (conn->m_event.source.socket.socketfd, conn->m_sockEvent, FD_WRITE); #endif ddsi_factory_conn_init (&fact->fact, intf, &conn->m_base); @@ -565,13 +604,14 @@ static dds_return_t ddsi_udp_create_conn (ddsi_tran_conn_t *conn_out, ddsi_tran_ conn->m_base.m_base.m_trantype = DDSI_TRAN_CONN; conn->m_base.m_base.m_multicast = (qos->m_purpose == DDSI_TRAN_QOS_RECV_MC); conn->m_base.m_base.m_handle_fn = ddsi_udp_conn_handle; + conn->m_base.m_base.m_event_fn = ddsi_udp_conn_event; conn->m_base.m_read_fn = ddsi_udp_conn_read; conn->m_base.m_write_fn = ddsi_udp_conn_write; 
conn->m_base.m_disable_multiplexing_fn = ddsi_udp_disable_multiplexing; conn->m_base.m_locator_fn = ddsi_udp_conn_locator; - GVTRACE ("ddsi_udp_create_conn %s socket %"PRIdSOCK" port %"PRIu32"\n", purpose_str, conn->m_sock, conn->m_base.m_base.m_port); + GVTRACE ("ddsi_udp_create_conn %s socket %"PRIdSOCK" port %"PRIu32"\n", purpose_str, conn->m_event.source.socket.socketfd, conn->m_base.m_base.m_port); *conn_out = &conn->m_base; return DDS_RETCODE_OK; @@ -653,10 +693,10 @@ static int ddsi_udp_join_mc (ddsi_tran_conn_t conn_cmn, const ddsi_locator_t *sr (void) srcloc; #ifdef DDS_HAS_SSM if (srcloc) - return joinleave_ssm_mcgroup (conn->m_sock, 1, srcloc, mcloc, interf); + return joinleave_ssm_mcgroup (conn->m_event.source.socket.socketfd, 1, srcloc, mcloc, interf); else #endif - return joinleave_asm_mcgroup (conn->m_sock, 1, mcloc, interf); + return joinleave_asm_mcgroup (conn->m_event.source.socket.socketfd, 1, mcloc, interf); } static int ddsi_udp_leave_mc (ddsi_tran_conn_t conn_cmn, const ddsi_locator_t *srcloc, const ddsi_locator_t *mcloc, const struct nn_interface *interf) @@ -665,10 +705,10 @@ static int ddsi_udp_leave_mc (ddsi_tran_conn_t conn_cmn, const ddsi_locator_t *s (void) srcloc; #ifdef DDS_HAS_SSM if (srcloc) - return joinleave_ssm_mcgroup (conn->m_sock, 0, srcloc, mcloc, interf); + return joinleave_ssm_mcgroup (conn->m_event.source.socket.socketfd, 0, srcloc, mcloc, interf); else #endif - return joinleave_asm_mcgroup (conn->m_sock, 0, mcloc, interf); + return joinleave_asm_mcgroup (conn->m_event.source.socket.socketfd, 0, mcloc, interf); } static void ddsi_udp_release_conn (ddsi_tran_conn_t conn_cmn) @@ -677,8 +717,8 @@ static void ddsi_udp_release_conn (ddsi_tran_conn_t conn_cmn) struct ddsi_domaingv const * const gv = conn->m_base.m_base.gv; GVTRACE ("ddsi_udp_release_conn %s socket %"PRIdSOCK" port %"PRIu32"\n", conn_cmn->m_base.m_multicast ? 
"multicast" : "unicast", - conn->m_sock, conn->m_base.m_base.m_port); - ddsrt_close (conn->m_sock); + conn->m_event.source.socket.socketfd, conn->m_base.m_base.m_port); + ddsrt_close (conn->m_event.source.socket.socketfd); #if defined _WIN32 && !defined WINCE WSACloseEvent (conn->m_sockEvent); #endif diff --git a/src/core/ddsi/src/ddsi_vnet.c b/src/core/ddsi/src/ddsi_vnet.c index 84abbdf828..a27d03f122 100644 --- a/src/core/ddsi/src/ddsi_vnet.c +++ b/src/core/ddsi/src/ddsi_vnet.c @@ -57,6 +57,12 @@ static ddsrt_socket_t ddsi_vnet_conn_handle (ddsi_tran_base_t conn) return DDSRT_INVALID_SOCKET; } +static ddsrt_event_t *ddsi_vnet_conn_event (ddsi_tran_base_t conn) +{ + (void) conn; + return NULL; +} + static int ddsi_vnet_conn_locator (ddsi_tran_factory_t vfact, ddsi_tran_base_t base, ddsi_locator_t *loc) { (void) base; (void) loc; @@ -79,6 +85,7 @@ static dds_return_t ddsi_vnet_create_conn (ddsi_tran_conn_t *conn_out, ddsi_tran x->m_base.m_base.m_trantype = DDSI_TRAN_CONN; x->m_base.m_base.m_multicast = false; x->m_base.m_base.m_handle_fn = ddsi_vnet_conn_handle; + x->m_base.m_base.m_event_fn = ddsi_vnet_conn_event; x->m_base.m_locator_fn = ddsi_vnet_conn_locator; x->m_base.m_read_fn = 0; x->m_base.m_write_fn = 0; diff --git a/src/core/ddsi/src/q_entity.c b/src/core/ddsi/src/q_entity.c index 1b2f7c537d..9fc3296744 100644 --- a/src/core/ddsi/src/q_entity.c +++ b/src/core/ddsi/src/q_entity.c @@ -1014,6 +1014,7 @@ dds_return_t new_participant_guid (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv struct whc_writer_info *wrinfo; dds_return_t ret = DDS_RETCODE_OK; ddsi_tran_conn_t ppconn; + ddsrt_event_t *ppevent; /* no reserved bits may be set */ assert ((flags & ~(RTPS_PF_NO_BUILTIN_READERS | RTPS_PF_NO_BUILTIN_WRITERS | RTPS_PF_PRIVILEGED_PP | RTPS_PF_IS_DDSI2_PP | RTPS_PF_ONLY_LOCAL)) == 0); @@ -1031,7 +1032,10 @@ dds_return_t new_participant_guid (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv return DDS_RETCODE_PRECONDITION_NOT_MET; if (gv->config.many_sockets_mode != 
DDSI_MSM_MANY_UNICAST) + { ppconn = NULL; + ppevent = NULL; + } else { const ddsi_tran_qos_t qos = { .m_purpose = DDSI_TRAN_QOS_RECV_UC, .m_diffserv = 0, .m_interface = NULL }; @@ -1040,6 +1044,7 @@ dds_return_t new_participant_guid (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv GVERROR ("new_participant("PGUIDFMT", %x) failed: could not create network endpoint\n", PGUID (*ppguid), flags); return DDS_RETCODE_OUT_OF_RESOURCES; } + ppevent = ddsi_tran_event ((ddsi_tran_base_t) ppconn); } if (gv->config.max_participants == 0) @@ -1097,7 +1102,7 @@ dds_return_t new_participant_guid (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv ddsi_tkmap_instance_unref (gv->m_tkmap, pp->e.tk); pp->e.tk = builtintopic_get_tkmap_entry (gv->builtin_topic_interface, &pp->e.guid); pp->e.iid = pp->e.tk->m_iid; - } + } #else if (ddsi_xqos_has_prop_prefix (&pp->plist->qos, "dds.sec.")) { @@ -1118,6 +1123,11 @@ dds_return_t new_participant_guid (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv pp->m_conn = ppconn; if (gv->config.many_sockets_mode == DDSI_MSM_MANY_UNICAST) ddsi_conn_locator (pp->m_conn, &pp->m_locator); + /* Event user_data field is currently used to reference the participant + if many sockets mode is enabled. The participant is required to + determine the guid_prefix. 
*/ + if (ppconn && ppevent) + ppevent->user_data = pp; /* the UDP read callback casts user_data to struct participant * */ ddsrt_fibheap_init (&lease_fhdef_pp, &pp->leaseheap_man); ddsrt_atomic_stvoidp (&pp->minl_man, NULL); diff --git a/src/core/ddsi/src/q_init.c b/src/core/ddsi/src/q_init.c index ae54a4290a..035f09023d 100644 --- a/src/core/ddsi/src/q_init.c +++ b/src/core/ddsi/src/q_init.c @@ -973,6 +973,7 @@ static bool use_multiple_receive_threads (const struct ddsi_config *cfg) static int setup_and_start_recv_threads (struct ddsi_domaingv *gv) { const bool multi_recv_thr = use_multiple_receive_threads (&gv->config); + uint32_t initcnt = 0u; for (uint32_t i = 0; i < MAX_RECV_THREADS; i++) { @@ -1014,7 +1015,7 @@ static int setup_and_start_recv_threads (struct ddsi_domaingv *gv) assert (gv->n_recv_threads <= MAX_RECV_THREADS); /* For each thread, create rbufpool and waitset if needed, then start it */ - for (uint32_t i = 0; i < gv->n_recv_threads; i++) + for (uint32_t i = 0; i < gv->n_recv_threads; i++, initcnt++) { /* We create the rbufpool for the receive thread, and so we'll become the initial owner thread. 
The receive thread will change @@ -1024,15 +1025,13 @@ static int setup_and_start_recv_threads (struct ddsi_domaingv *gv) GVERROR ("rtps_init: can't allocate receive buffer pool for thread %s\n", gv->recv_threads[i].name); goto fail; } - if (gv->recv_threads[i].arg.mode == RTM_MANY) + struct recv_thread_arg *arg = &gv->recv_threads[i].arg; + if (arg->mode == RTM_MANY && ddsrt_create_loop(&arg->u.many.loop)) { - if ((gv->recv_threads[i].arg.u.many.ws = os_sockWaitsetNew ()) == NULL) - { - GVERROR ("rtps_init: can't allocate sock waitset for thread %s\n", gv->recv_threads[i].name); - goto fail; - } + GVERROR ("rtps_init: can't create event loop for thread %s\n", gv->recv_threads[i].name); + goto fail; } - if (create_thread (&gv->recv_threads[i].ts, gv, gv->recv_threads[i].name, recv_thread, &gv->recv_threads[i].arg) != DDS_RETCODE_OK) + if (create_thread (&gv->recv_threads[i].ts, gv, gv->recv_threads[i].name, recv_thread, arg) != DDS_RETCODE_OK) { GVERROR ("rtps_init: failed to start thread %s\n", gv->recv_threads[i].name); goto fail; @@ -1044,10 +1043,10 @@ static int setup_and_start_recv_threads (struct ddsi_domaingv *gv) /* to trigger any threads we already started to stop - xevent thread has already been started */ rtps_term_prep (gv); wait_for_receive_threads (gv); - for (uint32_t i = 0; i < gv->n_recv_threads; i++) + for (uint32_t i = 0; i < initcnt; i++) { - if (gv->recv_threads[i].arg.mode == RTM_MANY && gv->recv_threads[i].arg.u.many.ws) - os_sockWaitsetFree (gv->recv_threads[i].arg.u.many.ws); + if (gv->recv_threads[i].arg.mode == RTM_MANY) + ddsrt_destroy_loop (&gv->recv_threads[i].arg.u.many.loop); if (gv->recv_threads[i].arg.rbpool) nn_rbufpool_free (gv->recv_threads[i].arg.rbpool); } @@ -1705,7 +1704,6 @@ int rtps_init (struct ddsi_domaingv *gv) ddsi_listener_free(gv->listener); goto err_mc_conn; } - /* Set unicast locators from listener */ set_unspec_locator (&gv->loc_spdp_mc); set_unspec_locator (&gv->loc_meta_mc); @@ -2016,17 +2014,6 @@ int 
rtps_start (struct ddsi_domaingv *gv) xeventq_stop (gv->xevents); return -1; } - if (gv->listener) - { - if (create_thread (&gv->listen_ts, gv, "listen", (uint32_t (*) (void *)) listen_thread, gv->listener) != DDS_RETCODE_OK) - { - GVERROR ("failed to create TCP listener thread\n"); - ddsi_listener_free (gv->listener); - gv->listener = NULL; - rtps_stop (gv); - return -1; - } - } if (gv->config.monitor_port >= 0) { if ((gv->debmon = new_debug_monitor (gv, gv->config.monitor_port)) == NULL) @@ -2075,11 +2062,7 @@ void rtps_stop (struct ddsi_domaingv *gv) wait_for_receive_threads (gv); if (gv->listener) - { - ddsi_listener_unblock(gv->listener); - join_thread (gv->listen_ts); ddsi_listener_free(gv->listener); - } xeventq_stop (gv->xevents); #ifdef DDS_HAS_NETWORK_CHANNELS @@ -2281,7 +2264,7 @@ void rtps_fini (struct ddsi_domaingv *gv) for (uint32_t i = 0; i < gv->n_recv_threads; i++) { if (gv->recv_threads[i].arg.mode == RTM_MANY) - os_sockWaitsetFree (gv->recv_threads[i].arg.u.many.ws); + ddsrt_destroy_loop (&gv->recv_threads[i].arg.u.many.loop); nn_rbufpool_free (gv->recv_threads[i].arg.rbpool); } diff --git a/src/core/ddsi/src/q_receive.c b/src/core/ddsi/src/q_receive.c index 1ed32c03fb..0c6d9b5391 100644 --- a/src/core/ddsi/src/q_receive.c +++ b/src/core/ddsi/src/q_receive.c @@ -56,6 +56,7 @@ #include "dds/ddsi/ddsi_serdata_default.h" /* FIXME: get rid of this */ #include "dds/ddsi/ddsi_security_omg.h" #include "dds/ddsi/ddsi_acknack.h" +#include "q_receive.h" #include "dds/ddsi/sysdeps.h" #include "dds__whc.h" @@ -3193,7 +3194,7 @@ void ddsi_handle_rtps_message (struct thread_state1 * const ts1, struct ddsi_dom handle_rtps_message (ts1, gv, conn, guidprefix, rbpool, rmsg, sz, msg, srcloc); } -static bool do_packet (struct thread_state1 * const ts1, struct ddsi_domaingv *gv, ddsi_tran_conn_t conn, const ddsi_guid_prefix_t *guidprefix, struct nn_rbufpool *rbpool) +bool do_packet (struct thread_state1 * const ts1, struct ddsi_domaingv *gv, ddsi_tran_conn_t conn, 
const ddsi_guid_prefix_t *guidprefix, struct nn_rbufpool *rbpool) { /* UDP max packet size is 64kB */ @@ -3287,150 +3288,7 @@ static bool do_packet (struct thread_state1 * const ts1, struct ddsi_domaingv *g return (sz > 0); } -struct local_participant_desc -{ - ddsi_tran_conn_t m_conn; - ddsi_guid_prefix_t guid_prefix; -}; - -static int local_participant_cmp (const void *va, const void *vb) -{ - const struct local_participant_desc *a = va; - const struct local_participant_desc *b = vb; - ddsrt_socket_t h1 = ddsi_conn_handle (a->m_conn); - ddsrt_socket_t h2 = ddsi_conn_handle (b->m_conn); - return (h1 == h2) ? 0 : (h1 < h2) ? -1 : 1; -} - -static size_t dedup_sorted_array (void *base, size_t nel, size_t width, int (*compar) (const void *, const void *)) -{ - if (nel <= 1) - return nel; - else - { - char * const end = (char *) base + nel * width; - char *last_unique = base; - char *cursor = (char *) base + width; - size_t n_unique = 1; - while (cursor != end) - { - if (compar (cursor, last_unique) != 0) - { - n_unique++; - last_unique += width; - if (last_unique != cursor) - memcpy (last_unique, cursor, width); - } - cursor += width; - } - return n_unique; - } -} - -struct local_participant_set { - struct local_participant_desc *ps; - uint32_t nps; - uint32_t gen; -}; - -static void local_participant_set_init (struct local_participant_set *lps, ddsrt_atomic_uint32_t *ppset_generation) -{ - lps->ps = NULL; - lps->nps = 0; - lps->gen = ddsrt_atomic_ld32 (ppset_generation) - 1; -} - -static void local_participant_set_fini (struct local_participant_set *lps) -{ - ddsrt_free (lps->ps); -} - -static void rebuild_local_participant_set (struct thread_state1 * const ts1, struct ddsi_domaingv *gv, struct local_participant_set *lps) -{ - struct entidx_enum_participant est; - struct participant *pp; - unsigned nps_alloc; - GVTRACE ("pp set gen changed: local %"PRIu32" global %"PRIu32"\n", lps->gen, ddsrt_atomic_ld32 (&gv->participant_set_generation)); - 
thread_state_awake_fixed_domain (ts1); - restart: - lps->gen = ddsrt_atomic_ld32 (&gv->participant_set_generation); - /* Actual local set of participants may never be older than the - local generation count => membar to guarantee the ordering */ - ddsrt_atomic_fence_acq (); - nps_alloc = gv->nparticipants; - ddsrt_free (lps->ps); - lps->nps = 0; - lps->ps = (nps_alloc == 0) ? NULL : ddsrt_malloc (nps_alloc * sizeof (*lps->ps)); - entidx_enum_participant_init (&est, gv->entity_index); - while ((pp = entidx_enum_participant_next (&est)) != NULL) - { - if (lps->nps == nps_alloc) - { - /* New participants may get added while we do this (or - existing ones removed), so we may have to restart if it - turns out we didn't allocate enough memory [an - alternative would be to realloc on the fly]. */ - entidx_enum_participant_fini (&est); - GVTRACE (" need more memory - restarting\n"); - goto restart; - } - else - { - lps->ps[lps->nps].m_conn = pp->m_conn; - lps->ps[lps->nps].guid_prefix = pp->e.guid.prefix; - GVTRACE (" pp "PGUIDFMT" handle %"PRIdSOCK"\n", PGUID (pp->e.guid), ddsi_conn_handle (pp->m_conn)); - lps->nps++; - } - } - entidx_enum_participant_fini (&est); - - /* There is a (very small) probability of a participant - disappearing and new one appearing with the same socket while - we are enumerating, which would cause us to misinterpret the - participant guid prefix for a directed packet without an - explicit destination. Membar because we must have completed - the loop before testing the generation again. */ - ddsrt_atomic_fence_acq (); - if (lps->gen != ddsrt_atomic_ld32 (&gv->participant_set_generation)) - { - GVTRACE (" set changed - restarting\n"); - goto restart; - } - thread_state_asleep (ts1); - - /* The definition of the hash enumeration allows visiting one - participant multiple times, so guard against that, too. Note - that there's no requirement that the set be ordered on - socket: it is merely a convenient way of finding - duplicates. 
*/ - if (lps->nps) - { - qsort (lps->ps, lps->nps, sizeof (*lps->ps), local_participant_cmp); - lps->nps = (unsigned) dedup_sorted_array (lps->ps, lps->nps, sizeof (*lps->ps), local_participant_cmp); - } - GVTRACE (" nparticipants %"PRIu32"\n", lps->nps); -} - -uint32_t listen_thread (struct ddsi_tran_listener *listener) -{ - struct ddsi_domaingv *gv = listener->m_base.gv; - ddsi_tran_conn_t conn; - - while (ddsrt_atomic_ld32 (&gv->rtps_keepgoing)) - { - /* Accept connection from listener */ - - conn = ddsi_listener_accept (listener); - if (conn) - { - os_sockWaitsetAdd (gv->recv_threads[0].arg.u.many.ws, conn); - os_sockWaitsetTrigger (gv->recv_threads[0].arg.u.many.ws); - } - } - return 0; -} - -static int recv_thread_waitset_add_conn (os_sockWaitset ws, ddsi_tran_conn_t conn) +static int recv_thread_waitset_add_conn (ddsrt_loop_t *loop, ddsi_tran_conn_t conn) { if (conn == NULL) return 0; @@ -3440,7 +3298,7 @@ static int recv_thread_waitset_add_conn (os_sockWaitset ws, ddsi_tran_conn_t con for (uint32_t i = 0; i < gv->n_recv_threads; i++) if (gv->recv_threads[i].arg.mode == RTM_SINGLE && gv->recv_threads[i].arg.u.single.conn == conn) return 0; - return os_sockWaitsetAdd (ws, conn); + return ddsrt_add_event (loop, ddsi_tran_event((ddsi_tran_base_t) conn)) == 0 ? 
0 : -1; } } @@ -3465,8 +3323,8 @@ void trigger_recv_threads (const struct ddsi_domaingv *gv) break; } case RTM_MANY: { - GVTRACE ("trigger_recv_threads: %"PRIu32" many %p\n", i, (void *) gv->recv_threads[i].arg.u.many.ws); - os_sockWaitsetTrigger (gv->recv_threads[i].arg.u.many.ws); + GVTRACE ("trigger_recv_threads: %"PRIu32" many %p\n", i, (void *) &gv->recv_threads[i].arg.u.many.loop); + ddsrt_trigger_loop(&gv->recv_threads[i].arg.u.many.loop); break; } } @@ -3479,11 +3337,11 @@ uint32_t recv_thread (void *vrecv_thread_arg) struct recv_thread_arg *recv_thread_arg = vrecv_thread_arg; struct ddsi_domaingv * const gv = recv_thread_arg->gv; struct nn_rbufpool *rbpool = recv_thread_arg->rbpool; - os_sockWaitset waitset = recv_thread_arg->mode == RTM_MANY ? recv_thread_arg->u.many.ws : NULL; + ddsrt_loop_t *loop = recv_thread_arg->mode == RTM_MANY ? &recv_thread_arg->u.many.loop : NULL; ddsrt_mtime_t next_thread_cputime = { 0 }; nn_rbufpool_setowner (rbpool, ddsrt_thread_self ()); - if (waitset == NULL) + if (loop == NULL) { struct ddsi_tran_conn *conn = recv_thread_arg->u.single.conn; while (ddsrt_atomic_ld32 (&gv->rtps_keepgoing)) @@ -3494,25 +3352,22 @@ uint32_t recv_thread (void *vrecv_thread_arg) } else { - struct local_participant_set lps; unsigned num_fixed = 0, num_fixed_uc = 0; - os_sockWaitsetCtx ctx; - local_participant_set_init (&lps, &gv->participant_set_generation); if (gv->m_factory->m_connless) { int rc; - if ((rc = recv_thread_waitset_add_conn (waitset, gv->disc_conn_uc)) < 0) - DDS_FATAL("recv_thread: failed to add disc_conn_uc to waitset\n"); + if ((rc = recv_thread_waitset_add_conn (loop, gv->disc_conn_uc)) < 0) + DDS_FATAL("recv_thread: failed to add disc_conn_uc to event loop\n"); num_fixed_uc += (unsigned)rc; - if ((rc = recv_thread_waitset_add_conn (waitset, gv->data_conn_uc)) < 0) - DDS_FATAL("recv_thread: failed to add data_conn_uc to waitset\n"); + if ((rc = recv_thread_waitset_add_conn (loop, gv->data_conn_uc)) < 0) + DDS_FATAL("recv_thread: 
failed to add data_conn_uc to event loop\n"); num_fixed_uc += (unsigned)rc; num_fixed += num_fixed_uc; - if ((rc = recv_thread_waitset_add_conn (waitset, gv->disc_conn_mc)) < 0) - DDS_FATAL("recv_thread: failed to add disc_conn_mc to waitset\n"); + if ((rc = recv_thread_waitset_add_conn (loop, gv->disc_conn_mc)) < 0) + DDS_FATAL("recv_thread: failed to add disc_conn_mc to event loop\n"); num_fixed += (unsigned)rc; - if ((rc = recv_thread_waitset_add_conn (waitset, gv->data_conn_mc)) < 0) - DDS_FATAL("recv_thread: failed to add data_conn_mc to waitset\n"); + if ((rc = recv_thread_waitset_add_conn (loop, gv->data_conn_mc)) < 0) + DDS_FATAL("recv_thread: failed to add data_conn_mc to event loop\n"); num_fixed += (unsigned)rc; // OpenDDS doesn't respect the locator lists and insists on sending to the @@ -3523,56 +3378,27 @@ uint32_t recv_thread (void *vrecv_thread_arg) // for input on if (ddsi_conn_handle (gv->xmit_conns[i]) == DDSRT_INVALID_SOCKET) continue; - if ((rc = recv_thread_waitset_add_conn (waitset, gv->xmit_conns[i])) < 0) - DDS_FATAL("recv_thread: failed to add transmit_conn[%d] to waitset\n", i); + if ((rc = recv_thread_waitset_add_conn (loop, gv->xmit_conns[i])) < 0) + DDS_FATAL("recv_thread: failed to add transmit_conn[%d] to event loop\n", i); num_fixed += (unsigned)rc; } } + else if (gv->listener) + { + /* the listener is optional (rtps_stop checks it too): register it only when present */ + dds_return_t rc; + ddsrt_event_t *event = ddsi_listener_event (gv->listener); + if ((rc = ddsrt_add_event(loop, event)) < 0) + DDS_FATAL("recv_thread: failed to add listener to event loop\n"); + } while (ddsrt_atomic_ld32 (&gv->rtps_keepgoing)) { - int rebuildws = 0; LOG_THREAD_CPUTIME (&gv->logconfig, next_thread_cputime); - if (gv->config.many_sockets_mode != DDSI_MSM_MANY_UNICAST) - { - /* no other sockets to check */ - } - else if (ddsrt_atomic_ld32 (&gv->participant_set_generation) != lps.gen) - { - rebuildws = 1; - } - - if (rebuildws && waitset && gv->config.many_sockets_mode == DDSI_MSM_MANY_UNICAST) - { - /* first rebuild local 
participant set - unless someone's toggling "deafness", this - only happens when the participant set has changed, so might as well rebuild it */ - rebuild_local_participant_set (ts1, gv, &lps); - os_sockWaitsetPurge (waitset, num_fixed); - for (uint32_t i = 0; i < lps.nps; i++) - { - if (lps.ps[i].m_conn) - os_sockWaitsetAdd (waitset, lps.ps[i].m_conn); - } - } - - if ((ctx = os_sockWaitsetWait (waitset)) != NULL) - { - int idx; - ddsi_tran_conn_t conn; - while ((idx = os_sockWaitsetNextEvent (ctx, &conn)) >= 0) - { - const ddsi_guid_prefix_t *guid_prefix; - if (((unsigned)idx < num_fixed) || gv->config.many_sockets_mode != DDSI_MSM_MANY_UNICAST) - guid_prefix = NULL; - else - guid_prefix = &lps.ps[(unsigned)idx - num_fixed].guid_prefix; - /* Process message and clean out connection if failed or closed */ - if (!do_packet (ts1, gv, conn, guid_prefix, rbpool) && !conn->m_connless) - ddsi_conn_free (conn); - } - } + struct recv_thread *recv_thread = + (struct recv_thread *)((uintptr_t)recv_thread_arg - offsetof(struct recv_thread, arg)); + (void) ddsrt_run_loop(loop, DDSRT_RUN_ONCE, recv_thread); } - local_participant_set_fini (&lps); } GVTRACE ("done\n"); diff --git a/src/core/ddsi/src/q_receive.h b/src/core/ddsi/src/q_receive.h new file mode 100644 index 0000000000..85bca0ca66 --- /dev/null +++ b/src/core/ddsi/src/q_receive.h @@ -0,0 +1,6 @@ +#ifndef Q_DO_PACKET_H +#define Q_DO_PACKET_H + +bool do_packet (struct thread_state1 * const ts1, struct ddsi_domaingv *gv, ddsi_tran_conn_t conn, const ddsi_guid_prefix_t *guidprefix, struct nn_rbufpool *rbpool); + +#endif diff --git a/src/core/ddsi/src/q_sockwaitset.c b/src/core/ddsi/src/q_sockwaitset.c deleted file mode 100644 index 9f4d581af1..0000000000 --- a/src/core/ddsi/src/q_sockwaitset.c +++ /dev/null @@ -1,934 +0,0 @@ -/* - * Copyright(c) 2006 to 2018 ADLINK Technology Limited and others - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License v. 
2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License - * v. 1.0 which is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause - */ -#include -#include -#include - -#include "dds/ddsrt/heap.h" -#include "dds/ddsrt/sockets.h" -#include "dds/ddsrt/sync.h" - -#include "dds/ddsi/q_sockwaitset.h" -#include "dds/ddsi/q_config.h" -#include "dds/ddsi/q_log.h" -#include "dds/ddsi/ddsi_tran.h" - -#define WAITSET_DELTA 8 - -#define MODE_KQUEUE 1 -#define MODE_SELECT 2 -#define MODE_WFMEVS 3 - -#if defined __APPLE__ -#define MODE_SEL MODE_KQUEUE -#elif defined WINCE -#define MODE_SEL MODE_WFMEVS -#else -#define MODE_SEL MODE_SELECT -#endif - -#if MODE_SEL == MODE_KQUEUE - -#include -#include -#include -#ifndef __QNXNTO__ -#include -#endif // __QNXNTO__ -#include -#include -#include - -struct os_sockWaitsetCtx -{ - struct kevent *evs; - uint32_t nevs; - uint32_t evs_sz; - uint32_t index; /* cursor for enumerating */ -}; - -struct entry { - uint32_t index; - int fd; - ddsi_tran_conn_t conn; -}; - -struct os_sockWaitset -{ - int kqueue; - int pipe[2]; /* pipe used for triggering */ - ddsrt_atomic_uint32_t sz; - struct entry *entries; - struct os_sockWaitsetCtx ctx; /* set of descriptors being handled */ - ddsrt_mutex_t lock; /* for add/delete */ -}; - -static int add_entry_locked (os_sockWaitset ws, ddsi_tran_conn_t conn, int fd) -{ - uint32_t idx, fidx, sz, n; - struct kevent kev; - assert (fd >= 0); - sz = ddsrt_atomic_ld32 (&ws->sz); - for (idx = 0, fidx = UINT32_MAX, n = 0; idx < sz; idx++) - { - if (ws->entries[idx].fd == -1) - fidx = (idx < fidx) ? 
idx : fidx; - else if (ws->entries[idx].conn == conn) - return 0; - else - n++; - } - - if (fidx == UINT32_MAX) - { - const uint32_t newsz = ddsrt_atomic_add32_nv (&ws->sz, WAITSET_DELTA); - ws->entries = ddsrt_realloc (ws->entries, newsz * sizeof (*ws->entries)); - for (idx = sz; idx < newsz; idx++) - ws->entries[idx].fd = -1; - fidx = sz; - } - EV_SET (&kev, (unsigned)fd, EVFILT_READ, EV_ADD, 0, 0, &ws->entries[fidx]); - if (kevent(ws->kqueue, &kev, 1, NULL, 0, NULL) == -1) - return -1; - ws->entries[fidx].conn = conn; - ws->entries[fidx].fd = fd; - ws->entries[fidx].index = n; - return 1; -} - -os_sockWaitset os_sockWaitsetNew (void) -{ - const uint32_t sz = WAITSET_DELTA; - os_sockWaitset ws; - uint32_t i; - if ((ws = ddsrt_malloc (sizeof (*ws))) == NULL) - goto fail_waitset; - ddsrt_atomic_st32 (&ws->sz, sz); - if ((ws->entries = ddsrt_malloc (sz * sizeof (*ws->entries))) == NULL) - goto fail_entries; - for (i = 0; i < sz; i++) - ws->entries[i].fd = -1; - ws->ctx.nevs = 0; - ws->ctx.index = 0; - ws->ctx.evs_sz = sz; - if ((ws->ctx.evs = ddsrt_malloc (ws->ctx.evs_sz * sizeof (*ws->ctx.evs))) == NULL) - goto fail_ctx_evs; - if ((ws->kqueue = kqueue ()) == -1) - goto fail_kqueue; - if (pipe (ws->pipe) == -1) - goto fail_pipe; - if (add_entry_locked (ws, NULL, ws->pipe[0]) < 0) - goto fail_add_trigger; - assert (ws->entries[0].fd == ws->pipe[0]); - if (fcntl (ws->kqueue, F_SETFD, fcntl (ws->kqueue, F_GETFD) | FD_CLOEXEC) == -1) - goto fail_fcntl; - if (fcntl (ws->pipe[0], F_SETFD, fcntl (ws->pipe[0], F_GETFD) | FD_CLOEXEC) == -1) - goto fail_fcntl; - if (fcntl (ws->pipe[1], F_SETFD, fcntl (ws->pipe[1], F_GETFD) | FD_CLOEXEC) == -1) - goto fail_fcntl; - ddsrt_mutex_init (&ws->lock); - return ws; - -fail_fcntl: -fail_add_trigger: - close (ws->pipe[0]); - close (ws->pipe[1]); -fail_pipe: - close (ws->kqueue); -fail_kqueue: - ddsrt_free (ws->ctx.evs); -fail_ctx_evs: - ddsrt_free (ws->entries); -fail_entries: - ddsrt_free (ws); -fail_waitset: - return NULL; -} - -void 
os_sockWaitsetFree (os_sockWaitset ws) -{ - ddsrt_mutex_destroy (&ws->lock); - close (ws->pipe[0]); - close (ws->pipe[1]); - close (ws->kqueue); - ddsrt_free (ws->entries); - ddsrt_free (ws->ctx.evs); - ddsrt_free (ws); -} - -void os_sockWaitsetTrigger (os_sockWaitset ws) -{ - char buf = 0; - int n; - n = (int)write (ws->pipe[1], &buf, 1); - if (n != 1) - { - DDS_WARNING("os_sockWaitsetTrigger: read failed on trigger pipe, errno = %d\n", errno); - } -} - -int os_sockWaitsetAdd (os_sockWaitset ws, ddsi_tran_conn_t conn) -{ - int ret; - ddsrt_mutex_lock (&ws->lock); - ret = add_entry_locked (ws, conn, ddsi_conn_handle (conn)); - ddsrt_mutex_unlock (&ws->lock); - return ret; -} - -void os_sockWaitsetPurge (os_sockWaitset ws, unsigned index) -{ - /* Sockets may have been closed by the Purge is called, but any closed sockets - are automatically deleted from the kqueue and the file descriptors be reused - in the meantime. It therefore seems wiser replace the kqueue then to delete - entries */ - uint32_t i, sz; - struct kevent kev; - ddsrt_mutex_lock (&ws->lock); - sz = ddsrt_atomic_ld32 (&ws->sz); - close (ws->kqueue); - if ((ws->kqueue = kqueue()) == -1) - abort (); /* FIXME */ - for (i = 0; i <= index; i++) - { - assert (ws->entries[i].fd >= 0); - EV_SET(&kev, (unsigned)ws->entries[i].fd, EVFILT_READ, EV_ADD, 0, 0, &ws->entries[i]); - if (kevent(ws->kqueue, &kev, 1, NULL, 0, NULL) == -1) - abort (); /* FIXME */ - } - for (; i < sz; i++) - { - ws->entries[i].conn = NULL; - ws->entries[i].fd = -1; - } - ddsrt_mutex_unlock (&ws->lock); -} - -void os_sockWaitsetRemove (os_sockWaitset ws, ddsi_tran_conn_t conn) -{ - const int fd = ddsi_conn_handle (conn); - uint32_t i, sz; - assert (fd >= 0); - ddsrt_mutex_lock (&ws->lock); - sz = ddsrt_atomic_ld32 (&ws->sz); - for (i = 1; i < sz; i++) - if (ws->entries[i].fd == fd) - break; - if (i < sz) - { - struct kevent kev; - EV_SET(&kev, (unsigned)ws->entries[i].fd, EVFILT_READ, EV_DELETE, 0, 0, 0); - if (kevent(ws->kqueue, &kev, 1, 
NULL, 0, NULL) == -1) - abort (); /* FIXME */ - ws->entries[i].fd = -1; - } - ddsrt_mutex_unlock (&ws->lock); -} - -os_sockWaitsetCtx os_sockWaitsetWait (os_sockWaitset ws) -{ - /* if the array of events is smaller than the number of file descriptors in the - kqueue, things will still work fine, as the kernel will just return what can - be stored, and the set will be grown on the next call */ - uint32_t ws_sz = ddsrt_atomic_ld32 (&ws->sz); - int nevs; - if (ws->ctx.evs_sz < ws_sz) - { - ws->ctx.evs_sz = ws_sz; - ws->ctx.evs = ddsrt_realloc (ws->ctx.evs, ws_sz * sizeof(*ws->ctx.evs)); - } - nevs = kevent (ws->kqueue, NULL, 0, ws->ctx.evs, (int)ws->ctx.evs_sz, NULL); - if (nevs < 0) - { - if (errno == EINTR) - nevs = 0; - else - { - DDS_WARNING("os_sockWaitsetWait: kevent failed, errno = %d\n", errno); - return NULL; - } - } - ws->ctx.nevs = (uint32_t)nevs; - ws->ctx.index = 0; - return &ws->ctx; -} - -int os_sockWaitsetNextEvent (os_sockWaitsetCtx ctx, ddsi_tran_conn_t *conn) -{ - while (ctx->index < ctx->nevs) - { - uint32_t idx = ctx->index++; - struct entry * const entry = ctx->evs[idx].udata; - if (entry->index > 0) - { - *conn = entry->conn; - return (int)(entry->index - 1); - } - else - { - /* trigger pipe, read & try again */ - char dummy; - read ((int)ctx->evs[idx].ident, &dummy, 1); - } - } - return -1; -} - -#elif MODE_SEL == MODE_WFMEVS - -struct os_sockWaitsetCtx -{ - ddsi_tran_conn_t conns[MAXIMUM_WAIT_OBJECTS]; /* connections and listeners */ - WSAEVENT events[MAXIMUM_WAIT_OBJECTS]; /* events associated with sockets */ - int index; /* last wakeup index, or -1 */ - unsigned n; /* sockets/events [0 .. 
n-1] are occupied */ -}; - -struct os_sockWaitset -{ - ddsrt_mutex_t mutex; /* concurrency guard */ - struct os_sockWaitsetCtx ctx; - struct os_sockWaitsetCtx ctx0; -}; - -os_sockWaitset os_sockWaitsetNew (void) -{ - os_sockWaitset ws = ddsrt_malloc (sizeof (*ws)); - ws->ctx.conns[0] = NULL; - ws->ctx.events[0] = WSACreateEvent (); - ws->ctx.n = 1; - ws->ctx.index = -1; - ddsrt_mutex_init (&ws->mutex); - return ws; -} - -void os_sockWaitsetFree (os_sockWaitset ws) -{ - for (unsigned i = 0; i < ws->ctx.n; i++) - { - WSACloseEvent (ws->ctx.events[i]); - } - ddsrt_mutex_destroy (&ws->mutex); - ddsrt_free (ws); -} - -void os_sockWaitsetPurge (os_sockWaitset ws, unsigned index) -{ - ddsrt_mutex_lock (&ws->mutex); - for (unsigned i = index + 1; i < ws->ctx.n; i++) - { - ws->ctx.conns[i] = NULL; - if (!WSACloseEvent (ws->ctx.events[i])) - { - DDS_WARNING("os_sockWaitsetPurge: WSACloseEvent (%x failed, error %d\n", (os_uint32) ws->ctx.events[i], os_getErrno ()); - } - } - ws->ctx.n = index + 1; - ddsrt_mutex_unlock (&ws->mutex); -} - -void os_sockWaitsetRemove (os_sockWaitset ws, ddsi_tran_conn_t conn) -{ - ddsrt_mutex_lock (&ws->mutex); - for (unsigned i = 0; i < ws->ctx.n; i++) - { - if (conn == ws->ctx.conns[i]) - { - WSACloseEvent (ws->ctx.events[i]); - ws->ctx.n--; - if (i != ws->ctx.n) - { - ws->ctx.events[i] = ws->ctx.events[ws->ctx.n]; - ws->ctx.conns[i] = ws->ctx.conns[ws->ctx.n]; - } - break; - } - } - ddsrt_mutex_unlock (&ws->mutex); -} - -void os_sockWaitsetTrigger (os_sockWaitset ws) -{ - if (! 
WSASetEvent (ws->ctx.events[0])) - { - DDS_WARNING("os_sockWaitsetTrigger: WSASetEvent(%x) failed, error %d\n", (os_uint32) ws->ctx.events[0], os_getErrno ()); - } -} - -int os_sockWaitsetAdd (os_sockWaitset ws, ddsi_tran_conn_t conn) -{ - WSAEVENT ev; - os_socket sock = ddsi_conn_handle (conn); - unsigned idx; - int ret; - - ddsrt_mutex_lock (&ws->mutex); - - for (idx = 0; idx < ws->ctx.n; idx++) - { - if (ws->ctx.conns[idx] == conn) - break; - } - if (idx < ws->ctx.n) - ret = 0; - else - { - assert (ws->n < MAXIMUM_WAIT_OBJECTS); - if ((ev = WSACreateEvent ()) == WSA_INVALID_EVENT) - ret = -1; - else - { - if (WSAEventSelect (sock, ev, FD_READ) == SOCKET_ERROR) - { - DDS_WARNING("os_sockWaitsetAdd: WSAEventSelect(%x,%x) failed, error %d\n", (os_uint32) sock, (os_uint32) ev, os_getErrno ()); - WSACloseEvent (ev); - ret = -1; - } - else - { - ws->ctx.conns[ws->ctx.n] = conn; - ws->ctx.events[ws->ctx.n] = ev; - ws->ctx.n++; - ret = 1; - } - } - } - - ddsrt_mutex_unlock (&ws->mutex); - return ret; -} - -os_sockWaitsetCtx os_sockWaitsetWait (os_sockWaitset ws) -{ - unsigned idx; - - assert (ws->index == -1); - - ddsrt_mutex_lock (&ws->mutex); - ws->ctx0 = ws->ctx; - ddsrt_mutex_unlock (&ws->mutex); - - if ((idx = WSAWaitForMultipleEvents (ws->ctx0.n, ws->ctx0.events, FALSE, WSA_INFINITE, FALSE)) == WSA_WAIT_FAILED) - { - DDS_WARNING("os_sockWaitsetWait: WSAWaitForMultipleEvents(%d,...,0,0,0) failed, error %d\n", ws->ctx0.n, os_getErrno ()); - return NULL; - } - -#ifndef WAIT_IO_COMPLETION /* curious omission in the WinCE headers */ -#define TEMP_DEF_WAIT_IO_COMPLETION -#define WAIT_IO_COMPLETION 0xc0L -#endif - if (idx >= WSA_WAIT_EVENT_0 && idx < WSA_WAIT_EVENT_0 + ws->ctx0.n) - { - ws->ctx0.index = idx - WSA_WAIT_EVENT_0; - if (ws->ctx0.index == 0) - { - /* pretend a spurious wakeup */ - WSAResetEvent (ws->ctx0.events[0]); - ws->ctx0.index = -1; - } - return &ws->ctx0; - } - - if (idx == WAIT_IO_COMPLETION) - { - /* Presumably can't happen with alertable = FALSE */ 
- DDS_WARNING("os_sockWaitsetWait: WSAWaitForMultipleEvents(%d,...,0,0,0) returned unexpected WAIT_IO_COMPLETION\n", ws->ctx0.n); - } - else - { - DDS_WARNING("os_sockWaitsetWait: WSAWaitForMultipleEvents(%d,...,0,0,0) returned unrecognised %d\n", ws->ctx0.n, idx); - } -#ifdef TEMP_DEF_WAIT_IO_COMPLETION -#undef WAIT_IO_COMPLETION -#undef TEMP_DEF_WAIT_IO_COMPLETION -#endif - return NULL; -} - -/* This implementation follows the pattern of simply looking at the - socket that triggered the wakeup; alternatively, one could scan the - entire set as we do for select(). If the likelihood of two sockets - having an event simultaneously is small, this is better, but if it - is large, the lower indices may get a disproportionally large - amount of attention. */ - -int os_sockWaitsetNextEvent (os_sockWaitsetCtx ctx, ddsi_tran_conn_t * conn) -{ - assert (-1 <= ctx->index && ctx->index < ctx->n); - assert (0 < ctx->n && ctx->n <= ctx->sz); - if (ctx->index == -1) - { - return -1; - } - else - { - WSANETWORKEVENTS nwev; - int idx = ctx->index; - os_socket handle; - - ctx->index = -1; - handle = ddsi_conn_handle (ctx->conns[idx]); - if (WSAEnumNetworkEvents (handle, ctx->events[idx], &nwev) == SOCKET_ERROR) - { - int err = os_getErrno (); - if (err != WSAENOTSOCK) - { - /* May have a wakeup and a close in parallel, so the handle - need not exist anymore. */ - DDS_ERROR("os_sockWaitsetNextEvent: WSAEnumNetworkEvents(%x,%x,...) failed, error %d", (os_uint32) handle, (os_uint32) ctx->events[idx], err); - } - return -1; - } - - *conn = ctx->conns[idx]; - return idx - 1; - } -} - -#elif MODE_SEL == MODE_SELECT - -#ifdef __VXWORKS__ -#include -#include -#include -#include -#define OSPL_PIPENAMESIZE 26 -#endif - -#if !_WIN32 && !LWIP_SOCKET - -#if ! 
__VXWORKS__&& !__QNXNTO__ -#include -#endif /* __VXWORKS__ __QNXNTO__ */ - -#ifndef _WRS_KERNEL -#include -#endif -#ifdef __sun -#include -#include -#include -#endif - -#endif /* !_WIN32 && !LWIP_SOCKET */ - -typedef struct os_sockWaitsetSet -{ - ddsi_tran_conn_t * conns; /* connections in set */ - ddsrt_socket_t * fds; /* file descriptors in set */ - unsigned sz; /* max number of fds in context */ - unsigned n; /* actual number of fds in context */ -} os_sockWaitsetSet; - -struct os_sockWaitsetCtx -{ - os_sockWaitsetSet set; /* set of connections and descriptors */ - unsigned index; /* cursor for enumerating */ - fd_set rdset; /* read file descriptors */ -}; - -struct os_sockWaitset -{ - ddsrt_socket_t pipe[2]; /* pipe used for triggering */ - ddsrt_mutex_t mutex; /* concurrency guard */ - int fdmax_plus_1; /* value for first parameter of select() */ - os_sockWaitsetSet set; /* set of descriptors handled next */ - struct os_sockWaitsetCtx ctx; /* set of descriptors being handled */ -}; - -#if defined (_WIN32) -static int make_pipe (ddsrt_socket_t fd[2]) -{ - struct sockaddr_in addr; - socklen_t asize = sizeof (addr); - ddsrt_socket_t listener = socket (AF_INET, SOCK_STREAM, 0); - ddsrt_socket_t s1 = socket (AF_INET, SOCK_STREAM, 0); - ddsrt_socket_t s2 = DDSRT_INVALID_SOCKET; - - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); - addr.sin_port = 0; - if (bind (listener, (struct sockaddr *)&addr, sizeof (addr)) == -1) - goto fail; - if (getsockname (listener, (struct sockaddr *)&addr, &asize) == -1) - goto fail; - if (listen (listener, 1) == -1) - goto fail; - if (connect (s1, (struct sockaddr *)&addr, sizeof (addr)) == -1) - goto fail; - if ((s2 = accept (listener, 0, 0)) == INVALID_SOCKET) - goto fail; - closesocket (listener); - /* Equivalent to FD_CLOEXEC */ - SetHandleInformation ((HANDLE) s1, HANDLE_FLAG_INHERIT, 0); - SetHandleInformation ((HANDLE) s2, HANDLE_FLAG_INHERIT, 0); - fd[0] = s1; - fd[1] = s2; - return 0; - -fail: - 
closesocket (listener); - closesocket (s1); - closesocket (s2); - return -1; -} -#elif defined(__VXWORKS__) -static int make_pipe (int pfd[2]) -{ - char pipename[OSPL_PIPENAMESIZE]; - int pipecount = 0; - do { - snprintf ((char*)&pipename, sizeof (pipename), "/pipe/ospl%d", pipecount++); - } while ((result = pipeDevCreate ((char*)&pipename, 1, 1)) == -1 && os_getErrno() == EINVAL); - if (result == -1) - goto fail_pipedev; - if ((pfd[0] = open ((char*)&pipename, O_RDWR, 0644)) == -1) - goto fail_open0; - if ((pfd[1] = open ((char*)&pipename, O_RDWR, 0644)) == -1) - goto fail_open1; - return 0; - -fail_open1: - close (pfd[0]); -fail_open0: - pipeDevDelete (pipename, 0); -fail_pipedev: - return -1; -} -#elif !defined(LWIP_SOCKET) -static int make_pipe (int pfd[2]) -{ - return pipe (pfd); -} -#endif - -static void os_sockWaitsetNewSet (os_sockWaitsetSet * set) -{ - set->fds = ddsrt_malloc (WAITSET_DELTA * sizeof (*set->fds)); - set->conns = ddsrt_malloc (WAITSET_DELTA * sizeof (*set->conns)); - set->sz = WAITSET_DELTA; - set->n = 1; -} - -static void os_sockWaitsetFreeSet (os_sockWaitsetSet * set) -{ - ddsrt_free (set->fds); - ddsrt_free (set->conns); -} - -static void os_sockWaitsetNewCtx (os_sockWaitsetCtx ctx) -{ - os_sockWaitsetNewSet (&ctx->set); - FD_ZERO (&ctx->rdset); -} - -static void os_sockWaitsetFreeCtx (os_sockWaitsetCtx ctx) -{ - os_sockWaitsetFreeSet (&ctx->set); -} - -os_sockWaitset os_sockWaitsetNew (void) -{ - int result; - os_sockWaitset ws = ddsrt_malloc (sizeof (*ws)); - - os_sockWaitsetNewSet (&ws->set); - os_sockWaitsetNewCtx (&ws->ctx); - -#if ! 
defined (_WIN32) - ws->fdmax_plus_1 = 0; -#else - ws->fdmax_plus_1 = FD_SETSIZE; -#endif - -#if defined(LWIP_SOCKET) - ws->pipe[0] = -1; - ws->pipe[1] = -1; - result = 0; -#else - result = make_pipe (ws->pipe); -#endif - if (result == -1) - { - os_sockWaitsetFreeCtx (&ws->ctx); - os_sockWaitsetFreeSet (&ws->set); - ddsrt_free (ws); - return NULL; - } - -#if !defined(LWIP_SOCKET) - ws->set.fds[0] = ws->pipe[0]; -#else - ws->set.fds[0] = 0; -#endif - ws->set.conns[0] = NULL; - -#if !defined(__VXWORKS__) && !defined(_WIN32) && !defined(LWIP_SOCKET) && !defined(__QNXNTO__) - (void) fcntl (ws->pipe[0], F_SETFD, fcntl (ws->pipe[0], F_GETFD) | FD_CLOEXEC); - (void) fcntl (ws->pipe[1], F_SETFD, fcntl (ws->pipe[1], F_GETFD) | FD_CLOEXEC); -#endif -#if !defined(LWIP_SOCKET) - FD_SET (ws->set.fds[0], &ws->ctx.rdset); -#endif -#if !defined(_WIN32) - ws->fdmax_plus_1 = ws->set.fds[0] + 1; -#endif - - ddsrt_mutex_init (&ws->mutex); - - return ws; -} - -static void os_sockWaitsetGrow (os_sockWaitsetSet * set) -{ - set->sz += WAITSET_DELTA; - set->conns = ddsrt_realloc (set->conns, set->sz * sizeof (*set->conns)); - set->fds = ddsrt_realloc (set->fds, set->sz * sizeof (*set->fds)); -} - -void os_sockWaitsetFree (os_sockWaitset ws) -{ -#if defined(__VXWORKS__) && defined(__RTP__) - char nameBuf[OSPL_PIPENAMESIZE]; - ioctl (ws->pipe[0], FIOGETNAME, &nameBuf); -#endif -#if defined(_WIN32) - closesocket (ws->pipe[0]); - closesocket (ws->pipe[1]); -#elif !defined(LWIP_SOCKET) - (void) close (ws->pipe[0]); - (void) close (ws->pipe[1]); -#endif -#if defined(__VXWORKS__) && defined(__RTP__) - pipeDevDelete ((char*) &nameBuf, 0); -#endif - os_sockWaitsetFreeSet (&ws->set); - os_sockWaitsetFreeCtx (&ws->ctx); - ddsrt_mutex_destroy (&ws->mutex); - ddsrt_free (ws); -} - -void os_sockWaitsetTrigger (os_sockWaitset ws) -{ -#if defined(LWIP_SOCKET) - (void)ws; -#else - char buf = 0; - int n; - -#if defined (_WIN32) - n = send (ws->pipe[1], &buf, 1, 0); -#else - n = (int) write (ws->pipe[1], 
&buf, 1); -#endif - if (n != 1) - { - DDS_WARNING("os_sockWaitsetTrigger: write failed on trigger pipe\n"); - } -#endif -} - -int os_sockWaitsetAdd (os_sockWaitset ws, ddsi_tran_conn_t conn) -{ - ddsrt_socket_t handle = ddsi_conn_handle (conn); - os_sockWaitsetSet * set = &ws->set; - unsigned idx; - int ret; - -#if ! defined (_WIN32) - assert (handle >= 0); - assert (handle < FD_SETSIZE); -#endif - - ddsrt_mutex_lock (&ws->mutex); - for (idx = 0; idx < set->n; idx++) - { - if (set->conns[idx] == conn) - break; - } - if (idx < set->n) - ret = 0; - else - { - if (set->n == set->sz) - os_sockWaitsetGrow (set); -#if ! defined (_WIN32) - if ((int) handle >= ws->fdmax_plus_1) - ws->fdmax_plus_1 = handle + 1; -#endif - set->conns[set->n] = conn; - set->fds[set->n] = handle; - set->n++; - ret = 1; - } - ddsrt_mutex_unlock (&ws->mutex); - return ret; -} - -void os_sockWaitsetPurge (os_sockWaitset ws, unsigned index) -{ - os_sockWaitsetSet * set = &ws->set; - - ddsrt_mutex_lock (&ws->mutex); - if (index + 1 <= set->n) - { - for (unsigned i = index + 1; i < set->n; i++) - { - set->conns[i] = NULL; - set->fds[i] = 0; - } - set->n = index + 1; - } - ddsrt_mutex_unlock (&ws->mutex); -} - -void os_sockWaitsetRemove (os_sockWaitset ws, ddsi_tran_conn_t conn) -{ - os_sockWaitsetSet * set = &ws->set; - - ddsrt_mutex_lock (&ws->mutex); - for (unsigned i = 0; i < set->n; i++) - { - if (conn == set->conns[i]) - { - set->n--; - if (i != set->n) - { - set->fds[i] = set->fds[set->n]; - set->conns[i] = set->conns[set->n]; - } - break; - } - } - ddsrt_mutex_unlock (&ws->mutex); -} - -os_sockWaitsetCtx os_sockWaitsetWait (os_sockWaitset ws) -{ - unsigned u; -#if !_WIN32 - int fdmax; -#endif - fd_set * rdset = NULL; - os_sockWaitsetCtx ctx = &ws->ctx; - os_sockWaitsetSet * dst = &ctx->set; - os_sockWaitsetSet * src = &ws->set; - - ddsrt_mutex_lock (&ws->mutex); - -#if !_WIN32 - fdmax = ws->fdmax_plus_1; -#endif - - /* Copy context to working context */ - - while (dst->sz < src->sz) - { - 
os_sockWaitsetGrow (dst); - } - dst->n = src->n; - - for (u = 0; u < src->sz; u++) - { - dst->conns[u] = src->conns[u]; - dst->fds[u] = src->fds[u]; - } - - ddsrt_mutex_unlock (&ws->mutex); - - /* Copy file descriptors into select read set */ - - rdset = &ctx->rdset; - FD_ZERO (rdset); -#if !defined(LWIP_SOCKET) - for (u = 0; u < dst->n; u++) - { - FD_SET (dst->fds[u], rdset); - } -#else - for (u = 1; u < dst->n; u++) - { - DDSRT_WARNING_GNUC_OFF(sign-conversion) - FD_SET (dst->fds[u], rdset); - DDSRT_WARNING_GNUC_ON(sign-conversion) - } -#endif /* LWIP_SOCKET */ - - dds_return_t rc; - do - { - rc = ddsrt_select (fdmax, rdset, NULL, NULL, DDS_INFINITY); - if (rc < 0 && rc != DDS_RETCODE_INTERRUPTED && rc != DDS_RETCODE_TRY_AGAIN) - { - DDS_WARNING("os_sockWaitsetWait: select failed, retcode = %"PRId32, rc); - break; - } - } while (rc < 0); - - if (rc > 0) - { - /* this simply skips the trigger fd */ - ctx->index = 1; -#if ! defined(LWIP_SOCKET) - if (FD_ISSET (dst->fds[0], rdset)) - { - char buf; - int n1; -#if defined (_WIN32) - n1 = recv (dst->fds[0], &buf, 1, 0); -#else - n1 = (int) read (dst->fds[0], &buf, 1); -#endif - if (n1 != 1) - { - DDS_WARNING("os_sockWaitsetWait: read failed on trigger pipe\n"); - assert (0); - } - } -#endif /* LWIP_SOCKET */ - return ctx; - } - - return NULL; -} - -#if defined(LWIP_SOCKET) -DDSRT_WARNING_GNUC_OFF(sign-conversion) -#endif - -int os_sockWaitsetNextEvent (os_sockWaitsetCtx ctx, ddsi_tran_conn_t * conn) -{ - while (ctx->index < ctx->set.n) - { - unsigned idx = ctx->index++; - ddsrt_socket_t fd = ctx->set.fds[idx]; -#if ! 
defined (LWIP_SOCKET) - assert(idx > 0); -#endif - if (FD_ISSET (fd, &ctx->rdset)) - { - *conn = ctx->set.conns[idx]; - - return (int) (idx - 1); - } - } - return -1; -} - -#if defined(LWIP_SOCKET) -DDSRT_WARNING_GNUC_ON(sign-conversion) -#endif - -#else -#error "no mode selected" -#endif diff --git a/src/ddsrt/CMakeLists.txt b/src/ddsrt/CMakeLists.txt index 461f7eb03f..196b6aab10 100644 --- a/src/ddsrt/CMakeLists.txt +++ b/src/ddsrt/CMakeLists.txt @@ -23,6 +23,24 @@ include(CheckSymbolExists) option(WITH_LWIP "Use lightweight IP stack" OFF) option(WITH_FREERTOS "Build for FreeRTOS" OFF) +if(WITH_LWIP) + set(event "select") +elseif(CMAKE_SYSTEM MATCHES "Linux" OR + CMAKE_SYSTEM MATCHES "Windows") + set(event "epoll") + set(DDSRT_HAVE_NETLINK_EVENT TRUE) +elseif(CMAKE_SYSTEM MATCHES "Darwin" OR + CMAKE_SYSTEM MATCHES "BSD") + set(event "kqueue") + set(DDSRT_HAVE_NETLINK_EVENT TRUE) +else() + set(event "select") +endif() + +set(WITH_EVENT "${event}" CACHE STRING "Use specified backend for event framework") +set_property(CACHE WITH_EVENT PROPERTY STRINGS "epoll" "kqueue" "select") +set(EMBEDDED_EVENTS "6" CACHE STRING "Minimum number of events to reserve space for") + set(source_dir "${CMAKE_CURRENT_SOURCE_DIR}") set(binary_dir "${CMAKE_CURRENT_BINARY_DIR}") @@ -159,6 +177,34 @@ if(SIZEOF_SOCKADDR_IN6) set(DDSRT_HAVE_IPV6 TRUE) endif() +# EVENT +set(CMAKE_REQUIRED_INCLUDES "${source_dir}/src/event/${WITH_EVENT}") +# The configuration header must contain the minimum number of events and the +# size alignment of the internal event buffer. That introduces a cyclic +# dependency because the size can only be determined with the minimum number +# of events to reserve space for; define it manually to work around it. 
+set(CMAKE_REQUIRED_DEFINITIONS "-DDDSRT_EMBEDDED_EVENTS=${EMBEDDED_EVENTS}") +set(CMAKE_EXTRA_INCLUDE_FILES "eventlist.h") +check_type_size("struct eventlist" DDSRT_SIZEOF_EVENTLIST) + +target_include_directories( + ddsrt INTERFACE + "$") + +list(APPEND headers + "${source_dir}/include/dds/ddsrt/event.h" + "${source_dir}/src/event.h" + "${source_dir}/src/event/${WITH_EVENT}/eventlist.h") +list(APPEND sources + "${source_dir}/src/event.c" + "${source_dir}/src/event/${WITH_EVENT}/event.c") +if(WIN32 AND WITH_EVENT STREQUAL "epoll") + list(APPEND headers + "${source_dir}/src/event/epoll/wepoll.h") + list(APPEND sources + "${source_dir}/src/event/epoll/wepoll.c") +endif() + if(WITH_FREERTOS) list(APPEND headers "${source_dir}/include/dds/ddsrt/sync/freertos.h" @@ -273,6 +319,8 @@ endif() # Generate configuration file set(DDSRT_WITH_LWIP ${WITH_LWIP}) set(DDSRT_WITH_FREERTOS ${WITH_FREERTOS}) +string(TOUPPER "${WITH_EVENT}" DDSRT_EVENT) +set(DDSRT_EMBEDDED_EVENTS ${EMBEDDED_EVENTS}) foreach(feature SSL SECURITY LIFESPAN DEADLINE_MISSED NETWORK_PARTITIONS SSM TYPE_DISCOVERY TOPIC_DISCOVERY SHM) diff --git a/src/ddsrt/include/dds/config.h.in b/src/ddsrt/include/dds/config.h.in index 87493ab3a8..1606cb783d 100644 --- a/src/ddsrt/include/dds/config.h.in +++ b/src/ddsrt/include/dds/config.h.in @@ -15,6 +15,11 @@ #cmakedefine DDSRT_WITH_LWIP 1 #cmakedefine DDSRT_WITH_FREERTOS 1 +#define DDSRT_EVENT_SELECT 1 +#define DDSRT_EVENT_EPOLL 2 +#define DDSRT_EVENT_KQUEUE 3 +#cmakedefine DDSRT_EVENT DDSRT_EVENT_@DDSRT_EVENT@ + #cmakedefine DDSRT_HAVE_DYNLIB 1 #cmakedefine DDSRT_HAVE_FILESYSTEM 1 #cmakedefine DDSRT_HAVE_NETSTAT 1 @@ -28,4 +33,8 @@ #cmakedefine DDSRT_HAVE_INET_NTOP 1 #cmakedefine DDSRT_HAVE_INET_PTON 1 +#cmakedefine DDSRT_SIZEOF_EVENTLIST @DDSRT_SIZEOF_EVENTLIST@ +#cmakedefine DDSRT_EMBEDDED_EVENTS @DDSRT_EMBEDDED_EVENTS@ +#cmakedefine DDSRT_HAVE_NETLINK_EVENT 1 + #endif diff --git a/src/ddsrt/include/dds/ddsrt/event.h b/src/ddsrt/include/dds/ddsrt/event.h new file 
mode 100644 index 0000000000..32e1333122 --- /dev/null +++ b/src/ddsrt/include/dds/ddsrt/event.h @@ -0,0 +1,193 @@ +/* + * Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + */ +#ifndef DDSRT_EVENT_H +#define DDSRT_EVENT_H + +#include "dds/export.h" +#include "dds/config.h" +#include "dds/ddsrt/atomics.h" +#include "dds/ddsrt/sockets.h" +#include "dds/ddsrt/retcode.h" +#include "dds/ddsrt/sync.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DDSRT_READ (1u<<0) +#define DDSRT_WRITE (1u<<1) +#define DDSRT_IPV4_ADDED (1u<<2) +#define DDSRT_IPV4_DELETED (1u<<3) +#if DDSRT_HAVE_IPV6 +# define DDSRT_IPV6_ADDED (1u<<4) +# define DDSRT_IPV6_DELETED (1u<<5) +#endif +#define DDSRT_LINK_UP (1u<<6) +#define DDSRT_LINK_DOWN (1u<<7) + +#if DDSRT_HAVE_NETLINK_EVENT +// events are socket events by default +# define DDSRT_NETLINK (1u<<31) +#endif +// more event types to follow, e.g. 
TIMER_EVENT, SIGNAL_EVENT + +#define DDSRT_RUN_ONCE (1u<<0) + +typedef struct ddsrt_event ddsrt_event_t; +struct ddsrt_event; + +typedef struct ddsrt_loop ddsrt_loop_t; +struct ddsrt_loop; + +typedef dds_return_t(*ddsrt_event_callback_t)( + ddsrt_event_t *event, uint32_t flags, const void *data, void *user_data); + +struct ddsrt_event { + uint32_t flags; + const ddsrt_loop_t *loop; + ddsrt_event_callback_t callback; + void *user_data; + union { + struct { + ddsrt_socket_t socketfd; + } socket; +#if DDSRT_HAVE_NETLINK_EVENT + struct { +# if _WIN32 + ddsrt_socket_t pipefds[2]; + HANDLE address_handle; + HANDLE interface_handle; +# else + ddsrt_socket_t socketfd; +# endif + } netlink; +#endif + } source; +}; + +typedef struct ddsrt_netlink_message ddsrt_netlink_message_t; +struct ddsrt_netlink_message { + uint32_t index; + struct sockaddr_storage address; // zeroed out on LINK_UP/LINK_DOWN +}; + +typedef struct ddsrt_eventlist ddsrt_eventlist_t; +struct ddsrt_eventlist { + size_t length; /**< number of slots available for use */ + size_t count; /**< number of slots currently in use */ + size_t start; + size_t end; + union { + ddsrt_event_t *embedded[ DDSRT_EMBEDDED_EVENTS ]; + ddsrt_event_t **dynamic; + } events; +}; + +#if _WIN32 +typedef HANDLE ddsrt_epoll_t; +# define DDSRT_INVALID_EPOLL NULL +#else +typedef int ddsrt_epoll_t; +# define DDSRT_INVALID_EPOLL (-1) +#endif + +struct ddsrt_loop { + ddsrt_atomic_uint32_t terminate; + ddsrt_socket_t pipefds[2]; + // owner field is used to avoid recursive deadlocks. owner is set atomically + // when a thread starts the event loop and unset when it stops. 
operations + // that modify the event queue check if the owner matches the identifier of + // the calling thread and makes locking a no-op if it does + ddsrt_atomic_uintptr_t owner; /**< thread identifier of dispatcher */ + ddsrt_mutex_t lock; + ddsrt_cond_t condition; + ddsrt_eventlist_t active; + ddsrt_eventlist_t cancelled; + // type-punned representation of eventlist used by event backend. + union { + char data[ DDSRT_SIZEOF_EVENTLIST ]; + void *align; + } ready; +#if DDSRT_EVENT == DDSRT_EVENT_EPOLL + ddsrt_epoll_t epollfd; +#elif DDSRT_EVENT == DDSRT_EVENT_KQUEUE + int kqueuefd; +#else + ddsrt_socket_t fdmax_plus_1; + fd_set readfds; + fd_set writefds; +#endif +}; + +DDS_EXPORT dds_return_t +ddsrt_create_event( + ddsrt_event_t *event, + ddsrt_socket_t socketfd, + uint32_t flags, + ddsrt_event_callback_t callback, + void *user_data) +ddsrt_nonnull((1)); + +DDS_EXPORT dds_return_t +ddsrt_destroy_event( + ddsrt_event_t *event); + +DDS_EXPORT dds_return_t +ddsrt_handle_event( + ddsrt_event_t *event, + uint32_t flags, + void *user_data) +ddsrt_nonnull((1)); + +DDS_EXPORT ddsrt_socket_t +ddsrt_event_socket( + ddsrt_event_t *event) +ddsrt_nonnull_all; + +DDS_EXPORT dds_return_t +ddsrt_add_event( + ddsrt_loop_t *loop, + ddsrt_event_t *event) +ddsrt_nonnull_all; + +DDS_EXPORT dds_return_t +ddsrt_delete_event( + ddsrt_loop_t *loop, + ddsrt_event_t *event) +ddsrt_nonnull_all; + +DDS_EXPORT dds_return_t +ddsrt_create_loop( + ddsrt_loop_t *loop) +ddsrt_nonnull_all; + +DDS_EXPORT void +ddsrt_destroy_loop( + ddsrt_loop_t *loop); + +DDS_EXPORT void +ddsrt_trigger_loop( + ddsrt_loop_t *loop) +ddsrt_nonnull_all; + +DDS_EXPORT dds_return_t +ddsrt_run_loop( + ddsrt_loop_t *loop, + uint32_t flags, + void *user_data) +ddsrt_nonnull((1)); + +#if defined(__cplusplus) +} +#endif + +#endif // DDSRT_EVENT_H diff --git a/src/ddsrt/src/event.c b/src/ddsrt/src/event.c new file mode 100644 index 0000000000..a95447f33d --- /dev/null +++ b/src/ddsrt/src/event.c @@ -0,0 +1,669 @@ +/* + * 
Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + */ +#include +#include +#include +#include +#include +#include + +#if _WIN32 +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include +# include +# include +# include +# include +#elif __APPLE__ +# include +# include +# include // struct kev_in_data +# include // struct kev_in6_data +# include // struct net_event_data +#elif __FreeBSD__ +# include +# include +# include +# include +# include +#elif __linux +# include +# include +# include +# include +#endif + +#include "event.h" +#include "dds/ddsrt/static_assert.h" + +#if _WIN32 +static int dgram_pipe(SOCKET sv[2]) +{ + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + SOCKET fds[2] = { INVALID_SOCKET, INVALID_SOCKET }; + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = 0; + if ((fds[0] = socket(AF_INET, SOCK_DGRAM, 0)) == INVALID_SOCKET) + goto err_socket_fd0; + if (bind(fds[0], (struct sockaddr *)&addr, addrlen) == SOCKET_ERROR) + goto err_bind; + if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen) == SOCKET_ERROR) + goto err_bind; + if ((fds[1] = socket(AF_INET, SOCK_DGRAM, 0)) == INVALID_SOCKET) + goto err_socket_fd1; + if (connect(fds[1], (struct sockaddr *)&addr, addrlen) == -1) + goto err_connect; + // equivalent to FD_CLOEXEC + SetHandleInformation((HANDLE) fds[0], HANDLE_FLAG_INHERIT, 0); + SetHandleInformation((HANDLE) fds[1], HANDLE_FLAG_INHERIT, 0); + sv[0] = fds[0]; + sv[1] = fds[1]; + return 0; +err_connect: + closesocket(fds[1]); +err_socket_fd1: +err_bind: + closesocket(fds[0]); 
+err_socket_fd0: + return -1; +} + +static dds_return_t +destroy_netlink_event(ddsrt_event_t *event) +{ + if (event->source.netlink.address_handle) + CancelMibChangeNotify2(event->source.netlink.address_handle); + event->source.netlink.address_handle = NULL; + if (event->source.netlink.interface_handle) + CancelMibChangeNotify2(event->source.netlink.interface_handle); + event->source.netlink.interface_handle = NULL; + closesocket(event->source.netlink.pipefds[0]); + closesocket(event->source.netlink.pipefds[1]); + event->source.netlink.pipefds[0] = INVALID_SOCKET; + event->source.netlink.pipefds[1] = INVALID_SOCKET; + return DDS_RETCODE_OK; +} + +// use same structure for every ipchange event for convenience +struct netlink_message { + uint32_t flags; + NET_LUID luid; + NET_IFINDEX index; + ADDRESS_FAMILY family; + SOCKADDR_INET address; // zeroed out on NotifyIpInterfaceChange +}; + +static inline int +read_netlink_message(SOCKET fd, struct netlink_message *message) +{ + int cnt, off = 0, len = sizeof(*message); + uint8_t *buf = (uint8_t *)message; + + do { + cnt = recv(fd, buf + off, len - off, 0); + if (cnt == SOCKET_ERROR && WSAGetLastError() == WSAEINTR) + continue; + if (cnt == SOCKET_ERROR) + return -1; + assert(cnt >= 0); + off += cnt; + } while (off < len); + + assert(off == len); + return 0; +} + +static inline int +write_netlink_message(SOCKET fd, const struct netlink_message *message) +{ + int cnt, off = 0, len = sizeof(*message); + uint8_t *buf = (void *)message; + + do { + cnt = send(fd, buf + (size_t)off, len, 0); + if (cnt == SOCKET_ERROR && WSAGetLastError() == WSAEINTR) + continue; + if (cnt == SOCKET_ERROR) + return -1; + assert(cnt >= 0); + off += cnt; + } while (off < len); + + assert(off == len); + return 0; +} + +static void +do_address_change( + void *caller_context, + MIB_UNICASTIPADDRESS_ROW *row, + MIB_NOTIFICATION_TYPE notification_type) +{ + struct netlink_message msg; + + assert(caller_context); + + if (!row) // initial notification, 
unused + return; + assert(notification_type != MibInitialNotification); + if (notification_type == MibParameterNotification) + return; + + if (row->Address.si_family == AF_INET6) + msg.flags = notification_type == MibAddInstance + ? DDSRT_IPV6_ADDED : DDSRT_IPV6_DELETED; + else + msg.flags = notification_type == MibAddInstance + ? DDSRT_IPV4_ADDED : DDSRT_IPV4_DELETED; + msg.luid = row->InterfaceLuid; + msg.index = row->InterfaceIndex; + msg.family = row->Address.si_family; + msg.address = row->Address; + write_netlink_message((SOCKET)caller_context, &msg); +} + +static void +do_interface_change( + void *caller_context, + MIB_IPINTERFACE_ROW *row, + MIB_NOTIFICATION_TYPE notification_type) +{ + struct netlink_message msg; + + assert(caller_context); + + if (!row) // initial notification, unused + return; + assert(notification_type != MibInitialNotification); + if (notification_type == MibParameterNotification) + return; + + msg.flags = notification_type == MibAddInstance + ? DDSRT_LINK_UP : DDSRT_LINK_DOWN; + msg.luid = row->InterfaceLuid; + msg.index = row->InterfaceIndex; + msg.family = row->Family; + memset(&msg.address, 0, sizeof(msg.address)); + + write_netlink_message((SOCKET)caller_context, &msg); +} + +static dds_return_t +create_netlink_event( + ddsrt_event_t *event, + uint32_t flags, + ddsrt_event_callback_t callback, + void *user_data) +{ + SOCKET fds[2]; + HANDLE addr_hdl = NULL, iface_hdl = NULL; + bool ip4 = (flags & (DDSRT_IPV4_ADDED|DDSRT_IPV4_DELETED)); + bool ip6 = (flags & (DDSRT_IPV6_ADDED|DDSRT_IPV6_DELETED)); + ADDRESS_FAMILY af = (ip4 && ip6) ? AF_UNSPEC : (ip6 ? 
AF_INET6 : AF_INET); + + (void)socket; + + // use a SOCK_DGRAM socket pair to deal with partial writes + if (dgram_pipe(fds) == -1) + goto err_pipe; + // register callbacks to send notifications over socket pair + if ((flags & (DDSRT_LINK_UP|DDSRT_LINK_DOWN)) && + NO_ERROR != NotifyIpInterfaceChange( + AF_UNSPEC, &do_interface_change, (void*)fds[1], false, &iface_hdl)) + goto err_iface; + if ((ip4 || ip6) && + NO_ERROR != NotifyUnicastIpAddressChange( + af, &do_address_change, (void*)fds[1], false, &addr_hdl)) + goto err_addr; + + event->flags = DDSRT_NETLINK | (flags & (DDSRT_LINK_UP|DDSRT_LINK_DOWN| + DDSRT_IPV4_ADDED|DDSRT_IPV4_DELETED| + DDSRT_IPV6_ADDED|DDSRT_IPV6_DELETED)); + event->loop = NULL; + event->callback = callback; + event->user_data = user_data; + event->source.netlink.pipefds[0] = fds[0]; + event->source.netlink.pipefds[1] = fds[1]; + event->source.netlink.address_handle = addr_hdl; + event->source.netlink.interface_handle = iface_hdl; + return DDS_RETCODE_OK; +err_addr: + if (iface_hdl) + CancelMibChangeNotify2(iface_hdl); +err_iface: + closesocket(fds[0]); + closesocket(fds[1]); +err_pipe: + return DDS_RETCODE_OUT_OF_RESOURCES; +} + +static dds_return_t +proxy_netlink_event(ddsrt_event_t *event, void *user_data) +{ + struct netlink_message msg = { 0 }; + ddsrt_netlink_message_t nlmsg = { 0 }; + + DDSRT_STATIC_ASSERT(sizeof(struct sockaddr_in) == sizeof(msg.address.Ipv4)); + DDSRT_STATIC_ASSERT(sizeof(struct sockaddr_in6) == sizeof(msg.address.Ipv6)); + + if (read_netlink_message(event->source.netlink.pipefds[0], &msg) != 0) + abort(); // never happens, presumably + // discard unwanted events + if (!(msg.flags & event->flags)) + return DDS_RETCODE_OK; + + //nlmsg.interface.luid = msg.luid; // FIXME: required? 
+ nlmsg.index = msg.index; + if (msg.flags & (DDSRT_LINK_UP|DDSRT_LINK_DOWN)) + return event->callback(event, msg.flags | DDSRT_NETLINK, &nlmsg, user_data); + if (msg.flags & (DDSRT_IPV4_ADDED|DDSRT_IPV4_DELETED)) + memmove((struct sockaddr_in*)&nlmsg.address, &msg.address.Ipv4, sizeof(msg.address.Ipv4)); + else if (msg.flags & (DDSRT_IPV6_ADDED|DDSRT_IPV6_DELETED)) + memmove((struct sockaddr_in6*)&nlmsg.address, &msg.address.Ipv6, sizeof(msg.address.Ipv6)); + else + assert(0); + return event->callback(event, DDSRT_NETLINK | msg.flags, &nlmsg, user_data); +} + +#elif __APPLE__ || __FreeBSD__ || __linux__ +static dds_return_t +create_netlink_event( + ddsrt_event_t *event, + uint32_t flags, + ddsrt_event_callback_t callback, + void *user_data) +{ + int fd; + +# if __APPLE__ + struct kev_request req; + if ((fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT)) == -1) + goto err_socket; + req.vendor_code = KEV_VENDOR_APPLE; + req.kev_class = KEV_NETWORK_CLASS; + req.kev_subclass = KEV_ANY_SUBCLASS; + if (ioctl(fd, SIOCSKEVFILT, &req) == -1) + goto err_ioctl; +# elif __FreeBSD__ + if ((fd = socket(AF_ROUTE, SOCK_RAW, AF_UNSPEC)) == -1) + goto err_socket; +# elif __linux__ + struct sockaddr_nl sa; + if ((fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) == -1) + goto err_socket; + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + if (flags & (DDSRT_LINK_UP|DDSRT_LINK_DOWN)) + sa.nl_groups |= RTMGRP_LINK; + if (flags & (DDSRT_IPV4_ADDED|DDSRT_IPV4_DELETED)) + sa.nl_groups |= RTMGRP_IPV4_IFADDR; + if (flags & (DDSRT_IPV6_ADDED|DDSRT_IPV6_DELETED)) + sa.nl_groups |= RTMGRP_IPV6_IFADDR; + if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) == -1) + goto err_bind; +# endif + + event->flags = DDSRT_NETLINK | (flags & (DDSRT_LINK_UP|DDSRT_LINK_DOWN| + DDSRT_IPV4_ADDED|DDSRT_IPV4_DELETED| + DDSRT_IPV6_ADDED|DDSRT_IPV6_DELETED)); + event->loop = NULL; + event->callback = callback; + event->user_data = user_data; + event->source.netlink.socketfd = fd; + return DDS_RETCODE_OK; +# 
if __APPLE__ +err_ioctl: +# elif __linux__ +err_bind: +# endif + close(fd); +err_socket: + return DDS_RETCODE_OUT_OF_RESOURCES; +} + +static dds_return_t +destroy_netlink_event(ddsrt_event_t *event) +{ + close(event->source.netlink.socketfd); + event->source.netlink.socketfd = -1; + return DDS_RETCODE_OK; +} + +# if __APPLE__ +// macOS offers a bunch of mechanisms for notification on address changes +// 1. System Configuration framework +// 2. IOKit +// 3. PF_ROUTE socket +// 4. PF_SYSTEM socket +// +// the System Configuration framework allows the user to create notification +// ports (not a mach_port_t), but a CFRunLoop is required and therefore seems +// primarily intended to be used in Cocoa applications. IOKit allows for +// creation of an IONotificationPortRef from which a mach_port_t can be +// retrieved and which can be monitored by kqueue with EVFILTER_MACH, but no +// notifications were received on IP address changes in tests. PF_ROUTE +// sockets are frequently used on BSD systems to monitor for changes to the +// routing database, but notifications were kind of a hit and miss in tests. +// PF_SYSTEM (1) sockets provide exactly what is required. 
+// +// 1: http://newosxbook.com/bonus/vol1ch16.html +static dds_return_t +proxy_netlink_event(ddsrt_event_t *event, void *user_data) +{ + char buf[1024]; + const struct kern_event_msg *msg = (const struct kern_event_msg *)buf; + ssize_t msglen = 0; + + do { + msglen = read(event->source.socket.socketfd, buf, sizeof(buf)); + assert(msglen != -1 || errno == EINTR); + } while (msglen == -1); + + assert((size_t)msglen == (size_t)msg->total_size); + // discard non-networking events + if (msg->kev_class != KEV_NETWORK_CLASS) + return DDS_RETCODE_OK; + + unsigned int flags = 0u; + ddsrt_netlink_message_t nlmsg = { 0 }; + + switch (msg->kev_subclass) { + case KEV_INET_SUBCLASS: { + struct kev_in_data *in_data = (struct kev_in_data *)msg->event_data; + if (msg->event_code == KEV_INET_NEW_ADDR) + flags = DDSRT_IPV4_ADDED; + else if (msg->event_code == KEV_INET_ADDR_DELETED) + flags = DDSRT_IPV4_DELETED; + else + break; + //nlmsg.interface.unit = in_data->link_data.if_unit; + nlmsg.index = in_data->link_data.if_unit; + struct sockaddr_in *sin = (struct sockaddr_in *)&nlmsg.address; + sin->sin_family = AF_INET; + assert(sizeof(sin->sin_addr) == sizeof(in_data->ia_addr)); + memmove(&sin->sin_addr, &in_data->ia_addr, sizeof(sin->sin_addr)); + } break; + case KEV_INET6_SUBCLASS: { + struct kev_in6_data *in6_data = (struct kev_in6_data *)msg->event_data; + if (msg->event_code == KEV_INET6_NEW_USER_ADDR) + flags = DDSRT_IPV6_ADDED; + else if (msg->event_code == KEV_INET6_ADDR_DELETED) + flags = DDSRT_IPV6_DELETED; + else + break; + //nlmsg.interface.unit = in6_data->link_data.if_unit; + nlmsg.index = in6_data->link_data.if_unit; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&nlmsg.address; + sin6->sin6_family = AF_INET6; + assert(sizeof(sin6->sin6_addr) == sizeof(in6_data->ia_addr)); + memmove(&sin6->sin6_addr, &in6_data->ia_addr, sizeof(sin6->sin6_addr)); + } break; + case KEV_DL_SUBCLASS: { + struct net_event_data *data = (struct net_event_data *)msg->event_data; + if 
(msg->event_code == KEV_DL_PROTO_ATTACHED) + flags = DDSRT_LINK_UP; + else if (msg->event_code == KEV_DL_PROTO_DETACHED) + flags = DDSRT_LINK_DOWN; + else + break; + nlmsg.index = data->if_unit; + } break; + default: + break; + } + + // discard unwanted events + if (!(event->flags & flags)) + return DDS_RETCODE_OK; + + return event->callback(event, DDSRT_NETLINK | flags, &nlmsg, user_data); +} + +# elif __FreeBSD__ +// https://www.freebsd.org/cgi/man.cgi?query=route&apropos=0&sektion=4&manpath=FreeBSD+1.1-RELEASE&arch=default&format=html +// also see UNIX Network Programming volume 1 chapter 18 + +/* + * Round up 'a' to next multiple of 'size', which must be a power of 2 + */ +#define ROUNDUP(a, size) (((a) & ((size)-1)) ? (1 + ((a) | ((size)-1))) : (a)) + +/* + * Step to next socket address structure; + * if sa_len is 0, assume it is sizeof(u_long). + */ +#define NEXT_SA(ap) ap = (struct sockaddr *) \ + ((caddr_t) ap + (ap->sa_len ? ROUNDUP(ap->sa_len, sizeof (u_long)) : \ + sizeof(u_long))) + +static void get_rtaddrs(int addrs, const struct sockaddr *sa, const struct sockaddr **rti_info) +{ + for (int i = 0; i < RTAX_MAX; i++) { + if (addrs & (1 << i)) { + rti_info[i] = sa; + NEXT_SA(sa); + } else { + rti_info[i] = NULL; + } + } +} + +static dds_return_t +proxy_netlink_event(ddsrt_event_t *event, void *user_data) +{ + char buf[ sizeof(struct rt_msghdr) + 512 ]; + const struct rt_msghdr *rtm = (struct rt_msghdr *)buf; + ssize_t rtmlen; + + do { + rtmlen = read(event->source.socket.socketfd, buf, sizeof(buf)); + if (rtmlen == -1 && errno != EINTR) + return DDS_RETCODE_ERROR; + } while (rtmlen == -1); + + uint32_t flags = 0u; + ddsrt_netlink_message_t nlmsg = { 0 }; + + assert((size_t)rtmlen == (size_t)rtm->rtm_msglen); + switch (rtm->rtm_type) { + case RTM_NEWADDR: + case RTM_DELADDR: { + const struct ifa_msghdr *ifam = (void *)buf; + const struct sockaddr *sa, *rti_info[RTAX_MAX]; + sa = (const struct sockaddr *)(ifam + 1); + get_rtaddrs(ifam->ifam_addrs, sa, 
rti_info); + sa = rti_info[RTAX_IFA]; + nlmsg.index = ifam->ifam_index; + if (sa->sa_family == AF_INET) { + flags = (rtm->rtm_type == RTM_NEWADDR) ? DDSRT_IPV4_ADDED : DDSRT_IPV4_DELETED; + memmove(&nlmsg.address, sa, sizeof(struct sockaddr_in)); + } else { + flags = (rtm->rtm_type == RTM_NEWADDR) ? DDSRT_IPV6_ADDED : DDSRT_IPV6_DELETED; + memmove(&nlmsg.address, sa, sizeof(struct sockaddr_in6)); + } + } break; + case RTM_IFINFO: { + const struct if_msghdr *ifm = (void *)buf; + flags = (ifm->ifm_flags & IFF_UP) ? DDSRT_LINK_UP : DDSRT_LINK_DOWN; + } break; + default: + break; + } + + // discard unwanted events + if (!(event->flags & flags)) + return DDS_RETCODE_OK; + return event->callback(event, DDSRT_NETLINK | flags, &nlmsg, user_data); +} + +# elif __linux +// inspired by get_rtaddrs and parse_rtaddrs +// +// index the attributes in attrs (len bytes) by type: rta_info[type] points to +// the attribute of that type, or is NULL if the message does not carry it. +// rta_info must provide room for max + 1 entries (types 0..max) +static void +get_rtattrs( + struct rtattr *attrs, + size_t len, + struct rtattr *rta_info[], + size_t max) +{ + // clear all max + 1 pointer slots, not max times the size of an attribute + memset(rta_info, 0, sizeof(*rta_info) * (max + 1)); + for (struct rtattr *attr = attrs; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) { + // skip attribute types beyond the table instead of writing out of bounds + if (attr->rta_type > max) + continue; + rta_info[attr->rta_type] = attr; + } +} + +static dds_return_t +proxy_netlink_event(ddsrt_event_t *event, void *user_data) +{ + char buf[8192]; // opensplice uses 8k, seems a bit excessive? 
+ const struct nlmsghdr *nlm = (struct nlmsghdr *)buf; + struct iovec iov = { buf, sizeof(buf) }; + struct sockaddr_nl sa; + struct msghdr msg = { &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; + ssize_t nlmlen; + + do { + nlmlen = recvmsg(event->source.netlink.socketfd, &msg, 0); + if (nlmlen == -1 && errno != EINTR) + return DDS_RETCODE_ERROR; + } while (nlmlen == -1); + + for (; NLMSG_OK(nlm, nlmlen); nlm = NLMSG_NEXT(nlm, nlmlen)) { + // end of multipart message + if (nlm->nlmsg_type == NLMSG_DONE) + break; + + unsigned int flags = 0u; + ddsrt_netlink_message_t nlmsg = { 0 }; + + switch (nlm->nlmsg_type) { + case RTM_NEWADDR: + case RTM_DELADDR: { + const struct ifaddrmsg *ifa = NLMSG_DATA(nlm); + struct rtattr *rta_info[IFA_MAX + 1]; + get_rtattrs(IFA_RTA(ifa), nlm->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)), rta_info, IFA_MAX); + const void *rta_data = RTA_DATA(rta_info[IFA_ADDRESS]); + nlmsg.index = ifa->ifa_index; + if (ifa->ifa_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)&nlmsg.address; + flags = (nlm->nlmsg_type == RTM_NEWADDR) ? DDSRT_IPV4_ADDED : DDSRT_IPV4_DELETED; + sin->sin_family = AF_INET; + memmove(&sin->sin_addr, rta_data, sizeof(sin->sin_addr)); + } else { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&nlmsg.address; + flags = (nlm->nlmsg_type == RTM_NEWADDR) ? DDSRT_IPV6_ADDED : DDSRT_IPV6_DELETED; + sin6->sin6_family = AF_INET6; + memmove(&sin6->sin6_addr, rta_data, sizeof(sin6->sin6_addr)); + } + } break; + case RTM_NEWLINK: + case RTM_DELLINK: { + const struct ifinfomsg *ifi = NLMSG_DATA(nlm); + flags = (ifi->ifi_flags & IFF_UP) ? 
DDSRT_LINK_UP : DDSRT_LINK_DOWN; + nlmsg.index = (uint32_t)ifi->ifi_index; + } break; + default: + break; + } + + if (!(event->flags & flags)) + continue; + + dds_return_t ret; + if ((ret = event->callback(event, DDSRT_NETLINK | flags, &nlmsg, user_data))) + return ret; + } + + return DDS_RETCODE_OK; +} +# endif +#endif + +dds_return_t +ddsrt_handle_event( + ddsrt_event_t *event, + uint32_t flags, + void *user_data) +{ +#if DDSRT_HAVE_NETLINK_EVENT + if (event->flags & DDSRT_NETLINK) + return proxy_netlink_event(event, user_data); +#endif + return event->callback(event, flags, NULL, user_data); +} + +ddsrt_socket_t +ddsrt_event_socket( + ddsrt_event_t *event) +{ +#if DDSRT_HAVE_NETLINK_EVENT +# if _WIN32 + if (event->flags & DDSRT_NETLINK) + return event->source.netlink.pipefds[0]; +# else + if (event->flags & DDSRT_NETLINK) + return event->source.netlink.socketfd; +# endif +#endif + return event->source.socket.socketfd; +} + +dds_return_t +ddsrt_create_event( + ddsrt_event_t *event, + ddsrt_socket_t socket, + uint32_t flags, + ddsrt_event_callback_t callback, + void *user_data) +{ + assert(event); + assert(callback); + +#if DDSRT_HAVE_NETLINK_EVENT + if (flags & DDSRT_NETLINK) + return create_netlink_event(event, flags, callback, user_data); +#endif + + assert(flags & (DDSRT_READ|DDSRT_WRITE)); + assert(socket != DDSRT_INVALID_SOCKET); + + event->flags = flags & (DDSRT_READ|DDSRT_WRITE); + event->callback = callback; + event->loop = NULL; + event->user_data = user_data; + event->source.socket.socketfd = socket; + return DDS_RETCODE_OK; +} + +dds_return_t +ddsrt_destroy_event(ddsrt_event_t *event) +{ + assert(event && !event->loop); +#if DDSRT_HAVE_NETLINK_EVENT + if (event->flags & DDSRT_NETLINK) + return destroy_netlink_event(event); +#endif + + event->source.socket.socketfd = DDSRT_INVALID_SOCKET; + return DDS_RETCODE_OK; +} + +void ddsrt_trigger_loop(ddsrt_loop_t *loop) +{ + char buf[1] = { '\0' }; + write_pipe(loop->pipefds[1], buf, sizeof(buf)); +} diff --git 
a/src/ddsrt/src/event.h b/src/ddsrt/src/event.h new file mode 100644 index 0000000000..6a1db11072 --- /dev/null +++ b/src/ddsrt/src/event.h @@ -0,0 +1,526 @@ +#ifndef EVENT_H +#define EVENT_H + +#include +#include + +#include "dds/config.h" +#include "dds/ddsrt/event.h" +#include "dds/ddsrt/heap.h" +#include "dds/ddsrt/threads.h" + +static inline void create_eventlist(ddsrt_eventlist_t *list) +{ + list->length = DDSRT_EMBEDDED_EVENTS; + list->count = 0u; + list->start = list->end = 0u; + memset(list->events.embedded, 0, list->length * sizeof(*list->events.dynamic)); +} + +static inline void destroy_eventlist(ddsrt_eventlist_t *list) +{ + if (!list) + return; + if (list->length > DDSRT_EMBEDDED_EVENTS) + ddsrt_free(list->events.dynamic); + list->length = DDSRT_EMBEDDED_EVENTS; + list->count = 0; + list->start = list->end = 0u; +} + +static inline ddsrt_event_t **get_events(ddsrt_eventlist_t *list) +{ + if (list->length > DDSRT_EMBEDDED_EVENTS) + return list->events.dynamic; + return list->events.embedded; +} + +#ifndef NDEBUG +static inline void assert_eventlist(ddsrt_eventlist_t *list) +{ + assert(list); + assert(list->count <= list->length); + if (list->count <= 1) { + assert(list->start == list->end); + assert(list->length == DDSRT_EMBEDDED_EVENTS); + } else { + assert(list->start != list->end); + assert(list->length % DDSRT_EMBEDDED_EVENTS == 0); + } + + size_t cnt = 0; + ddsrt_event_t **buf = get_events(list); + for (size_t i = 0; i < list->length; i++) + cnt += buf[i] != NULL; + assert(list->count == cnt); +} +#else +# define assert_eventlist(list) +#endif + +static void left_trim(ddsrt_eventlist_t *list) +{ + size_t cnt = list->start; + ddsrt_event_t **buf = get_events(list); + + if (list->start > list->end) { + // move start if 1st entry was removed and buffer wraps around + // ------------------------- ------------------------- + // | X . . . . | . . . . X | >> | X . . . . | . . . . 
X | + // --^---------------^---^-- --^-------------------^-- + // nth 1st 2nd nth 1st + for (; cnt < (list->length - 1) && !buf[cnt]; cnt++) ; + list->start = cnt; + if (buf[cnt]) + return; + // start from beginning if last entry before wrap around was removed + // ------------------------- ------------------------- + // | . . X . . | . . . . . | >> | . . X . . | . . . . . | + // ------^---------------^-- --^---^------------------ + // nth 1st 1st nth + assert(cnt == list->length - 1); + cnt = list->start = 0; + } + // move start if 1st entry was removed and buffer does not wrap around + // ------------------------- ------------------------- + // | X . X . . | X . . . . | >> | . . X . . | X . . . . | + // --^---^-------^---------- ------^-------^---------- + // 1st 2nd nth 1st nth + for (; cnt < list->end && !buf[cnt]; cnt++) ; + list->start = cnt; + assert(list->start == list->end || list->count > 1); +} + +static void right_trim(ddsrt_eventlist_t *list) +{ + size_t cnt = list->end; + ddsrt_event_t **buf = get_events(list); + + if (list->end < list->start) { + // move end if last entry was removed and buffer wraps around + // ------------------------- ------------------------- + // | X . . . . | . . . X . | >> | X . . . . | . . . X . | + // --^---^-------------^---- --^-----------------^---- + // 2nd nth 1st nth 1st + for (; cnt > 0 && !buf[cnt]; cnt--) ; + list->end = cnt; + if (buf[cnt]) + return; + // start from end if first entry before wrap around was removed + // ------------------------- ------------------------- + // | . . . . . | . X . X . | >> | . . . . . | . X . X . | + // ----^-----------^-------- ----------------^-----^-- + // nth 1st 1st nth + assert(cnt == 0); + cnt = list->end = list->length - 1; + } + // move end if last entry was removed and buffer does not wrap around + // ------------------------- ------------------------- + // | . . . . X | . X . . . | >> | . . . . X | . X . . . 
| + // ----------^-----^-----^-- ----------^-----^-------- + // 1st 2nd nth 1st nth + for (; cnt > list->start && !buf[cnt]; cnt--) ; + list->end = cnt; + assert(list->end == list->start || list->count > 1); +} + +// remove the gaps left by deleted events so that live entries are contiguous; +// on wrap around the tail is packed to the front and the head to the back +static inline void +pack_eventlist(ddsrt_eventlist_t *list) +{ + assert_eventlist(list); + ddsrt_event_t **buf = get_events(list); + + if (list->start > list->end) { + // compress tail on buffer wrap around + // ------------------------- ------------------------- + // | X . X . X | . X . X . | >> | X X X . . | . X . X . | + // ----------^-----^-------- ------^---------^-------- + // nth 1st nth 1st + size_t i, j; + for (i = j = 0; i <= list->end; i++) { + if (!buf[i]) + continue; + if (i != j) + (void)(buf[j] = buf[i]), buf[i] = NULL; + j++; + } + assert(j != 0); + list->end = j - 1; + // compress head on buffer wrap around + // ------------------------- ------------------------- + // | X X X . . | . X . X . | >> | X X X . . | . . . X X | + // ------^---------^-------- ------^-------------^---- + // nth 1st nth 1st + for (i = j = list->length - 1; i >= list->start; i--) { + if (!buf[i]) + continue; + // move the live entry at i (not the fixed slot 1) up to slot j + if (i != j) + (void)(buf[j] = buf[i]), buf[i] = NULL; + j--; + } + assert(j != list->length - 1); + list->start = j + 1; + } else if (list->count != 0) { + // compress + // ------------------------- ------------------------- + // | . . X . . | X . X X X | >> | X X X X X | . . . . . 
| + // ------^---------------^-- --^-------^-------------- + // 1st nth 1st nth + size_t i, j; + for (i = j = 0; i <= list->end; i++) { + if (!buf[i]) + continue; + if (i != j) + (void)(buf[j] = buf[i]), buf[i] = NULL; + j++; + } + list->start = 0; + list->end = j - 1; + assert(list->end == list->count - 1); + } +} + +// grow a full eventlist by DDSRT_EMBEDDED_EVENTS slots, switching from the +// embedded to a heap allocated buffer on first growth; returns +// DDS_RETCODE_OUT_OF_RESOURCES if the new length would exceed max or if +// allocation fails, DDS_RETCODE_OK otherwise +static inline dds_return_t +grow_eventlist(ddsrt_eventlist_t *list, size_t max) +{ + static const size_t min = DDSRT_EMBEDDED_EVENTS; + size_t len; + ddsrt_event_t **buf = get_events(list); + + assert_eventlist(list); + assert(list->count == list->length); + + len = list->length + min; + if (len > max) { + return DDS_RETCODE_OUT_OF_RESOURCES; + } else if (list->length == min) { + if (!(buf = ddsrt_malloc(len * sizeof(*buf)))) + return DDS_RETCODE_OUT_OF_RESOURCES; + memmove(buf, list->events.embedded, list->length * sizeof(*buf)); + } else { + if (!(buf = ddsrt_realloc(list->events.dynamic, len * sizeof(*buf)))) + return DDS_RETCODE_OUT_OF_RESOURCES; + } + + // zero newly allocated memory (on wrap around this is the gap that opens + // up between tail and head once the head is moved) + size_t gap = list->end + 1; + + // move head to end of newly allocated buffer + if (list->start > list->end) { + size_t mov = list->length - list->start; + memmove(buf + (list->start + min), buf + list->start, mov * sizeof(*buf)); + // head now lives min slots further, keep start pointing at it + list->start += min; + } + + memset(buf + gap, 0, min * sizeof(*buf)); + + list->length = len; + list->events.dynamic = buf; + return DDS_RETCODE_OK; +} + +static inline dds_return_t +maybe_shrink_eventlist(ddsrt_eventlist_t *list) +{ + static const size_t min = DDSRT_EMBEDDED_EVENTS; + ddsrt_event_t **buf; + + assert_eventlist(list); + assert(list->length > min); + + if (!(list->count == min || list->count < list->length - min)) + return DDS_RETCODE_OK; + // eventlist can be sparse + pack_eventlist(list); + + buf = list->events.dynamic; + // pack operation moved head to end of buffer on wrap around. 
move head to + // front to not discard it on reallocation + if (list->count <= min) { + ddsrt_event_t **embuf = list->events.embedded; + if (list->end < list->start) { + assert(list->count > 1); + size_t mov = list->length - list->start; + memmove(embuf, buf + list->start, mov * sizeof(*buf)); + memmove(embuf + mov, buf, (list->end + 1) * sizeof(*buf)); + list->start = 0u; + list->end = list->count - 1; + } else { + assert(list->start == 0u); + // end is an index, so end + 1 entries must be copied + memmove(embuf, buf, (list->end + 1) * sizeof(*buf)); + } + ddsrt_free(buf); + list->length = min; + } else { + size_t mov = 0; + if (list->end < list->start) { + assert(list->start - min > list->end); + mov = (list->length - list->start) * sizeof(*buf); + memmove(buf + (list->start - min), buf + list->start, mov); + list->start -= min; + } else { + assert(list->start == 0u); + } + + size_t len = ((list->count/min) + 1) * min; + // keep buf intact on failure, the old buffer remains valid and in use + ddsrt_event_t **tmp = ddsrt_realloc(buf, len * sizeof(*buf)); + if (!tmp) { + // move head back to end of buffer and clear the slots it occupied + if (mov != 0) { + list->start += min; + memmove(buf + list->start, buf + (list->start - min), mov); + memset(buf + (list->start - min), 0, min * sizeof(*buf)); + } + return DDS_RETCODE_OUT_OF_RESOURCES; + } + buf = tmp; + list->length = len; + list->events.dynamic = buf; + } + + return DDS_RETCODE_OK; +} + +static inline ssize_t +find_event(ddsrt_eventlist_t *list, ddsrt_event_t *event) +{ + assert(list); + + ddsrt_event_t **buf = get_events(list); + // buffer is circular, so window does not have to be consecutive + size_t len = list->start > list->end ? 
list->length - 1 : list->end; + for (size_t cnt = list->start; cnt <= len; cnt++) + if (buf[cnt] == event) + return (ssize_t)cnt; + + if (list->start < list->end) + return -1; + + len = list->end; + for (size_t cnt = 0; cnt <= len; cnt++) + if (buf[cnt] == event) + return (ssize_t)cnt; + + return -1; +} + +// append event to the list, growing the list (up to max entries) if it is +// full; returns DDS_RETCODE_OUT_OF_RESOURCES if the list cannot be grown +static inline dds_return_t +add_event(ddsrt_eventlist_t *list, ddsrt_event_t *event, size_t max) +{ + ssize_t cnt; + ddsrt_event_t **buf; + + assert(list); + assert(event); + // ensure event is not listed + assert(find_event(list, event) == -1); + + // allocate more space if list is full + if (list->count == list->length) { + assert(list->end != list->start); + // grow_eventlist reports failure with a dds_return_t code, not -1 + if (grow_eventlist(list, max) != DDS_RETCODE_OK) + return DDS_RETCODE_OUT_OF_RESOURCES; + cnt = (ssize_t)(list->end += 1); + } else if (list->end < list->start) { + if (list->end + 1 == list->start) + cnt = find_event(list, NULL); + else + cnt = (ssize_t)(list->end += 1); + // take into account wrap around + } else if (list->end == list->length - 1) { + if (list->start == 0) + cnt = find_event(list, NULL); + else + cnt = (ssize_t)(list->end = 0); + } else if (list->end > list->start) { + cnt = (ssize_t)(list->end += 1); + } else { + cnt = (ssize_t)(list->end += (list->count != 0)); + } + + buf = get_events(list); + buf[cnt] = event; + list->count++; + + return DDS_RETCODE_OK; +} + +static inline dds_return_t +delete_event(ddsrt_eventlist_t *list, ddsrt_event_t *event) +{ + static const size_t min = DDSRT_EMBEDDED_EVENTS; + ssize_t cnt; + ddsrt_event_t **buf; + + assert(list); + assert(event); + + if ((cnt = find_event(list, event)) == -1) + return DDS_RETCODE_OK; + + buf = get_events(list); + buf[cnt] = NULL; + + list->count--; + if (list->count == 0) + list->start = list->end = 0; + else if (cnt == (ssize_t)list->start) + left_trim(list); + else if (cnt == (ssize_t)list->end) + right_trim(list); + + // do not attempt to shrink embedded buffer + if (list->length == min) + return DDS_RETCODE_OK; + 
(void)maybe_shrink_eventlist(list); // failure can safely be ignored + + return DDS_RETCODE_OK; +} + +#if _WIN32 +static inline int open_pipe(ddsrt_socket_t fds[2]) +{ + struct sockaddr_in addr; + socklen_t asize = sizeof (addr); + ddsrt_socket_t listener = socket (AF_INET, SOCK_STREAM, 0); + ddsrt_socket_t s1 = socket (AF_INET, SOCK_STREAM, 0); + ddsrt_socket_t s2 = DDSRT_INVALID_SOCKET; + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl (INADDR_LOOPBACK); + addr.sin_port = 0; + if (bind (listener, (struct sockaddr *)&addr, sizeof (addr)) == -1) + goto fail; + if (getsockname (listener, (struct sockaddr *)&addr, &asize) == -1) + goto fail; + if (listen (listener, 1) == -1) + goto fail; + if (connect (s1, (struct sockaddr *)&addr, sizeof (addr)) == -1) + goto fail; + if ((s2 = accept (listener, 0, 0)) == INVALID_SOCKET) + goto fail; + closesocket (listener); + /* Equivalent to FD_CLOEXEC */ + SetHandleInformation ((HANDLE) s1, HANDLE_FLAG_INHERIT, 0); + SetHandleInformation ((HANDLE) s2, HANDLE_FLAG_INHERIT, 0); + fds[0] = s1; + fds[1] = s2; + return 0; + +fail: + closesocket (listener); + closesocket (s1); + closesocket (s2); + return -1; +} + +static inline void close_pipe(ddsrt_socket_t fds[2]) +{ + closesocket(fds[0]); + closesocket(fds[1]); +} +#else +#include +#include + +// create a pipe with both ends marked close-on-exec and non-blocking; +// fds[0] is the read end, fds[1] the write end. returns 0 on success, -1 on +// failure (in which case no descriptors are left open) +static inline int open_pipe(int fds[2]) +{ + int pipefds[2]; + + if (pipe(pipefds) == -1) + goto err_pipe; + // FD_CLOEXEC is a descriptor flag (F_SETFD), O_NONBLOCK is a file status + // flag (F_SETFL); O_NONBLOCK passed to F_SETFD is silently ignored + for (int i = 0; i < 2; i++) { + int fdflags, flflags; + if ((fdflags = fcntl(pipefds[i], F_GETFD)) == -1) + goto err_fcntl; + if (fcntl(pipefds[i], F_SETFD, fdflags|FD_CLOEXEC) == -1) + goto err_fcntl; + if ((flflags = fcntl(pipefds[i], F_GETFL)) == -1) + goto err_fcntl; + if (fcntl(pipefds[i], F_SETFL, flflags|O_NONBLOCK) == -1) + goto err_fcntl; + } + fds[0] = pipefds[0]; + fds[1] = pipefds[1]; + return 0; +err_fcntl: + close(pipefds[0]); + close(pipefds[1]); +err_pipe: + return -1; +} + +static inline void close_pipe(int fds[2]) +{ + close(fds[0]); + close(fds[1]); +} +#endif + +static ssize_t read_pipe(ddsrt_socket_t fd, void *buf, size_t len) +{ +#if _WIN32 + return recv(fd, buf, (int)len, 0); +#else + 
return read(fd, buf, len); +#endif +} + +static ssize_t write_pipe(ddsrt_socket_t fd, const void *buf, size_t len) +{ +#if _WIN32 + return send(fd, buf, (int)len, 0); +#else + return write(fd, buf, len); +#endif +} + +#define WRITE_FLAGS (DDSRT_WRITE) +#if DDSRT_HAVE_NETLINK_EVENT +# define READ_FLAGS (DDSRT_READ|DDSRT_NETLINK) +#else +# define READ_FLAGS (DDSRT_READ) +#endif + +#define FIXED DDSRT_FIXED_EVENTS + +static inline ddsrt_socket_t event_socket(ddsrt_event_t *event) +{ + ddsrt_socket_t fd; +#if DDSRT_HAVE_NETLINK_EVENT + if (event->flags & DDSRT_NETLINK) +# if _WIN32 + fd = event->source.netlink.pipefds[0]; +# else + fd = event->source.netlink.socketfd; +# endif + else +#endif + fd = event->source.socket.socketfd; + + assert(fd != DDSRT_INVALID_SOCKET); +#if !_WIN32 + assert(fd >= 0); + assert(fd < FD_SETSIZE); +#endif + + return fd; +} + +static inline bool lock_loop(ddsrt_loop_t *loop) +{ + uintptr_t tid = (uintptr_t)ddsrt_gettid(); + if (ddsrt_atomic_ldptr(&loop->owner) == tid) + return false; + ddsrt_mutex_lock(&loop->lock); + return true; +} + +static inline void unlock_loop(ddsrt_loop_t *loop, bool release) +{ + uintptr_t tid = (uintptr_t)ddsrt_gettid(); + assert(release || ddsrt_atomic_ldptr(&loop->owner) == tid); + if (!release) + return; // no-op + ddsrt_mutex_unlock(&loop->lock); +} + +// wake the loop by writing a byte to its pipe, then block on the loop's +// condition. a no-op if release is false, i.e. the caller is the loop owner +// and did not take the lock in lock_loop +static inline void wait_for_loop(ddsrt_loop_t *loop, bool release) +{ + char buf[1] = { '\0' }; + uintptr_t tid = (uintptr_t)ddsrt_gettid(); + assert(release || ddsrt_atomic_ldptr(&loop->owner) == tid); + if (!release) + return; // no-op + // pipefds[1] is the write end (see open_pipe), as used by ddsrt_trigger_loop + write_pipe(loop->pipefds[1], buf, sizeof(buf)); + ddsrt_cond_wait(&loop->condition, &loop->lock); +} + +#endif // EVENT_H diff --git a/src/ddsrt/src/event/epoll/event.c b/src/ddsrt/src/event/epoll/event.c new file mode 100644 index 0000000000..c65111147c --- /dev/null +++ b/src/ddsrt/src/event/epoll/event.c @@ -0,0 +1,294 @@ +/* + * Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying 
materials are made available under the + * terms of the Eclipse Public License v. 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + */ + +// Windows offers three (four?) flavours of event handling mechanisms. +// 1. select (or WSAPoll) +// 2. WSAWaitForMultipleEvents (WaitForMultipleObjects) +// 3. I/O Completion Ports +// 4. Windows Registered I/O +// +// select is notoriously slow on Windows, which is not a big problem if used +// for two udp sockets (discovery+data), but is a problem if tcp connections +// are used. WSAPoll is broken (1) up to Windows 10 version 2004 (2), which was +// released in May of 2020. WSAWaitForMultipleEvents is more performant, which +// is why it is used for Windows CE already, but only allows for +// WSA_MAXIMUM_WAIT_EVENTS (MAXIMUM_WAIT_OBJECTS, or 64) sockets to be polled +// simultaneously, which again may be a problem if tcp connections are used. +// select is also limited to 64 sockets unless FD_SETSIZE is defined to a +// higher number before including winsock2.h (3). For high-performance I/O +// on Windows, OVERLAPPED sockets in combination with I/O Completion Ports is +// recommended, but the interface is completely different from interfaces like +// epoll and kqueue (4). Zero byte receives can of course be used (5,6,7), but +// it seems suboptimal to do so(?) Asynchronous I/O, which is offered by the +// likes of I/O Completion Ports and io_uring, seems worthwhile, but the +// changes seem a bit too substantial at this point. +// +// OPTION #5: wepoll, epoll for windows (8) +// +// wepoll implements the epoll API for Windows using the Ancillary Function +// Driver, i.e. Winsock. wepoll was developed by one of the libuv authors (9) +// and is used by libevent (10,11) and ZeroMQ (12). 
+// +// 1: https://daniel.haxx.se/blog/2012/10/10/wsapoll-is-broken/ +// 2: https://docs.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-wsapoll +// 3: https://docs.microsoft.com/en-us/windows/win32/winsock/maximum-number-of-sockets-supported-2 +// 4: https://sudonull.com/post/14582-epoll-and-Windows-IO-Completion-Ports-The-Practical-Difference +// 5: https://stackoverflow.com/questions/49970454/zero-byte-receives-purpose-clarification +// 6: https://stackoverflow.com/questions/10635976/iocp-notifications-without-bytes-copy +// 7: https://stackoverflow.com/questions/24434289/select-equivalence-in-i-o-completion-ports +// 8: https://github.com/piscisaureus/wepoll +// 9: https://news.ycombinator.com/item?id=15978372 +// 10: https://github.com/libevent/libevent/pull/1006 +// 11: https://libev.schmorp.narkive.com/tXCCS0na/better-windows-backend-using-wepoll +// 12: https://github.com/zeromq/libzmq/pull/3127 + +#include +#include +#include +#if !_WIN32 +# include +#endif + +#include "event.h" +#include "eventlist.h" +#include "dds/ddsrt/static_assert.h" + +dds_return_t +ddsrt_add_event(ddsrt_loop_t *loop, ddsrt_event_t *event) +{ + dds_return_t err; + ddsrt_socket_t fd = DDSRT_INVALID_SOCKET; + bool release = true; + struct epoll_event ev = { .events = 0u, { .ptr = event } }; + + assert(loop); + assert(event); + + if (event->loop) + return event->loop == loop ? DDS_RETCODE_OK : DDS_RETCODE_BAD_PARAMETER; + + fd = event_socket(event); + release = lock_loop(loop); + + if ((err = add_event(&loop->active, event, INT_MAX - 1))) + goto err_event; + if (event->flags & READ_FLAGS) + ev.events |= EPOLLIN; + if (event->flags & DDSRT_WRITE) + ev.events |= EPOLLOUT; + if (epoll_ctl(loop->epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) + goto err_epoll; + + event->loop = loop; + unlock_loop(loop, release); + return DDS_RETCODE_OK; +err_epoll: + err = (errno == ENOMEM) + ? 
DDS_RETCODE_OUT_OF_RESOURCES : DDS_RETCODE_BAD_PARAMETER; + delete_event(&loop->active, event); +err_event: + unlock_loop(loop, release); + return err; +} + +dds_return_t +ddsrt_delete_event(ddsrt_loop_t *loop, ddsrt_event_t *event) +{ + dds_return_t err; + bool release = true; + uint64_t owner; + + assert(loop); + assert(event); + + if (event->loop != loop) + return DDS_RETCODE_BAD_PARAMETER; + + release = lock_loop(loop); + // remove descriptor from epoll instance immediately to avoid + // having to retrieve the socket from possibly freed memory + ddsrt_socket_t fd = ddsrt_event_socket(event); + if (epoll_ctl(loop->epollfd, EPOLL_CTL_DEL, fd, NULL) == -1) + goto err_epoll; + + if ((owner = ddsrt_atomic_ldptr(&loop->owner))) { + if ((err = add_event(&loop->cancelled, event, INT_MAX - 1))) + goto err_event; + wait_for_loop(loop, release); + } else { + delete_event(&loop->active, event); + } + + event->loop = NULL; + unlock_loop(loop, release); + return DDS_RETCODE_OK; +err_epoll: + err = (errno == ENOMEM) + ? 
DDS_RETCODE_OUT_OF_RESOURCES : DDS_RETCODE_BAD_PARAMETER; +err_event: + unlock_loop(loop, release); + return err; +} + +#if !_WIN32 +# define epoll_close(x) close(x) +#endif + +dds_return_t +ddsrt_create_loop(ddsrt_loop_t *loop) +{ + struct eventlist *evset; + ddsrt_socket_t pipefds[2]; + ddsrt_epoll_t epollfd = DDSRT_INVALID_EPOLL; + int flags = 0; + struct epoll_event ev = { .events = EPOLLIN, { .ptr = NULL } }; + +#if !_WIN32 + flags = EPOLL_CLOEXEC; +#endif + + assert(loop); + + DDSRT_STATIC_ASSERT(sizeof(loop->ready) == sizeof(struct eventlist)); + if ((epollfd = epoll_create1(flags)) == DDSRT_INVALID_EPOLL) + goto err_epoll; + ev.data.ptr = loop; + if (open_pipe(pipefds) == -1) + goto err_pipe; + if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipefds[0], &ev) == -1) + goto err_epoll_ctl; + ddsrt_atomic_st32(&loop->terminate, 0u); + ddsrt_atomic_stptr(&loop->owner, 0u); + loop->epollfd = epollfd; + loop->pipefds[0] = pipefds[0]; + loop->pipefds[1] = pipefds[1]; + evset = (struct eventlist *)&loop->ready; + evset->size = DDSRT_EMBEDDED_EVENTS; + create_eventlist(&loop->active); + create_eventlist(&loop->cancelled); + ddsrt_mutex_init(&loop->lock); + ddsrt_cond_init(&loop->condition); + return DDS_RETCODE_OK; +err_epoll_ctl: +err_pipe: + epoll_close(epollfd); +err_epoll: + return DDS_RETCODE_OUT_OF_RESOURCES; +} + +void +ddsrt_destroy_loop(ddsrt_loop_t *loop) +{ + struct eventlist *evset; + + assert(loop); + assert(ddsrt_atomic_ldptr(&loop->owner) == 0u); + close_pipe(loop->pipefds); + ddsrt_cond_destroy(&loop->condition); + ddsrt_mutex_destroy(&loop->lock); + evset = (struct eventlist *)&loop->ready; + if (evset->size > DDSRT_EMBEDDED_EVENTS) + ddsrt_free(evset->events.dynamic); + destroy_eventlist(&loop->active); + destroy_eventlist(&loop->cancelled); +} + +static inline struct epoll_event * +fit_eventlist(struct eventlist *eventlist, size_t size) +{ + static const size_t embedded = DDSRT_EMBEDDED_EVENTS; + struct epoll_event *events; + + assert(eventlist); + 
assert(eventlist->size >= embedded); + + if (size < embedded) { + if (eventlist->size > embedded) + ddsrt_free(eventlist->events.dynamic); + eventlist->size = embedded; + return eventlist->events.embedded; + } else if (eventlist->size / embedded != size / embedded + 1) { + events = eventlist->size == embedded ? + NULL : eventlist->events.dynamic; + size = (size / embedded + 1) * embedded; + if (!(events = ddsrt_realloc(events, size * sizeof(*events)))) + return eventlist->size == embedded + ? eventlist->events.embedded : eventlist->events.dynamic; + eventlist->size = size; + eventlist->events.dynamic = events; + return eventlist->events.dynamic; + } + return eventlist->size == embedded + ? eventlist->events.embedded : eventlist->events.dynamic; +} + +static void delete_cancelled(ddsrt_loop_t *loop) +{ + if (!loop->cancelled.count) + return; + ddsrt_event_t **cancelled = get_events(&loop->cancelled); + for (size_t i=0; i < loop->cancelled.count; i++) + delete_event(&loop->active, cancelled[i]); + destroy_eventlist(&loop->cancelled); + // notify (potentially) blocking threads + ddsrt_cond_broadcast(&loop->condition); +} + +dds_return_t +ddsrt_run_loop(ddsrt_loop_t *loop, uint32_t flags, void *user_data) +{ + dds_return_t err = DDS_RETCODE_OK; + + assert(loop); + + ddsrt_mutex_lock(&loop->lock); + assert(!ddsrt_atomic_ldptr(&loop->owner)); + ddsrt_atomic_stptr(&loop->owner, (uintptr_t)ddsrt_gettid()); + struct eventlist *list = (struct eventlist *)&loop->ready; + + do { + delete_cancelled(loop); + assert(loop->active.count + 1 <= INT_MAX); + struct epoll_event *events = fit_eventlist( + (struct eventlist *)&loop->ready, loop->active.count + 1); + ddsrt_mutex_unlock(&loop->lock); + int ready = epoll_wait(loop->epollfd, events, (int)list->size, -1); + ddsrt_mutex_lock(&loop->lock); + + assert(ready >= 0 || errno == EINTR); + if (ready == -1) + continue; + + for (int i=0; i < ready && !loop->cancelled.count; i++) { + if (events[i].data.ptr == (void*)loop) { + char 
buf[1]; + read_pipe(loop->pipefds[0], buf, sizeof(buf)); + break; + } else { + uint32_t flags = 0u; + if (events[i].events & EPOLLIN) + flags |= DDSRT_READ; + if (events[i].events & EPOLLOUT) + flags |= DDSRT_WRITE; + if ((err = ddsrt_handle_event(events[i].data.ptr, flags, user_data))) + goto err_event; + } + } + } while (!ddsrt_atomic_ld32(&loop->terminate) && !(flags & DDSRT_RUN_ONCE)); + +err_event: + ddsrt_atomic_stptr(&loop->owner, 0u); + ddsrt_atomic_st32(&loop->terminate, 0u); + delete_cancelled(loop); + ddsrt_mutex_unlock(&loop->lock); + return err; +} diff --git a/src/ddsrt/src/event/epoll/eventlist.h b/src/ddsrt/src/event/epoll/eventlist.h new file mode 100644 index 0000000000..2257f19901 --- /dev/null +++ b/src/ddsrt/src/event/epoll/eventlist.h @@ -0,0 +1,31 @@ +/* + * Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + */ +#ifndef EVENTLIST_H +#define EVENTLIST_H + +#include + +#if _WIN32 +# include "wepoll.h" +#else +# include +#endif + +struct eventlist { + size_t size; + struct { + struct epoll_event embedded[ DDSRT_EMBEDDED_EVENTS ]; + struct epoll_event *dynamic; + } events; +}; + +#endif // EVENTSET_H diff --git a/src/ddsrt/src/event/kqueue/event.c b/src/ddsrt/src/event/kqueue/event.c new file mode 100644 index 0000000000..60be52d133 --- /dev/null +++ b/src/ddsrt/src/event/kqueue/event.c @@ -0,0 +1,264 @@ +/* + * Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 
2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + */ +#include +#include +#include + +#include "event.h" +#include "eventlist.h" +#include "dds/ddsrt/static_assert.h" + +dds_return_t +ddsrt_add_event(ddsrt_loop_t *loop, ddsrt_event_t *event) +{ + int err, fd; + bool release; + struct kevent ev; + unsigned short flags = 0u; + + assert(loop); + assert(loop->kqueuefd != -1); + assert(event); + + if (event->loop) + return event->loop == loop ? DDS_RETCODE_OK : DDS_RETCODE_BAD_PARAMETER; + + fd = event_socket(event); + release = lock_loop(loop); + + if (add_event(&loop->active, event, INT_MAX)) + goto err_event; + if (event->flags & READ_FLAGS) + flags |= EVFILT_READ; + if (event->flags & WRITE_FLAGS) + flags |= EVFILT_WRITE; + + EV_SET(&ev, fd, flags, EV_ADD, 0, 0, event); + do { + err = kevent(loop->kqueuefd, &ev, 1, NULL, 0, NULL); + } while (err == -1 && errno == EINTR); + if (err == -1) + goto err_kevent; + + event->loop = loop; + unlock_loop(loop, release); + return DDS_RETCODE_OK; +err_kevent: + delete_event(&loop->active, event); +err_event: + unlock_loop(loop, release); + return DDS_RETCODE_OUT_OF_RESOURCES; +} + +dds_return_t +ddsrt_delete_event(ddsrt_loop_t *loop, ddsrt_event_t *event) +{ + dds_return_t err; + bool release = true; + uint64_t owner; + + assert(loop); + assert(loop->kqueuefd != -1); + assert(event); + + if (event->loop != loop) + return DDS_RETCODE_BAD_PARAMETER; + + release = lock_loop(loop); + int fd = event_socket(event); + struct kevent ev; + EV_SET(&ev, fd, 0, EV_DELETE, 0, 0, NULL); + do { + if (kevent(loop->kqueuefd, &ev, 1, NULL, 0, NULL) == 0) + break; + if (errno != EINTR) + goto err_kevent; + } while (1); + + if ((owner = ddsrt_atomic_ldptr(&loop->owner))) { + if ((err = add_event(&loop->cancelled, event, INT_MAX))) + goto err_event; + 
wait_for_loop(loop, release); + } else { + delete_event(&loop->active, event); + } + + event->loop = NULL; + unlock_loop(loop, release); + return DDS_RETCODE_OK; +err_kevent: + err = (errno == ENOMEM) + ? DDS_RETCODE_OUT_OF_RESOURCES : DDS_RETCODE_BAD_PARAMETER; +err_event: + unlock_loop(loop, release); + return err; +} + +dds_return_t +ddsrt_create_loop(ddsrt_loop_t *loop) +{ + int pipefds[2] = { -1, -1 }; + int kqueuefd = -1; + struct kevent ev; + struct eventlist *evset; + + assert(loop); + + DDSRT_STATIC_ASSERT(sizeof(loop->ready) == sizeof(struct eventlist)); + if ((kqueuefd = kqueue()) == -1) + goto err_kqueue; + if (fcntl(kqueuefd, F_SETFD, fcntl(kqueuefd, F_GETFD)|FD_CLOEXEC) == -1) + goto err_fcntl; + if (open_pipe(pipefds) == -1) + goto err_pipe; + EV_SET(&ev, pipefds[0], EVFILT_READ, EV_ADD, 0, 0, loop); + if (kevent(kqueuefd, &ev, 1, NULL, 0, NULL) == -1) + goto err_kevent; + ddsrt_atomic_st32(&loop->terminate, 0u); + ddsrt_atomic_stptr(&loop->owner, 0u); + loop->pipefds[0] = pipefds[0]; + loop->pipefds[1] = pipefds[1]; + evset = (struct eventlist *)&loop->ready; + evset->size = DDSRT_EMBEDDED_EVENTS; + loop->kqueuefd = kqueuefd; + create_eventlist(&loop->active); + create_eventlist(&loop->cancelled); + ddsrt_mutex_init(&loop->lock); + ddsrt_cond_init(&loop->condition); + return DDS_RETCODE_OK; +err_kevent: + close(pipefds[0]); + close(pipefds[1]); +err_pipe: +err_fcntl: + close(kqueuefd); +err_kqueue: + return DDS_RETCODE_OUT_OF_RESOURCES; +} + +static const size_t embedded = DDSRT_EMBEDDED_EVENTS; + +void +ddsrt_destroy_loop(ddsrt_loop_t *loop) +{ + struct eventlist *eventlist; + + if (!loop) + return; + assert(ddsrt_atomic_ldptr(&loop->owner) == 0u); + eventlist = (struct eventlist *)&loop->ready; + if (eventlist->size > embedded) + ddsrt_free(eventlist->events.dynamic); + destroy_eventlist(&loop->active); + destroy_eventlist(&loop->cancelled); + ddsrt_mutex_destroy(&loop->lock); + ddsrt_cond_destroy(&loop->condition); +} + +static inline struct 
kevent * +fit_eventlist(struct eventlist *eventlist, size_t size) +{ + struct kevent *events; + + assert(eventlist); + assert(eventlist->size >= embedded); + + if (size < embedded) { + if (eventlist->size > embedded) + ddsrt_free(eventlist->events.dynamic); + eventlist->size = embedded; + return eventlist->events.embedded; + } else if (eventlist->size / embedded != size / embedded + 1) { + events = eventlist->size == embedded ? + NULL : eventlist->events.dynamic; + size = (size / embedded + 1) * embedded; + if (!(events = ddsrt_realloc(events, size * sizeof(*events)))) + return eventlist->size == embedded ? + eventlist->events.embedded : eventlist->events.dynamic; + eventlist->size = size; + eventlist->events.dynamic = events; + return eventlist->events.dynamic; + } + + return eventlist->size > embedded + ? eventlist->events.dynamic : eventlist->events.embedded; +} + +static void delete_cancelled(ddsrt_loop_t *loop) +{ + if (!loop->cancelled.count) + return; + ddsrt_event_t **cancelled = get_events(&loop->cancelled); + for (size_t i=0; i < loop->cancelled.count; i++) + delete_event(&loop->active, cancelled[i]); + destroy_eventlist(&loop->cancelled); + ddsrt_cond_broadcast(&loop->condition); +} + +dds_return_t +ddsrt_run_loop(ddsrt_loop_t *loop, uint32_t flags, void *user_data) +{ + dds_return_t err = DDS_RETCODE_OK; + + assert(loop); + + ddsrt_mutex_lock(&loop->lock); + assert(!ddsrt_atomic_ldptr(&loop->owner)); + ddsrt_atomic_stptr(&loop->owner, (uintptr_t)ddsrt_gettid()); + struct eventlist *list = (struct eventlist *)&loop->ready; + + do { + delete_cancelled(loop); + int ready; + struct kevent *events = fit_eventlist(list, loop->active.count + 1); + ddsrt_mutex_unlock(&loop->lock); + do { + ready = kevent(loop->kqueuefd, NULL, 0, events, (int)list->size, NULL); + if (ready >= 0) + break; + if (errno == EINTR) + continue; + err = (errno == ENOMEM) + ? 
DDS_RETCODE_OUT_OF_RESOURCES : DDS_RETCODE_ERROR; + goto err_kevent; + } while (1); + ddsrt_mutex_lock(&loop->lock); + + assert(ready >= 0 || errno == EINTR); + if (ready == -1) + continue; + + for (int i=0; i < ready && !loop->cancelled.count; i++) { + if (events[i].udata == (void*)loop) { + char buf[1]; + read_pipe(loop->pipefds[0], buf, sizeof(buf)); + break; + } else { + uint32_t evflags = 0u; + if (events[i].flags & EVFILT_READ) + evflags |= DDSRT_READ; + if (events[i].flags & EVFILT_WRITE) + evflags |= DDSRT_WRITE; + if ((err = ddsrt_handle_event(events[i].udata, evflags, user_data))) + goto err_event; + } + } + } while (!ddsrt_atomic_ld32(&loop->terminate) && !(flags & DDSRT_RUN_ONCE)); + +err_kevent: +err_event: + ddsrt_atomic_stptr(&loop->owner, 0u); + ddsrt_atomic_st32(&loop->terminate, 0u); + delete_cancelled(loop); + ddsrt_mutex_unlock(&loop->lock); + return err; +} diff --git a/src/ddsrt/src/event/kqueue/eventlist.h b/src/ddsrt/src/event/kqueue/eventlist.h new file mode 100644 index 0000000000..4130d3bf01 --- /dev/null +++ b/src/ddsrt/src/event/kqueue/eventlist.h @@ -0,0 +1,27 @@ +/* + * Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + */ +#ifndef EVENTLIST_H +#define EVENTLIST_H + +#include +#include +#include + +struct eventlist { + size_t size; + struct { + struct kevent embedded[ DDSRT_EMBEDDED_EVENTS ]; + struct kevent *dynamic; + } events; +}; + +#endif // EVENTSET_H diff --git a/src/ddsrt/src/event/select/event.c b/src/ddsrt/src/event/select/event.c new file mode 100644 index 0000000000..aead18aaf3 --- /dev/null +++ b/src/ddsrt/src/event/select/event.c @@ -0,0 +1,243 @@ +/* + * Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + */ +#include +#include + +#include "dds/ddsrt/static_assert.h" + +#include "event.h" +#include "eventlist.h" + +dds_return_t +ddsrt_add_event(ddsrt_loop_t *loop, ddsrt_event_t *event) +{ + dds_return_t err = DDS_RETCODE_OK; + ddsrt_socket_t fd; + bool release = true; + + assert(loop); + assert(event); + + if (event->loop) + return event->loop == loop ? 
DDS_RETCODE_OK : DDS_RETCODE_BAD_PARAMETER; + + fd = event_socket(event); +#if !_WIN32 + assert(fd >= 0); + assert(fd < FD_SETSIZE); +#endif + release = lock_loop(loop); + + if ((err = add_event(&loop->active, event, FD_SETSIZE))) + goto err_event; + if (event->flags & READ_FLAGS) + FD_SET(fd, &loop->readfds); + if (event->flags & WRITE_FLAGS) + FD_SET(fd, &loop->writefds); + +#if !_WIN32 + if (loop->fdmax_plus_1 < fd) + loop->fdmax_plus_1 = fd + 1; +#endif + event->loop = loop; +err_event: + unlock_loop(loop, release); + return err; +} + +#if !_WIN32 +static inline ddsrt_socket_t greatest_fd(ddsrt_loop_t *loop) +{ + assert(loop); + assert(loop->pipefds[0] != DDSRT_INVALID_SOCKET); + if (loop->active.count == 0) + return loop->pipefds[0]; + ddsrt_socket_t fdmax = loop->pipefds[0]; + ddsrt_event_t **events = get_events(&loop->active); + for (size_t i = 0; i < loop->active.count; i++) { + ddsrt_socket_t fd = event_socket(events[i]); + if (fd > fdmax) + fdmax = fd; + } + assert(fdmax >= 0); + assert(fdmax < FD_SETSIZE); + return fdmax; +} +#endif + +dds_return_t +ddsrt_delete_event(ddsrt_loop_t *loop, ddsrt_event_t *event) +{ + dds_return_t err = DDS_RETCODE_OK; + bool release = true; + + assert(loop); + assert(event); + + if (event->loop != loop) + return DDS_RETCODE_BAD_PARAMETER; + + release = lock_loop(loop); + ddsrt_socket_t fd = event_socket(event); + if (event->flags & READ_FLAGS) + FD_CLR(fd, &loop->readfds); + if (event->flags & WRITE_FLAGS) + FD_CLR(fd, &loop->writefds); +#if !_WIN32 + if (fd == loop->fdmax_plus_1) + loop->fdmax_plus_1 = greatest_fd(loop) + 1; +#endif + + if (ddsrt_atomic_ldptr(&loop->owner) != 0u) { + if ((err = add_event(&loop->cancelled, event, FD_SETSIZE))) + goto err_event; + wait_for_loop(loop, release); + } else { + delete_event(&loop->active, event); + } + + event->loop = NULL; +err_event: + unlock_loop(loop, release); + return err; +} + +dds_return_t +ddsrt_create_loop(ddsrt_loop_t *loop) +{ + ddsrt_socket_t pipefds[2]; + + 
assert(loop); + + DDSRT_STATIC_ASSERT(sizeof(loop->ready) == sizeof(struct eventlist)); + if (open_pipe(pipefds)) + return DDS_RETCODE_OUT_OF_RESOURCES; + ddsrt_atomic_st32(&loop->terminate, 0u); + loop->pipefds[0] = pipefds[0]; + loop->pipefds[1] = pipefds[1]; + ddsrt_atomic_stptr(&loop->owner, 0u); + FD_ZERO(&loop->readfds); + FD_SET(loop->pipefds[0], &loop->readfds); + FD_ZERO(&loop->writefds); + create_eventlist(&loop->active); + create_eventlist(&loop->cancelled); +#if _WIN32 + loop->fdmax_plus_1 = FD_SETSIZE; +#else + loop->fdmax_plus_1 = loop->pipefds[0] + 1; +#endif + ddsrt_mutex_init(&loop->lock); + ddsrt_cond_init(&loop->condition); + return DDS_RETCODE_OK; +} + +void +ddsrt_destroy_loop(ddsrt_loop_t *loop) +{ + if (!loop) + return; + assert(ddsrt_atomic_ldptr(&loop->owner) == 0u); + close_pipe(loop->pipefds); + ddsrt_cond_destroy(&loop->condition); + ddsrt_mutex_destroy(&loop->lock); + destroy_eventlist(&loop->active); + destroy_eventlist(&loop->cancelled); +} + +static void delete_cancelled(ddsrt_loop_t *loop) +{ + if (!loop->cancelled.count) + return; + ddsrt_event_t **events = get_events(&loop->cancelled); + for (size_t i=0; i < loop->cancelled.count; i++) + delete_event(&loop->active, events[i]); + destroy_eventlist(&loop->cancelled); + // notify (potentially) blocking threads + ddsrt_cond_broadcast(&loop->condition); +} + +dds_return_t +ddsrt_run_loop(ddsrt_loop_t *loop, uint32_t flags, void *user_data) +{ + dds_return_t err = DDS_RETCODE_OK; + + assert(loop); + assert(!ddsrt_atomic_ldptr(&loop->owner)); + + struct eventlist *evset = (struct eventlist *)&loop->ready; + + ddsrt_mutex_lock(&loop->lock); + ddsrt_atomic_stptr(&loop->owner, (uintptr_t)ddsrt_gettid()); + + do { + delete_cancelled(loop); +#if !_WIN32 + evset->fdmax_plus_1 = loop->fdmax_plus_1; +#endif + memcpy(&evset->readfds, &loop->readfds, sizeof(evset->readfds)); + memcpy(&evset->writefds, &loop->writefds, sizeof(evset->writefds)); + + ddsrt_mutex_unlock(&loop->lock); + int32_t ready = 
ddsrt_select( + evset->fdmax_plus_1, &evset->readfds, &evset->writefds, NULL, DDS_INFINITY); + ddsrt_mutex_lock(&loop->lock); + + if (ready < 0) + switch (ready) { + case DDS_RETCODE_TRY_AGAIN: + case DDS_RETCODE_INTERRUPTED: + case DDS_RETCODE_TIMEOUT: + ready = 0; + break; + default: + err = ready; + goto err_select; + } + +#if !LWIP_SOCKET + // pipe can safely be read, it is not an event + if (ready && FD_ISSET(loop->pipefds[0], &evset->readfds)) { + char buf[1]; + (void)read_pipe(loop->pipefds[0], buf, sizeof(buf)); + ready = 0; // continue with next iteration + } +#endif + + if (ready) { + ddsrt_event_t **events = get_events(&loop->active); + for (size_t i=0; i < loop->active.count && ready > 0; i++) { + // callback may have cancelled one or more events + if (loop->cancelled.count) + break; + ddsrt_socket_t fd = event_socket(events[i]); + uint32_t evflags = 0u; + if (FD_ISSET(fd, &evset->readfds)) + evflags |= DDSRT_READ; + if (FD_ISSET(fd, &evset->writefds)) + evflags |= DDSRT_WRITE; + if (!evflags) + continue; + ready--; + if ((err = ddsrt_handle_event(events[i], evflags, user_data))) + goto err_handle; + } + } + } while (!ddsrt_atomic_ld32(&loop->terminate) && !(flags & DDSRT_RUN_ONCE)); + +err_handle: +err_select: + ddsrt_atomic_stptr(&loop->owner, 0u); + ddsrt_atomic_st32(&loop->terminate, 0u); + delete_cancelled(loop); + ddsrt_mutex_unlock(&loop->lock); + return err; +} diff --git a/src/ddsrt/src/event/select/eventlist.h b/src/ddsrt/src/event/select/eventlist.h new file mode 100644 index 0000000000..6d64558268 --- /dev/null +++ b/src/ddsrt/src/event/select/eventlist.h @@ -0,0 +1,29 @@ +/* + * Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 
1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + */ +#ifndef EVENTLIST_H +#define EVENTLIST_H + +#if !_WIN32 +#include +#else +#include +#endif + +struct eventlist { +#if !_WIN32 + int fdmax_plus_1; +#endif + fd_set readfds; + fd_set writefds; +}; + +#endif diff --git a/src/ddsrt/src/io.c b/src/ddsrt/src/io.c index 9a09112e06..568536b55c 100644 --- a/src/ddsrt/src/io.c +++ b/src/ddsrt/src/io.c @@ -78,6 +78,7 @@ ddsrt_asprintf( } #if defined(_MSC_VER) && (_MSC_VER < 1900) +// FIXME: this should really be in compat int snprintf( char *str, diff --git a/src/ddsrt/tests/CMakeLists.txt b/src/ddsrt/tests/CMakeLists.txt index eab4ec7469..c80d853939 100644 --- a/src/ddsrt/tests/CMakeLists.txt +++ b/src/ddsrt/tests/CMakeLists.txt @@ -27,7 +27,8 @@ set(sources retcode.c strlcpy.c socket.c - select.c) + select.c + eventlist.c) if(WITH_FREERTOS) list(APPEND sources tasklist.c) diff --git a/src/ddsrt/tests/eventlist.c b/src/ddsrt/tests/eventlist.c new file mode 100644 index 0000000000..a965ac7010 --- /dev/null +++ b/src/ddsrt/tests/eventlist.c @@ -0,0 +1,335 @@ +/* + * Copyright(c) 2022 ADLINK Technology Limited and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License + * v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
/* Check the structural invariants of an event list; every check is fatal.
 * - count never exceeds length
 * - start equals end exactly when the list holds at most one event
 * - with at most `embedded` events the length is the embedded capacity and
 *   get_events() returns the embedded array; otherwise the length is a
 *   multiple of the embedded capacity and get_events() returns the dynamic
 *   buffer
 */
static void assert_list(ddsrt_eventlist_t *list)
{
  ddsrt_event_t **events;
  // number of slots in the embedded array
  static const size_t embedded =
    sizeof(list->events.embedded)/sizeof(list->events.embedded[0]);

  CU_ASSERT_FATAL(list->count <= list->length);
  CU_ASSERT_FATAL((list->count <= 1) == (list->start == list->end));
  if (list->count <= embedded) {
    CU_ASSERT_EQUAL_FATAL(list->length, embedded);
    events = get_events(list);
    CU_ASSERT_PTR_EQUAL_FATAL(events, list->events.embedded);
  } else {
    CU_ASSERT_EQUAL_FATAL(list->length % embedded, 0);
    events = get_events(list);
    CU_ASSERT_PTR_EQUAL_FATAL(events, list->events.dynamic);
  }
}
assert_list(list); + } + + CU_ASSERT_EQUAL_FATAL(list->length, length); + CU_ASSERT_EQUAL_FATAL(list->count, length); + CU_ASSERT_EQUAL_FATAL(list->start, 0); + CU_ASSERT_EQUAL_FATAL(list->end, length - 1); + + if (shift > 0) { + ddsrt_event_t **buf = ddsrt_malloc((size_t)length * sizeof(*buf)); + ddsrt_event_t **ptr = get_events(list); + CU_ASSERT_PTR_NOT_NULL_FATAL(buf); + memmove(buf, ptr + (length - shift), (size_t)shift * sizeof(*buf)); + memmove(buf + shift, ptr, (size_t)(length - shift) * sizeof(*buf)); + memmove(ptr, buf, (size_t)length * sizeof(*buf)); + list->start += (size_t)shift; + list->end = (size_t)shift - 1; + ddsrt_free(buf); + } + + return events; +} + +#define EMBEDDED (DDSRT_EMBEDDED_EVENTS) +#define DYNAMIC (EMBEDDED*2) + +CU_Test(ddsrt_eventlist, resize) +{ + ddsrt_event_t events[EMBEDDED*4]; + ddsrt_eventlist_t list; + int count = 0; + + create_list(&list); + + // grow one past embedded. buffer should grow by embedded + for (; count < EMBEDDED+1; count++) + add_event(&list, &events[count], INT_MAX); + assert_list(&list); + CU_ASSERT_EQUAL_FATAL(list.length, (EMBEDDED*2)); + CU_ASSERT_EQUAL_FATAL(list.count, count); + CU_ASSERT_EQUAL_FATAL(list.start, 0); + CU_ASSERT_EQUAL_FATAL(list.end, EMBEDDED); + + // grow one past embedded*3.0 buffer should grow to embedded*4.0 + for (; count < (EMBEDDED*3)+1; count++) + add_event(&list, &events[count], INT_MAX); + assert_list(&list); + CU_ASSERT_EQUAL_FATAL(list.length, (EMBEDDED*4)); + CU_ASSERT_EQUAL_FATAL(list.count, count); + CU_ASSERT_EQUAL_FATAL(list.start, 0); + CU_ASSERT_EQUAL_FATAL(list.end, (EMBEDDED*3)); + + static const struct { + int count; + int expected; + } shrink[] = { + // shrink to embedded*3.0. buffer should not shrink + { (EMBEDDED*3), (EMBEDDED*4) }, + // shrink to embedded*2.0 - 1. buffer should shrink to embedded*2.0 + { (EMBEDDED*2) - 1, (EMBEDDED*2) }, + // shrink to embedded. buffer should shrink to embedded + { EMBEDDED, EMBEDDED }, + // shrink to embedded - 1. 
buffer should shrink to embedded + { EMBEDDED - 1, EMBEDDED } + }; + + for (size_t i=0, n=sizeof(shrink)/sizeof(shrink[0]); i < n; i++) { + for (; count > shrink[i].count; ) + delete_event(&list, &events[--count]); + assert_list(&list); + CU_ASSERT_EQUAL(list.length, shrink[i].expected); + } + + destroy_list(&list); +} + +CU_Test(ddsrt_eventlist, resize_wrapped) +{ + ddsrt_event_t *events; + ddsrt_eventlist_t list; + int count = (EMBEDDED*4); + + create_list(&list); + events = fill(&list, count, 5); + assert_list(&list); + + // FIXME: the fill function requires us to fill to a multiple of EMBEDDED + for (int n=(EMBEDDED*3)+1; count > n; ) + delete_event(&list, &events[--count]); + + static const struct { + int count; + int expected; + } shrink[] = { + // shrink to embedded*3.0. buffer should not shrink + { (EMBEDDED*3), (EMBEDDED*4) }, + // shrink to embedded*2.0 - 1. buffer should shrink to embedded*2.0 + { (EMBEDDED*2) - 1, (EMBEDDED*2) }, + // shrink to embedded. buffer should shrink to embedded + { EMBEDDED, EMBEDDED }, + // shrink to embedded - 1. 
/* Remove events from the front of a full list and add them back.
 *
 * Each table entry gives the list length, the start and end index produced by
 * fill() (a non-zero start yields a wrapped list) and the number of leading
 * events to remove.
 */
CU_Test(ddsrt_eventlist, remove_from_start)
{
  static const struct {
    int length;
    int start;
    int end;
    int remove;
  } tests[] = {
    // embedded list
    { EMBEDDED, 0, EMBEDDED - 1, 1 },
    { EMBEDDED, 0, EMBEDDED - 1, 2 },
    // embedded list with wrap around
    { EMBEDDED, 2, 1, 1 },
    { EMBEDDED, 2, 1, 2 },
    { EMBEDDED, 2, 1, 3 },
    // dynamic list
    { DYNAMIC, 0, DYNAMIC - 1, 1 },
    { DYNAMIC, 0, DYNAMIC - 1, 2 },
    // dynamic list with wrap around
    { DYNAMIC, 2, 1, 1 },
    { DYNAMIC, 2, 1, 2 },
    { DYNAMIC, 2, 1, 3 },
  };

  for (size_t i=0, n=sizeof(tests)/sizeof(tests[0]); i < n; i++) {
    ddsrt_event_t *events;
    ddsrt_eventlist_t list;
    int count;

    create_list(&list);
    events = fill(&list, tests[i].length, tests[i].start);
    CU_ASSERT_PTR_NOT_NULL_FATAL(events);

    CU_ASSERT_EQUAL_FATAL(list.start, tests[i].start);
    CU_ASSERT_EQUAL_FATAL(list.end, tests[i].end);
    count = (int)list.count;
    // delete events[remove-1] down to events[0]; the buffer length must not
    // change and start is expected to stay put until events[0] itself is
    // deleted
    for (int delete=tests[i].remove; delete > 0; ) {
      count--;
      delete_event(&list, &events[ --delete ]);
      assert_list(&list);
      CU_ASSERT_EQUAL_FATAL(list.length, tests[i].length);
      CU_ASSERT_EQUAL_FATAL(list.count, count);
      if (delete > 0)
        CU_ASSERT_EQUAL_FATAL(list.start, tests[i].start);
    }
    // start must have advanced by `remove` slots, wrapping at length
    if (tests[i].start >= tests[i].length - tests[i].remove) {
      int start = tests[i].start - (tests[i].length - tests[i].remove);
      CU_ASSERT_EQUAL_FATAL(list.start, start);
    } else {
      int start = tests[i].start + tests[i].remove;
      CU_ASSERT_EQUAL_FATAL(list.start, start);
    }
    // add the removed events back; count grows by one per add
    for (int add = 0; add < tests[i].remove; add++) {
      add_event(&list, &events[ add ], INT_MAX);
      assert_list(&list);
      CU_ASSERT_EQUAL_FATAL(list.count, (tests[i].length - (tests[i].remove - (add + 1))));
    }
    // end must have advanced by `remove` slots, wrapping at length
    if (tests[i].end >= tests[i].length - tests[i].remove) {
      int end = tests[i].end - (tests[i].length - tests[i].remove);
      CU_ASSERT_EQUAL(list.end, end);
    } else {
      int end = tests[i].end + tests[i].remove;
      CU_ASSERT_EQUAL(list.end, end);
    }
    ddsrt_free(events);
    destroy_list(&list);
  }
}
tests[i].length + add], INT_MAX); + assert_list(&list); + CU_ASSERT_EQUAL_FATAL(list.length, tests[i].length); + } + CU_ASSERT_EQUAL_FATAL(list.end, tests[i].end); + ddsrt_free(events); + destroy_list(&list); + } +} diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index 71e7bb44a6..f6f1aee2f9 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -17,3 +17,4 @@ if(BUILD_IDLC) add_subdirectory(idlc) endif() add_subdirectory(ddsperf) +add_subdirectory(netlink) diff --git a/src/tools/netlink/CMakeLists.txt b/src/tools/netlink/CMakeLists.txt new file mode 100644 index 0000000000..4bc656c256 --- /dev/null +++ b/src/tools/netlink/CMakeLists.txt @@ -0,0 +1,17 @@ +# +# Copyright(c) 2021 ADLINK Technology Limited and others +# +# This program and the accompanying materials are made available under the +# terms of the Eclipse Public License v. 2.0 which is available at +# http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License +# v. 1.0 which is available at +# http://www.eclipse.org/org/documents/edl-v10.php. 
+#
+# SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause
+#
+add_executable(netlink netlink.c)
+target_link_libraries(netlink PRIVATE ddsc)
+
+if(WIN32)
+  target_link_libraries(netlink PRIVATE ws2_32 iphlpapi)
+endif()
diff --git a/src/tools/netlink/netlink.c b/src/tools/netlink/netlink.c
new file mode 100644
index 0000000000..6f6c3b87b6
--- /dev/null
+++ b/src/tools/netlink/netlink.c
@@ -0,0 +1,97 @@
+/*
+ * Diagnostic tool: registers one netlink event with a ddsrt event loop and
+ * prints every link or address change reported to its callback on stderr.
+ */
+
+/* NOTE(review): the <...> header names were missing from this patch text;
+   they are restored from the symbols used below, confirm against upstream. */
+#include <assert.h>
+#include <stdio.h>
+#if _WIN32
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# endif
+# include <winsock2.h>
+# include <ws2tcpip.h>
+# include <iphlpapi.h>
+#else
+# include <arpa/inet.h>
+# include <netinet/in.h>
+#endif
+
+#include "dds/ddsrt/retcode.h"
+#include "dds/ddsrt/event.h"
+
+/*
+ * Event callback: reports a single netlink notification on stderr.
+ *
+ * flags selects what is printed: link up/down, IPv4 address added/deleted or
+ * IPv6 address added/deleted. For the address cases data is read as a
+ * ddsrt_netlink_message_t and its address member is printed in text form.
+ * Any other flag combination trips an assert. Always returns DDS_RETCODE_OK.
+ */
+static dds_return_t
+callback(ddsrt_event_t *event, uint32_t flags, const void *data, void *user_data)
+{
+  char ip[INET6_ADDRSTRLEN + 1];
+  const ddsrt_netlink_message_t *msg = data;
+
+  assert(event && (event->flags & DDSRT_NETLINK));
+
+  (void)event; /* only used by the assert above, which NDEBUG removes */
+  (void)user_data;
+
+  if (flags & (DDSRT_LINK_UP|DDSRT_LINK_DOWN)) {
+    fprintf(stderr, "got link %s event\n", (flags & DDSRT_LINK_UP) ? "up" : "down");
+  } else if (flags & (DDSRT_IPV4_ADDED|DDSRT_IPV4_DELETED)) {
+    const char *ev = (flags & DDSRT_IPV4_ADDED) ? "added" : "deleted";
+    /* inet_ntop returns NULL on failure and leaves ip unset, so print its result */
+    const char *str = inet_ntop(AF_INET, &((struct sockaddr_in *)&msg->address)->sin_addr, ip, sizeof(ip));
+    fprintf(stderr, "got ip4 (%s) %s event\n", str ? str : "?", ev);
+  } else if (flags & (DDSRT_IPV6_ADDED|DDSRT_IPV6_DELETED)) {
+    const char *ev = (flags & DDSRT_IPV6_ADDED) ? "added" : "deleted";
+    const char *str = inet_ntop(AF_INET6, &((struct sockaddr_in6 *)&msg->address)->sin6_addr, ip, sizeof(ip));
+    fprintf(stderr, "got ip6 (%s) %s event\n", str ? str : "?", ev);
+  } else {
+    assert(0);
+  }
+
+  return DDS_RETCODE_OK;
+}
+
+/*
+ * Creates a netlink event subscribed to all link and address notifications,
+ * adds it to a freshly created loop and runs that loop.
+ * Returns 1 if Winsock cannot be initialised (Windows only), 0 once the loop returns.
+ */
+int main(int argc, char *argv[])
+{
+  ddsrt_event_t event;
+  ddsrt_loop_t loop;
+  uint32_t flags = DDSRT_NETLINK | (DDSRT_LINK_UP | DDSRT_LINK_DOWN |
+                                    DDSRT_IPV4_ADDED | DDSRT_IPV4_DELETED |
+                                    DDSRT_IPV6_ADDED | DDSRT_IPV6_DELETED);
+
+  (void)argc;
+  (void)argv;
+
+#if _WIN32
+  WSADATA wsa_data;
+  if (WSAStartup(MAKEWORD(2,0), &wsa_data) != 0)
+    return 1;
+#endif
+
+  /* NOTE(review): the results of the ddsrt_* calls below are ignored; their
+     return types are not visible here, confirm and bail out on failure. */
+  ddsrt_create_event(&event, DDSRT_INVALID_SOCKET, flags, &callback, NULL);
+  ddsrt_create_loop(&loop);
+  ddsrt_add_event(&loop, &event);
+
+  ddsrt_run_loop(&loop, 0u, NULL);
+
+#if _WIN32
+  WSACleanup(); /* balances the successful WSAStartup above */
+#endif
+
+  return 0;
+}