From b67c90e6a69541ca5423dc39623ea7cba97fe6ea Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Thu, 13 Jul 2023 10:04:30 +0200 Subject: [PATCH 01/13] Refactor SPDP handling --- src/core/ddsi/src/ddsi_discovery_spdp.c | 354 +++++++++++++----------- 1 file changed, 199 insertions(+), 155 deletions(-) diff --git a/src/core/ddsi/src/ddsi_discovery_spdp.c b/src/core/ddsi/src/ddsi_discovery_spdp.c index c97114f704..57f76e5ce2 100644 --- a/src/core/ddsi/src/ddsi_discovery_spdp.c +++ b/src/core/ddsi/src/ddsi_discovery_spdp.c @@ -502,7 +502,7 @@ static void respond_to_spdp (const struct ddsi_domaingv *gv, const ddsi_guid_t * ddsi_entidx_enum_participant_fini (&est); } -static int handle_spdp_dead (const struct ddsi_receiver_state *rst, ddsi_entityid_t pwr_entityid, ddsrt_wctime_t timestamp, const ddsi_plist_t *datap, unsigned statusinfo) +static void handle_spdp_dead (const struct ddsi_receiver_state *rst, ddsi_entityid_t pwr_entityid, ddsrt_wctime_t timestamp, const ddsi_plist_t *datap, unsigned statusinfo) { struct ddsi_domaingv * const gv = rst->gv; ddsi_guid_t guid; @@ -534,7 +534,6 @@ static int handle_spdp_dead (const struct ddsi_receiver_state *rst, ddsi_entityi { GVWARNING ("data (SPDP, vendor %u.%u): no/invalid payload\n", rst->vendor.id[0], rst->vendor.id[1]); } - return 1; } static struct ddsi_proxy_participant *find_ddsi2_proxy_participant (const struct ddsi_entity_index *entidx, const ddsi_guid_t *ppguid) @@ -634,43 +633,73 @@ static bool accept_packet_from_interface (const struct ddsi_domaingv *gv, const return false; } -static int handle_spdp_alive (const struct ddsi_receiver_state *rst, ddsi_seqno_t seq, ddsrt_wctime_t timestamp, const ddsi_plist_t *datap) +enum participant_guid_is_known_result { + PGIKR_UNKNOWN, + PGIKR_KNOWN, + PGIKR_KNOWN_BUT_INTERESTING +}; + +static enum participant_guid_is_known_result participant_guid_is_known (const struct ddsi_receiver_state *rst, ddsi_seqno_t seq, ddsrt_wctime_t timestamp, const ddsi_plist_t *datap) { struct ddsi_domaingv * const gv = rst->gv; - const unsigned bes_sedp_announcer_mask = - DDSI_DISC_BUILTIN_ENDPOINT_SUBSCRIPTION_ANNOUNCER | - DDSI_DISC_BUILTIN_ENDPOINT_PUBLICATION_ANNOUNCER; - struct ddsi_addrset *as_meta, *as_default; - uint32_t builtin_endpoint_set; - ddsi_guid_t privileged_pp_guid; - dds_duration_t lease_duration; - unsigned custom_flags = 0; - - // Don't just process any SPDP packet but look at the network interface and uni/multicast - // One could refine this even further by also looking at the locators advertised in the - // packet, but this should suffice to drop unwanted multicast packets, which is the only - // use case I am currently aware of. - if (!accept_packet_from_interface (gv, rst)) - return 0; - - /* If advertised domain id or domain tag doesn't match, ignore the message. Do this first to - minimize the impact such messages have. */ + struct ddsi_entity_common *existing_entity; + if ((existing_entity = ddsi_entidx_lookup_guid_untyped (gv->entity_index, &datap->participant_guid)) == NULL) + { + /* Local SPDP packets may be looped back, and that can include ones + for participants currently being deleted. The first thing that + happens when deleting a participant is removing it from the hash + table, and consequently the looped back packet may appear to be + from an unknown participant. So we handle that. */ + if (ddsi_is_deleted_participant_guid (gv->deleted_participants, &datap->participant_guid, DDSI_DELETED_PPGUID_REMOTE)) + { + RSTTRACE ("SPDP ST0 "PGUIDFMT" (recently deleted)", PGUID (datap->participant_guid)); + return PGIKR_KNOWN; + } + } + else if (existing_entity->kind == DDSI_EK_PARTICIPANT) { - const uint32_t domain_id = (datap->present & PP_DOMAIN_ID) ? datap->domain_id : gv->config.extDomainId.value; - const char *domain_tag = (datap->present & PP_DOMAIN_TAG) ? datap->domain_tag : ""; - if (domain_id != gv->config.extDomainId.value || strcmp (domain_tag, gv->config.domainTag) != 0) + RSTTRACE ("SPDP ST0 "PGUIDFMT" (local)", PGUID (datap->participant_guid)); + return PGIKR_KNOWN; + } + else if (existing_entity->kind == DDSI_EK_PROXY_PARTICIPANT) + { + struct ddsi_proxy_participant *proxypp = (struct ddsi_proxy_participant *) existing_entity; + struct ddsi_lease *lease; + int interesting = 0; + RSTTRACE ("SPDP ST0 "PGUIDFMT" (known)", PGUID (datap->participant_guid)); + /* SPDP processing is so different from normal processing that we are + even skipping the automatic lease renewal. Note that proxy writers + that are not alive are not set alive here. This is done only when + data is received from a particular pwr (in handle_regular) */ + if ((lease = ddsrt_atomic_ldvoidp (&proxypp->minl_auto)) != NULL) + ddsi_lease_renew (lease, ddsrt_time_elapsed ()); + ddsrt_mutex_lock (&proxypp->e.lock); + if (proxypp->implicitly_created || seq > proxypp->seq) { - GVTRACE ("ignore remote participant in mismatching domain %"PRIu32" tag \"%s\"\n", domain_id, domain_tag); - return 0; + interesting = 1; + if (!(gv->logconfig.c.mask & DDS_LC_TRACE)) + GVLOGDISC ("SPDP ST0 "PGUIDFMT, PGUID (datap->participant_guid)); + GVLOGDISC (proxypp->implicitly_created ? " (NEW was-implicitly-created)" : " (update)"); + proxypp->implicitly_created = 0; + ddsi_update_proxy_participant_plist_locked (proxypp, seq, datap, timestamp); } + ddsrt_mutex_unlock (&proxypp->e.lock); + return interesting ? PGIKR_KNOWN_BUT_INTERESTING : PGIKR_KNOWN; } - - if (!(datap->present & PP_PARTICIPANT_GUID) || !(datap->present & PP_BUILTIN_ENDPOINT_SET)) + else { - GVWARNING ("data (SPDP, vendor %u.%u): no/invalid payload\n", rst->vendor.id[0], rst->vendor.id[1]); - return 0; + /* mismatch on entity kind: that should never have gotten past the input validation */ + GVWARNING ("data (SPDP, vendor %u.%u): "PGUIDFMT" kind mismatch\n", rst->vendor.id[0], rst->vendor.id[1], PGUID (datap->participant_guid)); + return PGIKR_KNOWN; } + return PGIKR_UNKNOWN; +} +static uint32_t get_builtin_endpoint_set (const struct ddsi_receiver_state *rst, const ddsi_plist_t *datap, bool is_secure) +{ + struct ddsi_domaingv * const gv = rst->gv; + uint32_t builtin_endpoint_set; + assert (datap->present & PP_BUILTIN_ENDPOINT_SET); /* At some point the RTI implementation didn't mention BUILTIN_ENDPOINT_DDSI_PARTICIPANT_MESSAGE_DATA_READER & ...WRITER, or so it seemed; and yet they are necessary for correct operation, @@ -685,79 +714,25 @@ static int handle_spdp_alive (const struct ddsi_receiver_state *rst, ddsi_seqno_ gv->config.assume_rti_has_pmd_endpoints) { GVLOGDISC ("data (SPDP, vendor %u.%u): assuming unadvertised PMD endpoints do exist\n", - rst->vendor.id[0], rst->vendor.id[1]); + rst->vendor.id[0], rst->vendor.id[1]); builtin_endpoint_set |= - DDSI_BUILTIN_ENDPOINT_PARTICIPANT_MESSAGE_DATA_READER | - DDSI_BUILTIN_ENDPOINT_PARTICIPANT_MESSAGE_DATA_WRITER; - } - - /* Do we know this GUID already? */ - { - struct ddsi_entity_common *existing_entity; - if ((existing_entity = ddsi_entidx_lookup_guid_untyped (gv->entity_index, &datap->participant_guid)) == NULL) - { - /* Local SPDP packets may be looped back, and that can include ones - for participants currently being deleted. The first thing that - happens when deleting a participant is removing it from the hash - table, and consequently the looped back packet may appear to be - from an unknown participant. So we handle that. */ - if (ddsi_is_deleted_participant_guid (gv->deleted_participants, &datap->participant_guid, DDSI_DELETED_PPGUID_REMOTE)) - { - RSTTRACE ("SPDP ST0 "PGUIDFMT" (recently deleted)", PGUID (datap->participant_guid)); - return 0; - } - } - else if (existing_entity->kind == DDSI_EK_PARTICIPANT) - { - RSTTRACE ("SPDP ST0 "PGUIDFMT" (local)", PGUID (datap->participant_guid)); - return 0; - } - else if (existing_entity->kind == DDSI_EK_PROXY_PARTICIPANT) - { - struct ddsi_proxy_participant *proxypp = (struct ddsi_proxy_participant *) existing_entity; - struct ddsi_lease *lease; - int interesting = 0; - RSTTRACE ("SPDP ST0 "PGUIDFMT" (known)", PGUID (datap->participant_guid)); - /* SPDP processing is so different from normal processing that we are - even skipping the automatic lease renewal. Note that proxy writers - that are not alive are not set alive here. This is done only when - data is received from a particular pwr (in handle_regular) */ - if ((lease = ddsrt_atomic_ldvoidp (&proxypp->minl_auto)) != NULL) - ddsi_lease_renew (lease, ddsrt_time_elapsed ()); - ddsrt_mutex_lock (&proxypp->e.lock); - if (proxypp->implicitly_created || seq > proxypp->seq) - { - interesting = 1; - if (!(gv->logconfig.c.mask & DDS_LC_TRACE)) - GVLOGDISC ("SPDP ST0 "PGUIDFMT, PGUID (datap->participant_guid)); - GVLOGDISC (proxypp->implicitly_created ? " (NEW was-implicitly-created)" : " (update)"); - proxypp->implicitly_created = 0; - ddsi_update_proxy_participant_plist_locked (proxypp, seq, datap, timestamp); - } - ddsrt_mutex_unlock (&proxypp->e.lock); - return interesting; - } - else - { - /* mismatch on entity kind: that should never have gotten past the - input validation */ - GVWARNING ("data (SPDP, vendor %u.%u): "PGUIDFMT" kind mismatch\n", rst->vendor.id[0], rst->vendor.id[1], PGUID (datap->participant_guid)); - return 0; - } + DDSI_BUILTIN_ENDPOINT_PARTICIPANT_MESSAGE_DATA_READER | + DDSI_BUILTIN_ENDPOINT_PARTICIPANT_MESSAGE_DATA_WRITER; } - - const bool is_secure = ((datap->builtin_endpoint_set & DDSI_DISC_BUILTIN_ENDPOINT_PARTICIPANT_SECURE_ANNOUNCER) != 0 && - (datap->present & PP_IDENTITY_TOKEN)); /* Make sure we don't create any security builtin endpoint when it's considered unsecure. */ if (!is_secure) builtin_endpoint_set &= DDSI_BES_MASK_NON_SECURITY; - GVLOGDISC ("SPDP ST0 "PGUIDFMT" bes %"PRIx32"%s NEW", PGUID (datap->participant_guid), builtin_endpoint_set, is_secure ? " (secure)" : ""); + return builtin_endpoint_set; +} - if (datap->present & PP_ADLINK_PARTICIPANT_VERSION_INFO) { +static unsigned get_custom_flags (const struct ddsi_domaingv *gv, const ddsi_plist_t *datap) +{ + unsigned custom_flags = 0; + if (datap->present & PP_ADLINK_PARTICIPANT_VERSION_INFO) + { if ((datap->adlink_participant_version_info.flags & DDSI_ADLINK_FL_DDSI2_PARTICIPANT_FLAG) && (datap->adlink_participant_version_info.flags & DDSI_ADLINK_FL_PARTICIPANT_IS_DDSI2)) custom_flags |= DDSI_CF_PARTICIPANT_IS_DDSI2; - GVLOGDISC (" (0x%08"PRIx32"-0x%08"PRIx32"-0x%08"PRIx32"-0x%08"PRIx32"-0x%08"PRIx32" %s)", datap->adlink_participant_version_info.version, datap->adlink_participant_version_info.flags, @@ -766,85 +741,155 @@ static int handle_spdp_alive (const struct ddsi_receiver_state *rst, ddsi_seqno_ datap->adlink_participant_version_info.unused[2], datap->adlink_participant_version_info.internals); } + return custom_flags; +} - /* Can't do "mergein_missing" because of constness of *datap */ - if (datap->qos.present & DDSI_QP_LIVELINESS) - lease_duration = datap->qos.liveliness.lease_duration; - else - { - assert (ddsi_default_qos_participant.present & DDSI_QP_LIVELINESS); - lease_duration = ddsi_default_qos_participant.liveliness.lease_duration; - } +static bool get_privileged_participant (const struct ddsi_receiver_state *rst, const ddsi_plist_t *datap, uint32_t builtin_endpoint_set, unsigned custom_flags, ddsi_guid_t *privileged_pp_guid) +{ + struct ddsi_domaingv * const gv = rst->gv; + const unsigned bes_sedp_announcer_mask = + DDSI_DISC_BUILTIN_ENDPOINT_SUBSCRIPTION_ANNOUNCER | + DDSI_DISC_BUILTIN_ENDPOINT_PUBLICATION_ANNOUNCER; /* If any of the SEDP announcer are missing AND the guid prefix of the SPDP writer differs from the guid prefix of the new participant, we make it dependent on the writer's participant. See also the lease expiration handling. Note that the entityid MUST be DDSI_ENTITYID_PARTICIPANT or entidx_lookup will assert. So we only zero the prefix. */ - privileged_pp_guid.prefix = rst->src_guid_prefix; - privileged_pp_guid.entityid.u = DDSI_ENTITYID_PARTICIPANT; + privileged_pp_guid->prefix = rst->src_guid_prefix; + privileged_pp_guid->entityid.u = DDSI_ENTITYID_PARTICIPANT; if ((builtin_endpoint_set & bes_sedp_announcer_mask) != bes_sedp_announcer_mask && - memcmp (&privileged_pp_guid, &datap->participant_guid, sizeof (ddsi_guid_t)) != 0) + memcmp (privileged_pp_guid, &datap->participant_guid, sizeof (ddsi_guid_t)) != 0) { - GVLOGDISC (" (depends on "PGUIDFMT")", PGUID (privileged_pp_guid)); - /* never expire lease for this proxy: it won't actually expire - until the "privileged" one expires anyway */ - lease_duration = DDS_INFINITY; + return true; } else if (ddsi_vendor_is_eclipse_or_opensplice (rst->vendor) && !(custom_flags & DDSI_CF_PARTICIPANT_IS_DDSI2)) { /* Non-DDSI2 participants are made dependent on DDSI2 (but DDSI2 itself need not be discovered yet) */ struct ddsi_proxy_participant *ddsi2; - if ((ddsi2 = find_ddsi2_proxy_participant (gv->entity_index, &datap->participant_guid)) == NULL) - memset (&privileged_pp_guid.prefix, 0, sizeof (privileged_pp_guid.prefix)); - else + if ((ddsi2 = find_ddsi2_proxy_participant (gv->entity_index, &datap->participant_guid)) != NULL) { - privileged_pp_guid.prefix = ddsi2->e.guid.prefix; - lease_duration = DDS_INFINITY; - GVLOGDISC (" (depends on "PGUIDFMT")", PGUID (privileged_pp_guid)); + privileged_pp_guid->prefix = ddsi2->e.guid.prefix; + return true; } } + // clear privileged_pp_guid prefix, but leave the magic entity id + memset (&privileged_pp_guid->prefix, 0, sizeof (privileged_pp_guid->prefix)); + return false; +} + +static bool get_locators (const struct ddsi_receiver_state *rst, const ddsi_plist_t *datap, struct ddsi_addrset **as_default, struct ddsi_addrset **as_meta) +{ + struct ddsi_domaingv * const gv = rst->gv; + const ddsi_locators_t emptyset = { .n = 0, .first = NULL, .last = NULL }; + const ddsi_locators_t *uc; + const ddsi_locators_t *mc; + bool allow_srcloc; + ddsi_interface_set_t inherited_intfs; + + uc = (datap->present & PP_DEFAULT_UNICAST_LOCATOR) ? &datap->default_unicast_locators : ∅ + mc = (datap->present & PP_DEFAULT_MULTICAST_LOCATOR) ? &datap->default_multicast_locators : ∅ + if (gv->config.tcp_use_peeraddr_for_unicast) + uc = ∅ // force use of source locator + allow_srcloc = (uc == &emptyset) && !ddsi_is_unspec_locator (&rst->pktinfo.src); + ddsi_interface_set_init (&inherited_intfs); + *as_default = ddsi_addrset_from_locatorlists (gv, uc, mc, &rst->pktinfo, allow_srcloc, &inherited_intfs); + + uc = (datap->present & PP_METATRAFFIC_UNICAST_LOCATOR) ? &datap->metatraffic_unicast_locators : ∅ + mc = (datap->present & PP_METATRAFFIC_MULTICAST_LOCATOR) ? &datap->metatraffic_multicast_locators : ∅ + if (gv->config.tcp_use_peeraddr_for_unicast) + uc = ∅ // force use of source locator + allow_srcloc = (uc == &emptyset) && !ddsi_is_unspec_locator (&rst->pktinfo.src); + ddsi_interface_set_init (&inherited_intfs); + *as_meta = ddsi_addrset_from_locatorlists (gv, uc, mc, &rst->pktinfo, allow_srcloc, &inherited_intfs); + + ddsi_log_addrset (gv, DDS_LC_DISCOVERY, " (data", *as_default); + ddsi_log_addrset (gv, DDS_LC_DISCOVERY, " meta", *as_meta); + GVLOGDISC (")"); + + if (ddsi_addrset_contains_non_psmx_uc (*as_default) && ddsi_addrset_contains_non_psmx_uc (*as_meta)) + return true; else { - memset (&privileged_pp_guid.prefix, 0, sizeof (privileged_pp_guid.prefix)); + GVLOGDISC (" (no unicast address"); + ddsi_unref_addrset (*as_default); + ddsi_unref_addrset (*as_meta); + return false; + } +} + +// Result for handle_spdp_alive, "interesting" vs "not interesting" affects the +// logging category for subsequent logging output +enum handle_spdp_result { + HSR_NOT_INTERESTING, + HSR_INTERESTING +}; + +static enum handle_spdp_result handle_spdp_alive (const struct ddsi_receiver_state *rst, ddsi_seqno_t seq, ddsrt_wctime_t timestamp, const ddsi_plist_t *datap) +{ + struct ddsi_domaingv * const gv = rst->gv; + + // Don't just process any SPDP packet but look at the network interface and uni/multicast + // One could refine this even further by also looking at the locators advertised in the + // packet, but this should suffice to drop unwanted multicast packets, which is the only + // use case I am currently aware of. + if (!accept_packet_from_interface (gv, rst)) + return HSR_NOT_INTERESTING; + + /* If advertised domain id or domain tag doesn't match, ignore the message. Do this first to + minimize the impact such messages have. */ + { + const uint32_t domain_id = (datap->present & PP_DOMAIN_ID) ? datap->domain_id : gv->config.extDomainId.value; + const char *domain_tag = (datap->present & PP_DOMAIN_TAG) ? datap->domain_tag : ""; + if (domain_id != gv->config.extDomainId.value || strcmp (domain_tag, gv->config.domainTag) != 0) + { + GVTRACE ("ignore remote participant in mismatching domain %"PRIu32" tag \"%s\"\n", domain_id, domain_tag); + return HSR_NOT_INTERESTING; + } } - /* Choose locators */ + if (!(datap->present & PP_PARTICIPANT_GUID) || !(datap->present & PP_BUILTIN_ENDPOINT_SET)) { - const ddsi_locators_t emptyset = { .n = 0, .first = NULL, .last = NULL }; - const ddsi_locators_t *uc; - const ddsi_locators_t *mc; - bool allow_srcloc; - ddsi_interface_set_t inherited_intfs; + GVWARNING ("data (SPDP, vendor %u.%u): no/invalid payload\n", rst->vendor.id[0], rst->vendor.id[1]); + return HSR_NOT_INTERESTING; + } + + const enum participant_guid_is_known_result pgik_result = participant_guid_is_known (rst, seq, timestamp, datap); + if (pgik_result != PGIKR_UNKNOWN) + return (pgik_result == PGIKR_KNOWN_BUT_INTERESTING) ? HSR_INTERESTING : HSR_NOT_INTERESTING; + + const bool is_secure = ((datap->builtin_endpoint_set & DDSI_DISC_BUILTIN_ENDPOINT_PARTICIPANT_SECURE_ANNOUNCER) != 0 && (datap->present & PP_IDENTITY_TOKEN)); - uc = (datap->present & PP_DEFAULT_UNICAST_LOCATOR) ? &datap->default_unicast_locators : ∅ - mc = (datap->present & PP_DEFAULT_MULTICAST_LOCATOR) ? &datap->default_multicast_locators : ∅ - if (gv->config.tcp_use_peeraddr_for_unicast) - uc = ∅ // force use of source locator - allow_srcloc = (uc == &emptyset) && !ddsi_is_unspec_locator (&rst->pktinfo.src); - ddsi_interface_set_init (&inherited_intfs); - as_default = ddsi_addrset_from_locatorlists (gv, uc, mc, &rst->pktinfo, allow_srcloc, &inherited_intfs); + const uint32_t builtin_endpoint_set = get_builtin_endpoint_set (rst, datap, is_secure); + GVLOGDISC ("SPDP ST0 "PGUIDFMT" bes %"PRIx32"%s NEW", PGUID (datap->participant_guid), builtin_endpoint_set, is_secure ? " (secure)" : ""); - uc = (datap->present & PP_METATRAFFIC_UNICAST_LOCATOR) ? &datap->metatraffic_unicast_locators : ∅ - mc = (datap->present & PP_METATRAFFIC_MULTICAST_LOCATOR) ? &datap->metatraffic_multicast_locators : ∅ - if (gv->config.tcp_use_peeraddr_for_unicast) - uc = ∅ // force use of source locator - allow_srcloc = (uc == &emptyset) && !ddsi_is_unspec_locator (&rst->pktinfo.src); - ddsi_interface_set_init (&inherited_intfs); - as_meta = ddsi_addrset_from_locatorlists (gv, uc, mc, &rst->pktinfo, allow_srcloc, &inherited_intfs); + const unsigned custom_flags = get_custom_flags (gv, datap); - ddsi_log_addrset (gv, DDS_LC_DISCOVERY, " (data", as_default); - ddsi_log_addrset (gv, DDS_LC_DISCOVERY, " meta", as_meta); - GVLOGDISC (")"); + /* Can't do "mergein_missing" because of constness of *datap */ + dds_duration_t lease_duration; + if (datap->qos.present & DDSI_QP_LIVELINESS) + lease_duration = datap->qos.liveliness.lease_duration; + else + { + assert (ddsi_default_qos_participant.present & DDSI_QP_LIVELINESS); + lease_duration = ddsi_default_qos_participant.liveliness.lease_duration; } - if (!(ddsi_addrset_contains_non_psmx_uc (as_default) && ddsi_addrset_contains_non_psmx_uc (as_meta))) + ddsi_guid_t privileged_pp_guid; + if (get_privileged_participant (rst, datap, builtin_endpoint_set, custom_flags, &privileged_pp_guid)) + { + GVLOGDISC (" (depends on "PGUIDFMT")", PGUID (privileged_pp_guid)); + /* never expire lease for this proxy: it won't actually expire + until the "privileged" one expires anyway */ + lease_duration = DDS_INFINITY; + } + + struct ddsi_addrset *as_meta, *as_default; + if (!get_locators (rst, datap, &as_default, &as_meta)) { - GVLOGDISC (" (no unicast address)"); - ddsi_unref_addrset (as_default); - ddsi_unref_addrset (as_meta); - return 1; + GVLOGDISC (" (no unicast address"); + return HSR_INTERESTING; } GVLOGDISC (" QOS={"); @@ -857,23 +902,21 @@ static int handle_spdp_alive (const struct ddsi_receiver_state *rst, ddsi_seqno_ if (!ddsi_new_proxy_participant (&proxy_participant, gv, &datap->participant_guid, builtin_endpoint_set, &privileged_pp_guid, as_default, as_meta, datap, lease_duration, rst->vendor, custom_flags, timestamp, seq)) { /* If no proxy participant was created, don't respond */ - return 0; + return HSR_NOT_INTERESTING; } else { /* Force transmission of SPDP messages - we're not very careful in avoiding the processing of SPDP packets addressed to others so filter here */ - int have_dst = (rst->dst_guid_prefix.u[0] != 0 || rst->dst_guid_prefix.u[1] != 0 || rst->dst_guid_prefix.u[2] != 0); - if (!have_dst) + const bool have_dst = (rst->dst_guid_prefix.u[0] != 0 || rst->dst_guid_prefix.u[1] != 0 || rst->dst_guid_prefix.u[2] != 0); + if (have_dst) + GVLOGDISC ("directed SPDP packet -> not responding\n"); + else { GVLOGDISC ("broadcasted SPDP packet -> answering"); respond_to_spdp (gv, &datap->participant_guid); } - else - { - GVLOGDISC ("directed SPDP packet -> not responding\n"); - } if (custom_flags & DDSI_CF_PARTICIPANT_IS_DDSI2) { @@ -893,7 +936,7 @@ static int handle_spdp_alive (const struct ddsi_receiver_state *rst, ddsi_seqno_ ddsi_delete_proxy_participant_by_guid (gv, &datap->participant_guid, timestamp, 1); } } - return 1; + return HSR_INTERESTING; } } @@ -903,7 +946,7 @@ void ddsi_handle_spdp (const struct ddsi_receiver_state *rst, ddsi_entityid_t pw ddsi_plist_t decoded_data; if (ddsi_serdata_to_sample (serdata, &decoded_data, NULL, NULL)) { - int interesting = 0; + enum handle_spdp_result interesting = HSR_NOT_INTERESTING; switch (serdata->statusinfo & (DDSI_STATUSINFO_DISPOSE | DDSI_STATUSINFO_UNREGISTER)) { case 0: @@ -913,11 +956,12 @@ void ddsi_handle_spdp (const struct ddsi_receiver_state *rst, ddsi_entityid_t pw case DDSI_STATUSINFO_DISPOSE: case DDSI_STATUSINFO_UNREGISTER: case (DDSI_STATUSINFO_DISPOSE | DDSI_STATUSINFO_UNREGISTER): - interesting = handle_spdp_dead (rst, pwr_entityid, serdata->timestamp, &decoded_data, serdata->statusinfo); + handle_spdp_dead (rst, pwr_entityid, serdata->timestamp, &decoded_data, serdata->statusinfo); + interesting = HSR_INTERESTING; break; } ddsi_plist_fini (&decoded_data); - GVLOG (interesting ? DDS_LC_DISCOVERY : DDS_LC_TRACE, "\n"); + GVLOG (interesting == HSR_INTERESTING ? DDS_LC_DISCOVERY : DDS_LC_TRACE, "\n"); } } From 9d3067b516c012e1a590ae2923a7912ffe199702 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Thu, 10 Aug 2023 10:04:43 +0200 Subject: [PATCH 02/13] Centralized SPDP message scheduler This replaces the per-participant timed-event for periodically broadcasting its SPDP message by a central device that schedules and sends the SPDP messages for all participants and maintains the set of addresses to which to send them. There are a number of benefits to this model: * We can make it more likely (certain, if we want) that SPDP messages to a single destination go out in a single packet, reducing the packet rate if there are many participants. * We are no longer dependent on an "addrset" for tracking the set of addresses to publish to (and we don't need to special-case SPDP writers). That means we now have the option of "aging" locators and lowering the frequency or giving up on them altogether if there just doesn't seem to be anything at that address. --- src/core/ddsc/tests/test_oneliner.c | 3 +- src/core/ddsi/CMakeLists.txt | 2 + .../ddsi/include/dds/ddsi/ddsi_domaingv.h | 2 + .../ddsi/include/dds/ddsi/ddsi_participant.h | 1 - src/core/ddsi/src/ddsi__spdp_schedule.h | 55 ++ src/core/ddsi/src/ddsi__xmsg.h | 17 +- src/core/ddsi/src/ddsi_discovery_spdp.c | 22 +- src/core/ddsi/src/ddsi_init.c | 7 + src/core/ddsi/src/ddsi_participant.c | 98 +-- src/core/ddsi/src/ddsi_proxy_participant.c | 35 + src/core/ddsi/src/ddsi_spdp_schedule.c | 611 ++++++++++++++++++ src/core/ddsi/src/ddsi_xmsg.c | 19 + 12 files changed, 784 insertions(+), 88 deletions(-) create mode 100644 src/core/ddsi/src/ddsi__spdp_schedule.h create mode 100644 src/core/ddsi/src/ddsi_spdp_schedule.c diff --git a/src/core/ddsc/tests/test_oneliner.c b/src/core/ddsc/tests/test_oneliner.c index eb14da1a99..7b29f924b3 100644 --- a/src/core/ddsc/tests/test_oneliner.c +++ b/src/core/ddsc/tests/test_oneliner.c @@ -30,6 +30,7 @@ #include "ddsi__lease.h" #include "ddsi__participant.h" #include "ddsi__proxy_participant.h" +#include "ddsi__spdp_schedule.h" #include "dds/dds.h" #include "dds__types.h" #include "dds__entity.h" @@ -1857,7 +1858,7 @@ static void dohearing_maybe_imm (struct oneliner_ctx *ctx, bool immediate) wait_for_cleanup (ctx, ctx->es[ent], &xprime->m_guid); ddsi_thread_state_awake (ddsi_lookup_thread_state (), &xprime->m_domain->gv); if ((pp = ddsi_entidx_lookup_participant_guid (xprime->m_domain->gv.entity_index, &xprime->m_guid)) != NULL) - ddsi_resched_xevent_if_earlier (pp->spdp_xevent, ddsrt_mtime_add_duration (ddsrt_time_monotonic (), DDS_MSECS (100))); + ddsi_spdp_force_republish (pp->e.gv->spdp_schedule, pp); ddsi_thread_state_asleep (ddsi_lookup_thread_state ()); dds_entity_unpin (xprime); } diff --git a/src/core/ddsi/CMakeLists.txt b/src/core/ddsi/CMakeLists.txt index 685594b693..ca342eaf98 100644 --- a/src/core/ddsi/CMakeLists.txt +++ b/src/core/ddsi/CMakeLists.txt @@ -76,6 +76,7 @@ set(srcs_ddsi ddsi_radmin.c ddsi_receive.c ddsi_sockwaitset.c + ddsi_spdp_schedule.c ddsi_sysdeps.c ddsi_thread.c ddsi_transmit.c @@ -203,6 +204,7 @@ set(hdrs_private_ddsi ddsi__radmin.h ddsi__receive.h ddsi__sockwaitset.h + ddsi__spdp_schedule.h ddsi__thread.h ddsi__transmit.h ddsi__whc.h diff --git a/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h b/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h index f35ba0b1c6..5bfdc910be 100644 --- a/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h +++ b/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h @@ -49,6 +49,7 @@ struct ddsi_tkmap; struct dds_security_context; struct dds_security_match_index; struct ddsi_hsadmin; +struct spdp_admin; struct ddsi_config_in_addr_node { ddsi_locator_t loc; @@ -205,6 +206,7 @@ struct ddsi_domaingv { set. These are the addresses that SPDP pings get sent to. */ struct ddsi_addrset *as_disc; + struct spdp_admin *spdp_schedule; ddsrt_mutex_t lock; diff --git a/src/core/ddsi/include/dds/ddsi/ddsi_participant.h b/src/core/ddsi/include/dds/ddsi/ddsi_participant.h index 8924b0ec98..b8e4913fd0 100644 --- a/src/core/ddsi/include/dds/ddsi/ddsi_participant.h +++ b/src/core/ddsi/include/dds/ddsi/ddsi_participant.h @@ -66,7 +66,6 @@ struct ddsi_participant unsigned is_ddsi2_pp: 1; /* true for the "federation leader", the ddsi2 participant itself in OSPL; FIXME: probably should use this for broker mode as well ... */ uint32_t flags; /* flags used when creating this participant */ struct ddsi_plist *plist; /* settings/QoS for this participant */ - struct ddsi_xevent *spdp_xevent; /* timed event for periodically publishing SPDP */ struct ddsi_xevent *pmd_update_xevent; /* timed event for periodically publishing ParticipantMessageData */ ddsi_locator_t m_locator; /* this is always a unicast address, it is set if it is in the many unicast mode */ struct ddsi_tran_conn * m_conn; /* this is connection to m_locator, if it is set, this is used */ diff --git a/src/core/ddsi/src/ddsi__spdp_schedule.h b/src/core/ddsi/src/ddsi__spdp_schedule.h new file mode 100644 index 0000000000..6ae1d1f99a --- /dev/null +++ b/src/core/ddsi/src/ddsi__spdp_schedule.h @@ -0,0 +1,55 @@ +// Copyright(c) 2023 ZettaScale Technology and others +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License v. 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License +// v. 1.0 which is available at +// http://www.eclipse.org/org/documents/edl-v10.php. +// +// SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + +#ifndef DDSI__SPDP_SCHEDULE_H +#define DDSI__SPDP_SCHEDULE_H + +#include "dds/ddsi/ddsi_unused.h" +#include "dds/ddsi/ddsi_domaingv.h" // FIXME: MAX_XMIT_CONNS + +#if defined (__cplusplus) +extern "C" { +#endif + +struct spdp_admin; + +struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) + ddsrt_nonnull_all ddsrt_attribute_warn_unused_result; + +void ddsi_spdp_scheduler_delete (struct spdp_admin *adm) + ddsrt_nonnull_all; + +dds_return_t ddsi_spdp_register_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) + ddsrt_nonnull_all ddsrt_attribute_warn_unused_result; + +// Not an error if `pp` is not registered +void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) + ddsrt_nonnull_all; + +dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc) + ddsrt_nonnull_all ddsrt_attribute_warn_unused_result; + +void ddsi_spdp_unref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, bool on_lease_expiry) + ddsrt_nonnull_all; + +void ddsi_spdp_handle_aging_locators_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *xev, struct ddsi_xpack *xp, void *varg, ddsrt_mtime_t tnow) + ddsrt_nonnull ((1, 2, 3)); + +void ddsi_spdp_handle_live_locators_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *xev, struct ddsi_xpack *xp, void *varg, ddsrt_mtime_t tnow) + ddsrt_nonnull ((1, 2, 3)); + +void ddsi_spdp_force_republish (struct spdp_admin *adm, const struct ddsi_participant *pp) + ddsrt_nonnull_all; + +#if defined (__cplusplus) +} +#endif + +#endif /* DDSI__SPDP_SCHEDULE_H */ diff --git a/src/core/ddsi/src/ddsi__xmsg.h b/src/core/ddsi/src/ddsi__xmsg.h index 9bcab56391..1219bb29fd 100644 --- a/src/core/ddsi/src/ddsi__xmsg.h +++ b/src/core/ddsi/src/ddsi__xmsg.h @@ -72,6 +72,20 @@ void ddsi_xmsgpool_free (struct ddsi_xmsgpool *pool) struct ddsi_xmsg *ddsi_xmsg_new (struct ddsi_xmsgpool *pool, const ddsi_guid_t *src_guid, struct ddsi_participant *pp, size_t expected_size, enum ddsi_xmsg_kind kind) ddsrt_nonnull ((1, 2)) ddsrt_attribute_warn_unused_result; +/** + * @brief For sending to a particular destination (participant) + * @component rtps_submsg + * + * m may not have a destination yet + * + * @param gv domain globals + * @param m xmsg + * @param gp guid prefix + * @param loc destination locator + */ +void ddsi_xmsg_setdst1 (struct ddsi_domaingv *gv, struct ddsi_xmsg *m, const ddsi_guid_prefix_t *gp, const ddsi_xlocator_t *loc) + ddsrt_nonnull_all; + /** * @brief For sending to a particular destination (participant) * @component rtps_submsg @@ -81,7 +95,8 @@ struct ddsi_xmsg *ddsi_xmsg_new (struct ddsi_xmsgpool *pool, const ddsi_guid_t * * @param gp guid prefix * @param loc destination locator */ -void ddsi_xmsg_setdst1 (struct ddsi_domaingv *gv, struct ddsi_xmsg *m, const ddsi_guid_prefix_t *gp, const ddsi_xlocator_t *loc); +void ddsi_xmsg_setdst1_generic (struct ddsi_domaingv *gv, struct ddsi_xmsg *m, const ddsi_guid_prefix_t *gp, const ddsi_xlocator_t *loc) +ddsrt_nonnull_all; /** @component rtps_submsg */ bool ddsi_xmsg_getdst1_prefix (struct ddsi_xmsg *m, ddsi_guid_prefix_t *gp) diff --git a/src/core/ddsi/src/ddsi_discovery_spdp.c b/src/core/ddsi/src/ddsi_discovery_spdp.c index 57f76e5ce2..4257b59b71 100644 --- a/src/core/ddsi/src/ddsi_discovery_spdp.c +++ b/src/core/ddsi/src/ddsi_discovery_spdp.c @@ -34,22 +34,10 @@ #include "ddsi__lease.h" #include "ddsi__xqos.h" -static void maybe_add_pp_as_meta_to_as_disc (struct ddsi_domaingv *gv, const struct ddsi_addrset *as_meta) -{ - // FIXME: this is mostly equivalent to the pre-per-interface "allow_multicast" setting, but we can do much better - // because we know the interface on which received it, whether it was a multicast, and, for Cyclone peers, whether - // it was spontaneous or in response to one we sent - bool allow_mc_spdp = false; - for (int i = 0; i < gv->n_interfaces && !allow_mc_spdp; i++) - if (gv->interfaces[i].allow_multicast & DDSI_AMC_SPDP) - allow_mc_spdp = true; - if (ddsi_addrset_empty_mc (as_meta) || !allow_mc_spdp) - { - ddsi_xlocator_t loc; - ddsi_addrset_any_uc (as_meta, &loc); - ddsi_add_xlocator_to_addrset (gv, gv->as_disc, &loc); - } -} +static bool get_pp_and_spdp_wr (struct ddsi_domaingv *gv, const ddsi_guid_t *pp_guid, struct ddsi_participant **pp, struct ddsi_writer **spdp_wr) + ddsrt_nonnull_all; +static bool resend_spdp_sample_by_guid_key (struct ddsi_writer *wr, const ddsi_guid_t *guid, struct ddsi_proxy_reader *prd) + ddsrt_nonnull ((1, 2)); struct locators_builder { ddsi_locators_t *dst; @@ -896,8 +884,6 @@ static enum handle_spdp_result handle_spdp_alive (const struct ddsi_receiver_sta ddsi_xqos_log (DDS_LC_DISCOVERY, &gv->logconfig, &datap->qos); GVLOGDISC ("}\n"); - maybe_add_pp_as_meta_to_as_disc (gv, as_meta); - struct ddsi_proxy_participant *proxy_participant; if (!ddsi_new_proxy_participant (&proxy_participant, gv, &datap->participant_guid, builtin_endpoint_set, &privileged_pp_guid, as_default, as_meta, datap, lease_duration, rst->vendor, custom_flags, timestamp, seq)) { diff --git a/src/core/ddsi/src/ddsi_init.c b/src/core/ddsi/src/ddsi_init.c index c69610fc30..e059fefb90 100644 --- a/src/core/ddsi/src/ddsi_init.c +++ b/src/core/ddsi/src/ddsi_init.c @@ -69,6 +69,7 @@ #include "ddsi__typelib.h" #include "ddsi__vendor.h" #include "ddsi__sockwaitset.h" +#include "ddsi__spdp_schedule.h" #include "dds__whc.h" #include "dds/cdr/dds_cdrstream.h" @@ -1708,6 +1709,9 @@ int ddsi_init (struct ddsi_domaingv *gv, struct ddsi_psmx_instance_locators *psm add_peer_addresses (gv, gv->as_disc, gv->config.peers); } + gv->spdp_schedule = ddsi_spdp_scheduler_new (gv); + if (gv->spdp_schedule == NULL) abort (); // FIXME: handle OOM here, it is not that hard ... + gv->gcreq_queue = ddsi_gcreq_queue_new (gv); ddsrt_atomic_st32 (&gv->rtps_keepgoing, 1); @@ -2011,6 +2015,9 @@ void ddsi_stop (struct ddsi_domaingv *gv) void ddsi_fini (struct ddsi_domaingv *gv) { + /* No participants or proxy participants left, so this is safe */ + ddsi_spdp_scheduler_delete (gv->spdp_schedule); + /* The receive threads have already been stopped, therefore defragmentation and reorder state can't change anymore and can be freed. */ diff --git a/src/core/ddsi/src/ddsi_participant.c b/src/core/ddsi/src/ddsi_participant.c index efc8f1ce7b..fa8af5062a 100644 --- a/src/core/ddsi/src/ddsi_participant.c +++ b/src/core/ddsi/src/ddsi_participant.c @@ -37,6 +37,7 @@ #include "ddsi__vendor.h" #include "ddsi__xqos.h" #include "ddsi__inverse_uint32_set.h" +#include "ddsi__spdp_schedule.h" #include "dds__whc.h" static const unsigned builtin_writers_besmask = @@ -179,16 +180,6 @@ void ddsi_participant_release_entityid (struct ddsi_participant *pp, ddsi_entity ddsrt_mutex_unlock (&pp->e.lock); } -static void force_as_disc_address (struct ddsi_domaingv *gv, const ddsi_guid_t *subguid) -{ - struct ddsi_writer *wr = ddsi_entidx_lookup_writer_guid (gv->entity_index, subguid); - assert (wr != NULL); - ddsrt_mutex_lock (&wr->e.lock); - ddsi_unref_addrset (wr->as); - wr->as = ddsi_ref_addrset (gv->as_disc); - ddsrt_mutex_unlock (&wr->e.lock); -} - #ifdef DDS_HAS_SECURITY static void add_security_builtin_endpoints (struct ddsi_participant *pp, ddsi_guid_t *subguid, const ddsi_guid_t *group_guid, struct ddsi_domaingv *gv, bool add_writers, bool add_readers) { @@ -200,9 +191,7 @@ static void add_security_builtin_endpoints (struct ddsi_participant *pp, ddsi_gu wrinfo = dds_whc_make_wrinfo (NULL, &gv->builtin_endpoint_xqos_wr); ddsi_new_writer (NULL, subguid, group_guid, pp, DDS_BUILTIN_TOPIC_PARTICIPANT_SECURE_NAME, gv->spdp_secure_type, &gv->builtin_endpoint_xqos_wr, dds_whc_new(gv, wrinfo), NULL, NULL, NULL); dds_whc_free_wrinfo (wrinfo); - /* But we need the as_disc address set for SPDP, because we need to - send it to everyone regardless of the existence of readers. */ - force_as_disc_address(gv, subguid); + // FIXME: where does it publish secure SPDP? pp->bes |= DDSI_DISC_BUILTIN_ENDPOINT_PARTICIPANT_SECURE_ANNOUNCER; subguid->entityid = ddsi_to_entityid (DDSI_ENTITYID_P2P_BUILTIN_PARTICIPANT_STATELESS_MESSAGE_WRITER); @@ -655,6 +644,7 @@ void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid DDSI_ENTITYID_P2P_BUILTIN_PARTICIPANT_VOLATILE_SECURE_WRITER, DDSI_ENTITYID_P2P_BUILTIN_PARTICIPANT_VOLATILE_SECURE_READER, }; + struct ddsi_domaingv * const gv = pp->e.gv; ddsrt_mutex_lock (&pp->refc_lock); update_pp_refc (pp, guid_of_refing_entity, -1); @@ -663,8 +653,8 @@ void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid stguid = *guid_of_refing_entity; else memset (&stguid, 0, sizeof (stguid)); - ELOGDISC (pp, "ddsi_unref_participant("PGUIDFMT" @ %p <- "PGUIDFMT" @ %p) user %"PRId32" builtin %"PRId32"\n", - PGUID (pp->e.guid), (void*)pp, PGUID (stguid), (void*)guid_of_refing_entity, pp->user_refc, pp->builtin_refc); + GVLOGDISC ("ddsi_unref_participant("PGUIDFMT" @ %p <- "PGUIDFMT" @ %p) user %"PRId32" builtin %"PRId32"\n", + PGUID (pp->e.guid), (void *)pp, PGUID (stguid), (void *)guid_of_refing_entity, pp->user_refc, pp->builtin_refc); if (pp->user_refc == 0 && pp->bes != 0 && pp->state < DDSI_PARTICIPANT_STATE_DELETING_BUILTINS) { @@ -692,30 +682,24 @@ void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid pp->state = DDSI_PARTICIPANT_STATE_DELETING_BUILTINS; ddsrt_mutex_unlock (&pp->refc_lock); - if (pp->spdp_xevent) - ddsi_delete_xevent (pp->spdp_xevent); if (pp->pmd_update_xevent) ddsi_delete_xevent (pp->pmd_update_xevent); - - /* SPDP relies on the WHC, but dispose-unregister will empty - it. The event handler verifies the event has already been - scheduled for deletion when it runs into an empty WHC */ - ddsi_spdp_dispose_unregister (pp); + ddsi_spdp_unregister_participant (gv->spdp_schedule, pp); /* If this happens to be the privileged_pp, clear it */ - ddsrt_mutex_lock (&pp->e.gv->privileged_pp_lock); - if (pp == pp->e.gv->privileged_pp) - pp->e.gv->privileged_pp = NULL; - ddsrt_mutex_unlock (&pp->e.gv->privileged_pp_lock); + ddsrt_mutex_lock (&gv->privileged_pp_lock); + if (pp == gv->privileged_pp) + gv->privileged_pp = NULL; + ddsrt_mutex_unlock (&gv->privileged_pp_lock); for (i = 0; i < (int) (sizeof (builtin_endpoints_tab) / sizeof (builtin_endpoints_tab[0])); i++) - delete_builtin_endpoint (pp->e.gv, &pp->e.guid, builtin_endpoints_tab[i]); + delete_builtin_endpoint (gv, &pp->e.guid, builtin_endpoints_tab[i]); } else if (pp->user_refc == 0 && pp->builtin_refc == 0) { ddsrt_mutex_unlock (&pp->refc_lock); - if (!(pp->e.onlylocal)) + if (!pp->e.onlylocal) { if ((pp->bes & builtin_writers_besmask) != builtin_writers_besmask && !(pp->flags & RTPS_PF_NO_PRIVILEGED_PP)) { @@ -731,23 +715,23 @@ void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid the ddsi_unref_participant, because we may trigger a clean-up of it. */ struct ddsi_participant *ppp; - ddsrt_mutex_lock (&pp->e.gv->privileged_pp_lock); - ppp = pp->e.gv->privileged_pp; - ddsrt_mutex_unlock (&pp->e.gv->privileged_pp_lock); + ddsrt_mutex_lock (&gv->privileged_pp_lock); + ppp = gv->privileged_pp; + ddsrt_mutex_unlock (&gv->privileged_pp_lock); assert (ppp != NULL); ddsi_unref_participant (ppp, &pp->e.guid); } } - ddsrt_mutex_lock (&pp->e.gv->participant_set_lock); - assert (pp->e.gv->nparticipants > 0); - if (--pp->e.gv->nparticipants == 0) - ddsrt_cond_broadcast (&pp->e.gv->participant_set_cond); - ddsrt_mutex_unlock (&pp->e.gv->participant_set_lock); - if (pp->e.gv->config.many_sockets_mode == DDSI_MSM_MANY_UNICAST) + ddsrt_mutex_lock (&gv->participant_set_lock); + assert (gv->nparticipants > 0); + if (--gv->nparticipants == 0) + ddsrt_cond_broadcast (&gv->participant_set_cond); + ddsrt_mutex_unlock (&gv->participant_set_lock); + if (gv->config.many_sockets_mode == DDSI_MSM_MANY_UNICAST) { ddsrt_atomic_fence_rel (); - ddsrt_atomic_inc32 (&pp->e.gv->participant_set_generation); + ddsrt_atomic_inc32 (&gv->participant_set_generation); /* Deleting the socket will usually suffice to wake up the receiver threads, but in general, no one cares if it takes a @@ -761,8 +745,8 @@ void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid ddsrt_free (pp->plist); ddsrt_mutex_destroy (&pp->refc_lock); ddsi_entity_common_fini (&pp->e); - ddsi_remove_deleted_participant_guid (pp->e.gv->deleted_participants, &pp->e.guid, DDSI_DELETED_PPGUID_LOCAL); - ddsi_inverse_uint32_set_fini(&pp->avail_entityids.x); + ddsi_remove_deleted_participant_guid (gv->deleted_participants, &pp->e.guid, DDSI_DELETED_PPGUID_LOCAL); + ddsi_inverse_uint32_set_fini (&pp->avail_entityids.x); ddsrt_free (pp); } else @@ -902,7 +886,6 @@ dds_return_t ddsi_new_participant (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv /* Before we create endpoints -- and may call ddsi_unref_participant if things go wrong -- we must initialize all that ddsi_unref_participant depends on. */ - pp->spdp_xevent = NULL; pp->pmd_update_xevent = NULL; /* Create built-in endpoints (note: these have no GID, and no group GUID). */ @@ -917,9 +900,6 @@ dds_return_t ddsi_new_participant (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv wrinfo = dds_whc_make_wrinfo (NULL, &gv->spdp_endpoint_xqos); ddsi_new_writer (NULL, &subguid, &group_guid, pp, DDS_BUILTIN_TOPIC_PARTICIPANT_NAME, gv->spdp_type, &gv->spdp_endpoint_xqos, dds_whc_new(gv, wrinfo), NULL, NULL, NULL); dds_whc_free_wrinfo (wrinfo); - /* But we need the as_disc address set for SPDP, because we need to - send it to everyone regardless of the existence of readers. */ - force_as_disc_address (gv, &subguid); pp->bes |= DDSI_DISC_BUILTIN_ENDPOINT_PARTICIPANT_ANNOUNCER; } @@ -977,29 +957,12 @@ dds_return_t ddsi_new_participant (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv if (!(flags & RTPS_PF_NO_BUILTIN_WRITERS) || !(flags & RTPS_PF_NO_PRIVILEGED_PP)) { - /* SPDP periodic broadcast uses the retransmit path, so the initial - publication must be done differently. Must be later than making - the participant globally visible, or the SPDP processing won't - recognise the participant as a local one. */ - if (ddsi_spdp_write (pp) >= 0) - { - /* Once the initial sample has been written, the automatic and - asynchronous broadcasting required by SPDP can start. Also, - since we're new alive, PMD updates can now start, too. - Schedule the first update for 100ms in the future to reduce the - impact of the first sample getting lost. Note: these two may - fire before the calls return. If the initial sample wasn't - accepted, all is lost, but we continue nonetheless, even though - the participant won't be able to discover or be discovered. */ - struct ddsi_spdp_broadcast_xevent_cb_arg arg = { .pp_guid = pp->e.guid }; - pp->spdp_xevent = ddsi_qxev_callback (gv->xevents, ddsrt_mtime_add_duration (ddsrt_time_monotonic (), DDS_MSECS (100)), ddsi_spdp_broadcast_xevent_cb, &arg, sizeof (arg), false); - } + if (ddsi_spdp_register_participant (gv->spdp_schedule, pp) != 0) + abort (); // FIXME - { - ddsrt_mtime_t tsched = (pp->plist->qos.liveliness.lease_duration == DDS_INFINITY) ? DDSRT_MTIME_NEVER : (ddsrt_mtime_t){0}; - struct ddsi_write_pmd_message_xevent_cb_arg arg = { .pp_guid = pp->e.guid }; - pp->pmd_update_xevent = ddsi_qxev_callback (gv->xevents, tsched, ddsi_write_pmd_message_xevent_cb, &arg, sizeof (arg), false); - } + ddsrt_mtime_t tsched = (pp->plist->qos.liveliness.lease_duration == DDS_INFINITY) ? DDSRT_MTIME_NEVER : (ddsrt_mtime_t){0}; + struct ddsi_write_pmd_message_xevent_cb_arg arg = { .pp_guid = pp->e.guid }; + pp->pmd_update_xevent = ddsi_qxev_callback (gv->xevents, tsched, ddsi_write_pmd_message_xevent_cb, &arg, sizeof (arg), false); } #ifdef DDS_HAS_SECURITY @@ -1041,8 +1004,9 @@ void ddsi_update_participant_plist (struct ddsi_participant *pp, const ddsi_plis { ddsrt_mutex_lock (&pp->e.lock); if (ddsi_update_qos_locked (&pp->e, &pp->plist->qos, &plist->qos, ddsrt_time_wallclock ())) - ddsi_spdp_write (pp); + ddsi_spdp_write (pp); // FIXME: this takes care of generating new SPDP message; it seems to not handle the secure one ddsrt_mutex_unlock (&pp->e.lock); + ddsi_spdp_force_republish (pp->e.gv->spdp_schedule, pp); } static void gc_delete_participant (struct ddsi_gcreq *gcreq) diff --git a/src/core/ddsi/src/ddsi_proxy_participant.c b/src/core/ddsi/src/ddsi_proxy_participant.c index ea1143706f..6c5281804d 100644 --- a/src/core/ddsi/src/ddsi_proxy_participant.c +++ b/src/core/ddsi/src/ddsi_proxy_participant.c @@ -34,6 +34,7 @@ #include "ddsi__tran.h" #include "ddsi__vendor.h" #include "ddsi__addrset.h" +#include "ddsi__spdp_schedule.h" typedef struct proxy_purge_data { struct ddsi_proxy_participant *proxypp; @@ -46,6 +47,37 @@ const ddsrt_avl_treedef_t ddsi_proxypp_proxytp_treedef = DDSRT_AVL_TREEDEF_INITIALIZER (offsetof (struct ddsi_proxy_topic, avlnode), offsetof (struct ddsi_proxy_topic, entityid), ddsi_compare_entityid, 0); #endif +enum maybe_update_as_disc_for_proxypp_op { MUADFPOP_ADD, MUADFPOP_REMOVE_ON_DELETE, MUADFPOP_REMOVE_ON_EXPIRY }; + +static void maybe_update_as_disc_for_proxypp (struct ddsi_domaingv *gv, const struct ddsi_addrset *as_meta, enum maybe_update_as_disc_for_proxypp_op op) +{ + // FIXME: this is mostly equivalent to the pre-per-interface "allow_multicast" setting, but we can do much better + // because we know the interface on which received it, whether it was a multicast, and, for Cyclone peers, whether + // it was spontaneous or in response to one we sent + bool allow_mc_spdp = false; + for (int i = 0; i < gv->n_interfaces && !allow_mc_spdp; i++) + if (gv->interfaces[i].allow_multicast & DDSI_AMC_SPDP) + allow_mc_spdp = true; + if (ddsi_addrset_empty_mc (as_meta) || !allow_mc_spdp) + { + // FIXME: should perhaps do all unicast addresses + // FIXME: relying on "any_uc" to always return the same address is very bad + ddsi_xlocator_t loc; + ddsi_addrset_any_uc (as_meta, &loc); + switch (op) + { + case MUADFPOP_ADD: + if (ddsi_spdp_ref_locator (gv->spdp_schedule, &loc) != DDS_RETCODE_OK) + abort (); // FIXME: propagate and delete proxy participant + break; + case MUADFPOP_REMOVE_ON_DELETE: + case MUADFPOP_REMOVE_ON_EXPIRY: + ddsi_spdp_unref_locator (gv->spdp_schedule, &loc, (op == MUADFPOP_REMOVE_ON_EXPIRY)); + break; + } + } +} + static void proxy_participant_replace_minl (struct ddsi_proxy_participant *proxypp, bool manbypp, struct ddsi_lease *lnew) { /* By loading/storing the pointer atomically, we ensure we always @@ -479,11 +511,13 @@ bool ddsi_new_proxy_participant (struct ddsi_proxy_participant **proxy_participa } #endif *proxy_participant = proxypp; + maybe_update_as_disc_for_proxypp (gv, proxypp->as_meta, MUADFPOP_ADD); return true; } int ddsi_update_proxy_participant_plist_locked (struct ddsi_proxy_participant *proxypp, ddsi_seqno_t seq, const struct ddsi_plist *datap, ddsrt_wctime_t timestamp) { + // FIXME: need to (eventually) support updating locator lists, that now also involves the SPDP schedule tables if (seq > proxypp->seq) { proxypp->seq = seq; @@ -682,6 +716,7 @@ static void delete_ppt (struct ddsi_proxy_participant *proxypp, ddsrt_wctime_t t } ddsrt_free (child_entities); + maybe_update_as_disc_for_proxypp (proxypp->e.gv, proxypp->as_meta, isimplicit ? MUADFPOP_REMOVE_ON_EXPIRY : MUADFPOP_REMOVE_ON_DELETE); gcreq_proxy_participant (proxypp); } diff --git a/src/core/ddsi/src/ddsi_spdp_schedule.c b/src/core/ddsi/src/ddsi_spdp_schedule.c new file mode 100644 index 0000000000..02f4f7e8c9 --- /dev/null +++ b/src/core/ddsi/src/ddsi_spdp_schedule.c @@ -0,0 +1,611 @@ +// Copyright(c) 2023 ZettaScale Technology and others +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License v. 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License +// v. 1.0 which is available at +// http://www.eclipse.org/org/documents/edl-v10.php. +// +// SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + +#include +#include +#include +#include + +#include "dds/version.h" +#include "dds/ddsrt/heap.h" +#include "dds/ddsrt/string.h" +#include "dds/ddsi/ddsi_domaingv.h" +#include "ddsi__discovery_spdp.h" +#include "ddsi__discovery_addrset.h" +#include "ddsi__discovery_endpoint.h" +#include "ddsi__spdp_schedule.h" +#include "ddsi__addrset.h" +#include "ddsi__serdata_plist.h" +#include "ddsi__entity_index.h" +#include "ddsi__entity.h" +#include "ddsi__security_omg.h" +#include "ddsi__endpoint.h" +#include "ddsi__plist.h" +#include "ddsi__proxy_participant.h" +#include "ddsi__topic.h" +#include "ddsi__vendor.h" +#include "ddsi__xevent.h" +#include "ddsi__transmit.h" +#include "ddsi__lease.h" +#include "ddsi__xqos.h" + +/* +X + + "aging" locators can be skipped every now and then + - L has T_aging_start + - f(T_now, T_next, T_aging_start) -> Bool that says whether locator with given T_aging_start + should be sent an SPDP now for the participant that plans to send its next SPDP at T_next + actually: "aging" happens when there are no proxy participants known at the address + so it is purely for discovery purposes and we don't need to worry about a per-participant SPDP interval + + P_i needs to send SPDP no later than T_i + Most of the time, all P have same interval + Most of the time, there aren't all that many P's + Arguably, it is ok to loop over them without worrying + Basically, as long as there is a P, there are SPDPs to be sent + not strictly true, but the case where this isn't true is rare indeed, so let's accept some unnecessary work + there is almost always a P + + Want to send them ordered by locator + + Could do table (T, P, L) ordered in some sane way but that means a lot of duplication ... + + Indexing on locator address, makes the most sense + + spdp_locator_ref (const struct ddsi_xlocator *loc) + L = find loc somewhere + if !L.is_fixed && L.proxypp_refc++ == 0 + delete L from L_aging + insert L in L_live + + spdp_locator_unref (const struct ddsi_xlocator *loc) + L = find loc L_live + if !L.is_fixed && --L.proxypp_refc == 0 + delete L from L_live + L.interval[0] = 1 + L.interval[1] = 0 + insert L in L_aging + + foreach L_aging: + assert L.proxypp_refc == 0 + if T_now > L.T_sched + foreach P: + resend_spdp(P, L) + n = L.interval[0] + L.interval[1] + if (n > max_n) + delete L + else + L.interval[1] = L.interval[0] + L.interval[0] = n + L.T_sched = T_now + function-that-maps-F_n-to-SPDP-interval + resched at min(L.T_sched for L in L_aging) + + foreach L_live: + assert L.is_fixed || L.proxypp_refc > 0 + foreach P: + if (P.T_sched <= T_now + a bit) + resend_spdp(P, L) + foreach P: + if (P.T_sched <= T_now + a bit) + P.T_sched = T_now + intv(P) + resched at min(P.T_sched) + +X +*/ + +struct spdp_loc_common { + ddsrt_avl_node_t avlnode; // indexed on address + ddsi_xlocator_t xloc; // the address +}; + +struct spdp_loc_aging { + struct spdp_loc_common c; + ddsrt_mtime_t tsched; // time at which to ping this locator again + uint32_t age; // decremented, entry is deleted when it reaches 0 +}; + +struct spdp_loc_live { + struct spdp_loc_common c; + uint32_t proxypp_refc; // number of proxy participants known at this address + bool lease_expiry_occurred; // if refc goes to 0 and not lease expiry, then no need to age +}; + +union spdp_loc_union { + struct spdp_loc_common c; + struct spdp_loc_live live; + struct spdp_loc_aging aging; +}; + +struct spdp_pp { + ddsrt_avl_node_t avlnode; // indexed on pp's GUID but needs a struct spdp_pp in lookup + const struct ddsi_participant *pp; + ddsrt_mtime_t tsched; +}; + +struct spdp_admin { + struct ddsi_domaingv *gv; + struct ddsi_xevent *live_xev; + struct ddsi_xevent *aging_xev; + ddsrt_mutex_t lock; + // Locators to send SPDP messages to: those where there are live proxy participants + // and those that we're aging because there aren't any + ddsrt_avl_tree_t live; + ddsrt_avl_tree_t aging; + // use global table of participants and: + // - embed schedule info in them + // - worry about race conditions + // or: have a separate table here ... what to do ... + // + // I like keeping the schedule info out of the participant. That settles it. + ddsrt_avl_tree_t pp; +}; + +static int compare_xlocators_vwrap (const void *va, const void *vb) { + return ddsi_compare_xlocators (va, vb); +} + +static int compare_spdp_pp (const void *va, const void *vb) { + const struct spdp_pp *a = va; + const struct spdp_pp *b = vb; + return ddsi_compare_guid (&a->pp->e.guid, &b->pp->e.guid); +} + +static const ddsrt_avl_treedef_t spdp_loc_td = DDSRT_AVL_TREEDEF_INITIALIZER(offsetof (union spdp_loc_union, c.avlnode), offsetof (union spdp_loc_union, c.xloc), compare_xlocators_vwrap, NULL); +static const ddsrt_avl_treedef_t spdp_pp_td = DDSRT_AVL_TREEDEF_INITIALIZER(offsetof (struct spdp_pp, avlnode), 0, compare_spdp_pp, NULL); + +struct add_as_disc_helper_arg { + struct spdp_admin *adm; + bool all_ok; +}; + +static void add_as_disc_helper (const ddsi_xlocator_t *loc, void *varg) +{ + struct add_as_disc_helper_arg * const arg = varg; + // FIXME: some but not all initial locators need to go into aging + if (arg->all_ok && ddsi_spdp_ref_locator (arg->adm, loc) != DDS_RETCODE_OK) + arg->all_ok = false; +} + +static void remove_as_disc_helper (const ddsi_xlocator_t *loc, void *varg) +{ + struct spdp_admin * const adm = varg; + ddsi_spdp_unref_locator (adm, loc, false); +} + +struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) +{ + struct spdp_admin *adm; + if ((adm = ddsrt_malloc_s (sizeof (*adm))) == NULL) + return NULL; + ddsrt_mutex_init (&adm->lock); + adm->gv = gv; + ddsrt_avl_init (&spdp_loc_td, &adm->aging); + ddsrt_avl_init (&spdp_loc_td, &adm->live); + ddsrt_avl_init (&spdp_pp_td, &adm->pp); + + struct add_as_disc_helper_arg arg = { .adm = adm, .all_ok = true }; + ddsi_addrset_forall (gv->as_disc, add_as_disc_helper, &arg); + if (!arg.all_ok) + { + ddsrt_avl_free (&spdp_loc_td, &adm->live, ddsrt_free); + // FIXME: need to do aging of initial locators, too + ddsrt_avl_free (&spdp_loc_td, &adm->aging, ddsrt_free); + ddsrt_mutex_destroy (&adm->lock); + ddsrt_free (adm); + return NULL; + } + + // from here on we potentially have multiple threads messing with `adm` + const ddsrt_mtime_t t_sched = ddsrt_mtime_add_duration (ddsrt_time_monotonic (), DDS_MSECS (0)); + adm->aging_xev = ddsi_qxev_callback (gv->xevents, t_sched, ddsi_spdp_handle_aging_locators_xevent_cb, NULL, 0, true); + adm->live_xev = ddsi_qxev_callback (gv->xevents, t_sched, ddsi_spdp_handle_live_locators_xevent_cb, NULL, 0, true); + return adm; +} + +void ddsi_spdp_scheduler_delete (struct spdp_admin *adm) +{ + // FIXME: Initial addresses may still be around, probably should invert it, check refc=1, check present in as_disc, then free + ddsi_addrset_forall (adm->gv->as_disc, remove_as_disc_helper, adm); + assert (ddsrt_avl_is_empty (&adm->live)); + assert (ddsrt_avl_is_empty (&adm->pp)); + ddsi_delete_xevent (adm->aging_xev); + ddsi_delete_xevent (adm->live_xev); + // intrusive data structures, can simply free everything + ddsrt_avl_free (&spdp_loc_td, &adm->aging, ddsrt_free); + ddsrt_mutex_destroy (&adm->lock); + ddsrt_free (adm); +} + +#ifndef NDEBUG +static bool spdp_message_exists (const struct ddsi_participant *pp) +{ + struct ddsi_writer *spdp_wr; + dds_return_t ret = ddsi_get_builtin_writer (pp, DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_WRITER, &spdp_wr); + if (ret != DDS_RETCODE_OK || spdp_wr == NULL) + return true; + else + { + // Construct a key for looking up the SPDP sample in the WHC ... + ddsi_plist_t ps; + ddsi_plist_init_empty (&ps); + ps.present |= PP_PARTICIPANT_GUID; + ps.participant_guid = pp->e.guid; + struct ddsi_serdata * const sdkey = ddsi_serdata_from_sample (pp->e.gv->spdp_type, SDK_KEY, &ps); + ddsi_plist_fini (&ps); + // ... then borrow it, surreptitiously increment its refcount before returning it so we can unlock earlier ... + struct ddsi_whc_borrowed_sample sample; + ddsrt_mutex_lock (&spdp_wr->e.lock); + const bool exists = ddsi_whc_borrow_sample_key (spdp_wr->whc, sdkey, &sample); + if (exists) + ddsi_whc_return_sample (spdp_wr->whc, &sample, false); + ddsrt_mutex_unlock (&spdp_wr->e.lock); + return exists; + } +} +#endif + +dds_return_t ddsi_spdp_register_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) +{ + /* SPDP periodic broadcast uses the retransmit path, so the initial + publication must be done differently. Must be later than making + the participant globally visible, or the SPDP processing won't + recognise the participant as a local one. */ + if (ddsi_spdp_write ((struct ddsi_participant *) pp) < 0) + return DDS_RETCODE_OK; // FIXME: Need to check why it can fail; this is what it used to do + + ddsi_spdp_force_republish (adm, pp); + + // FIXME: let's just cache the serdata in the participant and do all the publishing in this file + + // FIXME: what about the secure writer? It overwrites its as with as_disc on creation, but it is reliable and therefore the matching code will recompute it, so I don't think there is any need to do anything for that one (except possibly making sure it does get updated on a QoS change) + + // FIXME: why would I register a participant if its SPDP writer cannot be found? Maybe it can change ... + assert (spdp_message_exists (pp)); + ddsrt_mutex_lock (&adm->lock); + ddsrt_avl_ipath_t ip; + const struct spdp_pp template = { .pp = pp }; + struct spdp_pp *ppn; + ppn = ddsrt_avl_lookup_ipath (&spdp_pp_td, &adm->pp, &template, &ip); + assert (ppn == NULL); + (void) ppn; + if ((ppn = ddsrt_malloc_s (sizeof (*ppn))) == NULL) + { + ddsrt_mutex_unlock (&adm->lock); + return DDS_RETCODE_OUT_OF_RESOURCES; + } + else + { + const ddsrt_mtime_t tsched = ddsrt_mtime_add_duration (ddsrt_time_monotonic(), DDS_MSECS (100)); // FIXME: initial schedule ... + ppn->pp = pp; + ppn->tsched = tsched; + ddsrt_avl_insert_ipath (&spdp_pp_td, &adm->pp, ppn, &ip); + ddsrt_mutex_unlock (&adm->lock); + ddsi_resched_xevent_if_earlier (adm->live_xev, tsched); + return DDS_RETCODE_OK; + } +} + +static struct ddsi_addrset *make_spdp_addrset (struct spdp_admin *adm) +{ + struct ddsi_addrset *as = ddsi_new_addrset (); + ddsrt_mutex_lock (&adm->lock); + ddsrt_avl_iter_t loc_it; + for (struct spdp_loc_live *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->live, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) + ddsi_add_xlocator_to_addrset (adm->gv, as, &n->c.xloc); + for (struct spdp_loc_aging *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->aging, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) + ddsi_add_xlocator_to_addrset (adm->gv, as, &n->c.xloc); + ddsrt_mutex_unlock (&adm->lock); + return as; +} + +void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) +{ + ddsrt_mutex_lock (&adm->lock); + ddsrt_avl_dpath_t dp; + const struct spdp_pp template = { .pp = pp }; + struct spdp_pp *ppn; + // FIXME: do I really have to allow for ppn == NULL? + if ((ppn = ddsrt_avl_lookup_dpath (&spdp_pp_td, &adm->pp, &template, &dp)) != NULL) + { + assert (ppn->pp == pp); + ddsrt_avl_delete_dpath (&spdp_pp_td, &adm->pp, ppn, &dp); + ddsrt_free (ppn); + } + ddsrt_mutex_unlock (&adm->lock); + + /* SPDP relies on the WHC, but dispose-unregister will empty + it. The event handler verifies the event has already been + scheduled for deletion when it runs into an empty WHC */ + // FIXME: store serdata in pp and this hack can go away + // HACK HACK HACK + struct ddsi_addrset *as = make_spdp_addrset (adm); + const ddsi_guid_t subguid = { .prefix = pp->e.guid.prefix, .entityid = { .u = DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_WRITER } }; + struct ddsi_writer *wr = ddsi_entidx_lookup_writer_guid (pp->e.gv->entity_index, &subguid); + assert (wr != NULL); + ddsrt_mutex_lock (&wr->e.lock); + ddsi_unref_addrset (wr->as); + wr->as = as; + ddsrt_mutex_unlock (&wr->e.lock); + // HACK HACK HACK + // FIXME: this currently also handles the secure SPDP message, but that may change + ddsi_spdp_dispose_unregister ((struct ddsi_participant *) pp); + // HACK HACK HACK + ddsrt_mutex_lock (&wr->e.lock); + ddsi_unref_addrset (wr->as); + wr->as = ddsi_new_addrset (); + ddsrt_mutex_unlock (&wr->e.lock); + // HACK HACK HACK +} + +dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc) +{ + dds_return_t ret = DDS_RETCODE_OK; + union spdp_loc_union *n; + ddsrt_mutex_lock (&adm->lock); + union { + ddsrt_avl_ipath_t ip; + ddsrt_avl_dpath_t dp; + } avlpath; + if ((n = ddsrt_avl_lookup_dpath (&spdp_loc_td, &adm->aging, xloc, &avlpath.dp)) != NULL) + { + ddsrt_avl_delete_dpath (&spdp_loc_td, &adm->aging, n, &avlpath.dp); + n->live.proxypp_refc = 1; + n->live.lease_expiry_occurred = false; + ddsrt_avl_insert (&spdp_loc_td, &adm->live, n); + } + else if ((n = ddsrt_avl_lookup_ipath (&spdp_loc_td, &adm->live, xloc, &avlpath.ip)) != NULL) + { + n->live.proxypp_refc++; + } + else if ((n = ddsrt_malloc_s (sizeof (*n))) != NULL) + { + n->c.xloc = *xloc; + n->live.proxypp_refc = 1; + n->live.lease_expiry_occurred = false; + ddsrt_avl_insert_ipath (&spdp_loc_td, &adm->live, n, &avlpath.ip); + } + else + { + // fatal: we could kinda continue even in the absence of memory, but must prevent the + // proxy from attempting to decrement the refcount when it gets deleted + ret = DDS_RETCODE_OUT_OF_RESOURCES; + } + ddsrt_mutex_unlock (&adm->lock); + return ret; +} + +void ddsi_spdp_unref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, bool on_lease_expiry) +{ + union spdp_loc_union *n; + ddsrt_mutex_lock (&adm->lock); + ddsrt_avl_dpath_t dp; + n = ddsrt_avl_lookup_dpath (&spdp_loc_td, &adm->live, xloc, &dp); + assert (n != NULL); + assert (n->live.proxypp_refc > 0); + if (on_lease_expiry) + n->live.lease_expiry_occurred = true; + if (--n->live.proxypp_refc == 0) + { + ddsrt_avl_delete_dpath (&spdp_loc_td, &adm->live, n, &dp); + assert (ddsrt_avl_lookup (&spdp_loc_td, &adm->aging, xloc) == NULL); + // If all proxy participants informed us they were being deleted, then we don't need to + // start aging it + // + // What if it is shortly after startup and we'd still be pinging it if there hadn't been + // a proxy participant at this address? It is unicast, so if there are others it is + // reasonable to assume they would all have discovered us at the same time (true for + // Cyclone anyway) and so there won't be anything at this locator until a new one is + // created. For that case, we can reasonably rely on that new one. + if (!n->live.lease_expiry_occurred) + ddsrt_free (n); + else + { + // FIXME: Discovery/Peers: user needs to set timeout for each locator, that should be used here for those in the initial set + // FIXME: for those learnt along the way, an appropriate configuration setting needs to be added (here 10 times/10 minutes) + // the idea is to ping at least several (10) times and keep trying for at least several (10) minutes + const dds_duration_t base_intv = adm->gv->config.spdp_interval.isdefault ? DDS_SECS (30) : adm->gv->config.spdp_interval.value; + n->aging.age = (base_intv < 1 || base_intv >= DDS_SECS (60)) ? 10 : (uint32_t) (DDS_SECS (600) / base_intv); + n->aging.tsched = ddsrt_mtime_add_duration (ddsrt_time_monotonic (), base_intv); + ddsrt_avl_insert (&spdp_loc_td, &adm->aging, n); + } + } + ddsrt_mutex_unlock (&adm->lock); +} + +ddsrt_nonnull ((2, 3)) +static void resend_spdp (struct ddsi_xpack *xp, const struct ddsi_participant *pp, const ddsi_xlocator_t *xloc) +{ + struct ddsi_writer *spdp_wr; + dds_return_t ret = ddsi_get_builtin_writer (pp, DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_WRITER, &spdp_wr); + if (ret != DDS_RETCODE_OK || spdp_wr == NULL) + return; + // Construct a key for looking up the SPDP sample in the WHC ... + // FIXME: this is just way too inefficient (and we do it over and over ...) + // FIXME: why not cache a pointer to it in pp? + ddsi_plist_t ps; + ddsi_plist_init_empty (&ps); + ps.present |= PP_PARTICIPANT_GUID; + ps.participant_guid = pp->e.guid; + struct ddsi_serdata * const sdkey = ddsi_serdata_from_sample (pp->e.gv->spdp_type, SDK_KEY, &ps); + ddsi_plist_fini (&ps); + // ... then borrow it, surreptitiously increment its refcount before returning it so we can unlock earlier ... + struct ddsi_whc_borrowed_sample sample; + ddsrt_mutex_lock (&spdp_wr->e.lock); + if (!ddsi_whc_borrow_sample_key (spdp_wr->whc, sdkey, &sample)) + sample.serdata = NULL; + else + { + (void) ddsi_serdata_ref (sample.serdata); + ddsi_whc_return_sample (spdp_wr->whc, &sample, false); + } + ddsrt_mutex_unlock (&spdp_wr->e.lock); + ddsi_serdata_unref (sdkey); + // ... and send it + if (sample.serdata) + { + static const ddsi_guid_prefix_t nullguidprefix; + struct ddsi_xmsg *msg; + if (ddsi_create_fragment_message (spdp_wr, sample.seq, sample.serdata, 0, UINT16_MAX, NULL, &msg, 1, UINT32_MAX) >= 0) + { + // FIXME: ddsi_create_fragment_message set the wrong destination, maybe refactor that? + ddsi_xmsg_setdst1_generic (pp->e.gv, msg, &nullguidprefix, xloc); + if (xp) + ddsi_xpack_addmsg (xp, msg, 0); + else + ddsi_qxev_msg (pp->e.gv->xevents, msg); + } + ddsi_serdata_unref (sample.serdata); + } +} + +ddsrt_nonnull_all +static ddsrt_mtime_t spdp_do_aging_locators (struct spdp_admin *adm, struct ddsi_xpack *xp, ddsrt_mtime_t tnow) +{ + const dds_duration_t t_coalesce = DDS_SECS (1); // aging, so low rate + const ddsrt_mtime_t t_cutoff = ddsrt_mtime_add_duration (tnow, t_coalesce); + ddsrt_mtime_t t_sched = DDSRT_MTIME_NEVER; + ddsrt_mutex_lock (&adm->lock); + struct spdp_loc_aging *n = ddsrt_avl_find_max (&spdp_loc_td, &adm->aging); + while (n != NULL) + { + struct spdp_loc_aging * const nextn = ddsrt_avl_find_succ (&spdp_loc_td, &adm->aging, n); + if (t_cutoff.v < n->tsched.v) + { + if (n->tsched.v < t_sched.v) + t_sched = n->tsched; + } + else + { + ddsrt_avl_iter_t it; + for (struct spdp_pp *ppn = ddsrt_avl_iter_first (&spdp_pp_td, &adm->pp, &it); ppn; ppn = ddsrt_avl_iter_next (&it)) + resend_spdp (xp, ppn->pp, &n->c.xloc); + // Why do we keep trying an address where there used to be one if there's no one anymore? Well, + // there might be someone else in the same situation with the cable cut ... + // + // That of course is interesting only if the disappearance of that node was detected by a lease + // expiration. If all participants behind the locator tell us they will be gone, it is a + // different situation: then we can (possibly) delete it immedialy. + // + // There is also the case of the initial set of addresses, there we (probably) want to drop the + // rate over time, but let's not do so now. + // + // So the strategy is to resend for a long time with a decreasing frequency + // default interval is 30s FIXME: tweak configurability + // exponential back-off would be the classic trick but is very rapid + // in this case, arguably, just pinging at the default frequency for + // a limited amount of time seems to make more sense + if (--n->age == 0) + { + ddsrt_avl_delete (&spdp_loc_td, &adm->aging, n); + ddsrt_free (n); + } + else + { + // FIXME: this base_intv thing needs to be done smarter, or else combined with other places + const dds_duration_t base_intv = adm->gv->config.spdp_interval.isdefault ? DDS_SECS (30) : adm->gv->config.spdp_interval.value; + n->tsched = ddsrt_mtime_add_duration (tnow, base_intv); + if (n->tsched.v < t_sched.v) + t_sched = n->tsched; + } + } + n = nextn; + } + ddsrt_mutex_unlock (&adm->lock); + return t_sched; +} + +// unlocked access, but that's ok because the lease duration can't be changed +// and therefore it could even be marked pure +ddsrt_nonnull_all +static dds_duration_t spdp_intv (const struct ddsi_participant *pp) +{ + // FIXME: can easily cache this in pp + if (!pp->e.gv->config.spdp_interval.isdefault) + return pp->e.gv->config.spdp_interval.value; + else + { + // Default interval is 80% of the lease duration with a bit of fiddling around the + // edges (similar to PMD), and with an upper limit + const dds_duration_t mindelta = DDS_MSECS (10); + const dds_duration_t ldur = pp->plist->qos.liveliness.lease_duration; + dds_duration_t intv; + if (ldur < 5 * mindelta / 4) + intv = mindelta; + else if (ldur < DDS_SECS (10)) + intv = 4 * ldur / 5; + else + intv = ldur - DDS_SECS (2); + // Historical maximum interval is 30s, stick to that + if (intv > DDS_SECS (30)) + intv = DDS_SECS (30); + return intv; + } +} + +ddsrt_nonnull_all +static ddsrt_mtime_t spdp_do_live_locators (struct spdp_admin *adm, struct ddsi_xpack *xp, ddsrt_mtime_t tnow) +{ + const dds_duration_t t_coalesce = DDS_MSECS (100); // let's be a bit more precise than for aging + const ddsrt_mtime_t t_cutoff = ddsrt_mtime_add_duration (tnow, t_coalesce); + // Send SPDP messages first, then sort out the new times for sending SPDP messages + // this is because we really want to order them by destination address, because that + // way we can combine the SPDP messages into larger RTPS messages + ddsrt_mutex_lock (&adm->lock); + ddsrt_avl_iter_t loc_it, pp_it; + for (struct spdp_loc_live *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->live, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) + for (struct spdp_pp *ppn = ddsrt_avl_iter_first (&spdp_pp_td, &adm->pp, &pp_it); ppn; ppn = ddsrt_avl_iter_next (&pp_it)) + if (t_cutoff.v >= ppn->tsched.v) + resend_spdp (xp, ppn->pp, &n->c.xloc); + // Update schedule + ddsrt_mtime_t t_sched = DDSRT_MTIME_NEVER; + for (struct spdp_pp *ppn = ddsrt_avl_iter_first (&spdp_pp_td, &adm->pp, &pp_it); ppn; ppn = ddsrt_avl_iter_next (&pp_it)) + { + if (t_cutoff.v >= ppn->tsched.v) + ppn->tsched = ddsrt_mtime_add_duration (tnow, spdp_intv (ppn->pp)); + if (ppn->tsched.v < t_sched.v) + t_sched = ppn->tsched; + } + ddsrt_mutex_unlock (&adm->lock); + return t_sched; +} + +void ddsi_spdp_handle_aging_locators_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *xev, struct ddsi_xpack *xp, void *varg, ddsrt_mtime_t tnow) +{ + (void) varg; + const ddsrt_mtime_t t_sched = spdp_do_aging_locators (gv->spdp_schedule, xp, tnow); + ddsi_resched_xevent_if_earlier (xev, t_sched); +} + +void ddsi_spdp_handle_live_locators_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *xev, struct ddsi_xpack *xp, void *varg, ddsrt_mtime_t tnow) +{ + (void) varg; + const ddsrt_mtime_t t_sched = spdp_do_live_locators (gv->spdp_schedule, xp, tnow); + ddsi_resched_xevent_if_earlier (xev, t_sched); +} + +void ddsi_spdp_force_republish (struct spdp_admin *adm, const struct ddsi_participant *pp) +{ + // Used for: initial publication, QoS update, dispose+unregister, faster rediscovery in + // oneliner in implementation of "hearing!" + // + // It seems there's no need to update the scheduled next publication for any of these cases. + // + // This gets called from various threads and not all of them have a message packer at hand. + // Passing a null pointer for "xpack" hands it off to the tev thread for publication. + ddsrt_mutex_lock (&adm->lock); + ddsrt_avl_iter_t loc_it; + for (struct spdp_loc_live *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->live, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) + resend_spdp (NULL, pp, &n->c.xloc); + for (struct spdp_loc_aging *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->aging, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) + resend_spdp (NULL, pp, &n->c.xloc); + ddsrt_mutex_unlock (&adm->lock); +} + diff --git a/src/core/ddsi/src/ddsi_xmsg.c b/src/core/ddsi/src/ddsi_xmsg.c index fa2a31f30d..897dcbfedd 100644 --- a/src/core/ddsi/src/ddsi_xmsg.c +++ b/src/core/ddsi/src/ddsi_xmsg.c @@ -708,6 +708,25 @@ void ddsi_xmsg_setdst1 (struct ddsi_domaingv *gv, struct ddsi_xmsg *m, const dds ddsi_xmsg_setdst1_common (gv, m, gp); } +void ddsi_xmsg_setdst1_generic (struct ddsi_domaingv *gv, struct ddsi_xmsg *m, const ddsi_guid_prefix_t *gp, const ddsi_xlocator_t *loc) +{ + switch (m->dstmode) + { + case NN_XMSG_DST_UNSET: + case NN_XMSG_DST_ONE: + break; + case NN_XMSG_DST_ALL: + ddsi_unref_addrset (m->dstaddr.all.as); + break; + case NN_XMSG_DST_ALL_UC: + ddsi_unref_addrset (m->dstaddr.all_uc.as); + break; + } + m->dstmode = NN_XMSG_DST_ONE; + m->dstaddr.one.loc = *loc; + ddsi_xmsg_setdst1_common (gv, m, gp); +} + bool ddsi_xmsg_getdst1_prefix (struct ddsi_xmsg *m, ddsi_guid_prefix_t *gp) { if (m->dstmode == NN_XMSG_DST_ONE) From 0685ed1c09aa003e57e82b8f5b7a0fd3510ab445 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Fri, 3 Nov 2023 16:21:31 +0100 Subject: [PATCH 03/13] Cache SPDP sample in participant This eliminates looking up the SPDP sample in the WHC, which became rather painful with the introduction of serdata_plist, and eliminates the problem of the dispose+unregister not getting stored in the WHC (removed from the index, best-effort so not retained until ACKs come in). It also solves having to construct an address set based on the addresses in the SPDP administration and patching that into the SPDP writer, which was a horrible hack to begin with. --- src/core/ddsc/tests/test_oneliner.c | 2 +- .../ddsi/include/dds/ddsi/ddsi_participant.h | 2 + src/core/ddsi/src/ddsi__discovery_spdp.h | 13 - src/core/ddsi/src/ddsi__spdp_schedule.h | 11 +- src/core/ddsi/src/ddsi__transmit.h | 2 +- src/core/ddsi/src/ddsi_discovery_spdp.c | 185 ++------------ src/core/ddsi/src/ddsi_participant.c | 45 +++- src/core/ddsi/src/ddsi_spdp_schedule.c | 228 ++++++++---------- src/core/ddsi/src/ddsi_transmit.c | 2 +- 9 files changed, 180 insertions(+), 310 deletions(-) diff --git a/src/core/ddsc/tests/test_oneliner.c b/src/core/ddsc/tests/test_oneliner.c index 7b29f924b3..5f6d2d989c 100644 --- a/src/core/ddsc/tests/test_oneliner.c +++ b/src/core/ddsc/tests/test_oneliner.c @@ -1858,7 +1858,7 @@ static void dohearing_maybe_imm (struct oneliner_ctx *ctx, bool immediate) wait_for_cleanup (ctx, ctx->es[ent], &xprime->m_guid); ddsi_thread_state_awake (ddsi_lookup_thread_state (), &xprime->m_domain->gv); if ((pp = ddsi_entidx_lookup_participant_guid (xprime->m_domain->gv.entity_index, &xprime->m_guid)) != NULL) - ddsi_spdp_force_republish (pp->e.gv->spdp_schedule, pp); + ddsi_spdp_force_republish (pp->e.gv->spdp_schedule, pp, NULL); ddsi_thread_state_asleep (ddsi_lookup_thread_state ()); dds_entity_unpin (xprime); } diff --git a/src/core/ddsi/include/dds/ddsi/ddsi_participant.h b/src/core/ddsi/include/dds/ddsi/ddsi_participant.h index b8e4913fd0..1e17d88dae 100644 --- a/src/core/ddsi/include/dds/ddsi/ddsi_participant.h +++ b/src/core/ddsi/include/dds/ddsi/ddsi_participant.h @@ -66,6 +66,8 @@ struct ddsi_participant unsigned is_ddsi2_pp: 1; /* true for the "federation leader", the ddsi2 participant itself in OSPL; FIXME: probably should use this for broker mode as well ... */ uint32_t flags; /* flags used when creating this participant */ struct ddsi_plist *plist; /* settings/QoS for this participant */ + struct ddsi_serdata *spdp_serdata; /* SPDP message: we no longer have a writer for it */ + ddsi_seqno_t spdp_seqno; /* sequence number of spdp_serdata; FIXME: move seqno into serdata and delete this */ struct ddsi_xevent *pmd_update_xevent; /* timed event for periodically publishing ParticipantMessageData */ ddsi_locator_t m_locator; /* this is always a unicast address, it is set if it is in the many unicast mode */ struct ddsi_tran_conn * m_conn; /* this is connection to m_locator, if it is set, this is used */ diff --git a/src/core/ddsi/src/ddsi__discovery_spdp.h b/src/core/ddsi/src/ddsi__discovery_spdp.h index 4368a053ca..058e5a6f4f 100644 --- a/src/core/ddsi/src/ddsi__discovery_spdp.h +++ b/src/core/ddsi/src/ddsi__discovery_spdp.h @@ -41,19 +41,6 @@ struct ddsi_participant_builtin_topic_data_locators { void ddsi_get_participant_builtin_topic_data (const struct ddsi_participant *pp, ddsi_plist_t *dst, struct ddsi_participant_builtin_topic_data_locators *locs) ddsrt_nonnull_all; -/** @component discovery */ -int ddsi_spdp_write (struct ddsi_participant *pp); - -/** @component discovery */ -int ddsi_spdp_dispose_unregister (struct ddsi_participant *pp); - -struct ddsi_spdp_broadcast_xevent_cb_arg { - ddsi_guid_t pp_guid; -}; - -/** @component discovery */ -void ddsi_spdp_broadcast_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *ev, UNUSED_ARG (struct ddsi_xpack *xp), void *varg, ddsrt_mtime_t tnow); - /** @component discovery */ void ddsi_handle_spdp (const struct ddsi_receiver_state *rst, ddsi_entityid_t pwr_entityid, ddsi_seqno_t seq, const struct ddsi_serdata *serdata); diff --git a/src/core/ddsi/src/ddsi__spdp_schedule.h b/src/core/ddsi/src/ddsi__spdp_schedule.h index 6ae1d1f99a..2a14065739 100644 --- a/src/core/ddsi/src/ddsi__spdp_schedule.h +++ b/src/core/ddsi/src/ddsi__spdp_schedule.h @@ -18,7 +18,8 @@ extern "C" { #endif -struct spdp_admin; +struct spdp_admin; // ddsi_participant::e.lock gets locked while spdp_admin::lock is held +struct ddsi_proxy_reader; struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) ddsrt_nonnull_all ddsrt_attribute_warn_unused_result; @@ -26,10 +27,12 @@ struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) void ddsi_spdp_scheduler_delete (struct spdp_admin *adm) ddsrt_nonnull_all; +// Locks `pp->e.lock` dds_return_t ddsi_spdp_register_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) ddsrt_nonnull_all ddsrt_attribute_warn_unused_result; // Not an error if `pp` is not registered +// Locks `pp->e.lock` void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) ddsrt_nonnull_all; @@ -45,8 +48,10 @@ void ddsi_spdp_handle_aging_locators_xevent_cb (struct ddsi_domaingv *gv, struct void ddsi_spdp_handle_live_locators_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *xev, struct ddsi_xpack *xp, void *varg, ddsrt_mtime_t tnow) ddsrt_nonnull ((1, 2, 3)); -void ddsi_spdp_force_republish (struct spdp_admin *adm, const struct ddsi_participant *pp) - ddsrt_nonnull_all; +// Locks `pp->e.lock` +// returns false iff there is no SPDP sample yet +bool ddsi_spdp_force_republish (struct spdp_admin *adm, const struct ddsi_participant *pp, const struct ddsi_proxy_reader *prd) + ddsrt_nonnull ((1, 2)); #if defined (__cplusplus) } diff --git a/src/core/ddsi/src/ddsi__transmit.h b/src/core/ddsi/src/ddsi__transmit.h index 8f2de97fa9..279bddc287 100644 --- a/src/core/ddsi/src/ddsi__transmit.h +++ b/src/core/ddsi/src/ddsi__transmit.h @@ -50,7 +50,7 @@ int ddsi_write_and_fini_plist (struct ddsi_writer *wr, ddsi_plist_t *ps, bool al /* When calling the following functions, wr->lock must be held */ /** @component outgoing_rtps */ -dds_return_t ddsi_create_fragment_message (struct ddsi_writer *wr, ddsi_seqno_t seq, struct ddsi_serdata *serdata, uint32_t fragnum, uint16_t nfrags, struct ddsi_proxy_reader *prd,struct ddsi_xmsg **msg, int isnew, uint32_t advertised_fragnum); +dds_return_t ddsi_create_fragment_message (struct ddsi_writer *wr, ddsi_seqno_t seq, struct ddsi_serdata *serdata, uint32_t fragnum, uint16_t nfrags, const struct ddsi_proxy_reader *prd,struct ddsi_xmsg **msg, int isnew, uint32_t advertised_fragnum); /** @component outgoing_rtps */ int ddsi_enqueue_sample_wrlock_held (struct ddsi_writer *wr, ddsi_seqno_t seq, struct ddsi_serdata *serdata, struct ddsi_proxy_reader *prd, int isnew); diff --git a/src/core/ddsi/src/ddsi_discovery_spdp.c b/src/core/ddsi/src/ddsi_discovery_spdp.c index 4257b59b71..b4d255ad76 100644 --- a/src/core/ddsi/src/ddsi_discovery_spdp.c +++ b/src/core/ddsi/src/ddsi_discovery_spdp.c @@ -33,11 +33,10 @@ #include "ddsi__transmit.h" #include "ddsi__lease.h" #include "ddsi__xqos.h" +#include "ddsi__spdp_schedule.h" static bool get_pp_and_spdp_wr (struct ddsi_domaingv *gv, const ddsi_guid_t *pp_guid, struct ddsi_participant **pp, struct ddsi_writer **spdp_wr) ddsrt_nonnull_all; -static bool resend_spdp_sample_by_guid_key (struct ddsi_writer *wr, const ddsi_guid_t *guid, struct ddsi_proxy_reader *prd) - ddsrt_nonnull ((1, 2)); struct locators_builder { ddsi_locators_t *dst; @@ -217,106 +216,12 @@ void ddsi_get_participant_builtin_topic_data (const struct ddsi_participant *pp, #endif } -int ddsi_spdp_write (struct ddsi_participant *pp) -{ - struct ddsi_writer *wr; - ddsi_plist_t ps; - struct ddsi_participant_builtin_topic_data_locators locs; - - if (pp->e.onlylocal) { - /* This topic is only locally available. */ - return 0; - } - - dds_return_t ret = ddsi_get_builtin_writer (pp, DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_WRITER, &wr); - if (ret == DDS_RETCODE_OK && wr == NULL) - return 0; - - ETRACE (pp, "ddsi_spdp_write("PGUIDFMT")\n", PGUID (pp->e.guid)); - if (ret != DDS_RETCODE_OK) - { - ETRACE (pp, "ddsi_spdp_write("PGUIDFMT") - builtin participant writer not found\n", PGUID (pp->e.guid)); - return 0; - } - assert (wr != NULL); - ddsi_get_participant_builtin_topic_data (pp, &ps, &locs); - return ddsi_write_and_fini_plist (wr, &ps, true); -} - -static int ddsi_spdp_dispose_unregister_with_wr (struct ddsi_participant *pp, unsigned entityid) -{ - ddsi_plist_t ps; - struct ddsi_writer *wr; - - dds_return_t ret = ddsi_get_builtin_writer (pp, entityid, &wr); - if (ret == DDS_RETCODE_OK && wr == NULL) - return 0; - else if (ret != DDS_RETCODE_OK) - { - ETRACE (pp, "ddsi_spdp_dispose_unregister("PGUIDFMT") - builtin participant %s writer not found\n", - PGUID (pp->e.guid), entityid == DDSI_ENTITYID_SPDP_RELIABLE_BUILTIN_PARTICIPANT_SECURE_WRITER ? "secure" : ""); - return 0; - } - assert (wr != NULL); - ddsi_plist_init_empty (&ps); - ps.present |= PP_PARTICIPANT_GUID; - ps.participant_guid = pp->e.guid; - return ddsi_write_and_fini_plist (wr, &ps, false); -} - -int ddsi_spdp_dispose_unregister (struct ddsi_participant *pp) -{ - /* - * When disposing a participant, it should be announced on both the - * non-secure and secure writers. - * The receiver will decide from which writer it accepts the dispose. - */ - int ret = ddsi_spdp_dispose_unregister_with_wr(pp, DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_WRITER); - if ((ret > 0) && ddsi_omg_participant_is_secure(pp)) - { - ret = ddsi_spdp_dispose_unregister_with_wr(pp, DDSI_ENTITYID_SPDP_RELIABLE_BUILTIN_PARTICIPANT_SECURE_WRITER); - } - return ret; -} - struct ddsi_spdp_directed_xevent_cb_arg { ddsi_guid_t pp_guid; int nrepeats; ddsi_guid_prefix_t dest_proxypp_guid_prefix; }; -static bool resend_spdp_sample_by_guid_key (struct ddsi_writer *wr, const ddsi_guid_t *guid, struct ddsi_proxy_reader *prd) -{ - /* Look up data in (transient-local) WHC by key value -- FIXME: clearly - a slightly more efficient and elegant way of looking up the key value - is to be preferred */ - struct ddsi_domaingv *gv = wr->e.gv; - bool sample_found; - ddsi_plist_t ps; - ddsi_plist_init_empty (&ps); - ps.present |= PP_PARTICIPANT_GUID; - ps.participant_guid = *guid; - struct ddsi_serdata *sd = ddsi_serdata_from_sample (gv->spdp_type, SDK_KEY, &ps); - ddsi_plist_fini (&ps); - struct ddsi_whc_borrowed_sample sample; - - ddsrt_mutex_lock (&wr->e.lock); - sample_found = ddsi_whc_borrow_sample_key (wr->whc, sd, &sample); - if (sample_found) - { - /* Claiming it is new rather than a retransmit so that the rexmit - limiting won't kick in. It is best-effort and therefore the - updating of the last transmitted sequence number won't take - place anyway. Nor is it necessary to fiddle with heartbeat - control stuff. */ - ddsi_enqueue_spdp_sample_wrlock_held (wr, sample.seq, sample.serdata, prd); - ddsi_whc_return_sample(wr->whc, &sample, false); - } - ddsrt_mutex_unlock (&wr->e.lock); - ddsi_serdata_unref (sd); - return sample_found; -} - static bool get_pp_and_spdp_wr (struct ddsi_domaingv *gv, const ddsi_guid_t *pp_guid, struct ddsi_participant **pp, struct ddsi_writer **spdp_wr) { if ((*pp = ddsi_entidx_lookup_participant_guid (gv->entity_index, pp_guid)) == NULL) @@ -349,15 +254,23 @@ static void ddsi_spdp_directed_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_ const ddsi_guid_t guid = { .prefix = arg->dest_proxypp_guid_prefix, .entityid = { .u = DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_READER } }; struct ddsi_proxy_reader *prd; if ((prd = ddsi_entidx_lookup_proxy_reader_guid (gv->entity_index, &guid)) == NULL) + { GVTRACE ("xmit spdp: no proxy reader "PGUIDFMT"\n", PGUID (guid)); - else if (!resend_spdp_sample_by_guid_key (spdp_wr, &arg->pp_guid, prd)) - GVTRACE ("xmit spdp: suppressing early spdp response from "PGUIDFMT" to %"PRIx32":%"PRIx32":%"PRIx32":%x\n", - PGUID (pp->e.guid), PGUIDPREFIX (arg->dest_proxypp_guid_prefix), DDSI_ENTITYID_PARTICIPANT); - - // Directed events are used to send SPDP packets to newly discovered peers, and used just once - if (--arg->nrepeats == 0 || - pp->plist->qos.liveliness.lease_duration < DDS_SECS (1) || - (!gv->config.spdp_interval.isdefault && gv->config.spdp_interval.value < DDS_SECS (1))) + ddsi_delete_xevent (ev); + } + else if (!ddsi_spdp_force_republish (gv->spdp_schedule, pp, prd)) + { + // it is just a local race, so a few milliseconds should be plenty + ddsrt_mtime_t tnext = ddsrt_mtime_add_duration (tnow, DDS_MSECS (10)); + GVTRACE ("xmit spdp "PGUIDFMT" to %"PRIx32":%"PRIx32":%"PRIx32":%x too early (resched %gs)\n", + PGUID (pp->e.guid), + PGUIDPREFIX (arg->dest_proxypp_guid_prefix), DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_READER, + (double)(tnext.v - tnow.v) / 1e9); + (void) ddsi_resched_xevent_if_earlier (ev, tnext); + } + else if (--arg->nrepeats == 0 || + pp->plist->qos.liveliness.lease_duration < DDS_SECS (1) || + (!gv->config.spdp_interval.isdefault && gv->config.spdp_interval.value < DDS_SECS (1))) { ddsi_delete_xevent (ev); } @@ -372,70 +285,6 @@ static void ddsi_spdp_directed_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_ } } -static void resched_spdp_broadcast (struct ddsi_xevent *ev, struct ddsi_participant *pp, ddsrt_mtime_t tnow) -{ - struct ddsi_domaingv * const gv = pp->e.gv; - const dds_duration_t mindelta = DDS_MSECS (10); - ddsrt_mtime_t tnext; - dds_duration_t intv; - - if (!gv->config.spdp_interval.isdefault) - intv = gv->config.spdp_interval.value; - else - { - // Default interval is 80% of the lease duration with a bit of fiddling around the - // edges (similar to PMD), and with an upper limit - const dds_duration_t ldur = pp->plist->qos.liveliness.lease_duration; - if (ldur < 5 * mindelta / 4) - intv = mindelta; - else if (ldur < DDS_SECS (10)) - intv = 4 * ldur / 5; - else - intv = ldur - DDS_SECS (2); - // Historical maximum interval is 30s, stick to that - if (intv > DDS_SECS (30)) - intv = DDS_SECS (30); - } - - tnext = ddsrt_mtime_add_duration (tnow, intv); - GVTRACE ("xmit spdp "PGUIDFMT" to %"PRIx32":%"PRIx32":%"PRIx32":%x (resched %gs)\n", - PGUID (pp->e.guid), - 0,0,0, DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_READER, - (double)(tnext.v - tnow.v) / 1e9); - (void) ddsi_resched_xevent_if_earlier (ev, tnext); -} - -void ddsi_spdp_broadcast_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *ev, UNUSED_ARG (struct ddsi_xpack *xp), void *varg, ddsrt_mtime_t tnow) -{ - /* Like the writer pointer in the heartbeat event, the participant pointer in the spdp event is assumed valid. */ - struct ddsi_spdp_broadcast_xevent_cb_arg * const arg = varg; - struct ddsi_participant *pp; - struct ddsi_writer *spdp_wr; - - if (!get_pp_and_spdp_wr (gv, &arg->pp_guid, &pp, &spdp_wr)) - return; - - if (!resend_spdp_sample_by_guid_key (spdp_wr, &arg->pp_guid, NULL)) - { -#ifndef NDEBUG - /* If undirected, it is pp->spdp_xevent, and that one must never - run into an empty WHC unless it is already marked for deletion. - - If directed, it may happen in response to an SPDP packet during - creation of the participant. This is because pp is inserted in - the hash table quite early on, which, in turn, is because it - needs to be visible for creating its builtin endpoints. But in - this case, the initial broadcast of the SPDP packet of pp will - happen shortly. */ - ddsrt_mutex_lock (&pp->e.lock); - assert (ddsi_delete_xevent_pending (ev)); - ddsrt_mutex_unlock (&pp->e.lock); -#endif - } - - resched_spdp_broadcast (ev, pp, tnow); -} - static unsigned pseudo_random_delay (const ddsi_guid_t *x, const ddsi_guid_t *y, ddsrt_mtime_t tnow) { /* You know, an ordinary random generator would be even better, but diff --git a/src/core/ddsi/src/ddsi_participant.c b/src/core/ddsi/src/ddsi_participant.c index fa8af5062a..152b2e4aca 100644 --- a/src/core/ddsi/src/ddsi_participant.c +++ b/src/core/ddsi/src/ddsi_participant.c @@ -609,6 +609,10 @@ struct ddsi_participant *ddsi_ref_participant (struct ddsi_participant *pp, cons return pp; } +// FIXME: do this in the proper location +ddsrt_nonnull_all ddsrt_attribute_warn_unused_result +static int update_participant_spdp_sample (struct ddsi_participant *pp, bool isalive); + void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid *guid_of_refing_entity) { static const unsigned builtin_endpoints_tab[] = { @@ -684,7 +688,11 @@ void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid if (pp->pmd_update_xevent) ddsi_delete_xevent (pp->pmd_update_xevent); + + // FIXME: locking + update_participant_spdp_sample (pp, false); ddsi_spdp_unregister_participant (gv->spdp_schedule, pp); + ddsi_serdata_unref (pp->spdp_serdata); /* If this happens to be the privileged_pp, clear it */ ddsrt_mutex_lock (&gv->privileged_pp_lock); @@ -755,6 +763,36 @@ void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid } } +ddsrt_nonnull_all ddsrt_attribute_warn_unused_result +static int update_participant_spdp_sample_locked (struct ddsi_participant *pp, bool isalive) +{ + if (pp->e.onlylocal) + return 0; + + ddsi_plist_t ps; + struct ddsi_participant_builtin_topic_data_locators locs; + ddsi_get_participant_builtin_topic_data (pp, &ps, &locs); + struct ddsi_serdata * const serdata = ddsi_serdata_from_sample (pp->e.gv->spdp_type, isalive ? SDK_DATA : SDK_KEY, &ps); + ddsi_plist_fini (&ps); + serdata->statusinfo = isalive ? 0 : (DDSI_STATUSINFO_DISPOSE | DDSI_STATUSINFO_UNREGISTER); + serdata->timestamp = ddsrt_time_wallclock (); + + ++pp->spdp_seqno; + if (pp->spdp_serdata) + ddsi_serdata_unref (pp->spdp_serdata); + pp->spdp_serdata = serdata; + return 0; +} + +ddsrt_nonnull_all ddsrt_attribute_warn_unused_result +static int update_participant_spdp_sample (struct ddsi_participant *pp, bool isalive) +{ + ddsrt_mutex_lock (&pp->e.lock); + const int ret = update_participant_spdp_sample_locked (pp, isalive); + ddsrt_mutex_unlock (&pp->e.lock); + return ret; +} + dds_return_t ddsi_new_participant (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv, unsigned flags, const ddsi_plist_t *plist) { struct ddsi_participant *pp; @@ -833,6 +871,8 @@ dds_return_t ddsi_new_participant (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv pp->plist = ddsrt_malloc (sizeof (*pp->plist)); ddsi_plist_copy (pp->plist, plist); ddsi_xqos_mergein_missing(&pp->plist->qos, &gv->default_local_xqos_pp, ~(uint64_t)0); + pp->spdp_seqno = 0; + pp->spdp_serdata = NULL; #ifdef DDS_HAS_SECURITY pp->sec_attr = NULL; @@ -957,6 +997,7 @@ dds_return_t ddsi_new_participant (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv if (!(flags & RTPS_PF_NO_BUILTIN_WRITERS) || !(flags & RTPS_PF_NO_PRIVILEGED_PP)) { + update_participant_spdp_sample (pp, true); if (ddsi_spdp_register_participant (gv->spdp_schedule, pp) != 0) abort (); // FIXME @@ -1004,9 +1045,9 @@ void ddsi_update_participant_plist (struct ddsi_participant *pp, const ddsi_plis { ddsrt_mutex_lock (&pp->e.lock); if (ddsi_update_qos_locked (&pp->e, &pp->plist->qos, &plist->qos, ddsrt_time_wallclock ())) - ddsi_spdp_write (pp); // FIXME: this takes care of generating new SPDP message; it seems to not handle the secure one + update_participant_spdp_sample_locked (pp, true); ddsrt_mutex_unlock (&pp->e.lock); - ddsi_spdp_force_republish (pp->e.gv->spdp_schedule, pp); + ddsi_spdp_force_republish (pp->e.gv->spdp_schedule, pp, NULL); } static void gc_delete_participant (struct ddsi_gcreq *gcreq) diff --git a/src/core/ddsi/src/ddsi_spdp_schedule.c b/src/core/ddsi/src/ddsi_spdp_schedule.c index 02f4f7e8c9..351e476413 100644 --- a/src/core/ddsi/src/ddsi_spdp_schedule.c +++ b/src/core/ddsi/src/ddsi_spdp_schedule.c @@ -190,7 +190,7 @@ struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) ddsrt_avl_init (&spdp_loc_td, &adm->aging); ddsrt_avl_init (&spdp_loc_td, &adm->live); ddsrt_avl_init (&spdp_pp_td, &adm->pp); - + struct add_as_disc_helper_arg arg = { .adm = adm, .all_ok = true }; ddsi_addrset_forall (gv->as_disc, add_as_disc_helper, &arg); if (!arg.all_ok) @@ -202,7 +202,7 @@ struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) ddsrt_free (adm); return NULL; } - + // from here on we potentially have multiple threads messing with `adm` const ddsrt_mtime_t t_sched = ddsrt_mtime_add_duration (ddsrt_time_monotonic (), DDS_MSECS (0)); adm->aging_xev = ddsi_qxev_callback (gv->xevents, t_sched, ddsi_spdp_handle_aging_locators_xevent_cb, NULL, 0, true); @@ -224,51 +224,21 @@ void ddsi_spdp_scheduler_delete (struct spdp_admin *adm) ddsrt_free (adm); } -#ifndef NDEBUG -static bool spdp_message_exists (const struct ddsi_participant *pp) -{ - struct ddsi_writer *spdp_wr; - dds_return_t ret = ddsi_get_builtin_writer (pp, DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_WRITER, &spdp_wr); - if (ret != DDS_RETCODE_OK || spdp_wr == NULL) - return true; - else - { - // Construct a key for looking up the SPDP sample in the WHC ... - ddsi_plist_t ps; - ddsi_plist_init_empty (&ps); - ps.present |= PP_PARTICIPANT_GUID; - ps.participant_guid = pp->e.guid; - struct ddsi_serdata * const sdkey = ddsi_serdata_from_sample (pp->e.gv->spdp_type, SDK_KEY, &ps); - ddsi_plist_fini (&ps); - // ... then borrow it, surreptitiously increment its refcount before returning it so we can unlock earlier ... - struct ddsi_whc_borrowed_sample sample; - ddsrt_mutex_lock (&spdp_wr->e.lock); - const bool exists = ddsi_whc_borrow_sample_key (spdp_wr->whc, sdkey, &sample); - if (exists) - ddsi_whc_return_sample (spdp_wr->whc, &sample, false); - ddsrt_mutex_unlock (&spdp_wr->e.lock); - return exists; - } -} -#endif - dds_return_t ddsi_spdp_register_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) { - /* SPDP periodic broadcast uses the retransmit path, so the initial - publication must be done differently. Must be later than making - the participant globally visible, or the SPDP processing won't - recognise the participant as a local one. */ - if (ddsi_spdp_write ((struct ddsi_participant *) pp) < 0) - return DDS_RETCODE_OK; // FIXME: Need to check why it can fail; this is what it used to do - - ddsi_spdp_force_republish (adm, pp); +#ifndef NDEBUG + ddsrt_mutex_lock ((ddsrt_mutex_t *) &pp->e.lock); + assert (pp->spdp_seqno == 1); + assert (pp->spdp_serdata); + assert (pp->spdp_serdata->statusinfo == 0); + ddsrt_mutex_unlock ((ddsrt_mutex_t *) &pp->e.lock); +#endif + ddsi_spdp_force_republish (adm, pp, NULL); // FIXME: let's just cache the serdata in the participant and do all the publishing in this file // FIXME: what about the secure writer? It overwrites its as with as_disc on creation, but it is reliable and therefore the matching code will recompute it, so I don't think there is any need to do anything for that one (except possibly making sure it does get updated on a QoS change) - - // FIXME: why would I register a participant if its SPDP writer cannot be found? Maybe it can change ... - assert (spdp_message_exists (pp)); + ddsrt_mutex_lock (&adm->lock); ddsrt_avl_ipath_t ip; const struct spdp_pp template = { .pp = pp }; @@ -293,25 +263,22 @@ dds_return_t ddsi_spdp_register_participant (struct spdp_admin *adm, const struc } } -static struct ddsi_addrset *make_spdp_addrset (struct spdp_admin *adm) -{ - struct ddsi_addrset *as = ddsi_new_addrset (); - ddsrt_mutex_lock (&adm->lock); - ddsrt_avl_iter_t loc_it; - for (struct spdp_loc_live *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->live, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) - ddsi_add_xlocator_to_addrset (adm->gv, as, &n->c.xloc); - for (struct spdp_loc_aging *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->aging, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) - ddsi_add_xlocator_to_addrset (adm->gv, as, &n->c.xloc); - ddsrt_mutex_unlock (&adm->lock); - return as; -} - void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) { +#ifndef NDEBUG + ddsrt_mutex_lock ((ddsrt_mutex_t *) &pp->e.lock); + assert (pp->spdp_seqno > 1); + assert (pp->spdp_serdata); + assert (pp->spdp_serdata->statusinfo == (DDSI_STATUSINFO_DISPOSE | DDSI_STATUSINFO_UNREGISTER)); + ddsrt_mutex_unlock ((ddsrt_mutex_t *) &pp->e.lock); +#endif + ddsi_spdp_force_republish (adm, pp, NULL); + ddsrt_mutex_lock (&adm->lock); ddsrt_avl_dpath_t dp; const struct spdp_pp template = { .pp = pp }; struct spdp_pp *ppn; + // FIXME: do I really have to allow for ppn == NULL? if ((ppn = ddsrt_avl_lookup_dpath (&spdp_pp_td, &adm->pp, &template, &dp)) != NULL) { @@ -320,29 +287,6 @@ void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi ddsrt_free (ppn); } ddsrt_mutex_unlock (&adm->lock); - - /* SPDP relies on the WHC, but dispose-unregister will empty - it. The event handler verifies the event has already been - scheduled for deletion when it runs into an empty WHC */ - // FIXME: store serdata in pp and this hack can go away - // HACK HACK HACK - struct ddsi_addrset *as = make_spdp_addrset (adm); - const ddsi_guid_t subguid = { .prefix = pp->e.guid.prefix, .entityid = { .u = DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_WRITER } }; - struct ddsi_writer *wr = ddsi_entidx_lookup_writer_guid (pp->e.gv->entity_index, &subguid); - assert (wr != NULL); - ddsrt_mutex_lock (&wr->e.lock); - ddsi_unref_addrset (wr->as); - wr->as = as; - ddsrt_mutex_unlock (&wr->e.lock); - // HACK HACK HACK - // FIXME: this currently also handles the secure SPDP message, but that may change - ddsi_spdp_dispose_unregister ((struct ddsi_participant *) pp); - // HACK HACK HACK - ddsrt_mutex_lock (&wr->e.lock); - ddsi_unref_addrset (wr->as); - wr->as = ddsi_new_addrset (); - ddsrt_mutex_unlock (&wr->e.lock); - // HACK HACK HACK } dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc) @@ -420,50 +364,68 @@ void ddsi_spdp_unref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xlo ddsrt_mutex_unlock (&adm->lock); } +enum resend_spdp_dst_kind { + RSDK_LOCATOR, + RSDK_PROXY_READER +}; + +struct resend_spdp_dst { + enum resend_spdp_dst_kind kind; + union { + const ddsi_xlocator_t *xloc; + const struct ddsi_proxy_reader *prd; + } u; +}; + ddsrt_nonnull ((2, 3)) -static void resend_spdp (struct ddsi_xpack *xp, const struct ddsi_participant *pp, const ddsi_xlocator_t *xloc) +static void resend_spdp (struct ddsi_xpack *xp, const struct ddsi_participant *pp, const struct resend_spdp_dst *dst) { + // SPDP writer serves no purpose other than providing some things to ddsi_create_fragment_message, and for + // SPDP most of that information is in the uninteresting state (because it is best-effort and can't be + // fragmented) but passing all that info in a different way is also unpleasant. struct ddsi_writer *spdp_wr; dds_return_t ret = ddsi_get_builtin_writer (pp, DDSI_ENTITYID_SPDP_BUILTIN_PARTICIPANT_WRITER, &spdp_wr); if (ret != DDS_RETCODE_OK || spdp_wr == NULL) + { + ETRACE (pp, "ddsi_spdp_write("PGUIDFMT") - builtin participant writer not found\n", PGUID (pp->e.guid)); return; - // Construct a key for looking up the SPDP sample in the WHC ... - // FIXME: this is just way too inefficient (and we do it over and over ...) - // FIXME: why not cache a pointer to it in pp? - ddsi_plist_t ps; - ddsi_plist_init_empty (&ps); - ps.present |= PP_PARTICIPANT_GUID; - ps.participant_guid = pp->e.guid; - struct ddsi_serdata * const sdkey = ddsi_serdata_from_sample (pp->e.gv->spdp_type, SDK_KEY, &ps); - ddsi_plist_fini (&ps); - // ... then borrow it, surreptitiously increment its refcount before returning it so we can unlock earlier ... - struct ddsi_whc_borrowed_sample sample; - ddsrt_mutex_lock (&spdp_wr->e.lock); - if (!ddsi_whc_borrow_sample_key (spdp_wr->whc, sdkey, &sample)) - sample.serdata = NULL; - else + } + + const ddsi_xlocator_t *xloc = NULL; + const struct ddsi_proxy_reader *prd = NULL; + switch (dst->kind) { - (void) ddsi_serdata_ref (sample.serdata); - ddsi_whc_return_sample (spdp_wr->whc, &sample, false); + case RSDK_LOCATOR: + xloc = dst->u.xloc; + prd = NULL; + break; + case RSDK_PROXY_READER: + xloc = NULL; + prd = dst->u.prd; + break; } - ddsrt_mutex_unlock (&spdp_wr->e.lock); - ddsi_serdata_unref (sdkey); - // ... and send it - if (sample.serdata) + assert ((xloc != NULL) != (prd != NULL)); + + ddsrt_mutex_lock ((ddsrt_mutex_t *) &pp->e.lock); + ddsi_seqno_t seqno = pp->spdp_seqno; + struct ddsi_serdata *serdata = ddsi_serdata_ref (pp->spdp_serdata); + ddsrt_mutex_unlock ((ddsrt_mutex_t *) &pp->e.lock); + + static const ddsi_guid_prefix_t nullguidprefix; + struct ddsi_xmsg *msg; + // FIXME: need a spdp_wr for this, but we don't need one for any other reason (can't fragment them anyway, so its trivial) + if (ddsi_create_fragment_message (spdp_wr, seqno, serdata, 0, UINT16_MAX, prd, &msg, 1, UINT32_MAX) >= 0) { - static const ddsi_guid_prefix_t nullguidprefix; - struct ddsi_xmsg *msg; - if (ddsi_create_fragment_message (spdp_wr, sample.seq, sample.serdata, 0, UINT16_MAX, NULL, &msg, 1, UINT32_MAX) >= 0) - { - // FIXME: ddsi_create_fragment_message set the wrong destination, maybe refactor that? + // FIXME: ddsi_create_fragment_message set the wrong destination so we have to patch it. Maybe refactor that? + if (xloc) ddsi_xmsg_setdst1_generic (pp->e.gv, msg, &nullguidprefix, xloc); - if (xp) - ddsi_xpack_addmsg (xp, msg, 0); - else - ddsi_qxev_msg (pp->e.gv->xevents, msg); - } - ddsi_serdata_unref (sample.serdata); + + if (xp) + ddsi_xpack_addmsg (xp, msg, 0); + else + ddsi_qxev_msg (pp->e.gv->xevents, msg); } + ddsi_serdata_unref (serdata); } ddsrt_nonnull_all @@ -486,7 +448,7 @@ static ddsrt_mtime_t spdp_do_aging_locators (struct spdp_admin *adm, struct ddsi { ddsrt_avl_iter_t it; for (struct spdp_pp *ppn = ddsrt_avl_iter_first (&spdp_pp_td, &adm->pp, &it); ppn; ppn = ddsrt_avl_iter_next (&it)) - resend_spdp (xp, ppn->pp, &n->c.xloc); + resend_spdp (xp, ppn->pp, &(struct resend_spdp_dst){ .kind = RSDK_LOCATOR, .u = { .xloc = &n->c.xloc }}); // Why do we keep trying an address where there used to be one if there's no one anymore? Well, // there might be someone else in the same situation with the cable cut ... // @@ -563,7 +525,7 @@ static ddsrt_mtime_t spdp_do_live_locators (struct spdp_admin *adm, struct ddsi_ for (struct spdp_loc_live *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->live, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) for (struct spdp_pp *ppn = ddsrt_avl_iter_first (&spdp_pp_td, &adm->pp, &pp_it); ppn; ppn = ddsrt_avl_iter_next (&pp_it)) if (t_cutoff.v >= ppn->tsched.v) - resend_spdp (xp, ppn->pp, &n->c.xloc); + resend_spdp (xp, ppn->pp, &(struct resend_spdp_dst){ .kind = RSDK_LOCATOR, .u = { .xloc = &n->c.xloc }}); // Update schedule ddsrt_mtime_t t_sched = DDSRT_MTIME_NEVER; for (struct spdp_pp *ppn = ddsrt_avl_iter_first (&spdp_pp_td, &adm->pp, &pp_it); ppn; ppn = ddsrt_avl_iter_next (&pp_it)) @@ -591,7 +553,7 @@ void ddsi_spdp_handle_live_locators_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_resched_xevent_if_earlier (xev, t_sched); } -void ddsi_spdp_force_republish (struct spdp_admin *adm, const struct ddsi_participant *pp) +bool ddsi_spdp_force_republish (struct spdp_admin *adm, const struct ddsi_participant *pp, const struct ddsi_proxy_reader *prd) { // Used for: initial publication, QoS update, dispose+unregister, faster rediscovery in // oneliner in implementation of "hearing!" @@ -600,12 +562,36 @@ void ddsi_spdp_force_republish (struct spdp_admin *adm, const struct ddsi_partic // // This gets called from various threads and not all of them have a message packer at hand. // Passing a null pointer for "xpack" hands it off to the tev thread for publication. - ddsrt_mutex_lock (&adm->lock); - ddsrt_avl_iter_t loc_it; - for (struct spdp_loc_live *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->live, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) - resend_spdp (NULL, pp, &n->c.xloc); - for (struct spdp_loc_aging *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->aging, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) - resend_spdp (NULL, pp, &n->c.xloc); - ddsrt_mutex_unlock (&adm->lock); -} + if (prd == NULL) + { +#ifndef NDEBUG + ddsrt_mutex_lock ((ddsrt_mutex_t *) &pp->e.lock); + assert (pp->spdp_serdata); + ddsrt_mutex_unlock ((ddsrt_mutex_t *) &pp->e.lock); +#endif + ddsrt_mutex_lock (&adm->lock); + ddsrt_avl_iter_t loc_it; + for (struct spdp_loc_live *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->live, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) + resend_spdp (NULL, pp, &(struct resend_spdp_dst){ .kind = RSDK_LOCATOR, .u = { .xloc = &n->c.xloc }}); + for (struct spdp_loc_aging *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->aging, &loc_it); n; n = ddsrt_avl_iter_next (&loc_it)) + resend_spdp (NULL, pp, &(struct resend_spdp_dst){ .kind = RSDK_LOCATOR, .u = { .xloc = &n->c.xloc }}); + ddsrt_mutex_unlock (&adm->lock); + return true; + } + else + { + ddsrt_mutex_lock ((ddsrt_mutex_t *) &pp->e.lock); + if (pp->spdp_serdata != NULL) + { + ddsrt_mutex_unlock ((ddsrt_mutex_t *) &pp->e.lock); + resend_spdp (NULL, pp, &(struct resend_spdp_dst){ .kind = RSDK_PROXY_READER, .u = { .prd = prd }}); + return true; + } + else + { + ddsrt_mutex_unlock ((ddsrt_mutex_t *) &pp->e.lock); + return false; + } + } +} diff --git a/src/core/ddsi/src/ddsi_transmit.c b/src/core/ddsi/src/ddsi_transmit.c index 4312702faf..678d0217f7 100644 --- a/src/core/ddsi/src/ddsi_transmit.c +++ b/src/core/ddsi/src/ddsi_transmit.c @@ -124,7 +124,7 @@ static dds_return_t ddsi_create_fragment_message_simple (struct ddsi_writer *wr, return 0; } -dds_return_t ddsi_create_fragment_message (struct ddsi_writer *wr, ddsi_seqno_t seq, struct ddsi_serdata *serdata, uint32_t fragnum, uint16_t nfrags, struct ddsi_proxy_reader *prd, struct ddsi_xmsg **pmsg, int isnew, uint32_t advertised_fragnum) +dds_return_t ddsi_create_fragment_message (struct ddsi_writer *wr, ddsi_seqno_t seq, struct ddsi_serdata *serdata, uint32_t fragnum, uint16_t nfrags, const struct ddsi_proxy_reader *prd, struct ddsi_xmsg **pmsg, int isnew, uint32_t advertised_fragnum) { /* We always fragment into FRAGMENT_SIZEd fragments, which are near the smallest allowed fragment size & can't be bothered (yet) to From d72b56f96fe568314b7728c802f152a3e0d7b67f Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Fri, 6 Sep 2024 11:38:58 +0200 Subject: [PATCH 04/13] Raise default MaxAutoParticipantIndex The old default of 9 (so 10 participants) was hit rather often. This raises the default to 99 (so 100 participants). The only real downside is that increases the number of SPDP packets 10x if unicast discovery is used. --- docs/manual/config/config_file_reference.rst | 6 +++--- docs/manual/options.md | 6 +++--- etc/cyclonedds.rnc | 6 +++--- etc/cyclonedds.xsd | 6 +++--- src/core/ddsi/defconfig.c | 4 ++-- src/core/ddsi/src/ddsi__cfgelems.h | 5 +++-- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/manual/config/config_file_reference.rst b/docs/manual/config/config_file_reference.rst index a526e4da16..527214d6b4 100644 --- a/docs/manual/config/config_file_reference.rst +++ b/docs/manual/config/config_file_reference.rst @@ -187,9 +187,9 @@ The default value is: ``10 s`` Integer -This element specifies the maximum DDSI participant index selected by this instance of the Cyclone DDS service if the Discovery/ParticipantIndex is "auto". +This element specifies the maximum DDSI participant index selected by this instance of the Cyclone DDS service if the Discovery/ParticipantIndex is "auto". This also determines the range of port numbers pinged by default for unicast participant discovery. -The default value is: ``9`` +The default value is: ``99`` .. _`//CycloneDDS/Domain/Discovery/ParticipantIndex`: @@ -2718,7 +2718,7 @@ The default value is: ``none`` .. generated from ddsi_config.h[e6e75c7c07b3b91a92715063cfd8abdd0fbd8b08] generated from ddsi__cfgunits.h[bd22f0c0ed210501d0ecd3b07c992eca549ef5aa] - generated from ddsi__cfgelems.h[69679834d0a592a339803ed27e3966adc900d592] + generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] generated from _confgen.c[237308acd53897a34e8c643e16e05a61d73ffd65] diff --git a/docs/manual/options.md b/docs/manual/options.md index bd01a1bb4c..1da11441ca 100644 --- a/docs/manual/options.md +++ b/docs/manual/options.md @@ -118,9 +118,9 @@ The default value is: `10 s` #### //CycloneDDS/Domain/Discovery/MaxAutoParticipantIndex Integer -This element specifies the maximum DDSI participant index selected by this instance of the Cyclone DDS service if the Discovery/ParticipantIndex is "auto". +This element specifies the maximum DDSI participant index selected by this instance of the Cyclone DDS service if the Discovery/ParticipantIndex is "auto". This also determines the range of port numbers pinged by default for unicast participant discovery. -The default value is: `9` +The default value is: `99` #### //CycloneDDS/Domain/Discovery/ParticipantIndex @@ -1908,7 +1908,7 @@ The categorisation of tracing output is incomplete and hence most of the verbosi The default value is: `none` - + diff --git a/etc/cyclonedds.rnc b/etc/cyclonedds.rnc index 0370114513..671bd49466 100644 --- a/etc/cyclonedds.rnc +++ b/etc/cyclonedds.rnc @@ -83,8 +83,8 @@ CycloneDDS configuration""" ] ] duration }? & [ a:documentation [ xml:lang="en" """ -

This element specifies the maximum DDSI participant index selected by this instance of the Cyclone DDS service if the Discovery/ParticipantIndex is "auto".

-

The default value is: 9

""" ] ] +

This element specifies the maximum DDSI participant index selected by this instance of the Cyclone DDS service if the Discovery/ParticipantIndex is "auto". This also determines the range of port numbers pinged by default for unicast participant discovery.

+

The default value is: 99

""" ] ] element MaxAutoParticipantIndex { xsd:integer }? @@ -1321,7 +1321,7 @@ MIIEpAIBAAKCAQEA3HIh...AOBaaqSV37XBUJg==
} # generated from ddsi_config.h[e6e75c7c07b3b91a92715063cfd8abdd0fbd8b08] # generated from ddsi__cfgunits.h[bd22f0c0ed210501d0ecd3b07c992eca549ef5aa] -# generated from ddsi__cfgelems.h[69679834d0a592a339803ed27e3966adc900d592] +# generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] # generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] # generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] # generated from _confgen.c[237308acd53897a34e8c643e16e05a61d73ffd65] diff --git a/etc/cyclonedds.xsd b/etc/cyclonedds.xsd index 15e0a9c9c5..1df49d3823 100644 --- a/etc/cyclonedds.xsd +++ b/etc/cyclonedds.xsd @@ -165,8 +165,8 @@ CycloneDDS configuration -<p>This element specifies the maximum DDSI participant index selected by this instance of the Cyclone DDS service if the Discovery/ParticipantIndex is "auto".</p> -<p>The default value is: <code>9</code></p> +<p>This element specifies the maximum DDSI participant index selected by this instance of the Cyclone DDS service if the Discovery/ParticipantIndex is "auto". This also determines the range of port numbers pinged by default for unicast participant discovery.</p> +<p>The default value is: <code>99</code></p> @@ -1981,7 +1981,7 @@ MIIEpAIBAAKCAQEA3HIh...AOBaaqSV37XBUJg==<br> - + diff --git a/src/core/ddsi/defconfig.c b/src/core/ddsi/defconfig.c index 53614ab3ab..f469990405 100644 --- a/src/core/ddsi/defconfig.c +++ b/src/core/ddsi/defconfig.c @@ -31,7 +31,7 @@ void ddsi_config_init_default (struct ddsi_config *cfg) cfg->extDomainId.isdefault = 1; cfg->ds_grace_period = INT64_C (30000000000); cfg->participantIndex = INT32_C (-3); - cfg->maxAutoParticipantIndex = INT32_C (9); + cfg->maxAutoParticipantIndex = INT32_C (99); cfg->spdpMulticastAddressString = "239.255.0.1"; cfg->spdp_interval.isdefault = 1; cfg->ports.base = UINT32_C (7400); @@ -101,7 +101,7 @@ void ddsi_config_init_default (struct ddsi_config *cfg) } /* generated from ddsi_config.h[e6e75c7c07b3b91a92715063cfd8abdd0fbd8b08] */ /* generated from ddsi__cfgunits.h[bd22f0c0ed210501d0ecd3b07c992eca549ef5aa] */ -/* generated from ddsi__cfgelems.h[69679834d0a592a339803ed27e3966adc900d592] */ +/* generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] */ /* generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] */ /* generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] */ /* generated from _confgen.c[237308acd53897a34e8c643e16e05a61d73ffd65] */ diff --git a/src/core/ddsi/src/ddsi__cfgelems.h b/src/core/ddsi/src/ddsi__cfgelems.h index 9f67e84f85..de29099e5d 100644 --- a/src/core/ddsi/src/ddsi__cfgelems.h +++ b/src/core/ddsi/src/ddsi__cfgelems.h @@ -1932,13 +1932,14 @@ static struct cfgelem discovery_cfgelems[] = { "
  • default: use none if multicast discovery is used on all " "selected network interfaces, else auto.
  • " )), - INT("MaxAutoParticipantIndex", NULL, 1, "9", + INT("MaxAutoParticipantIndex", NULL, 1, "99", MEMBER(maxAutoParticipantIndex), FUNCTIONS(0, uf_natint, 0, pf_int), DESCRIPTION( "

    This element specifies the maximum DDSI participant index selected " "by this instance of the Cyclone DDS service if the " - "Discovery/ParticipantIndex is \"auto\".

    " + "Discovery/ParticipantIndex is \"auto\". This also determines the range " + "of port numbers pinged by default for unicast participant discovery.

    " )), STRING("SPDPMulticastAddress", NULL, 1, "239.255.0.1", MEMBER(spdpMulticastAddressString), From 81e061201508600b23f231f61bdfb31d3edd0d27 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Fri, 6 Sep 2024 11:39:22 +0200 Subject: [PATCH 05/13] Fix memory leak on OOM in _confgen Signed-off-by: Erik Boasson --- docs/manual/config/config_file_reference.rst | 2 +- docs/manual/options.md | 2 +- etc/cyclonedds.rnc | 2 +- etc/cyclonedds.xsd | 2 +- src/core/ddsi/defconfig.c | 2 +- src/tools/_confgen/_confgen.c | 4 +++- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/manual/config/config_file_reference.rst b/docs/manual/config/config_file_reference.rst index 527214d6b4..01678d678e 100644 --- a/docs/manual/config/config_file_reference.rst +++ b/docs/manual/config/config_file_reference.rst @@ -2721,7 +2721,7 @@ The default value is: ``none`` generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] - generated from _confgen.c[237308acd53897a34e8c643e16e05a61d73ffd65] + generated from _confgen.c[86c631048046ed4e14c46dba40e5253b50a748fe] generated from generate_rnc.c[b50e4b7ab1d04b2bc1d361a0811247c337b74934] generated from generate_md.c[789b92e422631684352909cfb8bf43f6ceb16a01] generated from generate_rst.c[3c4b523fbb57c8e4a7e247379d06a8021ccc21c4] diff --git a/docs/manual/options.md b/docs/manual/options.md index 1da11441ca..60ede6dfc1 100644 --- a/docs/manual/options.md +++ b/docs/manual/options.md @@ -1911,7 +1911,7 @@ The default value is: `none` - + diff --git a/etc/cyclonedds.rnc b/etc/cyclonedds.rnc index 671bd49466..b450751fa4 100644 --- a/etc/cyclonedds.rnc +++ b/etc/cyclonedds.rnc @@ -1324,7 +1324,7 @@ MIIEpAIBAAKCAQEA3HIh...AOBaaqSV37XBUJg==
    # generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] # generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] # generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] -# generated from _confgen.c[237308acd53897a34e8c643e16e05a61d73ffd65] +# generated from _confgen.c[86c631048046ed4e14c46dba40e5253b50a748fe] # generated from generate_rnc.c[b50e4b7ab1d04b2bc1d361a0811247c337b74934] # generated from generate_md.c[789b92e422631684352909cfb8bf43f6ceb16a01] # generated from generate_rst.c[3c4b523fbb57c8e4a7e247379d06a8021ccc21c4] diff --git a/etc/cyclonedds.xsd b/etc/cyclonedds.xsd index 1df49d3823..35d31d02c7 100644 --- a/etc/cyclonedds.xsd +++ b/etc/cyclonedds.xsd @@ -1984,7 +1984,7 @@ MIIEpAIBAAKCAQEA3HIh...AOBaaqSV37XBUJg==<br> - + diff --git a/src/core/ddsi/defconfig.c b/src/core/ddsi/defconfig.c index f469990405..4d3bf2f6f4 100644 --- a/src/core/ddsi/defconfig.c +++ b/src/core/ddsi/defconfig.c @@ -104,7 +104,7 @@ void ddsi_config_init_default (struct ddsi_config *cfg) /* generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] */ /* generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] */ /* generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] */ -/* generated from _confgen.c[237308acd53897a34e8c643e16e05a61d73ffd65] */ +/* generated from _confgen.c[86c631048046ed4e14c46dba40e5253b50a748fe] */ /* generated from generate_rnc.c[b50e4b7ab1d04b2bc1d361a0811247c337b74934] */ /* generated from generate_md.c[789b92e422631684352909cfb8bf43f6ceb16a01] */ /* generated from generate_rst.c[3c4b523fbb57c8e4a7e247379d06a8021ccc21c4] */ diff --git a/src/tools/_confgen/_confgen.c b/src/tools/_confgen/_confgen.c index b54b474985..74e2d5406c 100644 --- a/src/tools/_confgen/_confgen.c +++ b/src/tools/_confgen/_confgen.c @@ -382,8 +382,10 @@ static int generate_list_pattern(struct cfgelem *elem) lst[pos++] = '\0'; assert(pos == size); size_t patsz = 8 + strlen(val) + 2 * strlen(lst); - if ((pat = malloc (patsz)) == NULL) + if ((pat = malloc (patsz)) == NULL) { + free (lst); return -1; + } if (strlen(val) != 0) snprintf(pat, patsz, "%s|(%s(,%s)*)", val, lst, lst); else From 51a4c9e689ecdde8b2b200a2e5d90556a9f9b1c7 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Fri, 6 Sep 2024 11:58:56 +0200 Subject: [PATCH 06/13] Remove as_disc, track addresses in spdp scheduler --- .../ddsi/include/dds/ddsi/ddsi_domaingv.h | 1 - src/core/ddsi/src/ddsi__proxy_participant.h | 2 +- src/core/ddsi/src/ddsi__spdp_schedule.h | 2 +- src/core/ddsi/src/ddsi_init.c | 119 +--------- src/core/ddsi/src/ddsi_proxy_participant.c | 7 +- src/core/ddsi/src/ddsi_spdp_schedule.c | 220 ++++++++++++++++-- src/core/ddsi/src/ddsi_tcp.c | 2 +- 7 files changed, 211 insertions(+), 142 deletions(-) diff --git a/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h b/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h index 5bfdc910be..0f2c8f1cc1 100644 --- a/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h +++ b/src/core/ddsi/include/dds/ddsi/ddsi_domaingv.h @@ -205,7 +205,6 @@ struct ddsi_domaingv { Initial discovery address set, and the current discovery address set. These are the addresses that SPDP pings get sent to. */ - struct ddsi_addrset *as_disc; struct spdp_admin *spdp_schedule; ddsrt_mutex_t lock; diff --git a/src/core/ddsi/src/ddsi__proxy_participant.h b/src/core/ddsi/src/ddsi__proxy_participant.h index a1cefd157f..df223b6189 100644 --- a/src/core/ddsi/src/ddsi__proxy_participant.h +++ b/src/core/ddsi/src/ddsi__proxy_participant.h @@ -50,7 +50,7 @@ int ddsi_update_proxy_participant_plist_locked (struct ddsi_proxy_participant *p void ddsi_proxy_participant_reassign_lease (struct ddsi_proxy_participant *proxypp, struct ddsi_lease *newlease); /** @component ddsi_proxy_participant */ -void ddsi_purge_proxy_participants (struct ddsi_domaingv *gv, const ddsi_xlocator_t *loc, bool delete_from_as_disc); +void ddsi_purge_proxy_participants (struct ddsi_domaingv *gv, const ddsi_xlocator_t *loc); /** @component ddsi_proxy_participant */ int ddsi_ref_proxy_participant (struct ddsi_proxy_participant *proxypp, struct ddsi_proxy_endpoint_common *c); diff --git a/src/core/ddsi/src/ddsi__spdp_schedule.h b/src/core/ddsi/src/ddsi__spdp_schedule.h index 2a14065739..a3db3f50b6 100644 --- a/src/core/ddsi/src/ddsi__spdp_schedule.h +++ b/src/core/ddsi/src/ddsi__spdp_schedule.h @@ -21,7 +21,7 @@ extern "C" { struct spdp_admin; // ddsi_participant::e.lock gets locked while spdp_admin::lock is held struct ddsi_proxy_reader; -struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) +struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv, bool add_localhost) ddsrt_nonnull_all ddsrt_attribute_warn_unused_result; void ddsi_spdp_scheduler_delete (struct spdp_admin *adm) diff --git a/src/core/ddsi/src/ddsi_init.c b/src/core/ddsi/src/ddsi_init.c index e059fefb90..f9426dac6e 100644 --- a/src/core/ddsi/src/ddsi_init.c +++ b/src/core/ddsi/src/ddsi_init.c @@ -74,88 +74,6 @@ #include "dds__whc.h" #include "dds/cdr/dds_cdrstream.h" -static int add_peer_address_ports (const struct ddsi_domaingv *gv, struct ddsi_addrset *as, ddsi_locator_t *loc) -{ - struct ddsi_tran_factory * const tran = ddsi_factory_find_supported_kind (gv, loc->kind); - assert (tran != NULL); - char buf[DDSI_LOCSTRLEN]; - int32_t maxidx; - - // check whether port number, address type and mode make sense, and prepare the - // locator by patching the first port number to use if none is given - if (ddsi_tran_get_locator_port (tran, loc) != DDSI_LOCATOR_PORT_INVALID) - { - maxidx = 0; - } - else if (ddsi_is_mcaddr (gv, loc)) - { - ddsi_tran_set_locator_port (tran, loc, ddsi_get_port (&gv->config, DDSI_PORT_MULTI_DISC, 0)); - maxidx = 0; - } - else - { - ddsi_tran_set_locator_port (tran, loc, ddsi_get_port (&gv->config, DDSI_PORT_UNI_DISC, 0)); - maxidx = gv->config.maxAutoParticipantIndex; - } - - GVLOG (DDS_LC_CONFIG, "add_peer_address: add %s", ddsi_locator_to_string (buf, sizeof (buf), loc)); - ddsi_add_locator_to_addrset (gv, as, loc); - for (int32_t i = 1; i < maxidx; i++) - { - ddsi_tran_set_locator_port (tran, loc, ddsi_get_port (&gv->config, DDSI_PORT_UNI_DISC, i)); - GVLOG (DDS_LC_CONFIG, ", :%"PRIu32, loc->port); - ddsi_add_locator_to_addrset (gv, as, loc); - } - GVLOG (DDS_LC_CONFIG, "\n"); - return 0; -} - -static int add_peer_address (const struct ddsi_domaingv *gv, struct ddsi_addrset *as, const char *addrs) -{ - DDSRT_WARNING_MSVC_OFF(4996); - char *addrs_copy, *cursor, *a; - int retval = -1; - addrs_copy = ddsrt_strdup (addrs); - cursor = addrs_copy; - while ((a = ddsrt_strsep (&cursor, ",")) != NULL) - { - ddsi_locator_t loc; - switch (ddsi_locator_from_string (gv, &loc, a, gv->m_factory)) - { - case AFSR_OK: - break; - case AFSR_INVALID: - GVERROR ("add_peer_address: %s: not a valid address\n", a); - goto error; - case AFSR_UNKNOWN: - GVERROR ("add_peer_address: %s: unknown address\n", a); - goto error; - case AFSR_MISMATCH: - GVERROR ("add_peer_address: %s: address family mismatch\n", a); - goto error; - } - if (add_peer_address_ports (gv, as, &loc) < 0) - { - goto error; - } - } - retval = 0; - error: - ddsrt_free (addrs_copy); - return retval; - DDSRT_WARNING_MSVC_ON(4996); -} - - -static void add_peer_addresses (const struct ddsi_domaingv *gv, struct ddsi_addrset *as, const struct ddsi_config_peer_listelem *list) -{ - while (list) - { - add_peer_address (gv, as, list->peer); - list = list->next; - } -} - enum make_uc_sockets_ret { MUSRET_SUCCESS, /* unicast socket(s) created */ MUSRET_INVALID_PORTS, /* specified port numbers are invalid */ @@ -1301,7 +1219,7 @@ int ddsi_init (struct ddsi_domaingv *gv, struct ddsi_psmx_instance_locators *psm // UnicastSPDPInterval -> 30s |_ perhaps adding something like this // @silentports -> 5min | would make sense? // @dropafter -> 30min -+ - bool add_self_to_as_disc = false; + bool add_localhost_to_initial_peers = false; if (gv->config.participantIndex == DDSI_PARTICIPANT_INDEX_DEFAULT) { #ifndef NDEBUG @@ -1340,7 +1258,7 @@ int ddsi_init (struct ddsi_domaingv *gv, struct ddsi_psmx_instance_locators *psm (none_allow_spdp_mc && gv->config.add_localhost_to_peers != DDSI_BOOLDEF_FALSE)) { // add self to as_disc, but only once we have everything set up to actually do that - add_self_to_as_disc = true; + add_localhost_to_initial_peers = true; } } @@ -1684,34 +1602,10 @@ int ddsi_init (struct ddsi_domaingv *gv, struct ddsi_psmx_instance_locators *psm ddsi_omg_security_init (gv); #endif - gv->as_disc = ddsi_new_addrset (); - for (int i = 0; i < gv->n_interfaces; i++) - { - if ((gv->interfaces[i].allow_multicast & DDSI_AMC_SPDP) && - ddsi_factory_supports (gv->xmit_conns[i]->m_factory, gv->loc_spdp_mc.kind)) - { - const ddsi_xlocator_t xloc = { .conn = gv->xmit_conns[i], .c = gv->loc_spdp_mc }; - ddsi_add_xlocator_to_addrset (gv, gv->as_disc, &xloc); - } - } - if (add_self_to_as_disc) - { - struct ddsi_config_peer_listelem peer_local; - char local_addr[DDSI_LOCSTRLEN]; - ddsi_locator_to_string_no_port (local_addr, sizeof (local_addr), &gv->interfaces[0].loc); - GVTRACE ("adding self (%s)\n", local_addr); - peer_local.next = NULL; - peer_local.peer = local_addr; - add_peer_addresses (gv, gv->as_disc, &peer_local); - } - if (gv->config.peers) - { - add_peer_addresses (gv, gv->as_disc, gv->config.peers); - } + gv->spdp_schedule = ddsi_spdp_scheduler_new (gv, add_localhost_to_initial_peers); + if (gv->spdp_schedule == NULL) + abort (); // FIXME: handle OOM here, it is not that hard ... - gv->spdp_schedule = ddsi_spdp_scheduler_new (gv); - if (gv->spdp_schedule == NULL) abort (); // FIXME: handle OOM here, it is not that hard ... - gv->gcreq_queue = ddsi_gcreq_queue_new (gv); ddsrt_atomic_st32 (&gv->rtps_keepgoing, 1); @@ -2017,7 +1911,7 @@ void ddsi_fini (struct ddsi_domaingv *gv) { /* No participants or proxy participants left, so this is safe */ ddsi_spdp_scheduler_delete (gv->spdp_schedule); - + /* The receive threads have already been stopped, therefore defragmentation and reorder state can't change anymore and can be freed. */ @@ -2063,7 +1957,6 @@ void ddsi_fini (struct ddsi_domaingv *gv) } ddsi_free_config_nwpart_addresses (gv); - ddsi_unref_addrset (gv->as_disc); /* Must delay freeing of rbufpools until after *all* references have been dropped, which only happens once all receive threads have diff --git a/src/core/ddsi/src/ddsi_proxy_participant.c b/src/core/ddsi/src/ddsi_proxy_participant.c index 6c5281804d..eba194653a 100644 --- a/src/core/ddsi/src/ddsi_proxy_participant.c +++ b/src/core/ddsi/src/ddsi_proxy_participant.c @@ -727,7 +727,7 @@ static void purge_helper (const ddsi_xlocator_t *n, void * varg) ddsi_delete_proxy_participant_by_guid (data->proxypp->e.gv, &data->proxypp->e.guid, data->timestamp, 1); } -void ddsi_purge_proxy_participants (struct ddsi_domaingv *gv, const ddsi_xlocator_t *loc, bool delete_from_as_disc) +void ddsi_purge_proxy_participants (struct ddsi_domaingv *gv, const ddsi_xlocator_t *loc) { /* FIXME: check whether addr:port can't be reused for a new connection by the time we get here. */ /* NOTE: This function exists for the sole purpose of cleaning up after closing a TCP connection in ddsi_tcp_close_conn and the state of the calling thread could be anything at this point. Because of that we do the unspeakable and toggle the thread state conditionally. We can't afford to have it in "asleep", as that causes a race with the garbage collector. */ @@ -742,11 +742,6 @@ void ddsi_purge_proxy_participants (struct ddsi_domaingv *gv, const ddsi_xlocato while ((data.proxypp = ddsi_entidx_enum_proxy_participant_next (&est)) != NULL) ddsi_addrset_forall (data.proxypp->as_meta, purge_helper, &data); ddsi_entidx_enum_proxy_participant_fini (&est); - - /* Shouldn't try to keep pinging clients once they're gone */ - if (delete_from_as_disc) - ddsi_remove_from_addrset (gv, gv->as_disc, loc); - ddsi_thread_state_asleep (thrst); } diff --git a/src/core/ddsi/src/ddsi_spdp_schedule.c b/src/core/ddsi/src/ddsi_spdp_schedule.c index 351e476413..29703e1796 100644 --- a/src/core/ddsi/src/ddsi_spdp_schedule.c +++ b/src/core/ddsi/src/ddsi_spdp_schedule.c @@ -28,6 +28,7 @@ #include "ddsi__security_omg.h" #include "ddsi__endpoint.h" #include "ddsi__plist.h" +#include "ddsi__portmapping.h" #include "ddsi__proxy_participant.h" #include "ddsi__topic.h" #include "ddsi__vendor.h" @@ -161,26 +162,204 @@ static int compare_spdp_pp (const void *va, const void *vb) { static const ddsrt_avl_treedef_t spdp_loc_td = DDSRT_AVL_TREEDEF_INITIALIZER(offsetof (union spdp_loc_union, c.avlnode), offsetof (union spdp_loc_union, c.xloc), compare_xlocators_vwrap, NULL); static const ddsrt_avl_treedef_t spdp_pp_td = DDSRT_AVL_TREEDEF_INITIALIZER(offsetof (struct spdp_pp, avlnode), 0, compare_spdp_pp, NULL); -struct add_as_disc_helper_arg { - struct spdp_admin *adm; - bool all_ok; -}; +static dds_return_t add_peer_address_xlocator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc) +{ + // Used for initial addresses only. These are all inserted as "aging" so there cannot + // be any live addresses yet. + assert (ddsrt_avl_is_empty (&adm->live)); + dds_return_t ret = DDS_RETCODE_OK; + union spdp_loc_union *n; + ddsrt_mutex_lock (&adm->lock); + union { + ddsrt_avl_ipath_t ip; + ddsrt_avl_dpath_t dp; + } avlpath; + if ((n = ddsrt_avl_lookup_ipath (&spdp_loc_td, &adm->aging, xloc, &avlpath.ip)) != NULL) + { + } + else if ((n = ddsrt_malloc_s (sizeof (*n))) != NULL) + { + n->c.xloc = *xloc; + ddsrt_avl_insert_ipath (&spdp_loc_td, &adm->aging, n, &avlpath.ip); + } + else + { + ret = DDS_RETCODE_OUT_OF_RESOURCES; + } + ddsrt_mutex_unlock (&adm->lock); + return ret; +} -static void add_as_disc_helper (const ddsi_xlocator_t *loc, void *varg) +static dds_return_t add_peer_address_ports_interface (struct spdp_admin *adm, const ddsi_locator_t *loc) { - struct add_as_disc_helper_arg * const arg = varg; - // FIXME: some but not all initial locators need to go into aging - if (arg->all_ok && ddsi_spdp_ref_locator (arg->adm, loc) != DDS_RETCODE_OK) - arg->all_ok = false; + struct ddsi_domaingv const * const gv = adm->gv; + dds_return_t rc = DDS_RETCODE_OK; + if (ddsi_is_unspec_locator (loc)) + return rc; + if (ddsi_is_mcaddr (gv, loc)) + { + // multicast: use all transmit connections + for (int i = 0; i < gv->n_interfaces && rc == DDS_RETCODE_OK; i++) + { + if (ddsi_factory_supports (gv->xmit_conns[i]->m_factory, loc->kind)) + rc = add_peer_address_xlocator (adm, &(const ddsi_xlocator_t) { + .conn = gv->xmit_conns[i], + .c = *loc }); + } + } + else + { + // unicast: assume the kernel knows how to route it from any connection + // if it doesn't match a local interface + int interf_idx = -1, fallback_interf_idx = -1; + for (int i = 0; i < gv->n_interfaces && interf_idx < 0; i++) + { + if (!ddsi_factory_supports (gv->xmit_conns[i]->m_factory, loc->kind)) + continue; + switch (ddsi_is_nearby_address (gv, loc, (size_t) gv->n_interfaces, gv->interfaces, NULL)) + { + case DNAR_SELF: + case DNAR_LOCAL: + interf_idx = i; + break; + case DNAR_DISTANT: + if (fallback_interf_idx < 0) + fallback_interf_idx = i; + break; + case DNAR_UNREACHABLE: + break; + } + } + if (interf_idx >= 0 || fallback_interf_idx >= 0) + { + const int i = (interf_idx >= 0) ? interf_idx : fallback_interf_idx; + rc = add_peer_address_xlocator (adm, &(const ddsi_xlocator_t) { + .conn = gv->xmit_conns[i], + .c = *loc }); + } + } + return rc; } -static void remove_as_disc_helper (const ddsi_xlocator_t *loc, void *varg) +static dds_return_t add_peer_address_ports (struct spdp_admin *adm, ddsi_locator_t *loc) { - struct spdp_admin * const adm = varg; - ddsi_spdp_unref_locator (adm, loc, false); + struct ddsi_domaingv const * const gv = adm->gv; + struct ddsi_tran_factory * const tran = ddsi_factory_find_supported_kind (gv, loc->kind); + assert (tran != NULL); + char buf[DDSI_LOCSTRLEN]; + int32_t maxidx; + dds_return_t rc = DDS_RETCODE_OK; + + // check whether port number, address type and mode make sense, and prepare the + // locator by patching the first port number to use if none is given + if (ddsi_tran_get_locator_port (tran, loc) != DDSI_LOCATOR_PORT_INVALID) + { + maxidx = 0; + } + else if (ddsi_is_mcaddr (gv, loc)) + { + ddsi_tran_set_locator_port (tran, loc, ddsi_get_port (&gv->config, DDSI_PORT_MULTI_DISC, 0)); + maxidx = 0; + } + else + { + ddsi_tran_set_locator_port (tran, loc, ddsi_get_port (&gv->config, DDSI_PORT_UNI_DISC, 0)); + maxidx = gv->config.maxAutoParticipantIndex; + } + + GVLOG (DDS_LC_CONFIG, "add_peer_address: add %s", ddsi_locator_to_string (buf, sizeof (buf), loc)); + rc = add_peer_address_ports_interface (adm, loc); + for (int32_t i = 1; i < maxidx && rc == DDS_RETCODE_OK; i++) + { + ddsi_tran_set_locator_port (tran, loc, ddsi_get_port (&gv->config, DDSI_PORT_UNI_DISC, i)); + if (i + 1 == maxidx) + GVLOG (DDS_LC_CONFIG, " ... :%"PRIu32, loc->port); + rc = add_peer_address_ports_interface (adm, loc); + } + GVLOG (DDS_LC_CONFIG, "\n"); + return rc; +} + +static dds_return_t add_peer_address (struct spdp_admin *adm, const char *addrs) +{ + DDSRT_WARNING_MSVC_OFF(4996); + struct ddsi_domaingv const * const gv = adm->gv; + char *addrs_copy, *cursor, *a; + dds_return_t rc = DDS_RETCODE_ERROR; + addrs_copy = ddsrt_strdup (addrs); + cursor = addrs_copy; + while ((a = ddsrt_strsep (&cursor, ",")) != NULL) + { + ddsi_locator_t loc; + switch (ddsi_locator_from_string (gv, &loc, a, gv->m_factory)) + { + case AFSR_OK: + break; + case AFSR_INVALID: + GVERROR ("add_peer_address: %s: not a valid address\n", a); + goto error; + case AFSR_UNKNOWN: + GVERROR ("add_peer_address: %s: unknown address\n", a); + goto error; + case AFSR_MISMATCH: + GVERROR ("add_peer_address: %s: address family mismatch\n", a); + goto error; + } + if ((rc = add_peer_address_ports (adm, &loc)) < 0) + { + goto error; + } + } + rc = DDS_RETCODE_OK; + error: + ddsrt_free (addrs_copy); + return rc; + DDSRT_WARNING_MSVC_ON(4996); +} + +static dds_return_t add_peer_addresses (struct spdp_admin *adm, const struct ddsi_config_peer_listelem *list) +{ + dds_return_t rc = DDS_RETCODE_OK; + while (list && rc == DDS_RETCODE_OK) + { + rc = add_peer_address (adm, list->peer); + list = list->next; + } + return rc; } -struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) +static dds_return_t populate_initial_addresses (struct spdp_admin *adm, bool add_localhost) +{ + struct ddsi_domaingv const * const gv = adm->gv; + dds_return_t rc = DDS_RETCODE_OK; + for (int i = 0; i < gv->n_interfaces && rc == DDS_RETCODE_OK; i++) + { + if ((gv->interfaces[i].allow_multicast & DDSI_AMC_SPDP) && + ddsi_factory_supports (gv->xmit_conns[i]->m_factory, gv->loc_spdp_mc.kind)) + { + const ddsi_xlocator_t xloc = { .conn = gv->xmit_conns[i], .c = gv->loc_spdp_mc }; + // multicast discovery addresses never expire + rc = ddsi_spdp_ref_locator (adm, &xloc); + } + } + if (rc == DDS_RETCODE_OK && add_localhost) + { + struct ddsi_config_peer_listelem peer_local; + char local_addr[DDSI_LOCSTRLEN]; + ddsi_locator_to_string_no_port (local_addr, sizeof (local_addr), &gv->interfaces[0].loc); + GVTRACE ("adding self (%s)\n", local_addr); + peer_local.next = NULL; + peer_local.peer = local_addr; + rc = add_peer_addresses (adm, &peer_local); + } + if (rc == DDS_RETCODE_OK && gv->config.peers) + { + rc = add_peer_addresses (adm, gv->config.peers); + } + return rc; +} + +struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv, bool add_localhost) { struct spdp_admin *adm; if ((adm = ddsrt_malloc_s (sizeof (*adm))) == NULL) @@ -191,9 +370,7 @@ struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) ddsrt_avl_init (&spdp_loc_td, &adm->live); ddsrt_avl_init (&spdp_pp_td, &adm->pp); - struct add_as_disc_helper_arg arg = { .adm = adm, .all_ok = true }; - ddsi_addrset_forall (gv->as_disc, add_as_disc_helper, &arg); - if (!arg.all_ok) + if (populate_initial_addresses (adm, add_localhost) != DDS_RETCODE_OK) { ddsrt_avl_free (&spdp_loc_td, &adm->live, ddsrt_free); // FIXME: need to do aging of initial locators, too @@ -212,13 +389,18 @@ struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv) void ddsi_spdp_scheduler_delete (struct spdp_admin *adm) { - // FIXME: Initial addresses may still be around, probably should invert it, check refc=1, check present in as_disc, then free - ddsi_addrset_forall (adm->gv->as_disc, remove_as_disc_helper, adm); - assert (ddsrt_avl_is_empty (&adm->live)); assert (ddsrt_avl_is_empty (&adm->pp)); +#ifndef NDEBUG + { + ddsrt_avl_iter_t it; + for (struct spdp_loc_live *n = ddsrt_avl_iter_first (&spdp_loc_td, &adm->live, &it); n; n = ddsrt_avl_iter_next (&it)) + assert (n->proxypp_refc == 1); + } +#endif ddsi_delete_xevent (adm->aging_xev); ddsi_delete_xevent (adm->live_xev); // intrusive data structures, can simply free everything + ddsrt_avl_free (&spdp_loc_td, &adm->live, ddsrt_free); ddsrt_avl_free (&spdp_loc_td, &adm->aging, ddsrt_free); ddsrt_mutex_destroy (&adm->lock); ddsrt_free (adm); diff --git a/src/core/ddsi/src/ddsi_tcp.c b/src/core/ddsi/src/ddsi_tcp.c index 676f2c6293..bb21ff6d7a 100644 --- a/src/core/ddsi/src/ddsi_tcp.c +++ b/src/core/ddsi/src/ddsi_tcp.c @@ -1046,7 +1046,7 @@ static void ddsi_tcp_close_conn (struct ddsi_tran_conn * tc) ddsi_ipaddr_to_loc(&loc.c, &conn->m_peer_addr.a, addrfam_to_locator_kind(conn->m_peer_addr.a.sa_family)); loc.c.port = conn->m_peer_port; loc.conn = tc; - ddsi_purge_proxy_participants (gv, &loc, conn->m_base.m_server); + ddsi_purge_proxy_participants (gv, &loc); } } From e1a95a5ebcc34909cf284a1327e6528b1c3081d3 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Fri, 6 Sep 2024 12:00:50 +0200 Subject: [PATCH 07/13] Pruning of discovery addresses If one adds a host to the peer list, we add MaxAutoParticipantIndex+1 locators for it, but it generally doesn't make much sense to keep pinging all of them if there is no response. Especially if MaxAutoParticipantIndex is large, it causes a lot of periodic traffic. --- docs/manual/config/config_file_reference.rst | 52 +++++- docs/manual/options.md | 40 ++++- etc/cyclonedds.rnc | 27 ++- etc/cyclonedds.xsd | 32 +++- src/core/ddsi/defconfig.c | 8 +- src/core/ddsi/include/dds/ddsi/ddsi_config.h | 3 + src/core/ddsi/src/ddsi__cfgelems.h | 28 +++ src/core/ddsi/src/ddsi__spdp_schedule.h | 2 +- src/core/ddsi/src/ddsi_config.c | 15 ++ src/core/ddsi/src/ddsi_init.c | 2 + src/core/ddsi/src/ddsi_participant.c | 25 ++- src/core/ddsi/src/ddsi_proxy_participant.c | 2 +- src/core/ddsi/src/ddsi_spdp_schedule.c | 174 +++++++++++-------- 13 files changed, 316 insertions(+), 94 deletions(-) diff --git a/docs/manual/config/config_file_reference.rst b/docs/manual/config/config_file_reference.rst index 01678d678e..e0114820fd 100644 --- a/docs/manual/config/config_file_reference.rst +++ b/docs/manual/config/config_file_reference.rst @@ -112,7 +112,7 @@ The default value is: ``lax`` //CycloneDDS/Domain/Discovery ============================= -Children: :ref:`DSGracePeriod`, :ref:`DefaultMulticastAddress`, :ref:`EnableTopicDiscoveryEndpoints`, :ref:`ExternalDomainId`, :ref:`LeaseDuration`, :ref:`MaxAutoParticipantIndex`, :ref:`ParticipantIndex`, :ref:`Peers`, :ref:`Ports`, :ref:`SPDPInterval`, :ref:`SPDPMulticastAddress`, :ref:`Tag` +Children: :ref:`DSGracePeriod`, :ref:`DefaultMulticastAddress`, :ref:`DiscoveredLocatorPruneDelay`, :ref:`EnableTopicDiscoveryEndpoints`, :ref:`ExternalDomainId`, :ref:`InitialLocatorPruneDelay`, :ref:`LeaseDuration`, :ref:`MaxAutoParticipantIndex`, :ref:`ParticipantIndex`, :ref:`Peers`, :ref:`Ports`, :ref:`SPDPInterval`, :ref:`SPDPMulticastAddress`, :ref:`Tag` The Discovery element allows you to specify various parameters related to the discovery of peers. @@ -143,6 +143,20 @@ This element specifies the default multicast address for all traffic other than The default value is: ``auto`` +.. _`//CycloneDDS/Domain/Discovery/DiscoveredLocatorPruneDelay`: + +//CycloneDDS/Domain/Discovery/DiscoveredLocatorPruneDelay +--------------------------------------------------------- + +Number-with-unit + +This element specifies the time for which discovered (unicast) participant locators are pinged after a participant at that address disappeared because of a lease expiry. Locators for participants for which notice of graceful termination was received are not retained. + +Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day. + +The default value is: ``60s`` + + .. _`//CycloneDDS/Domain/Discovery/EnableTopicDiscoveryEndpoints`: //CycloneDDS/Domain/Discovery/EnableTopicDiscoveryEndpoints @@ -167,6 +181,20 @@ An override for the domain id is used to discovery and determine the port number The default value is: ``default`` +.. _`//CycloneDDS/Domain/Discovery/InitialLocatorPruneDelay`: + +//CycloneDDS/Domain/Discovery/InitialLocatorPruneDelay +------------------------------------------------------ + +Number-with-unit + +This element specifies the default time for configured peer locators are initially ping and after disappearance of the last participant at that address until it is pruned. It can be overridden for individual peers. + +Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day. + +The default value is: ``30s`` + + .. _`//CycloneDDS/Domain/Discovery/LeaseDuration`: //CycloneDDS/Domain/Discovery/LeaseDuration @@ -244,7 +272,7 @@ The default value is: ``default`` //CycloneDDS/Domain/Discovery/Peers/Peer ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Attributes: :ref:`Address` +Attributes: :ref:`Address`, :ref:`PruneDelay` This element statically configures addresses for discovery. @@ -261,6 +289,20 @@ This element specifies an IP address to which discovery packets must be sent, in The default value is: ```` +.. _`//CycloneDDS/Domain/Discovery/Peers/Peer[@PruneDelay]`: + +//CycloneDDS/Domain/Discovery/Peers/Peer[@PruneDelay] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Number-with-unit + +This element specifies the duration for which the locator must be pinged for participant discovery before it is pruned as a useless address. The value "default" means the value in Discovery/InitialLocatorPruneDelay is used. + +Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day. + +The default value is: ``default`` + + .. _`//CycloneDDS/Domain/Discovery/Ports`: //CycloneDDS/Domain/Discovery/Ports @@ -2716,10 +2758,10 @@ The categorisation of tracing output is incomplete and hence most of the verbosi The default value is: ``none`` .. - generated from ddsi_config.h[e6e75c7c07b3b91a92715063cfd8abdd0fbd8b08] + generated from ddsi_config.h[dbf9996a8b49da8e7cb4b62ab157ba80a073cd81] generated from ddsi__cfgunits.h[bd22f0c0ed210501d0ecd3b07c992eca549ef5aa] - generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] - generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] + generated from ddsi__cfgelems.h[a8b3ad5170f4e86fd7c1f29c5677c332930ea6a4] + generated from ddsi_config.c[94a98ea7709bca260c9cfb5cf43396b0d5e3c953] generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] generated from _confgen.c[86c631048046ed4e14c46dba40e5253b50a748fe] generated from generate_rnc.c[b50e4b7ab1d04b2bc1d361a0811247c337b74934] diff --git a/docs/manual/options.md b/docs/manual/options.md index 60ede6dfc1..adf865ab00 100644 --- a/docs/manual/options.md +++ b/docs/manual/options.md @@ -67,7 +67,7 @@ The default value is: `lax` ### //CycloneDDS/Domain/Discovery -Children: [DSGracePeriod](#cycloneddsdomaindiscoverydsgraceperiod), [DefaultMulticastAddress](#cycloneddsdomaindiscoverydefaultmulticastaddress), [EnableTopicDiscoveryEndpoints](#cycloneddsdomaindiscoveryenabletopicdiscoveryendpoints), [ExternalDomainId](#cycloneddsdomaindiscoveryexternaldomainid), [LeaseDuration](#cycloneddsdomaindiscoveryleaseduration), [MaxAutoParticipantIndex](#cycloneddsdomaindiscoverymaxautoparticipantindex), [ParticipantIndex](#cycloneddsdomaindiscoveryparticipantindex), [Peers](#cycloneddsdomaindiscoverypeers), [Ports](#cycloneddsdomaindiscoveryports), [SPDPInterval](#cycloneddsdomaindiscoveryspdpinterval), [SPDPMulticastAddress](#cycloneddsdomaindiscoveryspdpmulticastaddress), [Tag](#cycloneddsdomaindiscoverytag) +Children: [DSGracePeriod](#cycloneddsdomaindiscoverydsgraceperiod), [DefaultMulticastAddress](#cycloneddsdomaindiscoverydefaultmulticastaddress), [DiscoveredLocatorPruneDelay](#cycloneddsdomaindiscoverydiscoveredlocatorprunedelay), [EnableTopicDiscoveryEndpoints](#cycloneddsdomaindiscoveryenabletopicdiscoveryendpoints), [ExternalDomainId](#cycloneddsdomaindiscoveryexternaldomainid), [InitialLocatorPruneDelay](#cycloneddsdomaindiscoveryinitiallocatorprunedelay), [LeaseDuration](#cycloneddsdomaindiscoveryleaseduration), [MaxAutoParticipantIndex](#cycloneddsdomaindiscoverymaxautoparticipantindex), [ParticipantIndex](#cycloneddsdomaindiscoveryparticipantindex), [Peers](#cycloneddsdomaindiscoverypeers), [Ports](#cycloneddsdomaindiscoveryports), [SPDPInterval](#cycloneddsdomaindiscoveryspdpinterval), [SPDPMulticastAddress](#cycloneddsdomaindiscoveryspdpmulticastaddress), [Tag](#cycloneddsdomaindiscoverytag) The Discovery element allows you to specify various parameters related to the discovery of peers. @@ -90,6 +90,16 @@ This element specifies the default multicast address for all traffic other than The default value is: `auto` +#### //CycloneDDS/Domain/Discovery/DiscoveredLocatorPruneDelay +Number-with-unit + +This element specifies the time for which discovered (unicast) participant locators are pinged after a participant at that address disappeared because of a lease expiry. Locators for participants for which notice of graceful termination was received are not retained. + +Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day. + +The default value is: `60s` + + #### //CycloneDDS/Domain/Discovery/EnableTopicDiscoveryEndpoints Boolean @@ -106,6 +116,16 @@ An override for the domain id is used to discovery and determine the port number The default value is: `default` +#### //CycloneDDS/Domain/Discovery/InitialLocatorPruneDelay +Number-with-unit + +This element specifies the default time for configured peer locators are initially ping and after disappearance of the last participant at that address until it is pruned. It can be overridden for individual peers. + +Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day. + +The default value is: `30s` + + #### //CycloneDDS/Domain/Discovery/LeaseDuration Number-with-unit @@ -157,7 +177,7 @@ The default value is: `default` ##### //CycloneDDS/Domain/Discovery/Peers/Peer -Attributes: [Address](#cycloneddsdomaindiscoverypeerspeeraddress) +Attributes: [Address](#cycloneddsdomaindiscoverypeerspeeraddress), [PruneDelay](#cycloneddsdomaindiscoverypeerspeerprunedelay) This element statically configures addresses for discovery. @@ -170,6 +190,16 @@ This element specifies an IP address to which discovery packets must be sent, in The default value is: `` +##### //CycloneDDS/Domain/Discovery/Peers/Peer[@PruneDelay] +Number-with-unit + +This element specifies the duration for which the locator must be pinged for participant discovery before it is pruned as a useless address. The value "default" means the value in Discovery/InitialLocatorPruneDelay is used. + +Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day. + +The default value is: `default` + + #### //CycloneDDS/Domain/Discovery/Ports Children: [Base](#cycloneddsdomaindiscoveryportsbase), [DomainGain](#cycloneddsdomaindiscoveryportsdomaingain), [MulticastDataOffset](#cycloneddsdomaindiscoveryportsmulticastdataoffset), [MulticastMetaOffset](#cycloneddsdomaindiscoveryportsmulticastmetaoffset), [ParticipantGain](#cycloneddsdomaindiscoveryportsparticipantgain), [UnicastDataOffset](#cycloneddsdomaindiscoveryportsunicastdataoffset), [UnicastMetaOffset](#cycloneddsdomaindiscoveryportsunicastmetaoffset) @@ -1906,10 +1936,10 @@ While none prevents any message from being written to a DDSI2 log file. The categorisation of tracing output is incomplete and hence most of the verbosity levels and categories are not of much use in the current release. This is an ongoing process and here we describe the target situation rather than the current situation. Currently, the most useful verbosity levels are config, fine and finest. The default value is: `none` - + - - + + diff --git a/etc/cyclonedds.rnc b/etc/cyclonedds.rnc index b450751fa4..0e6d7e7ccf 100644 --- a/etc/cyclonedds.rnc +++ b/etc/cyclonedds.rnc @@ -64,6 +64,13 @@ CycloneDDS configuration""" ] ] text }? & [ a:documentation [ xml:lang="en" """ +

    This element specifies the time for which discovered (unicast) participant locators are pinged after a participant at that address disappeared because of a lease expiry. Locators for participants for which notice of graceful termination was received are not retained.

    +

    Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day.

    +

    The default value is: 60s

    """ ] ] + element DiscoveredLocatorPruneDelay { + duration_inf + }? + & [ a:documentation [ xml:lang="en" """

    This element controls whether the built-in endpoints for topic discovery are created and used to exchange topic discovery information.

    The default value is: false

    """ ] ] element EnableTopicDiscoveryEndpoints { @@ -76,6 +83,13 @@ CycloneDDS configuration""" ] ] text }? & [ a:documentation [ xml:lang="en" """ +

    This element specifies the default time for configured peer locators are initially ping and after disappearance of the last participant at that address until it is pruned. It can be overridden for individual peers.

    +

    Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day.

    +

    The default value is: 30s

    """ ] ] + element InitialLocatorPruneDelay { + duration_inf + }? + & [ a:documentation [ xml:lang="en" """

    This setting controls the default participant lease duration.

    The unit must be specified explicitly. Recognised units: ns, us, ms, s, min, hr, day.

    The default value is: 10 s

    """ ] ] @@ -118,6 +132,13 @@ CycloneDDS configuration""" ] ] attribute Address { text } + & [ a:documentation [ xml:lang="en" """ +

    This element specifies the duration for which the locator must be pinged for participant discovery before it is pruned as a useless address. The value "default" means the value in Discovery/InitialLocatorPruneDelay is used.

    +

    Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day.

    +

    The default value is: default

    """ ] ] + attribute PruneDelay { + duration_inf + }? }* }? & [ a:documentation [ xml:lang="en" """ @@ -1319,10 +1340,10 @@ MIIEpAIBAAKCAQEA3HIh...AOBaaqSV37XBUJg==
    duration_inf = xsd:token { pattern = "inf|0|(\d+(\.\d*)?([Ee][\-+]?\d+)?|\.\d+([Ee][\-+]?\d+)?) *([num]?s|min|hr|day)" } memsize = xsd:token { pattern = "0|(\d+(\.\d*)?([Ee][\-+]?\d+)?|\.\d+([Ee][\-+]?\d+)?) *([kMG]i?)?B" } } -# generated from ddsi_config.h[e6e75c7c07b3b91a92715063cfd8abdd0fbd8b08] +# generated from ddsi_config.h[dbf9996a8b49da8e7cb4b62ab157ba80a073cd81] # generated from ddsi__cfgunits.h[bd22f0c0ed210501d0ecd3b07c992eca549ef5aa] -# generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] -# generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] +# generated from ddsi__cfgelems.h[a8b3ad5170f4e86fd7c1f29c5677c332930ea6a4] +# generated from ddsi_config.c[94a98ea7709bca260c9cfb5cf43396b0d5e3c953] # generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] # generated from _confgen.c[86c631048046ed4e14c46dba40e5253b50a748fe] # generated from generate_rnc.c[b50e4b7ab1d04b2bc1d361a0811247c337b74934] diff --git a/etc/cyclonedds.xsd b/etc/cyclonedds.xsd index 35d31d02c7..567152e4e1 100644 --- a/etc/cyclonedds.xsd +++ b/etc/cyclonedds.xsd @@ -112,8 +112,10 @@ CycloneDDS configuration + + @@ -140,6 +142,14 @@ CycloneDDS configuration <p>The default value is: <code>auto</code></p> + + + +<p>This element specifies the time for which discovered (unicast) participant locators are pinged after a participant at that address disappeared because of a lease expiry. Locators for participants for which notice of graceful termination was received are not retained.</p> +<p>Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day.</p> +<p>The default value is: <code>60s</code></p> + + @@ -154,6 +164,14 @@ CycloneDDS configuration <p>The default value is: <code>default</code></p> + + + +<p>This element specifies the default time for configured peer locators are initially ping and after disappearance of the last participant at that address until it is pruned. It can be overridden for individual peers.</p> +<p>Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day.</p> +<p>The default value is: <code>30s</code></p> + + @@ -213,6 +231,14 @@ CycloneDDS configuration <p>The default value is: <code>&lt;empty&gt;</code></p> + + + +<p>This element specifies the duration for which the locator must be pinged for participant discovery before it is pruned as a useless address. The value "default" means the value in Discovery/InitialLocatorPruneDelay is used.</p> +<p>Valid values are finite durations with an explicit unit or the keyword 'inf' for infinity. Recognised units: ns, us, ms, s, min, hr, day.</p> +<p>The default value is: <code>default</code></p> + + @@ -1979,10 +2005,10 @@ MIIEpAIBAAKCAQEA3HIh...AOBaaqSV37XBUJg==<br> - + - - + + diff --git a/src/core/ddsi/defconfig.c b/src/core/ddsi/defconfig.c index 4d3bf2f6f4..4358fd5b08 100644 --- a/src/core/ddsi/defconfig.c +++ b/src/core/ddsi/defconfig.c @@ -34,6 +34,8 @@ void ddsi_config_init_default (struct ddsi_config *cfg) cfg->maxAutoParticipantIndex = INT32_C (99); cfg->spdpMulticastAddressString = "239.255.0.1"; cfg->spdp_interval.isdefault = 1; + cfg->spdp_prune_delay_initial = INT64_C (30000000000); + cfg->spdp_prune_delay_discovered = INT64_C (60000000000); cfg->ports.base = UINT32_C (7400); cfg->ports.dg = UINT32_C (250); cfg->ports.pg = UINT32_C (2); @@ -99,10 +101,10 @@ void ddsi_config_init_default (struct ddsi_config *cfg) cfg->ssl_min_version.minor = 3; #endif /* DDS_HAS_TCP_TLS */ } -/* generated from ddsi_config.h[e6e75c7c07b3b91a92715063cfd8abdd0fbd8b08] */ +/* generated from ddsi_config.h[dbf9996a8b49da8e7cb4b62ab157ba80a073cd81] */ /* generated from ddsi__cfgunits.h[bd22f0c0ed210501d0ecd3b07c992eca549ef5aa] */ -/* generated from ddsi__cfgelems.h[7adb2155a65c329d28b242cef936bbfc08e76118] */ -/* generated from ddsi_config.c[8d7ef0ae962a47cb2138de27ac0f6751e3393c66] */ +/* generated from ddsi__cfgelems.h[a8b3ad5170f4e86fd7c1f29c5677c332930ea6a4] */ +/* generated from ddsi_config.c[94a98ea7709bca260c9cfb5cf43396b0d5e3c953] */ /* generated from _confgen.h[9554f1d72645c0b8bb66ffbfbc3c0fb664fc1a43] */ /* generated from _confgen.c[86c631048046ed4e14c46dba40e5253b50a748fe] */ /* generated from generate_rnc.c[b50e4b7ab1d04b2bc1d361a0811247c337b74934] */ diff --git a/src/core/ddsi/include/dds/ddsi/ddsi_config.h b/src/core/ddsi/include/dds/ddsi/ddsi_config.h index e240e85a72..f3a8cb5cb7 100644 --- a/src/core/ddsi/include/dds/ddsi/ddsi_config.h +++ b/src/core/ddsi/include/dds/ddsi/ddsi_config.h @@ -138,6 +138,7 @@ struct ddsi_config_peer_listelem { struct ddsi_config_peer_listelem *next; char *peer; + struct ddsi_config_maybe_duration prune_delay; }; struct ddsi_config_prune_deleted_ppant { @@ -305,6 +306,8 @@ struct ddsi_config char *defaultMulticastAddressString; struct ddsi_config_maybe_duration spdp_interval; int64_t spdp_response_delay_max; + int64_t spdp_prune_delay_initial; + int64_t spdp_prune_delay_discovered; int64_t lease_duration; int64_t const_hb_intv_sched; int64_t const_hb_intv_sched_min; diff --git a/src/core/ddsi/src/ddsi__cfgelems.h b/src/core/ddsi/src/ddsi__cfgelems.h index de29099e5d..34239af476 100644 --- a/src/core/ddsi/src/ddsi__cfgelems.h +++ b/src/core/ddsi/src/ddsi__cfgelems.h @@ -1855,6 +1855,15 @@ static struct cfgelem discovery_peer_cfgattrs[] = { "explicitly set the port to which it must be sent. Multiple Peers may " "be specified.

    " )), + STRING("PruneDelay", NULL, 1, "default", + MEMBEROF(ddsi_config_peer_listelem, prune_delay), + FUNCTIONS(0, uf_maybe_duration_inf, 0, pf_maybe_duration), + DESCRIPTION( + "

    This element specifies the duration for which the locator must " + "be pinged for participant discovery before it is pruned as a useless " + "address. The value \"default\" means the value in " + "Discovery/InitialLocatorPruneDelay is used.

    "), + UNIT("duration_inf")), END_MARKER }; @@ -1968,6 +1977,25 @@ static struct cfgelem discovery_cfgelems[] = { "traffic other than participant discovery packets. It defaults to " "Discovery/SPDPMulticastAddress.

    " )), + STRING("InitialLocatorPruneDelay", NULL, 1, "30s", + MEMBER(spdp_prune_delay_initial), + FUNCTIONS(0, uf_duration_inf, 0, pf_duration), + DESCRIPTION( + "

    This element specifies the default time for configured " + "peer locators are initially ping and after disappearance of the " + "last participant at that address until it is pruned. It can be " + "overridden for individual peers.

    "), + UNIT("duration_inf")), + STRING("DiscoveredLocatorPruneDelay", NULL, 1, "60s", + MEMBER(spdp_prune_delay_discovered), + FUNCTIONS(0, uf_duration_inf, 0, pf_duration), + DESCRIPTION( + "

    This element specifies the time for which discovered (unicast) " + "participant locators are pinged after a participant at that " + "address disappeared because of a lease expiry. Locators for " + "participants for which notice of graceful termination was received " + "are not retained.

    "), + UNIT("duration_inf")), GROUP("Ports", discovery_ports_cfgelems, NULL, 1, NOMEMBER, NOFUNCTIONS, diff --git a/src/core/ddsi/src/ddsi__spdp_schedule.h b/src/core/ddsi/src/ddsi__spdp_schedule.h index a3db3f50b6..fd32c32ab4 100644 --- a/src/core/ddsi/src/ddsi__spdp_schedule.h +++ b/src/core/ddsi/src/ddsi__spdp_schedule.h @@ -36,7 +36,7 @@ dds_return_t ddsi_spdp_register_participant (struct spdp_admin *adm, const struc void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) ddsrt_nonnull_all; -dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc) +dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, dds_duration_t prune_delay) ddsrt_nonnull_all ddsrt_attribute_warn_unused_result; void ddsi_spdp_unref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, bool on_lease_expiry) diff --git a/src/core/ddsi/src/ddsi_config.c b/src/core/ddsi/src/ddsi_config.c index 40c97a178b..0ac5b3200a 100644 --- a/src/core/ddsi/src/ddsi_config.c +++ b/src/core/ddsi/src/ddsi_config.c @@ -174,6 +174,7 @@ DU(duration_us_1s); DU(duration_100ms_1hr); DU(nop_duration_ms_1hr); DU(maybe_duration_ms_1hr); +DU(maybe_duration_inf); PF(duration); PF(maybe_duration); DUPF(standards_conformance); @@ -732,6 +733,8 @@ static int if_peer (struct ddsi_cfgst *cfgst, void *parent, struct cfgelem const if (new == NULL) return -1; new->peer = NULL; + new->prune_delay.isdefault = true; + new->prune_delay.value = DDS_INFINITY; return 0; } @@ -1476,6 +1479,18 @@ static enum update_result uf_maybe_duration_ms_1hr (struct ddsi_cfgst *cfgst, vo return uf_maybe_duration_gen (cfgst, parent, cfgelem, value, DDS_MSECS (1), 0, DDS_SECS (3600)); } +static enum update_result uf_maybe_duration_inf (struct ddsi_cfgst *cfgst, void *parent, struct cfgelem const * const cfgelem, UNUSED_ARG (int first), const char *value) +{ + if (ddsrt_strcasecmp (value, "inf") == 0) { + struct ddsi_config_maybe_duration * const elem = cfg_address (cfgst, parent, cfgelem); + elem->isdefault = 0; + elem->value = DDS_INFINITY; + return URES_SUCCESS; + } else { + return uf_maybe_duration_gen (cfgst, parent, cfgelem, value, 0, 0, DDS_INFINITY - 1); + } +} + static enum update_result uf_nop_duration_ms_1hr (struct ddsi_cfgst *cfgst, UNUSED_ARG(void *parent), UNUSED_ARG(struct cfgelem const * const cfgelem), UNUSED_ARG (int first), const char *value) { int64_t dummy; diff --git a/src/core/ddsi/src/ddsi_init.c b/src/core/ddsi/src/ddsi_init.c index f9426dac6e..3aa0247548 100644 --- a/src/core/ddsi/src/ddsi_init.c +++ b/src/core/ddsi/src/ddsi_init.c @@ -1128,6 +1128,8 @@ int ddsi_init (struct ddsi_domaingv *gv, struct ddsi_psmx_instance_locators *psm case DDSI_TRANS_TCP6: gv->config.publish_uc_locators = (gv->config.tcp_port != -1); gv->config.enable_uc_locators = 1; + /* Discovery on TCP works different, there's no point in trying to ping client ports after they have disappeared */ + gv->config.spdp_prune_delay_discovered = 0; /* TCP affects what features are supported/required */ gv->config.many_sockets_mode = DDSI_MSM_SINGLE_UNICAST; if (ddsi_tcp_init (gv) < 0) diff --git a/src/core/ddsi/src/ddsi_participant.c b/src/core/ddsi/src/ddsi_participant.c index 152b2e4aca..17c796cee3 100644 --- a/src/core/ddsi/src/ddsi_participant.c +++ b/src/core/ddsi/src/ddsi_participant.c @@ -690,7 +690,11 @@ void ddsi_unref_participant (struct ddsi_participant *pp, const struct ddsi_guid ddsi_delete_xevent (pp->pmd_update_xevent); // FIXME: locking - update_participant_spdp_sample (pp, false); + if (update_participant_spdp_sample (pp, false) != DDS_RETCODE_OK) + { + // Pretty much harmless to fail at this: the others will eventually see a lease expiry + GVTRACE ("failed to construct dispose/unregister SPDP sample for participant "PGUIDFMT"\n", PGUID (pp->e.guid)); + } ddsi_spdp_unregister_participant (gv->spdp_schedule, pp); ddsi_serdata_unref (pp->spdp_serdata); @@ -997,9 +1001,16 @@ dds_return_t ddsi_new_participant (ddsi_guid_t *ppguid, struct ddsi_domaingv *gv if (!(flags & RTPS_PF_NO_BUILTIN_WRITERS) || !(flags & RTPS_PF_NO_PRIVILEGED_PP)) { - update_participant_spdp_sample (pp, true); - if (ddsi_spdp_register_participant (gv->spdp_schedule, pp) != 0) + if (update_participant_spdp_sample (pp, true) != DDS_RETCODE_OK) + { + GVTRACE ("failed to construct dispose/unregister SPDP sample for participant "PGUIDFMT"\n", PGUID (pp->e.guid)); + abort (); // FIXME + } + if (ddsi_spdp_register_participant (gv->spdp_schedule, pp) != DDS_RETCODE_OK) + { + GVTRACE ("failed to register participant "PGUIDFMT" with SPDP scheduling\n", PGUID (pp->e.guid)); abort (); // FIXME + } ddsrt_mtime_t tsched = (pp->plist->qos.liveliness.lease_duration == DDS_INFINITY) ? DDSRT_MTIME_NEVER : (ddsrt_mtime_t){0}; struct ddsi_write_pmd_message_xevent_cb_arg arg = { .pp_guid = pp->e.guid }; @@ -1045,7 +1056,13 @@ void ddsi_update_participant_plist (struct ddsi_participant *pp, const ddsi_plis { ddsrt_mutex_lock (&pp->e.lock); if (ddsi_update_qos_locked (&pp->e, &pp->plist->qos, &plist->qos, ddsrt_time_wallclock ())) - update_participant_spdp_sample_locked (pp, true); + { + if (update_participant_spdp_sample_locked (pp, true) != DDS_RETCODE_OK) + { + ETRACE (pp, "failed to construct updated SPDP sample for participant "PGUIDFMT" after QoS update\n", PGUID (pp->e.guid)); + abort (); // can't undo QoS update, but can't published updated QoS either ... + } + } ddsrt_mutex_unlock (&pp->e.lock); ddsi_spdp_force_republish (pp->e.gv->spdp_schedule, pp, NULL); } diff --git a/src/core/ddsi/src/ddsi_proxy_participant.c b/src/core/ddsi/src/ddsi_proxy_participant.c index eba194653a..c033cfc5ce 100644 --- a/src/core/ddsi/src/ddsi_proxy_participant.c +++ b/src/core/ddsi/src/ddsi_proxy_participant.c @@ -67,7 +67,7 @@ static void maybe_update_as_disc_for_proxypp (struct ddsi_domaingv *gv, const st switch (op) { case MUADFPOP_ADD: - if (ddsi_spdp_ref_locator (gv->spdp_schedule, &loc) != DDS_RETCODE_OK) + if (ddsi_spdp_ref_locator (gv->spdp_schedule, &loc, gv->config.spdp_prune_delay_discovered) != DDS_RETCODE_OK) abort (); // FIXME: propagate and delete proxy participant break; case MUADFPOP_REMOVE_ON_DELETE: diff --git a/src/core/ddsi/src/ddsi_spdp_schedule.c b/src/core/ddsi/src/ddsi_spdp_schedule.c index 29703e1796..023f0f745d 100644 --- a/src/core/ddsi/src/ddsi_spdp_schedule.c +++ b/src/core/ddsi/src/ddsi_spdp_schedule.c @@ -105,12 +105,13 @@ X struct spdp_loc_common { ddsrt_avl_node_t avlnode; // indexed on address ddsi_xlocator_t xloc; // the address + dds_duration_t prune_delay; }; struct spdp_loc_aging { struct spdp_loc_common c; ddsrt_mtime_t tsched; // time at which to ping this locator again - uint32_t age; // decremented, entry is deleted when it reaches 0 + ddsrt_mtime_t tprune; // decremented, entry is deleted when it reaches 0 }; struct spdp_loc_live { @@ -149,6 +150,10 @@ struct spdp_admin { ddsrt_avl_tree_t pp; }; +struct handle_locators_xevent_arg { + struct spdp_admin *adm; +}; + static int compare_xlocators_vwrap (const void *va, const void *vb) { return ddsi_compare_xlocators (va, vb); } @@ -162,7 +167,7 @@ static int compare_spdp_pp (const void *va, const void *vb) { static const ddsrt_avl_treedef_t spdp_loc_td = DDSRT_AVL_TREEDEF_INITIALIZER(offsetof (union spdp_loc_union, c.avlnode), offsetof (union spdp_loc_union, c.xloc), compare_xlocators_vwrap, NULL); static const ddsrt_avl_treedef_t spdp_pp_td = DDSRT_AVL_TREEDEF_INITIALIZER(offsetof (struct spdp_pp, avlnode), 0, compare_spdp_pp, NULL); -static dds_return_t add_peer_address_xlocator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc) +static dds_return_t add_peer_address_xlocator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, dds_duration_t prune_delay) { // Used for initial addresses only. These are all inserted as "aging" so there cannot // be any live addresses yet. @@ -176,10 +181,18 @@ static dds_return_t add_peer_address_xlocator (struct spdp_admin *adm, const dds } avlpath; if ((n = ddsrt_avl_lookup_ipath (&spdp_loc_td, &adm->aging, xloc, &avlpath.ip)) != NULL) { + // duplicate: take the maximum prune_delay, that seems a reasonable-enough approach + if (prune_delay > n->c.prune_delay) + n->c.prune_delay = prune_delay; } else if ((n = ddsrt_malloc_s (sizeof (*n))) != NULL) { + const ddsrt_mtime_t tnow = ddsrt_time_monotonic (); n->c.xloc = *xloc; + n->c.prune_delay = prune_delay; + n->aging.tprune = ddsrt_mtime_add_duration (tnow, prune_delay); + // FIXME: initial schedule, should be "NEVER" in the absence of participants (but that isn't going to happen) + n->aging.tsched = ddsrt_mtime_add_duration (tnow, DDS_MSECS (100)); ddsrt_avl_insert_ipath (&spdp_loc_td, &adm->aging, n, &avlpath.ip); } else @@ -190,7 +203,7 @@ static dds_return_t add_peer_address_xlocator (struct spdp_admin *adm, const dds return ret; } -static dds_return_t add_peer_address_ports_interface (struct spdp_admin *adm, const ddsi_locator_t *loc) +static dds_return_t add_peer_address_ports_interface (struct spdp_admin *adm, const ddsi_locator_t *loc, dds_duration_t prune_delay) { struct ddsi_domaingv const * const gv = adm->gv; dds_return_t rc = DDS_RETCODE_OK; @@ -204,7 +217,7 @@ static dds_return_t add_peer_address_ports_interface (struct spdp_admin *adm, co if (ddsi_factory_supports (gv->xmit_conns[i]->m_factory, loc->kind)) rc = add_peer_address_xlocator (adm, &(const ddsi_xlocator_t) { .conn = gv->xmit_conns[i], - .c = *loc }); + .c = *loc }, prune_delay); } } else @@ -235,13 +248,13 @@ static dds_return_t add_peer_address_ports_interface (struct spdp_admin *adm, co const int i = (interf_idx >= 0) ? interf_idx : fallback_interf_idx; rc = add_peer_address_xlocator (adm, &(const ddsi_xlocator_t) { .conn = gv->xmit_conns[i], - .c = *loc }); + .c = *loc }, prune_delay); } } return rc; } -static dds_return_t add_peer_address_ports (struct spdp_admin *adm, ddsi_locator_t *loc) +static dds_return_t add_peer_address_ports (struct spdp_admin *adm, ddsi_locator_t *loc, dds_duration_t prune_delay) { struct ddsi_domaingv const * const gv = adm->gv; struct ddsi_tran_factory * const tran = ddsi_factory_find_supported_kind (gv, loc->kind); @@ -268,19 +281,19 @@ static dds_return_t add_peer_address_ports (struct spdp_admin *adm, ddsi_locator } GVLOG (DDS_LC_CONFIG, "add_peer_address: add %s", ddsi_locator_to_string (buf, sizeof (buf), loc)); - rc = add_peer_address_ports_interface (adm, loc); + rc = add_peer_address_ports_interface (adm, loc, prune_delay); for (int32_t i = 1; i < maxidx && rc == DDS_RETCODE_OK; i++) { ddsi_tran_set_locator_port (tran, loc, ddsi_get_port (&gv->config, DDSI_PORT_UNI_DISC, i)); if (i + 1 == maxidx) GVLOG (DDS_LC_CONFIG, " ... :%"PRIu32, loc->port); - rc = add_peer_address_ports_interface (adm, loc); + rc = add_peer_address_ports_interface (adm, loc, prune_delay); } - GVLOG (DDS_LC_CONFIG, "\n"); + GVLOG (DDS_LC_CONFIG, " (prune delay %"PRId64")\n", prune_delay); return rc; } -static dds_return_t add_peer_address (struct spdp_admin *adm, const char *addrs) +static dds_return_t add_peer_address (struct spdp_admin *adm, const char *addrs, dds_duration_t prune_delay) { DDSRT_WARNING_MSVC_OFF(4996); struct ddsi_domaingv const * const gv = adm->gv; @@ -305,7 +318,7 @@ static dds_return_t add_peer_address (struct spdp_admin *adm, const char *addrs) GVERROR ("add_peer_address: %s: address family mismatch\n", a); goto error; } - if ((rc = add_peer_address_ports (adm, &loc)) < 0) + if ((rc = add_peer_address_ports (adm, &loc, prune_delay)) < 0) { goto error; } @@ -322,7 +335,8 @@ static dds_return_t add_peer_addresses (struct spdp_admin *adm, const struct dds dds_return_t rc = DDS_RETCODE_OK; while (list && rc == DDS_RETCODE_OK) { - rc = add_peer_address (adm, list->peer); + const dds_duration_t prune_delay = list->prune_delay.isdefault ? adm->gv->config.spdp_prune_delay_initial : list->prune_delay.value; + rc = add_peer_address (adm, list->peer, prune_delay); list = list->next; } return rc; @@ -332,29 +346,44 @@ static dds_return_t populate_initial_addresses (struct spdp_admin *adm, bool add { struct ddsi_domaingv const * const gv = adm->gv; dds_return_t rc = DDS_RETCODE_OK; - for (int i = 0; i < gv->n_interfaces && rc == DDS_RETCODE_OK; i++) + + // There is no difference in the resulting configuration if the config.peers gets added + // first or the automatic localhost is: because it takes the maximum of the prune delay. + if (rc == DDS_RETCODE_OK && gv->config.peers) { - if ((gv->interfaces[i].allow_multicast & DDSI_AMC_SPDP) && - ddsi_factory_supports (gv->xmit_conns[i]->m_factory, gv->loc_spdp_mc.kind)) - { - const ddsi_xlocator_t xloc = { .conn = gv->xmit_conns[i], .c = gv->loc_spdp_mc }; - // multicast discovery addresses never expire - rc = ddsi_spdp_ref_locator (adm, &xloc); - } + rc = add_peer_addresses (adm, gv->config.peers); } if (rc == DDS_RETCODE_OK && add_localhost) { struct ddsi_config_peer_listelem peer_local; char local_addr[DDSI_LOCSTRLEN]; ddsi_locator_to_string_no_port (local_addr, sizeof (local_addr), &gv->interfaces[0].loc); - GVTRACE ("adding self (%s)\n", local_addr); + GVLOG (DDS_LC_CONFIG, "adding self (%s)\n", local_addr); peer_local.next = NULL; peer_local.peer = local_addr; + peer_local.prune_delay.isdefault = true; rc = add_peer_addresses (adm, &peer_local); } - if (rc == DDS_RETCODE_OK && gv->config.peers) + + // Add default multicast addresses for interfaces on which multicast SPDP is enabled only + // once all initial addresses have been added: that way, "add_peer_addresses" can assert + // that the live tree is still empty and trivially guarantee the invariant that the set of + // live ones is disjoint from the set of aging ones. + // + // ddsi_spdp_ref_locator takes care to move addresses over from the aging ones to the live + // ones, and so we need not worry about someone adding the multicast as a peer. + for (int i = 0; rc == DDS_RETCODE_OK && i < gv->n_interfaces; i++) { - rc = add_peer_addresses (adm, gv->config.peers); + if ((gv->interfaces[i].allow_multicast & DDSI_AMC_SPDP) && + ddsi_factory_supports (gv->xmit_conns[i]->m_factory, gv->loc_spdp_mc.kind)) + { + const ddsi_xlocator_t xloc = { .conn = gv->xmit_conns[i], .c = gv->loc_spdp_mc }; + // multicast discovery addresses never expire + char buf[DDSI_LOCSTRLEN]; + GVLOG (DDS_LC_CONFIG, "interface %s has spdp multicast enabled, adding %s (never expiring)\n", + gv->interfaces[i].name, ddsi_xlocator_to_string (buf, sizeof (buf), &xloc)); + rc = ddsi_spdp_ref_locator (adm, &xloc, DDS_INFINITY); + } } return rc; } @@ -373,18 +402,20 @@ struct spdp_admin *ddsi_spdp_scheduler_new (struct ddsi_domaingv *gv, bool add_l if (populate_initial_addresses (adm, add_localhost) != DDS_RETCODE_OK) { ddsrt_avl_free (&spdp_loc_td, &adm->live, ddsrt_free); - // FIXME: need to do aging of initial locators, too ddsrt_avl_free (&spdp_loc_td, &adm->aging, ddsrt_free); ddsrt_mutex_destroy (&adm->lock); ddsrt_free (adm); return NULL; } - - // from here on we potentially have multiple threads messing with `adm` - const ddsrt_mtime_t t_sched = ddsrt_mtime_add_duration (ddsrt_time_monotonic (), DDS_MSECS (0)); - adm->aging_xev = ddsi_qxev_callback (gv->xevents, t_sched, ddsi_spdp_handle_aging_locators_xevent_cb, NULL, 0, true); - adm->live_xev = ddsi_qxev_callback (gv->xevents, t_sched, ddsi_spdp_handle_live_locators_xevent_cb, NULL, 0, true); - return adm; + else + { + // from here on we potentially have multiple threads messing with `adm` + const ddsrt_mtime_t t_sched = ddsrt_mtime_add_duration (ddsrt_time_monotonic (), DDS_MSECS (0)); + struct handle_locators_xevent_arg arg = { .adm = adm }; + adm->aging_xev = ddsi_qxev_callback (adm->gv->xevents, t_sched, ddsi_spdp_handle_aging_locators_xevent_cb, &arg, sizeof (arg), true); + adm->live_xev = ddsi_qxev_callback (adm->gv->xevents, t_sched, ddsi_spdp_handle_live_locators_xevent_cb, &arg, sizeof (arg), true); + return adm; + } } void ddsi_spdp_scheduler_delete (struct spdp_admin *adm) @@ -471,7 +502,7 @@ void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi ddsrt_mutex_unlock (&adm->lock); } -dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc) +dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, dds_duration_t prune_delay) { dds_return_t ret = DDS_RETCODE_OK; union spdp_loc_union *n; @@ -494,6 +525,7 @@ dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_ else if ((n = ddsrt_malloc_s (sizeof (*n))) != NULL) { n->c.xloc = *xloc; + n->c.prune_delay = prune_delay; n->live.proxypp_refc = 1; n->live.lease_expiry_occurred = false; ddsrt_avl_insert_ipath (&spdp_loc_td, &adm->live, n, &avlpath.ip); @@ -530,7 +562,7 @@ void ddsi_spdp_unref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xlo // reasonable to assume they would all have discovered us at the same time (true for // Cyclone anyway) and so there won't be anything at this locator until a new one is // created. For that case, we can reasonably rely on that new one. - if (!n->live.lease_expiry_occurred) + if (!n->live.lease_expiry_occurred || n->c.prune_delay == 0) ddsrt_free (n); else { @@ -538,8 +570,9 @@ void ddsi_spdp_unref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xlo // FIXME: for those learnt along the way, an appropriate configuration setting needs to be added (here 10 times/10 minutes) // the idea is to ping at least several (10) times and keep trying for at least several (10) minutes const dds_duration_t base_intv = adm->gv->config.spdp_interval.isdefault ? DDS_SECS (30) : adm->gv->config.spdp_interval.value; - n->aging.age = (base_intv < 1 || base_intv >= DDS_SECS (60)) ? 10 : (uint32_t) (DDS_SECS (600) / base_intv); - n->aging.tsched = ddsrt_mtime_add_duration (ddsrt_time_monotonic (), base_intv); + const ddsrt_mtime_t tnow = ddsrt_time_monotonic (); + n->aging.tprune = ddsrt_mtime_add_duration (tnow, n->c.prune_delay); + n->aging.tsched = ddsrt_mtime_add_duration (tnow, base_intv); ddsrt_avl_insert (&spdp_loc_td, &adm->aging, n); } } @@ -613,52 +646,53 @@ static void resend_spdp (struct ddsi_xpack *xp, const struct ddsi_participant *p ddsrt_nonnull_all static ddsrt_mtime_t spdp_do_aging_locators (struct spdp_admin *adm, struct ddsi_xpack *xp, ddsrt_mtime_t tnow) { + struct ddsi_domaingv * const gv = adm->gv; const dds_duration_t t_coalesce = DDS_SECS (1); // aging, so low rate const ddsrt_mtime_t t_cutoff = ddsrt_mtime_add_duration (tnow, t_coalesce); ddsrt_mtime_t t_sched = DDSRT_MTIME_NEVER; ddsrt_mutex_lock (&adm->lock); - struct spdp_loc_aging *n = ddsrt_avl_find_max (&spdp_loc_td, &adm->aging); + struct spdp_loc_aging *n = ddsrt_avl_find_min (&spdp_loc_td, &adm->aging); while (n != NULL) { struct spdp_loc_aging * const nextn = ddsrt_avl_find_succ (&spdp_loc_td, &adm->aging, n); - if (t_cutoff.v < n->tsched.v) + if (n->tprune.v <= tnow.v) { - if (n->tsched.v < t_sched.v) - t_sched = n->tsched; + char buf[DDSI_LOCSTRLEN]; + GVLOGDISC("pruning SPDP locator %s\n", ddsi_xlocator_to_string (buf, sizeof (buf), &n->c.xloc)); + ddsrt_avl_delete (&spdp_loc_td, &adm->aging, n); + ddsrt_free (n); } else { - ddsrt_avl_iter_t it; - for (struct spdp_pp *ppn = ddsrt_avl_iter_first (&spdp_pp_td, &adm->pp, &it); ppn; ppn = ddsrt_avl_iter_next (&it)) - resend_spdp (xp, ppn->pp, &(struct resend_spdp_dst){ .kind = RSDK_LOCATOR, .u = { .xloc = &n->c.xloc }}); - // Why do we keep trying an address where there used to be one if there's no one anymore? Well, - // there might be someone else in the same situation with the cable cut ... - // - // That of course is interesting only if the disappearance of that node was detected by a lease - // expiration. If all participants behind the locator tell us they will be gone, it is a - // different situation: then we can (possibly) delete it immedialy. - // - // There is also the case of the initial set of addresses, there we (probably) want to drop the - // rate over time, but let's not do so now. - // - // So the strategy is to resend for a long time with a decreasing frequency - // default interval is 30s FIXME: tweak configurability - // exponential back-off would be the classic trick but is very rapid - // in this case, arguably, just pinging at the default frequency for - // a limited amount of time seems to make more sense - if (--n->age == 0) - { - ddsrt_avl_delete (&spdp_loc_td, &adm->aging, n); - ddsrt_free (n); - } - else + if (n->tsched.v <= t_cutoff.v) { + ddsrt_avl_iter_t it; + for (struct spdp_pp *ppn = ddsrt_avl_iter_first (&spdp_pp_td, &adm->pp, &it); ppn; ppn = ddsrt_avl_iter_next (&it)) + resend_spdp (xp, ppn->pp, &(struct resend_spdp_dst){ .kind = RSDK_LOCATOR, .u = { .xloc = &n->c.xloc }}); + // Why do we keep trying an address where there used to be one if there's no one anymore? Well, + // there might be someone else in the same situation with the cable cut ... + // + // That of course is interesting only if the disappearance of that node was detected by a lease + // expiration. If all participants behind the locator tell us they will be gone, it is a + // different situation: then we can (possibly) delete it immedialy. + // + // There is also the case of the initial set of addresses, there we (probably) want to drop the + // rate over time, but let's not do so now. + // + // So the strategy is to resend for a long time with a decreasing frequency + // default interval is 30s FIXME: tweak configurability + // exponential back-off would be the classic trick but is very rapid + // in this case, arguably, just pinging at the default frequency for + // a limited amount of time seems to make more sense // FIXME: this base_intv thing needs to be done smarter, or else combined with other places - const dds_duration_t base_intv = adm->gv->config.spdp_interval.isdefault ? DDS_SECS (30) : adm->gv->config.spdp_interval.value; + const dds_duration_t base_intv = gv->config.spdp_interval.isdefault ? DDS_SECS (30) : gv->config.spdp_interval.value; n->tsched = ddsrt_mtime_add_duration (tnow, base_intv); - if (n->tsched.v < t_sched.v) - t_sched = n->tsched; } + // Next time we should look at the aging locators: the first to be scheduled or pruned + if (n->tsched.v < t_sched.v) + t_sched = n->tsched; + if (n->tprune.v <= t_sched.v) + t_sched = n->tprune; } n = nextn; } @@ -723,15 +757,17 @@ static ddsrt_mtime_t spdp_do_live_locators (struct spdp_admin *adm, struct ddsi_ void ddsi_spdp_handle_aging_locators_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *xev, struct ddsi_xpack *xp, void *varg, ddsrt_mtime_t tnow) { - (void) varg; - const ddsrt_mtime_t t_sched = spdp_do_aging_locators (gv->spdp_schedule, xp, tnow); + struct handle_locators_xevent_arg * const arg = varg; + (void) gv; + const ddsrt_mtime_t t_sched = spdp_do_aging_locators (arg->adm, xp, tnow); ddsi_resched_xevent_if_earlier (xev, t_sched); } void ddsi_spdp_handle_live_locators_xevent_cb (struct ddsi_domaingv *gv, struct ddsi_xevent *xev, struct ddsi_xpack *xp, void *varg, ddsrt_mtime_t tnow) { - (void) varg; - const ddsrt_mtime_t t_sched = spdp_do_live_locators (gv->spdp_schedule, xp, tnow); + struct handle_locators_xevent_arg * const arg = varg; + (void) gv; + const ddsrt_mtime_t t_sched = spdp_do_live_locators (arg->adm, xp, tnow); ddsi_resched_xevent_if_earlier (xev, t_sched); } From 653c70f2f592469f7a0cb483535f7ceb55d2509a Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Fri, 28 Jun 2024 09:21:52 +0200 Subject: [PATCH 08/13] Track whether a SPDP locator was discovered SPDP locators can be added by the configuration but also by discovering a peer that we (presumably) cannot reach via multicast. The question is what to do with such a discovered address when the peer is no longer there. If we learnt a locator through discovery and we know for certain that the peer is no longer there (i.e., received a dispose/unregister) and that there are also no other peers at the locator, it makes sense to remove the address: a new peer that shows up at that locator would presumably try to contact us. If instead the peer's lease expired, we need to keep pinging it for a some time in case it was just the disconnect. So for each SPDP locator we need to know why we have it in the table of addreses. Signed-off-by: Erik Boasson --- src/core/ddsi/src/ddsi__spdp_schedule.h | 2 +- src/core/ddsi/src/ddsi_proxy_participant.c | 2 +- src/core/ddsi/src/ddsi_spdp_schedule.c | 81 ++++++++++++++-------- 3 files changed, 56 insertions(+), 29 deletions(-) diff --git a/src/core/ddsi/src/ddsi__spdp_schedule.h b/src/core/ddsi/src/ddsi__spdp_schedule.h index fd32c32ab4..19190f608d 100644 --- a/src/core/ddsi/src/ddsi__spdp_schedule.h +++ b/src/core/ddsi/src/ddsi__spdp_schedule.h @@ -36,7 +36,7 @@ dds_return_t ddsi_spdp_register_participant (struct spdp_admin *adm, const struc void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi_participant *pp) ddsrt_nonnull_all; -dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, dds_duration_t prune_delay) +dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, bool discovered) ddsrt_nonnull_all ddsrt_attribute_warn_unused_result; void ddsi_spdp_unref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, bool on_lease_expiry) diff --git a/src/core/ddsi/src/ddsi_proxy_participant.c b/src/core/ddsi/src/ddsi_proxy_participant.c index c033cfc5ce..333c638c6a 100644 --- a/src/core/ddsi/src/ddsi_proxy_participant.c +++ b/src/core/ddsi/src/ddsi_proxy_participant.c @@ -67,7 +67,7 @@ static void maybe_update_as_disc_for_proxypp (struct ddsi_domaingv *gv, const st switch (op) { case MUADFPOP_ADD: - if (ddsi_spdp_ref_locator (gv->spdp_schedule, &loc, gv->config.spdp_prune_delay_discovered) != DDS_RETCODE_OK) + if (ddsi_spdp_ref_locator (gv->spdp_schedule, &loc, true) != DDS_RETCODE_OK) abort (); // FIXME: propagate and delete proxy participant break; case MUADFPOP_REMOVE_ON_DELETE: diff --git a/src/core/ddsi/src/ddsi_spdp_schedule.c b/src/core/ddsi/src/ddsi_spdp_schedule.c index 023f0f745d..c9474cdd97 100644 --- a/src/core/ddsi/src/ddsi_spdp_schedule.c +++ b/src/core/ddsi/src/ddsi_spdp_schedule.c @@ -105,13 +105,13 @@ X struct spdp_loc_common { ddsrt_avl_node_t avlnode; // indexed on address ddsi_xlocator_t xloc; // the address - dds_duration_t prune_delay; + bool discovered; // true iff we discovered the existence of this locator through discovery + ddsrt_mtime_t tprune; // pruning only occurs for "aging" locators; set to 0 when discovering a new address }; struct spdp_loc_aging { struct spdp_loc_common c; ddsrt_mtime_t tsched; // time at which to ping this locator again - ddsrt_mtime_t tprune; // decremented, entry is deleted when it reaches 0 }; struct spdp_loc_live { @@ -173,6 +173,8 @@ static dds_return_t add_peer_address_xlocator (struct spdp_admin *adm, const dds // be any live addresses yet. assert (ddsrt_avl_is_empty (&adm->live)); dds_return_t ret = DDS_RETCODE_OK; + const ddsrt_mtime_t tnow = ddsrt_time_monotonic (); + const ddsrt_mtime_t tprune = ddsrt_mtime_add_duration (tnow, prune_delay); union spdp_loc_union *n; ddsrt_mutex_lock (&adm->lock); union { @@ -182,15 +184,14 @@ static dds_return_t add_peer_address_xlocator (struct spdp_admin *adm, const dds if ((n = ddsrt_avl_lookup_ipath (&spdp_loc_td, &adm->aging, xloc, &avlpath.ip)) != NULL) { // duplicate: take the maximum prune_delay, that seems a reasonable-enough approach - if (prune_delay > n->c.prune_delay) - n->c.prune_delay = prune_delay; + if (tprune.v > n->c.tprune.v) + n->c.tprune = tprune; } else if ((n = ddsrt_malloc_s (sizeof (*n))) != NULL) { - const ddsrt_mtime_t tnow = ddsrt_time_monotonic (); n->c.xloc = *xloc; - n->c.prune_delay = prune_delay; - n->aging.tprune = ddsrt_mtime_add_duration (tnow, prune_delay); + n->c.tprune = tprune; + n->c.discovered = false; // FIXME: initial schedule, should be "NEVER" in the absence of participants (but that isn't going to happen) n->aging.tsched = ddsrt_mtime_add_duration (tnow, DDS_MSECS (100)); ddsrt_avl_insert_ipath (&spdp_loc_td, &adm->aging, n, &avlpath.ip); @@ -382,7 +383,7 @@ static dds_return_t populate_initial_addresses (struct spdp_admin *adm, bool add char buf[DDSI_LOCSTRLEN]; GVLOG (DDS_LC_CONFIG, "interface %s has spdp multicast enabled, adding %s (never expiring)\n", gv->interfaces[i].name, ddsi_xlocator_to_string (buf, sizeof (buf), &xloc)); - rc = ddsi_spdp_ref_locator (adm, &xloc, DDS_INFINITY); + rc = ddsi_spdp_ref_locator (adm, &xloc, false); } } return rc; @@ -502,10 +503,12 @@ void ddsi_spdp_unregister_participant (struct spdp_admin *adm, const struct ddsi ddsrt_mutex_unlock (&adm->lock); } -dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, dds_duration_t prune_delay) +dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xloc, bool discovered) { dds_return_t ret = DDS_RETCODE_OK; union spdp_loc_union *n; + char locstr[DDSI_LOCSTRLEN]; + struct ddsi_domaingv * const gv = adm->gv; ddsrt_mutex_lock (&adm->lock); union { ddsrt_avl_ipath_t ip; @@ -517,18 +520,22 @@ dds_return_t ddsi_spdp_ref_locator (struct spdp_admin *adm, const ddsi_xlocator_ n->live.proxypp_refc = 1; n->live.lease_expiry_occurred = false; ddsrt_avl_insert (&spdp_loc_td, &adm->live, n); + GVTRACE ("spdp: ref aging loc %s, now live (refc = %"PRIu32", tprune = %"PRId64")\n", ddsi_xlocator_to_string (locstr, sizeof (locstr), xloc), n->live.proxypp_refc, n->c.tprune.v); } else if ((n = ddsrt_avl_lookup_ipath (&spdp_loc_td, &adm->live, xloc, &avlpath.ip)) != NULL) { n->live.proxypp_refc++; + GVTRACE ("spdp: ref live loc %s (refc = %"PRIu32", tprune = %"PRId64")\n", ddsi_xlocator_to_string (locstr, sizeof (locstr), xloc), n->live.proxypp_refc, n->c.tprune.v); } else if ((n = ddsrt_malloc_s (sizeof (*n))) != NULL) { n->c.xloc = *xloc; - n->c.prune_delay = prune_delay; + n->c.tprune.v = 0; + n->c.discovered = discovered; n->live.proxypp_refc = 1; n->live.lease_expiry_occurred = false; ddsrt_avl_insert_ipath (&spdp_loc_td, &adm->live, n, &avlpath.ip); + GVTRACE ("spdp: new live loc %s (refc = %"PRIu32", tprune = %"PRId64")\n", ddsi_xlocator_to_string (locstr, sizeof (locstr), xloc), n->live.proxypp_refc, n->c.tprune.v); } else { @@ -545,35 +552,55 @@ void ddsi_spdp_unref_locator (struct spdp_admin *adm, const ddsi_xlocator_t *xlo union spdp_loc_union *n; ddsrt_mutex_lock (&adm->lock); ddsrt_avl_dpath_t dp; + char locstr[DDSI_LOCSTRLEN]; + struct ddsi_domaingv * const gv = adm->gv; n = ddsrt_avl_lookup_dpath (&spdp_loc_td, &adm->live, xloc, &dp); assert (n != NULL); assert (n->live.proxypp_refc > 0); if (on_lease_expiry) n->live.lease_expiry_occurred = true; - if (--n->live.proxypp_refc == 0) + if (--n->live.proxypp_refc > 0) + { + GVTRACE ("spdp: unref live loc %s (refc = %"PRIu32", tprune = %"PRId64")\n", ddsi_xlocator_to_string (locstr, sizeof (locstr), xloc), n->live.proxypp_refc, n->c.tprune.v); + } + else { ddsrt_avl_delete_dpath (&spdp_loc_td, &adm->live, n, &dp); assert (ddsrt_avl_lookup (&spdp_loc_td, &adm->aging, xloc) == NULL); - // If all proxy participants informed us they were being deleted, then we don't need to - // start aging it - // - // What if it is shortly after startup and we'd still be pinging it if there hadn't been - // a proxy participant at this address? It is unicast, so if there are others it is - // reasonable to assume they would all have discovered us at the same time (true for - // Cyclone anyway) and so there won't be anything at this locator until a new one is - // created. For that case, we can reasonably rely on that new one. - if (!n->live.lease_expiry_occurred || n->c.prune_delay == 0) + const ddsrt_mtime_t tnow = ddsrt_time_monotonic (); + if (!n->live.lease_expiry_occurred && n->c.discovered) + { + // If all proxy participants at the locator informed us they were being deleted, + // and the address is one we discovered, drop it immediately (on the assumption + // that next time it is used, we'll discover it again) + GVTRACE ("spdp: drop live loc %s: delete (explicit, discovered)\n", ddsi_xlocator_to_string (locstr, sizeof (locstr), xloc)); + ddsrt_free (n); + } + else if (!n->live.lease_expiry_occurred && n->c.tprune.v <= tnow.v) + { + // What if it is shortly after startup and we'd still be pinging it if there hadn't been + // a proxy participant at this address? It is unicast, so if there are others it is + // reasonable to assume they would all have discovered us at the same time (true for + // Cyclone anyway) and so there won't be anything at this locator until a new one is + // created. For that case, we can reasonably rely on that new one. + + // FIXME: do I want really to drop it immediately even if the address was configured and wouldn't have expired yet? no, right? + GVTRACE ("spdp: drop live loc %s: delete (%s, tprune = %"PRId64")\n", ddsi_xlocator_to_string (locstr, sizeof (locstr), xloc), n->live.lease_expiry_occurred ? "implicit" : "explicit", n->c.tprune.v); ddsrt_free (n); + } else { // FIXME: Discovery/Peers: user needs to set timeout for each locator, that should be used here for those in the initial set // FIXME: for those learnt along the way, an appropriate configuration setting needs to be added (here 10 times/10 minutes) // the idea is to ping at least several (10) times and keep trying for at least several (10) minutes const dds_duration_t base_intv = adm->gv->config.spdp_interval.isdefault ? DDS_SECS (30) : adm->gv->config.spdp_interval.value; - const ddsrt_mtime_t tnow = ddsrt_time_monotonic (); - n->aging.tprune = ddsrt_mtime_add_duration (tnow, n->c.prune_delay); + ddsrt_mtime_t tprune = ddsrt_mtime_add_duration (tnow, gv->config.spdp_prune_delay_discovered); + if (tprune.v > n->c.tprune.v) + n->c.tprune = tprune; n->aging.tsched = ddsrt_mtime_add_duration (tnow, base_intv); ddsrt_avl_insert (&spdp_loc_td, &adm->aging, n); + GVTRACE ("spdp: drop live loc %s: now aging (tprune = %"PRId64")\n", ddsi_xlocator_to_string (locstr, sizeof (locstr), xloc), n->c.tprune.v); + ddsi_resched_xevent_if_earlier (adm->aging_xev, (n->aging.tsched.v < n->c.tprune.v) ? n->aging.tsched : n->c.tprune); } } ddsrt_mutex_unlock (&adm->lock); @@ -655,10 +682,10 @@ static ddsrt_mtime_t spdp_do_aging_locators (struct spdp_admin *adm, struct ddsi while (n != NULL) { struct spdp_loc_aging * const nextn = ddsrt_avl_find_succ (&spdp_loc_td, &adm->aging, n); - if (n->tprune.v <= tnow.v) + if (n->c.tprune.v <= tnow.v) { char buf[DDSI_LOCSTRLEN]; - GVLOGDISC("pruning SPDP locator %s\n", ddsi_xlocator_to_string (buf, sizeof (buf), &n->c.xloc)); + GVLOGDISC("spdp: prune loc %s\n", ddsi_xlocator_to_string (buf, sizeof (buf), &n->c.xloc)); ddsrt_avl_delete (&spdp_loc_td, &adm->aging, n); ddsrt_free (n); } @@ -688,11 +715,11 @@ static ddsrt_mtime_t spdp_do_aging_locators (struct spdp_admin *adm, struct ddsi const dds_duration_t base_intv = gv->config.spdp_interval.isdefault ? DDS_SECS (30) : gv->config.spdp_interval.value; n->tsched = ddsrt_mtime_add_duration (tnow, base_intv); } - // Next time we should look at the aging locators: the first to be scheduled or pruned + // Next time to look at the aging locators again: the first to be scheduled or pruned if (n->tsched.v < t_sched.v) t_sched = n->tsched; - if (n->tprune.v <= t_sched.v) - t_sched = n->tprune; + if (n->c.tprune.v <= t_sched.v) + t_sched = n->c.tprune; } n = nextn; } From 0f9b718bc7df7adf74d0861f2a054f551230a10a Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Wed, 26 Jun 2024 15:43:43 +0200 Subject: [PATCH 09/13] Periodically trace outgoing message queue length This replaces the somewhat silly (and ancient) tracing of the queue length on every insert. --- src/core/ddsi/src/ddsi_xevent.c | 84 ++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 27 deletions(-) diff --git a/src/core/ddsi/src/ddsi_xevent.c b/src/core/ddsi/src/ddsi_xevent.c index 51505cd529..e06801d52a 100644 --- a/src/core/ddsi/src/ddsi_xevent.c +++ b/src/core/ddsi/src/ddsi_xevent.c @@ -95,7 +95,6 @@ struct ddsi_xeventq { ddsrt_avl_tree_t msg_xevents; struct ddsi_xevent_nt *non_timed_xmit_list_oldest; struct ddsi_xevent_nt *non_timed_xmit_list_newest; /* undefined if ..._oldest == NULL */ - size_t non_timed_xmit_list_length; size_t queued_rexmit_bytes; size_t queued_rexmit_msgs; size_t max_queued_rexmit_bytes; @@ -107,6 +106,9 @@ struct ddsi_xeventq { ddsrt_cond_t cond; size_t cum_rexmit_bytes; + size_t ntxl_length; + ddsrt_mtime_t ntxl_t_last_update; + uint64_t ntxl_length_time; }; static uint32_t xevent_thread (struct ddsi_xeventq *xevq); @@ -185,7 +187,18 @@ static void forget_msg (struct ddsi_xeventq *evq, struct ddsi_xevent_nt *ev) ddsrt_avl_delete (&msg_xevents_treedef, &evq->msg_xevents, ev); } -static void add_to_non_timed_xmit_list (struct ddsi_xeventq *evq, struct ddsi_xevent_nt *ev) +static void update_non_timed_list_stats (struct ddsi_xeventq *evq, int delta, ddsrt_mtime_t tnow) +{ + if (tnow.v > evq->ntxl_t_last_update.v) + { + uint64_t dt = (uint64_t) (tnow.v - evq->ntxl_t_last_update.v); + evq->ntxl_length_time += dt * evq->ntxl_length; + } + evq->ntxl_length += (size_t) delta; + evq->ntxl_t_last_update = tnow; +} + +static void add_to_non_timed_xmit_list (struct ddsi_xeventq *evq, struct ddsi_xevent_nt *ev, ddsrt_mtime_t tnow) { ev->listnode.next = NULL; if (evq->non_timed_xmit_list_oldest == NULL) { @@ -195,7 +208,7 @@ static void add_to_non_timed_xmit_list (struct ddsi_xeventq *evq, struct ddsi_xe evq->non_timed_xmit_list_newest->listnode.next = ev; } evq->non_timed_xmit_list_newest = ev; - evq->non_timed_xmit_list_length++; + update_non_timed_list_stats (evq, 1, tnow); if (ev->kind == XEVK_MSG_REXMIT) remember_msg (evq, ev); @@ -203,14 +216,14 @@ static void add_to_non_timed_xmit_list (struct ddsi_xeventq *evq, struct ddsi_xe ddsrt_cond_broadcast (&evq->cond); } -static struct ddsi_xevent_nt *getnext_from_non_timed_xmit_list (struct ddsi_xeventq *evq) +static struct ddsi_xevent_nt *getnext_from_non_timed_xmit_list (struct ddsi_xeventq *evq, ddsrt_mtime_t tnow) { /* function removes and returns the first item in the list (from the front) and frees the container */ struct ddsi_xevent_nt *ev = evq->non_timed_xmit_list_oldest; if (ev != NULL) { - evq->non_timed_xmit_list_length--; + update_non_timed_list_stats (evq, -1, tnow); evq->non_timed_xmit_list_oldest = ev->listnode.next; if (ev->kind == XEVK_MSG_REXMIT) @@ -398,13 +411,12 @@ static void qxev_insert (struct ddsi_xevent *ev) } } -static void qxev_insert_nt (struct ddsi_xevent_nt *ev) +static void qxev_insert_nt (struct ddsi_xevent_nt *ev, ddsrt_mtime_t tnow) { /* qxev_insert is how all non-timed xevents are queued. */ struct ddsi_xeventq *evq = ev->evq; ASSERT_MUTEX_HELD (&evq->lock); - add_to_non_timed_xmit_list (evq, ev); - EVQTRACE (" (%"PRIuSIZE" in queue)\n", evq->non_timed_xmit_list_length); + add_to_non_timed_xmit_list (evq, ev, tnow); } static int msg_xevents_cmp (const void *a, const void *b) @@ -422,7 +434,6 @@ struct ddsi_xeventq * ddsi_xeventq_new (struct ddsi_domaingv *gv, size_t max_que ddsrt_avl_init (&msg_xevents_treedef, &evq->msg_xevents); evq->non_timed_xmit_list_oldest = NULL; evq->non_timed_xmit_list_newest = NULL; - evq->non_timed_xmit_list_length = 0; evq->terminate = 0; evq->thrst = NULL; evq->max_queued_rexmit_bytes = max_queued_rexmit_bytes; @@ -434,6 +445,9 @@ struct ddsi_xeventq * ddsi_xeventq_new (struct ddsi_domaingv *gv, size_t max_que ddsrt_cond_init (&evq->cond); evq->cum_rexmit_bytes = 0; + evq->ntxl_length_time = 0; + evq->ntxl_length = 0; + evq->ntxl_t_last_update = ddsrt_time_monotonic (); return evq; } @@ -485,7 +499,7 @@ void ddsi_xeventq_free (struct ddsi_xeventq *evq) while (!non_timed_xmit_list_is_empty (evq)) { ddsi_thread_state_awake_to_awake_no_nest (ddsi_lookup_thread_state ()); - handle_nontimed_xevent (evq, getnext_from_non_timed_xmit_list (evq), xp); + handle_nontimed_xevent (evq, getnext_from_non_timed_xmit_list (evq, ddsrt_time_monotonic ()), xp); } ddsrt_mutex_unlock (&evq->lock); ddsi_xpack_send (xp, false); @@ -592,7 +606,7 @@ static void handle_xevents (struct ddsi_thread_state * const thrst, struct ddsi_ if (!non_timed_xmit_list_is_empty (xevq)) { - struct ddsi_xevent_nt *xev = getnext_from_non_timed_xmit_list (xevq); + struct ddsi_xevent_nt *xev = getnext_from_non_timed_xmit_list (xevq, tnow); ddsi_thread_state_awake_to_awake_no_nest (thrst); handle_nontimed_xevent (xevq, xev, xp); cont = true; @@ -616,38 +630,54 @@ void ddsi_xeventq_step (struct ddsi_xeventq *evq) ddsi_xpack_free (xp); } -static uint32_t xevent_thread (struct ddsi_xeventq * xevq) +static uint32_t xevent_thread (struct ddsi_xeventq * evq) { struct ddsi_thread_state * const thrst = ddsi_lookup_thread_state (); + struct ddsi_domaingv * const gv = evq->gv; ddsrt_mtime_t next_thread_cputime = { 0 }; + ddsrt_mtime_t next_print_queue_length; + ddsrt_mtime_t last_ntxl_t_last_update; + uint64_t last_ntxl_length_time = 0; + next_print_queue_length = last_ntxl_t_last_update = ddsrt_time_monotonic (); - struct ddsi_xpack * const xp = ddsi_xpack_new (xevq->gv, false); - ddsrt_mutex_lock (&xevq->lock); - while (!xevq->terminate) + struct ddsi_xpack * const xp = ddsi_xpack_new (gv, false); + ddsrt_mutex_lock (&evq->lock); + while (!evq->terminate) { ddsrt_mtime_t tnow = ddsrt_time_monotonic (); - LOG_THREAD_CPUTIME (&xevq->gv->logconfig, next_thread_cputime); + LOG_THREAD_CPUTIME (&gv->logconfig, next_thread_cputime); + if ((gv->logconfig.c.mask & DDS_LC_TRACE) && + (tnow.v >= next_print_queue_length.v && evq->ntxl_t_last_update.v > last_ntxl_t_last_update.v)) + { + // add a line to the trace at most once per second, and only when something happened + EVQTRACE("queue length %"PRIuSIZE" avg since last line %f\n", + evq->ntxl_length, + (double) (evq->ntxl_length_time - last_ntxl_length_time) / (double) (evq->ntxl_t_last_update.v - last_ntxl_t_last_update.v)); + last_ntxl_length_time = evq->ntxl_length_time; + last_ntxl_t_last_update = evq->ntxl_t_last_update; + next_print_queue_length = ddsrt_mtime_add_duration (tnow, DDS_SECS (1)); + } ddsi_thread_state_awake_fixed_domain (thrst); - handle_xevents (thrst, xevq, xp, tnow); + handle_xevents (thrst, evq, xp, tnow); /* Send to the network unlocked, as it may sleep due to bandwidth limitation */ - ddsrt_mutex_unlock (&xevq->lock); + ddsrt_mutex_unlock (&evq->lock); ddsi_xpack_send (xp, false); - ddsrt_mutex_lock (&xevq->lock); + ddsrt_mutex_lock (&evq->lock); ddsi_thread_state_asleep (thrst); - if (!non_timed_xmit_list_is_empty (xevq) || xevq->terminate) + if (!non_timed_xmit_list_is_empty (evq) || evq->terminate) { /* continue immediately */ } else { - ddsrt_mtime_t twakeup = earliest_in_xeventq (xevq); + ddsrt_mtime_t twakeup = earliest_in_xeventq (evq); if (twakeup.v == DDS_NEVER) { /* no scheduled events nor any non-timed events */ - ddsrt_cond_wait (&xevq->cond, &xevq->lock); + ddsrt_cond_wait (&evq->cond, &evq->lock); } else { @@ -656,12 +686,12 @@ static uint32_t xevent_thread (struct ddsi_xeventq * xevq) if (twakeup.v > tnow.v) { twakeup.v -= tnow.v; - ddsrt_cond_waitfor (&xevq->cond, &xevq->lock, twakeup.v); + ddsrt_cond_waitfor (&evq->cond, &evq->lock, twakeup.v); } } } } - ddsrt_mutex_unlock (&xevq->lock); + ddsrt_mutex_unlock (&evq->lock); ddsi_xpack_send (xp, false); ddsi_xpack_free (xp); return 0; @@ -675,7 +705,7 @@ void ddsi_qxev_msg (struct ddsi_xeventq *evq, struct ddsi_xmsg *msg) ddsrt_mutex_lock (&evq->lock); ev = qxev_common_nt (evq, XEVK_MSG); ev->u.msg.msg = msg; - qxev_insert_nt (ev); + qxev_insert_nt (ev, ddsrt_time_monotonic ()); ddsrt_mutex_unlock (&evq->lock); } @@ -687,7 +717,7 @@ void ddsi_qxev_nt_callback (struct ddsi_xeventq *evq, void (*cb) (void *arg), vo ev = qxev_common_nt (evq, XEVK_NT_CALLBACK); ev->u.callback.cb = cb; ev->u.callback.arg = arg; - qxev_insert_nt (ev); + qxev_insert_nt (ev, ddsrt_time_monotonic ()); ddsrt_mutex_unlock (&evq->lock); } @@ -743,7 +773,7 @@ enum ddsi_qxev_msg_rexmit_result ddsi_qxev_msg_rexmit_wrlock_held (struct ddsi_x ev->u.msg_rexmit.queued_rexmit_bytes = msg_size; evq->queued_rexmit_bytes += msg_size; evq->queued_rexmit_msgs++; - qxev_insert_nt (ev); + qxev_insert_nt (ev, ddsrt_time_monotonic ()); #if 0 GVTRACE ("AAA(%p,%"PA_PRIuSIZE")", (void *) ev, msg_size); #endif From bba519ec86c777e95edd13a5f00e3ce0f0c8eb1a Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Fri, 6 Sep 2024 16:34:11 +0200 Subject: [PATCH 10/13] EADDRNOTAVAIL is not a generic error for sendmsg Signed-off-by: Erik Boasson --- src/ddsrt/src/sockets/posix/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ddsrt/src/sockets/posix/socket.c b/src/ddsrt/src/sockets/posix/socket.c index af467cdc12..74d20a322b 100644 --- a/src/ddsrt/src/sockets/posix/socket.c +++ b/src/ddsrt/src/sockets/posix/socket.c @@ -520,6 +520,7 @@ send_error_to_retcode(int errnum) return DDS_RETCODE_OUT_OF_RESOURCES; case EHOSTUNREACH: case EHOSTDOWN: + case EADDRNOTAVAIL: return DDS_RETCODE_NO_CONNECTION; default: break; From 4cb337b67ef3316701e91524f5029cc84b147be9 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Fri, 6 Sep 2024 16:34:56 +0200 Subject: [PATCH 11/13] Consider pinging localhost when ppidx not default Signed-off-by: Erik Boasson --- src/core/ddsi/src/ddsi_init.c | 97 ++++----- src/core/xtests/spdp_scenarios.bash | 295 ++++++++++++++++++++++++++++ 2 files changed, 345 insertions(+), 47 deletions(-) create mode 100644 src/core/xtests/spdp_scenarios.bash diff --git a/src/core/ddsi/src/ddsi_init.c b/src/core/ddsi/src/ddsi_init.c index 3aa0247548..7ebe44bec1 100644 --- a/src/core/ddsi/src/ddsi_init.c +++ b/src/core/ddsi/src/ddsi_init.c @@ -1072,6 +1072,54 @@ static void set_locator_port_if_not_unspec_locator (const struct ddsi_domaingv * } } +static void decide_participant_index_and_add_localhost_to_peers (struct ddsi_domaingv *gv, bool *add_localhost_to_initial_peers) +{ + *add_localhost_to_initial_peers = false; +#ifndef NDEBUG + for (int i = 0; i < gv->n_interfaces; i++) + { + // sanity check that by now we have eliminated "default" from allow_multicast and + // that no bits in allow_multicast are set if the interface is not capable of + // handling multicast + assert ((gv->interfaces[i].allow_multicast & DDSI_AMC_DEFAULT) == 0); + assert (gv->interfaces[i].allow_multicast == 0 || gv->interfaces[i].mc_capable); + } +#endif + bool all_allow_spdp_mc = true, none_allow_spdp_mc = true; + for (int i = 0; i < gv->n_interfaces; i++) + { + if (gv->interfaces[i].allow_multicast & DDSI_AMC_SPDP) + none_allow_spdp_mc = false; + else + all_allow_spdp_mc = false; + } + if (gv->config.participantIndex == DDSI_PARTICIPANT_INDEX_DEFAULT) + { + if (all_allow_spdp_mc && gv->config.peers == NULL) + { + GVTRACE ("all interfaces allow spdp multicast, no peers defined: defaulting participant index to \"none\"\n"); + gv->config.participantIndex = DDSI_PARTICIPANT_INDEX_NONE; + } else if (all_allow_spdp_mc) + { + GVTRACE ("all interfaces allow spdp multicast, but peers defined: defaulting participant index to \"auto\"\n"); + gv->config.participantIndex = DDSI_PARTICIPANT_INDEX_AUTO; + } + else + { + GVTRACE ("some interfaces disallow spdp multicast: defaulting participant index to \"auto\"\n"); + gv->config.participantIndex = DDSI_PARTICIPANT_INDEX_AUTO; + } + } + if (gv->config.add_localhost_to_peers == DDSI_BOOLDEF_TRUE || + (none_allow_spdp_mc && gv->config.add_localhost_to_peers != DDSI_BOOLDEF_FALSE)) + { + // add self to as_disc, but only once we have everything set up to actually do that + if (gv->config.add_localhost_to_peers == DDSI_BOOLDEF_DEFAULT) + GVTRACE ("No multicast SPDP, adding localhost to peers\n"); + *add_localhost_to_initial_peers = true; + } +} + int ddsi_init (struct ddsi_domaingv *gv, struct ddsi_psmx_instance_locators *psmx_locators) { uint32_t port_disc_uc = 0; @@ -1216,53 +1264,8 @@ int ddsi_init (struct ddsi_domaingv *gv, struct ddsi_psmx_instance_locators *psm // // No interfaces allow SPDP multicast: // - default ppidx = AUTO, default peers = { localhost } - // - // MaxAutoParticipantIndex -> 100 -+ - // UnicastSPDPInterval -> 30s |_ perhaps adding something like this - // @silentports -> 5min | would make sense? - // @dropafter -> 30min -+ - bool add_localhost_to_initial_peers = false; - if (gv->config.participantIndex == DDSI_PARTICIPANT_INDEX_DEFAULT) - { -#ifndef NDEBUG - for (int i = 0; i < gv->n_interfaces; i++) - { - // sanity check that by now we have eliminated "default" from allow_multicast and - // that no bits in allow_multicast are set if the interface is not capable of - // handling multicast - assert ((gv->interfaces[i].allow_multicast & DDSI_AMC_DEFAULT) == 0); - assert (gv->interfaces[i].allow_multicast == 0 || gv->interfaces[i].mc_capable); - } -#endif - bool all_allow_spdp_mc = true, none_allow_spdp_mc = true; - for (int i = 0; i < gv->n_interfaces; i++) - { - if (gv->interfaces[i].allow_multicast & DDSI_AMC_SPDP) - none_allow_spdp_mc = false; - else - all_allow_spdp_mc = false; - } - if (all_allow_spdp_mc && gv->config.peers == NULL) - { - GVTRACE ("all interfaces allow spdp multicast, no peers defined: defaulting participant index to \"none\"\n"); - gv->config.participantIndex = DDSI_PARTICIPANT_INDEX_NONE; - } else if (all_allow_spdp_mc) - { - GVTRACE ("all interfaces allow spdp multicast, but peers defined: defaulting participant index to \"auto\"\n"); - gv->config.participantIndex = DDSI_PARTICIPANT_INDEX_AUTO; - } - else - { - GVTRACE ("some interfaces disallow spdp multicast: defaulting participant index to \"auto\"\n"); - gv->config.participantIndex = DDSI_PARTICIPANT_INDEX_AUTO; - } - if (gv->config.add_localhost_to_peers == DDSI_BOOLDEF_TRUE || - (none_allow_spdp_mc && gv->config.add_localhost_to_peers != DDSI_BOOLDEF_FALSE)) - { - // add self to as_disc, but only once we have everything set up to actually do that - add_localhost_to_initial_peers = true; - } - } + bool add_localhost_to_initial_peers; + decide_participant_index_and_add_localhost_to_peers (gv, &add_localhost_to_initial_peers); if (set_recvips (gv) < 0) goto err_set_recvips; diff --git a/src/core/xtests/spdp_scenarios.bash b/src/core/xtests/spdp_scenarios.bash new file mode 100644 index 0000000000..9f9a7cb935 --- /dev/null +++ b/src/core/xtests/spdp_scenarios.bash @@ -0,0 +1,295 @@ +cu='truetrace' + +function start () { + allowmc="$1" ; shift + spdp="0.5s$12s" ; shift + parti="$12s2s" ; shift + peers="" + if [ $# -gt 0 ] ; then + peers="" + shift + done + peers="$peers" + fi + set -x + CYCLONEDDS_URI="$cu$allowmc$spdp$parti$peerscdds.log.${#pids[@]}" bin/Debug/ddsperf sanity & pids[${#pids[@]}]=$! + set +x +} + +x=true + +# sanity check a bunch of cases where discovery should/shouldn't happen +echo "====== I.1 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start true 239.255.0.2 default +start true 239.255.0.2 default +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +$x || exit 1 + +echo "====== I.2 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.2 default localhost +start false 239.255.0.2 default localhost +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +$x || exit 1 + +echo "====== I.3 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start true 0.0.0.0 default +start true 0.0.0.0 default +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 && x=false +grep -q 'SPDP.*NEW' cdds.log.1 && x=false +$x || exit 1 + +echo "====== I.4 ========" +# no discovery happens: first one says peers defined => well-known ports, pings localhost +# second says: MC supported/allowed, no peers defined => port none +# a bit weird, but not wrong +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start true 0.0.0.0 default localhost +start true 0.0.0.0 default +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 && x=false +grep -q 'SPDP.*NEW' cdds.log.1 && x=false +$x || exit 1 + +echo "====== I.5 ========" +# no discovery happens: first one says peers defined => well-known ports, pings localhost +# second says: peers defined => well-known ports +# a bit weird, but not wrong +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start true 0.0.0.0 default localhost +start true 0.0.0.0 default "" +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +$x || exit 1 + +echo "====== I.6 ========" +# no discovery happens: first one says MC works => no localhost, random port +# second says: peers defined => no MC, well-known ports +# but that doesn't allow them to discover each other +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start true 239.255.0.2 default +start false 0.0.0.0 default localhost +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 && x=false +grep -q 'SPDP.*NEW' cdds.log.1 && x=false +$x || exit 1 + +echo "====== I.7 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start true 239.255.0.2 auto +start false 0.0.0.0 default +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +$x || exit 1 + +echo "====== I.8 ========" +# fails: second uses random port +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.2 default localhost +start true 239.255.0.1 default +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 && x=false +grep -q 'SPDP.*NEW' cdds.log.1 && x=false +$x || exit 1 + +echo "====== I.9 ========" +# now it works: participant index set to auto +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.2 default localhost +start true 239.255.0.1 auto +sleep 1 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +$x || exit 1 + +# pruning of useless initial locators +# prune delays are 2s, spdp interval = 0.5s + +echo "====== II.1 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start true 239.255.0.1 default 168.10.10.10 +start true 239.255.0.1 default +sleep 3 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +grep -q 'spdp: prune loc.*168\.10\.10\.10' cdds.log.0 || x=false +$x || exit 1 + +echo "====== II.2 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.1 0 "" +start false 239.255.0.1 1 "" +sleep 3 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +# mustn't prune the address of the existing one +grep -q 'spdp: prune loc.*127\.0\.0\.1:7412' cdds.log.0 && x=false +grep -q 'spdp: prune loc.*127\.0\.0\.1:7410' cdds.log.1 && x=false +# must prune some others +grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.0 || x=false +grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.1 || x=false +$x || exit 1 + +echo "====== II.3 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.1 0 localhost +start false 239.255.0.1 1 localhost +sleep 3 +kill ${pids[@]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +# mustn't prune the address of the existing one +grep -q 'spdp: prune loc.*127\.0\.0\.1:7412' cdds.log.0 && x=false +grep -q 'spdp: prune loc.*127\.0\.0\.1:7410' cdds.log.1 && x=false +# must prune some others +grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.0 || x=false +grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.1 || x=false +$x || exit 1 + +echo "====== II.4 ========" +# first stops early, second discovered the address +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.1 0 localhost +start false 239.255.0.1 1 +sleep 3 +kill ${pids[0]} +sleep 1 +kill ${pids[1]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +# clean termination -- verify: +grep -q 'SPDP.*ST3' cdds.log.1 || x=false +# 2s until pruning +# but only a 1s wait -> can't have pruned based on time yet +# clean termination: should've been dropped +grep -q 'spdp: drop live loc' cdds.log.1 || x=false +$x || exit 1 + +echo "====== II.5 ========" +# second stops early, first simply uses initial locator +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.1 0 localhost +start false 239.255.0.1 1 +sleep 3 +kill ${pids[1]} +sleep 1 +kill ${pids[0]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +# clean termination -- verify: +grep -q 'SPDP.*ST3' cdds.log.0 || x=false +# enough time to clean up unused locators +grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.0 || x=false +# locator was in initial set +# locator kept alive by existing participant +# clean termination -> may drop it +# initial prune delay passed, so dropped immediately +grep -q 'spdp: drop live loc.*127\.0\.0\.1:7412' cdds.log.0 || x=false +$x || exit 1 + +echo "====== II.6 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.1 0 localhost +start false 239.255.0.1 1 +sleep 3 +kill -9 ${pids[0]} +sleep 3 +kill ${pids[1]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +# 2s until lease expiry +# 2s until pruning +# but only a 3s wait -> not pruned yet +grep -q 'spdp: prune loc' cdds.log.1 && x=false +$x || exit 1 + +echo "====== II.7 ========" +rm -rf cdds.log.0 cdds.log.1 +declare -a pids +start false 239.255.0.1 0 localhost +start false 239.255.0.1 1 +sleep 3 +kill -9 ${pids[0]} +sleep 5 +kill ${pids[1]} +wait +unset pids +grep -q 'SPDP.*NEW' cdds.log.0 || x=false +grep -q 'SPDP.*NEW' cdds.log.1 || x=false +# 2s until lease expiry +# 2s until pruning +# 5s wait -> should be pruned +grep -q 'spdp: prune loc' cdds.log.1 || x=false +$x || exit 1 From 40db7169270e4cc04e4fcd2d928286468ec88256 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Tue, 24 Sep 2024 10:26:06 +0200 Subject: [PATCH 12/13] Peers up to and including MaxAutoParticipantIndex Signed-off-by: Erik Boasson --- src/core/ddsi/src/ddsi_spdp_schedule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/ddsi/src/ddsi_spdp_schedule.c b/src/core/ddsi/src/ddsi_spdp_schedule.c index c9474cdd97..99725e11b1 100644 --- a/src/core/ddsi/src/ddsi_spdp_schedule.c +++ b/src/core/ddsi/src/ddsi_spdp_schedule.c @@ -283,10 +283,10 @@ static dds_return_t add_peer_address_ports (struct spdp_admin *adm, ddsi_locator GVLOG (DDS_LC_CONFIG, "add_peer_address: add %s", ddsi_locator_to_string (buf, sizeof (buf), loc)); rc = add_peer_address_ports_interface (adm, loc, prune_delay); - for (int32_t i = 1; i < maxidx && rc == DDS_RETCODE_OK; i++) + for (int32_t i = 1; i <= maxidx && rc == DDS_RETCODE_OK; i++) { ddsi_tran_set_locator_port (tran, loc, ddsi_get_port (&gv->config, DDSI_PORT_UNI_DISC, i)); - if (i + 1 == maxidx) + if (i == maxidx) GVLOG (DDS_LC_CONFIG, " ... :%"PRIu32, loc->port); rc = add_peer_address_ports_interface (adm, loc, prune_delay); } From 31e30cb90ec8384e4020936503ce03849f42d399 Mon Sep 17 00:00:00 2001 From: Erik Boasson Date: Mon, 9 Sep 2024 07:31:44 +0200 Subject: [PATCH 13/13] Add tests for various SPDP-related behaviours Replaces src/core/xtests/spdp_scenarios.bash --- src/core/ddsc/tests/CMakeLists.txt | 1 + src/core/ddsc/tests/spdp.c | 608 ++++++++++++++++++++++++++++ src/core/xtests/spdp_scenarios.bash | 295 -------------- 3 files changed, 609 insertions(+), 295 deletions(-) create mode 100644 src/core/ddsc/tests/spdp.c delete mode 100644 src/core/xtests/spdp_scenarios.bash diff --git a/src/core/ddsc/tests/CMakeLists.txt b/src/core/ddsc/tests/CMakeLists.txt index d4ba15ccec..48ef8b8b26 100644 --- a/src/core/ddsc/tests/CMakeLists.txt +++ b/src/core/ddsc/tests/CMakeLists.txt @@ -78,6 +78,7 @@ set(ddsc_test_sources "read_instance.c" "redundantnw.c" "register.c" + "spdp.c" "subscriber.c" "take_instance.c" "time.c" diff --git a/src/core/ddsc/tests/spdp.c b/src/core/ddsc/tests/spdp.c new file mode 100644 index 0000000000..b3bacd2c86 --- /dev/null +++ b/src/core/ddsc/tests/spdp.c @@ -0,0 +1,608 @@ +// Copyright(c) 2024 ZettaScale Technology BV +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License v. 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License +// v. 1.0 which is available at +// http://www.eclipse.org/org/documents/edl-v10.php. +// +// SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause + +#include "dds/ddsrt/heap.h" +#include "dds/ddsrt/io.h" +#include "dds/ddsrt/string.h" +#include "dds/ddsrt/environ.h" +#include "dds/ddsrt/process.h" +#include "dds__entity.h" +#include "dds/ddsi/ddsi_guid.h" +#include "dds/ddsi/ddsi_entity.h" +#include "dds/ddsi/ddsi_proxy_participant.h" +#include "ddsi__lease.h" +#include "ddsi__entity_index.h" +#include "ddsi__misc.h" +#include "dds/dds.h" + +#include "test_common.h" + +typedef struct { char str[DDSI_LOCSTRLEN]; } locstr_t; + +static void get_localhost_address (locstr_t *str) +{ + const char *cyclonedds_uri = ""; + (void) ddsrt_getenv ("CYCLONEDDS_URI", &cyclonedds_uri); + dds_entity_t eh = dds_create_domain (0, cyclonedds_uri); + CU_ASSERT_FATAL (eh > 0); + const struct ddsi_domaingv *gv = get_domaingv (eh); + CU_ASSERT_FATAL (gv != NULL); + ddsi_locator_to_string_no_port (str->str, sizeof (str->str), &gv->interfaces[0].loc); + dds_return_t rc = dds_delete (eh); + CU_ASSERT_FATAL (rc == 0); +} + +static void get_nonexist_address (locstr_t *str) +{ + const char *cyclonedds_uri = ""; + (void) ddsrt_getenv ("CYCLONEDDS_URI", &cyclonedds_uri); + dds_entity_t eh = dds_create_domain (0, cyclonedds_uri); + CU_ASSERT_FATAL (eh > 0); + const struct ddsi_domaingv *gv = get_domaingv (eh); + CU_ASSERT_FATAL (gv != NULL); + // No guarantee that this really yields a locator that doesn't point to an existing machine + // running DDSI at the port number we use, but in combination with a random domain tag, + // it hopefully probably works out ok + ddsi_locator_t loc = gv->interfaces[0].loc; + assert (loc.kind == DDSI_LOCATOR_KIND_UDPv4); + if (loc.address[15] == 254) + loc.address[15] = 1; + else + loc.address[15]++; + ddsi_locator_to_string_no_port (str->str, sizeof (str->str), &loc); + dds_return_t rc = dds_delete (eh); + CU_ASSERT_FATAL (rc == 0); +} + +// returns domain handle +static dds_entity_t make_domain_and_participant (uint32_t domainid, int base_port, enum ddsi_boolean_default allow_multicast, const char *spdp_address, const char *participant_index, bool add_localhost, const locstr_t *peer_address) +{ + const char *cyclonedds_uri = ""; + (void) ddsrt_getenv ("CYCLONEDDS_URI", &cyclonedds_uri); + + char *peers = NULL; + if (add_localhost || peer_address) + { + ddsrt_asprintf (&peers, "\ +\ + %s%s%s\ +", + add_localhost ? "addlocalhost=\"true\"" : "", + peer_address ? "str : "", + peer_address ? "\"/>" : ""); + } + else + { + peers = ddsrt_strdup (""); + } + + // MaxAutoParticipantIndex = 1: we never do more than 2 participants in this test, so this + // results in one additional index that won't be used. With the base port and unicast offsets + // that also means a 10-port offset between tests makes each test use unique port numbers + // outside the usual range. + char *config = NULL; + ddsrt_asprintf (&config, "%s,\ +\ + trace\ +\ +\ + %s\ +\ +\ + %d\ + \ + %d\ + 2\ + 3\ + \ + 0\ + 0.5s\ + %s\ + 2s\ + %s\ + 2\ + 2s\ + 2s\ + %s\ +", + cyclonedds_uri, + (allow_multicast == DDSI_BOOLDEF_DEFAULT) ? "default" : (allow_multicast == DDSI_BOOLDEF_TRUE) ? "true" : "false", + (int) ddsrt_getpid (), + base_port, + spdp_address, + participant_index, + peers); + ddsrt_free (peers); + //printf ("%s\n", config); + + const dds_entity_t dom = dds_create_domain (domainid, config); + CU_ASSERT_FATAL (dom > 0); + ddsrt_free (config); + const dds_entity_t pp = dds_create_participant (domainid, NULL, NULL); + CU_ASSERT_FATAL (pp > 0); + return dom; +} + +struct logger_pat { + const char *pat; + bool present; +}; +#define MAX_PATS 4 +struct logger_arg { + struct logger_pat expected[2][MAX_PATS]; // one for each domain, max 4 patterns per domain + uint32_t found[2]; +}; + +static void logger (void *varg, const dds_log_data_t *data) +{ + struct logger_arg * const arg = varg; + fputs (data->message - data->hdrsize, stdout); + if (data->domid == 0 || data->domid == 1) + { + for (uint32_t i = 0; i < MAX_PATS && arg->expected[data->domid][i].pat != NULL; i++) + if (ddsi_patmatch (arg->expected[data->domid][i].pat, data->message)) + arg->found[data->domid] |= (uint32_t)(1 << i); + } +} + +struct one { + enum ddsi_boolean_default allowmc; + const char *spdp_address; + const char *participant_index; + bool add_localhost; + const locstr_t *peer; +}; +struct cfg { + struct one one[2]; +}; + +// Operations for constructing different tests, there's always an implied shutting down +// of all domains after the last operation +enum oper { + SLEEP_1, + SLEEP_3, + SLEEP_5, + SHUTDOWN_0, + SHUTDOWN_1, + KILL_0, + KILL_1 +}; + +static void run_one (int base_port, const struct cfg *cfg, const struct logger_arg *larg_in, size_t nopers, const enum oper opers[]) +{ + assert (larg_in->found[0] == 0 && larg_in->found[1] == 0); + struct logger_arg larg = *larg_in; + dds_set_log_mask (DDS_LC_ALL); + dds_set_log_sink (&logger, &larg); + dds_set_trace_sink (&logger, &larg); + + dds_entity_t dom[2]; + for (uint32_t d = 0; d < 2; d++) + { + dom[d] = make_domain_and_participant (d, base_port, cfg->one[d].allowmc, cfg->one[d].spdp_address, cfg->one[d].participant_index, cfg->one[d].add_localhost, cfg->one[d].peer); + } + for (size_t i = 0; i < nopers;i ++) + { + switch (opers[i]) + { + case SLEEP_1: + dds_sleepfor (DDS_SECS (1)); + break; + case SLEEP_3: + dds_sleepfor (DDS_SECS (3)); + break; + case SLEEP_5: + dds_sleepfor (DDS_SECS (5)); + break; + case SHUTDOWN_0: + dds_delete (dom[0]); + break; + case SHUTDOWN_1: + dds_delete (dom[1]); + break; + case KILL_0: + // making it deafmute, then shutting it down has the same effect + // on the remote as killing it + dds_domain_set_deafmute (dom[0], true, true, DDS_INFINITY); + dds_delete (dom[0]); + break; + case KILL_1: + dds_domain_set_deafmute (dom[1], true, true, DDS_INFINITY); + dds_delete (dom[1]); + break; + } + } + for (int d = 0; d < 2; d++) + { + // don't check returns: we may have deleted the domain already + dds_delete (dom[d]); + } + + dds_set_log_mask (0); + dds_set_log_sink (NULL, NULL); + dds_set_trace_sink (NULL, NULL); + fflush (stdout); + + bool all_ok = true; + for (uint32_t d = 0; d < 2; d++) + { + for (uint32_t i = 0; i < MAX_PATS && larg.expected[d][i].pat != NULL; i++) + { + if (((larg.found[d] & (1u << i)) != 0) != larg.expected[d][i].present) + { + printf ("dom %"PRIu32" pattern %s: %s\n", + d, larg.expected[d][i].pat, + larg.expected[d][i].present ? "missing" : "present unexpectedly"); + all_ok = false; + } + } + } + CU_ASSERT_FATAL (all_ok); +} + +static const struct logger_arg larg_mut_disc = { + .expected = { + [0] = { + { "*SPDP*NEW*", 1 } + }, + [1] = { + { "*SPDP*NEW*", 1 } + } + } +}; +static const struct logger_arg larg_mut_nodisc = { + .expected = { + [0] = { + { "*SPDP*NEW*", 0 } + }, + [1] = { + { "*SPDP*NEW*", 0 } + } + } +}; + +// assume MC works on test platform (true for CI) +// using non-standard SPDP address for no reason at all, I think + +CU_Test(ddsc_spdp, I1_mc) +{ + const int baseport = 7000; + // default: expect discovery + struct logger_arg larg = larg_mut_disc; + struct cfg cfg = { { + { DDSI_BOOLDEF_DEFAULT, "239.255.0.2", "default", false, NULL }, + { DDSI_BOOLDEF_DEFAULT, "239.255.0.2", "default", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +CU_Test(ddsc_spdp, I2_uc_lhost) +{ + const int baseport = 7010; + // no mc, localhost as peer: expect discovery + locstr_t localhost; + get_localhost_address (&localhost); + struct logger_arg larg = larg_mut_disc; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.2", "default", false, &localhost }, + { DDSI_BOOLDEF_FALSE, "239.255.0.2", "default", false, &localhost } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +CU_Test(ddsc_spdp, I3_mc_spdp0_no_lhost) +{ + const int baseport = 7020; + // all mc but SPDP address = 0.0.0.0: no discovery without peers, known port + struct logger_arg larg = larg_mut_nodisc; + struct cfg cfg = { { + { DDSI_BOOLDEF_TRUE, "0.0.0.0", "default", false, NULL }, + { DDSI_BOOLDEF_TRUE, "0.0.0.0", "default", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +CU_Test(ddsc_spdp, I4_mc_spdp0_lhost_in_one) +{ + const int baseport = 7030; + // no discovery happens: first one says peers defined => well-known ports, pings localhost + // second says: MC supported/allowed, no peers defined => port none + // a bit weird, but not wrong + locstr_t localhost; + get_localhost_address (&localhost); + struct logger_arg larg = larg_mut_nodisc; + struct cfg cfg = { { + { DDSI_BOOLDEF_TRUE, "0.0.0.0", "default", false, &localhost }, + { DDSI_BOOLDEF_TRUE, "0.0.0.0", "default", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +CU_Test(ddsc_spdp, I5_mc_spdp0_lhost_one_addlhost_other) +{ + const int baseport = 7040; + // first one says peers defined => well-known ports, pings localhost + // second says: peers defined => well-known ports + locstr_t localhost; + get_localhost_address (&localhost); + struct logger_arg larg = larg_mut_disc; + struct cfg cfg = { { + { DDSI_BOOLDEF_TRUE, "0.0.0.0", "default", false, &localhost }, + { DDSI_BOOLDEF_TRUE, "0.0.0.0", "default", true, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +CU_Test(ddsc_spdp, I6_mc_one_uc_lhost_other) +{ + const int baseport = 7050; + // no discovery happens: first one says MC works => no localhost, random port + // second says: peers defined => no MC, well-known ports + // but that doesn't allow them to discover each other + locstr_t localhost; + get_localhost_address (&localhost); + struct logger_arg larg = larg_mut_nodisc; + struct cfg cfg = { { + { DDSI_BOOLDEF_TRUE, "239.255.0.2", "default", false, NULL }, + { DDSI_BOOLDEF_FALSE, "0.0.0.0", "default", true, &localhost } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +CU_Test(ddsc_spdp, I7_mc_autoidx_one_uc_other) +{ + const int baseport = 7060; + struct logger_arg larg = larg_mut_disc; + struct cfg cfg = { { + { DDSI_BOOLDEF_TRUE, "239.255.0.2", "auto", false, NULL }, + { DDSI_BOOLDEF_FALSE, "0.0.0.0", "default", true, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +CU_Test(ddsc_spdp, I8_uc_lhost_one_mc_other) +{ + const int baseport = 7070; + // fails to discover: second uses random port + locstr_t localhost; + get_localhost_address (&localhost); + struct logger_arg larg = larg_mut_nodisc; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.2", "default", false, &localhost }, + { DDSI_BOOLDEF_TRUE, "239.255.0.1", "default", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +CU_Test(ddsc_spdp, I9_uc_lhost_one_mc_autoidx_other) +{ + const int baseport = 7080; + // setting participant index to auto makes I8 work + locstr_t localhost; + get_localhost_address (&localhost); + struct logger_arg larg = larg_mut_disc; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.2", "default", false, &localhost }, + { DDSI_BOOLDEF_TRUE, "239.255.0.1", "auto", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_1 }); +} + +// pruning of useless initial locators +// prune delays are 2s, spdp interval = 0.5s + +CU_Test(ddsc_spdp, II1_mc_nonexist_peer_in_one) +{ + const int baseport = 7090; + locstr_t nonexist; + get_nonexist_address (&nonexist); + char prunemsg[200]; + const int prunemsglen = snprintf (prunemsg, sizeof (prunemsg), "*spdp: prune loc*%s*", nonexist.str); + assert (prunemsglen < (int) sizeof (prunemsg)); + (void) prunemsglen; + struct logger_arg larg = { .expected = { + [0] = { + { "*SPDP*NEW*", 1 }, + { prunemsg, 1 } + }, + [1] = { + { "*SPDP*NEW*", 1 } + } + } }; + struct cfg cfg = { { + { DDSI_BOOLDEF_TRUE, "239.255.0.1", "default", false, &nonexist }, + { DDSI_BOOLDEF_TRUE, "239.255.0.1", "default", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_3 }); +} + +CU_Test(ddsc_spdp, II2_uc_prune_only_existing) +{ + const int baseport = 7100; + locstr_t localhost; + get_localhost_address (&localhost); + char prunemsg[2][2][200]; + const int prunemsglen = snprintf (prunemsg[0][0], sizeof (prunemsg[0][0]), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 4); + assert (prunemsglen < (int) sizeof (prunemsg[0][0])); + (void) prunemsglen; + snprintf (prunemsg[0][1], sizeof (prunemsg[0][1]), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 6); + snprintf (prunemsg[1][0], sizeof (prunemsg[1][0]), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 2); + snprintf (prunemsg[1][1], sizeof (prunemsg[1][1]), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 6); + struct logger_arg larg = { .expected = { + [0] = { + { "*SPDP*NEW*", 1 }, + { prunemsg[0][0], 0 }, // mustn't prune discovered peer + { prunemsg[0][1], 1 }, // must prune non-existent one + }, + [1] = { + { "*SPDP*NEW*", 1 }, + { prunemsg[1][0], 0 }, // mustn't prune discovered peer + { prunemsg[1][1], 1 }, // must prune non-existent one + } + } }; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "0", true, NULL }, + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "1", true, NULL } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_3 }); +} + +CU_Test(ddsc_spdp, II3_uc_prune_only_existing) +{ + const int baseport = 7110; + locstr_t localhost; + get_localhost_address (&localhost); + char prunemsg[2][2][200]; + const int prunemsglen = snprintf (prunemsg[0][0], sizeof (prunemsg[0][0]), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 4); + assert (prunemsglen < (int) sizeof (prunemsg[0][0])); + (void) prunemsglen; + snprintf (prunemsg[0][1], sizeof (prunemsg[0][1]), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 6); + snprintf (prunemsg[1][0], sizeof (prunemsg[1][0]), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 2); + snprintf (prunemsg[1][1], sizeof (prunemsg[1][1]), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 6); + struct logger_arg larg = { .expected = { + [0] = { + { "*SPDP*NEW*", 1 }, + { prunemsg[0][0], 0 }, // mustn't prune discovered peer + { prunemsg[0][1], 1 }, // must prune non-existent one + }, + [1] = { + { "*SPDP*NEW*", 1 }, + { prunemsg[1][0], 0 }, // mustn't prune discovered peer + { prunemsg[1][1], 1 }, // must prune non-existent one + } + } }; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "0", false, &localhost }, + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "1", false, &localhost } } + }; + run_one (baseport, &cfg, &larg, 1, (enum oper[]){ SLEEP_3 }); +} + +CU_Test(ddsc_spdp, II4_uc_other_disc_address_of_one) +{ + const int baseport = 7120; + // first stops early, second discovered the address + // 2s until pruning + // but only a 1s wait -> can't have pruned based on time yet + // clean termination: should've been dropped (not pruned) + locstr_t localhost; + get_localhost_address (&localhost); + struct logger_arg larg = { .expected = { + [0] = { + { "*SPDP*NEW*", 1 } + }, + [1] = { + { "*SPDP*NEW*", 1 }, + { "*SPDP*ST3*", 1 }, // shutdown termination detected + { "*spdp: drop live loc*", 1 } + } + } }; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "0", false, &localhost }, + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "1", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 3, (enum oper[]){ SLEEP_3, SHUTDOWN_0, SLEEP_1 }); +} + +CU_Test(ddsc_spdp, II5_uc_one_init_address_of_other) +{ + const int baseport = 7130; + // second stops early, first simply uses initial locator + // 2s until pruning + // but only a 1s wait -> can't have pruned based on time yet + // clean termination: should've been dropped (not pruned) + locstr_t localhost; + get_localhost_address (&localhost); + char prunemsg[200], dropmsg[200]; + const int prunemsglen = snprintf (prunemsg, sizeof (prunemsg), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 6); + assert (prunemsglen < (int) sizeof (prunemsg)); + (void) prunemsglen; + const int dropmsglen = snprintf (dropmsg, sizeof (dropmsg), "*spdp: drop live loc*%s:%d*", localhost.str, baseport + 4); + assert (dropmsglen < (int) sizeof (dropmsg)); + (void) dropmsglen; + struct logger_arg larg = { .expected = { + [0] = { + { "*SPDP*NEW*", 1 }, + { "*SPDP*ST3*", 1 }, // shutdown termination detected + { prunemsg, 1 }, // enough time to do pruning of unused locators + // locator was in initial set + // locator kept alive by existing participant + // clean termination -> may drop it + // initial prune delay passed, so dropped immediately + { dropmsg, 1 } + }, + [1] = { + { "*SPDP*NEW*", 1 }, + } + } }; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "0", false, &localhost }, + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "1", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 3, (enum oper[]){ SLEEP_3, SHUTDOWN_1, SLEEP_1 }); +} + + +CU_Test(ddsc_spdp, II6_pruning_lease_exp) +{ + const int baseport = 7140; + // 2s until lease expiry + // 2s until pruning + // 3s wait -> not pruned yet + locstr_t localhost; + get_localhost_address (&localhost); + char prunemsg[200]; + const int prunemsglen = snprintf (prunemsg, sizeof (prunemsg), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 2); + assert (prunemsglen < (int) sizeof (prunemsg)); + (void) prunemsglen; + struct logger_arg larg = { .expected = { + [0] = { + { "*SPDP*NEW*", 1 } + }, + [1] = { + { "*SPDP*NEW*", 1 }, + { prunemsg, 0 } + } + } }; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "0", false, &localhost }, + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "1", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 3, (enum oper[]){ SLEEP_3, KILL_0, SLEEP_3 }); +} + +CU_Test(ddsc_spdp, II7_pruning_lease_exp) +{ + const int baseport = 7150; + // 2s until lease expiry + // 2s until pruning + // 5s wait -> should be pruned + locstr_t localhost; + get_localhost_address (&localhost); + char prunemsg[200]; + const int prunemsglen = snprintf (prunemsg, sizeof (prunemsg), "*spdp: prune loc*%s:%d*", localhost.str, baseport + 2); + assert (prunemsglen < (int) sizeof (prunemsg)); + (void) prunemsglen; + struct logger_arg larg = { .expected = { + [0] = { + { "*SPDP*NEW*", 1 } + }, + [1] = { + { "*SPDP*NEW*", 1 }, + { prunemsg, 1 } + } + } }; + struct cfg cfg = { { + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "0", false, &localhost }, + { DDSI_BOOLDEF_FALSE, "239.255.0.1", "1", false, NULL } } + }; + run_one (baseport, &cfg, &larg, 3, (enum oper[]){ SLEEP_3, KILL_0, SLEEP_5 }); +} diff --git a/src/core/xtests/spdp_scenarios.bash b/src/core/xtests/spdp_scenarios.bash deleted file mode 100644 index 9f9a7cb935..0000000000 --- a/src/core/xtests/spdp_scenarios.bash +++ /dev/null @@ -1,295 +0,0 @@ -cu='truetrace' - -function start () { - allowmc="$1" ; shift - spdp="0.5s$12s" ; shift - parti="$12s2s" ; shift - peers="" - if [ $# -gt 0 ] ; then - peers="" - shift - done - peers="$peers" - fi - set -x - CYCLONEDDS_URI="$cu$allowmc$spdp$parti$peerscdds.log.${#pids[@]}" bin/Debug/ddsperf sanity & pids[${#pids[@]}]=$! - set +x -} - -x=true - -# sanity check a bunch of cases where discovery should/shouldn't happen -echo "====== I.1 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start true 239.255.0.2 default -start true 239.255.0.2 default -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -$x || exit 1 - -echo "====== I.2 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.2 default localhost -start false 239.255.0.2 default localhost -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -$x || exit 1 - -echo "====== I.3 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start true 0.0.0.0 default -start true 0.0.0.0 default -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 && x=false -grep -q 'SPDP.*NEW' cdds.log.1 && x=false -$x || exit 1 - -echo "====== I.4 ========" -# no discovery happens: first one says peers defined => well-known ports, pings localhost -# second says: MC supported/allowed, no peers defined => port none -# a bit weird, but not wrong -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start true 0.0.0.0 default localhost -start true 0.0.0.0 default -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 && x=false -grep -q 'SPDP.*NEW' cdds.log.1 && x=false -$x || exit 1 - -echo "====== I.5 ========" -# no discovery happens: first one says peers defined => well-known ports, pings localhost -# second says: peers defined => well-known ports -# a bit weird, but not wrong -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start true 0.0.0.0 default localhost -start true 0.0.0.0 default "" -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -$x || exit 1 - -echo "====== I.6 ========" -# no discovery happens: first one says MC works => no localhost, random port -# second says: peers defined => no MC, well-known ports -# but that doesn't allow them to discover each other -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start true 239.255.0.2 default -start false 0.0.0.0 default localhost -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 && x=false -grep -q 'SPDP.*NEW' cdds.log.1 && x=false -$x || exit 1 - -echo "====== I.7 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start true 239.255.0.2 auto -start false 0.0.0.0 default -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -$x || exit 1 - -echo "====== I.8 ========" -# fails: second uses random port -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.2 default localhost -start true 239.255.0.1 default -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 && x=false -grep -q 'SPDP.*NEW' cdds.log.1 && x=false -$x || exit 1 - -echo "====== I.9 ========" -# now it works: participant index set to auto -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.2 default localhost -start true 239.255.0.1 auto -sleep 1 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -$x || exit 1 - -# pruning of useless initial locators -# prune delays are 2s, spdp interval = 0.5s - -echo "====== II.1 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start true 239.255.0.1 default 168.10.10.10 -start true 239.255.0.1 default -sleep 3 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -grep -q 'spdp: prune loc.*168\.10\.10\.10' cdds.log.0 || x=false -$x || exit 1 - -echo "====== II.2 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.1 0 "" -start false 239.255.0.1 1 "" -sleep 3 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -# mustn't prune the address of the existing one -grep -q 'spdp: prune loc.*127\.0\.0\.1:7412' cdds.log.0 && x=false -grep -q 'spdp: prune loc.*127\.0\.0\.1:7410' cdds.log.1 && x=false -# must prune some others -grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.0 || x=false -grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.1 || x=false -$x || exit 1 - -echo "====== II.3 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.1 0 localhost -start false 239.255.0.1 1 localhost -sleep 3 -kill ${pids[@]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -# mustn't prune the address of the existing one -grep -q 'spdp: prune loc.*127\.0\.0\.1:7412' cdds.log.0 && x=false -grep -q 'spdp: prune loc.*127\.0\.0\.1:7410' cdds.log.1 && x=false -# must prune some others -grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.0 || x=false -grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.1 || x=false -$x || exit 1 - -echo "====== II.4 ========" -# first stops early, second discovered the address -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.1 0 localhost -start false 239.255.0.1 1 -sleep 3 -kill ${pids[0]} -sleep 1 -kill ${pids[1]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -# clean termination -- verify: -grep -q 'SPDP.*ST3' cdds.log.1 || x=false -# 2s until pruning -# but only a 1s wait -> can't have pruned based on time yet -# clean termination: should've been dropped -grep -q 'spdp: drop live loc' cdds.log.1 || x=false -$x || exit 1 - -echo "====== II.5 ========" -# second stops early, first simply uses initial locator -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.1 0 localhost -start false 239.255.0.1 1 -sleep 3 -kill ${pids[1]} -sleep 1 -kill ${pids[0]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -# clean termination -- verify: -grep -q 'SPDP.*ST3' cdds.log.0 || x=false -# enough time to clean up unused locators -grep -q 'spdp: prune loc.*127\.0\.0\.1:7414' cdds.log.0 || x=false -# locator was in initial set -# locator kept alive by existing participant -# clean termination -> may drop it -# initial prune delay passed, so dropped immediately -grep -q 'spdp: drop live loc.*127\.0\.0\.1:7412' cdds.log.0 || x=false -$x || exit 1 - -echo "====== II.6 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.1 0 localhost -start false 239.255.0.1 1 -sleep 3 -kill -9 ${pids[0]} -sleep 3 -kill ${pids[1]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -# 2s until lease expiry -# 2s until pruning -# but only a 3s wait -> not pruned yet -grep -q 'spdp: prune loc' cdds.log.1 && x=false -$x || exit 1 - -echo "====== II.7 ========" -rm -rf cdds.log.0 cdds.log.1 -declare -a pids -start false 239.255.0.1 0 localhost -start false 239.255.0.1 1 -sleep 3 -kill -9 ${pids[0]} -sleep 5 -kill ${pids[1]} -wait -unset pids -grep -q 'SPDP.*NEW' cdds.log.0 || x=false -grep -q 'SPDP.*NEW' cdds.log.1 || x=false -# 2s until lease expiry -# 2s until pruning -# 5s wait -> should be pruned -grep -q 'spdp: prune loc' cdds.log.1 || x=false -$x || exit 1