From f852531d5f6c0e8aba22bff2710dba6c2cbe61ea Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Thu, 20 Jun 2024 12:21:04 +0000 Subject: [PATCH 1/4] fix: block incoming endpoints from call-me-maybe Previously, (*magicsock.Conn).SetBlockEndpoints(true) would only prevent local endpoints from being sent to other peers. This setting is primarily used to prevent any direct connection from forming, regardless of which side initiated it, but any connections initiated locally to endpoints received from the other peer would still work. If endpoints are blocked, we will now drop any endpoints we already know of (via call-me-maybe) as well as block any future endpoints received via call-me-maybe. Endpoints received via coordination are not impacted (and should be blocked using a different mechanism). --- wgengine/magicsock/magicsock.go | 23 +++++++ wgengine/magicsock/magicsock_test.go | 93 ++++++++++++++++++++++------ 2 files changed, 96 insertions(+), 20 deletions(-) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index c4bb2e5858ff2..5bf8478a32c48 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -217,6 +217,9 @@ type Conn struct { // blockEndpoints is whether to avoid capturing, storing and sending // endpoints gathered from local interfaces or STUN. Only DERP endpoints // will be sent. + // This will also block incoming endpoints received via call-me-maybe disco + // packets. Endpoints manually added (e.g. via coordination) will not be + // blocked. blockEndpoints bool // endpointsUpdateActive indicates that updateEndpoints is // currently running. It's used to deduplicate concurrent endpoint @@ -856,6 +859,7 @@ func (c *Conn) SetBlockEndpoints(block bool) { return } + // Re-gather local endpoints. const why = "SetBlockEndpoints" if c.endpointsUpdateActive { if c.wantEndpointsUpdate != why { @@ -866,6 +870,17 @@ func (c *Conn) SetBlockEndpoints(block bool) { c.endpointsUpdateActive = true go c.updateEndpoints(why) } + + // Clear any endpoints from the peerMap if block is true. + if block { + c.peerMap.forEachEndpoint(func(ep *endpoint) { + // Simulate receiving a call-me-maybe disco packet with no + // endpoints, which will cause all endpoints to be removed. + ep.handleCallMeMaybe(&disco.CallMeMaybe{ + MyNumber: []netip.AddrPort{}, + }) + }) + } } // determineEndpoints returns the machine's endpoint addresses. It @@ -1523,6 +1538,14 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netip.AddrPort, derpNodeSrc ke c.logf("[unexpected] CallMeMaybe from peer via DERP whose netmap discokey != disco source") return } + if c.blockEndpoints { + c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got call-me-maybe with %d endpoints, but endpoints blocked", + c.discoShort, epDisco.short, + ep.publicKey.ShortString(), derpStr(src.String()), + len(dm.MyNumber), + ) + dm.MyNumber = []netip.AddrPort{} + } c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got call-me-maybe, %d endpoints", c.discoShort, epDisco.short, ep.publicKey.ShortString(), derpStr(src.String()), diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index d9eb2cd12d206..b224ecc85bd81 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -3022,7 +3022,6 @@ func TestBlockEndpoints(t *testing.T) { t.Fatal("DERP IP in endpoints list?", ep.Addr) } haveEndpoint = true - break } ms.conn.mu.Unlock() if !haveEndpoint { @@ -3060,30 +3059,25 @@ func TestBlockEndpointsDERPOK(t *testing.T) { logf, closeLogf := logger.LogfCloser(t.Logf) defer closeLogf() - derpMap, cleanup := runDERPAndStun(t, t.Logf, localhostListener{}, netaddr.IPv4(127, 0, 0, 1)) - defer cleanup() + derpMap, cleanupDerp := runDERPAndStun(t, t.Logf, localhostListener{}, netaddr.IPv4(127, 0, 0, 1)) + defer cleanupDerp() ms1 := newMagicStack(t, logger.WithPrefix(logf, "conn1: "), d.m1, derpMap) defer ms1.Close() ms2 := newMagicStack(t, logger.WithPrefix(logf, "conn2: "), d.m2, derpMap) defer ms2.Close() - cleanup = meshStacks(logf, nil, ms1, ms2) - defer cleanup() + cleanupMesh := meshStacks(logf, nil, ms1, ms2) + defer cleanupMesh() m1IP := ms1.IP() m2IP := ms2.IP() logf("IPs: %s %s", m1IP, m2IP) - // SetBlockEndpoints is called later since it's incompatible with the test - // meshStacks implementations. - ms1.conn.SetBlockEndpoints(true) - ms2.conn.SetBlockEndpoints(true) - waitForNoEndpoints(t, ms1.conn) - waitForNoEndpoints(t, ms2.conn) - - cleanup = newPinger(t, logf, ms1, ms2) - defer cleanup() + cleanupPinger1 := newPinger(t, logf, ms1, ms2) + defer cleanupPinger1() + cleanupPinger2 := newPinger(t, logf, ms2, ms1) + defer cleanupPinger2() // Wait for both peers to know about each other. for { @@ -3098,14 +3092,56 @@ func TestBlockEndpointsDERPOK(t *testing.T) { break } - cleanup = newPinger(t, t.Logf, ms1, ms2) - defer cleanup() + waitForEndpoints(t, ms1.conn) + waitForEndpoints(t, ms2.conn) - if len(ms1.conn.activeDerp) == 0 { - t.Errorf("unexpected DERP empty got: %v want: >0", len(ms1.conn.activeDerp)) + // meshStacks doesn't use call-me-maybe packets to update endpoints, which + // makes them not get cleared when we call SetBlockEndpoints. + ep2, ok := ms1.conn.peerMap.endpointForNodeKey(ms2.Public()) + if !ok { + t.Fatalf("endpoint not found for ms2") + } + ep2.mu.Lock() + for k := range ep2.endpointState { + ep2.deleteEndpointLocked("TestBlockEndpointsDERPOK", k) + } + ep2.mu.Unlock() + + // Wait for the call-me-maybe packet to arrive. + for i := 0; i < 50; i++ { + time.Sleep(100 * time.Millisecond) + ep2.mu.Lock() + if len(ep2.endpointState) != 0 { + ep2.mu.Unlock() + break + } + ep2.mu.Unlock() } - if len(ms2.conn.activeDerp) == 0 { - t.Errorf("unexpected DERP empty got: %v want: >0", len(ms2.conn.activeDerp)) + + // SetBlockEndpoints is called later since it's incompatible with the test + // meshStacks implementations. + // We only set it on ms1, since ms2's endpoints should be ignored by ms1. + ms1.conn.SetBlockEndpoints(true) + + // All call-me-maybe endpoints should've been immediately removed from ms1. + ep2.mu.Lock() + if len(ep2.endpointState) != 0 { + ep2.mu.Unlock() + t.Fatalf("endpoints not removed from ms1") + } + ep2.mu.Unlock() + + // Wait for endpoints to finish updating. + waitForNoEndpoints(t, ms1.conn) + + // Give time for another call-me-maybe packet to arrive. I couldn't think of + // a better way than sleeping without making a bunch of changes. + time.Sleep(time.Second) + + ep2.mu.Lock() + defer ep2.mu.Unlock() + for i := range ep2.endpointState { + t.Fatalf("endpoint %q not missing", i.String()) } } @@ -3129,3 +3165,20 @@ func waitForNoEndpoints(t *testing.T, ms *Conn) { } t.Log("endpoints are blocked") } + +func waitForEndpoints(t *testing.T, ms *Conn) { + t.Helper() + for i := 0; i < 50; i++ { + time.Sleep(100 * time.Millisecond) + ms.mu.Lock() + for _, ep := range ms.lastEndpoints { + if ep.Addr.Addr() != tailcfg.DerpMagicIPAddr { + t.Log("endpoint found") + ms.mu.Unlock() + return + } + } + ms.mu.Unlock() + } + t.Fatal("endpoint was not found after 50 attempts") +} From b22cdd10362446b6eb66a98f2f6ffee5587770b0 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Fri, 21 Jun 2024 12:20:45 +0000 Subject: [PATCH 2/4] Add debug logging to block endpoints test --- wgengine/magicsock/magicsock_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index b224ecc85bd81..54705b9276a83 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -3064,8 +3064,10 @@ func TestBlockEndpointsDERPOK(t *testing.T) { ms1 := newMagicStack(t, logger.WithPrefix(logf, "conn1: "), d.m1, derpMap) defer ms1.Close() + ms1.conn.SetDebugLoggingEnabled(true) ms2 := newMagicStack(t, logger.WithPrefix(logf, "conn2: "), d.m2, derpMap) defer ms2.Close() + ms2.conn.SetDebugLoggingEnabled(true) cleanupMesh := meshStacks(logf, nil, ms1, ms2) defer cleanupMesh() @@ -3136,7 +3138,9 @@ func TestBlockEndpointsDERPOK(t *testing.T) { // Give time for another call-me-maybe packet to arrive. I couldn't think of // a better way than sleeping without making a bunch of changes. + t.Logf("sleeping for call-me-maybe packet to be received and ignored") time.Sleep(time.Second) + t.Logf("done sleeping") ep2.mu.Lock() defer ep2.mu.Unlock() From 5beb71a0d3276dcac093c9fd43c07cad04a62971 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Fri, 21 Jun 2024 12:41:37 +0000 Subject: [PATCH 3/4] Wipe all endpoints on BlockEndpoints --- wgengine/magicsock/endpoint.go | 13 +++++++++++++ wgengine/magicsock/magicsock.go | 24 ++++++++++++----------- wgengine/magicsock/magicsock_test.go | 29 +++++----------------------- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index 28983ce9e95c0..23cc0c3f368ef 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -207,6 +207,19 @@ func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) { } } +func (de *endpoint) clearEndpoints(why string) { + de.mu.Lock() + defer de.mu.Unlock() + de.debugUpdates.Add(EndpointChange{ + When: time.Now(), + What: "clearEndpoints-" + why, + }) + de.endpointState = map[netip.AddrPort]*endpointState{} + de.bestAddr = addrLatency{} + de.c.logf("magicsock: disco: node %s %s now using DERP only (all endpoints deleted)", + de.publicKey.ShortString(), de.discoShort()) +} + // initFakeUDPAddr populates fakeWGAddr with a globally unique fake UDPAddr. // The current implementation just uses the pointer value of de jammed into an IPv6 // address, but it could also be, say, a counter. diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 5bf8478a32c48..9ad2a4ccb6c61 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -218,8 +218,7 @@ type Conn struct { // endpoints gathered from local interfaces or STUN. Only DERP endpoints // will be sent. // This will also block incoming endpoints received via call-me-maybe disco - // packets. Endpoints manually added (e.g. via coordination) will not be - // blocked. + // packets. blockEndpoints bool // endpointsUpdateActive indicates that updateEndpoints is // currently running. It's used to deduplicate concurrent endpoint @@ -846,10 +845,10 @@ func (c *Conn) DiscoPublicKey() key.DiscoPublic { return c.discoPublic } -// SetBlockEndpoints sets the blockEndpoints field. If changed, endpoints will -// be updated to apply the new settings. Existing connections may continue to -// use the old setting until they are reestablished. Disabling endpoints does -// not affect the UDP socket or portmapper. +// SetBlockEndpoints sets the blockEndpoints field. If enabled, all peer +// endpoints will be cleared from the peer map and every connection will +// immediately switch to DERP. Disabling endpoints does not affect the UDP +// socket or portmapper. func (c *Conn) SetBlockEndpoints(block bool) { c.mu.Lock() defer c.mu.Unlock() @@ -874,11 +873,7 @@ func (c *Conn) SetBlockEndpoints(block bool) { // Clear any endpoints from the peerMap if block is true. if block { c.peerMap.forEachEndpoint(func(ep *endpoint) { - // Simulate receiving a call-me-maybe disco packet with no - // endpoints, which will cause all endpoints to be removed. - ep.handleCallMeMaybe(&disco.CallMeMaybe{ - MyNumber: []netip.AddrPort{}, - }) + ep.clearEndpoints("SetBlockEndpoints") }) } } @@ -1944,6 +1939,13 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) { // we'll fall through to the next pass, which allocates but can // handle full set updates. for _, n := range nm.Peers { + if c.blockEndpoints { + c.dlogf("[v1] magicsock: SetNetworkingMap: %v (%v, %v) received %d endpoints, but endpoints blocked", + n.DiscoKey.ShortString(), n.Key.ShortString(), + len(n.Endpoints), + ) + n.Endpoints = []string{} + } if ep, ok := c.peerMap.endpointForNodeKey(n.Key); ok { if n.DiscoKey.IsZero() && !n.IsWireGuardOnly { // Discokey transitioned from non-zero to zero? This should not diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 54705b9276a83..052f35bee501d 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -3097,35 +3097,16 @@ func TestBlockEndpointsDERPOK(t *testing.T) { waitForEndpoints(t, ms1.conn) waitForEndpoints(t, ms2.conn) - // meshStacks doesn't use call-me-maybe packets to update endpoints, which - // makes them not get cleared when we call SetBlockEndpoints. - ep2, ok := ms1.conn.peerMap.endpointForNodeKey(ms2.Public()) - if !ok { - t.Fatalf("endpoint not found for ms2") - } - ep2.mu.Lock() - for k := range ep2.endpointState { - ep2.deleteEndpointLocked("TestBlockEndpointsDERPOK", k) - } - ep2.mu.Unlock() - - // Wait for the call-me-maybe packet to arrive. - for i := 0; i < 50; i++ { - time.Sleep(100 * time.Millisecond) - ep2.mu.Lock() - if len(ep2.endpointState) != 0 { - ep2.mu.Unlock() - break - } - ep2.mu.Unlock() - } - // SetBlockEndpoints is called later since it's incompatible with the test // meshStacks implementations. // We only set it on ms1, since ms2's endpoints should be ignored by ms1. ms1.conn.SetBlockEndpoints(true) - // All call-me-maybe endpoints should've been immediately removed from ms1. + // All endpoints should've been immediately removed from ms1. + ep2, ok := ms1.conn.peerMap.endpointForNodeKey(ms2.Public()) + if !ok { + t.Fatalf("endpoint not found for ms2") + } ep2.mu.Lock() if len(ep2.endpointState) != 0 { ep2.mu.Unlock() From 6f250434329a899d34c651c2756112d5c0412a88 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Fri, 21 Jun 2024 12:44:59 +0000 Subject: [PATCH 4/4] fixup! Wipe all endpoints on BlockEndpoints --- wgengine/magicsock/endpoint.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index 23cc0c3f368ef..073c9637571f4 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -215,7 +215,14 @@ func (de *endpoint) clearEndpoints(why string) { What: "clearEndpoints-" + why, }) de.endpointState = map[netip.AddrPort]*endpointState{} - de.bestAddr = addrLatency{} + if de.bestAddr.AddrPort.IsValid() { + de.debugUpdates.Add(EndpointChange{ + When: time.Now(), + What: "clearEndpoints-bestAddr-" + why, + From: de.bestAddr, + }) + de.bestAddr = addrLatency{} + } de.c.logf("magicsock: disco: node %s %s now using DERP only (all endpoints deleted)", de.publicKey.ShortString(), de.discoShort()) }