diff --git a/wgengine/magicsock/debugknobs.go b/wgengine/magicsock/debugknobs.go index 54c554898..3e317fdd6 100644 --- a/wgengine/magicsock/debugknobs.go +++ b/wgengine/magicsock/debugknobs.go @@ -34,15 +34,16 @@ var ( debugReSTUNStopOnIdle = envknob.RegisterBool("TS_DEBUG_RESTUN_STOP_ON_IDLE") // debugAlwaysDERP disables the use of UDP, forcing all peer communication over DERP. debugAlwaysDERP = envknob.RegisterBool("TS_DEBUG_ALWAYS_USE_DERP") - // debugEnableSilentDisco disables the use of heartbeatTimer on the endpoint struct - // and attempts to handle disco silently. See issue #540 for details. - debugEnableSilentDisco = envknob.RegisterBool("TS_DEBUG_ENABLE_SILENT_DISCO") ) +// Unlike the other debug tweakables above, the following knobs needs to be +// checked every time at runtime, because tests set this after program +// startup. + // inTest reports whether the running program is a test that set the // IN_TS_TEST environment variable. -// -// Unlike the other debug tweakables above, this one needs to be -// checked every time at runtime, because tests set this after program -// startup. func inTest() bool { return envknob.Bool("IN_TS_TEST") } + +// silentDiscoEnabled reports whether the running program enabled +// silent disco to get rid of heartbeat pings. See issue #540 for details. +func silentDiscoEnabled() bool { return envknob.Bool("TS_DEBUG_ENABLE_SILENT_DISCO") } diff --git a/wgengine/magicsock/debugknobs_stubs.go b/wgengine/magicsock/debugknobs_stubs.go index 9c2214698..4fcbb0ed8 100644 --- a/wgengine/magicsock/debugknobs_stubs.go +++ b/wgengine/magicsock/debugknobs_stubs.go @@ -17,8 +17,8 @@ func debugOmitLocalAddresses() bool { return false } func logDerpVerbose() bool { return false } func debugReSTUNStopOnIdle() bool { return false } func debugAlwaysDERP() bool { return false } -func debugEnableSilentDisco() bool { return false } func debugUseDerpRouteEnv() string { return "" } func debugUseDerpRoute() opt.Bool { return "" } -func inTest() bool { return false } +func inTest() bool { return false } +func silentDiscoEnabled() bool { return false } diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 7b529a0f8..0577c691a 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -2405,7 +2405,7 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) { } c.netMap = nm - heartbeatDisabled := debugEnableSilentDisco() || (c.netMap != nil && c.netMap.Debug != nil && c.netMap.Debug.EnableSilentDisco) + heartbeatDisabled := silentDiscoEnabled() || (c.netMap != nil && c.netMap.Debug != nil && c.netMap.Debug.EnableSilentDisco) // Try a pass of just upserting nodes and creating missing // endpoints. If the set of nodes is the same, this is an @@ -3417,6 +3417,7 @@ type endpoint struct { // See #540 for background. heartbeatDisabled bool pathFinderRunning bool + lastSendAtomic syncs.AtomicValue[mono.Time] // last time endpoint.send was called } type pendingCLIPing struct { @@ -3708,6 +3709,9 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResu } func (de *endpoint) send(b []byte) error { + now := mono.Now() + de.lastSendAtomic.Store(now) + if fn := de.sendFunc.Load(); fn != nil { return fn(b) } @@ -3721,7 +3725,6 @@ func (de *endpoint) send(b []byte) error { } } - now := mono.Now() udpAddr, derpAddr := de.addrForSendLocked(now) if de.canP2PLocked() && (!udpAddr.IsValid() || now.After(de.trustBestAddrUntil)) { de.sendPingsLocked(now, true) diff --git a/wgengine/magicsock/pathfinder.go b/wgengine/magicsock/pathfinder.go index b6b408853..a3f0b4ad5 100644 --- a/wgengine/magicsock/pathfinder.go +++ b/wgengine/magicsock/pathfinder.go @@ -4,10 +4,97 @@ package magicsock -// startPathFinder initializes the atomicSendFunc, and +import ( + "fmt" + "net/netip" + "time" + + "tailscale.com/tstime/mono" + "tailscale.com/types/key" +) + +// startPathFinder initializes the sendFunc, and // will eventually kick off a goroutine that monitors whether // that sendFunc is still the best option for the endpoint // to use and adjusts accordingly. func (de *endpoint) startPathFinder() { - de.pathFinderRunning = true + go func() { + for mono.Since(de.lastSendAtomic.Load()) < sessionActiveTimeout { + de.mu.Lock() + de.pathFinderRunning = true + de.mu.Unlock() + + // while the session has not timed out yet, + // check whether path needs to be upgraded on an interval + de.updateSendPathIfNecessary(mono.Now()) + + // TODO(2022-10-20): should not be using heartbeat at all, currently just + // trying to replicate existing behaviour + time.Sleep(heartbeatInterval) + } + }() +} + +// updateSendPathIfNecessary optionally upates sendFunc +// based on analysis of current conditions +func (de *endpoint) updateSendPathIfNecessary(now mono.Time) { + de.mu.Lock() + defer de.mu.Unlock() + + // default happy state is: use UDP, don't use Derp + useUDP := true + useDerp := false + + // if it's been longer than 6.5 seconds, also send useDerp + if now.After(de.trustBestAddrUntil) { + useDerp = true + } + + derpAddr := de.derpAddr + udpAddr := de.bestAddr.AddrPort + + // do final checks to make sure the addresses we want to send to are valid + if useUDP && !udpAddr.IsValid() { + de.c.logf(fmt.Sprintf("magicsock: silent-disco: invalid UDP addr found: %s", udpAddr)) + return + } + if useDerp && !derpAddr.IsValid() { + de.c.logf(fmt.Sprintf("magicsock: silent-disco: invalid DERP addr found: %s", derpAddr)) + return + } + + if useUDP && useDerp { + de.sendFunc.Store(de.sendDerpAndUDP(udpAddr, derpAddr, de.publicKey)) + } else if useUDP { + de.sendFunc.Store(de.sendSinglePath(udpAddr, de.publicKey)) + } else if useDerp { + de.sendFunc.Store(de.sendSinglePath(derpAddr, de.publicKey)) + } + + if de.wantFullPingLocked(now) { + de.sendPingsLocked(now, true) // spray endpoints, and enqueue CMM + } + + // currently does not re-implement the heartbeat calling startPingLocked + // keep-alive every 3 seconds. this is where the bulk of the new upgrade + // logic should be, I think? +} + +func (de *endpoint) sendSinglePath(addr netip.AddrPort, pubKey key.NodePublic) endpointSendFunc { + return func(b []byte) error { + _, err := de.c.sendAddr(addr, pubKey, b) + return err + } +} + +func (de *endpoint) sendDerpAndUDP(udpAddr netip.AddrPort, derpAddr netip.AddrPort, pubKey key.NodePublic) endpointSendFunc { + return func(b []byte) error { + _, udpErr := de.c.sendAddr(udpAddr, de.publicKey, b) + _, derpErr := de.c.sendAddr(derpAddr, de.publicKey, b) + if derpErr == nil || udpErr == nil { + // at least one packet send succeeded, good enough + return nil + } + return udpErr // error from UDP send supersedes error from Derp send + } }