wgengine/magicsock: add pathfinder file for alternate disco path

Adds Pathfinder file for alternate discovery path with heartbeat turned
off. Currently replicates largely existing behaviour and will not have
any functional impact with the envknob turned off. Also changes
heartbeat envknob from be settable after program startup.

Updates #540

Co-authored-by: Brad Fitzpatrick <bradfitz@tailscale.com>
Signed-off-by: Jenny Zhang <jz@tailscale.com>
pull/6069/head
Jenny Zhang 2022-10-20 14:24:57 -04:00
parent a011320370
commit c95a3fdce7
4 changed files with 104 additions and 13 deletions

View File

@ -34,15 +34,16 @@ var (
debugReSTUNStopOnIdle = envknob.RegisterBool("TS_DEBUG_RESTUN_STOP_ON_IDLE")
// debugAlwaysDERP disables the use of UDP, forcing all peer communication over DERP.
debugAlwaysDERP = envknob.RegisterBool("TS_DEBUG_ALWAYS_USE_DERP")
// debugEnableSilentDisco disables the use of heartbeatTimer on the endpoint struct
// and attempts to handle disco silently. See issue #540 for details.
debugEnableSilentDisco = envknob.RegisterBool("TS_DEBUG_ENABLE_SILENT_DISCO")
)
// Unlike the other debug tweakables above, the following knobs needs to be
// checked every time at runtime, because tests set this after program
// startup.
// inTest reports whether the running program is a test that set the
// IN_TS_TEST environment variable.
//
// Unlike the other debug tweakables above, this one needs to be
// checked every time at runtime, because tests set this after program
// startup.
func inTest() bool { return envknob.Bool("IN_TS_TEST") }
// silentDiscoEnabled reports whether the running program enabled
// silent disco to get rid of heartbeat pings. See issue #540 for details.
func silentDiscoEnabled() bool { return envknob.Bool("TS_DEBUG_ENABLE_SILENT_DISCO") }

View File

@ -17,8 +17,8 @@ func debugOmitLocalAddresses() bool { return false }
func logDerpVerbose() bool { return false }
func debugReSTUNStopOnIdle() bool { return false }
func debugAlwaysDERP() bool { return false }
func debugEnableSilentDisco() bool { return false }
func debugUseDerpRouteEnv() string { return "" }
func debugUseDerpRoute() opt.Bool { return "" }
func inTest() bool { return false }
func inTest() bool { return false }
func silentDiscoEnabled() bool { return false }

View File

@ -2405,7 +2405,7 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) {
}
c.netMap = nm
heartbeatDisabled := debugEnableSilentDisco() || (c.netMap != nil && c.netMap.Debug != nil && c.netMap.Debug.EnableSilentDisco)
heartbeatDisabled := silentDiscoEnabled() || (c.netMap != nil && c.netMap.Debug != nil && c.netMap.Debug.EnableSilentDisco)
// Try a pass of just upserting nodes and creating missing
// endpoints. If the set of nodes is the same, this is an
@ -3417,6 +3417,7 @@ type endpoint struct {
// See #540 for background.
heartbeatDisabled bool
pathFinderRunning bool
lastSendAtomic syncs.AtomicValue[mono.Time] // last time endpoint.send was called
}
type pendingCLIPing struct {
@ -3708,6 +3709,9 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, cb func(*ipnstate.PingResu
}
func (de *endpoint) send(b []byte) error {
now := mono.Now()
de.lastSendAtomic.Store(now)
if fn := de.sendFunc.Load(); fn != nil {
return fn(b)
}
@ -3721,7 +3725,6 @@ func (de *endpoint) send(b []byte) error {
}
}
now := mono.Now()
udpAddr, derpAddr := de.addrForSendLocked(now)
if de.canP2PLocked() && (!udpAddr.IsValid() || now.After(de.trustBestAddrUntil)) {
de.sendPingsLocked(now, true)

View File

@ -4,10 +4,97 @@
package magicsock
// startPathFinder initializes the atomicSendFunc, and
import (
"fmt"
"net/netip"
"time"
"tailscale.com/tstime/mono"
"tailscale.com/types/key"
)
// startPathFinder initializes the sendFunc, and
// will eventually kick off a goroutine that monitors whether
// that sendFunc is still the best option for the endpoint
// to use and adjusts accordingly.
func (de *endpoint) startPathFinder() {
de.pathFinderRunning = true
go func() {
for mono.Since(de.lastSendAtomic.Load()) < sessionActiveTimeout {
de.mu.Lock()
de.pathFinderRunning = true
de.mu.Unlock()
// while the session has not timed out yet,
// check whether path needs to be upgraded on an interval
de.updateSendPathIfNecessary(mono.Now())
// TODO(2022-10-20): should not be using heartbeat at all, currently just
// trying to replicate existing behaviour
time.Sleep(heartbeatInterval)
}
}()
}
// updateSendPathIfNecessary optionally upates sendFunc
// based on analysis of current conditions
func (de *endpoint) updateSendPathIfNecessary(now mono.Time) {
de.mu.Lock()
defer de.mu.Unlock()
// default happy state is: use UDP, don't use Derp
useUDP := true
useDerp := false
// if it's been longer than 6.5 seconds, also send useDerp
if now.After(de.trustBestAddrUntil) {
useDerp = true
}
derpAddr := de.derpAddr
udpAddr := de.bestAddr.AddrPort
// do final checks to make sure the addresses we want to send to are valid
if useUDP && !udpAddr.IsValid() {
de.c.logf(fmt.Sprintf("magicsock: silent-disco: invalid UDP addr found: %s", udpAddr))
return
}
if useDerp && !derpAddr.IsValid() {
de.c.logf(fmt.Sprintf("magicsock: silent-disco: invalid DERP addr found: %s", derpAddr))
return
}
if useUDP && useDerp {
de.sendFunc.Store(de.sendDerpAndUDP(udpAddr, derpAddr, de.publicKey))
} else if useUDP {
de.sendFunc.Store(de.sendSinglePath(udpAddr, de.publicKey))
} else if useDerp {
de.sendFunc.Store(de.sendSinglePath(derpAddr, de.publicKey))
}
if de.wantFullPingLocked(now) {
de.sendPingsLocked(now, true) // spray endpoints, and enqueue CMM
}
// currently does not re-implement the heartbeat calling startPingLocked
// keep-alive every 3 seconds. this is where the bulk of the new upgrade
// logic should be, I think?
}
func (de *endpoint) sendSinglePath(addr netip.AddrPort, pubKey key.NodePublic) endpointSendFunc {
return func(b []byte) error {
_, err := de.c.sendAddr(addr, pubKey, b)
return err
}
}
func (de *endpoint) sendDerpAndUDP(udpAddr netip.AddrPort, derpAddr netip.AddrPort, pubKey key.NodePublic) endpointSendFunc {
return func(b []byte) error {
_, udpErr := de.c.sendAddr(udpAddr, de.publicKey, b)
_, derpErr := de.c.sendAddr(derpAddr, de.publicKey, b)
if derpErr == nil || udpErr == nil {
// at least one packet send succeeded, good enough
return nil
}
return udpErr // error from UDP send supersedes error from Derp send
}
}