From a7c1ee19ae105d54985f2056a927ea130e8ef89f Mon Sep 17 00:00:00 2001 From: James Tucker Date: Tue, 13 Jun 2023 16:28:58 -0700 Subject: [PATCH] prober: make DERP probes fail when stun requests fail The client treats a single lost stun request as a failure in netcheck and will bounce regions as a result. The prober results should represent this if it is happening regularly in a region we need to know about that. The fist step is to report the data with more precision; the likely immediate next step is to adjust alerting to soften the blow. Updates tailscale/corp#11492 Signed-off-by: James Tucker --- prober/derp.go | 72 ++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/prober/derp.go b/prober/derp.go index cd267e27e..e0de5f803 100644 --- a/prober/derp.go +++ b/prober/derp.go @@ -13,6 +13,7 @@ import ( "log" "net" "net/http" + "net/netip" "strconv" "strings" "sync" @@ -209,47 +210,44 @@ func derpProbeUDP(ctx context.Context, ipStr string, port int) (latency time.Dur defer pc.Close() uc := pc.(*net.UDPConn) - tx := stun.NewTxID() - req := stun.Request(tx) - + ip, err := netip.ParseAddr(ipStr) + if err != nil { + return 0, err + } if port == 0 { port = 3478 } - for { - ip := net.ParseIP(ipStr) - _, err := uc.WriteToUDP(req, &net.UDPAddr{IP: ip, Port: port}) - if err != nil { - return 0, err + addr := netip.AddrPortFrom(ip, uint16(port)) + + // Binding requests and responses are fairly small (~40 bytes), + // but in practice a STUN response can be up to the size of the + // path MTU, so we use a jumbo frame size buffer here. + buf := make([]byte, 9000) + + tx := stun.NewTxID() + req := stun.Request(tx) + _, err = uc.WriteToUDPAddrPort(req, addr) + if err != nil { + return 0, err + } + t0 := time.Now() + n, _, err := uc.ReadFromUDP(buf) + d := time.Since(t0) + if err != nil { + if ctx.Err() != nil || err == context.DeadlineExceeded || d > time.Second { + return 0, fmt.Errorf("timeout reading from %s: (%s) %w", addr, d, err) } - // Binding requests and responses are fairly small (~40 bytes), - // but in practice a STUN response can be up to the size of the - // path MTU, so we use a jumbo frame size buffer here. - buf := make([]byte, 9000) - uc.SetReadDeadline(time.Now().Add(2 * time.Second)) - t0 := time.Now() - n, _, err := uc.ReadFromUDP(buf) - d := time.Since(t0) - if err != nil { - if ctx.Err() != nil { - return 0, fmt.Errorf("timeout reading from %v: %v", ip, err) - } - if d < time.Second { - return 0, fmt.Errorf("error reading from %v: %v", ip, err) - } - time.Sleep(100 * time.Millisecond) - continue - } - txBack, _, err := stun.ParseResponse(buf[:n]) - if err != nil { - return 0, fmt.Errorf("parsing STUN response from %v: %v", ip, err) - } - if txBack != tx { - return 0, fmt.Errorf("read wrong tx back from %v", ip) - } - if latency == 0 || d < latency { - latency = d - } - break + return 0, fmt.Errorf("error reading from %s: (%s) %w", addr, d, err) + } + txBack, _, err := stun.ParseResponse(buf[:n]) + if err != nil { + return 0, fmt.Errorf("parsing STUN response from %s: %v", addr, err) + } + if txBack != tx { + return 0, fmt.Errorf("read wrong tx back from %s", addr) + } + if latency == 0 || d < latency { + latency = d } return latency, nil }