prober: make DERP probes fail when stun requests fail
The client treats a single lost STUN request as a failure in netcheck and will bounce regions as a result. The prober results should represent this: if it is happening regularly in a region, we need to know about that. The first step is to report the data with more precision; the likely immediate next step is to adjust alerting to soften the blow. Updates tailscale/corp#11492 Signed-off-by: James Tucker <james@tailscale.com> raggi/derp-probe-stun-loss
parent
99f17a7135
commit
a7c1ee19ae
|
@ -13,6 +13,7 @@ import (
|
||||||
"log"
|
"log"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/netip"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -209,47 +210,44 @@ func derpProbeUDP(ctx context.Context, ipStr string, port int) (latency time.Dur
|
||||||
defer pc.Close()
|
defer pc.Close()
|
||||||
uc := pc.(*net.UDPConn)
|
uc := pc.(*net.UDPConn)
|
||||||
|
|
||||||
tx := stun.NewTxID()
|
ip, err := netip.ParseAddr(ipStr)
|
||||||
req := stun.Request(tx)
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
if port == 0 {
|
if port == 0 {
|
||||||
port = 3478
|
port = 3478
|
||||||
}
|
}
|
||||||
for {
|
addr := netip.AddrPortFrom(ip, uint16(port))
|
||||||
ip := net.ParseIP(ipStr)
|
|
||||||
_, err := uc.WriteToUDP(req, &net.UDPAddr{IP: ip, Port: port})
|
// Binding requests and responses are fairly small (~40 bytes),
|
||||||
if err != nil {
|
// but in practice a STUN response can be up to the size of the
|
||||||
return 0, err
|
// path MTU, so we use a jumbo frame size buffer here.
|
||||||
|
buf := make([]byte, 9000)
|
||||||
|
|
||||||
|
tx := stun.NewTxID()
|
||||||
|
req := stun.Request(tx)
|
||||||
|
_, err = uc.WriteToUDPAddrPort(req, addr)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
t0 := time.Now()
|
||||||
|
n, _, err := uc.ReadFromUDP(buf)
|
||||||
|
d := time.Since(t0)
|
||||||
|
if err != nil {
|
||||||
|
if ctx.Err() != nil || err == context.DeadlineExceeded || d > time.Second {
|
||||||
|
return 0, fmt.Errorf("timeout reading from %s: (%s) %w", addr, d, err)
|
||||||
}
|
}
|
||||||
// Binding requests and responses are fairly small (~40 bytes),
|
return 0, fmt.Errorf("error reading from %s: (%s) %w", addr, d, err)
|
||||||
// but in practice a STUN response can be up to the size of the
|
}
|
||||||
// path MTU, so we use a jumbo frame size buffer here.
|
txBack, _, err := stun.ParseResponse(buf[:n])
|
||||||
buf := make([]byte, 9000)
|
if err != nil {
|
||||||
uc.SetReadDeadline(time.Now().Add(2 * time.Second))
|
return 0, fmt.Errorf("parsing STUN response from %s: %v", addr, err)
|
||||||
t0 := time.Now()
|
}
|
||||||
n, _, err := uc.ReadFromUDP(buf)
|
if txBack != tx {
|
||||||
d := time.Since(t0)
|
return 0, fmt.Errorf("read wrong tx back from %s", addr)
|
||||||
if err != nil {
|
}
|
||||||
if ctx.Err() != nil {
|
if latency == 0 || d < latency {
|
||||||
return 0, fmt.Errorf("timeout reading from %v: %v", ip, err)
|
latency = d
|
||||||
}
|
|
||||||
if d < time.Second {
|
|
||||||
return 0, fmt.Errorf("error reading from %v: %v", ip, err)
|
|
||||||
}
|
|
||||||
time.Sleep(100 * time.Millisecond)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
txBack, _, err := stun.ParseResponse(buf[:n])
|
|
||||||
if err != nil {
|
|
||||||
return 0, fmt.Errorf("parsing STUN response from %v: %v", ip, err)
|
|
||||||
}
|
|
||||||
if txBack != tx {
|
|
||||||
return 0, fmt.Errorf("read wrong tx back from %v", ip)
|
|
||||||
}
|
|
||||||
if latency == 0 || d < latency {
|
|
||||||
latency = d
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
return latency, nil
|
return latency, nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue