health, control/controlclient, wgengine: report when router unhealthy
Updates tailscale/corp#1338 Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>pull/1304/head
parent
2d96215d97
commit
fd8e070d01
|
@ -71,6 +71,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
|
||||||
tailscale.com/derp/derphttp from tailscale.com/net/netcheck+
|
tailscale.com/derp/derphttp from tailscale.com/net/netcheck+
|
||||||
tailscale.com/derp/derpmap from tailscale.com/cmd/tailscaled
|
tailscale.com/derp/derpmap from tailscale.com/cmd/tailscaled
|
||||||
tailscale.com/disco from tailscale.com/derp+
|
tailscale.com/disco from tailscale.com/derp+
|
||||||
|
tailscale.com/health from tailscale.com/control/controlclient+
|
||||||
tailscale.com/internal/deepprint from tailscale.com/ipn/ipnlocal+
|
tailscale.com/internal/deepprint from tailscale.com/ipn/ipnlocal+
|
||||||
tailscale.com/ipn from tailscale.com/ipn/ipnserver+
|
tailscale.com/ipn from tailscale.com/ipn/ipnserver+
|
||||||
tailscale.com/ipn/ipnlocal from tailscale.com/ipn/ipnserver+
|
tailscale.com/ipn/ipnlocal from tailscale.com/ipn/ipnserver+
|
||||||
|
|
|
@ -18,6 +18,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"golang.org/x/oauth2"
|
"golang.org/x/oauth2"
|
||||||
|
"tailscale.com/health"
|
||||||
"tailscale.com/logtail/backoff"
|
"tailscale.com/logtail/backoff"
|
||||||
"tailscale.com/tailcfg"
|
"tailscale.com/tailcfg"
|
||||||
"tailscale.com/types/empty"
|
"tailscale.com/types/empty"
|
||||||
|
@ -116,6 +117,8 @@ type Client struct {
|
||||||
closed bool
|
closed bool
|
||||||
newMapCh chan struct{} // readable when we must restart a map request
|
newMapCh chan struct{} // readable when we must restart a map request
|
||||||
|
|
||||||
|
unregisterHealthWatch func()
|
||||||
|
|
||||||
mu sync.Mutex // mutex guards the following fields
|
mu sync.Mutex // mutex guards the following fields
|
||||||
statusFunc func(Status) // called to update Client status
|
statusFunc func(Status) // called to update Client status
|
||||||
|
|
||||||
|
@ -171,7 +174,14 @@ func NewNoStart(opts Options) (*Client, error) {
|
||||||
}
|
}
|
||||||
c.authCtx, c.authCancel = context.WithCancel(context.Background())
|
c.authCtx, c.authCancel = context.WithCancel(context.Background())
|
||||||
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
|
c.mapCtx, c.mapCancel = context.WithCancel(context.Background())
|
||||||
|
c.unregisterHealthWatch = health.RegisterWatcher(c.onHealthChange)
|
||||||
return c, nil
|
return c, nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) onHealthChange(key string, err error) {
|
||||||
|
c.logf("controlclient: restarting map request for %q health change to new state: %v", key, err)
|
||||||
|
c.cancelMapSafely()
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetPaused controls whether HTTP activity should be paused.
|
// SetPaused controls whether HTTP activity should be paused.
|
||||||
|
@ -700,6 +710,7 @@ func (c *Client) Shutdown() {
|
||||||
|
|
||||||
c.logf("client.Shutdown: inSendStatus=%v", inSendStatus)
|
c.logf("client.Shutdown: inSendStatus=%v", inSendStatus)
|
||||||
if !closed {
|
if !closed {
|
||||||
|
c.unregisterHealthWatch()
|
||||||
close(c.quit)
|
close(c.quit)
|
||||||
c.cancelAuth()
|
c.cancelAuth()
|
||||||
<-c.authDone
|
<-c.authDone
|
||||||
|
|
|
@ -33,6 +33,7 @@ import (
|
||||||
"golang.org/x/crypto/nacl/box"
|
"golang.org/x/crypto/nacl/box"
|
||||||
"golang.org/x/oauth2"
|
"golang.org/x/oauth2"
|
||||||
"inet.af/netaddr"
|
"inet.af/netaddr"
|
||||||
|
"tailscale.com/health"
|
||||||
"tailscale.com/log/logheap"
|
"tailscale.com/log/logheap"
|
||||||
"tailscale.com/net/dnscache"
|
"tailscale.com/net/dnscache"
|
||||||
"tailscale.com/net/netns"
|
"tailscale.com/net/netns"
|
||||||
|
@ -537,9 +538,16 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
|
||||||
DebugFlags: c.debugFlags,
|
DebugFlags: c.debugFlags,
|
||||||
OmitPeers: cb == nil,
|
OmitPeers: cb == nil,
|
||||||
}
|
}
|
||||||
|
var extraDebugFlags []string
|
||||||
if hostinfo != nil && ipForwardingBroken(hostinfo.RoutableIPs) {
|
if hostinfo != nil && ipForwardingBroken(hostinfo.RoutableIPs) {
|
||||||
|
extraDebugFlags = append(extraDebugFlags, "warn-ip-forwarding-off")
|
||||||
|
}
|
||||||
|
if health.RouterHealth() != nil {
|
||||||
|
extraDebugFlags = append(extraDebugFlags, "warn-router-unhealthy")
|
||||||
|
}
|
||||||
|
if len(extraDebugFlags) > 0 {
|
||||||
old := request.DebugFlags
|
old := request.DebugFlags
|
||||||
request.DebugFlags = append(old[:len(old):len(old)], "warn-ip-forwarding-off")
|
request.DebugFlags = append(old[:len(old):len(old)], extraDebugFlags...)
|
||||||
}
|
}
|
||||||
if c.newDecompressor != nil {
|
if c.newDecompressor != nil {
|
||||||
request.Compress = "zstd"
|
request.Compress = "zstd"
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package health is a registry for other packages to report & check
|
||||||
|
// overall health status of the node.
|
||||||
|
package health
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
mu sync.Mutex
|
||||||
|
m = map[string]error{} // error key => err (or nil for no error)
|
||||||
|
watchers = map[*watchHandle]func(string, error){} // opt func to run if error state changes
|
||||||
|
)
|
||||||
|
|
||||||
|
type watchHandle byte
|
||||||
|
|
||||||
|
// RegisterWatcher adds a function that will be called if an
|
||||||
|
// error changes state either to unhealthy or from unhealthy. It is
|
||||||
|
// not called on transition from unknown to healthy. It must be non-nil
|
||||||
|
// and is run in its own goroutine. The returned func unregisters it.
|
||||||
|
func RegisterWatcher(cb func(errKey string, err error)) (unregister func()) {
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
handle := new(watchHandle)
|
||||||
|
watchers[handle] = cb
|
||||||
|
return func() {
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
delete(watchers, handle)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetRouter sets the state of the wgengine/router.Router.
|
||||||
|
func SetRouterHealth(err error) { set("router", err) }
|
||||||
|
|
||||||
|
// RouterHealth returns the wgengine/router.Router error state.
|
||||||
|
func RouterHealth() error { return get("router") }
|
||||||
|
|
||||||
|
func get(key string) error {
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
return m[key]
|
||||||
|
}
|
||||||
|
|
||||||
|
func set(key string, err error) {
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
old, ok := m[key]
|
||||||
|
if !ok && err == nil {
|
||||||
|
// Initial happy path.
|
||||||
|
m[key] = nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if ok && (old == nil) == (err == nil) {
|
||||||
|
// No change in overall error status (nil-vs-not), so
|
||||||
|
// don't run callbacks, but exact error might've
|
||||||
|
// changed, so note it.
|
||||||
|
if err != nil {
|
||||||
|
m[key] = err
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m[key] = err
|
||||||
|
for _, cb := range watchers {
|
||||||
|
go cb(key, err)
|
||||||
|
}
|
||||||
|
}
|
|
@ -635,6 +635,8 @@ type MapRequest struct {
|
||||||
// Current DebugFlags values are:
|
// Current DebugFlags values are:
|
||||||
// * "warn-ip-forwarding-off": client is trying to be a subnet
|
// * "warn-ip-forwarding-off": client is trying to be a subnet
|
||||||
// router but their IP forwarding is broken.
|
// router but their IP forwarding is broken.
|
||||||
|
// * "warn-router-unhealthy": client's Router implementation is
|
||||||
|
// having problems.
|
||||||
// * "v6-overlay": IPv6 development flag to have control send
|
// * "v6-overlay": IPv6 development flag to have control send
|
||||||
// v6 node addrs
|
// v6 node addrs
|
||||||
// * "minimize-netmap": have control minimize the netmap, removing
|
// * "minimize-netmap": have control minimize the netmap, removing
|
||||||
|
|
|
@ -26,6 +26,7 @@ import (
|
||||||
"go4.org/mem"
|
"go4.org/mem"
|
||||||
"inet.af/netaddr"
|
"inet.af/netaddr"
|
||||||
"tailscale.com/control/controlclient"
|
"tailscale.com/control/controlclient"
|
||||||
|
"tailscale.com/health"
|
||||||
"tailscale.com/internal/deepprint"
|
"tailscale.com/internal/deepprint"
|
||||||
"tailscale.com/ipn/ipnstate"
|
"tailscale.com/ipn/ipnstate"
|
||||||
"tailscale.com/net/flowtrack"
|
"tailscale.com/net/flowtrack"
|
||||||
|
@ -1003,7 +1004,9 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config)
|
||||||
routerCfg.DNS.Nameservers = []netaddr.IP{tsaddr.TailscaleServiceIP()}
|
routerCfg.DNS.Nameservers = []netaddr.IP{tsaddr.TailscaleServiceIP()}
|
||||||
}
|
}
|
||||||
e.logf("wgengine: Reconfig: configuring router")
|
e.logf("wgengine: Reconfig: configuring router")
|
||||||
if err := e.router.Set(routerCfg); err != nil {
|
err := e.router.Set(routerCfg)
|
||||||
|
health.SetRouterHealth(err)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue