From c41c4f9fb551e03f1067cc02fdcac9f0d5a810b3 Mon Sep 17 00:00:00 2001 From: Denton Gentry Date: Sat, 18 Mar 2023 12:27:21 -0700 Subject: [PATCH] debugging azure Signed-off-by: Denton Gentry --- control/controlclient/auto.go | 8 +++---- control/controlclient/direct.go | 24 ++++++++++++++++++-- ipn/ipnlocal/local.go | 39 ++++++++++++++++++++++++++++----- net/interfaces/interfaces.go | 4 ++-- 4 files changed, 62 insertions(+), 13 deletions(-) diff --git a/control/controlclient/auto.go b/control/controlclient/auto.go index ddd0d4f2b..05250c915 100644 --- a/control/controlclient/auto.go +++ b/control/controlclient/auto.go @@ -475,7 +475,7 @@ func (c *Auto) mapRoutine() { } continue } - c.logf("[v1] mapRoutine: %s", c.state) + c.logf("mapRoutine: %s", c.state) loggedIn := c.loggedIn ctx := c.mapCtx c.mu.Unlock() @@ -488,7 +488,7 @@ func (c *Auto) mapRoutine() { } report := func(err error, msg string) { - c.logf("[v1] %s: %v", msg, err) + c.logf("%s: %v", msg, err) err = fmt.Errorf("%s: %w", msg, err) // don't send status updates for context errors, // since context cancelation is always on purpose. @@ -506,9 +506,9 @@ func (c *Auto) mapRoutine() { select { case <-ctx.Done(): - c.logf("[v1] mapRoutine: context done.") + c.logf("mapRoutine: context done.") case <-c.newMapCh: - c.logf("[v1] mapRoutine: new map needed while idle.") + c.logf("mapRoutine: new map needed while idle.") } } else { // Be sure this is false when we're not inside diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index 35fa1d8a0..a2d643cba 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -767,11 +767,13 @@ func (c *Direct) SetEndpoints(endpoints []tailcfg.Endpoint) (changed bool) { // PollNetMap makes a /map request to download the network map, calling cb with // each new netmap. func (c *Direct) PollNetMap(ctx context.Context, cb func(*netmap.NetworkMap)) error { + c.logf("PollNetMap") return c.sendMapRequest(ctx, -1, false, cb) } // FetchNetMap fetches the netmap once. func (c *Direct) FetchNetMap(ctx context.Context) (*netmap.NetworkMap, error) { + c.logf("FetchNetMap") var ret *netmap.NetworkMap err := c.sendMapRequest(ctx, 1, false, func(nm *netmap.NetworkMap) { ret = nm @@ -786,6 +788,7 @@ func (c *Direct) FetchNetMap(ctx context.Context) (*netmap.NetworkMap, error) { // but does not fetch anything. It returns an error if the server did not return a // successful 200 OK response. func (c *Direct) SendLiteMapUpdate(ctx context.Context) error { + c.logf("SendLiteMapUpdate") return c.sendMapRequest(ctx, 1, false, nil) } @@ -796,6 +799,7 @@ const pollTimeout = 120 * time.Second // cb nil means to omit peers. func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool, cb func(*netmap.NetworkMap)) error { + c.logf("sendMapRequest") metricMapRequests.Add(1) metricMapRequestsActive.Add(1) defer metricMapRequestsActive.Add(-1) @@ -822,21 +826,25 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool machinePrivKey, err := c.getMachinePrivKey() if err != nil { + c.logf("sendMapRequest: machinePrivKey failed") return fmt.Errorf("getMachinePrivKey: %w", err) } if machinePrivKey.IsZero() { + c.logf("sendMapRequest: machinePrivKey isZero") return errors.New("getMachinePrivKey returned zero key") } if persist.PrivateNodeKey().IsZero() { + c.logf("sendMapRequest: privateNodeKey isZero") return errors.New("privateNodeKey is zero") } if backendLogID == "" { + c.logf("sendMapRequest: BackendLogID missing") return errors.New("hostinfo: BackendLogID missing") } allowStream := maxPolls != 1 - c.logf("[v1] PollNetMap: stream=%v ep=%v", allowStream, epStrs) + c.logf("PollNetMap: stream=%v ep=%v", allowStream, epStrs) vlogf := logger.Discard if DevKnob.DumpNetMaps() { @@ -891,7 +899,8 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool } bodyData, err := encode(request, serverKey, serverNoiseKey, machinePrivKey) - if err != nil { + if err != nil { + c.logf("PollNetMap: encode failed") vlogf("netmap: encode: %v", err) return err } @@ -919,11 +928,13 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(bodyData)) if err != nil { + c.logf("PollNetMap: NewRequestWithContext failed") return err } res, err := httpc.Do(req) if err != nil { + c.logf("PollNetMap: httpc.Do failed") vlogf("netmap: Do: %v", err) return err } @@ -931,6 +942,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool if res.StatusCode != 200 { msg, _ := io.ReadAll(res.Body) res.Body.Close() + c.logf("PollNetMap: Status != 200") return fmt.Errorf("initial fetch failed %d: %.200s", res.StatusCode, strings.TrimSpace(string(msg))) } @@ -940,6 +952,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool if cb == nil { io.Copy(io.Discard, res.Body) + c.logf("PollNetMap: cb == nil") return nil } @@ -952,6 +965,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool select { case <-pollDone: vlogf("netmap: ending timeout goroutine") + c.logf("netmap: ending timeout goroutine") return case <-timeout.C: c.logf("map response long-poll timed out!") @@ -963,10 +977,12 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool case <-timeout.C: case <-pollDone: vlogf("netmap: ending timeout goroutine") + c.logf("netmap: ending timeout goroutine") return } } vlogf("netmap: reset timeout timer") + c.logf("netmap: reset timeout timer") timeout.Reset(pollTimeout) } } @@ -989,6 +1005,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool vlogf("netmap: starting size read after %v (poll %v)", time.Since(t0).Round(time.Millisecond), i) var siz [4]byte if _, err := io.ReadFull(res.Body, siz[:]); err != nil { + c.logf("PollNetMap: io.ReadFull 4 bytes failed") vlogf("netmap: size read error after %v: %v", time.Since(t0).Round(time.Millisecond), err) return err } @@ -996,6 +1013,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool vlogf("netmap: read size %v after %v", size, time.Since(t0).Round(time.Millisecond)) msg = append(msg[:0], make([]byte, size)...) if _, err := io.ReadFull(res.Body, msg); err != nil { + c.logf("PollNetMap: io.ReadFull all bytes failed") vlogf("netmap: body read error: %v", err) return err } @@ -1003,6 +1021,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool var resp tailcfg.MapResponse if err := c.decodeMsg(msg, &resp, machinePrivKey); err != nil { + c.logf("PollNetMap: decode error") vlogf("netmap: decode error: %v") return err } @@ -1048,6 +1067,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, readOnly bool c.logf("netmap: [unexpected] new dial plan; nowhere to store it") } } + c.logf("PollNetMap: past dial plan") select { case timeoutReset <- struct{}{}: diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index dde7a394d..94eae7abb 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -312,7 +312,7 @@ func NewLocalBackend(logf logger.Logf, logid string, store ipn.StateStore, diale if version.IsUnstableBuild() { b.sockstatLogger, err = sockstatlog.NewLogger(logpolicy.LogsDir(logf), logf) if err != nil { - log.Printf("error setting up sockstat logger: %v", err) + b.logf("error setting up sockstat logger: %v", err) } } @@ -498,6 +498,7 @@ func (b *LocalBackend) linkChange(major bool, ifst *interfaces.State) { case ipn.NoState, ipn.Stopped: // Do nothing. default: + b.logf("linkChange: calling b.authReconfig") go b.authReconfig() } } @@ -839,6 +840,7 @@ func (b *LocalBackend) SetDecompressor(fn func() (controlclient.Decompressor, er // setClientStatus is the callback invoked by the control client whenever it posts a new status. // Among other things, this is where we update the netmap, packet filters, DNS and DERP maps. func (b *LocalBackend) setClientStatus(st controlclient.Status) { + b.logf("setClientStatus called") // The following do not depend on any data for which we need to lock b. if st.Err != nil { // TODO(crawshaw): display in the UI. @@ -852,6 +854,7 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) { s := uerr.UserVisibleError() b.send(ipn.Notify{ErrMessage: &s}) } + b.logf("setClientStatus status send failed, returning") return } @@ -887,6 +890,7 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) { // saved call (e.g. if we race stopping this // timer). if b.numClientStatusCalls.Load() != currCall { + b.logf("setClientStatus b.numClientStatusCalls.Load, returning") return } @@ -920,8 +924,11 @@ func (b *LocalBackend) setClientStatus(st controlclient.Status) { if st.LoginFinished != nil && wasBlocked { // Auth completed, unblock the engine b.blockEngineUpdates(false) + b.logf("ipnlocal: blockEngineUpdates returned") b.authReconfig() + b.logf("ipnlocal: b.authReconfig returned") b.send(ipn.Notify{LoginFinished: &empty.Message{}}) + b.logf("ipnlocal: ipn.Notify returned") } // Lock b once and do only the things that require locking. @@ -2764,7 +2771,9 @@ func (b *LocalBackend) setPrefsLockedOnEntry(caller string, newp *ipn.Prefs) ipn if oldp.WantRunning() != newp.WantRunning { b.stateMachine() } else { + b.logf("setPrefsLockedOnEntry: calling b.authReconfig") b.authReconfig() + b.logf("setPrefsLockedOnEntry: b.authReconfig returned") } b.send(ipn.Notify{Prefs: &prefs}) @@ -2893,24 +2902,28 @@ func (b *LocalBackend) blockEngineUpdates(block bool) { // updates are not currently blocked, based on the cached netmap and // user prefs. func (b *LocalBackend) authReconfig() { + b.logf("authReconfig: starting.") b.mu.Lock() + b.logf("authReconfig: Lock acquired.") blocked := b.blocked prefs := b.pm.CurrentPrefs() + b.logf("authReconfig: CurrentPrefs returned.") nm := b.netMap hasPAC := b.prevIfState.HasPAC() + b.logf("authReconfig: HasPAC returned.") disableSubnetsIfPAC := nm != nil && nm.Debug != nil && nm.Debug.DisableSubnetsIfPAC.EqualBool(true) b.mu.Unlock() if blocked { - b.logf("[v1] authReconfig: blocked, skipping.") + b.logf("authReconfig: blocked, skipping.") return } if nm == nil { - b.logf("[v1] authReconfig: netmap not yet valid. Skipping.") + b.logf("authReconfig: netmap not yet valid. Skipping.") return } if !prefs.WantRunning() { - b.logf("[v1] authReconfig: skipping because !WantRunning.") + b.logf("authReconfig: skipping because !WantRunning.") return } @@ -2927,6 +2940,7 @@ func (b *LocalBackend) authReconfig() { flags &^= netmap.AllowSubnetRoutes } } + b.logf("authReconfig: bitfields finished.") // Keep the dialer updated about whether we're supposed to use // an exit node's DNS server (so SOCKS5/HTTP outgoing dials @@ -2936,24 +2950,31 @@ func (b *LocalBackend) authReconfig() { } else { b.dialer.SetExitDNSDoH("") } + b.logf("authReconfig: exitNodeCanProxyDNS returned.") cfg, err := nmcfg.WGCfg(nm, b.logf, flags, prefs.ExitNodeID()) if err != nil { b.logf("wgcfg: %v", err) return } + b.logf("authReconfig: WGCfg exit node returned.") oneCGNATRoute := shouldUseOneCGNATRoute(nm, b.logf, version.OS()) + b.logf("authReconfig: shouldUseOneCGNATRoute.") rcfg := b.routerConfig(cfg, prefs, oneCGNATRoute) + b.logf("authReconfig: routerConfig oneCGNATRoute.") dcfg := dnsConfigForNetmap(nm, prefs, b.logf, version.OS()) + b.logf("authReconfig: dnsConfigForNetmap.") err = b.e.Reconfig(cfg, rcfg, dcfg, nm.Debug) if err == wgengine.ErrNoChanges { + b.logf("authReconfig: wgengine.ErrNoChanges.") return } - b.logf("[v1] authReconfig: ra=%v dns=%v 0x%02x: %v", prefs.RouteAll(), prefs.CorpDNS(), flags, err) + b.logf("authReconfig: ra=%v dns=%v 0x%02x: %v", prefs.RouteAll(), prefs.CorpDNS(), flags, err) b.initPeerAPIListener() + b.logf("authReconfig: b.initPeerAPIListener.") } // shouldUseOneCGNATRoute reports whether we should prefer to make one big @@ -3572,9 +3593,12 @@ func (b *LocalBackend) enterStateLockedOnEntry(newState ipn.State) { systemd.Status("Stopped; run 'tailscale up' to log in") } case ipn.Starting, ipn.NeedsMachineAuth: + b.logf("enterStateLockedOnEntry: ipn.Starting, ipn.NeedsMachineAuth") b.authReconfig() + b.logf("enterStateLockedOnEntry: authReconfig returned") // Needed so that UpdateEndpoints can run b.e.RequestStatus() + b.logf("enterStateLockedOnEntry: b.e.RequestStatus returned") case ipn.Running: var addrs []string for _, addr := range netMap.Addresses { @@ -3871,6 +3895,7 @@ func (b *LocalBackend) updatePersistFromNetMapLocked(nm *netmap.NetworkMap, pref } func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) { + b.logf("setNetMapLocked") b.dialer.SetNetMap(nm) var login string if nm != nil { @@ -4520,6 +4545,7 @@ type keyProvingNoiseRoundTripper struct { } func (n keyProvingNoiseRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + log.Println("RoundTrip") b := n.b var priv key.NodePrivate @@ -4820,6 +4846,7 @@ func (b *LocalBackend) initTKALocked() error { // resetForProfileChangeLockedOnEntry resets the backend for a profile change. func (b *LocalBackend) resetForProfileChangeLockedOnEntry() error { + b.logf("resetForProfileChangeLockedOnEntry") b.setNetMapLocked(nil) // Reset netmap. // Reset the NetworkMap in the engine b.e.SetNetworkMap(new(netmap.NetworkMap)) @@ -4878,6 +4905,7 @@ func (b *LocalBackend) ListProfiles() []ipn.LoginProfile { // backend is left with a new profile, ready for StartLoginInterative to be // called to register it as new node. func (b *LocalBackend) ResetAuth() error { + b.logf("ResetAuth") b.mu.Lock() b.resetControlClientLockedAsync() if err := b.clearMachineKeyLocked(); err != nil { @@ -4934,6 +4962,7 @@ func (b *LocalBackend) StreamDebugCapture(ctx context.Context, w io.Writer) erro } func (b *LocalBackend) GetPeerEndpointChanges(ctx context.Context, ip netip.Addr) ([]magicsock.EndpointChange, error) { + b.logf("GetPeerEndpointChanges") pip, ok := b.e.PeerForIP(ip) if !ok { return nil, fmt.Errorf("no matching peer") diff --git a/net/interfaces/interfaces.go b/net/interfaces/interfaces.go index f8f815bd8..f27347cfb 100644 --- a/net/interfaces/interfaces.go +++ b/net/interfaces/interfaces.go @@ -155,7 +155,7 @@ func LocalAddresses() (regular, loopback []netip.Addr, err error) { // addresses we otherwise wouldn't, like: // + 169.254.x.x (AWS Lambda uses NAT with these) // + IPv6 ULA (Google Cloud Run uses these with address translation) - if hostinfo.GetEnvType() == hostinfo.AWSLambda { + if hostinfo.GetEnvType() == hostinfo.AWSLambda || hostinfo.GetEnvType() == hostinfo.AzureAppService { regular4 = linklocal4 } regular6 = ula6 @@ -645,7 +645,7 @@ func isUsableV4(ip netip.Addr) bool { return false } if ip.IsLinkLocalUnicast() { - return hostinfo.GetEnvType() == hostinfo.AWSLambda + return (hostinfo.GetEnvType() == hostinfo.AWSLambda || hostinfo.GetEnvType() == hostinfo.AzureAppService) } return true }