diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 08cb38219..478a431e5 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -55,6 +55,44 @@ import ( "tailscale.com/version" ) +// Various debugging and experimental tweakables, set by environment +// variable. +var ( + // logPacketDests prints the known addresses for a peer every time + // they change, in the legacy (non-discovery) endpoint code only. + logPacketDests, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_LOG_PACKET_DESTS")) + // debugDisco prints verbose logs of active discovery events as + // they happen. + debugDisco, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_DISCO")) + // debugOmitLocalAddresses removes all local interface addresses + // from magicsock's discovered local endpoints. Used in some tests. + debugOmitLocalAddresses, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_OMIT_LOCAL_ADDRS")) + // debugUseDerpRoute temporarily (2020-03-22) controls whether DERP + // reverse routing is enabled (Issue 150). It will become always true + // later. + debugUseDerpRoute, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_ENABLE_DERP_ROUTE")) + // logDerpVerbose logs all received DERP packets, including their + // full payload. + logDerpVerbose, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_DERP")) + // debugReSTUNStopOnIdle unconditionally enables the "shut down + // STUN if magicsock is idle" behavior that normally only triggers + // on mobile devices, lowers the shutdown interval, and logs more + // verbosely about idle measurements. + debugReSTUNStopOnIdle, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_RESTUN_STOP_ON_IDLE")) +) + +// inTest binds magicsock to 127.0.0.1 instead of its usual 0.0.0.0, +// to avoid macOS prompting for firewall permissions during +// interactive tests. +// +// Unlike the other debug tweakables above, this one needs to be +// checked every time at runtime, because tests set this after program +// startup. +func inTest() bool { + inTest, _ := strconv.ParseBool(os.Getenv("IN_TS_TEST")) + return inTest +} + // A Conn routes UDP packets and actively manages a list of its endpoints. // It implements wireguard/conn.Bind. type Conn struct { @@ -602,8 +640,6 @@ func (c *Conn) goDerpConnect(node int) { go c.derpWriteChanOfAddr(netaddr.IPPort{IP: derpMagicIPAddr, Port: uint16(node)}, key.Public{}) } -var debugOmitLocalAddresses, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_OMIT_LOCAL_ADDRS")) - // determineEndpoints returns the machine's endpoint addresses. It // does a STUN lookup (via netcheck) to determine its public address. // @@ -705,10 +741,6 @@ func shouldSprayPacket(b []byte) bool { return false } -var logPacketDests, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_LOG_PACKET_DESTS")) - -var debugDisco, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_DISCO")) - const sprayPeriod = 3 * time.Second // appendDests appends to dsts the destinations that b should be @@ -933,11 +965,6 @@ func (c *Conn) sendAddr(addr netaddr.IPPort, pubKey key.Public, b []byte) (sent // TODO: this is currently arbitrary. Figure out something better? const bufferedDerpWritesBeforeDrop = 32 -// debugUseDerpRoute temporarily (2020-03-22) controls whether DERP -// reverse routing is enabled (Issue 150). It will become always true -// later. -var debugUseDerpRoute, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_ENABLE_DERP_ROUTE")) - // derpWriteChanOfAddr returns a DERP client for fake UDP addresses that // represent DERP servers, creating them as necessary. For real UDP // addresses, it returns nil. @@ -1113,8 +1140,6 @@ type derpReadResult struct { copyBuf func(dst []byte) int } -var logDerpVerbose, _ = strconv.ParseBool(os.Getenv("DEBUG_DERP_VERBOSE")) - // runDerpReader runs in a goroutine for the life of a DERP // connection, handling received packets. func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netaddr.IPPort, dc *derphttp.Client, wg *syncs.WaitGroupChan, startGate <-chan struct{}) { @@ -2070,8 +2095,6 @@ func (c *Conn) Close() error { return err } -var debugReSTUNStopOnIdle, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_RESTUN_STOP_ON_IDLE")) - func maxIdleBeforeSTUNShutdown() time.Duration { if debugReSTUNStopOnIdle { return time.Minute @@ -2197,7 +2220,7 @@ func (c *Conn) listenPacket(ctx context.Context, network, addr string) (net.Pack func (c *Conn) bind1(ruc **RebindingUDPConn, which string) error { host := "" - if v, _ := strconv.ParseBool(os.Getenv("IN_TS_TEST")); v { + if inTest() { host = "127.0.0.1" } var pc net.PacketConn @@ -2227,7 +2250,7 @@ func (c *Conn) bind1(ruc **RebindingUDPConn, which string) error { // It should be followed by a call to ReSTUN. func (c *Conn) Rebind() { host := "" - if v, _ := strconv.ParseBool(os.Getenv("IN_TS_TEST")); v { + if inTest() { host = "127.0.0.1" } listenCtx := context.Background() // unused without DNS name to resolve diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 82945d978..068c2c855 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -6,6 +6,7 @@ package magicsock import ( "bytes" + "context" crand "crypto/rand" "crypto/tls" "encoding/binary" @@ -26,9 +27,11 @@ import ( "github.com/tailscale/wireguard-go/wgcfg" "golang.org/x/crypto/nacl/box" "inet.af/netaddr" + "tailscale.com/control/controlclient" "tailscale.com/derp" "tailscale.com/derp/derphttp" "tailscale.com/derp/derpmap" + "tailscale.com/ipn/ipnstate" "tailscale.com/net/stun/stuntest" "tailscale.com/tailcfg" "tailscale.com/tstest" @@ -120,6 +123,7 @@ type magicStack struct { tun *tuntest.ChannelTUN // tuntap device to send/receive packets tsTun *tstun.TUN // wrapped tun that implements filtering and wgengine hooks dev *device.Device // the wireguard-go Device that connects the previous things + tsIP chan netaddr.IP // buffered, guaranteed to yield at least 1 value } // newMagicStack builds and initializes an idle magicsock and @@ -182,6 +186,7 @@ func newMagicStack(t *testing.T, logf logger.Logf, l nettype.PacketListener, der tun: tun, tsTun: tsTun, dev: dev, + tsIP: make(chan netaddr.IP, 1), } } @@ -190,6 +195,139 @@ func (s *magicStack) Close() { s.conn.Close() } +func (s *magicStack) Status() *ipnstate.Status { + var sb ipnstate.StatusBuilder + s.conn.UpdateStatus(&sb) + return sb.Status() +} + +// AwaitIP waits for magicStack to receive a Tailscale IP address on +// tsIP, and returns the IP. It's intended for use with magicStacks +// that have been meshed with meshStacks, to wait for configs to have +// propagated enough that everyone has a Tailscale IP that should +// work. +func (s *magicStack) AwaitIP() netaddr.IP { + select { + case ip := <-s.tsIP: + return ip + case <-time.After(2 * time.Second): + panic("timed out waiting for magicStack to get an IP") + } +} + +func (s *magicStack) Ping(src, dst netaddr.IP) { + pkt := tuntest.Ping(dst.IPAddr().IP, src.IPAddr().IP) + s.tun.Outbound <- pkt +} + +func (s *magicStack) AwaitPacket(timeout time.Duration) bool { + select { + case <-s.tun.Inbound: + return true + case <-time.After(timeout): + return false + } +} + +// meshStacks monitors epCh on all given ms, and plumbs network maps +// and WireGuard configs into everyone to form a full mesh that has up +// to date endpoint info. Think of it as an extremely stripped down +// and purpose-built Tailscale control plane. +// +// meshStacks only supports disco connections, not legacy logic. +func meshStacks(logf logger.Logf, ms []*magicStack) (cleanup func()) { + ctx, cancel := context.WithCancel(context.Background()) + + // Serialize all reconfigurations globally, just to keep things + // simpler. + var ( + mu sync.Mutex + eps = make([][]string, len(ms)) + ) + + buildNetmapLocked := func(myIdx int) *controlclient.NetworkMap { + me := ms[myIdx] + nm := &controlclient.NetworkMap{ + PrivateKey: me.privateKey, + NodeKey: tailcfg.NodeKey(me.privateKey.Public()), + Addresses: []wgcfg.CIDR{{IP: wgcfg.IPv4(1, 0, 0, byte(myIdx+1)), Mask: 32}}, + } + for i, peer := range ms { + if i == myIdx { + continue + } + addrs := []wgcfg.CIDR{{IP: wgcfg.IPv4(1, 0, 0, byte(i+1)), Mask: 32}} + peer := &tailcfg.Node{ + ID: tailcfg.NodeID(i + 1), + Name: fmt.Sprintf("node%d", i+1), + Key: tailcfg.NodeKey(peer.privateKey.Public()), + DiscoKey: peer.conn.DiscoPublicKey(), + Addresses: addrs, + AllowedIPs: addrs, + Endpoints: eps[i], + DERP: "127.3.3.40:1", + } + nm.Peers = append(nm.Peers, peer) + } + + return nm + } + + updateEps := func(idx int, newEps []string) { + mu.Lock() + defer mu.Unlock() + + eps[idx] = newEps + + for i, m := range ms { + netmap := buildNetmapLocked(i) + nip, _ := netaddr.FromStdIP(netmap.Addresses[0].IP.IP()) + select { + case m.tsIP <- nip: + default: + } + m.conn.SetNetworkMap(netmap) + peerSet := make(map[key.Public]struct{}, len(netmap.Peers)) + for _, peer := range netmap.Peers { + peerSet[key.Public(peer.Key)] = struct{}{} + } + m.conn.UpdatePeers(peerSet) + wg, err := netmap.WGCfg(logf, controlclient.AllowSingleHosts, nil) + if err != nil { + // We're too far from the *testing.T to be graceful, + // blow up. Shouldn't happen anyway. + panic(fmt.Sprintf("failed to construct wgcfg from netmap: %v", err)) + } + if err := m.dev.Reconfig(wg); err != nil { + panic(fmt.Sprintf("device reconfig failed: %v", err)) + } + } + } + + var wg sync.WaitGroup + wg.Add(len(ms)) + for i := range ms { + go func(myIdx int) { + defer wg.Done() + + for { + select { + case <-ctx.Done(): + return + case eps := <-ms[myIdx].epCh: + logf("conn%d endpoints update", myIdx+1) + updateEps(myIdx, eps) + } + } + }(i) + } + + return func() { + cancel() + wg.Wait() + } +} + func TestNewConn(t *testing.T) { tstest.PanicOnLog() rc := tstest.NewResourceCheck() @@ -446,7 +584,8 @@ func TestTwoDevicePing(t *testing.T) { testTwoDevicePing(t, n) }) t.Run("natlab", func(t *testing.T) { - t.Run("simple internet", func(t *testing.T) { + t.Run("simple_internet", func(t *testing.T) { + t.Parallel() mstun := &natlab.Machine{Name: "stun"} m1 := &natlab.Machine{Name: "m1"} m2 := &natlab.Machine{Name: "m2"} @@ -463,10 +602,10 @@ func TestTwoDevicePing(t *testing.T) { stun: mstun, stunIP: sif.V4(), } - testTwoDevicePing(t, n) + testActiveDiscovery(t, n) }) - t.Run("facing firewalls", func(t *testing.T) { + t.Run("facing_firewalls", func(t *testing.T) { mstun := &natlab.Machine{Name: "stun"} m1 := &natlab.Machine{ Name: "m1", @@ -505,6 +644,42 @@ type devices struct { stunIP netaddr.IP } +func testActiveDiscovery(t *testing.T, d *devices) { + tstest.PanicOnLog() + rc := tstest.NewResourceCheck() + defer rc.Assert(t) + + tlogf, setT := makeNestable(t) + setT(t) + + start := time.Now() + logf := func(msg string, args ...interface{}) { + msg = fmt.Sprintf("%s: %s", time.Since(start), msg) + tlogf(msg, args...) + } + + derpMap, cleanup := runDERPAndStun(t, logf, d.stun, d.stunIP) + defer cleanup() + + m1 := newMagicStack(t, logger.WithPrefix(logf, "conn1: "), d.m1, derpMap) + defer m1.Close() + m2 := newMagicStack(t, logger.WithPrefix(logf, "conn2: "), d.m2, derpMap) + defer m2.Close() + + // Interconnect the two magicsocks, tell them about each other. + cleanup = meshStacks(logf, []*magicStack{m1, m2}) + defer cleanup() + + m1IP := m1.AwaitIP() + m2IP := m2.AwaitIP() + logf("IPs: %s %s", m1IP, m2IP) + + m1.Ping(m1IP, m2IP) + if !m2.AwaitPacket(10 * time.Second) { + t.Errorf("timed out waiting for ping") + } +} + func testTwoDevicePing(t *testing.T, d *devices) { tstest.PanicOnLog() rc := tstest.NewResourceCheck()