portlist: refactor Poller to simplify external API

This change makes it so that Poller has only two external methods: Run and Close. The Poller becomes harder to get wrong by external packages because Close is a no-op if Run is not called and Run must be called to obtain updates and check an init-error. The only 3rd method is an exported "Init" that preserves an old behavior whereby you can pre-fetch a list of ports and check for errors _without_ starting Run's ticker. However, I'd be happy to remove that if we can afford it. Fixes #8171 Signed-off-by: Marwan Sulaiman <marwan@tailscale.com>
2023-05-24 19:37:32 -04:00 · 2023-05-24 19:37:32 -04:00 · 4b807ca95e
parent 27ea062078
commit 4b807ca95e
3 changed files with 110 additions and 81 deletions
--- a/ipn/ipnlocal/local.go
+++ b/ipn/ipnlocal/local.go
@ -293,7 +293,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
 	ctx, cancel := context.WithCancel(context.Background())
 	portpoll := new(portlist.Poller)
-	err = portpoll.Check()
+	err = portpoll.Init()
 	if err != nil {
 		logf("skipping portlist: %s", err)
 		portpoll = nil
@ -1379,8 +1379,12 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
 	if b.portpoll != nil {
 		b.portpollOnce.Do(func() {
-			go b.portpoll.Run(b.ctx)
+			updates, err := b.portpoll.Run(b.ctx)
-			go b.readPoller()
+			if err != nil {
 				b.logf("error running port poller: %v", err)
 				return
 			}
 			go b.readPoller(updates)
 			// Give the poller a second to get results to
 			// prevent it from restarting our map poll
@ -1813,15 +1817,20 @@ func dnsMapsEqual(new, old *netmap.NetworkMap) bool {
 // readPoller is a goroutine that receives service lists from
 // b.portpoll and propagates them into the controlclient's HostInfo.
-func (b *LocalBackend) readPoller() {
+func (b *LocalBackend) readPoller(updates chan portlist.Update) {
 	defer b.portpoll.Close()
 	n := 0
 	for {
-		ports, ok := <-b.portpoll.Updates()
+		update, ok := <-updates
 		if !ok {
 			return
 		}
 		if update.Error != nil {
 			b.logf("error polling os ports: %v", update.Error)
 			return // preserve all behavior, though we can just continue
 		}
 		sl := []tailcfg.Service{}
-		for _, p := range ports {
+		for _, p := range update.List {
 			s := tailcfg.Service{
 				Proto:       tailcfg.ServiceProto(p.Proto),
 				Port:        p.Port,
--- a/portlist/poller.go
+++ b/portlist/poller.go
@ -9,6 +9,7 @@ package portlist
 import (
 	"context"
 	"errors"
 	"fmt"
 	"runtime"
 	"sync"
 	"time"
@ -29,9 +30,14 @@ type Poller struct {
 	// This field should only be changed before calling Run.
 	IncludeLocalhost bool
-	c chan List // unbuffered
+	// Interval sets the polling interval for probing the underlying
 	// os for port updates.
 	Interval time.Duration
 	c chan Update // unbuffered
 	initOnce sync.Once // guards init of private fields
 	initErr  error
 	// os, if non-nil, is an OS-specific implementation of the portlist getting
 	// code. When non-nil, it's responsible for getting the complete list of
@ -53,6 +59,19 @@ type Poller struct {
 	prev List // most recent data, not aliasing scratch
 }
 // Update is a container for a portlist update event.
 // When Poller polls the underlying OS for an update,
 // it either returns a new list of open ports,
 // or an error that happened in the process.
 //
 // Note that it is up to the caller to act upon the error,
 // such as closing the Poller. Otherwise, the Poller will continue
 // to try and get a list for every interval.
 type Update struct {
 	List  List
 	Error error
 }
 // osImpl is the OS-specific implementation of getting the open listening ports.
 type osImpl interface {
 	Close() error
@ -78,37 +97,54 @@ func (p *Poller) setPrev(pl List) {
 	p.prev = slices.Clone(pl)
 }
-// init sets the os implementation if exists. It also sets
+// Init is an optional method that makes sure the Poller is enabled
-// all private fields. All exported methods must call this in a
+// and the undelrying OS implementation is working properly.
-// Once, otherwise they may panic.
+//
-func (p *Poller) init() {
+// An error returned from Init is non-fatal and means
-	if debugDisablePortlist() {
+// that it's been administratively disabled or the underlying
-		return
+// OS is not implemented.
-	}
+func (p *Poller) Init() error {
-	if newOSImpl != nil {
+	p.initOnce.Do(func() {
-		p.os = newOSImpl(p.IncludeLocalhost)
+		p.initErr = p.init()
-	}
+	})
-	p.closeCtx, p.closeCtxCancel = context.WithCancel(context.Background())
+	return p.initErr
 	p.c = make(chan List)
 	p.runDone = make(chan struct{})
 }
-// Updates return the channel that receives port list updates.
+func (p *Poller) init() error {
-//
+	if debugDisablePortlist() {
-// The channel is closed when the Poller is closed.
+		return errors.New("portlist disabled by envknob")
-func (p *Poller) Updates() <-chan List {
+	}
-	p.initOnce.Do(p.init)
+	if newOSImpl == nil {
-	return p.c
+		return errUnimplemented
 	}
 	p.os = newOSImpl(p.IncludeLocalhost)
 	// Do one initial poll synchronously so we can return an error
 	// early.
 	if pl, err := p.getList(); err != nil {
 		return err
 	} else {
 		p.setPrev(pl)
 	}
 	if p.Interval == 0 {
 		p.Interval = pollInterval
 	}
 	p.closeCtx, p.closeCtxCancel = context.WithCancel(context.Background())
 	p.c = make(chan Update)
 	p.runDone = make(chan struct{})
 	return nil
 }
 // Close closes the Poller.
 // Run will return with a nil error.
 func (p *Poller) Close() error {
 	p.initOnce.Do(p.init)
 	p.closeCtxCancel()
 	if p.os == nil {
 		return nil
 	}
 	p.closeCtxCancel()
 	<-p.runDone // if caller of Close never called Run, this can hang.
 	if p.os != nil {
 		p.os.Close()
@ -117,14 +153,14 @@ func (p *Poller) Close() error {
 }
 // send sends pl to p.c and returns whether it was successfully sent.
-func (p *Poller) send(ctx context.Context, pl List) (sent bool, err error) {
+func (p *Poller) send(ctx context.Context, pl List, plErr error) (sent bool) {
 	select {
-	case p.c <- pl:
+	case p.c <- Update{pl, plErr}:
-		return true, nil
+		return true
 	case <-ctx.Done():
-		return false, ctx.Err()
+		return false
 	case <-p.closeCtx.Done():
-		return false, nil
+		return false
 	}
 }
@ -132,45 +168,26 @@ func (p *Poller) send(ctx context.Context, pl List) (sent bool, err error) {
 // is done, or the Close is called.
 //
 // Run may only be called once.
-func (p *Poller) Run(ctx context.Context) error {
+func (p *Poller) Run(ctx context.Context) (chan Update, error) {
-	tick := time.NewTicker(pollInterval)
+	if p.os == nil {
 		err := p.Init()
 		if err != nil {
 			return nil, fmt.Errorf("error initializing poller: %w", err)
 		}
 	}
 	tick := time.NewTicker(p.Interval)
 	defer tick.Stop()
-	return p.runWithTickChan(ctx, tick.C)
+	go p.runWithTickChan(ctx, tick.C)
-}
+	return p.c, nil
 // Check makes sure that the Poller is enabled and
 // the undelrying OS implementation is working properly.
 //
 // An error returned from Check is non-fatal and means
 // that it's been administratively disabled or the underlying
 // OS is not implemented.
 func (p *Poller) Check() error {
 	p.initOnce.Do(p.init)
 	if p.os == nil {
 		return errUnimplemented
 	}
 	// Do one initial poll synchronously so we can return an error
 	// early.
 	if pl, err := p.getList(); err != nil {
 		return err
 	} else {
 		p.setPrev(pl)
 	}
 	return nil
 }
 func (p *Poller) runWithTickChan(ctx context.Context, tickChan <-chan time.Time) error {
 	p.initOnce.Do(p.init)
 	if p.os == nil {
 		return errUnimplemented
 }
 func (p *Poller) runWithTickChan(ctx context.Context, tickChan <-chan time.Time) {
 	defer close(p.runDone)
 	defer close(p.c)
 	// Send out the pre-generated initial value.
-	if sent, err := p.send(ctx, p.prev); !sent {
+	if sent := p.send(ctx, p.prev, nil); !sent {
-		return err
+		return
 	}
 	for {
@ -178,28 +195,27 @@ func (p *Poller) runWithTickChan(ctx context.Context, tickChan <-chan time.Time)
 		case <-tickChan:
 			pl, err := p.getList()
 			if err != nil {
-				return err
+				if !p.send(ctx, nil, err) {
 					return
 				}
 				continue
 			}
 			if pl.equal(p.prev) {
 				continue
 			}
 			p.setPrev(pl)
-			if sent, err := p.send(ctx, p.prev); !sent {
+			if !p.send(ctx, p.prev, nil) {
-				return err
+				return
 			}
 		case <-ctx.Done():
-			return ctx.Err()
+			return
 		case <-p.closeCtx.Done():
-			return nil
+			return
 		}
 	}
 }
 func (p *Poller) getList() (List, error) {
 	if debugDisablePortlist() {
 		return nil, nil
 	}
 	p.initOnce.Do(p.init)
 	var err error
 	p.scratch, err = p.os.AppendListeningPorts(p.scratch[:0])
 	return p.scratch, err
--- a/portlist/portlist_test.go
+++ b/portlist/portlist_test.go
@ -17,6 +17,7 @@ func TestGetList(t *testing.T) {
 	tstest.ResourceCheck(t)
 	var p Poller
 	p.os = newOSImpl(false)
 	pl, err := p.getList()
 	if err != nil {
 		t.Fatal(err)
@ -38,6 +39,7 @@ func TestIgnoreLocallyBoundPorts(t *testing.T) {
 	ta := ln.Addr().(*net.TCPAddr)
 	port := ta.Port
 	var p Poller
 	p.os = newOSImpl(false)
 	pl, err := p.getList()
 	if err != nil {
 		t.Fatal(err)
@ -51,7 +53,7 @@ func TestIgnoreLocallyBoundPorts(t *testing.T) {
 func TestChangesOverTime(t *testing.T) {
 	var p Poller
-	p.IncludeLocalhost = true
+	p.os = newOSImpl(true)
 	get := func(t *testing.T) []Port {
 		t.Helper()
 		s, err := p.getList()
@ -177,7 +179,7 @@ func TestEqualLessThan(t *testing.T) {
 func TestPoller(t *testing.T) {
 	var p Poller
-	err := p.Check()
+	err := p.Init()
 	if err != nil {
 		t.Skipf("not running test: %v", err)
 	}
@ -190,10 +192,14 @@ func TestPoller(t *testing.T) {
 	go func() {
 		defer wg.Done()
-		for pl := range p.Updates() {
+		for update := range p.c {
 			if update.Error != nil {
 				t.Errorf("error polling ports: %v", err)
 				return
 			}
 			// Look at all the pl slice memory to maximize
 			// chance of race detector seeing violations.
-			for _, v := range pl {
+			for _, v := range update.List {
 				if v == (Port{}) {
 					// Force use
 					panic("empty port")
@ -209,9 +215,7 @@ func TestPoller(t *testing.T) {
 	tick := make(chan time.Time, 16)
 	go func() {
 		defer wg.Done()
-		if err := p.runWithTickChan(context.Background(), tick); err != nil {
+		p.runWithTickChan(context.Background(), tick)
 			t.Error("runWithTickChan:", err)
 		}
 	}()
 	for i := 0; i < 10; i++ {
 		ln, err := net.Listen("tcp", ":0")