From 8668103f0621c3b0732506debcce5885960be1b6 Mon Sep 17 00:00:00 2001 From: David Crawshaw Date: Thu, 26 Aug 2021 18:19:42 -0400 Subject: [PATCH] tstest/integration/vms: print qemu console output, fix printing issues Fix a few test printing issues when tests fail. Qemu console output is super useful when something is wrong in the harness and we cannot even bring up the tests. Also useful for figuring out where all the time goes in tests. A little noisy, but not too noisy as long as you're only running one VM as part of the tests, which is my plan. Signed-off-by: David Crawshaw --- tstest/integration/vms/vm_setup_test.go | 167 ++++++++++++++--------- tstest/integration/vms/vms_steps_test.go | 48 +++---- tstest/integration/vms/vms_test.go | 54 ++++---- 3 files changed, 155 insertions(+), 114 deletions(-) diff --git a/tstest/integration/vms/vm_setup_test.go b/tstest/integration/vms/vm_setup_test.go index c7b292cee..a24d7f87a 100644 --- a/tstest/integration/vms/vm_setup_test.go +++ b/tstest/integration/vms/vm_setup_test.go @@ -8,9 +8,9 @@ package vms import ( + "bytes" "crypto/sha256" "encoding/hex" - "errors" "fmt" "io" "net" @@ -18,11 +18,10 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "strconv" "strings" - "syscall" "testing" - "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" @@ -33,10 +32,26 @@ import ( "tailscale.com/types/logger" ) +type vmInstance struct { + d Distro + cmd *exec.Cmd + done chan struct{} + doneErr error // not written until done is closed +} + +func (vm *vmInstance) running() bool { + select { + case <-vm.done: + return false + default: + return true + } +} + // mkVM makes a KVM-accelerated virtual machine and prepares it for introduction // to the testcontrol server. The function it returns is for killing the virtual // machine when it is time for it to die. -func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) { +func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir string) *vmInstance { t.Helper() cdir, err := os.UserCacheDir() @@ -51,7 +66,7 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir stri t.Fatal(err) } - mkLayeredQcow(t, tdir, d, h.fetchDistro(t, d)) + mkLayeredQcow(t, tdir, d, fetchDistro(t, d)) mkSeed(t, d, sshKey, hostURL, tdir, port) driveArg := fmt.Sprintf("file=%s,if=virtio", filepath.Join(tdir, d.Name+".qcow2")) @@ -65,6 +80,7 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir stri "-drive", driveArg, "-cdrom", filepath.Join(tdir, d.Name, "seed", "seed.iso"), "-smbios", "type=1,serial=ds=nocloud;h=" + d.Name, + "-nographic", } if *useVNC { @@ -82,33 +98,58 @@ func (h *Harness) mkVM(t *testing.T, n int, d Distro, sshKey, hostURL, tdir stri t.Logf("running: qemu-system-x86_64 %s", strings.Join(args, " ")) cmd := exec.Command("qemu-system-x86_64", args...) - cmd.Stdout = logger.FuncWriter(t.Logf) - cmd.Stderr = logger.FuncWriter(t.Logf) - err = cmd.Start() - - if err != nil { + cmd.Stdout = &qemuLog{f: t.Logf} + cmd.Stderr = &qemuLog{f: t.Logf} + if err := cmd.Start(); err != nil { t.Fatal(err) } - time.Sleep(time.Second) - - // NOTE(Xe): In Unix if you do a kill with signal number 0, the kernel will do - // all of the access checking for the process (existence, permissions, etc) but - // nothing else. This is a way to ensure that qemu's process is active. - if err := cmd.Process.Signal(syscall.Signal(0)); err != nil { - t.Fatalf("qemu is not running: %v", err) + vm := &vmInstance{ + cmd: cmd, + d: d, + done: make(chan struct{}), } + go func() { + vm.doneErr = cmd.Wait() + close(vm.done) + }() t.Cleanup(func() { - err := cmd.Process.Kill() + err := vm.cmd.Process.Kill() if err != nil { - t.Errorf("can't kill %s (%d): %v", d.Name, cmd.Process.Pid, err) + t.Logf("can't kill %s (%d): %v", d.Name, cmd.Process.Pid, err) } - - cmd.Wait() + <-vm.done }) + + return vm } +type qemuLog struct { + buf []byte + f logger.Logf +} + +func (w *qemuLog) Write(p []byte) (int, error) { + if !*verboseQemu { + return len(p), nil + } + w.buf = append(w.buf, p...) + if i := bytes.LastIndexByte(w.buf, '\n'); i > 0 { + j := i + if w.buf[j-1] == '\r' { + j-- + } + buf := ansiEscCodeRE.ReplaceAll(w.buf[:j], nil) + w.buf = w.buf[i+1:] + + w.f("qemu console: %q", buf) + } + return len(p), nil +} + +var ansiEscCodeRE = regexp.MustCompile("\x1b" + `\[[0-?]*[ -/]*[@-~]`) + // fetchFromS3 fetches a distribution image from Amazon S3 or reports whether // it is unable to. It can fail to fetch from S3 if there is either no AWS // configuration (in ~/.aws/credentials) or if the `-no-s3` flag is passed. In @@ -161,7 +202,7 @@ func fetchFromS3(t *testing.T, fout *os.File, d Distro) bool { // fetchDistro fetches a distribution from the internet if it doesn't already exist locally. It // also validates the sha256 sum from a known good hash. -func (h *Harness) fetchDistro(t *testing.T, resultDistro Distro) string { +func fetchDistro(t *testing.T, resultDistro Distro) string { t.Helper() cdir, err := os.UserCacheDir() @@ -170,66 +211,60 @@ func (h *Harness) fetchDistro(t *testing.T, resultDistro Distro) string { } cdir = filepath.Join(cdir, "tailscale", "vm-test") - if strings.HasPrefix(resultDistro.Name, "nixos") { - return h.makeNixOSImage(t, resultDistro, cdir) - } - qcowPath := filepath.Join(cdir, "qcow2", resultDistro.SHA256Sum) - _, err = os.Stat(qcowPath) - if err == nil { + if _, err = os.Stat(qcowPath); err == nil { hash := checkCachedImageHash(t, resultDistro, cdir) - if hash != resultDistro.SHA256Sum { - t.Logf("hash for %s (%s) doesn't match expected %s, re-downloading", resultDistro.Name, qcowPath, resultDistro.SHA256Sum) - err = errors.New("some fake non-nil error to force a redownload") - - if err := os.Remove(qcowPath); err != nil { - t.Fatalf("can't delete wrong cached image: %v", err) - } + if hash == resultDistro.SHA256Sum { + return qcowPath + } + t.Logf("hash for %s (%s) doesn't match expected %s, re-downloading", resultDistro.Name, qcowPath, resultDistro.SHA256Sum) + if err := os.Remove(qcowPath); err != nil { + t.Fatalf("can't delete wrong cached image: %v", err) } } + t.Logf("downloading distro image %s to %s", resultDistro.URL, qcowPath) + if err := os.MkdirAll(filepath.Dir(qcowPath), 0777); err != nil { + t.Fatal(err) + } + fout, err := os.Create(qcowPath) if err != nil { - t.Logf("downloading distro image %s to %s", resultDistro.URL, qcowPath) - fout, err := os.Create(qcowPath) + t.Fatal(err) + } + + if !fetchFromS3(t, fout, resultDistro) { + resp, err := http.Get(resultDistro.URL) if err != nil { - t.Fatal(err) + t.Fatalf("can't fetch qcow2 for %s (%s): %v", resultDistro.Name, resultDistro.URL, err) } - if !fetchFromS3(t, fout, resultDistro) { - resp, err := http.Get(resultDistro.URL) - if err != nil { - t.Fatalf("can't fetch qcow2 for %s (%s): %v", resultDistro.Name, resultDistro.URL, err) - } - - if resp.StatusCode != http.StatusOK { - resp.Body.Close() - t.Fatalf("%s replied %s", resultDistro.URL, resp.Status) - } - - _, err = io.Copy(fout, resp.Body) - if err != nil { - t.Fatalf("download of %s failed: %v", resultDistro.URL, err) - } - + if resp.StatusCode != http.StatusOK { resp.Body.Close() - err = fout.Close() - if err != nil { - t.Fatalf("can't close fout: %v", err) - } + t.Fatalf("%s replied %s", resultDistro.URL, resp.Status) + } - hash := checkCachedImageHash(t, resultDistro, cdir) + if _, err = io.Copy(fout, resp.Body); err != nil { + t.Fatalf("download of %s failed: %v", resultDistro.URL, err) + } - if hash != resultDistro.SHA256Sum { - t.Fatalf("hash mismatch, want: %s, got: %s", resultDistro.SHA256Sum, hash) - } + resp.Body.Close() + err = fout.Close() + if err != nil { + t.Fatalf("can't close fout: %v", err) + } + + hash := checkCachedImageHash(t, resultDistro, cdir) + + if hash != resultDistro.SHA256Sum { + t.Fatalf("hash mismatch for %s, want: %s, got: %s", resultDistro.URL, resultDistro.SHA256Sum, hash) } } return qcowPath } -func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash string) { +func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) string { t.Helper() qcowPath := filepath.Join(cacheDir, "qcow2", d.SHA256Sum) @@ -238,6 +273,7 @@ func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash stri if err != nil { t.Fatal(err) } + defer fin.Close() hasher := sha256.New() if _, err := io.Copy(hasher, fin); err != nil { @@ -248,10 +284,7 @@ func checkCachedImageHash(t *testing.T, d Distro, cacheDir string) (gotHash stri if hash != d.SHA256Sum { t.Fatalf("hash mismatch, got: %q, want: %q", hash, d.SHA256Sum) } - - gotHash = hash - - return + return hash } func (h *Harness) copyBinaries(t *testing.T, d Distro, conn *ssh.Client) { diff --git a/tstest/integration/vms/vms_steps_test.go b/tstest/integration/vms/vms_steps_test.go index a7068d188..74369796c 100644 --- a/tstest/integration/vms/vms_steps_test.go +++ b/tstest/integration/vms/vms_steps_test.go @@ -41,34 +41,35 @@ func retry(t *testing.T, fn func() error) { } func (h *Harness) testPing(t *testing.T, ipAddr netaddr.IP, cli *ssh.Client) { - var outp []byte - var err error retry(t, func() error { sess := getSession(t, cli) - - outp, err = sess.CombinedOutput(fmt.Sprintf("tailscale ping -c 1 %s", ipAddr)) - return err + cmd := fmt.Sprintf("tailscale ping --verbose %s", ipAddr) + outp, err := sess.CombinedOutput(cmd) + if err == nil && !bytes.Contains(outp, []byte("pong")) { + err = fmt.Errorf("%s: no pong", cmd) + } + if err != nil { + return fmt.Errorf("%s : %v, output: %s", cmd, err, outp) + } + t.Logf("%s", outp) + return nil }) - if !bytes.Contains(outp, []byte("pong")) { - t.Log(string(outp)) - t.Fatal("no pong") - } - retry(t, func() error { sess := getSession(t, cli) // NOTE(Xe): the ping command is inconsistent across distros. Joy. - pingCmd := fmt.Sprintf("sh -c 'ping -c 1 %[1]s || ping -6 -c 1 %[1]s || ping6 -c 1 %[1]s\n'", ipAddr) - t.Logf("running %q", pingCmd) - outp, err = sess.CombinedOutput(pingCmd) + cmd := fmt.Sprintf("sh -c 'ping -c 1 %[1]s || ping -6 -c 1 %[1]s || ping6 -c 1 %[1]s\n'", ipAddr) + t.Logf("running %q", cmd) + outp, err := sess.CombinedOutput(cmd) + if err == nil && !bytes.Contains(outp, []byte("bytes")) { + err = fmt.Errorf("%s: wanted output to contain %q, it did not", cmd, "bytes") + } + if err != nil { + err = fmt.Errorf("%s: %v, output: %s", cmd, err, outp) + } return err }) - - if !bytes.Contains(outp, []byte("bytes")) { - t.Log(string(outp)) - t.Fatalf("wanted output to contain %q, it did not", "bytes") - } } func getSession(t *testing.T, cli *ssh.Client) *ssh.Session { @@ -111,7 +112,6 @@ func (h *Harness) testOutgoingTCP(t *testing.T, ipAddr netaddr.IP, cli *ssh.Clie // sess.Stdout = logger.FuncWriter(t.Logf) // sess.Run("sysctl -a") - var outp []byte retry(t, func() error { var err error sess := getSession(t, cli) @@ -121,15 +121,15 @@ func (h *Harness) testOutgoingTCP(t *testing.T, ipAddr netaddr.IP, cli *ssh.Clie } cmd := fmt.Sprintf("curl -v %s -s -f http://%s\n", v6Arg, net.JoinHostPort(ipAddr.String(), port)) t.Logf("running: %s", cmd) - outp, err = sess.CombinedOutput(cmd) + outp, err := sess.CombinedOutput(cmd) + if msg := string(bytes.TrimSpace(outp)); err == nil && !strings.Contains(msg, sendmsg) { + err = fmt.Errorf("wanted %q, got: %q", sendmsg, msg) + } if err != nil { - t.Log(string(outp)) + err = fmt.Errorf("%v, output: %s", err, outp) } return err }) - if msg := string(bytes.TrimSpace(outp)); !strings.Contains(msg, sendmsg) { - t.Fatalf("wanted %q, got: %q", sendmsg, msg) - } <-ctx.Done() } diff --git a/tstest/integration/vms/vms_test.go b/tstest/integration/vms/vms_test.go index 720620200..94e4a4c60 100644 --- a/tstest/integration/vms/vms_test.go +++ b/tstest/integration/vms/vms_test.go @@ -30,7 +30,6 @@ import ( "golang.org/x/sync/semaphore" "inet.af/netaddr" "tailscale.com/tstest" - "tailscale.com/tstest/integration" "tailscale.com/types/logger" ) @@ -45,6 +44,7 @@ var ( vmRamLimit = flag.Int("ram-limit", 4096, "the maximum number of megabytes of ram that can be used for VMs, must be greater than or equal to 1024") useVNC = flag.Bool("use-vnc", false, "if set, display guest vms over VNC") verboseLogcatcher = flag.Bool("verbose-logcatcher", false, "if set, spew logcatcher to t.Logf (spamtastic)") + verboseQemu = flag.Bool("verbose-qemu", true, "if set, print qemu console to t.Logf") distroRex = func() *regexValue { result := ®exValue{r: regexp.MustCompile(`.*`)} flag.Var(result, "distro-regex", "The regex that matches what distros should be run") @@ -57,22 +57,18 @@ func TestDownloadImages(t *testing.T) { t.Skip("not running integration tests (need --run-vm-tests)") } - bins := integration.BuildTestBinaries(t) - for _, d := range Distros { distro := d t.Run(distro.Name, func(t *testing.T) { + t.Parallel() if !distroRex.Unwrap().MatchString(distro.Name) { t.Skipf("distro name %q doesn't match regex: %s", distro.Name, distroRex) } - if strings.HasPrefix(distro.Name, "nixos") { t.Skip("NixOS is built on the fly, no need to download it") } - t.Parallel() - - (&Harness{bins: bins}).fetchDistro(t, distro) + fetchDistro(t, distro) }) } } @@ -270,23 +266,36 @@ func testOneDistribution(t *testing.T, n int, distro Distro) { } t.Cleanup(func() { ramsem.sem.Release(int64(distro.MemoryMegs)) }) - h.mkVM(t, n, distro, h.pubKey, h.loginServerURL, dir) + vm := h.mkVM(t, n, distro, h.pubKey, h.loginServerURL, dir) var ipm ipMapping - t.Run("wait-for-start", func(t *testing.T) { - waiter := time.NewTicker(time.Second) - defer waiter.Stop() - var ok bool - for { - <-waiter.C - h.ipMu.Lock() - if ipm, ok = h.ipMap[distro.Name]; ok { - h.ipMu.Unlock() - break - } - h.ipMu.Unlock() + for i := 0; i < 100; i++ { + if vm.running() { + break } - }) + time.Sleep(100 * time.Millisecond) + } + if !vm.running() { + t.Fatal("vm not running") + } + + waiter := time.NewTicker(time.Second) + defer waiter.Stop() + for { + var ok bool + + h.ipMu.Lock() + ipm, ok = h.ipMap[distro.Name] + h.ipMu.Unlock() + + if ok { + break + } + if !vm.running() { + t.Fatal("vm not running") + } + <-waiter.C + } h.testDistro(t, distro, ipm) } @@ -377,11 +386,10 @@ func (h *Harness) testDistro(t *testing.T, d Distro, ipm ipMapping) { outp, err = sess.CombinedOutput("tailscale status") if err == nil { + t.Logf("tailscale status: %s", outp) if !strings.Contains(string(outp), "100.64.0.1") { - t.Log(string(outp)) t.Fatal("can't find tester IP") } - return } time.Sleep(dur)