fix(scaletest): fix flake in Test_Runner/Cleanup (#10252)

* fix(scaletest/createworkspaces): address flake in Test_Runner/CleanupPendingBuild
* fix(scaletest): pass io.Writer to Cleanup()
* add some extra logs to workspacebuild cleanup
* fixup! fix(scaletest): pass io.Writer to Cleanup()
* remove race
* fmt
* address PR comments
2023-10-16 12:37:12 +01:00 · 2023-10-16 12:37:12 +01:00 · dd86100f33
parent 1be24dcb5c
commit dd86100f33
10 changed files with 61 additions and 32 deletions
--- a/cli/exp_scaletest.go
+++ b/cli/exp_scaletest.go
@ -1241,7 +1241,7 @@ func (r *runnableTraceWrapper) Run(ctx context.Context, id string, logs io.Write
 	return r.runner.Run(ctx2, id, logs)
 }
-func (r *runnableTraceWrapper) Cleanup(ctx context.Context, id string) error {
+func (r *runnableTraceWrapper) Cleanup(ctx context.Context, id string, logs io.Writer) error {
 	c, ok := r.runner.(harness.Cleanable)
 	if !ok {
 		return nil
@ -1253,7 +1253,7 @@ func (r *runnableTraceWrapper) Cleanup(ctx context.Context, id string) error {
 	ctx, span := r.tracer.Start(ctx, r.spanName+" cleanup")
 	defer span.End()
-	return c.Cleanup(ctx, id)
+	return c.Cleanup(ctx, id, logs)
 }
 // newScaleTestUser returns a random username and email address that can be used
--- a/scaletest/createworkspaces/run.go
+++ b/scaletest/createworkspaces/run.go
@ -176,13 +176,14 @@ resourceLoop:
 }
 // Cleanup implements Cleanable.
-func (r *Runner) Cleanup(ctx context.Context, id string) error {
+func (r *Runner) Cleanup(ctx context.Context, id string, logs io.Writer) error {
 	if r.cfg.NoCleanup {
+		_, _ = fmt.Fprintln(logs, "skipping cleanup")
 		return nil
 	}
 	if r.workspacebuildRunner != nil {
-		err := r.workspacebuildRunner.Cleanup(ctx, id)
+		err := r.workspacebuildRunner.Cleanup(ctx, id, logs)
 		if err != nil {
 			return xerrors.Errorf("cleanup workspace: %w", err)
 		}
@ -191,6 +192,7 @@ func (r *Runner) Cleanup(ctx context.Context, id string) error {
 	if r.userID != uuid.Nil {
 		err := r.client.DeleteUser(ctx, r.userID)
 		if err != nil {
+			_, _ = fmt.Fprintf(logs, "failed to delete user %q: %v\n", r.userID.String(), err)
 			return xerrors.Errorf("delete user: %w", err)
 		}
 	}
--- a/scaletest/createworkspaces/run_test.go
+++ b/scaletest/createworkspaces/run_test.go
@ -177,8 +177,13 @@ func Test_Runner(t *testing.T) {
 		require.Contains(t, logsStr, "Opening reconnecting PTY connection to agent")
 		require.Contains(t, logsStr, "Opening connection to workspace agent")
-		err = runner.Cleanup(ctx, "1")
+		cleanupLogs := bytes.NewBuffer(nil)
+		err = runner.Cleanup(ctx, "1", cleanupLogs)
 		require.NoError(t, err)
+		cleanupLogsStr := cleanupLogs.String()
+		require.Contains(t, cleanupLogsStr, "deleting workspace")
+		require.NotContains(t, cleanupLogsStr, "canceling workspace build") // The build should have already completed.
+		require.Contains(t, cleanupLogsStr, "Build succeeded!")
 		// Ensure the user and workspace were deleted.
 		users, err = client.Users(ctx, codersdk.UsersRequest{})
@ -217,7 +222,7 @@ func Test_Runner(t *testing.T) {
 			},
 			ProvisionApply: []*proto.Response{
 				{
-					Type: &proto.Response_Log{Log: &proto.Log{}},
+					Type: &proto.Response_Log{Log: &proto.Log{}}, // This provisioner job will never complete.
 				},
 			},
 		})
@ -257,24 +262,36 @@ func Test_Runner(t *testing.T) {
 			close(done)
 		}()
 		// Wait for the workspace build job to be picked up.
 		require.Eventually(t, func() bool {
 			workspaces, err := client.Workspaces(ctx, codersdk.WorkspaceFilter{})
 			if err != nil {
 				return false
 			}
+			if len(workspaces.Workspaces) == 0 {
+				return false
+			}
-			return len(workspaces.Workspaces) > 0
+			ws := workspaces.Workspaces[0]
-		}, testutil.WaitShort, testutil.IntervalFast)
+			t.Logf("checking build: %s | %s", ws.LatestBuild.Transition, ws.LatestBuild.Job.Status)
+			// There should be only one build at present.
+			if ws.LatestBuild.Transition != codersdk.WorkspaceTransitionStart {
+				t.Errorf("expected build transition %s, got %s", codersdk.WorkspaceTransitionStart, ws.LatestBuild.Transition)
+				return false
+			}
+			return ws.LatestBuild.Job.Status == codersdk.ProvisionerJobRunning
+		}, testutil.WaitShort, testutil.IntervalMedium)
 		cancelFunc()
 		<-done
 		// When we run the cleanup, it should be canceled
+		cleanupLogs := bytes.NewBuffer(nil)
 		cancelCtx, cancelFunc = context.WithCancel(ctx)
 		done = make(chan struct{})
 		go func() {
 			// This will return an error as the "delete" operation will never complete.
-			_ = runner.Cleanup(cancelCtx, "1")
+			_ = runner.Cleanup(cancelCtx, "1", cleanupLogs)
 			close(done)
 		}()
@ -311,9 +328,11 @@ func Test_Runner(t *testing.T) {
 				}
 			}
 			return false
-		}, testutil.WaitShort, testutil.IntervalFast)
+		}, testutil.WaitShort, testutil.IntervalMedium)
 		cancelFunc()
 		<-done
+		cleanupLogsStr := cleanupLogs.String()
+		require.Contains(t, cleanupLogsStr, "canceling workspace build")
 	})
 	t.Run("NoCleanup", func(t *testing.T) {
@ -447,7 +466,8 @@ func Test_Runner(t *testing.T) {
 		require.Contains(t, logsStr, "Opening reconnecting PTY connection to agent")
 		require.Contains(t, logsStr, "Opening connection to workspace agent")
-		err = runner.Cleanup(ctx, "1")
+		cleanupLogs := bytes.NewBuffer(nil)
+		err = runner.Cleanup(ctx, "1", cleanupLogs)
 		require.NoError(t, err)
 		// Ensure the user and workspace were not deleted.
--- a/scaletest/dashboard/run.go
+++ b/scaletest/dashboard/run.go
@ -125,6 +125,6 @@ func (r *Runner) runUntilDeadlineExceeded(ctx context.Context) error {
 	}
 }
-func (*Runner) Cleanup(_ context.Context, _ string) error {
+func (*Runner) Cleanup(_ context.Context, _ string, _ io.Writer) error {
 	return nil
 }
--- a/scaletest/harness/harness_test.go
+++ b/scaletest/harness/harness_test.go
@ -112,7 +112,7 @@ func Test_TestHarness(t *testing.T) {
 				RunFn: func(_ context.Context, _ string, _ io.Writer) error {
 					return nil
 				},
-				CleanupFn: func(_ context.Context, _ string) error {
+				CleanupFn: func(_ context.Context, _ string, _ io.Writer) error {
 					panic(testPanicMessage)
 				},
 			})
@ -150,7 +150,7 @@ func Test_TestHarness(t *testing.T) {
 				RunFn: func(_ context.Context, _ string, _ io.Writer) error {
 					return nil
 				},
-				CleanupFn: func(_ context.Context, _ string) error {
+				CleanupFn: func(_ context.Context, _ string, _ io.Writer) error {
 					return nil
 				},
 			})
@ -295,7 +295,7 @@ func fakeTestFns(err, cleanupErr error) testFns {
 		RunFn: func(_ context.Context, _ string, _ io.Writer) error {
 			return err
 		},
-		CleanupFn: func(_ context.Context, _ string) error {
+		CleanupFn: func(_ context.Context, _ string, _ io.Writer) error {
 			return cleanupErr
 		},
 	}
--- a/scaletest/harness/run.go
+++ b/scaletest/harness/run.go
@ -28,7 +28,7 @@ type Runnable interface {
 type Cleanable interface {
 	Runnable
 	// Cleanup should clean up any lingering resources from the test.
-	Cleanup(ctx context.Context, id string) error
+	Cleanup(ctx context.Context, id string, logs io.Writer) error
 }
 // AddRun creates a new *TestRun with the given name, ID and Runnable, adds it
@ -131,7 +131,7 @@ func (r *TestRun) Cleanup(ctx context.Context) (err error) {
 		}
 	}()
-	err = c.Cleanup(ctx, r.id)
+	err = c.Cleanup(ctx, r.id, r.logs)
 	//nolint:revive // we use named returns because we mutate it in a defer
 	return
 }
--- a/scaletest/harness/run_test.go
+++ b/scaletest/harness/run_test.go
@ -16,7 +16,7 @@ import (
 type testFns struct {
 	RunFn func(ctx context.Context, id string, logs io.Writer) error
 	// CleanupFn is optional if no cleanup is required.
-	CleanupFn func(ctx context.Context, id string) error
+	CleanupFn func(ctx context.Context, id string, logs io.Writer) error
 }
 // Run implements Runnable.
@ -25,12 +25,12 @@ func (fns testFns) Run(ctx context.Context, id string, logs io.Writer) error {
 }
 // Cleanup implements Cleanable.
-func (fns testFns) Cleanup(ctx context.Context, id string) error {
+func (fns testFns) Cleanup(ctx context.Context, id string, logs io.Writer) error {
 	if fns.CleanupFn == nil {
 		return nil
 	}
-	return fns.CleanupFn(ctx, id)
+	return fns.CleanupFn(ctx, id, logs)
 }
 func Test_TestRun(t *testing.T) {
@ -49,7 +49,7 @@ func Test_TestRun(t *testing.T) {
 					atomic.AddInt64(&runCalled, 1)
 					return nil
 				},
-				CleanupFn: func(ctx context.Context, id string) error {
+				CleanupFn: func(ctx context.Context, id string, logs io.Writer) error {
 					atomic.AddInt64(&cleanupCalled, 1)
 					return nil
 				},
@ -93,7 +93,7 @@ func Test_TestRun(t *testing.T) {
 				RunFn: func(ctx context.Context, id string, logs io.Writer) error {
 					return nil
 				},
-				CleanupFn: func(ctx context.Context, id string) error {
+				CleanupFn: func(ctx context.Context, id string, logs io.Writer) error {
 					atomic.AddInt64(&cleanupCalled, 1)
 					return nil
 				},
--- a/scaletest/workspacebuild/run.go
+++ b/scaletest/workspacebuild/run.go
@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"io"
 	"net/http"
 	"time"
 	"github.com/google/uuid"
@ -106,25 +107,31 @@ func NewCleanupRunner(client *codersdk.Client, workspaceID uuid.UUID) *CleanupRu
 // Run implements Runnable.
 func (r *CleanupRunner) Run(ctx context.Context, _ string, logs io.Writer) error {
+	ctx, span := tracing.StartSpan(ctx)
+	defer span.End()
+	logs = loadtestutil.NewSyncWriter(logs)
+	logger := slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug)
 	if r.workspaceID == uuid.Nil {
 		return nil
 	}
-	ctx, span := tracing.StartSpan(ctx)
+	logger.Info(ctx, "deleting workspace", slog.F("workspace_id", r.workspaceID))
-	defer span.End()
-	logs = loadtestutil.NewSyncWriter(logs)
-	logger := slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug)
 	r.client.SetLogger(logger)
 	r.client.SetLogBodies(true)
 	ws, err := r.client.Workspace(ctx, r.workspaceID)
 	if err != nil {
 		var sdkErr *codersdk.Error
 		if xerrors.As(err, &sdkErr) && sdkErr.StatusCode() == http.StatusNotFound {
 			logger.Info(ctx, "workspace not found, skipping delete", slog.F("workspace_id", r.workspaceID))
 			return nil
 		}
 		return err
 	}
 	build, err := r.client.WorkspaceBuild(ctx, ws.LatestBuild.ID)
 	if err == nil && build.Job.Status.Active() {
 		// mark the build as canceled
 		logger.Info(ctx, "canceling workspace build", slog.F("build_id", build.ID), slog.F("workspace_id", r.workspaceID))
 		if err = r.client.CancelWorkspaceBuild(ctx, build.ID); err == nil {
 			// Wait for the job to cancel before we delete it
 			_ = waitForBuild(ctx, logs, r.client, build.ID) // it will return a "build canceled" error
@ -151,12 +158,11 @@ func (r *CleanupRunner) Run(ctx context.Context, _ string, logs io.Writer) error
 }
 // Cleanup implements Cleanable by wrapping CleanupRunner.
-func (r *Runner) Cleanup(ctx context.Context, id string) error {
+func (r *Runner) Cleanup(ctx context.Context, id string, w io.Writer) error {
-	// TODO: capture these logs
 	return (&CleanupRunner{
 		client:      r.client,
 		workspaceID: r.workspaceID,
-	}).Run(ctx, id, io.Discard)
+	}).Run(ctx, id, w)
 }
 func waitForBuild(ctx context.Context, w io.Writer, client *codersdk.Client, buildID uuid.UUID) error {
--- a/scaletest/workspacebuild/run_test.go
+++ b/scaletest/workspacebuild/run_test.go
@ -180,7 +180,8 @@ func Test_Runner(t *testing.T) {
 		coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspaces[0].LatestBuild.ID)
 		coderdtest.AwaitWorkspaceAgents(t, client, workspaces[0].ID)
-		err = runner.Cleanup(ctx, "1")
+		cleanupLogs := bytes.NewBuffer(nil)
+		err = runner.Cleanup(ctx, "1", cleanupLogs)
 		require.NoError(t, err)
 	})
--- a/scaletest/workspacetraffic/run.go
+++ b/scaletest/workspacetraffic/run.go
@ -169,7 +169,7 @@ func (r *Runner) Run(ctx context.Context, _ string, logs io.Writer) error {
 }
 // Cleanup does nothing, successfully.
-func (*Runner) Cleanup(context.Context, string) error {
+func (*Runner) Cleanup(context.Context, string, io.Writer) error {
 	return nil
 }