fix(scaletest): cleanup: attempt to cancel in-progress jobs (#9080)

This change modifies the cleanup behaviour to make a best-effort attempt to cancel the in-progress scaletest workspace build jobs before deleting them.
This commit is contained in:
Cian Johnston 2023-08-14 12:43:45 +01:00 committed by GitHub
parent 72575cc462
commit ef9d84c723
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 136 additions and 1 deletions

View File

@ -16,6 +16,7 @@ import (
"github.com/coder/coder/agent"
"github.com/coder/coder/coderd/coderdtest"
"github.com/coder/coder/coderd/httpapi"
"github.com/coder/coder/coderd/util/ptr"
"github.com/coder/coder/codersdk"
"github.com/coder/coder/codersdk/agentsdk"
"github.com/coder/coder/provisioner/echo"
@ -156,6 +157,117 @@ func Test_Runner(t *testing.T) {
require.Len(t, workspaces.Workspaces, 0)
})
// CleanupPendingBuild verifies that Cleanup makes an attempt to cancel a
// workspace build that is still in progress: the runner is interrupted while
// its workspace build is pending, Cleanup is then started, and the test waits
// until the original "start" build reports a canceled/canceling job status.
t.Run("CleanupPendingBuild", func(t *testing.T) {
	t.Parallel()

	ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
	defer cancel()

	client := coderdtest.New(t, &coderdtest.Options{
		IncludeProvisionerDaemon: true,
	})
	user := coderdtest.CreateFirstUser(t, client)

	// The apply phase only ever emits a single log response and no completion
	// response.
	// NOTE(review): presumably this is what keeps the build job in progress
	// for the duration of the test — confirm against the echo provisioner.
	version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
		Parse:         echo.ParseComplete,
		ProvisionPlan: echo.ProvisionComplete,
		ProvisionApply: []*proto.Provision_Response{
			{
				Type: &proto.Provision_Response_Log{Log: &proto.Log{}},
			},
		},
	})
	version = coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
	template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(request *codersdk.CreateTemplateRequest) {
		request.AllowUserCancelWorkspaceJobs = ptr.Ref(true)
	})

	const (
		username = "scaletest-user"
		email    = "scaletest@test.coder.com"
	)
	runner := createworkspaces.NewRunner(client, createworkspaces.Config{
		User: createworkspaces.UserConfig{
			OrganizationID: user.OrganizationID,
			Username:       username,
			Email:          email,
		},
		Workspace: workspacebuild.Config{
			OrganizationID: user.OrganizationID,
			Request: codersdk.CreateWorkspaceRequest{
				TemplateID: template.ID,
			},
		},
	})

	cancelCtx, cancelFunc := context.WithCancel(ctx)
	// The result of Run is handed back to the test goroutine instead of being
	// asserted inside the spawned goroutine: require.* fails via t.FailNow,
	// which must only be called from the goroutine running the test. The
	// channel is buffered so the goroutine can always deliver its result and
	// exit, even if the test has already failed and nobody is receiving.
	runErr := make(chan error, 1)
	logs := bytes.NewBuffer(nil)
	go func() {
		runErr <- runner.Run(cancelCtx, "1", logs)
	}()

	// Wait until the runner has created a workspace before interrupting it.
	require.Eventually(t, func() bool {
		workspaces, err := client.Workspaces(ctx, codersdk.WorkspaceFilter{})
		if err != nil {
			return false
		}
		return len(workspaces.Workspaces) > 0
	}, testutil.WaitShort, testutil.IntervalFast)

	cancelFunc()
	err := <-runErr
	t.Log("Runner logs:\n\n" + logs.String())
	require.ErrorIs(t, err, context.Canceled)

	// When we run the cleanup, the in-progress build should be canceled.
	cancelCtx, cancelFunc = context.WithCancel(ctx)
	done := make(chan struct{})
	go func() {
		// Deferred so the test goroutine is always released, however Cleanup
		// terminates.
		defer close(done)
		// This will return an error as the "delete" operation will never complete.
		_ = runner.Cleanup(cancelCtx, "1")
	}()

	// Ensure the original "start" build has been canceled (or is being canceled).
	require.Eventually(t, func() bool {
		workspaces, err := client.Workspaces(ctx, codersdk.WorkspaceFilter{})
		if err != nil {
			return false
		}
		if len(workspaces.Workspaces) == 0 {
			return false
		}

		// Inspect every build of the first workspace returned.
		builds, err := client.WorkspaceBuilds(ctx, codersdk.WorkspaceBuildsRequest{
			WorkspaceID: workspaces.Workspaces[0].ID,
		})
		if err != nil {
			return false
		}

		for _, build := range builds {
			// Only the build that created (started) the workspace is of interest,
			if build.Transition != codersdk.WorkspaceTransitionStart {
				continue
			}
			// and it should be either canceled or canceling.
			if build.Job.Status == codersdk.ProvisionerJobCanceled || build.Job.Status == codersdk.ProvisionerJobCanceling {
				return true
			}
		}
		return false
	}, testutil.WaitShort, testutil.IntervalFast)

	cancelFunc()
	<-done
})
t.Run("NoCleanup", func(t *testing.T) {
t.Parallel()

View File

@ -112,7 +112,30 @@ func (r *CleanupRunner) Run(ctx context.Context, _ string, logs io.Writer) error
ctx, span := tracing.StartSpan(ctx)
defer span.End()
build, err := r.client.CreateWorkspaceBuild(ctx, r.workspaceID, codersdk.CreateWorkspaceBuildRequest{
logs = loadtestutil.NewSyncWriter(logs)
logger := slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug)
r.client.SetLogger(logger)
r.client.SetLogBodies(true)
ws, err := r.client.Workspace(ctx, r.workspaceID)
if err != nil {
return err
}
build, err := r.client.WorkspaceBuild(ctx, ws.LatestBuild.ID)
if err == nil && build.Job.Status.Active() {
// mark the build as canceled
if err = r.client.CancelWorkspaceBuild(ctx, build.ID); err == nil {
// Wait for the job to cancel before we delete it
_ = waitForBuild(ctx, logs, r.client, build.ID) // it will return a "build canceled" error
} else {
logger.Warn(ctx, "failed to cancel workspace build, attempting to delete anyway", slog.Error(err))
}
} else {
logger.Warn(ctx, "unable to lookup latest workspace build, attempting to delete anyway", slog.Error(err))
}
build, err = r.client.CreateWorkspaceBuild(ctx, r.workspaceID, codersdk.CreateWorkspaceBuildRequest{
Transition: codersdk.WorkspaceTransitionDelete,
})
if err != nil {