chore: add auditing to workspace dormancy (#10070)

- Adds an audit log for workspaces automatically transitioned to the dormant
  state.
- Imposes a minimum of 1 minute on cleanup-related fields. This is to
  prevent accidental API misuse from resulting in catastrophe.
This commit is contained in:
Jon Ayers 2023-10-05 13:41:07 -05:00 committed by GitHub
parent 888b97fd86
commit 91265678ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 236 additions and 62 deletions

View File

@ -938,7 +938,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
autobuildTicker := time.NewTicker(vals.AutobuildPollInterval.Value())
defer autobuildTicker.Stop()
autobuildExecutor := autobuild.NewExecutor(
ctx, options.Database, options.Pubsub, coderAPI.TemplateScheduleStore, logger, autobuildTicker.C)
ctx, options.Database, options.Pubsub, coderAPI.TemplateScheduleStore, &coderAPI.Auditor, logger, autobuildTicker.C)
autobuildExecutor.Run()
hangDetectorTicker := time.NewTicker(vals.JobHangDetectorInterval.Value())

View File

@ -3,6 +3,9 @@ package autobuild
import (
"context"
"database/sql"
"encoding/json"
"net/http"
"strconv"
"sync"
"sync/atomic"
"time"
@ -12,6 +15,7 @@ import (
"golang.org/x/xerrors"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/audit"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbtime"
@ -29,6 +33,7 @@ type Executor struct {
db database.Store
ps pubsub.Pubsub
templateScheduleStore *atomic.Pointer[schedule.TemplateScheduleStore]
auditor *atomic.Pointer[audit.Auditor]
log slog.Logger
tick <-chan time.Time
statsCh chan<- Stats
@ -42,7 +47,7 @@ type Stats struct {
}
// NewExecutor returns a new autobuild executor.
func NewExecutor(ctx context.Context, db database.Store, ps pubsub.Pubsub, tss *atomic.Pointer[schedule.TemplateScheduleStore], log slog.Logger, tick <-chan time.Time) *Executor {
func NewExecutor(ctx context.Context, db database.Store, ps pubsub.Pubsub, tss *atomic.Pointer[schedule.TemplateScheduleStore], auditor *atomic.Pointer[audit.Auditor], log slog.Logger, tick <-chan time.Time) *Executor {
le := &Executor{
//nolint:gocritic // Autostart has a limited set of permissions.
ctx: dbauthz.AsAutostart(ctx),
@ -51,6 +56,7 @@ func NewExecutor(ctx context.Context, db database.Store, ps pubsub.Pubsub, tss *
templateScheduleStore: tss,
tick: tick,
log: log.Named("autobuild"),
auditor: auditor,
}
return le
}
@ -166,13 +172,14 @@ func (e *Executor) runOnce(t time.Time) Stats {
return nil
}
var build *database.WorkspaceBuild
if nextTransition != "" {
builder := wsbuilder.New(ws, nextTransition).
SetLastWorkspaceBuildInTx(&latestBuild).
SetLastWorkspaceBuildJobInTx(&latestJob).
Reason(reason)
_, job, err = builder.Build(e.ctx, tx, nil)
build, job, err = builder.Build(e.ctx, tx, nil)
if err != nil {
log.Error(e.ctx, "unable to transition workspace",
slog.F("transition", nextTransition),
@ -185,6 +192,7 @@ func (e *Executor) runOnce(t time.Time) Stats {
// Transition the workspace to dormant if it has breached the template's
// threshold for inactivity.
if reason == database.BuildReasonAutolock {
wsOld := ws
ws, err = tx.UpdateWorkspaceDormantDeletingAt(e.ctx, database.UpdateWorkspaceDormantDeletingAtParams{
ID: ws.ID,
DormantAt: sql.NullTime{
@ -192,6 +200,16 @@ func (e *Executor) runOnce(t time.Time) Stats {
Valid: true,
},
})
auditBuild(e.ctx, e.log, *e.auditor.Load(), auditParams{
Build: build,
Job: latestJob,
Reason: reason,
Old: wsOld,
New: ws,
Success: err == nil,
})
if err != nil {
log.Error(e.ctx, "unable to transition workspace to dormant",
slog.F("transition", nextTransition),
@ -384,3 +402,46 @@ func isEligibleForFailedStop(build database.WorkspaceBuild, job database.Provisi
job.CompletedAt.Valid &&
currentTick.Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL
}
// auditParams bundles the inputs auditBuild needs to emit a single audit log
// entry for a workspace change performed by the executor.
type auditParams struct {
// Build is the workspace build associated with the change. It may be nil;
// auditBuild only records a build number when it is set.
Build *database.WorkspaceBuild
// Job supplies the audit entry's UserID (InitiatorID) and JobID (ID).
Job database.ProvisionerJob
// Reason is recorded as the build reason in the entry's additional fields.
Reason database.BuildReason
// Old is passed through as the "old" side of the audit entry.
Old database.Workspace
// New is passed through as the "new" side of the audit entry; its Name and
// OrganizationID are also recorded on the entry.
New database.Workspace
// Success selects the recorded status: http.StatusOK when true,
// http.StatusInternalServerError otherwise.
Success bool
}
// auditBuild writes one audit log entry describing a workspace change made by
// the executor.
//
// The entry is attributed to the initiator of params.Job, uses the write
// action, and carries the workspace name, build reason and (when params.Build
// is non-nil) the build number as JSON-encoded additional fields. The recorded
// status is http.StatusOK when params.Success is true and
// http.StatusInternalServerError otherwise.
//
// A failure to encode the additional fields is logged and does not prevent
// the entry from being emitted; in that case the fields are passed along as
// returned by json.Marshal.
func auditBuild(ctx context.Context, log slog.Logger, auditor audit.Auditor, params auditParams) {
	// Translate the outcome into the HTTP-style status stored on the entry.
	var status int
	if params.Success {
		status = http.StatusOK
	} else {
		status = http.StatusInternalServerError
	}

	additional := audit.AdditionalFields{
		WorkspaceName: params.New.Name,
		BuildReason:   params.Reason,
	}
	// A build is optional; only record its number when one was supplied.
	if b := params.Build; b != nil {
		additional.BuildNumber = strconv.FormatInt(int64(b.BuildNumber), 10)
	}

	encoded, marshalErr := json.Marshal(additional)
	if marshalErr != nil {
		log.Error(ctx, "marshal resource info for successful job", slog.Error(marshalErr))
	}

	entry := audit.BuildAuditParams[database.Workspace]{
		Audit:            auditor,
		Log:              log,
		UserID:           params.Job.InitiatorID,
		OrganizationID:   params.New.OrganizationID,
		JobID:            params.Job.ID,
		Action:           database.AuditActionWrite,
		Old:              params.Old,
		New:              params.New,
		Status:           status,
		AdditionalFields: encoded,
	}
	audit.WorkspaceBuildAudit(ctx, &entry)
}

View File

@ -262,12 +262,19 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can
}
templateScheduleStore.Store(&options.TemplateScheduleStore)
var auditor atomic.Pointer[audit.Auditor]
if options.Auditor == nil {
options.Auditor = audit.NewNop()
}
auditor.Store(&options.Auditor)
ctx, cancelFunc := context.WithCancel(context.Background())
lifecycleExecutor := autobuild.NewExecutor(
ctx,
options.Database,
options.Pubsub,
&templateScheduleStore,
&auditor,
slogtest.Make(t, nil).Named("autobuild.executor").Leveled(slog.LevelDebug),
options.AutobuildTicker,
).WithStatsChannel(options.AutobuildStats)

View File

@ -537,17 +537,19 @@ func (api *API) patchTemplateMeta(rw http.ResponseWriter, r *http.Request) {
if req.AutostopRequirement.Weeks > schedule.MaxTemplateAutostopRequirementWeeks {
validErrs = append(validErrs, codersdk.ValidationError{Field: "autostop_requirement.weeks", Detail: fmt.Sprintf("Must be less than %d.", schedule.MaxTemplateAutostopRequirementWeeks)})
}
if req.FailureTTLMillis < 0 {
validErrs = append(validErrs, codersdk.ValidationError{Field: "failure_ttl_ms", Detail: "Must be a positive integer."})
// The minimum valid value for a dormant TTL is 1 minute. This is
// to ensure an uninformed user does not send an unintentionally
// small number resulting in potentially catastrophic consequences.
const minTTL = 1000 * 60
if req.FailureTTLMillis < 0 || (req.FailureTTLMillis > 0 && req.FailureTTLMillis < minTTL) {
validErrs = append(validErrs, codersdk.ValidationError{Field: "failure_ttl_ms", Detail: "Value must be at least one minute."})
}
if req.TimeTilDormantMillis < 0 {
validErrs = append(validErrs, codersdk.ValidationError{Field: "inactivity_ttl_ms", Detail: "Must be a positive integer."})
if req.TimeTilDormantMillis < 0 || (req.TimeTilDormantMillis > 0 && req.TimeTilDormantMillis < minTTL) {
validErrs = append(validErrs, codersdk.ValidationError{Field: "time_til_dormant_ms", Detail: "Value must be at least one minute."})
}
if req.TimeTilDormantMillis < 0 {
validErrs = append(validErrs, codersdk.ValidationError{Field: "inactivity_ttl_ms", Detail: "Must be a positive integer."})
}
if req.TimeTilDormantAutoDeleteMillis < 0 {
validErrs = append(validErrs, codersdk.ValidationError{Field: "locked_ttl_ms", Detail: "Must be a positive integer."})
if req.TimeTilDormantAutoDeleteMillis < 0 || (req.TimeTilDormantAutoDeleteMillis > 0 && req.TimeTilDormantAutoDeleteMillis < minTTL) {
validErrs = append(validErrs, codersdk.ValidationError{Field: "time_til_dormant_autodelete_ms", Detail: "Value must be at least one minute."})
}
if len(validErrs) > 0 {

View File

@ -816,8 +816,20 @@ func (api *API) putWorkspaceTTL(rw http.ResponseWriter, r *http.Request) {
// @Success 200 {object} codersdk.Workspace
// @Router /workspaces/{workspace}/dormant [put]
func (api *API) putWorkspaceDormant(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
workspace := httpmw.WorkspaceParam(r)
var (
ctx = r.Context()
workspace = httpmw.WorkspaceParam(r)
oldWorkspace = workspace
auditor = api.Auditor.Load()
aReq, commitAudit = audit.InitRequest[database.Workspace](rw, &audit.RequestParams{
Audit: *auditor,
Log: api.Logger,
Request: r,
Action: database.AuditActionWrite,
})
)
aReq.Old = oldWorkspace
defer commitAudit()
var req codersdk.UpdateWorkspaceDormancy
if !httpapi.Read(ctx, rw, r, &req) {
@ -865,6 +877,7 @@ func (api *API) putWorkspaceDormant(rw http.ResponseWriter, r *http.Request) {
return
}
aReq.New = workspace
httpapi.Write(ctx, rw, http.StatusOK, convertWorkspace(
workspace,
data.builds[0],

View File

@ -2824,7 +2824,11 @@ func TestWorkspaceDormant(t *testing.T) {
t.Run("OK", func(t *testing.T) {
t.Parallel()
var (
client = coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
auditRecorder = audit.NewMock()
client = coderdtest.New(t, &coderdtest.Options{
IncludeProvisionerDaemon: true,
Auditor: auditRecorder,
})
user = coderdtest.CreateFirstUser(t, client)
version = coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, nil)
_ = coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
@ -2841,10 +2845,12 @@ func TestWorkspaceDormant(t *testing.T) {
defer cancel()
lastUsedAt := workspace.LastUsedAt
auditRecorder.ResetLogs()
err := client.UpdateWorkspaceDormancy(ctx, workspace.ID, codersdk.UpdateWorkspaceDormancy{
Dormant: true,
})
require.NoError(t, err)
require.Len(t, auditRecorder.AuditLogs(), 1)
workspace = coderdtest.MustWorkspace(t, client, workspace.ID)
require.NoError(t, err, "fetch provisioned workspace")

View File

@ -185,55 +185,123 @@ func TestTemplates(t *testing.T) {
})
t.Run("CleanupTTLs", func(t *testing.T) {
t.Parallel()
t.Run("OK", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitMedium)
client, user := coderdenttest.New(t, &coderdenttest.Options{
Options: &coderdtest.Options{
IncludeProvisionerDaemon: true,
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureAdvancedTemplateScheduling: 1,
ctx := testutil.Context(t, testutil.WaitMedium)
client, user := coderdenttest.New(t, &coderdenttest.Options{
Options: &coderdtest.Options{
IncludeProvisionerDaemon: true,
},
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureAdvancedTemplateScheduling: 1,
},
},
})
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, nil)
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
require.EqualValues(t, 0, template.TimeTilDormantMillis)
require.EqualValues(t, 0, template.FailureTTLMillis)
require.EqualValues(t, 0, template.TimeTilDormantAutoDeleteMillis)
var (
failureTTL = 1 * time.Minute
inactivityTTL = 2 * time.Minute
dormantTTL = 3 * time.Minute
)
updated, err := client.UpdateTemplateMeta(ctx, template.ID, codersdk.UpdateTemplateMeta{
Name: template.Name,
DisplayName: template.DisplayName,
Description: template.Description,
Icon: template.Icon,
AllowUserCancelWorkspaceJobs: template.AllowUserCancelWorkspaceJobs,
TimeTilDormantMillis: inactivityTTL.Milliseconds(),
FailureTTLMillis: failureTTL.Milliseconds(),
TimeTilDormantAutoDeleteMillis: dormantTTL.Milliseconds(),
})
require.NoError(t, err)
require.Equal(t, failureTTL.Milliseconds(), updated.FailureTTLMillis)
require.Equal(t, inactivityTTL.Milliseconds(), updated.TimeTilDormantMillis)
require.Equal(t, dormantTTL.Milliseconds(), updated.TimeTilDormantAutoDeleteMillis)
// Validate fetching the template returns the same values as updating
// the template.
template, err = client.Template(ctx, template.ID)
require.NoError(t, err)
require.Equal(t, failureTTL.Milliseconds(), updated.FailureTTLMillis)
require.Equal(t, inactivityTTL.Milliseconds(), updated.TimeTilDormantMillis)
require.Equal(t, dormantTTL.Milliseconds(), updated.TimeTilDormantAutoDeleteMillis)
})
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, nil)
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
require.EqualValues(t, 0, template.TimeTilDormantMillis)
require.EqualValues(t, 0, template.FailureTTLMillis)
require.EqualValues(t, 0, template.TimeTilDormantAutoDeleteMillis)
t.Run("BadRequest", func(t *testing.T) {
t.Parallel()
var (
failureTTL int64 = 1
inactivityTTL int64 = 2
dormantTTL int64 = 3
)
ctx := testutil.Context(t, testutil.WaitMedium)
client, user := coderdenttest.New(t, &coderdenttest.Options{
Options: &coderdtest.Options{
IncludeProvisionerDaemon: true,
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{
codersdk.FeatureAdvancedTemplateScheduling: 1,
},
},
})
updated, err := client.UpdateTemplateMeta(ctx, template.ID, codersdk.UpdateTemplateMeta{
Name: template.Name,
DisplayName: template.DisplayName,
Description: template.Description,
Icon: template.Icon,
AllowUserCancelWorkspaceJobs: template.AllowUserCancelWorkspaceJobs,
TimeTilDormantMillis: inactivityTTL,
FailureTTLMillis: failureTTL,
TimeTilDormantAutoDeleteMillis: dormantTTL,
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, nil)
coderdtest.AwaitTemplateVersionJobCompleted(t, client, version.ID)
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
type testcase struct {
Name string
TimeTilDormantMS int64
FailureTTLMS int64
DormantAutoDeleteMS int64
}
cases := []testcase{
{
Name: "NegativeValue",
TimeTilDormantMS: -1,
FailureTTLMS: -2,
DormantAutoDeleteMS: -3,
},
{
Name: "ValueTooSmall",
TimeTilDormantMS: 1,
FailureTTLMS: 999,
DormantAutoDeleteMS: 500,
},
}
for _, c := range cases {
c := c
t.Run(c.Name, func(t *testing.T) {
t.Parallel()
_, err := client.UpdateTemplateMeta(ctx, template.ID, codersdk.UpdateTemplateMeta{
Name: template.Name,
DisplayName: template.DisplayName,
Description: template.Description,
Icon: template.Icon,
AllowUserCancelWorkspaceJobs: template.AllowUserCancelWorkspaceJobs,
TimeTilDormantMillis: c.TimeTilDormantMS,
FailureTTLMillis: c.FailureTTLMS,
TimeTilDormantAutoDeleteMillis: c.DormantAutoDeleteMS,
})
require.Error(t, err)
cerr, ok := codersdk.AsError(err)
require.True(t, ok)
require.Len(t, cerr.Validations, 3)
require.Equal(t, "Value must be at least one minute.", cerr.Validations[0].Detail)
})
}
})
require.NoError(t, err)
require.Equal(t, failureTTL, updated.FailureTTLMillis)
require.Equal(t, inactivityTTL, updated.TimeTilDormantMillis)
require.Equal(t, dormantTTL, updated.TimeTilDormantAutoDeleteMillis)
// Validate fetching the template returns the same values as updating
// the template.
template, err = client.Template(ctx, template.ID)
require.NoError(t, err)
require.Equal(t, failureTTL, updated.FailureTTLMillis)
require.Equal(t, inactivityTTL, updated.TimeTilDormantMillis)
require.Equal(t, dormantTTL, updated.TimeTilDormantAutoDeleteMillis)
})
t.Run("UpdateTimeTilDormantAutoDelete", func(t *testing.T) {

View File

@ -2,6 +2,7 @@ package coderd_test
import (
"context"
"encoding/json"
"fmt"
"net/http"
"sync/atomic"
@ -12,6 +13,7 @@ import (
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/audit"
"github.com/coder/coder/v2/coderd/autobuild"
"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/database"
@ -237,10 +239,11 @@ func TestWorkspaceAutobuild(t *testing.T) {
t.Parallel()
var (
ctx = testutil.Context(t, testutil.WaitMedium)
ticker = make(chan time.Time)
statCh = make(chan autobuild.Stats)
inactiveTTL = time.Minute
ctx = testutil.Context(t, testutil.WaitMedium)
ticker = make(chan time.Time)
statCh = make(chan autobuild.Stats)
inactiveTTL = time.Minute
auditRecorder = audit.NewMock()
)
client, user := coderdenttest.New(t, &coderdenttest.Options{
@ -249,6 +252,7 @@ func TestWorkspaceAutobuild(t *testing.T) {
IncludeProvisionerDaemon: true,
AutobuildStats: statCh,
TemplateScheduleStore: schedule.NewEnterpriseTemplateScheduleStore(agplUserQuietHoursScheduleStore()),
Auditor: auditRecorder,
},
LicenseOptions: &coderdenttest.LicenseOptions{
Features: license.Features{codersdk.FeatureAdvancedTemplateScheduling: 1},
@ -268,6 +272,9 @@ func TestWorkspaceAutobuild(t *testing.T) {
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
build := coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, ws.LatestBuild.ID)
require.Equal(t, codersdk.WorkspaceStatusRunning, build.Status)
// Reset the audit log so we can verify a log is generated.
auditRecorder.ResetLogs()
// Simulate being inactive.
ticker <- ws.LastUsedAt.Add(inactiveTTL * 2)
stats := <-statCh
@ -276,13 +283,23 @@ func TestWorkspaceAutobuild(t *testing.T) {
// failure TTL.
require.Len(t, stats.Transitions, 1)
require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop)
require.Len(t, auditRecorder.AuditLogs(), 1)
auditLog := auditRecorder.AuditLogs()[0]
require.Equal(t, auditLog.Action, database.AuditActionWrite)
var fields audit.AdditionalFields
err := json.Unmarshal(auditLog.AdditionalFields, &fields)
require.NoError(t, err)
require.Equal(t, ws.Name, fields.WorkspaceName)
require.Equal(t, database.BuildReasonAutolock, fields.BuildReason)
// The workspace should be dormant.
ws = coderdtest.MustWorkspace(t, client, ws.ID)
require.NotNil(t, ws.DormantAt)
lastUsedAt := ws.LastUsedAt
err := client.UpdateWorkspaceDormancy(ctx, ws.ID, codersdk.UpdateWorkspaceDormancy{Dormant: false})
err = client.UpdateWorkspaceDormancy(ctx, ws.ID, codersdk.UpdateWorkspaceDormancy{Dormant: false})
require.NoError(t, err)
// Assert that we updated our last_used_at so that we don't immediately