diff --git a/cli/server.go b/cli/server.go index 9894e0c1f2..d278dbea35 100644 --- a/cli/server.go +++ b/cli/server.go @@ -209,7 +209,7 @@ func enablePrometheus( } afterCtx(ctx, closeUsersFunc) - closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.PrometheusRegistry, options.Database, 0) + closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.Logger.Named("workspaces_metrics"), options.PrometheusRegistry, options.Database, 0) if err != nil { return nil, xerrors.Errorf("register workspaces prometheus metric: %w", err) } diff --git a/cli/server_test.go b/cli/server_test.go index 7842f9e62b..065131fd97 100644 --- a/cli/server_test.go +++ b/cli/server_test.go @@ -973,7 +973,6 @@ func TestServer(t *testing.T) { scanner := bufio.NewScanner(res.Body) hasActiveUsers := false - hasWorkspaces := false for scanner.Scan() { // This metric is manually registered to be tracked in the server. That's // why we test it's tracked here. @@ -981,10 +980,6 @@ func TestServer(t *testing.T) { hasActiveUsers = true continue } - if strings.HasPrefix(scanner.Text(), "coderd_api_workspace_latest_build_total") { - hasWorkspaces = true - continue - } if strings.HasPrefix(scanner.Text(), "coderd_db_query_latencies_seconds") { t.Fatal("db metrics should not be tracked when --prometheus-collect-db-metrics is not enabled") } @@ -992,7 +987,6 @@ func TestServer(t *testing.T) { } require.NoError(t, scanner.Err()) require.True(t, hasActiveUsers) - require.True(t, hasWorkspaces) }) t.Run("DBMetricsEnabled", func(t *testing.T) { diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index ef112da121..8bb8559be7 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -404,6 +404,16 @@ func (q *FakeQuerier) convertToWorkspaceRowsNoLock(ctx context.Context, workspac break } } + + if pj, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID); err == nil { + wr.LatestBuildStatus = pj.JobStatus + } + + 
wr.LatestBuildTransition = build.Transition + } + + if u, err := q.getUserByIDNoLock(w.OwnerID); err == nil { + wr.Username = u.Username } rows = append(rows, wr) diff --git a/coderd/database/modelqueries.go b/coderd/database/modelqueries.go index 40c953375d..ca38505b28 100644 --- a/coderd/database/modelqueries.go +++ b/coderd/database/modelqueries.go @@ -266,6 +266,7 @@ func (q *sqlQuerier) GetAuthorizedWorkspaces(ctx context.Context, arg GetWorkspa &i.LatestBuildCanceledAt, &i.LatestBuildError, &i.LatestBuildTransition, + &i.LatestBuildStatus, &i.Count, ); err != nil { return nil, err diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 12d0658989..b3216fc2d8 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -12280,7 +12280,8 @@ SELECT latest_build.completed_at as latest_build_completed_at, latest_build.canceled_at as latest_build_canceled_at, latest_build.error as latest_build_error, - latest_build.transition as latest_build_transition + latest_build.transition as latest_build_transition, + latest_build.job_status as latest_build_status FROM workspaces JOIN @@ -12302,7 +12303,7 @@ LEFT JOIN LATERAL ( provisioner_jobs.job_status FROM workspace_builds - LEFT JOIN + JOIN provisioner_jobs ON provisioner_jobs.id = workspace_builds.job_id @@ -12507,7 +12508,7 @@ WHERE -- @authorize_filter ), filtered_workspaces_order AS ( SELECT - fw.id, fw.created_at, fw.updated_at, fw.owner_id, fw.organization_id, fw.template_id, fw.deleted, fw.name, fw.autostart_schedule, fw.ttl, fw.last_used_at, fw.dormant_at, fw.deleting_at, fw.automatic_updates, fw.favorite, fw.template_name, fw.template_version_id, fw.template_version_name, fw.username, fw.latest_build_completed_at, fw.latest_build_canceled_at, fw.latest_build_error, fw.latest_build_transition + fw.id, fw.created_at, fw.updated_at, fw.owner_id, fw.organization_id, fw.template_id, fw.deleted, fw.name, fw.autostart_schedule, fw.ttl, fw.last_used_at, fw.dormant_at, 
fw.deleting_at, fw.automatic_updates, fw.favorite, fw.template_name, fw.template_version_id, fw.template_version_name, fw.username, fw.latest_build_completed_at, fw.latest_build_canceled_at, fw.latest_build_error, fw.latest_build_transition, fw.latest_build_status FROM filtered_workspaces fw ORDER BY @@ -12528,7 +12529,7 @@ WHERE $19 ), filtered_workspaces_order_with_summary AS ( SELECT - fwo.id, fwo.created_at, fwo.updated_at, fwo.owner_id, fwo.organization_id, fwo.template_id, fwo.deleted, fwo.name, fwo.autostart_schedule, fwo.ttl, fwo.last_used_at, fwo.dormant_at, fwo.deleting_at, fwo.automatic_updates, fwo.favorite, fwo.template_name, fwo.template_version_id, fwo.template_version_name, fwo.username, fwo.latest_build_completed_at, fwo.latest_build_canceled_at, fwo.latest_build_error, fwo.latest_build_transition + fwo.id, fwo.created_at, fwo.updated_at, fwo.owner_id, fwo.organization_id, fwo.template_id, fwo.deleted, fwo.name, fwo.autostart_schedule, fwo.ttl, fwo.last_used_at, fwo.dormant_at, fwo.deleting_at, fwo.automatic_updates, fwo.favorite, fwo.template_name, fwo.template_version_id, fwo.template_version_name, fwo.username, fwo.latest_build_completed_at, fwo.latest_build_canceled_at, fwo.latest_build_error, fwo.latest_build_transition, fwo.latest_build_status FROM filtered_workspaces_order fwo -- Return a technical summary row with total count of workspaces. 
@@ -12558,7 +12559,8 @@ WHERE '0001-01-01 00:00:00+00'::timestamptz, -- latest_build_completed_at, '0001-01-01 00:00:00+00'::timestamptz, -- latest_build_canceled_at, '', -- latest_build_error - 'start'::workspace_transition -- latest_build_transition + 'start'::workspace_transition, -- latest_build_transition + 'unknown'::provisioner_job_status -- latest_build_status WHERE $21 :: boolean = true ), total_count AS ( @@ -12568,7 +12570,7 @@ WHERE filtered_workspaces ) SELECT - fwos.id, fwos.created_at, fwos.updated_at, fwos.owner_id, fwos.organization_id, fwos.template_id, fwos.deleted, fwos.name, fwos.autostart_schedule, fwos.ttl, fwos.last_used_at, fwos.dormant_at, fwos.deleting_at, fwos.automatic_updates, fwos.favorite, fwos.template_name, fwos.template_version_id, fwos.template_version_name, fwos.username, fwos.latest_build_completed_at, fwos.latest_build_canceled_at, fwos.latest_build_error, fwos.latest_build_transition, + fwos.id, fwos.created_at, fwos.updated_at, fwos.owner_id, fwos.organization_id, fwos.template_id, fwos.deleted, fwos.name, fwos.autostart_schedule, fwos.ttl, fwos.last_used_at, fwos.dormant_at, fwos.deleting_at, fwos.automatic_updates, fwos.favorite, fwos.template_name, fwos.template_version_id, fwos.template_version_name, fwos.username, fwos.latest_build_completed_at, fwos.latest_build_canceled_at, fwos.latest_build_error, fwos.latest_build_transition, fwos.latest_build_status, tc.count FROM filtered_workspaces_order_with_summary fwos @@ -12601,30 +12603,31 @@ type GetWorkspacesParams struct { } type GetWorkspacesRow struct { - ID uuid.UUID `db:"id" json:"id"` - CreatedAt time.Time `db:"created_at" json:"created_at"` - UpdatedAt time.Time `db:"updated_at" json:"updated_at"` - OwnerID uuid.UUID `db:"owner_id" json:"owner_id"` - OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` - TemplateID uuid.UUID `db:"template_id" json:"template_id"` - Deleted bool `db:"deleted" json:"deleted"` - Name string `db:"name" json:"name"` - 
AutostartSchedule sql.NullString `db:"autostart_schedule" json:"autostart_schedule"` - Ttl sql.NullInt64 `db:"ttl" json:"ttl"` - LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"` - DormantAt sql.NullTime `db:"dormant_at" json:"dormant_at"` - DeletingAt sql.NullTime `db:"deleting_at" json:"deleting_at"` - AutomaticUpdates AutomaticUpdates `db:"automatic_updates" json:"automatic_updates"` - Favorite bool `db:"favorite" json:"favorite"` - TemplateName string `db:"template_name" json:"template_name"` - TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` - TemplateVersionName sql.NullString `db:"template_version_name" json:"template_version_name"` - Username string `db:"username" json:"username"` - LatestBuildCompletedAt sql.NullTime `db:"latest_build_completed_at" json:"latest_build_completed_at"` - LatestBuildCanceledAt sql.NullTime `db:"latest_build_canceled_at" json:"latest_build_canceled_at"` - LatestBuildError sql.NullString `db:"latest_build_error" json:"latest_build_error"` - LatestBuildTransition WorkspaceTransition `db:"latest_build_transition" json:"latest_build_transition"` - Count int64 `db:"count" json:"count"` + ID uuid.UUID `db:"id" json:"id"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` + OwnerID uuid.UUID `db:"owner_id" json:"owner_id"` + OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` + TemplateID uuid.UUID `db:"template_id" json:"template_id"` + Deleted bool `db:"deleted" json:"deleted"` + Name string `db:"name" json:"name"` + AutostartSchedule sql.NullString `db:"autostart_schedule" json:"autostart_schedule"` + Ttl sql.NullInt64 `db:"ttl" json:"ttl"` + LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"` + DormantAt sql.NullTime `db:"dormant_at" json:"dormant_at"` + DeletingAt sql.NullTime `db:"deleting_at" json:"deleting_at"` + AutomaticUpdates AutomaticUpdates `db:"automatic_updates" 
json:"automatic_updates"` + Favorite bool `db:"favorite" json:"favorite"` + TemplateName string `db:"template_name" json:"template_name"` + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + TemplateVersionName sql.NullString `db:"template_version_name" json:"template_version_name"` + Username string `db:"username" json:"username"` + LatestBuildCompletedAt sql.NullTime `db:"latest_build_completed_at" json:"latest_build_completed_at"` + LatestBuildCanceledAt sql.NullTime `db:"latest_build_canceled_at" json:"latest_build_canceled_at"` + LatestBuildError sql.NullString `db:"latest_build_error" json:"latest_build_error"` + LatestBuildTransition WorkspaceTransition `db:"latest_build_transition" json:"latest_build_transition"` + LatestBuildStatus ProvisionerJobStatus `db:"latest_build_status" json:"latest_build_status"` + Count int64 `db:"count" json:"count"` } // build_params is used to filter by build parameters if present. @@ -12685,6 +12688,7 @@ func (q *sqlQuerier) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) &i.LatestBuildCanceledAt, &i.LatestBuildError, &i.LatestBuildTransition, + &i.LatestBuildStatus, &i.Count, ); err != nil { return nil, err diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 767280634f..616e83a2ba 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -96,7 +96,8 @@ SELECT latest_build.completed_at as latest_build_completed_at, latest_build.canceled_at as latest_build_canceled_at, latest_build.error as latest_build_error, - latest_build.transition as latest_build_transition + latest_build.transition as latest_build_transition, + latest_build.job_status as latest_build_status FROM workspaces JOIN @@ -118,7 +119,7 @@ LEFT JOIN LATERAL ( provisioner_jobs.job_status FROM workspace_builds - LEFT JOIN + JOIN provisioner_jobs ON provisioner_jobs.id = workspace_builds.job_id @@ -374,7 +375,8 @@ WHERE '0001-01-01 
00:00:00+00'::timestamptz, -- latest_build_completed_at, '0001-01-01 00:00:00+00'::timestamptz, -- latest_build_canceled_at, '', -- latest_build_error - 'start'::workspace_transition -- latest_build_transition + 'start'::workspace_transition, -- latest_build_transition + 'unknown'::provisioner_job_status -- latest_build_status WHERE @with_summary :: boolean = true ), total_count AS ( diff --git a/coderd/prometheusmetrics/prometheusmetrics.go b/coderd/prometheusmetrics/prometheusmetrics.go index b2c4b46677..4d3f1d1a04 100644 --- a/coderd/prometheusmetrics/prometheusmetrics.go +++ b/coderd/prometheusmetrics/prometheusmetrics.go @@ -24,10 +24,12 @@ import ( "github.com/coder/coder/v2/tailnet" ) +const defaultRefreshRate = time.Minute + // ActiveUsers tracks the number of users that have authenticated within the past hour. func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { if duration == 0 { - duration = 5 * time.Minute + duration = defaultRefreshRate } gauge := prometheus.NewGauge(prometheus.GaugeOpts{ @@ -72,36 +74,42 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab } // Workspaces tracks the total number of workspaces with labels on status. 
-func Workspaces(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { +func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { if duration == 0 { - duration = 5 * time.Minute + duration = defaultRefreshRate } - gauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + workspaceLatestBuildTotals := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: "coderd", Subsystem: "api", Name: "workspace_latest_build_total", - Help: "The latest workspace builds with a status.", + Help: "The current number of workspace builds by status.", }, []string{"status"}) - err := registerer.Register(gauge) - if err != nil { + if err := registerer.Register(workspaceLatestBuildTotals); err != nil { + return nil, err + } + + workspaceLatestBuildStatuses := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "coderd", + Name: "workspace_latest_build_status", + Help: "The current workspace statuses by template, transition, and owner.", + }, []string{"status", "template_name", "template_version", "workspace_owner", "workspace_transition"}) + if err := registerer.Register(workspaceLatestBuildStatuses); err != nil { return nil, err } - // This exists so the prometheus metric exports immediately when set. - // It helps with tests so they don't have to wait for a tick. - gauge.WithLabelValues("pending").Set(0) ctx, cancelFunc := context.WithCancel(ctx) done := make(chan struct{}) - // Use time.Nanosecond to force an initial tick. It will be reset to the - // correct duration after executing once. 
-	ticker := time.NewTicker(time.Nanosecond)
-	doTick := func() {
-		defer ticker.Reset(duration)
-
+	updateWorkspaceTotals := func() {
 		builds, err := db.GetLatestWorkspaceBuilds(ctx)
 		if err != nil {
+			if errors.Is(err, sql.ErrNoRows) {
+				// No rows is not a failure: clear all series and do not warn.
+				workspaceLatestBuildTotals.Reset()
+			} else {
+				logger.Warn(ctx, "failed to load latest workspace builds", slog.Error(err))
+			}
 			return
 		}
 		jobIDs := make([]uuid.UUID, 0, len(builds))
@@ -110,16 +118,53 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
 		}
 		jobs, err := db.GetProvisionerJobsByIDs(ctx, jobIDs)
 		if err != nil {
+			ids := make([]string, 0, len(jobIDs))
+			for _, id := range jobIDs {
+				ids = append(ids, id.String())
+			}
+
+			logger.Warn(ctx, "failed to load provisioner jobs", slog.F("ids", ids), slog.Error(err))
 			return
 		}
 
-		gauge.Reset()
+		workspaceLatestBuildTotals.Reset()
 		for _, job := range jobs {
 			status := codersdk.ProvisionerJobStatus(job.JobStatus)
-			gauge.WithLabelValues(string(status)).Add(1)
+			workspaceLatestBuildTotals.WithLabelValues(string(status)).Add(1)
 		}
 	}
 
+	updateWorkspaceStatuses := func() {
+		ws, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
+			Deleted:     false,
+			WithSummary: false,
+		})
+		if err != nil {
+			if errors.Is(err, sql.ErrNoRows) {
+				// No rows is not a failure: clear all series and do not warn.
+				workspaceLatestBuildStatuses.Reset()
+			} else {
+				logger.Warn(ctx, "failed to load active workspaces", slog.Error(err))
+			}
+			return
+		}
+
+		workspaceLatestBuildStatuses.Reset()
+		for _, w := range ws {
+			workspaceLatestBuildStatuses.WithLabelValues(string(w.LatestBuildStatus), w.TemplateName, w.TemplateVersionName.String, w.Username, string(w.LatestBuildTransition)).Add(1)
+		}
+	}
+
+	// Use time.Nanosecond to force an initial tick. It will be reset to the
+	// correct duration after executing once.
+ ticker := time.NewTicker(time.Nanosecond) + doTick := func() { + defer ticker.Reset(duration) + + updateWorkspaceTotals() + updateWorkspaceStatuses() + } + go func() { defer close(done) defer ticker.Stop() @@ -141,7 +186,7 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa // Agents tracks the total number of workspaces with labels on status. func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, coordinator *atomic.Pointer[tailnet.Coordinator], derpMapFn func() *tailcfg.DERPMap, agentInactiveDisconnectTimeout, duration time.Duration) (func(), error) { if duration == 0 { - duration = 1 * time.Minute + duration = defaultRefreshRate } agentsGauge := NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{ @@ -330,7 +375,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration, aggregateByLabels []string) (func(), error) { if duration == 0 { - duration = 1 * time.Minute + duration = defaultRefreshRate } if len(aggregateByLabels) == 0 { diff --git a/coderd/prometheusmetrics/prometheusmetrics_test.go b/coderd/prometheusmetrics/prometheusmetrics_test.go index 32e97f84c3..0ca7884cfb 100644 --- a/coderd/prometheusmetrics/prometheusmetrics_test.go +++ b/coderd/prometheusmetrics/prometheusmetrics_test.go @@ -11,6 +11,7 @@ import ( "testing" "time" + "github.com/coder/coder/v2/cryptorand" "github.com/google/uuid" "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" @@ -110,89 +111,9 @@ func TestActiveUsers(t *testing.T) { } } -func TestWorkspaces(t *testing.T) { +func TestWorkspaceLatestBuildTotals(t *testing.T) { t.Parallel() - insertRunning := func(db database.Store) database.ProvisionerJob { - job, err := db.InsertProvisionerJob(context.Background(), 
database.InsertProvisionerJobParams{ - ID: uuid.New(), - CreatedAt: dbtime.Now(), - UpdatedAt: dbtime.Now(), - Provisioner: database.ProvisionerTypeEcho, - StorageMethod: database.ProvisionerStorageMethodFile, - Type: database.ProvisionerJobTypeWorkspaceBuild, - }) - require.NoError(t, err) - err = db.InsertWorkspaceBuild(context.Background(), database.InsertWorkspaceBuildParams{ - ID: uuid.New(), - WorkspaceID: uuid.New(), - JobID: job.ID, - BuildNumber: 1, - Transition: database.WorkspaceTransitionStart, - Reason: database.BuildReasonInitiator, - }) - require.NoError(t, err) - // This marks the job as started. - _, err = db.AcquireProvisionerJob(context.Background(), database.AcquireProvisionerJobParams{ - OrganizationID: job.OrganizationID, - StartedAt: sql.NullTime{ - Time: dbtime.Now(), - Valid: true, - }, - Types: []database.ProvisionerType{database.ProvisionerTypeEcho}, - }) - require.NoError(t, err) - return job - } - - insertCanceled := func(db database.Store) { - job := insertRunning(db) - err := db.UpdateProvisionerJobWithCancelByID(context.Background(), database.UpdateProvisionerJobWithCancelByIDParams{ - ID: job.ID, - CanceledAt: sql.NullTime{ - Time: dbtime.Now(), - Valid: true, - }, - }) - require.NoError(t, err) - err = db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{ - ID: job.ID, - CompletedAt: sql.NullTime{ - Time: dbtime.Now(), - Valid: true, - }, - }) - require.NoError(t, err) - } - - insertFailed := func(db database.Store) { - job := insertRunning(db) - err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{ - ID: job.ID, - CompletedAt: sql.NullTime{ - Time: dbtime.Now(), - Valid: true, - }, - Error: sql.NullString{ - String: "failed", - Valid: true, - }, - }) - require.NoError(t, err) - } - - insertSuccess := func(db database.Store) { - job := insertRunning(db) - err := 
db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{ - ID: job.ID, - CompletedAt: sql.NullTime{ - Time: dbtime.Now(), - Valid: true, - }, - }) - require.NoError(t, err) - } - for _, tc := range []struct { Name string Database func() database.Store @@ -208,13 +129,13 @@ func TestWorkspaces(t *testing.T) { Name: "Multiple", Database: func() database.Store { db := dbmem.New() - insertCanceled(db) - insertFailed(db) - insertFailed(db) - insertSuccess(db) - insertSuccess(db) - insertSuccess(db) - insertRunning(db) + insertCanceled(t, db) + insertFailed(t, db) + insertFailed(t, db) + insertSuccess(t, db) + insertSuccess(t, db) + insertSuccess(t, db) + insertRunning(t, db) return db }, Total: 7, @@ -229,29 +150,32 @@ func TestWorkspaces(t *testing.T) { t.Run(tc.Name, func(t *testing.T) { t.Parallel() registry := prometheus.NewRegistry() - closeFunc, err := prometheusmetrics.Workspaces(context.Background(), registry, tc.Database(), time.Millisecond) + closeFunc, err := prometheusmetrics.Workspaces(context.Background(), slogtest.Make(t, nil).Leveled(slog.LevelWarn), registry, tc.Database(), testutil.IntervalFast) require.NoError(t, err) t.Cleanup(closeFunc) require.Eventually(t, func() bool { metrics, err := registry.Gather() assert.NoError(t, err) - if len(metrics) < 1 { - return false - } sum := 0 - for _, metric := range metrics[0].Metric { - count, ok := tc.Status[codersdk.ProvisionerJobStatus(metric.Label[0].GetValue())] - if metric.Gauge.GetValue() == 0 { + for _, m := range metrics { + if m.GetName() != "coderd_api_workspace_latest_build_total" { continue } - if !ok { - t.Fail() + + for _, metric := range m.Metric { + count, ok := tc.Status[codersdk.ProvisionerJobStatus(metric.Label[0].GetValue())] + if metric.Gauge.GetValue() == 0 { + continue + } + if !ok { + t.Fail() + } + if metric.Gauge.GetValue() != float64(count) { + return false + } + sum += int(metric.Gauge.GetValue()) } - if 
metric.Gauge.GetValue() != float64(count) { - return false - } - sum += int(metric.Gauge.GetValue()) } t.Logf("sum %d == total %d", sum, tc.Total) return sum == tc.Total @@ -260,6 +184,90 @@ func TestWorkspaces(t *testing.T) { } } +func TestWorkspaceLatestBuildStatuses(t *testing.T) { + t.Parallel() + + for _, tc := range []struct { + Name string + Database func() database.Store + ExpectedWorkspaces int + ExpectedStatuses map[codersdk.ProvisionerJobStatus]int + }{{ + Name: "None", + Database: func() database.Store { + return dbmem.New() + }, + ExpectedWorkspaces: 0, + }, { + Name: "Multiple", + Database: func() database.Store { + db := dbmem.New() + insertTemplates(t, db) + insertCanceled(t, db) + insertFailed(t, db) + insertFailed(t, db) + insertSuccess(t, db) + insertSuccess(t, db) + insertSuccess(t, db) + insertRunning(t, db) + return db + }, + ExpectedWorkspaces: 7, + ExpectedStatuses: map[codersdk.ProvisionerJobStatus]int{ + codersdk.ProvisionerJobCanceled: 1, + codersdk.ProvisionerJobFailed: 2, + codersdk.ProvisionerJobSucceeded: 3, + codersdk.ProvisionerJobRunning: 1, + }, + }} { + tc := tc + t.Run(tc.Name, func(t *testing.T) { + t.Parallel() + registry := prometheus.NewRegistry() + closeFunc, err := prometheusmetrics.Workspaces(context.Background(), slogtest.Make(t, nil), registry, tc.Database(), testutil.IntervalFast) + require.NoError(t, err) + t.Cleanup(closeFunc) + + require.Eventually(t, func() bool { + metrics, err := registry.Gather() + assert.NoError(t, err) + + stMap := map[codersdk.ProvisionerJobStatus]int{} + for _, m := range metrics { + if m.GetName() != "coderd_workspace_latest_build_status" { + continue + } + + for _, metric := range m.Metric { + for _, l := range metric.Label { + if l == nil { + continue + } + + if l.GetName() == "status" { + status := codersdk.ProvisionerJobStatus(l.GetValue()) + stMap[status] += int(metric.Gauge.GetValue()) + } + } + } + } + + stSum := 0 + for st, count := range stMap { + if tc.ExpectedStatuses[st] != 
count {
+					return false
+				}
+
+				stSum += count
+			}
+
+			t.Logf("status series = %d, expected == %d", stSum, tc.ExpectedWorkspaces)
+			return stSum == tc.ExpectedWorkspaces
+		}, testutil.WaitShort, testutil.IntervalFast)
+		})
+	}
+}
+
 func TestAgents(t *testing.T) {
 	t.Parallel()
 
@@ -601,3 +609,153 @@ func prepareWorkspaceAndAgent(t *testing.T, client *codersdk.Client, user coders
 	agentClient.SetSessionToken(authToken)
 	return agentClient
 }
+
+var (
+	templateA        = uuid.New()
+	templateVersionA = uuid.New()
+	templateB        = uuid.New()
+	templateVersionB = uuid.New()
+)
+
+func insertTemplates(t *testing.T, db database.Store) {
+	require.NoError(t, db.InsertTemplate(context.Background(), database.InsertTemplateParams{
+		ID:                  templateA,
+		Name:                "template-a",
+		Provisioner:         database.ProvisionerTypeTerraform,
+		MaxPortSharingLevel: database.AppSharingLevelAuthenticated,
+	}))
+
+	require.NoError(t, db.InsertTemplateVersion(context.Background(), database.InsertTemplateVersionParams{
+		ID:         templateVersionA,
+		TemplateID: uuid.NullUUID{UUID: templateA, Valid: true}, // Valid marks the ID as non-NULL so the version is linked to template-a.
+		Name:       "version-1a",
+	}))
+
+	require.NoError(t, db.InsertTemplate(context.Background(), database.InsertTemplateParams{
+		ID:                  templateB,
+		Name:                "template-b",
+		Provisioner:         database.ProvisionerTypeTerraform,
+		MaxPortSharingLevel: database.AppSharingLevelAuthenticated,
+	}))
+
+	require.NoError(t, db.InsertTemplateVersion(context.Background(), database.InsertTemplateVersionParams{
+		ID:         templateVersionB,
+		TemplateID: uuid.NullUUID{UUID: templateB, Valid: true}, // Valid marks the ID as non-NULL so the version is linked to template-b.
+		Name:       "version-1b",
+	}))
+}
+
+func insertUser(t *testing.T, db database.Store) database.User {
+	username, err := cryptorand.String(8)
+	require.NoError(t, err)
+
+	user, err := db.InsertUser(context.Background(), database.InsertUserParams{
+		ID:        uuid.New(),
+		Username:  username,
+		LoginType: database.LoginTypeNone,
+	})
+	require.NoError(t, err)
+
+	return user
+}
+
+func insertRunning(t *testing.T, db database.Store) database.ProvisionerJob {
+	var template, templateVersion 
uuid.UUID + rnd, err := cryptorand.Intn(10) + require.NoError(t, err) + if rnd > 5 { + template = templateB + templateVersion = templateVersionB + } else { + template = templateA + templateVersion = templateVersionA + } + + workspace, err := db.InsertWorkspace(context.Background(), database.InsertWorkspaceParams{ + ID: uuid.New(), + OwnerID: insertUser(t, db).ID, + Name: uuid.NewString(), + TemplateID: template, + AutomaticUpdates: database.AutomaticUpdatesNever, + }) + require.NoError(t, err) + + job, err := db.InsertProvisionerJob(context.Background(), database.InsertProvisionerJobParams{ + ID: uuid.New(), + CreatedAt: dbtime.Now(), + UpdatedAt: dbtime.Now(), + Provisioner: database.ProvisionerTypeEcho, + StorageMethod: database.ProvisionerStorageMethodFile, + Type: database.ProvisionerJobTypeWorkspaceBuild, + }) + require.NoError(t, err) + err = db.InsertWorkspaceBuild(context.Background(), database.InsertWorkspaceBuildParams{ + ID: uuid.New(), + WorkspaceID: workspace.ID, + JobID: job.ID, + BuildNumber: 1, + Transition: database.WorkspaceTransitionStart, + Reason: database.BuildReasonInitiator, + TemplateVersionID: templateVersion, + }) + require.NoError(t, err) + // This marks the job as started. 
+ _, err = db.AcquireProvisionerJob(context.Background(), database.AcquireProvisionerJobParams{ + OrganizationID: job.OrganizationID, + StartedAt: sql.NullTime{ + Time: dbtime.Now(), + Valid: true, + }, + Types: []database.ProvisionerType{database.ProvisionerTypeEcho}, + }) + require.NoError(t, err) + return job +} + +func insertCanceled(t *testing.T, db database.Store) { + job := insertRunning(t, db) + err := db.UpdateProvisionerJobWithCancelByID(context.Background(), database.UpdateProvisionerJobWithCancelByIDParams{ + ID: job.ID, + CanceledAt: sql.NullTime{ + Time: dbtime.Now(), + Valid: true, + }, + }) + require.NoError(t, err) + err = db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{ + ID: job.ID, + CompletedAt: sql.NullTime{ + Time: dbtime.Now(), + Valid: true, + }, + }) + require.NoError(t, err) +} + +func insertFailed(t *testing.T, db database.Store) { + job := insertRunning(t, db) + err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{ + ID: job.ID, + CompletedAt: sql.NullTime{ + Time: dbtime.Now(), + Valid: true, + }, + Error: sql.NullString{ + String: "failed", + Valid: true, + }, + }) + require.NoError(t, err) +} + +func insertSuccess(t *testing.T, db database.Store) { + job := insertRunning(t, db) + err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{ + ID: job.ID, + CompletedAt: sql.NullTime{ + Time: dbtime.Now(), + Valid: true, + }, + }) + require.NoError(t, err) +}