feat: expose workspace statuses (with details) as a prometheus metric (#12762)

Implements #12462
This commit is contained in:
Danny Kopping 2024-04-02 09:57:36 +02:00 committed by GitHub
parent 114830de26
commit 79fb8e43c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 375 additions and 161 deletions

View File

@ -209,7 +209,7 @@ func enablePrometheus(
} }
afterCtx(ctx, closeUsersFunc) afterCtx(ctx, closeUsersFunc)
closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.PrometheusRegistry, options.Database, 0) closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.Logger.Named("workspaces_metrics"), options.PrometheusRegistry, options.Database, 0)
if err != nil { if err != nil {
return nil, xerrors.Errorf("register workspaces prometheus metric: %w", err) return nil, xerrors.Errorf("register workspaces prometheus metric: %w", err)
} }

View File

@ -973,7 +973,6 @@ func TestServer(t *testing.T) {
scanner := bufio.NewScanner(res.Body) scanner := bufio.NewScanner(res.Body)
hasActiveUsers := false hasActiveUsers := false
hasWorkspaces := false
for scanner.Scan() { for scanner.Scan() {
// This metric is manually registered to be tracked in the server. That's // This metric is manually registered to be tracked in the server. That's
// why we test it's tracked here. // why we test it's tracked here.
@ -981,10 +980,6 @@ func TestServer(t *testing.T) {
hasActiveUsers = true hasActiveUsers = true
continue continue
} }
if strings.HasPrefix(scanner.Text(), "coderd_api_workspace_latest_build_total") {
hasWorkspaces = true
continue
}
if strings.HasPrefix(scanner.Text(), "coderd_db_query_latencies_seconds") { if strings.HasPrefix(scanner.Text(), "coderd_db_query_latencies_seconds") {
t.Fatal("db metrics should not be tracked when --prometheus-collect-db-metrics is not enabled") t.Fatal("db metrics should not be tracked when --prometheus-collect-db-metrics is not enabled")
} }
@ -992,7 +987,6 @@ func TestServer(t *testing.T) {
} }
require.NoError(t, scanner.Err()) require.NoError(t, scanner.Err())
require.True(t, hasActiveUsers) require.True(t, hasActiveUsers)
require.True(t, hasWorkspaces)
}) })
t.Run("DBMetricsEnabled", func(t *testing.T) { t.Run("DBMetricsEnabled", func(t *testing.T) {

View File

@ -404,6 +404,16 @@ func (q *FakeQuerier) convertToWorkspaceRowsNoLock(ctx context.Context, workspac
break break
} }
} }
if pj, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID); err == nil {
wr.LatestBuildStatus = pj.JobStatus
}
wr.LatestBuildTransition = build.Transition
}
if u, err := q.getUserByIDNoLock(w.OwnerID); err == nil {
wr.Username = u.Username
} }
rows = append(rows, wr) rows = append(rows, wr)

View File

@ -266,6 +266,7 @@ func (q *sqlQuerier) GetAuthorizedWorkspaces(ctx context.Context, arg GetWorkspa
&i.LatestBuildCanceledAt, &i.LatestBuildCanceledAt,
&i.LatestBuildError, &i.LatestBuildError,
&i.LatestBuildTransition, &i.LatestBuildTransition,
&i.LatestBuildStatus,
&i.Count, &i.Count,
); err != nil { ); err != nil {
return nil, err return nil, err

View File

@ -12280,7 +12280,8 @@ SELECT
latest_build.completed_at as latest_build_completed_at, latest_build.completed_at as latest_build_completed_at,
latest_build.canceled_at as latest_build_canceled_at, latest_build.canceled_at as latest_build_canceled_at,
latest_build.error as latest_build_error, latest_build.error as latest_build_error,
latest_build.transition as latest_build_transition latest_build.transition as latest_build_transition,
latest_build.job_status as latest_build_status
FROM FROM
workspaces workspaces
JOIN JOIN
@ -12302,7 +12303,7 @@ LEFT JOIN LATERAL (
provisioner_jobs.job_status provisioner_jobs.job_status
FROM FROM
workspace_builds workspace_builds
LEFT JOIN JOIN
provisioner_jobs provisioner_jobs
ON ON
provisioner_jobs.id = workspace_builds.job_id provisioner_jobs.id = workspace_builds.job_id
@ -12507,7 +12508,7 @@ WHERE
-- @authorize_filter -- @authorize_filter
), filtered_workspaces_order AS ( ), filtered_workspaces_order AS (
SELECT SELECT
fw.id, fw.created_at, fw.updated_at, fw.owner_id, fw.organization_id, fw.template_id, fw.deleted, fw.name, fw.autostart_schedule, fw.ttl, fw.last_used_at, fw.dormant_at, fw.deleting_at, fw.automatic_updates, fw.favorite, fw.template_name, fw.template_version_id, fw.template_version_name, fw.username, fw.latest_build_completed_at, fw.latest_build_canceled_at, fw.latest_build_error, fw.latest_build_transition fw.id, fw.created_at, fw.updated_at, fw.owner_id, fw.organization_id, fw.template_id, fw.deleted, fw.name, fw.autostart_schedule, fw.ttl, fw.last_used_at, fw.dormant_at, fw.deleting_at, fw.automatic_updates, fw.favorite, fw.template_name, fw.template_version_id, fw.template_version_name, fw.username, fw.latest_build_completed_at, fw.latest_build_canceled_at, fw.latest_build_error, fw.latest_build_transition, fw.latest_build_status
FROM FROM
filtered_workspaces fw filtered_workspaces fw
ORDER BY ORDER BY
@ -12528,7 +12529,7 @@ WHERE
$19 $19
), filtered_workspaces_order_with_summary AS ( ), filtered_workspaces_order_with_summary AS (
SELECT SELECT
fwo.id, fwo.created_at, fwo.updated_at, fwo.owner_id, fwo.organization_id, fwo.template_id, fwo.deleted, fwo.name, fwo.autostart_schedule, fwo.ttl, fwo.last_used_at, fwo.dormant_at, fwo.deleting_at, fwo.automatic_updates, fwo.favorite, fwo.template_name, fwo.template_version_id, fwo.template_version_name, fwo.username, fwo.latest_build_completed_at, fwo.latest_build_canceled_at, fwo.latest_build_error, fwo.latest_build_transition fwo.id, fwo.created_at, fwo.updated_at, fwo.owner_id, fwo.organization_id, fwo.template_id, fwo.deleted, fwo.name, fwo.autostart_schedule, fwo.ttl, fwo.last_used_at, fwo.dormant_at, fwo.deleting_at, fwo.automatic_updates, fwo.favorite, fwo.template_name, fwo.template_version_id, fwo.template_version_name, fwo.username, fwo.latest_build_completed_at, fwo.latest_build_canceled_at, fwo.latest_build_error, fwo.latest_build_transition, fwo.latest_build_status
FROM FROM
filtered_workspaces_order fwo filtered_workspaces_order fwo
-- Return a technical summary row with total count of workspaces. -- Return a technical summary row with total count of workspaces.
@ -12558,7 +12559,8 @@ WHERE
'0001-01-01 00:00:00+00'::timestamptz, -- latest_build_completed_at, '0001-01-01 00:00:00+00'::timestamptz, -- latest_build_completed_at,
'0001-01-01 00:00:00+00'::timestamptz, -- latest_build_canceled_at, '0001-01-01 00:00:00+00'::timestamptz, -- latest_build_canceled_at,
'', -- latest_build_error '', -- latest_build_error
'start'::workspace_transition -- latest_build_transition 'start'::workspace_transition, -- latest_build_transition
'unknown'::provisioner_job_status -- latest_build_status
WHERE WHERE
$21 :: boolean = true $21 :: boolean = true
), total_count AS ( ), total_count AS (
@ -12568,7 +12570,7 @@ WHERE
filtered_workspaces filtered_workspaces
) )
SELECT SELECT
fwos.id, fwos.created_at, fwos.updated_at, fwos.owner_id, fwos.organization_id, fwos.template_id, fwos.deleted, fwos.name, fwos.autostart_schedule, fwos.ttl, fwos.last_used_at, fwos.dormant_at, fwos.deleting_at, fwos.automatic_updates, fwos.favorite, fwos.template_name, fwos.template_version_id, fwos.template_version_name, fwos.username, fwos.latest_build_completed_at, fwos.latest_build_canceled_at, fwos.latest_build_error, fwos.latest_build_transition, fwos.id, fwos.created_at, fwos.updated_at, fwos.owner_id, fwos.organization_id, fwos.template_id, fwos.deleted, fwos.name, fwos.autostart_schedule, fwos.ttl, fwos.last_used_at, fwos.dormant_at, fwos.deleting_at, fwos.automatic_updates, fwos.favorite, fwos.template_name, fwos.template_version_id, fwos.template_version_name, fwos.username, fwos.latest_build_completed_at, fwos.latest_build_canceled_at, fwos.latest_build_error, fwos.latest_build_transition, fwos.latest_build_status,
tc.count tc.count
FROM FROM
filtered_workspaces_order_with_summary fwos filtered_workspaces_order_with_summary fwos
@ -12601,30 +12603,31 @@ type GetWorkspacesParams struct {
} }
type GetWorkspacesRow struct { type GetWorkspacesRow struct {
ID uuid.UUID `db:"id" json:"id"` ID uuid.UUID `db:"id" json:"id"`
CreatedAt time.Time `db:"created_at" json:"created_at"` CreatedAt time.Time `db:"created_at" json:"created_at"`
UpdatedAt time.Time `db:"updated_at" json:"updated_at"` UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
OwnerID uuid.UUID `db:"owner_id" json:"owner_id"` OwnerID uuid.UUID `db:"owner_id" json:"owner_id"`
OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"` OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"`
TemplateID uuid.UUID `db:"template_id" json:"template_id"` TemplateID uuid.UUID `db:"template_id" json:"template_id"`
Deleted bool `db:"deleted" json:"deleted"` Deleted bool `db:"deleted" json:"deleted"`
Name string `db:"name" json:"name"` Name string `db:"name" json:"name"`
AutostartSchedule sql.NullString `db:"autostart_schedule" json:"autostart_schedule"` AutostartSchedule sql.NullString `db:"autostart_schedule" json:"autostart_schedule"`
Ttl sql.NullInt64 `db:"ttl" json:"ttl"` Ttl sql.NullInt64 `db:"ttl" json:"ttl"`
LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"` LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"`
DormantAt sql.NullTime `db:"dormant_at" json:"dormant_at"` DormantAt sql.NullTime `db:"dormant_at" json:"dormant_at"`
DeletingAt sql.NullTime `db:"deleting_at" json:"deleting_at"` DeletingAt sql.NullTime `db:"deleting_at" json:"deleting_at"`
AutomaticUpdates AutomaticUpdates `db:"automatic_updates" json:"automatic_updates"` AutomaticUpdates AutomaticUpdates `db:"automatic_updates" json:"automatic_updates"`
Favorite bool `db:"favorite" json:"favorite"` Favorite bool `db:"favorite" json:"favorite"`
TemplateName string `db:"template_name" json:"template_name"` TemplateName string `db:"template_name" json:"template_name"`
TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"`
TemplateVersionName sql.NullString `db:"template_version_name" json:"template_version_name"` TemplateVersionName sql.NullString `db:"template_version_name" json:"template_version_name"`
Username string `db:"username" json:"username"` Username string `db:"username" json:"username"`
LatestBuildCompletedAt sql.NullTime `db:"latest_build_completed_at" json:"latest_build_completed_at"` LatestBuildCompletedAt sql.NullTime `db:"latest_build_completed_at" json:"latest_build_completed_at"`
LatestBuildCanceledAt sql.NullTime `db:"latest_build_canceled_at" json:"latest_build_canceled_at"` LatestBuildCanceledAt sql.NullTime `db:"latest_build_canceled_at" json:"latest_build_canceled_at"`
LatestBuildError sql.NullString `db:"latest_build_error" json:"latest_build_error"` LatestBuildError sql.NullString `db:"latest_build_error" json:"latest_build_error"`
LatestBuildTransition WorkspaceTransition `db:"latest_build_transition" json:"latest_build_transition"` LatestBuildTransition WorkspaceTransition `db:"latest_build_transition" json:"latest_build_transition"`
Count int64 `db:"count" json:"count"` LatestBuildStatus ProvisionerJobStatus `db:"latest_build_status" json:"latest_build_status"`
Count int64 `db:"count" json:"count"`
} }
// build_params is used to filter by build parameters if present. // build_params is used to filter by build parameters if present.
@ -12685,6 +12688,7 @@ func (q *sqlQuerier) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams)
&i.LatestBuildCanceledAt, &i.LatestBuildCanceledAt,
&i.LatestBuildError, &i.LatestBuildError,
&i.LatestBuildTransition, &i.LatestBuildTransition,
&i.LatestBuildStatus,
&i.Count, &i.Count,
); err != nil { ); err != nil {
return nil, err return nil, err

View File

@ -96,7 +96,8 @@ SELECT
latest_build.completed_at as latest_build_completed_at, latest_build.completed_at as latest_build_completed_at,
latest_build.canceled_at as latest_build_canceled_at, latest_build.canceled_at as latest_build_canceled_at,
latest_build.error as latest_build_error, latest_build.error as latest_build_error,
latest_build.transition as latest_build_transition latest_build.transition as latest_build_transition,
latest_build.job_status as latest_build_status
FROM FROM
workspaces workspaces
JOIN JOIN
@ -118,7 +119,7 @@ LEFT JOIN LATERAL (
provisioner_jobs.job_status provisioner_jobs.job_status
FROM FROM
workspace_builds workspace_builds
LEFT JOIN JOIN
provisioner_jobs provisioner_jobs
ON ON
provisioner_jobs.id = workspace_builds.job_id provisioner_jobs.id = workspace_builds.job_id
@ -374,7 +375,8 @@ WHERE
'0001-01-01 00:00:00+00'::timestamptz, -- latest_build_completed_at, '0001-01-01 00:00:00+00'::timestamptz, -- latest_build_completed_at,
'0001-01-01 00:00:00+00'::timestamptz, -- latest_build_canceled_at, '0001-01-01 00:00:00+00'::timestamptz, -- latest_build_canceled_at,
'', -- latest_build_error '', -- latest_build_error
'start'::workspace_transition -- latest_build_transition 'start'::workspace_transition, -- latest_build_transition
'unknown'::provisioner_job_status -- latest_build_status
WHERE WHERE
@with_summary :: boolean = true @with_summary :: boolean = true
), total_count AS ( ), total_count AS (

View File

@ -24,10 +24,12 @@ import (
"github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/tailnet"
) )
const defaultRefreshRate = time.Minute
// ActiveUsers tracks the number of users that have authenticated within the past hour. // ActiveUsers tracks the number of users that have authenticated within the past hour.
func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
if duration == 0 { if duration == 0 {
duration = 5 * time.Minute duration = defaultRefreshRate
} }
gauge := prometheus.NewGauge(prometheus.GaugeOpts{ gauge := prometheus.NewGauge(prometheus.GaugeOpts{
@ -72,36 +74,42 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
} }
// Workspaces tracks the total number of workspaces with labels on status. // Workspaces tracks the total number of workspaces with labels on status.
func Workspaces(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) { func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
if duration == 0 { if duration == 0 {
duration = 5 * time.Minute duration = defaultRefreshRate
} }
gauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{ workspaceLatestBuildTotals := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "coderd", Namespace: "coderd",
Subsystem: "api", Subsystem: "api",
Name: "workspace_latest_build_total", Name: "workspace_latest_build_total",
Help: "The latest workspace builds with a status.", Help: "The current number of workspace builds by status.",
}, []string{"status"}) }, []string{"status"})
err := registerer.Register(gauge) if err := registerer.Register(workspaceLatestBuildTotals); err != nil {
if err != nil { return nil, err
}
workspaceLatestBuildStatuses := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "coderd",
Name: "workspace_latest_build_status",
Help: "The current workspace statuses by template, transition, and owner.",
}, []string{"status", "template_name", "template_version", "workspace_owner", "workspace_transition"})
if err := registerer.Register(workspaceLatestBuildStatuses); err != nil {
return nil, err return nil, err
} }
// This exists so the prometheus metric exports immediately when set.
// It helps with tests so they don't have to wait for a tick.
gauge.WithLabelValues("pending").Set(0)
ctx, cancelFunc := context.WithCancel(ctx) ctx, cancelFunc := context.WithCancel(ctx)
done := make(chan struct{}) done := make(chan struct{})
// Use time.Nanosecond to force an initial tick. It will be reset to the updateWorkspaceTotals := func() {
// correct duration after executing once.
ticker := time.NewTicker(time.Nanosecond)
doTick := func() {
defer ticker.Reset(duration)
builds, err := db.GetLatestWorkspaceBuilds(ctx) builds, err := db.GetLatestWorkspaceBuilds(ctx)
if err != nil { if err != nil {
if errors.Is(err, sql.ErrNoRows) {
// clear all series if there are no database entries
workspaceLatestBuildTotals.Reset()
}
logger.Warn(ctx, "failed to load latest workspace builds", slog.Error(err))
return return
} }
jobIDs := make([]uuid.UUID, 0, len(builds)) jobIDs := make([]uuid.UUID, 0, len(builds))
@ -110,16 +118,53 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
} }
jobs, err := db.GetProvisionerJobsByIDs(ctx, jobIDs) jobs, err := db.GetProvisionerJobsByIDs(ctx, jobIDs)
if err != nil { if err != nil {
ids := make([]string, 0, len(jobIDs))
for _, id := range jobIDs {
ids = append(ids, id.String())
}
logger.Warn(ctx, "failed to load provisioner jobs", slog.F("ids", ids), slog.Error(err))
return return
} }
gauge.Reset() workspaceLatestBuildTotals.Reset()
for _, job := range jobs { for _, job := range jobs {
status := codersdk.ProvisionerJobStatus(job.JobStatus) status := codersdk.ProvisionerJobStatus(job.JobStatus)
gauge.WithLabelValues(string(status)).Add(1) workspaceLatestBuildTotals.WithLabelValues(string(status)).Add(1)
} }
} }
// updateWorkspaceStatuses rebuilds the workspaceLatestBuildStatuses gauge
// from the rows returned by db.GetWorkspaces (Deleted: false, no summary row).
updateWorkspaceStatuses := func() {
	params := database.GetWorkspacesParams{
		Deleted:     false,
		WithSummary: false,
	}
	workspaces, err := db.GetWorkspaces(ctx, params)
	if err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			// clear all series if there are no database entries
			workspaceLatestBuildStatuses.Reset()
		}

		logger.Warn(ctx, "failed to load active workspaces", slog.Error(err))
		return
	}

	// Start from an empty vector so label combinations that no longer match
	// any workspace are not carried over from the previous refresh.
	workspaceLatestBuildStatuses.Reset()
	for i := range workspaces {
		row := &workspaces[i]
		// Label order must match the order the gauge vector was declared with:
		// status, template_name, template_version, workspace_owner,
		// workspace_transition.
		labels := []string{
			string(row.LatestBuildStatus),
			row.TemplateName,
			row.TemplateVersionName.String,
			row.Username,
			string(row.LatestBuildTransition),
		}
		workspaceLatestBuildStatuses.WithLabelValues(labels...).Add(1)
	}
}
// Use time.Nanosecond to force an initial tick. It will be reset to the
// correct duration after executing once.
ticker := time.NewTicker(time.Nanosecond)
doTick := func() {
defer ticker.Reset(duration)
updateWorkspaceTotals()
updateWorkspaceStatuses()
}
go func() { go func() {
defer close(done) defer close(done)
defer ticker.Stop() defer ticker.Stop()
@ -141,7 +186,7 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
// Agents tracks the total number of workspaces with labels on status. // Agents tracks the total number of workspaces with labels on status.
func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, coordinator *atomic.Pointer[tailnet.Coordinator], derpMapFn func() *tailcfg.DERPMap, agentInactiveDisconnectTimeout, duration time.Duration) (func(), error) { func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, coordinator *atomic.Pointer[tailnet.Coordinator], derpMapFn func() *tailcfg.DERPMap, agentInactiveDisconnectTimeout, duration time.Duration) (func(), error) {
if duration == 0 { if duration == 0 {
duration = 1 * time.Minute duration = defaultRefreshRate
} }
agentsGauge := NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{ agentsGauge := NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{
@ -330,7 +375,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration, aggregateByLabels []string) (func(), error) { func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration, aggregateByLabels []string) (func(), error) {
if duration == 0 { if duration == 0 {
duration = 1 * time.Minute duration = defaultRefreshRate
} }
if len(aggregateByLabels) == 0 { if len(aggregateByLabels) == 0 {

View File

@ -11,6 +11,7 @@ import (
"testing" "testing"
"time" "time"
"github.com/coder/coder/v2/cryptorand"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -110,89 +111,9 @@ func TestActiveUsers(t *testing.T) {
} }
} }
func TestWorkspaces(t *testing.T) { func TestWorkspaceLatestBuildTotals(t *testing.T) {
t.Parallel() t.Parallel()
insertRunning := func(db database.Store) database.ProvisionerJob {
job, err := db.InsertProvisionerJob(context.Background(), database.InsertProvisionerJobParams{
ID: uuid.New(),
CreatedAt: dbtime.Now(),
UpdatedAt: dbtime.Now(),
Provisioner: database.ProvisionerTypeEcho,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeWorkspaceBuild,
})
require.NoError(t, err)
err = db.InsertWorkspaceBuild(context.Background(), database.InsertWorkspaceBuildParams{
ID: uuid.New(),
WorkspaceID: uuid.New(),
JobID: job.ID,
BuildNumber: 1,
Transition: database.WorkspaceTransitionStart,
Reason: database.BuildReasonInitiator,
})
require.NoError(t, err)
// This marks the job as started.
_, err = db.AcquireProvisionerJob(context.Background(), database.AcquireProvisionerJobParams{
OrganizationID: job.OrganizationID,
StartedAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
Types: []database.ProvisionerType{database.ProvisionerTypeEcho},
})
require.NoError(t, err)
return job
}
insertCanceled := func(db database.Store) {
job := insertRunning(db)
err := db.UpdateProvisionerJobWithCancelByID(context.Background(), database.UpdateProvisionerJobWithCancelByIDParams{
ID: job.ID,
CanceledAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
})
require.NoError(t, err)
err = db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{
ID: job.ID,
CompletedAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
})
require.NoError(t, err)
}
insertFailed := func(db database.Store) {
job := insertRunning(db)
err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{
ID: job.ID,
CompletedAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
Error: sql.NullString{
String: "failed",
Valid: true,
},
})
require.NoError(t, err)
}
insertSuccess := func(db database.Store) {
job := insertRunning(db)
err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{
ID: job.ID,
CompletedAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
})
require.NoError(t, err)
}
for _, tc := range []struct { for _, tc := range []struct {
Name string Name string
Database func() database.Store Database func() database.Store
@ -208,13 +129,13 @@ func TestWorkspaces(t *testing.T) {
Name: "Multiple", Name: "Multiple",
Database: func() database.Store { Database: func() database.Store {
db := dbmem.New() db := dbmem.New()
insertCanceled(db) insertCanceled(t, db)
insertFailed(db) insertFailed(t, db)
insertFailed(db) insertFailed(t, db)
insertSuccess(db) insertSuccess(t, db)
insertSuccess(db) insertSuccess(t, db)
insertSuccess(db) insertSuccess(t, db)
insertRunning(db) insertRunning(t, db)
return db return db
}, },
Total: 7, Total: 7,
@ -229,29 +150,32 @@ func TestWorkspaces(t *testing.T) {
t.Run(tc.Name, func(t *testing.T) { t.Run(tc.Name, func(t *testing.T) {
t.Parallel() t.Parallel()
registry := prometheus.NewRegistry() registry := prometheus.NewRegistry()
closeFunc, err := prometheusmetrics.Workspaces(context.Background(), registry, tc.Database(), time.Millisecond) closeFunc, err := prometheusmetrics.Workspaces(context.Background(), slogtest.Make(t, nil).Leveled(slog.LevelWarn), registry, tc.Database(), testutil.IntervalFast)
require.NoError(t, err) require.NoError(t, err)
t.Cleanup(closeFunc) t.Cleanup(closeFunc)
require.Eventually(t, func() bool { require.Eventually(t, func() bool {
metrics, err := registry.Gather() metrics, err := registry.Gather()
assert.NoError(t, err) assert.NoError(t, err)
if len(metrics) < 1 {
return false
}
sum := 0 sum := 0
for _, metric := range metrics[0].Metric { for _, m := range metrics {
count, ok := tc.Status[codersdk.ProvisionerJobStatus(metric.Label[0].GetValue())] if m.GetName() != "coderd_api_workspace_latest_build_total" {
if metric.Gauge.GetValue() == 0 {
continue continue
} }
if !ok {
t.Fail() for _, metric := range m.Metric {
count, ok := tc.Status[codersdk.ProvisionerJobStatus(metric.Label[0].GetValue())]
if metric.Gauge.GetValue() == 0 {
continue
}
if !ok {
t.Fail()
}
if metric.Gauge.GetValue() != float64(count) {
return false
}
sum += int(metric.Gauge.GetValue())
} }
if metric.Gauge.GetValue() != float64(count) {
return false
}
sum += int(metric.Gauge.GetValue())
} }
t.Logf("sum %d == total %d", sum, tc.Total) t.Logf("sum %d == total %d", sum, tc.Total)
return sum == tc.Total return sum == tc.Total
@ -260,6 +184,90 @@ func TestWorkspaces(t *testing.T) {
} }
} }
// TestWorkspaceLatestBuildStatuses checks that the
// coderd_workspace_latest_build_status gauge registered by
// prometheusmetrics.Workspaces reports the expected number of workspaces for
// each provisioner job status.
func TestWorkspaceLatestBuildStatuses(t *testing.T) {
	t.Parallel()

	for _, tc := range []struct {
		Name string
		// Database builds the store for the case. It receives the subtest's
		// *testing.T so that a failing fixture insert fails (and aborts) the
		// subtest that ran it; FailNow must be called from the goroutine
		// running the test it belongs to, which rules out using the parent's t
		// from inside a parallel subtest.
		Database           func(t *testing.T) database.Store
		ExpectedWorkspaces int
		ExpectedStatuses   map[codersdk.ProvisionerJobStatus]int
	}{{
		Name: "None",
		Database: func(*testing.T) database.Store {
			return dbmem.New()
		},
		ExpectedWorkspaces: 0,
	}, {
		Name: "Multiple",
		Database: func(t *testing.T) database.Store {
			db := dbmem.New()
			insertTemplates(t, db)
			insertCanceled(t, db)
			insertFailed(t, db)
			insertFailed(t, db)
			insertSuccess(t, db)
			insertSuccess(t, db)
			insertSuccess(t, db)
			insertRunning(t, db)
			return db
		},
		ExpectedWorkspaces: 7,
		ExpectedStatuses: map[codersdk.ProvisionerJobStatus]int{
			codersdk.ProvisionerJobCanceled:  1,
			codersdk.ProvisionerJobFailed:    2,
			codersdk.ProvisionerJobSucceeded: 3,
			codersdk.ProvisionerJobRunning:   1,
		},
	}} {
		tc := tc
		t.Run(tc.Name, func(t *testing.T) {
			t.Parallel()
			registry := prometheus.NewRegistry()
			closeFunc, err := prometheusmetrics.Workspaces(context.Background(), slogtest.Make(t, nil), registry, tc.Database(t), testutil.IntervalFast)
			require.NoError(t, err)
			t.Cleanup(closeFunc)

			require.Eventually(t, func() bool {
				metrics, err := registry.Gather()
				assert.NoError(t, err)

				// Sum the gauge values per "status" label across all series of
				// the metric under test; other label dimensions are ignored.
				stMap := map[codersdk.ProvisionerJobStatus]int{}
				for _, m := range metrics {
					if m.GetName() != "coderd_workspace_latest_build_status" {
						continue
					}

					for _, metric := range m.Metric {
						for _, l := range metric.Label {
							if l == nil {
								continue
							}

							if l.GetName() == "status" {
								status := codersdk.ProvisionerJobStatus(l.GetValue())
								stMap[status] += int(metric.Gauge.GetValue())
							}
						}
					}
				}

				// Every observed status must match its expected count; a
				// status that is expected but not yet observed is caught by
				// the total comparison below.
				stSum := 0
				for st, count := range stMap {
					if tc.ExpectedStatuses[st] != count {
						return false
					}

					stSum += count
				}

				t.Logf("status series = %d, expected == %d", stSum, tc.ExpectedWorkspaces)
				return stSum == tc.ExpectedWorkspaces
			}, testutil.WaitShort, testutil.IntervalFast)
		})
	}
}
func TestAgents(t *testing.T) { func TestAgents(t *testing.T) {
t.Parallel() t.Parallel()
@ -601,3 +609,153 @@ func prepareWorkspaceAndAgent(t *testing.T, client *codersdk.Client, user coders
agentClient.SetSessionToken(authToken) agentClient.SetSessionToken(authToken)
return agentClient return agentClient
} }
// Fixture IDs for the two templates (and one version each) that
// insertTemplates creates and insertRunning assigns to workspaces and builds.
// They are generated once at package initialization, so every test in this
// package sees the same values.
var (
templateA = uuid.New()
templateVersionA = uuid.New()
templateB = uuid.New()
templateVersionB = uuid.New()
)
// insertTemplates seeds db with the two fixture templates ("template-a" and
// "template-b") and one version for each, using the package-level fixture IDs
// so that workspaces and builds inserted afterwards can reference them.
// Any insert error fails the test immediately.
func insertTemplates(t *testing.T, db database.Store) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	ctx := context.Background()
	fixtures := []struct {
		templateID   uuid.UUID
		templateName string
		versionID    uuid.UUID
		versionName  string
	}{
		{templateA, "template-a", templateVersionA, "version-1a"},
		{templateB, "template-b", templateVersionB, "version-1b"},
	}

	for _, f := range fixtures {
		require.NoError(t, db.InsertTemplate(ctx, database.InsertTemplateParams{
			ID:                  f.templateID,
			Name:                f.templateName,
			Provisioner:         database.ProvisionerTypeTerraform,
			MaxPortSharingLevel: database.AppSharingLevelAuthenticated,
		}))

		require.NoError(t, db.InsertTemplateVersion(ctx, database.InsertTemplateVersionParams{
			ID: f.versionID,
			// Valid must be true; a NullUUID with Valid unset represents NULL,
			// which would leave the version unlinked from its template.
			TemplateID: uuid.NullUUID{UUID: f.templateID, Valid: true},
			Name:       f.versionName,
		}))
	}
}
// insertUser inserts a user with a fresh ID, a random username obtained from
// cryptorand.String(8), and login type "none", and returns the stored user.
// Any error fails the test immediately.
func insertUser(t *testing.T, db database.Store) database.User {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	username, err := cryptorand.String(8)
	require.NoError(t, err)

	user, err := db.InsertUser(context.Background(), database.InsertUserParams{
		ID:        uuid.New(),
		Username:  username,
		LoginType: database.LoginTypeNone,
	})
	require.NoError(t, err)

	return user
}
// insertRunning creates a workspace owned by a newly inserted user, a
// workspace-build provisioner job, and a "start" build linking the two, then
// acquires a provisioner job so that the build counts as running.
//
// The workspace is assigned to one of the two fixture templates at random, so
// insertTemplates should have been called on db beforehand when template
// details matter. The returned job is the record as returned by
// InsertProvisionerJob, i.e. from before the acquisition.
func insertRunning(t *testing.T, db database.Store) database.ProvisionerJob {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	ctx := context.Background()

	// Pick which fixture template (and matching version) this workspace uses.
	rnd, err := cryptorand.Intn(10)
	require.NoError(t, err)
	template, templateVersion := templateA, templateVersionA
	if rnd > 5 {
		template, templateVersion = templateB, templateVersionB
	}

	workspace, err := db.InsertWorkspace(ctx, database.InsertWorkspaceParams{
		ID:               uuid.New(),
		OwnerID:          insertUser(t, db).ID,
		Name:             uuid.NewString(),
		TemplateID:       template,
		AutomaticUpdates: database.AutomaticUpdatesNever,
	})
	require.NoError(t, err)

	job, err := db.InsertProvisionerJob(ctx, database.InsertProvisionerJobParams{
		ID:            uuid.New(),
		CreatedAt:     dbtime.Now(),
		UpdatedAt:     dbtime.Now(),
		Provisioner:   database.ProvisionerTypeEcho,
		StorageMethod: database.ProvisionerStorageMethodFile,
		Type:          database.ProvisionerJobTypeWorkspaceBuild,
	})
	require.NoError(t, err)

	err = db.InsertWorkspaceBuild(ctx, database.InsertWorkspaceBuildParams{
		ID:                uuid.New(),
		WorkspaceID:       workspace.ID,
		JobID:             job.ID,
		BuildNumber:       1,
		Transition:        database.WorkspaceTransitionStart,
		Reason:            database.BuildReasonInitiator,
		TemplateVersionID: templateVersion,
	})
	require.NoError(t, err)

	// This marks the job as started.
	// NOTE(review): the acquire call is not keyed by job ID; presumably it
	// picks the job inserted above because it is the only one still pending.
	_, err = db.AcquireProvisionerJob(ctx, database.AcquireProvisionerJobParams{
		OrganizationID: job.OrganizationID,
		StartedAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
		Types: []database.ProvisionerType{database.ProvisionerTypeEcho},
	})
	require.NoError(t, err)

	return job
}
// insertCanceled inserts a running workspace build via insertRunning and then
// records both a cancellation time and a completion time on its provisioner
// job. Any error fails the test immediately.
func insertCanceled(t *testing.T, db database.Store) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	ctx := context.Background()
	job := insertRunning(t, db)

	err := db.UpdateProvisionerJobWithCancelByID(ctx, database.UpdateProvisionerJobWithCancelByIDParams{
		ID: job.ID,
		CanceledAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
	})
	require.NoError(t, err)

	err = db.UpdateProvisionerJobWithCompleteByID(ctx, database.UpdateProvisionerJobWithCompleteByIDParams{
		ID: job.ID,
		CompletedAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
	})
	require.NoError(t, err)
}
// insertFailed inserts a running workspace build via insertRunning and then
// completes its provisioner job with the error message "failed".
// Any error from the store fails the test immediately.
func insertFailed(t *testing.T, db database.Store) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	job := insertRunning(t, db)

	err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{
		ID: job.ID,
		CompletedAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
		Error: sql.NullString{
			String: "failed",
			Valid:  true,
		},
	})
	require.NoError(t, err)
}
// insertSuccess inserts a running workspace build via insertRunning and then
// completes its provisioner job without an error.
// Any error from the store fails the test immediately.
func insertSuccess(t *testing.T, db database.Store) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	job := insertRunning(t, db)

	err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{
		ID: job.ID,
		CompletedAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
	})
	require.NoError(t, err)
}