feat: expose workspace statuses (with details) as a prometheus metric (#12762)

Implements #12462
This commit is contained in:
Danny Kopping 2024-04-02 09:57:36 +02:00 committed by GitHub
parent 114830de26
commit 79fb8e43c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 375 additions and 161 deletions

View File

@ -209,7 +209,7 @@ func enablePrometheus(
}
afterCtx(ctx, closeUsersFunc)
closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.PrometheusRegistry, options.Database, 0)
closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.Logger.Named("workspaces_metrics"), options.PrometheusRegistry, options.Database, 0)
if err != nil {
return nil, xerrors.Errorf("register workspaces prometheus metric: %w", err)
}

View File

@ -973,7 +973,6 @@ func TestServer(t *testing.T) {
scanner := bufio.NewScanner(res.Body)
hasActiveUsers := false
hasWorkspaces := false
for scanner.Scan() {
// This metric is manually registered to be tracked in the server. That's
// why we test it's tracked here.
@ -981,10 +980,6 @@ func TestServer(t *testing.T) {
hasActiveUsers = true
continue
}
if strings.HasPrefix(scanner.Text(), "coderd_api_workspace_latest_build_total") {
hasWorkspaces = true
continue
}
if strings.HasPrefix(scanner.Text(), "coderd_db_query_latencies_seconds") {
t.Fatal("db metrics should not be tracked when --prometheus-collect-db-metrics is not enabled")
}
@ -992,7 +987,6 @@ func TestServer(t *testing.T) {
}
require.NoError(t, scanner.Err())
require.True(t, hasActiveUsers)
require.True(t, hasWorkspaces)
})
t.Run("DBMetricsEnabled", func(t *testing.T) {

View File

@ -404,6 +404,16 @@ func (q *FakeQuerier) convertToWorkspaceRowsNoLock(ctx context.Context, workspac
break
}
}
if pj, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID); err == nil {
wr.LatestBuildStatus = pj.JobStatus
}
wr.LatestBuildTransition = build.Transition
}
if u, err := q.getUserByIDNoLock(w.OwnerID); err == nil {
wr.Username = u.Username
}
rows = append(rows, wr)

View File

@ -266,6 +266,7 @@ func (q *sqlQuerier) GetAuthorizedWorkspaces(ctx context.Context, arg GetWorkspa
&i.LatestBuildCanceledAt,
&i.LatestBuildError,
&i.LatestBuildTransition,
&i.LatestBuildStatus,
&i.Count,
); err != nil {
return nil, err

View File

@ -12280,7 +12280,8 @@ SELECT
latest_build.completed_at as latest_build_completed_at,
latest_build.canceled_at as latest_build_canceled_at,
latest_build.error as latest_build_error,
latest_build.transition as latest_build_transition
latest_build.transition as latest_build_transition,
latest_build.job_status as latest_build_status
FROM
workspaces
JOIN
@ -12302,7 +12303,7 @@ LEFT JOIN LATERAL (
provisioner_jobs.job_status
FROM
workspace_builds
LEFT JOIN
JOIN
provisioner_jobs
ON
provisioner_jobs.id = workspace_builds.job_id
@ -12507,7 +12508,7 @@ WHERE
-- @authorize_filter
), filtered_workspaces_order AS (
SELECT
fw.id, fw.created_at, fw.updated_at, fw.owner_id, fw.organization_id, fw.template_id, fw.deleted, fw.name, fw.autostart_schedule, fw.ttl, fw.last_used_at, fw.dormant_at, fw.deleting_at, fw.automatic_updates, fw.favorite, fw.template_name, fw.template_version_id, fw.template_version_name, fw.username, fw.latest_build_completed_at, fw.latest_build_canceled_at, fw.latest_build_error, fw.latest_build_transition
fw.id, fw.created_at, fw.updated_at, fw.owner_id, fw.organization_id, fw.template_id, fw.deleted, fw.name, fw.autostart_schedule, fw.ttl, fw.last_used_at, fw.dormant_at, fw.deleting_at, fw.automatic_updates, fw.favorite, fw.template_name, fw.template_version_id, fw.template_version_name, fw.username, fw.latest_build_completed_at, fw.latest_build_canceled_at, fw.latest_build_error, fw.latest_build_transition, fw.latest_build_status
FROM
filtered_workspaces fw
ORDER BY
@ -12528,7 +12529,7 @@ WHERE
$19
), filtered_workspaces_order_with_summary AS (
SELECT
fwo.id, fwo.created_at, fwo.updated_at, fwo.owner_id, fwo.organization_id, fwo.template_id, fwo.deleted, fwo.name, fwo.autostart_schedule, fwo.ttl, fwo.last_used_at, fwo.dormant_at, fwo.deleting_at, fwo.automatic_updates, fwo.favorite, fwo.template_name, fwo.template_version_id, fwo.template_version_name, fwo.username, fwo.latest_build_completed_at, fwo.latest_build_canceled_at, fwo.latest_build_error, fwo.latest_build_transition
fwo.id, fwo.created_at, fwo.updated_at, fwo.owner_id, fwo.organization_id, fwo.template_id, fwo.deleted, fwo.name, fwo.autostart_schedule, fwo.ttl, fwo.last_used_at, fwo.dormant_at, fwo.deleting_at, fwo.automatic_updates, fwo.favorite, fwo.template_name, fwo.template_version_id, fwo.template_version_name, fwo.username, fwo.latest_build_completed_at, fwo.latest_build_canceled_at, fwo.latest_build_error, fwo.latest_build_transition, fwo.latest_build_status
FROM
filtered_workspaces_order fwo
-- Return a technical summary row with total count of workspaces.
@ -12558,7 +12559,8 @@ WHERE
'0001-01-01 00:00:00+00'::timestamptz, -- latest_build_completed_at,
'0001-01-01 00:00:00+00'::timestamptz, -- latest_build_canceled_at,
'', -- latest_build_error
'start'::workspace_transition -- latest_build_transition
'start'::workspace_transition, -- latest_build_transition
'unknown'::provisioner_job_status -- latest_build_status
WHERE
$21 :: boolean = true
), total_count AS (
@ -12568,7 +12570,7 @@ WHERE
filtered_workspaces
)
SELECT
fwos.id, fwos.created_at, fwos.updated_at, fwos.owner_id, fwos.organization_id, fwos.template_id, fwos.deleted, fwos.name, fwos.autostart_schedule, fwos.ttl, fwos.last_used_at, fwos.dormant_at, fwos.deleting_at, fwos.automatic_updates, fwos.favorite, fwos.template_name, fwos.template_version_id, fwos.template_version_name, fwos.username, fwos.latest_build_completed_at, fwos.latest_build_canceled_at, fwos.latest_build_error, fwos.latest_build_transition,
fwos.id, fwos.created_at, fwos.updated_at, fwos.owner_id, fwos.organization_id, fwos.template_id, fwos.deleted, fwos.name, fwos.autostart_schedule, fwos.ttl, fwos.last_used_at, fwos.dormant_at, fwos.deleting_at, fwos.automatic_updates, fwos.favorite, fwos.template_name, fwos.template_version_id, fwos.template_version_name, fwos.username, fwos.latest_build_completed_at, fwos.latest_build_canceled_at, fwos.latest_build_error, fwos.latest_build_transition, fwos.latest_build_status,
tc.count
FROM
filtered_workspaces_order_with_summary fwos
@ -12601,30 +12603,31 @@ type GetWorkspacesParams struct {
}
type GetWorkspacesRow struct {
ID uuid.UUID `db:"id" json:"id"`
CreatedAt time.Time `db:"created_at" json:"created_at"`
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
OwnerID uuid.UUID `db:"owner_id" json:"owner_id"`
OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"`
TemplateID uuid.UUID `db:"template_id" json:"template_id"`
Deleted bool `db:"deleted" json:"deleted"`
Name string `db:"name" json:"name"`
AutostartSchedule sql.NullString `db:"autostart_schedule" json:"autostart_schedule"`
Ttl sql.NullInt64 `db:"ttl" json:"ttl"`
LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"`
DormantAt sql.NullTime `db:"dormant_at" json:"dormant_at"`
DeletingAt sql.NullTime `db:"deleting_at" json:"deleting_at"`
AutomaticUpdates AutomaticUpdates `db:"automatic_updates" json:"automatic_updates"`
Favorite bool `db:"favorite" json:"favorite"`
TemplateName string `db:"template_name" json:"template_name"`
TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"`
TemplateVersionName sql.NullString `db:"template_version_name" json:"template_version_name"`
Username string `db:"username" json:"username"`
LatestBuildCompletedAt sql.NullTime `db:"latest_build_completed_at" json:"latest_build_completed_at"`
LatestBuildCanceledAt sql.NullTime `db:"latest_build_canceled_at" json:"latest_build_canceled_at"`
LatestBuildError sql.NullString `db:"latest_build_error" json:"latest_build_error"`
LatestBuildTransition WorkspaceTransition `db:"latest_build_transition" json:"latest_build_transition"`
Count int64 `db:"count" json:"count"`
ID uuid.UUID `db:"id" json:"id"`
CreatedAt time.Time `db:"created_at" json:"created_at"`
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
OwnerID uuid.UUID `db:"owner_id" json:"owner_id"`
OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"`
TemplateID uuid.UUID `db:"template_id" json:"template_id"`
Deleted bool `db:"deleted" json:"deleted"`
Name string `db:"name" json:"name"`
AutostartSchedule sql.NullString `db:"autostart_schedule" json:"autostart_schedule"`
Ttl sql.NullInt64 `db:"ttl" json:"ttl"`
LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"`
DormantAt sql.NullTime `db:"dormant_at" json:"dormant_at"`
DeletingAt sql.NullTime `db:"deleting_at" json:"deleting_at"`
AutomaticUpdates AutomaticUpdates `db:"automatic_updates" json:"automatic_updates"`
Favorite bool `db:"favorite" json:"favorite"`
TemplateName string `db:"template_name" json:"template_name"`
TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"`
TemplateVersionName sql.NullString `db:"template_version_name" json:"template_version_name"`
Username string `db:"username" json:"username"`
LatestBuildCompletedAt sql.NullTime `db:"latest_build_completed_at" json:"latest_build_completed_at"`
LatestBuildCanceledAt sql.NullTime `db:"latest_build_canceled_at" json:"latest_build_canceled_at"`
LatestBuildError sql.NullString `db:"latest_build_error" json:"latest_build_error"`
LatestBuildTransition WorkspaceTransition `db:"latest_build_transition" json:"latest_build_transition"`
LatestBuildStatus ProvisionerJobStatus `db:"latest_build_status" json:"latest_build_status"`
Count int64 `db:"count" json:"count"`
}
// build_params is used to filter by build parameters if present.
@ -12685,6 +12688,7 @@ func (q *sqlQuerier) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams)
&i.LatestBuildCanceledAt,
&i.LatestBuildError,
&i.LatestBuildTransition,
&i.LatestBuildStatus,
&i.Count,
); err != nil {
return nil, err

View File

@ -96,7 +96,8 @@ SELECT
latest_build.completed_at as latest_build_completed_at,
latest_build.canceled_at as latest_build_canceled_at,
latest_build.error as latest_build_error,
latest_build.transition as latest_build_transition
latest_build.transition as latest_build_transition,
latest_build.job_status as latest_build_status
FROM
workspaces
JOIN
@ -118,7 +119,7 @@ LEFT JOIN LATERAL (
provisioner_jobs.job_status
FROM
workspace_builds
LEFT JOIN
JOIN
provisioner_jobs
ON
provisioner_jobs.id = workspace_builds.job_id
@ -374,7 +375,8 @@ WHERE
'0001-01-01 00:00:00+00'::timestamptz, -- latest_build_completed_at,
'0001-01-01 00:00:00+00'::timestamptz, -- latest_build_canceled_at,
'', -- latest_build_error
'start'::workspace_transition -- latest_build_transition
'start'::workspace_transition, -- latest_build_transition
'unknown'::provisioner_job_status -- latest_build_status
WHERE
@with_summary :: boolean = true
), total_count AS (

View File

@ -24,10 +24,12 @@ import (
"github.com/coder/coder/v2/tailnet"
)
const defaultRefreshRate = time.Minute
// ActiveUsers tracks the number of users that have authenticated within the past hour.
func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
if duration == 0 {
duration = 5 * time.Minute
duration = defaultRefreshRate
}
gauge := prometheus.NewGauge(prometheus.GaugeOpts{
@ -72,36 +74,42 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
}
// Workspaces tracks the total number of workspaces with labels on status.
func Workspaces(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
if duration == 0 {
duration = 5 * time.Minute
duration = defaultRefreshRate
}
gauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
workspaceLatestBuildTotals := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "workspace_latest_build_total",
Help: "The latest workspace builds with a status.",
Help: "The current number of workspace builds by status.",
}, []string{"status"})
err := registerer.Register(gauge)
if err != nil {
if err := registerer.Register(workspaceLatestBuildTotals); err != nil {
return nil, err
}
workspaceLatestBuildStatuses := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "coderd",
Name: "workspace_latest_build_status",
Help: "The current workspace statuses by template, transition, and owner.",
}, []string{"status", "template_name", "template_version", "workspace_owner", "workspace_transition"})
if err := registerer.Register(workspaceLatestBuildStatuses); err != nil {
return nil, err
}
// This exists so the prometheus metric exports immediately when set.
// It helps with tests so they don't have to wait for a tick.
gauge.WithLabelValues("pending").Set(0)
ctx, cancelFunc := context.WithCancel(ctx)
done := make(chan struct{})
// Use time.Nanosecond to force an initial tick. It will be reset to the
// correct duration after executing once.
ticker := time.NewTicker(time.Nanosecond)
doTick := func() {
defer ticker.Reset(duration)
updateWorkspaceTotals := func() {
builds, err := db.GetLatestWorkspaceBuilds(ctx)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
// clear all series if there are no database entries
workspaceLatestBuildTotals.Reset()
}
logger.Warn(ctx, "failed to load latest workspace builds", slog.Error(err))
return
}
jobIDs := make([]uuid.UUID, 0, len(builds))
@ -110,16 +118,53 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
}
jobs, err := db.GetProvisionerJobsByIDs(ctx, jobIDs)
if err != nil {
ids := make([]string, 0, len(jobIDs))
for _, id := range jobIDs {
ids = append(ids, id.String())
}
logger.Warn(ctx, "failed to load provisioner jobs", slog.F("ids", ids), slog.Error(err))
return
}
gauge.Reset()
workspaceLatestBuildTotals.Reset()
for _, job := range jobs {
status := codersdk.ProvisionerJobStatus(job.JobStatus)
gauge.WithLabelValues(string(status)).Add(1)
workspaceLatestBuildTotals.WithLabelValues(string(status)).Add(1)
}
}
updateWorkspaceStatuses := func() {
ws, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
Deleted: false,
WithSummary: false,
})
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
// clear all series if there are no database entries
workspaceLatestBuildStatuses.Reset()
}
logger.Warn(ctx, "failed to load active workspaces", slog.Error(err))
return
}
workspaceLatestBuildStatuses.Reset()
for _, w := range ws {
workspaceLatestBuildStatuses.WithLabelValues(string(w.LatestBuildStatus), w.TemplateName, w.TemplateVersionName.String, w.Username, string(w.LatestBuildTransition)).Add(1)
}
}
// Use time.Nanosecond to force an initial tick. It will be reset to the
// correct duration after executing once.
ticker := time.NewTicker(time.Nanosecond)
doTick := func() {
defer ticker.Reset(duration)
updateWorkspaceTotals()
updateWorkspaceStatuses()
}
go func() {
defer close(done)
defer ticker.Stop()
@ -141,7 +186,7 @@ func Workspaces(ctx context.Context, registerer prometheus.Registerer, db databa
// Agents tracks the total number of workspaces with labels on status.
func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, coordinator *atomic.Pointer[tailnet.Coordinator], derpMapFn func() *tailcfg.DERPMap, agentInactiveDisconnectTimeout, duration time.Duration) (func(), error) {
if duration == 0 {
duration = 1 * time.Minute
duration = defaultRefreshRate
}
agentsGauge := NewCachedGaugeVec(prometheus.NewGaugeVec(prometheus.GaugeOpts{
@ -330,7 +375,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, initialCreateAfter time.Time, duration time.Duration, aggregateByLabels []string) (func(), error) {
if duration == 0 {
duration = 1 * time.Minute
duration = defaultRefreshRate
}
if len(aggregateByLabels) == 0 {

View File

@ -11,6 +11,7 @@ import (
"testing"
"time"
"github.com/coder/coder/v2/cryptorand"
"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
@ -110,89 +111,9 @@ func TestActiveUsers(t *testing.T) {
}
}
func TestWorkspaces(t *testing.T) {
func TestWorkspaceLatestBuildTotals(t *testing.T) {
t.Parallel()
insertRunning := func(db database.Store) database.ProvisionerJob {
job, err := db.InsertProvisionerJob(context.Background(), database.InsertProvisionerJobParams{
ID: uuid.New(),
CreatedAt: dbtime.Now(),
UpdatedAt: dbtime.Now(),
Provisioner: database.ProvisionerTypeEcho,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeWorkspaceBuild,
})
require.NoError(t, err)
err = db.InsertWorkspaceBuild(context.Background(), database.InsertWorkspaceBuildParams{
ID: uuid.New(),
WorkspaceID: uuid.New(),
JobID: job.ID,
BuildNumber: 1,
Transition: database.WorkspaceTransitionStart,
Reason: database.BuildReasonInitiator,
})
require.NoError(t, err)
// This marks the job as started.
_, err = db.AcquireProvisionerJob(context.Background(), database.AcquireProvisionerJobParams{
OrganizationID: job.OrganizationID,
StartedAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
Types: []database.ProvisionerType{database.ProvisionerTypeEcho},
})
require.NoError(t, err)
return job
}
insertCanceled := func(db database.Store) {
job := insertRunning(db)
err := db.UpdateProvisionerJobWithCancelByID(context.Background(), database.UpdateProvisionerJobWithCancelByIDParams{
ID: job.ID,
CanceledAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
})
require.NoError(t, err)
err = db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{
ID: job.ID,
CompletedAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
})
require.NoError(t, err)
}
insertFailed := func(db database.Store) {
job := insertRunning(db)
err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{
ID: job.ID,
CompletedAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
Error: sql.NullString{
String: "failed",
Valid: true,
},
})
require.NoError(t, err)
}
insertSuccess := func(db database.Store) {
job := insertRunning(db)
err := db.UpdateProvisionerJobWithCompleteByID(context.Background(), database.UpdateProvisionerJobWithCompleteByIDParams{
ID: job.ID,
CompletedAt: sql.NullTime{
Time: dbtime.Now(),
Valid: true,
},
})
require.NoError(t, err)
}
for _, tc := range []struct {
Name string
Database func() database.Store
@ -208,13 +129,13 @@ func TestWorkspaces(t *testing.T) {
Name: "Multiple",
Database: func() database.Store {
db := dbmem.New()
insertCanceled(db)
insertFailed(db)
insertFailed(db)
insertSuccess(db)
insertSuccess(db)
insertSuccess(db)
insertRunning(db)
insertCanceled(t, db)
insertFailed(t, db)
insertFailed(t, db)
insertSuccess(t, db)
insertSuccess(t, db)
insertSuccess(t, db)
insertRunning(t, db)
return db
},
Total: 7,
@ -229,29 +150,32 @@ func TestWorkspaces(t *testing.T) {
t.Run(tc.Name, func(t *testing.T) {
t.Parallel()
registry := prometheus.NewRegistry()
closeFunc, err := prometheusmetrics.Workspaces(context.Background(), registry, tc.Database(), time.Millisecond)
closeFunc, err := prometheusmetrics.Workspaces(context.Background(), slogtest.Make(t, nil).Leveled(slog.LevelWarn), registry, tc.Database(), testutil.IntervalFast)
require.NoError(t, err)
t.Cleanup(closeFunc)
require.Eventually(t, func() bool {
metrics, err := registry.Gather()
assert.NoError(t, err)
if len(metrics) < 1 {
return false
}
sum := 0
for _, metric := range metrics[0].Metric {
count, ok := tc.Status[codersdk.ProvisionerJobStatus(metric.Label[0].GetValue())]
if metric.Gauge.GetValue() == 0 {
for _, m := range metrics {
if m.GetName() != "coderd_api_workspace_latest_build_total" {
continue
}
if !ok {
t.Fail()
for _, metric := range m.Metric {
count, ok := tc.Status[codersdk.ProvisionerJobStatus(metric.Label[0].GetValue())]
if metric.Gauge.GetValue() == 0 {
continue
}
if !ok {
t.Fail()
}
if metric.Gauge.GetValue() != float64(count) {
return false
}
sum += int(metric.Gauge.GetValue())
}
if metric.Gauge.GetValue() != float64(count) {
return false
}
sum += int(metric.Gauge.GetValue())
}
t.Logf("sum %d == total %d", sum, tc.Total)
return sum == tc.Total
@ -260,6 +184,90 @@ func TestWorkspaces(t *testing.T) {
}
}
// TestWorkspaceLatestBuildStatuses starts the Workspaces metrics collector
// against an in-memory database and waits until the
// coderd_workspace_latest_build_status gauge, summed per "status" label,
// matches the expected per-status workspace counts.
func TestWorkspaceLatestBuildStatuses(t *testing.T) {
	t.Parallel()

	for _, tc := range []struct {
		Name string
		// Database builds the store for the case. It receives the subtest's
		// own *testing.T so that fixture failures (require.* calls FailNow)
		// are raised on the goroutine running that subtest rather than on the
		// parent test from a parallel subtest goroutine.
		Database           func(t *testing.T) database.Store
		ExpectedWorkspaces int
		ExpectedStatuses   map[codersdk.ProvisionerJobStatus]int
	}{{
		Name: "None",
		Database: func(*testing.T) database.Store {
			return dbmem.New()
		},
		ExpectedWorkspaces: 0,
	}, {
		Name: "Multiple",
		Database: func(t *testing.T) database.Store {
			db := dbmem.New()
			insertTemplates(t, db)
			insertCanceled(t, db)
			insertFailed(t, db)
			insertFailed(t, db)
			insertSuccess(t, db)
			insertSuccess(t, db)
			insertSuccess(t, db)
			insertRunning(t, db)
			return db
		},
		ExpectedWorkspaces: 7,
		ExpectedStatuses: map[codersdk.ProvisionerJobStatus]int{
			codersdk.ProvisionerJobCanceled:  1,
			codersdk.ProvisionerJobFailed:    2,
			codersdk.ProvisionerJobSucceeded: 3,
			codersdk.ProvisionerJobRunning:   1,
		},
	}} {
		tc := tc
		t.Run(tc.Name, func(t *testing.T) {
			t.Parallel()

			registry := prometheus.NewRegistry()
			closeFunc, err := prometheusmetrics.Workspaces(context.Background(), slogtest.Make(t, nil), registry, tc.Database(t), testutil.IntervalFast)
			require.NoError(t, err)
			t.Cleanup(closeFunc)

			require.Eventually(t, func() bool {
				metrics, err := registry.Gather()
				assert.NoError(t, err)

				// Collapse every series of the metric down to a count per
				// "status" label value; the other labels are ignored here.
				stMap := map[codersdk.ProvisionerJobStatus]int{}
				for _, m := range metrics {
					if m.GetName() != "coderd_workspace_latest_build_status" {
						continue
					}

					for _, metric := range m.Metric {
						for _, l := range metric.Label {
							if l == nil || l.GetName() != "status" {
								continue
							}
							status := codersdk.ProvisionerJobStatus(l.GetValue())
							stMap[status] += int(metric.Gauge.GetValue())
						}
					}
				}

				// Every observed status must match its expected count. An
				// expected status that has not shown up yet is caught by the
				// total comparison below.
				stSum := 0
				for st, count := range stMap {
					if tc.ExpectedStatuses[st] != count {
						return false
					}
					stSum += count
				}

				t.Logf("status series = %d, expected == %d", stSum, tc.ExpectedWorkspaces)
				return stSum == tc.ExpectedWorkspaces
			}, testutil.WaitShort, testutil.IntervalFast)
		})
	}
}
func TestAgents(t *testing.T) {
t.Parallel()
@ -601,3 +609,153 @@ func prepareWorkspaceAndAgent(t *testing.T, client *codersdk.Client, user coders
agentClient.SetSessionToken(authToken)
return agentClient
}
// Randomly generated IDs for the two templates, and one version of each,
// that insertTemplates creates and insertRunning assigns to workspaces and
// their builds.
var (
templateA = uuid.New()
templateVersionA = uuid.New()
templateB = uuid.New()
templateVersionB = uuid.New()
)
// insertTemplates inserts two templates ("template-a" and "template-b") and
// one version of each ("version-1a" and "version-1b") into db, using the
// package-level templateA/templateB and templateVersionA/templateVersionB IDs.
// Any insert error fails the test immediately.
func insertTemplates(t *testing.T, db database.Store) {
	t.Helper()

	ctx := context.Background()
	for _, tpl := range []struct {
		id          uuid.UUID
		name        string
		versionID   uuid.UUID
		versionName string
	}{
		{id: templateA, name: "template-a", versionID: templateVersionA, versionName: "version-1a"},
		{id: templateB, name: "template-b", versionID: templateVersionB, versionName: "version-1b"},
	} {
		require.NoError(t, db.InsertTemplate(ctx, database.InsertTemplateParams{
			ID:                  tpl.id,
			Name:                tpl.name,
			Provisioner:         database.ProvisionerTypeTerraform,
			MaxPortSharingLevel: database.AppSharingLevelAuthenticated,
		}))

		require.NoError(t, db.InsertTemplateVersion(ctx, database.InsertTemplateVersionParams{
			ID: tpl.versionID,
			// Valid must be set: a NullUUID with Valid == false represents
			// NULL, which would leave the version unattached to its template.
			TemplateID: uuid.NullUUID{UUID: tpl.id, Valid: true},
			Name:       tpl.versionName,
		}))
	}
}
// insertUser inserts a user with a fresh ID, a random 8-character username
// and LoginTypeNone into db, and returns the inserted user. Any error fails
// the test immediately.
func insertUser(t *testing.T, db database.Store) database.User {
	t.Helper()

	username, err := cryptorand.String(8)
	require.NoError(t, err)

	user, err := db.InsertUser(context.Background(), database.InsertUserParams{
		ID:        uuid.New(),
		Username:  username,
		LoginType: database.LoginTypeNone,
	})
	require.NoError(t, err)

	return user
}
// insertRunning inserts a new user, a workspace owned by that user, a
// provisioner job and a first "start" workspace build for that job into db.
// It then acquires a provisioner job so the job is marked as started.
//
// The workspace is assigned at random to template A or template B (with the
// matching template version); these are the templates insertTemplates creates.
// The job is returned as it was inserted, i.e. before it was acquired. Any
// error fails the test immediately.
func insertRunning(t *testing.T, db database.Store) database.ProvisionerJob {
	t.Helper()

	ctx := context.Background()

	// Default to template A and switch to template B when the random draw
	// exceeds 5.
	template, templateVersion := templateA, templateVersionA
	rnd, err := cryptorand.Intn(10)
	require.NoError(t, err)
	if rnd > 5 {
		template, templateVersion = templateB, templateVersionB
	}

	workspace, err := db.InsertWorkspace(ctx, database.InsertWorkspaceParams{
		ID:               uuid.New(),
		OwnerID:          insertUser(t, db).ID,
		Name:             uuid.NewString(),
		TemplateID:       template,
		AutomaticUpdates: database.AutomaticUpdatesNever,
	})
	require.NoError(t, err)

	job, err := db.InsertProvisionerJob(ctx, database.InsertProvisionerJobParams{
		ID:            uuid.New(),
		CreatedAt:     dbtime.Now(),
		UpdatedAt:     dbtime.Now(),
		Provisioner:   database.ProvisionerTypeEcho,
		StorageMethod: database.ProvisionerStorageMethodFile,
		Type:          database.ProvisionerJobTypeWorkspaceBuild,
	})
	require.NoError(t, err)

	err = db.InsertWorkspaceBuild(ctx, database.InsertWorkspaceBuildParams{
		ID:                uuid.New(),
		WorkspaceID:       workspace.ID,
		JobID:             job.ID,
		BuildNumber:       1,
		Transition:        database.WorkspaceTransitionStart,
		Reason:            database.BuildReasonInitiator,
		TemplateVersionID: templateVersion,
	})
	require.NoError(t, err)

	// This marks the job as started.
	_, err = db.AcquireProvisionerJob(ctx, database.AcquireProvisionerJobParams{
		OrganizationID: job.OrganizationID,
		StartedAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
		Types: []database.ProvisionerType{database.ProvisionerTypeEcho},
	})
	require.NoError(t, err)

	return job
}
// insertCanceled inserts a running workspace build via insertRunning, then
// records a cancellation time followed by a completion time on its
// provisioner job. Any error fails the test immediately.
func insertCanceled(t *testing.T, db database.Store) {
	t.Helper()

	ctx := context.Background()
	job := insertRunning(t, db)

	err := db.UpdateProvisionerJobWithCancelByID(ctx, database.UpdateProvisionerJobWithCancelByIDParams{
		ID: job.ID,
		CanceledAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
	})
	require.NoError(t, err)

	err = db.UpdateProvisionerJobWithCompleteByID(ctx, database.UpdateProvisionerJobWithCompleteByIDParams{
		ID: job.ID,
		CompletedAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
	})
	require.NoError(t, err)
}
// insertFailed inserts a running workspace build via insertRunning, then
// completes its provisioner job with the error string "failed". Any error
// from the database fails the test immediately.
func insertFailed(t *testing.T, db database.Store) {
	t.Helper()

	ctx := context.Background()
	job := insertRunning(t, db)

	err := db.UpdateProvisionerJobWithCompleteByID(ctx, database.UpdateProvisionerJobWithCompleteByIDParams{
		ID: job.ID,
		CompletedAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
		Error: sql.NullString{
			String: "failed",
			Valid:  true,
		},
	})
	require.NoError(t, err)
}
// insertSuccess inserts a running workspace build via insertRunning, then
// completes its provisioner job without an error. Any error from the
// database fails the test immediately.
func insertSuccess(t *testing.T, db database.Store) {
	t.Helper()

	ctx := context.Background()
	job := insertRunning(t, db)

	err := db.UpdateProvisionerJobWithCompleteByID(ctx, database.UpdateProvisionerJobWithCompleteByIDParams{
		ID: job.ID,
		CompletedAt: sql.NullTime{
			Time:  dbtime.Now(),
			Valid: true,
		},
	})
	require.NoError(t, err)
}