feat: add template info tags to `coderd_agents_up` metric (#7942)

Co-authored-by: Colin Adler <colin1adler@gmail.com>
This commit is contained in:
goodspark 2023-07-11 10:39:14 -07:00 committed by GitHub
parent 398e8fdf89
commit dd4aafb350
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 116 additions and 42 deletions

View File

@ -607,12 +607,12 @@ func (q *fakeQuerier) GetAuthorizedWorkspaces(ctx context.Context, arg database.
}
if arg.Limit > 0 {
if int(arg.Limit) > len(workspaces) {
return convertToWorkspaceRows(workspaces, int64(beforePageCount)), nil
return q.convertToWorkspaceRowsNoLock(ctx, workspaces, int64(beforePageCount)), nil
}
workspaces = workspaces[:arg.Limit]
}
return convertToWorkspaceRows(workspaces, int64(beforePageCount)), nil
return q.convertToWorkspaceRowsNoLock(ctx, workspaces, int64(beforePageCount)), nil
}
// mapAgentStatus determines the agent status based on different timestamps like created_at, last_connected_at, disconnected_at, etc.
@ -649,10 +649,10 @@ func mapAgentStatus(dbAgent database.WorkspaceAgent, agentInactiveDisconnectTime
return status
}
func convertToWorkspaceRows(workspaces []database.Workspace, count int64) []database.GetWorkspacesRow {
rows := make([]database.GetWorkspacesRow, len(workspaces))
for i, w := range workspaces {
rows[i] = database.GetWorkspacesRow{
func (q *fakeQuerier) convertToWorkspaceRowsNoLock(ctx context.Context, workspaces []database.Workspace, count int64) []database.GetWorkspacesRow {
rows := make([]database.GetWorkspacesRow, 0, len(workspaces))
for _, w := range workspaces {
wr := database.GetWorkspacesRow{
ID: w.ID,
CreatedAt: w.CreatedAt,
UpdatedAt: w.UpdatedAt,
@ -666,6 +666,28 @@ func convertToWorkspaceRows(workspaces []database.Workspace, count int64) []data
LastUsedAt: w.LastUsedAt,
Count: count,
}
for _, t := range q.templates {
if t.ID == w.TemplateID {
wr.TemplateName = t.Name
break
}
}
if build, err := q.getLatestWorkspaceBuildByWorkspaceIDNoLock(ctx, w.ID); err == nil {
for _, tv := range q.templateVersions {
if tv.ID == build.TemplateVersionID {
wr.TemplateVersionID = tv.ID
wr.TemplateVersionName = sql.NullString{
Valid: true,
String: tv.Name,
}
break
}
}
}
rows = append(rows, wr)
}
return rows
}

View File

@ -236,6 +236,9 @@ func (q *sqlQuerier) GetAuthorizedWorkspaces(ctx context.Context, arg GetWorkspa
&i.Ttl,
&i.LastUsedAt,
&i.LockedAt,
&i.TemplateName,
&i.TemplateVersionID,
&i.TemplateVersionName,
&i.Count,
); err != nil {
return nil, err

View File

@ -8403,7 +8403,11 @@ func (q *sqlQuerier) GetWorkspaceByWorkspaceAppID(ctx context.Context, workspace
const getWorkspaces = `-- name: GetWorkspaces :many
SELECT
workspaces.id, workspaces.created_at, workspaces.updated_at, workspaces.owner_id, workspaces.organization_id, workspaces.template_id, workspaces.deleted, workspaces.name, workspaces.autostart_schedule, workspaces.ttl, workspaces.last_used_at, workspaces.locked_at, COUNT(*) OVER () as count
workspaces.id, workspaces.created_at, workspaces.updated_at, workspaces.owner_id, workspaces.organization_id, workspaces.template_id, workspaces.deleted, workspaces.name, workspaces.autostart_schedule, workspaces.ttl, workspaces.last_used_at, workspaces.locked_at,
COALESCE(template_name.template_name, 'unknown') as template_name,
latest_build.template_version_id,
latest_build.template_version_name,
COUNT(*) OVER () as count
FROM
workspaces
JOIN
@ -8413,6 +8417,8 @@ ON
LEFT JOIN LATERAL (
SELECT
workspace_builds.transition,
workspace_builds.template_version_id,
template_versions.name AS template_version_name,
provisioner_jobs.id AS provisioner_job_id,
provisioner_jobs.started_at,
provisioner_jobs.updated_at,
@ -8425,6 +8431,10 @@ LEFT JOIN LATERAL (
provisioner_jobs
ON
provisioner_jobs.id = workspace_builds.job_id
LEFT JOIN
template_versions
ON
template_versions.id = workspace_builds.template_version_id
WHERE
workspace_builds.workspace_id = workspaces.id
ORDER BY
@ -8432,6 +8442,14 @@ LEFT JOIN LATERAL (
LIMIT
1
) latest_build ON TRUE
LEFT JOIN LATERAL (
SELECT
templates.name AS template_name
FROM
templates
WHERE
templates.id = workspaces.template_id
) template_name ON true
WHERE
-- Optionally include deleted workspaces
workspaces.deleted = $1
@ -8503,13 +8521,13 @@ WHERE
-- Filter by owner_id
AND CASE
WHEN $3 :: uuid != '00000000-0000-0000-0000-000000000000'::uuid THEN
owner_id = $3
workspaces.owner_id = $3
ELSE true
END
-- Filter by owner_name
AND CASE
WHEN $4 :: text != '' THEN
owner_id = (SELECT id FROM users WHERE lower(username) = lower($4) AND deleted = false)
workspaces.owner_id = (SELECT id FROM users WHERE lower(username) = lower($4) AND deleted = false)
ELSE true
END
-- Filter by template_name
@ -8517,19 +8535,19 @@ WHERE
-- Use the organization filter to restrict to 1 org if needed.
AND CASE
WHEN $5 :: text != '' THEN
template_id = ANY(SELECT id FROM templates WHERE lower(name) = lower($5) AND deleted = false)
workspaces.template_id = ANY(SELECT id FROM templates WHERE lower(name) = lower($5) AND deleted = false)
ELSE true
END
-- Filter by template_ids
AND CASE
WHEN array_length($6 :: uuid[], 1) > 0 THEN
template_id = ANY($6)
workspaces.template_id = ANY($6)
ELSE true
END
-- Filter by name, matching on substring
AND CASE
WHEN $7 :: text != '' THEN
name ILIKE '%' || $7 || '%'
workspaces.name ILIKE '%' || $7 || '%'
ELSE true
END
-- Filter by agent status
@ -8577,7 +8595,7 @@ ORDER BY
latest_build.error IS NULL AND
latest_build.transition = 'start'::workspace_transition) DESC,
LOWER(users.username) ASC,
LOWER(name) ASC
LOWER(workspaces.name) ASC
LIMIT
CASE
WHEN $11 :: integer > 0 THEN
@ -8602,19 +8620,22 @@ type GetWorkspacesParams struct {
}
// GetWorkspacesRow is a single result row of the GetWorkspaces query: the
// columns of the workspaces table followed by the extra columns the query
// selects alongside them.
type GetWorkspacesRow struct {
ID uuid.UUID `db:"id" json:"id"`
CreatedAt time.Time `db:"created_at" json:"created_at"`
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
OwnerID uuid.UUID `db:"owner_id" json:"owner_id"`
OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"`
TemplateID uuid.UUID `db:"template_id" json:"template_id"`
Deleted bool `db:"deleted" json:"deleted"`
Name string `db:"name" json:"name"`
AutostartSchedule sql.NullString `db:"autostart_schedule" json:"autostart_schedule"`
Ttl sql.NullInt64 `db:"ttl" json:"ttl"`
LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"`
LockedAt sql.NullTime `db:"locked_at" json:"locked_at"`
Count int64 `db:"count" json:"count"`
ID uuid.UUID `db:"id" json:"id"`
CreatedAt time.Time `db:"created_at" json:"created_at"`
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
OwnerID uuid.UUID `db:"owner_id" json:"owner_id"`
OrganizationID uuid.UUID `db:"organization_id" json:"organization_id"`
TemplateID uuid.UUID `db:"template_id" json:"template_id"`
Deleted bool `db:"deleted" json:"deleted"`
Name string `db:"name" json:"name"`
AutostartSchedule sql.NullString `db:"autostart_schedule" json:"autostart_schedule"`
Ttl sql.NullInt64 `db:"ttl" json:"ttl"`
LastUsedAt time.Time `db:"last_used_at" json:"last_used_at"`
LockedAt sql.NullTime `db:"locked_at" json:"locked_at"`
// TemplateName is templates.name for the workspace's template_id; the query
// substitutes the literal 'unknown' when the lateral join finds no template.
TemplateName string `db:"template_name" json:"template_name"`
// TemplateVersionID is the template_version_id of the workspace's latest
// build (the latest_build lateral subquery).
// NOTE(review): latest_build is LEFT JOINed, so this column is presumably
// NULL for a workspace with no builds — confirm how that scans into uuid.UUID.
TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"`
// TemplateVersionName is template_versions.name for that build. It is
// nullable because both latest_build and template_versions are LEFT JOINed.
TemplateVersionName sql.NullString `db:"template_version_name" json:"template_version_name"`
// Count is the query's COUNT(*) OVER () window value, repeated on every row.
Count int64 `db:"count" json:"count"`
}
func (q *sqlQuerier) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) ([]GetWorkspacesRow, error) {
@ -8651,6 +8672,9 @@ func (q *sqlQuerier) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams)
&i.Ttl,
&i.LastUsedAt,
&i.LockedAt,
&i.TemplateName,
&i.TemplateVersionID,
&i.TemplateVersionName,
&i.Count,
); err != nil {
return nil, err

View File

@ -75,7 +75,11 @@ WHERE
-- name: GetWorkspaces :many
SELECT
workspaces.*, COUNT(*) OVER () as count
workspaces.*,
COALESCE(template_name.template_name, 'unknown') as template_name,
latest_build.template_version_id,
latest_build.template_version_name,
COUNT(*) OVER () as count
FROM
workspaces
JOIN
@ -85,6 +89,8 @@ ON
LEFT JOIN LATERAL (
SELECT
workspace_builds.transition,
workspace_builds.template_version_id,
template_versions.name AS template_version_name,
provisioner_jobs.id AS provisioner_job_id,
provisioner_jobs.started_at,
provisioner_jobs.updated_at,
@ -97,6 +103,10 @@ LEFT JOIN LATERAL (
provisioner_jobs
ON
provisioner_jobs.id = workspace_builds.job_id
LEFT JOIN
template_versions
ON
template_versions.id = workspace_builds.template_version_id
WHERE
workspace_builds.workspace_id = workspaces.id
ORDER BY
@ -104,6 +114,14 @@ LEFT JOIN LATERAL (
LIMIT
1
) latest_build ON TRUE
LEFT JOIN LATERAL (
SELECT
templates.name AS template_name
FROM
templates
WHERE
templates.id = workspaces.template_id
) template_name ON true
WHERE
-- Optionally include deleted workspaces
workspaces.deleted = @deleted
@ -175,13 +193,13 @@ WHERE
-- Filter by owner_id
AND CASE
WHEN @owner_id :: uuid != '00000000-0000-0000-0000-000000000000'::uuid THEN
owner_id = @owner_id
workspaces.owner_id = @owner_id
ELSE true
END
-- Filter by owner_name
AND CASE
WHEN @owner_username :: text != '' THEN
owner_id = (SELECT id FROM users WHERE lower(username) = lower(@owner_username) AND deleted = false)
workspaces.owner_id = (SELECT id FROM users WHERE lower(username) = lower(@owner_username) AND deleted = false)
ELSE true
END
-- Filter by template_name
@ -189,19 +207,19 @@ WHERE
-- Use the organization filter to restrict to 1 org if needed.
AND CASE
WHEN @template_name :: text != '' THEN
template_id = ANY(SELECT id FROM templates WHERE lower(name) = lower(@template_name) AND deleted = false)
workspaces.template_id = ANY(SELECT id FROM templates WHERE lower(name) = lower(@template_name) AND deleted = false)
ELSE true
END
-- Filter by template_ids
AND CASE
WHEN array_length(@template_ids :: uuid[], 1) > 0 THEN
template_id = ANY(@template_ids)
workspaces.template_id = ANY(@template_ids)
ELSE true
END
-- Filter by name, matching on substring
AND CASE
WHEN @name :: text != '' THEN
name ILIKE '%' || @name || '%'
workspaces.name ILIKE '%' || @name || '%'
ELSE true
END
-- Filter by agent status
@ -249,7 +267,7 @@ ORDER BY
latest_build.error IS NULL AND
latest_build.transition = 'start'::workspace_transition) DESC,
LOWER(users.username) ASC,
LOWER(name) ASC
LOWER(workspaces.name) ASC
LIMIT
CASE
WHEN @limit_ :: integer > 0 THEN

View File

@ -15,7 +15,6 @@ import (
"tailscale.com/tailcfg"
"cdr.dev/slog"
"github.com/coder/coder/coderd/database"
"github.com/coder/coder/coderd/database/db2sdk"
"github.com/coder/coder/coderd/database/dbauthz"
@ -153,7 +152,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
Subsystem: "agents",
Name: "up",
Help: "The number of active agents per workspace.",
}, []string{usernameLabel, workspaceNameLabel}))
}, []string{usernameLabel, workspaceNameLabel, "template_name", "template_version"}))
err := registerer.Register(agentsGauge)
if err != nil {
return nil, err
@ -234,29 +233,35 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
}
for _, workspace := range workspaceRows {
templateName := workspace.TemplateName
templateVersionName := workspace.TemplateVersionName.String
if !workspace.TemplateVersionName.Valid {
templateVersionName = "unknown"
}
user, err := db.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
logger.Error(ctx, "can't get user from the database", slog.F("user_id", workspace.OwnerID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}
agents, err := db.GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx, workspace.ID)
if err != nil {
logger.Error(ctx, "can't get workspace agents", slog.F("workspace_id", workspace.ID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}
if len(agents) == 0 {
logger.Debug(ctx, "workspace agents are unavailable", slog.F("workspace_id", workspace.ID))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name, templateName, templateVersionName)
continue
}
for _, agent := range agents {
// Collect information about agents
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name)
agentsGauge.WithLabelValues(VectorOperationAdd, 1, user.Username, workspace.Name, templateName, templateVersionName)
connectionStatus := agent.Status(agentInactiveDisconnectTimeout)
node := (*coordinator.Load()).Node(agent.ID)

View File

@ -312,7 +312,7 @@ func TestAgents(t *testing.T) {
// when
closeFunc, err := prometheusmetrics.Agents(ctx, slogtest.Make(t, &slogtest.Options{
IgnoreErrors: true,
}), registry, db, &coordinatorPtr, derpMap, agentInactiveDisconnectTimeout, time.Millisecond)
}), registry, db, &coordinatorPtr, derpMap, agentInactiveDisconnectTimeout, 50*time.Millisecond)
require.NoError(t, err)
t.Cleanup(closeFunc)
@ -332,8 +332,10 @@ func TestAgents(t *testing.T) {
for _, metric := range metrics {
switch metric.GetName() {
case "coderd_agents_up":
assert.Equal(t, "testuser", metric.Metric[0].Label[0].GetValue()) // Username
assert.Equal(t, workspace.Name, metric.Metric[0].Label[1].GetValue()) // Workspace name
assert.Equal(t, template.Name, metric.Metric[0].Label[0].GetValue()) // Template name
assert.Equal(t, version.Name, metric.Metric[0].Label[1].GetValue()) // Template version name
assert.Equal(t, "testuser", metric.Metric[0].Label[2].GetValue()) // Username
assert.Equal(t, workspace.Name, metric.Metric[0].Label[3].GetValue()) // Workspace name
assert.Equal(t, 1, int(metric.Metric[0].Gauge.GetValue())) // Metric value
agentsUp = true
case "coderd_agents_connections":