feat: add metrics to workspace agent scripts (#11132)

* push startup script metrics to agent
This commit is contained in:
Steven Masley 2023-12-13 11:45:43 -06:00 committed by GitHub
parent 41ed581460
commit b7bdb17460
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 306 additions and 127 deletions

View File

@ -35,6 +35,8 @@ import (
"tailscale.com/types/netlogtype"
"cdr.dev/slog"
"github.com/coder/retry"
"github.com/coder/coder/v2/agent/agentproc"
"github.com/coder/coder/v2/agent/agentscripts"
"github.com/coder/coder/v2/agent/agentssh"
@ -45,7 +47,6 @@ import (
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/tailnet"
"github.com/coder/retry"
)
const (
@ -222,8 +223,10 @@ type agent struct {
connCountReconnectingPTY atomic.Int64
prometheusRegistry *prometheus.Registry
metrics *agentMetrics
syscaller agentproc.Syscaller
// metrics are prometheus registered metrics that will be collected and
// labeled in Coder with the agent + workspace.
metrics *agentMetrics
syscaller agentproc.Syscaller
// modifiedProcs is used for testing process priority management.
modifiedProcs chan []*agentproc.Process
@ -252,6 +255,9 @@ func (a *agent) init(ctx context.Context) {
Filesystem: a.filesystem,
PatchLogs: a.client.PatchLogs,
})
// Register runner metrics. If the prom registry is nil, the metrics
// will not report anywhere.
a.scriptRunner.RegisterMetrics(a.prometheusRegistry)
go a.runLoop(ctx)
}
@ -745,9 +751,12 @@ func (a *agent) run(ctx context.Context) error {
return xerrors.Errorf("init script runner: %w", err)
}
err = a.trackConnGoroutine(func() {
start := time.Now()
err := a.scriptRunner.Execute(ctx, func(script codersdk.WorkspaceAgentScript) bool {
return script.RunOnStart
})
// Measure the time immediately after the script has finished
dur := time.Since(start).Seconds()
if err != nil {
a.logger.Warn(ctx, "startup script(s) failed", slog.Error(err))
if errors.Is(err, agentscripts.ErrTimeout) {
@ -758,6 +767,12 @@ func (a *agent) run(ctx context.Context) error {
} else {
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleReady)
}
label := "false"
if err == nil {
label = "true"
}
a.metrics.startupScriptSeconds.WithLabelValues(label).Set(dur)
a.scriptRunner.StartCron()
})
if err != nil {

View File

@ -46,6 +46,7 @@ import (
"cdr.dev/slog"
"cdr.dev/slog/sloggers/sloghuman"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/agent"
"github.com/coder/coder/v2/agent/agentproc"
"github.com/coder/coder/v2/agent/agentproc/agentproctest"
@ -2235,6 +2236,17 @@ func TestAgent_Metrics_SSH(t *testing.T) {
Type: agentsdk.AgentMetricTypeCounter,
Value: 0,
},
{
Name: "coderd_agentstats_startup_script_seconds",
Type: agentsdk.AgentMetricTypeGauge,
Value: 0,
Labels: []agentsdk.AgentMetricLabel{
{
Name: "success",
Value: "true",
},
},
},
}
var actual []*promgo.MetricFamily

View File

@ -13,12 +13,14 @@ import (
"sync/atomic"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/robfig/cron/v3"
"github.com/spf13/afero"
"golang.org/x/sync/errgroup"
"golang.org/x/xerrors"
"cdr.dev/slog"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
@ -57,6 +59,11 @@ func New(opts Options) *Runner {
cronCtxCancel: cronCtxCancel,
cron: cron.New(cron.WithParser(parser)),
closed: make(chan struct{}),
scriptsExecuted: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "agent",
Subsystem: "scripts",
Name: "executed_total",
}, []string{"success"}),
}
}
@ -71,6 +78,19 @@ type Runner struct {
cron *cron.Cron
initialized atomic.Bool
scripts []codersdk.WorkspaceAgentScript
// scriptsExecuted includes all scripts executed by the workspace agent. Agents
// execute startup scripts, and scripts on a cron schedule. Both will increment
// this counter.
scriptsExecuted *prometheus.CounterVec
}
func (r *Runner) RegisterMetrics(reg prometheus.Registerer) {
if reg == nil {
// If no registry, do nothing.
return
}
reg.MustRegister(r.scriptsExecuted)
}
// Init initializes the runner with the provided scripts.
@ -90,7 +110,7 @@ func (r *Runner) Init(scripts []codersdk.WorkspaceAgentScript) error {
}
script := script
_, err := r.cron.AddFunc(script.Cron, func() {
err := r.run(r.cronCtx, script)
err := r.trackRun(r.cronCtx, script)
if err != nil {
r.Logger.Warn(context.Background(), "run agent script on schedule", slog.Error(err))
}
@ -131,7 +151,7 @@ func (r *Runner) Execute(ctx context.Context, filter func(script codersdk.Worksp
}
script := script
eg.Go(func() error {
err := r.run(ctx, script)
err := r.trackRun(ctx, script)
if err != nil {
return xerrors.Errorf("run agent script %q: %w", script.LogSourceID, err)
}
@ -141,6 +161,17 @@ func (r *Runner) Execute(ctx context.Context, filter func(script codersdk.Worksp
return eg.Wait()
}
// trackRun wraps "run" with metrics.
func (r *Runner) trackRun(ctx context.Context, script codersdk.WorkspaceAgentScript) error {
err := r.run(ctx, script)
if err != nil {
r.scriptsExecuted.WithLabelValues("false").Add(1)
} else {
r.scriptsExecuted.WithLabelValues("true").Add(1)
}
return err
}
// run executes the provided script with the timeout.
// If the timeout is exceeded, the process is sent an interrupt signal.
// If the process does not exit after a few seconds, it is forcefully killed.

View File

@ -17,6 +17,9 @@ import (
type agentMetrics struct {
connectionsTotal prometheus.Counter
reconnectingPTYErrors *prometheus.CounterVec
// startupScriptSeconds is the time in seconds that the start script(s)
// took to run. This is reported once per agent.
startupScriptSeconds *prometheus.GaugeVec
}
func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
@ -35,9 +38,18 @@ func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
)
registerer.MustRegister(reconnectingPTYErrors)
startupScriptSeconds := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "coderd",
Subsystem: "agentstats",
Name: "startup_script_seconds",
Help: "Amount of time taken to run the startup script in seconds.",
}, []string{"success"})
registerer.MustRegister(startupScriptSeconds)
return &agentMetrics{
connectionsTotal: connectionsTotal,
reconnectingPTYErrors: reconnectingPTYErrors,
startupScriptSeconds: startupScriptSeconds,
}
}

View File

@ -38,8 +38,10 @@ import (
_ "github.com/coder/coder/v2/coderd/apidoc"
"github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
"github.com/coder/coder/v2/coderd/prometheusmetrics"
"cdr.dev/slog"
"github.com/coder/coder/v2/buildinfo"
"github.com/coder/coder/v2/cli/clibase"
"github.com/coder/coder/v2/coderd/audit"
@ -168,7 +170,7 @@ type Options struct {
HTTPClient *http.Client
UpdateAgentMetrics func(ctx context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric)
UpdateAgentMetrics func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []agentsdk.AgentMetric)
StatsBatcher *batchstats.Batcher
WorkspaceAppsStatsCollectorOptions workspaceapps.StatsCollectorOptions

View File

@ -1918,7 +1918,7 @@ func (q *querier) GetWorkspaceBuildsCreatedAfter(ctx context.Context, createdAt
return q.db.GetWorkspaceBuildsCreatedAfter(ctx, createdAt)
}
func (q *querier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.Workspace, error) {
func (q *querier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.GetWorkspaceByAgentIDRow, error) {
return fetch(q.log, q.auth, q.db.GetWorkspaceByAgentID)(ctx, agentID)
}

View File

@ -1065,21 +1065,30 @@ func (s *MethodTestSuite) TestWorkspace() {
check.Args(ws.ID).Asserts(ws, rbac.ActionRead).Returns(b)
}))
s.Run("GetWorkspaceAgentByID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
check.Args(agt.ID).Asserts(ws, rbac.ActionRead).Returns(agt)
}))
s.Run("GetWorkspaceAgentByInstanceID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
check.Args(agt.AuthInstanceID.String).Asserts(ws, rbac.ActionRead).Returns(agt)
}))
s.Run("UpdateWorkspaceAgentLifecycleStateByID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
@ -1089,7 +1098,10 @@ func (s *MethodTestSuite) TestWorkspace() {
}).Asserts(ws, rbac.ActionUpdate).Returns()
}))
s.Run("UpdateWorkspaceAgentLogOverflowByID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
@ -1099,7 +1111,10 @@ func (s *MethodTestSuite) TestWorkspace() {
}).Asserts(ws, rbac.ActionUpdate).Returns()
}))
s.Run("UpdateWorkspaceAgentStartupByID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
@ -1111,7 +1126,10 @@ func (s *MethodTestSuite) TestWorkspace() {
}).Asserts(ws, rbac.ActionUpdate).Returns()
}))
s.Run("GetWorkspaceAgentLogsAfter", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
@ -1120,7 +1138,10 @@ func (s *MethodTestSuite) TestWorkspace() {
}).Asserts(ws, rbac.ActionRead).Returns([]database.WorkspaceAgentLog{})
}))
s.Run("GetWorkspaceAppByAgentIDAndSlug", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
@ -1132,7 +1153,10 @@ func (s *MethodTestSuite) TestWorkspace() {
}).Asserts(ws, rbac.ActionRead).Returns(app)
}))
s.Run("GetWorkspaceAppsByAgentID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
@ -1173,11 +1197,17 @@ func (s *MethodTestSuite) TestWorkspace() {
check.Args(database.GetWorkspaceBuildsByWorkspaceIDParams{WorkspaceID: ws.ID}).Asserts(ws, rbac.ActionRead) // ordering
}))
s.Run("GetWorkspaceByAgentID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
tpl := dbgen.Template(s.T(), db, database.Template{})
ws := dbgen.Workspace(s.T(), db, database.Workspace{
TemplateID: tpl.ID,
})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
check.Args(agt.ID).Asserts(ws, rbac.ActionRead).Returns(ws)
check.Args(agt.ID).Asserts(ws, rbac.ActionRead).Returns(database.GetWorkspaceByAgentIDRow{
Workspace: ws,
TemplateName: tpl.Name,
})
}))
s.Run("GetWorkspaceByOwnerIDAndName", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})

View File

@ -4293,11 +4293,24 @@ func (q *FakeQuerier) GetWorkspaceBuildsCreatedAfter(_ context.Context, after ti
return workspaceBuilds, nil
}
func (q *FakeQuerier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.Workspace, error) {
func (q *FakeQuerier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.GetWorkspaceByAgentIDRow, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
return q.getWorkspaceByAgentIDNoLock(ctx, agentID)
w, err := q.getWorkspaceByAgentIDNoLock(ctx, agentID)
if err != nil {
return database.GetWorkspaceByAgentIDRow{}, err
}
tpl, err := q.getTemplateByIDNoLock(ctx, w.TemplateID)
if err != nil {
return database.GetWorkspaceByAgentIDRow{}, err
}
return database.GetWorkspaceByAgentIDRow{
Workspace: w,
TemplateName: tpl.Name,
}, nil
}
func (q *FakeQuerier) GetWorkspaceByID(ctx context.Context, id uuid.UUID) (database.Workspace, error) {

View File

@ -1124,7 +1124,7 @@ func (m metricsStore) GetWorkspaceBuildsCreatedAfter(ctx context.Context, create
return builds, err
}
func (m metricsStore) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.Workspace, error) {
func (m metricsStore) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.GetWorkspaceByAgentIDRow, error) {
start := time.Now()
workspace, err := m.s.GetWorkspaceByAgentID(ctx, agentID)
m.queryLatencies.WithLabelValues("GetWorkspaceByAgentID").Observe(time.Since(start).Seconds())

View File

@ -2344,10 +2344,10 @@ func (mr *MockStoreMockRecorder) GetWorkspaceBuildsCreatedAfter(arg0, arg1 inter
}
// GetWorkspaceByAgentID mocks base method.
func (m *MockStore) GetWorkspaceByAgentID(arg0 context.Context, arg1 uuid.UUID) (database.Workspace, error) {
func (m *MockStore) GetWorkspaceByAgentID(arg0 context.Context, arg1 uuid.UUID) (database.GetWorkspaceByAgentIDRow, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "GetWorkspaceByAgentID", arg0, arg1)
ret0, _ := ret[0].(database.Workspace)
ret0, _ := ret[0].(database.GetWorkspaceByAgentIDRow)
ret1, _ := ret[1].(error)
return ret0, ret1
}

View File

@ -148,6 +148,10 @@ func (g Group) RBACObject() rbac.Object {
InOrg(g.OrganizationID)
}
func (w GetWorkspaceByAgentIDRow) RBACObject() rbac.Object {
return w.Workspace.RBACObject()
}
func (w Workspace) RBACObject() rbac.Object {
return rbac.ResourceWorkspace.WithID(w.ID).
InOrg(w.OrganizationID).

View File

@ -231,7 +231,7 @@ type sqlcQuerier interface {
GetWorkspaceBuildParameters(ctx context.Context, workspaceBuildID uuid.UUID) ([]WorkspaceBuildParameter, error)
GetWorkspaceBuildsByWorkspaceID(ctx context.Context, arg GetWorkspaceBuildsByWorkspaceIDParams) ([]WorkspaceBuild, error)
GetWorkspaceBuildsCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceBuild, error)
GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (Workspace, error)
GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (GetWorkspaceByAgentIDRow, error)
GetWorkspaceByID(ctx context.Context, id uuid.UUID) (Workspace, error)
GetWorkspaceByOwnerIDAndName(ctx context.Context, arg GetWorkspaceByOwnerIDAndNameParams) (Workspace, error)
GetWorkspaceByWorkspaceAppID(ctx context.Context, workspaceAppID uuid.UUID) (Workspace, error)

View File

@ -10539,9 +10539,12 @@ func (q *sqlQuerier) GetDeploymentWorkspaceStats(ctx context.Context) (GetDeploy
const getWorkspaceByAgentID = `-- name: GetWorkspaceByAgentID :one
SELECT
id, created_at, updated_at, owner_id, organization_id, template_id, deleted, name, autostart_schedule, ttl, last_used_at, dormant_at, deleting_at, automatic_updates
workspaces.id, workspaces.created_at, workspaces.updated_at, workspaces.owner_id, workspaces.organization_id, workspaces.template_id, workspaces.deleted, workspaces.name, workspaces.autostart_schedule, workspaces.ttl, workspaces.last_used_at, workspaces.dormant_at, workspaces.deleting_at, workspaces.automatic_updates,
templates.name as template_name
FROM
workspaces
INNER JOIN
templates ON workspaces.template_id = templates.id
WHERE
workspaces.id = (
SELECT
@ -10567,24 +10570,30 @@ WHERE
)
`
func (q *sqlQuerier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (Workspace, error) {
type GetWorkspaceByAgentIDRow struct {
Workspace Workspace `db:"workspace" json:"workspace"`
TemplateName string `db:"template_name" json:"template_name"`
}
func (q *sqlQuerier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (GetWorkspaceByAgentIDRow, error) {
row := q.db.QueryRowContext(ctx, getWorkspaceByAgentID, agentID)
var i Workspace
var i GetWorkspaceByAgentIDRow
err := row.Scan(
&i.ID,
&i.CreatedAt,
&i.UpdatedAt,
&i.OwnerID,
&i.OrganizationID,
&i.TemplateID,
&i.Deleted,
&i.Name,
&i.AutostartSchedule,
&i.Ttl,
&i.LastUsedAt,
&i.DormantAt,
&i.DeletingAt,
&i.AutomaticUpdates,
&i.Workspace.ID,
&i.Workspace.CreatedAt,
&i.Workspace.UpdatedAt,
&i.Workspace.OwnerID,
&i.Workspace.OrganizationID,
&i.Workspace.TemplateID,
&i.Workspace.Deleted,
&i.Workspace.Name,
&i.Workspace.AutostartSchedule,
&i.Workspace.Ttl,
&i.Workspace.LastUsedAt,
&i.Workspace.DormantAt,
&i.Workspace.DeletingAt,
&i.Workspace.AutomaticUpdates,
&i.TemplateName,
)
return i, err
}

View File

@ -46,9 +46,12 @@ WHERE
-- name: GetWorkspaceByAgentID :one
SELECT
*
sqlc.embed(workspaces),
templates.name as template_name
FROM
workspaces
INNER JOIN
templates ON workspaces.template_id = templates.id
WHERE
workspaces.id = (
SELECT

View File

@ -47,6 +47,7 @@ type updateRequest struct {
username string
workspaceName string
agentName string
templateName string
metrics []agentsdk.AgentMetric
@ -59,6 +60,7 @@ type annotatedMetric struct {
username string
workspaceName string
agentName string
templateName string
expiryDate time.Time
}
@ -74,7 +76,7 @@ func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) {
labelValues := make([]string, 0, len(agentMetricsLabels)+len(am.Labels))
labels = append(labels, agentMetricsLabels...)
labelValues = append(labelValues, am.username, am.workspaceName, am.agentName)
labelValues = append(labelValues, am.username, am.workspaceName, am.agentName, am.templateName)
for _, l := range am.Labels {
labels = append(labels, l.Name)
@ -160,6 +162,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
username: req.username,
workspaceName: req.workspaceName,
agentName: req.agentName,
templateName: req.templateName,
AgentMetric: m,
@ -227,7 +230,16 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
func (*MetricsAggregator) Describe(_ chan<- *prometheus.Desc) {
}
var agentMetricsLabels = []string{usernameLabel, workspaceNameLabel, agentNameLabel}
var agentMetricsLabels = []string{usernameLabel, workspaceNameLabel, agentNameLabel, templateNameLabel}
// AgentMetricLabels are the labels used to decorate an agent's metrics.
// This list should match the list of labels in agentMetricsLabels.
type AgentMetricLabels struct {
Username string
WorkspaceName string
AgentName string
TemplateName string
}
func (ma *MetricsAggregator) Collect(ch chan<- prometheus.Metric) {
output := make(chan []prometheus.Metric, 1)
@ -246,12 +258,13 @@ func (ma *MetricsAggregator) Collect(ch chan<- prometheus.Metric) {
}
}
func (ma *MetricsAggregator) Update(ctx context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric) {
func (ma *MetricsAggregator) Update(ctx context.Context, labels AgentMetricLabels, metrics []agentsdk.AgentMetric) {
select {
case ma.updateCh <- updateRequest{
username: username,
workspaceName: workspaceName,
agentName: agentName,
username: labels.Username,
workspaceName: labels.WorkspaceName,
agentName: labels.AgentName,
templateName: labels.TemplateName,
metrics: metrics,
timestamp: time.Now(),

View File

@ -2,6 +2,7 @@ package prometheusmetrics_test
import (
"context"
"sort"
"sync/atomic"
"testing"
"time"
@ -12,6 +13,7 @@ import (
"github.com/stretchr/testify/require"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/prometheusmetrics"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/cryptorand"
@ -22,8 +24,16 @@ const (
testWorkspaceName = "yogi-workspace"
testUsername = "yogi-bear"
testAgentName = "main-agent"
testTemplateName = "main-template"
)
var testLabels = prometheusmetrics.AgentMetricLabels{
Username: testUsername,
WorkspaceName: testWorkspaceName,
AgentName: testAgentName,
TemplateName: testTemplateName,
}
func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
t.Parallel()
@ -58,6 +68,7 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
{Name: "agent_name", Value: testAgentName},
{Name: "username", Value: testUsername},
{Name: "workspace_name", Value: testWorkspaceName},
{Name: "template_name", Value: testTemplateName},
}
expected := []agentsdk.AgentMetric{
{Name: "a_counter_one", Type: agentsdk.AgentMetricTypeCounter, Value: 1, Labels: commonLabels},
@ -69,13 +80,14 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
{Name: "hello", Value: "world"},
{Name: "username", Value: testUsername},
{Name: "workspace_name", Value: testWorkspaceName},
{Name: "template_name", Value: testTemplateName},
}},
{Name: "d_gauge_four", Type: agentsdk.AgentMetricTypeGauge, Value: 6, Labels: commonLabels},
}
// when
metricsAggregator.Update(ctx, testUsername, testWorkspaceName, testAgentName, given1)
metricsAggregator.Update(ctx, testUsername, testWorkspaceName, testAgentName, given2)
metricsAggregator.Update(ctx, testLabels, given1)
metricsAggregator.Update(ctx, testLabels, given2)
// then
require.Eventually(t, func() bool {
@ -119,6 +131,10 @@ func verifyCollectedMetrics(t *testing.T, expected []agentsdk.AgentMetric, actua
}
dtoLabels := asMetricAgentLabels(d.GetLabel())
// dto labels are sorted in alphabetical order.
sort.Slice(e.Labels, func(i, j int) bool {
return e.Labels[i].Name < e.Labels[j].Name
})
require.Equal(t, e.Labels, dtoLabels, d.String())
}
return true
@ -154,7 +170,7 @@ func TestUpdateMetrics_MetricsExpire(t *testing.T) {
}
// when
metricsAggregator.Update(ctx, testUsername, testWorkspaceName, testAgentName, given)
metricsAggregator.Update(ctx, testLabels, given)
time.Sleep(time.Millisecond * 10) // Ensure that metric is expired
@ -220,7 +236,7 @@ func Benchmark_MetricsAggregator_Run(b *testing.B) {
b.Logf("N=%d sending %d metrics", b.N, numMetrics)
var nGot atomic.Int64
b.StartTimer()
metricsAggregator.Update(ctx, testUsername, testWorkspaceName, testAgentName, metrics)
metricsAggregator.Update(ctx, testLabels, metrics)
for i := 0; i < numMetrics; i++ {
select {
case <-ctx.Done():

View File

@ -17,6 +17,7 @@ import (
"tailscale.com/tailcfg"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbtime"
@ -24,6 +25,7 @@ import (
)
const (
templateNameLabel = "template_name"
agentNameLabel = "agent_name"
usernameLabel = "username"
workspaceNameLabel = "workspace_name"
@ -154,7 +156,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
Subsystem: "agents",
Name: "up",
Help: "The number of active agents per workspace.",
}, []string{usernameLabel, workspaceNameLabel, "template_name", "template_version"}))
}, []string{usernameLabel, workspaceNameLabel, templateNameLabel, "template_version"}))
err := registerer.Register(agentsGauge)
if err != nil {
return nil, err

View File

@ -37,6 +37,7 @@ import (
"github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/httpmw"
"github.com/coder/coder/v2/coderd/prometheusmetrics"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/codersdk"
@ -572,7 +573,7 @@ func (api *API) workspaceAgentLogs(rw http.ResponseWriter, r *http.Request) {
return
}
workspace, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
row, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error fetching workspace by agent id.",
@ -580,6 +581,7 @@ func (api *API) workspaceAgentLogs(rw http.ResponseWriter, r *http.Request) {
})
return
}
workspace := row.Workspace
api.WebsocketWaitMutex.Lock()
api.WebsocketWaitGroup.Add(1)
@ -1648,7 +1650,7 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
ctx := r.Context()
workspaceAgent := httpmw.WorkspaceAgent(r)
workspace, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
row, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Failed to get workspace.",
@ -1656,6 +1658,7 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
})
return
}
workspace := row.Workspace
var req agentsdk.Stats
if !httpapi.Read(ctx, rw, r, &req) {
@ -1681,7 +1684,7 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
var nextAutostart time.Time
if workspace.AutostartSchedule.String != "" {
templateSchedule, err := (*(api.TemplateScheduleStore.Load())).Get(ctx, api.Database, workspace.TemplateID)
// If the template schedule fails to load, just default to bumping without the next trasition and log it.
// If the template schedule fails to load, just default to bumping without the next transition and log it.
if err != nil {
api.Logger.Warn(ctx, "failed to load template schedule bumping activity, defaulting to bumping by 60min",
slog.F("workspace_id", workspace.ID),
@ -1727,7 +1730,12 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
return xerrors.Errorf("can't get user: %w", err)
}
api.Options.UpdateAgentMetrics(ctx, user.Username, workspace.Name, workspaceAgent.Name, req.Metrics)
api.Options.UpdateAgentMetrics(ctx, prometheusmetrics.AgentMetricLabels{
Username: user.Username,
WorkspaceName: workspace.Name,
AgentName: workspaceAgent.Name,
TemplateName: row.TemplateName,
}, req.Metrics)
return nil
})
}
@ -2103,7 +2111,7 @@ func (api *API) workspaceAgentReportLifecycle(rw http.ResponseWriter, r *http.Re
ctx := r.Context()
workspaceAgent := httpmw.WorkspaceAgent(r)
workspace, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
row, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Failed to get workspace.",
@ -2111,6 +2119,7 @@ func (api *API) workspaceAgentReportLifecycle(rw http.ResponseWriter, r *http.Re
})
return
}
workspace := row.Workspace
var req agentsdk.PostLifecycleRequest
if !httpapi.Read(ctx, rw, r, &req) {

View File

@ -78,72 +78,74 @@ spec:
<!-- Code generated by 'make docs/admin/prometheus.md'. DO NOT EDIT -->
| Name | Type | Description | Labels |
| ----------------------------------------------------- | --------- | ------------------------------------------------------------------ | ----------------------------------------------------------------------------------- |
| `coderd_agents_apps` | gauge | Agent applications with statuses. | `agent_name` `app_name` `health` `username` `workspace_name` |
| `coderd_agents_connection_latencies_seconds` | gauge | Agent connection latencies in seconds. | `agent_name` `derp_region` `preferred` `username` `workspace_name` |
| `coderd_agents_connections` | gauge | Agent connections with statuses. | `agent_name` `lifecycle_state` `status` `tailnet_node` `username` `workspace_name` |
| `coderd_agents_up` | gauge | The number of active agents per workspace. | `username` `workspace_name` |
| `coderd_agentstats_connection_count` | gauge | The number of established connections by agent | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_connection_median_latency_seconds` | gauge | The median agent connection latency | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_rx_bytes` | gauge | Agent Rx bytes | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_session_count_jetbrains` | gauge | The number of session established by JetBrains | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_session_count_reconnecting_pty` | gauge | The number of session established by reconnecting PTY | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_session_count_ssh` | gauge | The number of session established by SSH | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_session_count_vscode` | gauge | The number of session established by VSCode | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_tx_bytes` | gauge | Agent Tx bytes | `agent_name` `username` `workspace_name` |
| `coderd_api_active_users_duration_hour` | gauge | The number of users that have been active within the last hour. | |
| `coderd_api_concurrent_requests` | gauge | The number of concurrent API requests. | |
| `coderd_api_concurrent_websockets` | gauge | The total number of concurrent API websockets. | |
| `coderd_api_request_latencies_seconds` | histogram | Latency distribution of requests in seconds. | `method` `path` |
| `coderd_api_requests_processed_total` | counter | The total number of processed API requests | `code` `method` `path` |
| `coderd_api_websocket_durations_seconds` | histogram | Websocket duration distribution of requests in seconds. | `path` |
| `coderd_api_workspace_latest_build_total` | gauge | The latest workspace builds with a status. | `status` |
| `coderd_insights_applications_usage_seconds` | gauge | The application usage per template. | `application_name` `slug` `template_name` |
| `coderd_insights_parameters` | gauge | The parameter usage per template. | `parameter_name` `parameter_type` `parameter_value` `template_name` |
| `coderd_insights_templates_active_users` | gauge | The number of active users of the template. | `template_name` |
| `coderd_license_active_users` | gauge | The number of active users. | |
| `coderd_license_limit_users` | gauge | The user seats limit based on the active Coder license. | |
| `coderd_license_user_limit_enabled` | gauge | Returns 1 if the current license enforces the user limit. | |
| `coderd_metrics_collector_agents_execution_seconds` | histogram | Histogram for duration of agents metrics collection in seconds. | |
| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` |
| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` |
| `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` |
| `go_gc_duration_seconds` | summary | A summary of the pause duration of garbage collection cycles. | |
| `go_goroutines` | gauge | Number of goroutines that currently exist. | |
| `go_info` | gauge | Information about the Go environment. | `version` |
| `go_memstats_alloc_bytes` | gauge | Number of bytes allocated and still in use. | |
| `go_memstats_alloc_bytes_total` | counter | Total number of bytes allocated, even if freed. | |
| `go_memstats_buck_hash_sys_bytes` | gauge | Number of bytes used by the profiling bucket hash table. | |
| `go_memstats_frees_total` | counter | Total number of frees. | |
| `go_memstats_gc_sys_bytes` | gauge | Number of bytes used for garbage collection system metadata. | |
| `go_memstats_heap_alloc_bytes` | gauge | Number of heap bytes allocated and still in use. | |
| `go_memstats_heap_idle_bytes` | gauge | Number of heap bytes waiting to be used. | |
| `go_memstats_heap_inuse_bytes` | gauge | Number of heap bytes that are in use. | |
| `go_memstats_heap_objects` | gauge | Number of allocated objects. | |
| `go_memstats_heap_released_bytes` | gauge | Number of heap bytes released to OS. | |
| `go_memstats_heap_sys_bytes` | gauge | Number of heap bytes obtained from system. | |
| `go_memstats_last_gc_time_seconds` | gauge | Number of seconds since 1970 of last garbage collection. | |
| `go_memstats_lookups_total` | counter | Total number of pointer lookups. | |
| `go_memstats_mallocs_total` | counter | Total number of mallocs. | |
| `go_memstats_mcache_inuse_bytes` | gauge | Number of bytes in use by mcache structures. | |
| `go_memstats_mcache_sys_bytes` | gauge | Number of bytes used for mcache structures obtained from system. | |
| `go_memstats_mspan_inuse_bytes` | gauge | Number of bytes in use by mspan structures. | |
| `go_memstats_mspan_sys_bytes` | gauge | Number of bytes used for mspan structures obtained from system. | |
| `go_memstats_next_gc_bytes` | gauge | Number of heap bytes when next garbage collection will take place. | |
| `go_memstats_other_sys_bytes` | gauge | Number of bytes used for other system allocations. | |
| `go_memstats_stack_inuse_bytes` | gauge | Number of bytes in use by the stack allocator. | |
| `go_memstats_stack_sys_bytes` | gauge | Number of bytes obtained from system for stack allocator. | |
| `go_memstats_sys_bytes` | gauge | Number of bytes obtained from system. | |
| `go_threads` | gauge | Number of OS threads created. | |
| `process_cpu_seconds_total` | counter | Total user and system CPU time spent in seconds. | |
| `process_max_fds` | gauge | Maximum number of open file descriptors. | |
| `process_open_fds` | gauge | Number of open file descriptors. | |
| `process_resident_memory_bytes` | gauge | Resident memory size in bytes. | |
| `process_start_time_seconds` | gauge | Start time of the process since unix epoch in seconds. | |
| `process_virtual_memory_bytes` | gauge | Virtual memory size in bytes. | |
| `process_virtual_memory_max_bytes` | gauge | Maximum amount of virtual memory available in bytes. | |
| `promhttp_metric_handler_requests_in_flight` | gauge | Current number of scrapes being served. | |
| `promhttp_metric_handler_requests_total` | counter | Total number of scrapes by HTTP status code. | `code` |
| Name | Type | Description | Labels |
| ----------------------------------------------------- | --------- | ------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
| `agent_scripts_executed_total` | counter | Total number of scripts executed by the Coder agent. Includes cron scheduled scripts. | `agent_name` `success` `template_name` `username` `workspace_name` |
| `coderd_agents_apps` | gauge | Agent applications with statuses. | `agent_name` `app_name` `health` `username` `workspace_name` |
| `coderd_agents_connection_latencies_seconds` | gauge | Agent connection latencies in seconds. | `agent_name` `derp_region` `preferred` `username` `workspace_name` |
| `coderd_agents_connections` | gauge | Agent connections with statuses. | `agent_name` `lifecycle_state` `status` `tailnet_node` `username` `workspace_name` |
| `coderd_agents_up` | gauge | The number of active agents per workspace. | `template_name` `username` `workspace_name` |
| `coderd_agentstats_connection_count` | gauge | The number of established connections by agent | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_connection_median_latency_seconds` | gauge | The median agent connection latency | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_rx_bytes` | gauge | Agent Rx bytes | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_session_count_jetbrains` | gauge | The number of session established by JetBrains | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_session_count_reconnecting_pty` | gauge | The number of session established by reconnecting PTY | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_session_count_ssh` | gauge | The number of session established by SSH | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_session_count_vscode` | gauge | The number of session established by VSCode | `agent_name` `username` `workspace_name` |
| `coderd_agentstats_startup_script_seconds` | gauge | The number of seconds the startup script took to execute. | `agent_name` `success` `template_name` `username` `workspace_name` |
| `coderd_agentstats_tx_bytes` | gauge | Agent Tx bytes | `agent_name` `username` `workspace_name` |
| `coderd_api_active_users_duration_hour` | gauge | The number of users that have been active within the last hour. | |
| `coderd_api_concurrent_requests` | gauge | The number of concurrent API requests. | |
| `coderd_api_concurrent_websockets` | gauge | The total number of concurrent API websockets. | |
| `coderd_api_request_latencies_seconds` | histogram | Latency distribution of requests in seconds. | `method` `path` |
| `coderd_api_requests_processed_total` | counter | The total number of processed API requests | `code` `method` `path` |
| `coderd_api_websocket_durations_seconds` | histogram | Websocket duration distribution of requests in seconds. | `path` |
| `coderd_api_workspace_latest_build_total` | gauge | The latest workspace builds with a status. | `status` |
| `coderd_insights_applications_usage_seconds` | gauge | The application usage per template. | `application_name` `slug` `template_name` |
| `coderd_insights_parameters` | gauge | The parameter usage per template. | `parameter_name` `parameter_type` `parameter_value` `template_name` |
| `coderd_insights_templates_active_users` | gauge | The number of active users of the template. | `template_name` |
| `coderd_license_active_users` | gauge | The number of active users. | |
| `coderd_license_limit_users` | gauge | The user seats limit based on the active Coder license. | |
| `coderd_license_user_limit_enabled` | gauge | Returns 1 if the current license enforces the user limit. | |
| `coderd_metrics_collector_agents_execution_seconds` | histogram | Histogram for duration of agents metrics collection in seconds. | |
| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` |
| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` |
| `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` |
| `go_gc_duration_seconds` | summary | A summary of the pause duration of garbage collection cycles. | |
| `go_goroutines` | gauge | Number of goroutines that currently exist. | |
| `go_info` | gauge | Information about the Go environment. | `version` |
| `go_memstats_alloc_bytes` | gauge | Number of bytes allocated and still in use. | |
| `go_memstats_alloc_bytes_total` | counter | Total number of bytes allocated, even if freed. | |
| `go_memstats_buck_hash_sys_bytes` | gauge | Number of bytes used by the profiling bucket hash table. | |
| `go_memstats_frees_total` | counter | Total number of frees. | |
| `go_memstats_gc_sys_bytes` | gauge | Number of bytes used for garbage collection system metadata. | |
| `go_memstats_heap_alloc_bytes` | gauge | Number of heap bytes allocated and still in use. | |
| `go_memstats_heap_idle_bytes` | gauge | Number of heap bytes waiting to be used. | |
| `go_memstats_heap_inuse_bytes` | gauge | Number of heap bytes that are in use. | |
| `go_memstats_heap_objects` | gauge | Number of allocated objects. | |
| `go_memstats_heap_released_bytes` | gauge | Number of heap bytes released to OS. | |
| `go_memstats_heap_sys_bytes` | gauge | Number of heap bytes obtained from system. | |
| `go_memstats_last_gc_time_seconds` | gauge | Number of seconds since 1970 of last garbage collection. | |
| `go_memstats_lookups_total` | counter | Total number of pointer lookups. | |
| `go_memstats_mallocs_total` | counter | Total number of mallocs. | |
| `go_memstats_mcache_inuse_bytes` | gauge | Number of bytes in use by mcache structures. | |
| `go_memstats_mcache_sys_bytes` | gauge | Number of bytes used for mcache structures obtained from system. | |
| `go_memstats_mspan_inuse_bytes` | gauge | Number of bytes in use by mspan structures. | |
| `go_memstats_mspan_sys_bytes` | gauge | Number of bytes used for mspan structures obtained from system. | |
| `go_memstats_next_gc_bytes` | gauge | Number of heap bytes when next garbage collection will take place. | |
| `go_memstats_other_sys_bytes` | gauge | Number of bytes used for other system allocations. | |
| `go_memstats_stack_inuse_bytes` | gauge | Number of bytes in use by the stack allocator. | |
| `go_memstats_stack_sys_bytes` | gauge | Number of bytes obtained from system for stack allocator. | |
| `go_memstats_sys_bytes` | gauge | Number of bytes obtained from system. | |
| `go_threads` | gauge | Number of OS threads created. | |
| `process_cpu_seconds_total` | counter | Total user and system CPU time spent in seconds. | |
| `process_max_fds` | gauge | Maximum number of open file descriptors. | |
| `process_open_fds` | gauge | Number of open file descriptors. | |
| `process_resident_memory_bytes` | gauge | Resident memory size in bytes. | |
| `process_start_time_seconds` | gauge | Start time of the process since unix epoch in seconds. | |
| `process_virtual_memory_bytes` | gauge | Virtual memory size in bytes. | |
| `process_virtual_memory_max_bytes` | gauge | Maximum amount of virtual memory available in bytes. | |
| `promhttp_metric_handler_requests_in_flight` | gauge | Current number of scrapes being served. | |
| `promhttp_metric_handler_requests_total` | counter | Total number of scrapes by HTTP status code. | `code` |
<!-- End generated by 'make docs/admin/prometheus.md'. -->

View File

@ -15,9 +15,15 @@ coderd_agents_connections{agent_name="main",lifecycle_state="start_timeout",stat
coderd_agents_connections{agent_name="main",lifecycle_state="start_timeout",status="connected",tailnet_node="nodeid:3779bd45d00be0eb",username="admin",workspace_name="workspace-1"} 1
# HELP coderd_agents_up The number of active agents per workspace.
# TYPE coderd_agents_up gauge
coderd_agents_up{username="admin",workspace_name="workspace-1"} 1
coderd_agents_up{username="admin",workspace_name="workspace-2"} 1
coderd_agents_up{username="admin",workspace_name="workspace-3"} 1
coderd_agents_up{template_name="docker", username="admin",workspace_name="workspace-1"} 1
coderd_agents_up{template_name="docker", username="admin",workspace_name="workspace-2"} 1
coderd_agents_up{template_name="gcp", username="admin",workspace_name="workspace-3"} 1
# HELP coderd_agentstats_startup_script_seconds The number of seconds the startup script took to execute.
# TYPE coderd_agentstats_startup_script_seconds gauge
coderd_agentstats_startup_script_seconds{agent_name="main",success="true",template_name="docker",username="admin",workspace_name="workspace-1"} 1.969900304
# HELP agent_scripts_executed_total Total number of scripts executed by the Coder agent. Includes cron scheduled scripts.
# TYPE agent_scripts_executed_total counter
agent_scripts_executed_total{agent_name="main",success="true",template_name="docker",username="admin",workspace_name="workspace-1"} 1
# HELP coderd_agentstats_connection_count The number of established connections by agent
# TYPE coderd_agentstats_connection_count gauge
coderd_agentstats_connection_count{agent_name="main",username="admin",workspace_name="workspace1"} 2