mirror of https://github.com/coder/coder.git
feat: add metrics to workspace agent scripts (#11132)
* push startup script metrics to agent
This commit is contained in:
parent
41ed581460
commit
b7bdb17460
|
@ -35,6 +35,8 @@ import (
|
|||
"tailscale.com/types/netlogtype"
|
||||
|
||||
"cdr.dev/slog"
|
||||
"github.com/coder/retry"
|
||||
|
||||
"github.com/coder/coder/v2/agent/agentproc"
|
||||
"github.com/coder/coder/v2/agent/agentscripts"
|
||||
"github.com/coder/coder/v2/agent/agentssh"
|
||||
|
@ -45,7 +47,6 @@ import (
|
|||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/agentsdk"
|
||||
"github.com/coder/coder/v2/tailnet"
|
||||
"github.com/coder/retry"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -222,8 +223,10 @@ type agent struct {
|
|||
connCountReconnectingPTY atomic.Int64
|
||||
|
||||
prometheusRegistry *prometheus.Registry
|
||||
metrics *agentMetrics
|
||||
syscaller agentproc.Syscaller
|
||||
// metrics are prometheus registered metrics that will be collected and
|
||||
// labeled in Coder with the agent + workspace.
|
||||
metrics *agentMetrics
|
||||
syscaller agentproc.Syscaller
|
||||
|
||||
// modifiedProcs is used for testing process priority management.
|
||||
modifiedProcs chan []*agentproc.Process
|
||||
|
@ -252,6 +255,9 @@ func (a *agent) init(ctx context.Context) {
|
|||
Filesystem: a.filesystem,
|
||||
PatchLogs: a.client.PatchLogs,
|
||||
})
|
||||
// Register runner metrics. If the prom registry is nil, the metrics
|
||||
// will not report anywhere.
|
||||
a.scriptRunner.RegisterMetrics(a.prometheusRegistry)
|
||||
go a.runLoop(ctx)
|
||||
}
|
||||
|
||||
|
@ -745,9 +751,12 @@ func (a *agent) run(ctx context.Context) error {
|
|||
return xerrors.Errorf("init script runner: %w", err)
|
||||
}
|
||||
err = a.trackConnGoroutine(func() {
|
||||
start := time.Now()
|
||||
err := a.scriptRunner.Execute(ctx, func(script codersdk.WorkspaceAgentScript) bool {
|
||||
return script.RunOnStart
|
||||
})
|
||||
// Measure the time immediately after the script has finished
|
||||
dur := time.Since(start).Seconds()
|
||||
if err != nil {
|
||||
a.logger.Warn(ctx, "startup script(s) failed", slog.Error(err))
|
||||
if errors.Is(err, agentscripts.ErrTimeout) {
|
||||
|
@ -758,6 +767,12 @@ func (a *agent) run(ctx context.Context) error {
|
|||
} else {
|
||||
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleReady)
|
||||
}
|
||||
|
||||
label := "false"
|
||||
if err == nil {
|
||||
label = "true"
|
||||
}
|
||||
a.metrics.startupScriptSeconds.WithLabelValues(label).Set(dur)
|
||||
a.scriptRunner.StartCron()
|
||||
})
|
||||
if err != nil {
|
||||
|
|
|
@ -46,6 +46,7 @@ import (
|
|||
"cdr.dev/slog"
|
||||
"cdr.dev/slog/sloggers/sloghuman"
|
||||
"cdr.dev/slog/sloggers/slogtest"
|
||||
|
||||
"github.com/coder/coder/v2/agent"
|
||||
"github.com/coder/coder/v2/agent/agentproc"
|
||||
"github.com/coder/coder/v2/agent/agentproc/agentproctest"
|
||||
|
@ -2235,6 +2236,17 @@ func TestAgent_Metrics_SSH(t *testing.T) {
|
|||
Type: agentsdk.AgentMetricTypeCounter,
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "coderd_agentstats_startup_script_seconds",
|
||||
Type: agentsdk.AgentMetricTypeGauge,
|
||||
Value: 0,
|
||||
Labels: []agentsdk.AgentMetricLabel{
|
||||
{
|
||||
Name: "success",
|
||||
Value: "true",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var actual []*promgo.MetricFamily
|
||||
|
|
|
@ -13,12 +13,14 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/robfig/cron/v3"
|
||||
"github.com/spf13/afero"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"cdr.dev/slog"
|
||||
|
||||
"github.com/coder/coder/v2/agent/agentssh"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/agentsdk"
|
||||
|
@ -57,6 +59,11 @@ func New(opts Options) *Runner {
|
|||
cronCtxCancel: cronCtxCancel,
|
||||
cron: cron.New(cron.WithParser(parser)),
|
||||
closed: make(chan struct{}),
|
||||
scriptsExecuted: prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "agent",
|
||||
Subsystem: "scripts",
|
||||
Name: "executed_total",
|
||||
}, []string{"success"}),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -71,6 +78,19 @@ type Runner struct {
|
|||
cron *cron.Cron
|
||||
initialized atomic.Bool
|
||||
scripts []codersdk.WorkspaceAgentScript
|
||||
|
||||
// scriptsExecuted includes all scripts executed by the workspace agent. Agents
|
||||
// execute startup scripts, and scripts on a cron schedule. Both will increment
|
||||
// this counter.
|
||||
scriptsExecuted *prometheus.CounterVec
|
||||
}
|
||||
|
||||
func (r *Runner) RegisterMetrics(reg prometheus.Registerer) {
|
||||
if reg == nil {
|
||||
// If no registry, do nothing.
|
||||
return
|
||||
}
|
||||
reg.MustRegister(r.scriptsExecuted)
|
||||
}
|
||||
|
||||
// Init initializes the runner with the provided scripts.
|
||||
|
@ -90,7 +110,7 @@ func (r *Runner) Init(scripts []codersdk.WorkspaceAgentScript) error {
|
|||
}
|
||||
script := script
|
||||
_, err := r.cron.AddFunc(script.Cron, func() {
|
||||
err := r.run(r.cronCtx, script)
|
||||
err := r.trackRun(r.cronCtx, script)
|
||||
if err != nil {
|
||||
r.Logger.Warn(context.Background(), "run agent script on schedule", slog.Error(err))
|
||||
}
|
||||
|
@ -131,7 +151,7 @@ func (r *Runner) Execute(ctx context.Context, filter func(script codersdk.Worksp
|
|||
}
|
||||
script := script
|
||||
eg.Go(func() error {
|
||||
err := r.run(ctx, script)
|
||||
err := r.trackRun(ctx, script)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("run agent script %q: %w", script.LogSourceID, err)
|
||||
}
|
||||
|
@ -141,6 +161,17 @@ func (r *Runner) Execute(ctx context.Context, filter func(script codersdk.Worksp
|
|||
return eg.Wait()
|
||||
}
|
||||
|
||||
// trackRun wraps "run" with metrics.
|
||||
func (r *Runner) trackRun(ctx context.Context, script codersdk.WorkspaceAgentScript) error {
|
||||
err := r.run(ctx, script)
|
||||
if err != nil {
|
||||
r.scriptsExecuted.WithLabelValues("false").Add(1)
|
||||
} else {
|
||||
r.scriptsExecuted.WithLabelValues("true").Add(1)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// run executes the provided script with the timeout.
|
||||
// If the timeout is exceeded, the process is sent an interrupt signal.
|
||||
// If the process does not exit after a few seconds, it is forcefully killed.
|
||||
|
|
|
@ -17,6 +17,9 @@ import (
|
|||
type agentMetrics struct {
|
||||
connectionsTotal prometheus.Counter
|
||||
reconnectingPTYErrors *prometheus.CounterVec
|
||||
// startupScriptSeconds is the time in seconds that the start script(s)
|
||||
// took to run. This is reported once per agent.
|
||||
startupScriptSeconds *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
|
||||
|
@ -35,9 +38,18 @@ func newAgentMetrics(registerer prometheus.Registerer) *agentMetrics {
|
|||
)
|
||||
registerer.MustRegister(reconnectingPTYErrors)
|
||||
|
||||
startupScriptSeconds := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "coderd",
|
||||
Subsystem: "agentstats",
|
||||
Name: "startup_script_seconds",
|
||||
Help: "Amount of time taken to run the startup script in seconds.",
|
||||
}, []string{"success"})
|
||||
registerer.MustRegister(startupScriptSeconds)
|
||||
|
||||
return &agentMetrics{
|
||||
connectionsTotal: connectionsTotal,
|
||||
reconnectingPTYErrors: reconnectingPTYErrors,
|
||||
startupScriptSeconds: startupScriptSeconds,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -38,8 +38,10 @@ import (
|
|||
_ "github.com/coder/coder/v2/coderd/apidoc"
|
||||
"github.com/coder/coder/v2/coderd/externalauth"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
|
||||
"github.com/coder/coder/v2/coderd/prometheusmetrics"
|
||||
|
||||
"cdr.dev/slog"
|
||||
|
||||
"github.com/coder/coder/v2/buildinfo"
|
||||
"github.com/coder/coder/v2/cli/clibase"
|
||||
"github.com/coder/coder/v2/coderd/audit"
|
||||
|
@ -168,7 +170,7 @@ type Options struct {
|
|||
|
||||
HTTPClient *http.Client
|
||||
|
||||
UpdateAgentMetrics func(ctx context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric)
|
||||
UpdateAgentMetrics func(ctx context.Context, labels prometheusmetrics.AgentMetricLabels, metrics []agentsdk.AgentMetric)
|
||||
StatsBatcher *batchstats.Batcher
|
||||
|
||||
WorkspaceAppsStatsCollectorOptions workspaceapps.StatsCollectorOptions
|
||||
|
|
|
@ -1918,7 +1918,7 @@ func (q *querier) GetWorkspaceBuildsCreatedAfter(ctx context.Context, createdAt
|
|||
return q.db.GetWorkspaceBuildsCreatedAfter(ctx, createdAt)
|
||||
}
|
||||
|
||||
func (q *querier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.Workspace, error) {
|
||||
func (q *querier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.GetWorkspaceByAgentIDRow, error) {
|
||||
return fetch(q.log, q.auth, q.db.GetWorkspaceByAgentID)(ctx, agentID)
|
||||
}
|
||||
|
||||
|
|
|
@ -1065,21 +1065,30 @@ func (s *MethodTestSuite) TestWorkspace() {
|
|||
check.Args(ws.ID).Asserts(ws, rbac.ActionRead).Returns(b)
|
||||
}))
|
||||
s.Run("GetWorkspaceAgentByID", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
check.Args(agt.ID).Asserts(ws, rbac.ActionRead).Returns(agt)
|
||||
}))
|
||||
s.Run("GetWorkspaceAgentByInstanceID", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
check.Args(agt.AuthInstanceID.String).Asserts(ws, rbac.ActionRead).Returns(agt)
|
||||
}))
|
||||
s.Run("UpdateWorkspaceAgentLifecycleStateByID", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
|
@ -1089,7 +1098,10 @@ func (s *MethodTestSuite) TestWorkspace() {
|
|||
}).Asserts(ws, rbac.ActionUpdate).Returns()
|
||||
}))
|
||||
s.Run("UpdateWorkspaceAgentLogOverflowByID", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
|
@ -1099,7 +1111,10 @@ func (s *MethodTestSuite) TestWorkspace() {
|
|||
}).Asserts(ws, rbac.ActionUpdate).Returns()
|
||||
}))
|
||||
s.Run("UpdateWorkspaceAgentStartupByID", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
|
@ -1111,7 +1126,10 @@ func (s *MethodTestSuite) TestWorkspace() {
|
|||
}).Asserts(ws, rbac.ActionUpdate).Returns()
|
||||
}))
|
||||
s.Run("GetWorkspaceAgentLogsAfter", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
|
@ -1120,7 +1138,10 @@ func (s *MethodTestSuite) TestWorkspace() {
|
|||
}).Asserts(ws, rbac.ActionRead).Returns([]database.WorkspaceAgentLog{})
|
||||
}))
|
||||
s.Run("GetWorkspaceAppByAgentIDAndSlug", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
|
@ -1132,7 +1153,10 @@ func (s *MethodTestSuite) TestWorkspace() {
|
|||
}).Asserts(ws, rbac.ActionRead).Returns(app)
|
||||
}))
|
||||
s.Run("GetWorkspaceAppsByAgentID", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
|
@ -1173,11 +1197,17 @@ func (s *MethodTestSuite) TestWorkspace() {
|
|||
check.Args(database.GetWorkspaceBuildsByWorkspaceIDParams{WorkspaceID: ws.ID}).Asserts(ws, rbac.ActionRead) // ordering
|
||||
}))
|
||||
s.Run("GetWorkspaceByAgentID", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
tpl := dbgen.Template(s.T(), db, database.Template{})
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{
|
||||
TemplateID: tpl.ID,
|
||||
})
|
||||
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
|
||||
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
|
||||
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
|
||||
check.Args(agt.ID).Asserts(ws, rbac.ActionRead).Returns(ws)
|
||||
check.Args(agt.ID).Asserts(ws, rbac.ActionRead).Returns(database.GetWorkspaceByAgentIDRow{
|
||||
Workspace: ws,
|
||||
TemplateName: tpl.Name,
|
||||
})
|
||||
}))
|
||||
s.Run("GetWorkspaceByOwnerIDAndName", s.Subtest(func(db database.Store, check *expects) {
|
||||
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
|
||||
|
|
|
@ -4293,11 +4293,24 @@ func (q *FakeQuerier) GetWorkspaceBuildsCreatedAfter(_ context.Context, after ti
|
|||
return workspaceBuilds, nil
|
||||
}
|
||||
|
||||
func (q *FakeQuerier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.Workspace, error) {
|
||||
func (q *FakeQuerier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.GetWorkspaceByAgentIDRow, error) {
|
||||
q.mutex.RLock()
|
||||
defer q.mutex.RUnlock()
|
||||
|
||||
return q.getWorkspaceByAgentIDNoLock(ctx, agentID)
|
||||
w, err := q.getWorkspaceByAgentIDNoLock(ctx, agentID)
|
||||
if err != nil {
|
||||
return database.GetWorkspaceByAgentIDRow{}, err
|
||||
}
|
||||
|
||||
tpl, err := q.getTemplateByIDNoLock(ctx, w.TemplateID)
|
||||
if err != nil {
|
||||
return database.GetWorkspaceByAgentIDRow{}, err
|
||||
}
|
||||
|
||||
return database.GetWorkspaceByAgentIDRow{
|
||||
Workspace: w,
|
||||
TemplateName: tpl.Name,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (q *FakeQuerier) GetWorkspaceByID(ctx context.Context, id uuid.UUID) (database.Workspace, error) {
|
||||
|
|
|
@ -1124,7 +1124,7 @@ func (m metricsStore) GetWorkspaceBuildsCreatedAfter(ctx context.Context, create
|
|||
return builds, err
|
||||
}
|
||||
|
||||
func (m metricsStore) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.Workspace, error) {
|
||||
func (m metricsStore) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (database.GetWorkspaceByAgentIDRow, error) {
|
||||
start := time.Now()
|
||||
workspace, err := m.s.GetWorkspaceByAgentID(ctx, agentID)
|
||||
m.queryLatencies.WithLabelValues("GetWorkspaceByAgentID").Observe(time.Since(start).Seconds())
|
||||
|
|
|
@ -2344,10 +2344,10 @@ func (mr *MockStoreMockRecorder) GetWorkspaceBuildsCreatedAfter(arg0, arg1 inter
|
|||
}
|
||||
|
||||
// GetWorkspaceByAgentID mocks base method.
|
||||
func (m *MockStore) GetWorkspaceByAgentID(arg0 context.Context, arg1 uuid.UUID) (database.Workspace, error) {
|
||||
func (m *MockStore) GetWorkspaceByAgentID(arg0 context.Context, arg1 uuid.UUID) (database.GetWorkspaceByAgentIDRow, error) {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "GetWorkspaceByAgentID", arg0, arg1)
|
||||
ret0, _ := ret[0].(database.Workspace)
|
||||
ret0, _ := ret[0].(database.GetWorkspaceByAgentIDRow)
|
||||
ret1, _ := ret[1].(error)
|
||||
return ret0, ret1
|
||||
}
|
||||
|
|
|
@ -148,6 +148,10 @@ func (g Group) RBACObject() rbac.Object {
|
|||
InOrg(g.OrganizationID)
|
||||
}
|
||||
|
||||
func (w GetWorkspaceByAgentIDRow) RBACObject() rbac.Object {
|
||||
return w.Workspace.RBACObject()
|
||||
}
|
||||
|
||||
func (w Workspace) RBACObject() rbac.Object {
|
||||
return rbac.ResourceWorkspace.WithID(w.ID).
|
||||
InOrg(w.OrganizationID).
|
||||
|
|
|
@ -231,7 +231,7 @@ type sqlcQuerier interface {
|
|||
GetWorkspaceBuildParameters(ctx context.Context, workspaceBuildID uuid.UUID) ([]WorkspaceBuildParameter, error)
|
||||
GetWorkspaceBuildsByWorkspaceID(ctx context.Context, arg GetWorkspaceBuildsByWorkspaceIDParams) ([]WorkspaceBuild, error)
|
||||
GetWorkspaceBuildsCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceBuild, error)
|
||||
GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (Workspace, error)
|
||||
GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (GetWorkspaceByAgentIDRow, error)
|
||||
GetWorkspaceByID(ctx context.Context, id uuid.UUID) (Workspace, error)
|
||||
GetWorkspaceByOwnerIDAndName(ctx context.Context, arg GetWorkspaceByOwnerIDAndNameParams) (Workspace, error)
|
||||
GetWorkspaceByWorkspaceAppID(ctx context.Context, workspaceAppID uuid.UUID) (Workspace, error)
|
||||
|
|
|
@ -10539,9 +10539,12 @@ func (q *sqlQuerier) GetDeploymentWorkspaceStats(ctx context.Context) (GetDeploy
|
|||
|
||||
const getWorkspaceByAgentID = `-- name: GetWorkspaceByAgentID :one
|
||||
SELECT
|
||||
id, created_at, updated_at, owner_id, organization_id, template_id, deleted, name, autostart_schedule, ttl, last_used_at, dormant_at, deleting_at, automatic_updates
|
||||
workspaces.id, workspaces.created_at, workspaces.updated_at, workspaces.owner_id, workspaces.organization_id, workspaces.template_id, workspaces.deleted, workspaces.name, workspaces.autostart_schedule, workspaces.ttl, workspaces.last_used_at, workspaces.dormant_at, workspaces.deleting_at, workspaces.automatic_updates,
|
||||
templates.name as template_name
|
||||
FROM
|
||||
workspaces
|
||||
INNER JOIN
|
||||
templates ON workspaces.template_id = templates.id
|
||||
WHERE
|
||||
workspaces.id = (
|
||||
SELECT
|
||||
|
@ -10567,24 +10570,30 @@ WHERE
|
|||
)
|
||||
`
|
||||
|
||||
func (q *sqlQuerier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (Workspace, error) {
|
||||
type GetWorkspaceByAgentIDRow struct {
|
||||
Workspace Workspace `db:"workspace" json:"workspace"`
|
||||
TemplateName string `db:"template_name" json:"template_name"`
|
||||
}
|
||||
|
||||
func (q *sqlQuerier) GetWorkspaceByAgentID(ctx context.Context, agentID uuid.UUID) (GetWorkspaceByAgentIDRow, error) {
|
||||
row := q.db.QueryRowContext(ctx, getWorkspaceByAgentID, agentID)
|
||||
var i Workspace
|
||||
var i GetWorkspaceByAgentIDRow
|
||||
err := row.Scan(
|
||||
&i.ID,
|
||||
&i.CreatedAt,
|
||||
&i.UpdatedAt,
|
||||
&i.OwnerID,
|
||||
&i.OrganizationID,
|
||||
&i.TemplateID,
|
||||
&i.Deleted,
|
||||
&i.Name,
|
||||
&i.AutostartSchedule,
|
||||
&i.Ttl,
|
||||
&i.LastUsedAt,
|
||||
&i.DormantAt,
|
||||
&i.DeletingAt,
|
||||
&i.AutomaticUpdates,
|
||||
&i.Workspace.ID,
|
||||
&i.Workspace.CreatedAt,
|
||||
&i.Workspace.UpdatedAt,
|
||||
&i.Workspace.OwnerID,
|
||||
&i.Workspace.OrganizationID,
|
||||
&i.Workspace.TemplateID,
|
||||
&i.Workspace.Deleted,
|
||||
&i.Workspace.Name,
|
||||
&i.Workspace.AutostartSchedule,
|
||||
&i.Workspace.Ttl,
|
||||
&i.Workspace.LastUsedAt,
|
||||
&i.Workspace.DormantAt,
|
||||
&i.Workspace.DeletingAt,
|
||||
&i.Workspace.AutomaticUpdates,
|
||||
&i.TemplateName,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
|
|
@ -46,9 +46,12 @@ WHERE
|
|||
|
||||
-- name: GetWorkspaceByAgentID :one
|
||||
SELECT
|
||||
*
|
||||
sqlc.embed(workspaces),
|
||||
templates.name as template_name
|
||||
FROM
|
||||
workspaces
|
||||
INNER JOIN
|
||||
templates ON workspaces.template_id = templates.id
|
||||
WHERE
|
||||
workspaces.id = (
|
||||
SELECT
|
||||
|
|
|
@ -47,6 +47,7 @@ type updateRequest struct {
|
|||
username string
|
||||
workspaceName string
|
||||
agentName string
|
||||
templateName string
|
||||
|
||||
metrics []agentsdk.AgentMetric
|
||||
|
||||
|
@ -59,6 +60,7 @@ type annotatedMetric struct {
|
|||
username string
|
||||
workspaceName string
|
||||
agentName string
|
||||
templateName string
|
||||
|
||||
expiryDate time.Time
|
||||
}
|
||||
|
@ -74,7 +76,7 @@ func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) {
|
|||
labelValues := make([]string, 0, len(agentMetricsLabels)+len(am.Labels))
|
||||
|
||||
labels = append(labels, agentMetricsLabels...)
|
||||
labelValues = append(labelValues, am.username, am.workspaceName, am.agentName)
|
||||
labelValues = append(labelValues, am.username, am.workspaceName, am.agentName, am.templateName)
|
||||
|
||||
for _, l := range am.Labels {
|
||||
labels = append(labels, l.Name)
|
||||
|
@ -160,6 +162,7 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
|
|||
username: req.username,
|
||||
workspaceName: req.workspaceName,
|
||||
agentName: req.agentName,
|
||||
templateName: req.templateName,
|
||||
|
||||
AgentMetric: m,
|
||||
|
||||
|
@ -227,7 +230,16 @@ func (ma *MetricsAggregator) Run(ctx context.Context) func() {
|
|||
func (*MetricsAggregator) Describe(_ chan<- *prometheus.Desc) {
|
||||
}
|
||||
|
||||
var agentMetricsLabels = []string{usernameLabel, workspaceNameLabel, agentNameLabel}
|
||||
var agentMetricsLabels = []string{usernameLabel, workspaceNameLabel, agentNameLabel, templateNameLabel}
|
||||
|
||||
// AgentMetricLabels holds the label values used to decorate an agent's
// metrics. Its fields should stay in sync with the label names listed in
// agentMetricsLabels.
type AgentMetricLabels struct {
	// Username is the value for usernameLabel.
	Username string
	// WorkspaceName is the value for workspaceNameLabel.
	WorkspaceName string
	// AgentName is the value for agentNameLabel.
	AgentName string
	// TemplateName is the value for templateNameLabel.
	TemplateName string
}
|
||||
|
||||
func (ma *MetricsAggregator) Collect(ch chan<- prometheus.Metric) {
|
||||
output := make(chan []prometheus.Metric, 1)
|
||||
|
@ -246,12 +258,13 @@ func (ma *MetricsAggregator) Collect(ch chan<- prometheus.Metric) {
|
|||
}
|
||||
}
|
||||
|
||||
func (ma *MetricsAggregator) Update(ctx context.Context, username, workspaceName, agentName string, metrics []agentsdk.AgentMetric) {
|
||||
func (ma *MetricsAggregator) Update(ctx context.Context, labels AgentMetricLabels, metrics []agentsdk.AgentMetric) {
|
||||
select {
|
||||
case ma.updateCh <- updateRequest{
|
||||
username: username,
|
||||
workspaceName: workspaceName,
|
||||
agentName: agentName,
|
||||
username: labels.Username,
|
||||
workspaceName: labels.WorkspaceName,
|
||||
agentName: labels.AgentName,
|
||||
templateName: labels.TemplateName,
|
||||
metrics: metrics,
|
||||
|
||||
timestamp: time.Now(),
|
||||
|
|
|
@ -2,6 +2,7 @@ package prometheusmetrics_test
|
|||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -12,6 +13,7 @@ import (
|
|||
"github.com/stretchr/testify/require"
|
||||
|
||||
"cdr.dev/slog/sloggers/slogtest"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/prometheusmetrics"
|
||||
"github.com/coder/coder/v2/codersdk/agentsdk"
|
||||
"github.com/coder/coder/v2/cryptorand"
|
||||
|
@ -22,8 +24,16 @@ const (
|
|||
testWorkspaceName = "yogi-workspace"
|
||||
testUsername = "yogi-bear"
|
||||
testAgentName = "main-agent"
|
||||
testTemplateName = "main-template"
|
||||
)
|
||||
|
||||
var testLabels = prometheusmetrics.AgentMetricLabels{
|
||||
Username: testUsername,
|
||||
WorkspaceName: testWorkspaceName,
|
||||
AgentName: testAgentName,
|
||||
TemplateName: testTemplateName,
|
||||
}
|
||||
|
||||
func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
@ -58,6 +68,7 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
|
|||
{Name: "agent_name", Value: testAgentName},
|
||||
{Name: "username", Value: testUsername},
|
||||
{Name: "workspace_name", Value: testWorkspaceName},
|
||||
{Name: "template_name", Value: testTemplateName},
|
||||
}
|
||||
expected := []agentsdk.AgentMetric{
|
||||
{Name: "a_counter_one", Type: agentsdk.AgentMetricTypeCounter, Value: 1, Labels: commonLabels},
|
||||
|
@ -69,13 +80,14 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
|
|||
{Name: "hello", Value: "world"},
|
||||
{Name: "username", Value: testUsername},
|
||||
{Name: "workspace_name", Value: testWorkspaceName},
|
||||
{Name: "template_name", Value: testTemplateName},
|
||||
}},
|
||||
{Name: "d_gauge_four", Type: agentsdk.AgentMetricTypeGauge, Value: 6, Labels: commonLabels},
|
||||
}
|
||||
|
||||
// when
|
||||
metricsAggregator.Update(ctx, testUsername, testWorkspaceName, testAgentName, given1)
|
||||
metricsAggregator.Update(ctx, testUsername, testWorkspaceName, testAgentName, given2)
|
||||
metricsAggregator.Update(ctx, testLabels, given1)
|
||||
metricsAggregator.Update(ctx, testLabels, given2)
|
||||
|
||||
// then
|
||||
require.Eventually(t, func() bool {
|
||||
|
@ -119,6 +131,10 @@ func verifyCollectedMetrics(t *testing.T, expected []agentsdk.AgentMetric, actua
|
|||
}
|
||||
|
||||
dtoLabels := asMetricAgentLabels(d.GetLabel())
|
||||
// dto labels are sorted in alphabetical order.
|
||||
sort.Slice(e.Labels, func(i, j int) bool {
|
||||
return e.Labels[i].Name < e.Labels[j].Name
|
||||
})
|
||||
require.Equal(t, e.Labels, dtoLabels, d.String())
|
||||
}
|
||||
return true
|
||||
|
@ -154,7 +170,7 @@ func TestUpdateMetrics_MetricsExpire(t *testing.T) {
|
|||
}
|
||||
|
||||
// when
|
||||
metricsAggregator.Update(ctx, testUsername, testWorkspaceName, testAgentName, given)
|
||||
metricsAggregator.Update(ctx, testLabels, given)
|
||||
|
||||
time.Sleep(time.Millisecond * 10) // Ensure that metric is expired
|
||||
|
||||
|
@ -220,7 +236,7 @@ func Benchmark_MetricsAggregator_Run(b *testing.B) {
|
|||
b.Logf("N=%d sending %d metrics", b.N, numMetrics)
|
||||
var nGot atomic.Int64
|
||||
b.StartTimer()
|
||||
metricsAggregator.Update(ctx, testUsername, testWorkspaceName, testAgentName, metrics)
|
||||
metricsAggregator.Update(ctx, testLabels, metrics)
|
||||
for i := 0; i < numMetrics; i++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
|
|
|
@ -17,6 +17,7 @@ import (
|
|||
"tailscale.com/tailcfg"
|
||||
|
||||
"cdr.dev/slog"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/database"
|
||||
"github.com/coder/coder/v2/coderd/database/dbauthz"
|
||||
"github.com/coder/coder/v2/coderd/database/dbtime"
|
||||
|
@ -24,6 +25,7 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
templateNameLabel = "template_name"
|
||||
agentNameLabel = "agent_name"
|
||||
usernameLabel = "username"
|
||||
workspaceNameLabel = "workspace_name"
|
||||
|
@ -154,7 +156,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
|
|||
Subsystem: "agents",
|
||||
Name: "up",
|
||||
Help: "The number of active agents per workspace.",
|
||||
}, []string{usernameLabel, workspaceNameLabel, "template_name", "template_version"}))
|
||||
}, []string{usernameLabel, workspaceNameLabel, templateNameLabel, "template_version"}))
|
||||
err := registerer.Register(agentsGauge)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -37,6 +37,7 @@ import (
|
|||
"github.com/coder/coder/v2/coderd/externalauth"
|
||||
"github.com/coder/coder/v2/coderd/httpapi"
|
||||
"github.com/coder/coder/v2/coderd/httpmw"
|
||||
"github.com/coder/coder/v2/coderd/prometheusmetrics"
|
||||
"github.com/coder/coder/v2/coderd/rbac"
|
||||
"github.com/coder/coder/v2/coderd/util/ptr"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
|
@ -572,7 +573,7 @@ func (api *API) workspaceAgentLogs(rw http.ResponseWriter, r *http.Request) {
|
|||
return
|
||||
}
|
||||
|
||||
workspace, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
|
||||
row, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Internal error fetching workspace by agent id.",
|
||||
|
@ -580,6 +581,7 @@ func (api *API) workspaceAgentLogs(rw http.ResponseWriter, r *http.Request) {
|
|||
})
|
||||
return
|
||||
}
|
||||
workspace := row.Workspace
|
||||
|
||||
api.WebsocketWaitMutex.Lock()
|
||||
api.WebsocketWaitGroup.Add(1)
|
||||
|
@ -1648,7 +1650,7 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
|
|||
ctx := r.Context()
|
||||
|
||||
workspaceAgent := httpmw.WorkspaceAgent(r)
|
||||
workspace, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
|
||||
row, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
|
||||
Message: "Failed to get workspace.",
|
||||
|
@ -1656,6 +1658,7 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
|
|||
})
|
||||
return
|
||||
}
|
||||
workspace := row.Workspace
|
||||
|
||||
var req agentsdk.Stats
|
||||
if !httpapi.Read(ctx, rw, r, &req) {
|
||||
|
@ -1681,7 +1684,7 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
|
|||
var nextAutostart time.Time
|
||||
if workspace.AutostartSchedule.String != "" {
|
||||
templateSchedule, err := (*(api.TemplateScheduleStore.Load())).Get(ctx, api.Database, workspace.TemplateID)
|
||||
// If the template schedule fails to load, just default to bumping without the next trasition and log it.
|
||||
// If the template schedule fails to load, just default to bumping without the next transition and log it.
|
||||
if err != nil {
|
||||
api.Logger.Warn(ctx, "failed to load template schedule bumping activity, defaulting to bumping by 60min",
|
||||
slog.F("workspace_id", workspace.ID),
|
||||
|
@ -1727,7 +1730,12 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
|
|||
return xerrors.Errorf("can't get user: %w", err)
|
||||
}
|
||||
|
||||
api.Options.UpdateAgentMetrics(ctx, user.Username, workspace.Name, workspaceAgent.Name, req.Metrics)
|
||||
api.Options.UpdateAgentMetrics(ctx, prometheusmetrics.AgentMetricLabels{
|
||||
Username: user.Username,
|
||||
WorkspaceName: workspace.Name,
|
||||
AgentName: workspaceAgent.Name,
|
||||
TemplateName: row.TemplateName,
|
||||
}, req.Metrics)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
@ -2103,7 +2111,7 @@ func (api *API) workspaceAgentReportLifecycle(rw http.ResponseWriter, r *http.Re
|
|||
ctx := r.Context()
|
||||
|
||||
workspaceAgent := httpmw.WorkspaceAgent(r)
|
||||
workspace, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
|
||||
row, err := api.Database.GetWorkspaceByAgentID(ctx, workspaceAgent.ID)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
|
||||
Message: "Failed to get workspace.",
|
||||
|
@ -2111,6 +2119,7 @@ func (api *API) workspaceAgentReportLifecycle(rw http.ResponseWriter, r *http.Re
|
|||
})
|
||||
return
|
||||
}
|
||||
workspace := row.Workspace
|
||||
|
||||
var req agentsdk.PostLifecycleRequest
|
||||
if !httpapi.Read(ctx, rw, r, &req) {
|
||||
|
|
|
@ -78,72 +78,74 @@ spec:
|
|||
|
||||
<!-- Code generated by 'make docs/admin/prometheus.md'. DO NOT EDIT -->
|
||||
|
||||
| Name | Type | Description | Labels |
|
||||
| ----------------------------------------------------- | --------- | ------------------------------------------------------------------ | ----------------------------------------------------------------------------------- |
|
||||
| `coderd_agents_apps` | gauge | Agent applications with statuses. | `agent_name` `app_name` `health` `username` `workspace_name` |
|
||||
| `coderd_agents_connection_latencies_seconds` | gauge | Agent connection latencies in seconds. | `agent_name` `derp_region` `preferred` `username` `workspace_name` |
|
||||
| `coderd_agents_connections` | gauge | Agent connections with statuses. | `agent_name` `lifecycle_state` `status` `tailnet_node` `username` `workspace_name` |
|
||||
| `coderd_agents_up` | gauge | The number of active agents per workspace. | `username` `workspace_name` |
|
||||
| `coderd_agentstats_connection_count` | gauge | The number of established connections by agent | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_connection_median_latency_seconds` | gauge | The median agent connection latency | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_rx_bytes` | gauge | Agent Rx bytes | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_session_count_jetbrains` | gauge | The number of session established by JetBrains | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_session_count_reconnecting_pty` | gauge | The number of session established by reconnecting PTY | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_session_count_ssh` | gauge | The number of session established by SSH | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_session_count_vscode` | gauge | The number of session established by VSCode | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_tx_bytes` | gauge | Agent Tx bytes | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_api_active_users_duration_hour` | gauge | The number of users that have been active within the last hour. | |
|
||||
| `coderd_api_concurrent_requests` | gauge | The number of concurrent API requests. | |
|
||||
| `coderd_api_concurrent_websockets` | gauge | The total number of concurrent API websockets. | |
|
||||
| `coderd_api_request_latencies_seconds` | histogram | Latency distribution of requests in seconds. | `method` `path` |
|
||||
| `coderd_api_requests_processed_total` | counter | The total number of processed API requests | `code` `method` `path` |
|
||||
| `coderd_api_websocket_durations_seconds` | histogram | Websocket duration distribution of requests in seconds. | `path` |
|
||||
| `coderd_api_workspace_latest_build_total` | gauge | The latest workspace builds with a status. | `status` |
|
||||
| `coderd_insights_applications_usage_seconds` | gauge | The application usage per template. | `application_name` `slug` `template_name` |
|
||||
| `coderd_insights_parameters` | gauge | The parameter usage per template. | `parameter_name` `parameter_type` `parameter_value` `template_name` |
|
||||
| `coderd_insights_templates_active_users` | gauge | The number of active users of the template. | `template_name` |
|
||||
| `coderd_license_active_users` | gauge | The number of active users. | |
|
||||
| `coderd_license_limit_users` | gauge | The user seats limit based on the active Coder license. | |
|
||||
| `coderd_license_user_limit_enabled` | gauge | Returns 1 if the current license enforces the user limit. | |
|
||||
| `coderd_metrics_collector_agents_execution_seconds` | histogram | Histogram for duration of agents metrics collection in seconds. | |
|
||||
| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` |
|
||||
| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` |
|
||||
| `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` |
|
||||
| `go_gc_duration_seconds` | summary | A summary of the pause duration of garbage collection cycles. | |
|
||||
| `go_goroutines` | gauge | Number of goroutines that currently exist. | |
|
||||
| `go_info` | gauge | Information about the Go environment. | `version` |
|
||||
| `go_memstats_alloc_bytes` | gauge | Number of bytes allocated and still in use. | |
|
||||
| `go_memstats_alloc_bytes_total` | counter | Total number of bytes allocated, even if freed. | |
|
||||
| `go_memstats_buck_hash_sys_bytes` | gauge | Number of bytes used by the profiling bucket hash table. | |
|
||||
| `go_memstats_frees_total` | counter | Total number of frees. | |
|
||||
| `go_memstats_gc_sys_bytes` | gauge | Number of bytes used for garbage collection system metadata. | |
|
||||
| `go_memstats_heap_alloc_bytes` | gauge | Number of heap bytes allocated and still in use. | |
|
||||
| `go_memstats_heap_idle_bytes` | gauge | Number of heap bytes waiting to be used. | |
|
||||
| `go_memstats_heap_inuse_bytes` | gauge | Number of heap bytes that are in use. | |
|
||||
| `go_memstats_heap_objects` | gauge | Number of allocated objects. | |
|
||||
| `go_memstats_heap_released_bytes` | gauge | Number of heap bytes released to OS. | |
|
||||
| `go_memstats_heap_sys_bytes` | gauge | Number of heap bytes obtained from system. | |
|
||||
| `go_memstats_last_gc_time_seconds` | gauge | Number of seconds since 1970 of last garbage collection. | |
|
||||
| `go_memstats_lookups_total` | counter | Total number of pointer lookups. | |
|
||||
| `go_memstats_mallocs_total` | counter | Total number of mallocs. | |
|
||||
| `go_memstats_mcache_inuse_bytes` | gauge | Number of bytes in use by mcache structures. | |
|
||||
| `go_memstats_mcache_sys_bytes` | gauge | Number of bytes used for mcache structures obtained from system. | |
|
||||
| `go_memstats_mspan_inuse_bytes` | gauge | Number of bytes in use by mspan structures. | |
|
||||
| `go_memstats_mspan_sys_bytes` | gauge | Number of bytes used for mspan structures obtained from system. | |
|
||||
| `go_memstats_next_gc_bytes` | gauge | Number of heap bytes when next garbage collection will take place. | |
|
||||
| `go_memstats_other_sys_bytes` | gauge | Number of bytes used for other system allocations. | |
|
||||
| `go_memstats_stack_inuse_bytes` | gauge | Number of bytes in use by the stack allocator. | |
|
||||
| `go_memstats_stack_sys_bytes` | gauge | Number of bytes obtained from system for stack allocator. | |
|
||||
| `go_memstats_sys_bytes` | gauge | Number of bytes obtained from system. | |
|
||||
| `go_threads` | gauge | Number of OS threads created. | |
|
||||
| `process_cpu_seconds_total` | counter | Total user and system CPU time spent in seconds. | |
|
||||
| `process_max_fds` | gauge | Maximum number of open file descriptors. | |
|
||||
| `process_open_fds` | gauge | Number of open file descriptors. | |
|
||||
| `process_resident_memory_bytes` | gauge | Resident memory size in bytes. | |
|
||||
| `process_start_time_seconds` | gauge | Start time of the process since unix epoch in seconds. | |
|
||||
| `process_virtual_memory_bytes` | gauge | Virtual memory size in bytes. | |
|
||||
| `process_virtual_memory_max_bytes` | gauge | Maximum amount of virtual memory available in bytes. | |
|
||||
| `promhttp_metric_handler_requests_in_flight` | gauge | Current number of scrapes being served. | |
|
||||
| `promhttp_metric_handler_requests_total` | counter | Total number of scrapes by HTTP status code. | `code` |
|
||||
| Name | Type | Description | Labels |
|
||||
| ----------------------------------------------------- | --------- | ------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
|
||||
| `agent_scripts_executed_total` | counter | Total number of scripts executed by the Coder agent. Includes cron scheduled scripts. | `agent_name` `success` `template_name` `username` `workspace_name` |
|
||||
| `coderd_agents_apps` | gauge | Agent applications with statuses. | `agent_name` `app_name` `health` `username` `workspace_name` |
|
||||
| `coderd_agents_connection_latencies_seconds` | gauge | Agent connection latencies in seconds. | `agent_name` `derp_region` `preferred` `username` `workspace_name` |
|
||||
| `coderd_agents_connections` | gauge | Agent connections with statuses. | `agent_name` `lifecycle_state` `status` `tailnet_node` `username` `workspace_name` |
|
||||
| `coderd_agents_up` | gauge | The number of active agents per workspace. | `template_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_connection_count` | gauge | The number of established connections by agent | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_connection_median_latency_seconds` | gauge | The median agent connection latency | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_rx_bytes` | gauge | Agent Rx bytes | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_session_count_jetbrains` | gauge | The number of session established by JetBrains | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_session_count_reconnecting_pty` | gauge | The number of session established by reconnecting PTY | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_session_count_ssh` | gauge | The number of session established by SSH | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_session_count_vscode` | gauge | The number of session established by VSCode | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_startup_script_seconds` | gauge | The number of seconds the startup script took to execute. | `agent_name` `success` `template_name` `username` `workspace_name` |
|
||||
| `coderd_agentstats_tx_bytes` | gauge | Agent Tx bytes | `agent_name` `username` `workspace_name` |
|
||||
| `coderd_api_active_users_duration_hour` | gauge | The number of users that have been active within the last hour. | |
|
||||
| `coderd_api_concurrent_requests` | gauge | The number of concurrent API requests. | |
|
||||
| `coderd_api_concurrent_websockets` | gauge | The total number of concurrent API websockets. | |
|
||||
| `coderd_api_request_latencies_seconds` | histogram | Latency distribution of requests in seconds. | `method` `path` |
|
||||
| `coderd_api_requests_processed_total` | counter | The total number of processed API requests | `code` `method` `path` |
|
||||
| `coderd_api_websocket_durations_seconds` | histogram | Websocket duration distribution of requests in seconds. | `path` |
|
||||
| `coderd_api_workspace_latest_build_total` | gauge | The latest workspace builds with a status. | `status` |
|
||||
| `coderd_insights_applications_usage_seconds` | gauge | The application usage per template. | `application_name` `slug` `template_name` |
|
||||
| `coderd_insights_parameters` | gauge | The parameter usage per template. | `parameter_name` `parameter_type` `parameter_value` `template_name` |
|
||||
| `coderd_insights_templates_active_users` | gauge | The number of active users of the template. | `template_name` |
|
||||
| `coderd_license_active_users` | gauge | The number of active users. | |
|
||||
| `coderd_license_limit_users` | gauge | The user seats limit based on the active Coder license. | |
|
||||
| `coderd_license_user_limit_enabled` | gauge | Returns 1 if the current license enforces the user limit. | |
|
||||
| `coderd_metrics_collector_agents_execution_seconds` | histogram | Histogram for duration of agents metrics collection in seconds. | |
|
||||
| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` |
|
||||
| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` |
|
||||
| `coderd_workspace_builds_total` | counter | The number of workspaces started, updated, or deleted. | `action` `owner_email` `status` `template_name` `template_version` `workspace_name` |
|
||||
| `go_gc_duration_seconds` | summary | A summary of the pause duration of garbage collection cycles. | |
|
||||
| `go_goroutines` | gauge | Number of goroutines that currently exist. | |
|
||||
| `go_info` | gauge | Information about the Go environment. | `version` |
|
||||
| `go_memstats_alloc_bytes` | gauge | Number of bytes allocated and still in use. | |
|
||||
| `go_memstats_alloc_bytes_total` | counter | Total number of bytes allocated, even if freed. | |
|
||||
| `go_memstats_buck_hash_sys_bytes` | gauge | Number of bytes used by the profiling bucket hash table. | |
|
||||
| `go_memstats_frees_total` | counter | Total number of frees. | |
|
||||
| `go_memstats_gc_sys_bytes` | gauge | Number of bytes used for garbage collection system metadata. | |
|
||||
| `go_memstats_heap_alloc_bytes` | gauge | Number of heap bytes allocated and still in use. | |
|
||||
| `go_memstats_heap_idle_bytes` | gauge | Number of heap bytes waiting to be used. | |
|
||||
| `go_memstats_heap_inuse_bytes` | gauge | Number of heap bytes that are in use. | |
|
||||
| `go_memstats_heap_objects` | gauge | Number of allocated objects. | |
|
||||
| `go_memstats_heap_released_bytes` | gauge | Number of heap bytes released to OS. | |
|
||||
| `go_memstats_heap_sys_bytes` | gauge | Number of heap bytes obtained from system. | |
|
||||
| `go_memstats_last_gc_time_seconds` | gauge | Number of seconds since 1970 of last garbage collection. | |
|
||||
| `go_memstats_lookups_total` | counter | Total number of pointer lookups. | |
|
||||
| `go_memstats_mallocs_total` | counter | Total number of mallocs. | |
|
||||
| `go_memstats_mcache_inuse_bytes` | gauge | Number of bytes in use by mcache structures. | |
|
||||
| `go_memstats_mcache_sys_bytes` | gauge | Number of bytes used for mcache structures obtained from system. | |
|
||||
| `go_memstats_mspan_inuse_bytes` | gauge | Number of bytes in use by mspan structures. | |
|
||||
| `go_memstats_mspan_sys_bytes` | gauge | Number of bytes used for mspan structures obtained from system. | |
|
||||
| `go_memstats_next_gc_bytes` | gauge | Number of heap bytes when next garbage collection will take place. | |
|
||||
| `go_memstats_other_sys_bytes` | gauge | Number of bytes used for other system allocations. | |
|
||||
| `go_memstats_stack_inuse_bytes` | gauge | Number of bytes in use by the stack allocator. | |
|
||||
| `go_memstats_stack_sys_bytes` | gauge | Number of bytes obtained from system for stack allocator. | |
|
||||
| `go_memstats_sys_bytes` | gauge | Number of bytes obtained from system. | |
|
||||
| `go_threads` | gauge | Number of OS threads created. | |
|
||||
| `process_cpu_seconds_total` | counter | Total user and system CPU time spent in seconds. | |
|
||||
| `process_max_fds` | gauge | Maximum number of open file descriptors. | |
|
||||
| `process_open_fds` | gauge | Number of open file descriptors. | |
|
||||
| `process_resident_memory_bytes` | gauge | Resident memory size in bytes. | |
|
||||
| `process_start_time_seconds` | gauge | Start time of the process since unix epoch in seconds. | |
|
||||
| `process_virtual_memory_bytes` | gauge | Virtual memory size in bytes. | |
|
||||
| `process_virtual_memory_max_bytes` | gauge | Maximum amount of virtual memory available in bytes. | |
|
||||
| `promhttp_metric_handler_requests_in_flight` | gauge | Current number of scrapes being served. | |
|
||||
| `promhttp_metric_handler_requests_total` | counter | Total number of scrapes by HTTP status code. | `code` |
|
||||
|
||||
<!-- End generated by 'make docs/admin/prometheus.md'. -->
|
||||
|
|
|
@ -15,9 +15,15 @@ coderd_agents_connections{agent_name="main",lifecycle_state="start_timeout",stat
|
|||
coderd_agents_connections{agent_name="main",lifecycle_state="start_timeout",status="connected",tailnet_node="nodeid:3779bd45d00be0eb",username="admin",workspace_name="workspace-1"} 1
|
||||
# HELP coderd_agents_up The number of active agents per workspace.
|
||||
# TYPE coderd_agents_up gauge
|
||||
coderd_agents_up{username="admin",workspace_name="workspace-1"} 1
|
||||
coderd_agents_up{username="admin",workspace_name="workspace-2"} 1
|
||||
coderd_agents_up{username="admin",workspace_name="workspace-3"} 1
|
||||
coderd_agents_up{template_name="docker",username="admin",workspace_name="workspace-1"} 1
|
||||
coderd_agents_up{template_name="docker",username="admin",workspace_name="workspace-2"} 1
|
||||
coderd_agents_up{template_name="gcp",username="admin",workspace_name="workspace-3"} 1
|
||||
# HELP coderd_agentstats_startup_script_seconds The number of seconds the startup script took to execute.
|
||||
# TYPE coderd_agentstats_startup_script_seconds gauge
|
||||
coderd_agentstats_startup_script_seconds{agent_name="main",success="true",template_name="docker",username="admin",workspace_name="workspace-1"} 1.969900304
|
||||
# HELP agent_scripts_executed_total Total number of scripts executed by the Coder agent. Includes cron scheduled scripts.
|
||||
# TYPE agent_scripts_executed_total counter
|
||||
agent_scripts_executed_total{agent_name="main",success="true",template_name="docker",username="admin",workspace_name="workspace-1"} 1
|
||||
# HELP coderd_agentstats_connection_count The number of established connections by agent
|
||||
# TYPE coderd_agentstats_connection_count gauge
|
||||
coderd_agentstats_connection_count{agent_name="main",username="admin",workspace_name="workspace1"} 2
|
||||
|
|
Loading…
Reference in New Issue