mirror of https://github.com/coder/coder.git
chore: add logging around agent app health reporting (#12071)
This commit is contained in:
parent
c0e169ebf9
commit
ec8e41f516
|
@ -26,7 +26,12 @@ type WorkspaceAppHealthReporter func(ctx context.Context)
|
||||||
|
|
||||||
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
|
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
|
||||||
func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.WorkspaceApp, postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth) WorkspaceAppHealthReporter {
|
func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.WorkspaceApp, postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth) WorkspaceAppHealthReporter {
|
||||||
|
logger = logger.Named("apphealth")
|
||||||
|
|
||||||
runHealthcheckLoop := func(ctx context.Context) error {
|
runHealthcheckLoop := func(ctx context.Context) error {
|
||||||
|
ctx, cancel := context.WithCancel(ctx)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
// no need to run this loop if no apps for this workspace.
|
// no need to run this loop if no apps for this workspace.
|
||||||
if len(apps) == 0 {
|
if len(apps) == 0 {
|
||||||
return nil
|
return nil
|
||||||
|
@ -87,6 +92,7 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
|
||||||
return nil
|
return nil
|
||||||
}()
|
}()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
nowUnhealthy := false
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
if failures[app.ID] < int(app.Healthcheck.Threshold) {
|
if failures[app.ID] < int(app.Healthcheck.Threshold) {
|
||||||
// increment the failure count and keep status the same.
|
// increment the failure count and keep status the same.
|
||||||
|
@ -96,14 +102,21 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
|
||||||
// set to unhealthy if we hit the failure threshold.
|
// set to unhealthy if we hit the failure threshold.
|
||||||
// we stop incrementing at the threshold to prevent the failure value from increasing forever.
|
// we stop incrementing at the threshold to prevent the failure value from increasing forever.
|
||||||
health[app.ID] = codersdk.WorkspaceAppHealthUnhealthy
|
health[app.ID] = codersdk.WorkspaceAppHealthUnhealthy
|
||||||
|
nowUnhealthy = true
|
||||||
}
|
}
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
|
logger.Debug(ctx, "error checking app health",
|
||||||
|
slog.F("id", app.ID.String()),
|
||||||
|
slog.F("slug", app.Slug),
|
||||||
|
slog.F("now_unhealthy", nowUnhealthy), slog.Error(err),
|
||||||
|
)
|
||||||
} else {
|
} else {
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
// we only need one successful health check to be considered healthy.
|
// we only need one successful health check to be considered healthy.
|
||||||
health[app.ID] = codersdk.WorkspaceAppHealthHealthy
|
health[app.ID] = codersdk.WorkspaceAppHealthHealthy
|
||||||
failures[app.ID] = 0
|
failures[app.ID] = 0
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
|
logger.Debug(ctx, "workspace app healthy", slog.F("id", app.ID.String()), slog.F("slug", app.Slug))
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Reset(time.Duration(app.Healthcheck.Interval) * time.Second)
|
t.Reset(time.Duration(app.Healthcheck.Interval) * time.Second)
|
||||||
|
@ -137,7 +150,9 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
|
||||||
Healths: lastHealth,
|
Healths: lastHealth,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error(ctx, "failed to report workspace app stat", slog.Error(err))
|
logger.Error(ctx, "failed to report workspace app health", slog.Error(err))
|
||||||
|
} else {
|
||||||
|
logger.Debug(ctx, "sent workspace app health", slog.F("health", lastHealth))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,11 @@ func (a *AppsAPI) BatchUpdateAppHealths(ctx context.Context, req *agentproto.Bat
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
a.Log.Debug(ctx, "got batch app health update",
|
||||||
|
slog.F("agent_id", workspaceAgent.ID.String()),
|
||||||
|
slog.F("updates", req.Updates),
|
||||||
|
)
|
||||||
|
|
||||||
if len(req.Updates) == 0 {
|
if len(req.Updates) == 0 {
|
||||||
return &agentproto.BatchUpdateAppHealthResponse{}, nil
|
return &agentproto.BatchUpdateAppHealthResponse{}, nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue