chore: ensure logs consistency across Coder (#8083)

This commit is contained in:
Marcin Tojek 2023-06-20 12:30:45 +02:00 committed by GitHub
parent d6f8bd7847
commit b1d1b63113
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 132 additions and 57 deletions

View File

@ -1017,7 +1017,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
defer a.connCountReconnectingPTY.Add(-1)
connectionID := uuid.NewString()
logger = logger.With(slog.F("id", msg.ID), slog.F("connection_id", connectionID))
logger = logger.With(slog.F("message_id", msg.ID), slog.F("connection_id", connectionID))
logger.Debug(ctx, "starting handler")
defer func() {
@ -1029,9 +1029,9 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
// If the agent is closed, we don't want to
// log this as an error since it's expected.
if closed {
logger.Debug(ctx, "session error after agent close", slog.Error(err))
logger.Debug(ctx, "reconnecting PTY failed with session error (agent closed)", slog.Error(err))
} else {
logger.Error(ctx, "session error", slog.Error(err))
logger.Error(ctx, "reconnecting PTY failed with session error", slog.Error(err))
}
}
logger.Debug(ctx, "session closed")
@ -1107,9 +1107,9 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
// When the PTY is closed, this is triggered.
// Error is typically a benign EOF, so only log for debugging.
if errors.Is(err, io.EOF) {
logger.Debug(ctx, "unable to read pty output, command exited?", slog.Error(err))
logger.Debug(ctx, "unable to read pty output, command might have exited", slog.Error(err))
} else {
logger.Warn(ctx, "unable to read pty output, command exited?", slog.Error(err))
logger.Warn(ctx, "unable to read pty output, command might have exited", slog.Error(err))
a.metrics.reconnectingPTYErrors.WithLabelValues("output_reader").Add(1)
}
break
@ -1154,7 +1154,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
err := rpty.ptty.Resize(msg.Height, msg.Width)
if err != nil {
// We can continue after this, it's not fatal!
logger.Error(ctx, "resize", slog.Error(err))
logger.Error(ctx, "reconnecting PTY initial resize failed, but will continue", slog.Error(err))
a.metrics.reconnectingPTYErrors.WithLabelValues("resize").Add(1)
}
// Write any previously stored data for the TTY.
@ -1213,12 +1213,12 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
return nil
}
if err != nil {
logger.Warn(ctx, "read conn", slog.Error(err))
logger.Warn(ctx, "reconnecting PTY failed with read error", slog.Error(err))
return nil
}
_, err = rpty.ptty.InputWriter().Write([]byte(req.Data))
if err != nil {
logger.Warn(ctx, "write to pty", slog.Error(err))
logger.Warn(ctx, "reconnecting PTY failed with write error", slog.Error(err))
a.metrics.reconnectingPTYErrors.WithLabelValues("input_writer").Add(1)
return nil
}
@ -1229,7 +1229,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, logger slog.Logger, m
err = rpty.ptty.Resize(req.Height, req.Width)
if err != nil {
// We can continue after this, it's not fatal!
logger.Error(ctx, "resize", slog.Error(err))
logger.Error(ctx, "reconnecting PTY resize failed, but will continue", slog.Error(err))
a.metrics.reconnectingPTYErrors.WithLabelValues("resize").Add(1)
}
}

View File

@ -120,8 +120,8 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom
LocalPortForwardingCallback: func(ctx ssh.Context, destinationHost string, destinationPort uint32) bool {
// Allow local port forwarding all!
s.logger.Debug(ctx, "local port forward",
slog.F("destination-host", destinationHost),
slog.F("destination-port", destinationPort))
slog.F("destination_host", destinationHost),
slog.F("destination_port", destinationPort))
return true
},
PtyCallback: func(ctx ssh.Context, pty ssh.Pty) bool {
@ -130,8 +130,8 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom
ReversePortForwardingCallback: func(ctx ssh.Context, bindHost string, bindPort uint32) bool {
// Allow reverse port forwarding all!
s.logger.Debug(ctx, "local port forward",
slog.F("bind-host", bindHost),
slog.F("bind-port", bindPort))
slog.F("bind_host", bindHost),
slog.F("bind_port", bindPort))
return true
},
RequestHandlers: map[string]ssh.RequestHandler{

View File

@ -126,7 +126,7 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
reaper.WithCatchSignals(InterruptSignals...),
)
if err != nil {
logger.Error(ctx, "failed to reap", slog.Error(err))
logger.Error(ctx, "agent process reaper unable to fork", slog.Error(err))
return xerrors.Errorf("fork reap: %w", err)
}
@ -163,7 +163,7 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
logger := slog.Make(sinks...).Leveled(slog.LevelDebug)
version := buildinfo.Version()
logger.Info(ctx, "starting agent",
logger.Info(ctx, "agent is starting now",
slog.F("url", r.agentURL),
slog.F("auth", auth),
slog.F("version", version),

View File

@ -54,7 +54,7 @@ func TestWorkspaceAgent(t *testing.T) {
clitest.Start(t, inv)
ctx := inv.Context()
pty.ExpectMatchContext(ctx, "starting agent")
pty.ExpectMatchContext(ctx, "agent is starting now")
coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID)
@ -270,7 +270,7 @@ func TestWorkspaceAgent(t *testing.T) {
pty := ptytest.New(t).Attach(inv)
clitest.Start(t, inv)
pty.ExpectMatchContext(inv.Context(), "starting agent")
pty.ExpectMatchContext(inv.Context(), "agent is starting now")
resources := coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID)
require.Len(t, resources, 1)

View File

@ -455,7 +455,7 @@ func watchAndClose(ctx context.Context, closer func() error, logger slog.Logger,
startWatchLoop:
for {
logger.Debug(ctx, "(re)connecting to the coder server to watch workspace events.")
logger.Debug(ctx, "connecting to the coder server to watch workspace events")
var wsWatch <-chan codersdk.Workspace
var err error
for r := retry.New(time.Second, 15*time.Second); r.Wait(ctx); {
@ -464,7 +464,7 @@ startWatchLoop:
break
}
if ctx.Err() != nil {
logger.Info(ctx, "context expired", slog.Error(ctx.Err()))
logger.Debug(ctx, "context expired", slog.Error(ctx.Err()))
return
}
}
@ -472,7 +472,7 @@ startWatchLoop:
for {
select {
case <-ctx.Done():
logger.Info(ctx, "context expired", slog.Error(ctx.Err()))
logger.Debug(ctx, "context expired", slog.Error(ctx.Err()))
return
case w, ok := <-wsWatch:
if !ok {

View File

@ -71,11 +71,11 @@ func (h *HTTPAuthorizer) Authorize(r *http.Request, action rbac.Action, object r
internalError := new(rbac.UnauthorizedError)
logger := h.Logger
if xerrors.As(err, internalError) {
logger = h.Logger.With(slog.F("internal", internalError.Internal()))
logger = h.Logger.With(slog.F("internal_error", internalError.Internal()))
}
// Log information for debugging. This will be very helpful
// in the early days
logger.Warn(r.Context(), "unauthorized",
logger.Warn(r.Context(), "requester is not authorized to access the object",
slog.F("roles", roles.Actor.SafeRoleNames()),
slog.F("actor_id", roles.Actor.ID),
slog.F("actor_name", roles.ActorName),

View File

@ -245,9 +245,9 @@ func New(options *Options) *API {
options.TracerProvider = trace.NewNoopTracerProvider()
}
if options.SetUserGroups == nil {
options.SetUserGroups = func(ctx context.Context, _ database.Store, id uuid.UUID, groups []string) error {
options.SetUserGroups = func(ctx context.Context, _ database.Store, userID uuid.UUID, groups []string) error {
options.Logger.Warn(ctx, "attempted to assign OIDC groups without enterprise license",
slog.F("id", id), slog.F("groups", groups),
slog.F("user_id", userID), slog.F("groups", groups),
)
return nil
}

View File

@ -69,7 +69,7 @@ func logNotAuthorizedError(ctx context.Context, logger slog.Logger, err error) e
return &contextError
}
logger.Debug(ctx, "unauthorized",
slog.F("internal", internalError.Internal()),
slog.F("internal_error", internalError.Internal()),
slog.F("input", internalError.Input()),
slog.Error(err),
)

View File

@ -50,7 +50,7 @@ func ReportCLITelemetry(log slog.Logger, rep telemetry.Reporter) func(http.Handl
log.Error(
r.Context(),
"base64 decode",
slog.F("error", err),
slog.Error(err),
)
return
}

View File

@ -222,7 +222,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
case <-ticker.C:
}
logger.Debug(ctx, "Agent metrics collection is starting")
logger.Debug(ctx, "agent metrics collection is starting")
timer := prometheus.NewTimer(metricsCollectorAgents)
workspaceRows, err := db.GetWorkspaces(ctx, database.GetWorkspacesParams{
@ -236,7 +236,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
for _, workspace := range workspaceRows {
user, err := db.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
logger.Error(ctx, "can't get user", slog.F("user_id", workspace.OwnerID), slog.Error(err))
logger.Error(ctx, "can't get user from the database", slog.F("user_id", workspace.OwnerID), slog.Error(err))
agentsGauge.WithLabelValues(VectorOperationAdd, 0, user.Username, workspace.Name)
continue
}
@ -314,7 +314,7 @@ func Agents(ctx context.Context, logger slog.Logger, registerer prometheus.Regis
agentsAppsGauge.Commit()
done:
logger.Debug(ctx, "Agent metrics collection is done")
logger.Debug(ctx, "agent metrics collection is done")
timer.ObserveDuration()
ticker.Reset(duration)
}
@ -447,7 +447,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
case <-ticker.C:
}
logger.Debug(ctx, "Agent metrics collection is starting")
logger.Debug(ctx, "agent metrics collection is starting")
timer := prometheus.NewTimer(metricsCollectorAgentStats)
checkpoint := time.Now()
@ -482,7 +482,7 @@ func AgentStats(ctx context.Context, logger slog.Logger, registerer prometheus.R
}
}
logger.Debug(ctx, "Agent metrics collection is done", slog.F("len", len(stats)))
logger.Debug(ctx, "agent metrics collection is done", slog.F("len", len(stats)))
timer.ObserveDuration()
createdAfter = checkpoint

View File

@ -108,7 +108,7 @@ func (server *Server) AcquireJob(ctx context.Context, _ *proto.Empty) (*proto.Ac
if err != nil {
return nil, xerrors.Errorf("acquire job: %w", err)
}
server.Logger.Debug(ctx, "locked job from database", slog.F("id", job.ID))
server.Logger.Debug(ctx, "locked job from database", slog.F("job_id", job.ID))
// Marks the acquired job as failed with the error message provided.
failJob := func(errorMessage string) error {

View File

@ -56,7 +56,7 @@ func (api *API) provisionerJobLogs(rw http.ResponseWriter, r *http.Request, job
}
if !follow {
fetchAndWriteLogs(ctx, logger, api.Database, job.ID, after, rw)
fetchAndWriteLogs(ctx, api.Database, job.ID, after, rw)
return
}
@ -222,7 +222,7 @@ func convertProvisionerJob(provisionerJob database.ProvisionerJob) codersdk.Prov
return job
}
func fetchAndWriteLogs(ctx context.Context, logger slog.Logger, db database.Store, jobID uuid.UUID, after int64, rw http.ResponseWriter) {
func fetchAndWriteLogs(ctx context.Context, db database.Store, jobID uuid.UUID, after int64, rw http.ResponseWriter) {
logs, err := db.GetProvisionerLogsAfterID(ctx, database.GetProvisionerLogsAfterIDParams{
JobID: jobID,
CreatedAfter: after,
@ -237,8 +237,6 @@ func fetchAndWriteLogs(ctx context.Context, logger slog.Logger, db database.Stor
if logs == nil {
logs = []database.ProvisionerJobLog{}
}
logger.Debug(ctx, "Finished non-follow job logs")
httpapi.Write(ctx, rw, http.StatusOK, convertProvisionerJobLogs(logs))
}
@ -259,7 +257,7 @@ func jobIsComplete(logger slog.Logger, job database.ProvisionerJob) bool {
return false
default:
logger.Error(context.Background(),
"unknown status",
"can't convert the provisioner job status",
slog.F("job_id", job.ID), slog.F("status", status))
return false
}

View File

@ -464,7 +464,6 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
}
if !follow {
logger.Debug(ctx, "Finished non-follow job logs")
httpapi.Write(ctx, rw, http.StatusOK, convertWorkspaceAgentStartupLogs(logs))
return
}
@ -967,7 +966,7 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
if !xerrors.Is(err, context.Canceled) && !database.IsQueryCanceledError(err) {
api.Logger.Error(ctx, "failed to update agent disconnect time",
slog.Error(err),
slog.F("workspace", build.WorkspaceID),
slog.F("workspace_id", build.WorkspaceID),
)
}
}
@ -1256,8 +1255,8 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
api.Logger.Debug(ctx, "read stats report",
slog.F("interval", api.AgentStatsRefreshInterval),
slog.F("agent", workspaceAgent.ID),
slog.F("workspace", workspace.ID),
slog.F("workspace_agent_id", workspaceAgent.ID),
slog.F("workspace_id", workspace.ID),
slog.F("payload", req),
)
@ -1267,7 +1266,7 @@ func (api *API) workspaceAgentReportStats(rw http.ResponseWriter, r *http.Reques
payload, err := json.Marshal(req.ConnectionsByProto)
if err != nil {
api.Logger.Error(ctx, "marshal agent connections by proto", slog.F("workspace_agent", workspaceAgent.ID), slog.Error(err))
api.Logger.Error(ctx, "marshal agent connections by proto", slog.F("workspace_agent_id", workspaceAgent.ID), slog.Error(err))
payload = json.RawMessage("{}")
}
@ -1399,8 +1398,8 @@ func (api *API) workspaceAgentPostMetadata(rw http.ResponseWriter, r *http.Reque
api.Logger.Debug(
ctx, "accepted metadata report",
slog.F("agent", workspaceAgent.ID),
slog.F("workspace", workspace.ID),
slog.F("workspace_agent_id", workspaceAgent.ID),
slog.F("workspace_id", workspace.ID),
slog.F("collected_at", datum.CollectedAt),
slog.F("key", datum.Key),
)
@ -1577,8 +1576,8 @@ func (api *API) workspaceAgentReportLifecycle(rw http.ResponseWriter, r *http.Re
}
logger := api.Logger.With(
slog.F("agent", workspaceAgent.ID),
slog.F("workspace", workspace.ID),
slog.F("workspace_agent_id", workspaceAgent.ID),
slog.F("workspace_id", workspace.ID),
slog.F("payload", req),
)
logger.Debug(ctx, "workspace agent state report")

View File

@ -51,12 +51,12 @@ func (m *Mesh) SetAddresses(addresses []string, connect bool) {
for _, address := range addresses {
addressURL, err := url.Parse(address)
if err != nil {
m.logger.Error(m.ctx, "invalid address", slog.F("address", err), slog.Error(err))
m.logger.Error(m.ctx, "invalid address", slog.F("address", address), slog.Error(err))
continue
}
derpURL, err := addressURL.Parse("/derp")
if err != nil {
m.logger.Error(m.ctx, "parse derp", slog.F("address", err), slog.Error(err))
m.logger.Error(m.ctx, "parse derp", slog.F("address", addressURL.String()), slog.Error(err))
continue
}
address = derpURL.String()

View File

@ -1009,7 +1009,7 @@ func TestProvisionerd(t *testing.T) {
acquireJob: func(ctx context.Context, _ *proto.Empty) (*proto.AcquiredJob, error) {
m.Lock()
defer m.Unlock()
logger.Info(ctx, "AcquiredJob called.")
logger.Info(ctx, "provisioner stage: AcquiredJob")
if len(ops) > 0 {
return &proto.AcquiredJob{}, nil
}
@ -1031,7 +1031,7 @@ func TestProvisionerd(t *testing.T) {
updateJob: func(ctx context.Context, update *proto.UpdateJobRequest) (*proto.UpdateJobResponse, error) {
m.Lock()
defer m.Unlock()
logger.Info(ctx, "UpdateJob called.")
logger.Info(ctx, "provisioner stage: UpdateJob")
ops = append(ops, "UpdateJob")
for _, log := range update.Logs {
ops = append(ops, fmt.Sprintf("Log: %s | %s", log.Stage, log.Output))
@ -1041,7 +1041,7 @@ func TestProvisionerd(t *testing.T) {
completeJob: func(ctx context.Context, job *proto.CompletedJob) (*proto.Empty, error) {
m.Lock()
defer m.Unlock()
logger.Info(ctx, "CompleteJob called.")
logger.Info(ctx, "provisioner stage: CompleteJob")
ops = append(ops, "CompleteJob")
completeOnce.Do(func() { close(completeChan) })
return &proto.Empty{}, nil
@ -1049,7 +1049,7 @@ func TestProvisionerd(t *testing.T) {
failJob: func(ctx context.Context, job *proto.FailedJob) (*proto.Empty, error) {
m.Lock()
defer m.Unlock()
logger.Info(ctx, "FailJob called.")
logger.Info(ctx, "provisioner stage: FailJob")
ops = append(ops, "FailJob")
return &proto.Empty{}, nil
},

View File

@ -68,9 +68,9 @@ func (r *Runner) Run(ctx context.Context, _ string, logs io.Writer) error {
bytesPerTick = r.cfg.BytesPerTick
)
logger.Info(ctx, "config",
logger.Debug(ctx, "config",
slog.F("agent_id", agentID),
slog.F("reconnect", reconnect),
slog.F("reconnecting_pty_id", reconnect),
slog.F("height", height),
slog.F("width", width),
slog.F("tick_interval", tickInterval),

View File

@ -289,3 +289,81 @@ func notImplementsFullResponseWriter(ctx *dsl.VarFilterContext) bool {
!(types.Implements(p, flusher) || types.Implements(ctx.Type, flusher)) ||
!(types.Implements(p, hijacker) || types.Implements(ctx.Type, hijacker))
}
// slogFieldNameSnakeCase is a lint rule that ensures naming consistency
// of logged field names: every constant slog.F field name must be a
// snake_case string literal (lowercase words separated by underscores).
func slogFieldNameSnakeCase(m dsl.Matcher) {
	m.Import("cdr.dev/slog")
	m.Match(
		`slog.F($name, $value)`,
	).
		// Only constant names can be checked statically; report any literal
		// that does not match lowercase `word(_word)*`.
		Where(m["name"].Const && !m["name"].Text.Matches(`^"[a-z]+(_[a-z]+)*"$`)).
		Report("Field name $name must be snake_case.")
}
// slogUUIDFieldNameHasIDSuffix ensures that a "uuid.UUID" field has an "_id"
// suffix in the field name (e.g. "user_id", "workspace_id").
func slogUUIDFieldNameHasIDSuffix(m dsl.Matcher) {
	m.Import("cdr.dev/slog")
	m.Import("github.com/google/uuid")
	m.Match(
		`slog.F($name, $value)`,
	).
		// Report any field whose value is typed uuid.UUID but whose literal
		// name does not end with `_id`.
		Where(m["value"].Type.Is("uuid.UUID") && !m["name"].Text.Matches(`_id"$`)).
		Report(`uuid.UUID field $name must have "_id" suffix.`)
}
// slogMessageFormat ensures that the log message starts with a lowercase
// letter and does not end with a special character. Applies to Error, Warn,
// Info, and Debug calls on both `logger` and `Logger` receivers.
func slogMessageFormat(m dsl.Matcher) {
	m.Import("cdr.dev/slog")
	m.Match(
		`logger.Error($ctx, $message, $*args)`,
		`Logger.Error($ctx, $message, $*args)`,
		`logger.Warn($ctx, $message, $*args)`,
		`Logger.Warn($ctx, $message, $*args)`,
		`logger.Info($ctx, $message, $*args)`,
		`Logger.Info($ctx, $message, $*args)`,
		`logger.Debug($ctx, $message, $*args)`,
		`Logger.Debug($ctx, $message, $*args)`,
	).
		Where(
			(
			// Report if the message ends with a special character...
			m["message"].Text.Matches(`[.!?]"$`) ||
				// ...or starts with an uppercase letter,
				m["message"].Text.Matches(`^"[A-Z]{1}`) &&
					// unless it is a known exception:
					!m["message"].Text.Matches(`^"Prometheus`))).
		Report(`Message $message must start with a lowercase letter and must not end with a special character.`)
}
// slogMessageLength ensures that important log messages are meaningful, and
// must be at least 16 characters long. Debug-level calls are deliberately
// exempt (see the "no debug" note below).
func slogMessageLength(m dsl.Matcher) {
	m.Import("cdr.dev/slog")
	m.Match(
		`logger.Error($ctx, $message, $*args)`,
		`Logger.Error($ctx, $message, $*args)`,
		`logger.Warn($ctx, $message, $*args)`,
		`Logger.Warn($ctx, $message, $*args)`,
		`logger.Info($ctx, $message, $*args)`,
		`Logger.Info($ctx, $message, $*args)`,
		// no debug
	).
		Where(
			// Report messages of 15 characters or fewer. The matched text
			// includes the surrounding double quotes, hence {0,15} between them:
			m["message"].Text.Matches(`^".{0,15}"$`) &&
				// but there are exceptions:
				!m["message"].Text.Matches(`^"command exit"$`)).
		Report(`Message $message is too short, it must be at least 16 characters long.`)
}
// slogError ensures that errors are logged with "slog.Error" instead of
// "slog.F". The "internal_error" field name is exempt, since it is used to
// log an error value under a dedicated field.
func slogError(m dsl.Matcher) {
	m.Import("cdr.dev/slog")
	m.Match(
		`slog.F($name, $value)`,
	).
		// Report any constant-named slog.F field holding an error value,
		// except the allowed "internal_error" field.
		Where(m["name"].Const && m["value"].Type.Is("error") && !m["name"].Text.Matches(`^"internal_error"$`)).
		Report(`Error should be logged using "slog.Error" instead.`)
}

View File

@ -231,7 +231,7 @@ func NewConn(options *Options) (conn *Conn, err error) {
}
}()
wireguardEngine.SetStatusCallback(func(s *wgengine.Status, err error) {
server.logger.Debug(context.Background(), "wireguard status", slog.F("status", s), slog.F("err", err))
server.logger.Debug(context.Background(), "wireguard status", slog.F("status", s), slog.Error(err))
if err != nil {
return
}

View File

@ -332,7 +332,7 @@ func (c *coordinator) ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID)
for {
err := c.handleNextClientMessage(id, agent, decoder)
if err != nil {
logger.Debug(ctx, "unable to read client update; closed conn?", slog.Error(err))
logger.Debug(ctx, "unable to read client update, connection may be closed", slog.Error(err))
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrClosedPipe) || errors.Is(err, context.Canceled) {
return nil
}
@ -469,7 +469,7 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID, name string) error
for {
err := c.handleNextAgentMessage(id, decoder)
if err != nil {
logger.Debug(ctx, "unable to read agent update; closed conn?", slog.Error(err))
logger.Debug(ctx, "unable to read agent update, connection may be closed", slog.Error(err))
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrClosedPipe) || errors.Is(err, context.Canceled) {
return nil
}