mirror of https://github.com/coder/coder.git
feat(support): add client magicsock and agent prometheus metrics to support bundle (#12604)
* feat(codersdk): add ability to fetch prometheus metrics directly from agent
* feat(support): add client magicsock and agent prometheus metrics to support bundle
* refactor(support): simplify AgentInfo control flow

Co-authored-by: Mathias Fredriksson <mafredri@gmail.com>
This commit is contained in:
parent
4d9e6c0134
commit
b0c4e7504c
|
@ -25,6 +25,7 @@ import (
|
|||
"github.com/go-chi/chi/v5"
|
||||
"github.com/google/uuid"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/expfmt"
|
||||
"github.com/spf13/afero"
|
||||
"go.uber.org/atomic"
|
||||
"golang.org/x/exp/slices"
|
||||
|
@ -34,6 +35,7 @@ import (
|
|||
"tailscale.com/net/speedtest"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/netlogtype"
|
||||
"tailscale.com/util/clientmetric"
|
||||
|
||||
"cdr.dev/slog"
|
||||
"github.com/coder/retry"
|
||||
|
@ -1980,3 +1982,26 @@ func (a *apiConnRoutineManager) start(name string, b gracefulShutdownBehavior, f
|
|||
func (a *apiConnRoutineManager) wait() error {
|
||||
return a.eg.Wait()
|
||||
}
|
||||
|
||||
func PrometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger slog.Logger) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
|
||||
// Based on: https://github.com/tailscale/tailscale/blob/280255acae604796a1113861f5a84e6fa2dc6121/ipn/localapi/localapi.go#L489
|
||||
clientmetric.WritePrometheusExpositionFormat(w)
|
||||
|
||||
metricFamilies, err := prometheusRegistry.Gather()
|
||||
if err != nil {
|
||||
logger.Error(context.Background(), "prometheus handler failed to gather metric families", slog.Error(err))
|
||||
return
|
||||
}
|
||||
|
||||
for _, metricFamily := range metricFamilies {
|
||||
_, err = expfmt.MetricFamilyToText(w, metricFamily)
|
||||
if err != nil {
|
||||
logger.Error(context.Background(), "expfmt.MetricFamilyToText failed", slog.Error(err))
|
||||
return
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
@ -35,11 +35,13 @@ func (a *agent) apiHandler() http.Handler {
|
|||
ignorePorts: cpy,
|
||||
cacheDuration: cacheDuration,
|
||||
}
|
||||
promHandler := PrometheusMetricsHandler(a.prometheusRegistry, a.logger)
|
||||
r.Get("/api/v0/listening-ports", lp.handler)
|
||||
r.Get("/debug/logs", a.HandleHTTPDebugLogs)
|
||||
r.Get("/debug/magicsock", a.HandleHTTPDebugMagicsock)
|
||||
r.Get("/debug/magicsock/debug-logging/{state}", a.HandleHTTPMagicsockDebugLoggingState)
|
||||
r.Get("/debug/manifest", a.HandleHTTPDebugManifest)
|
||||
r.Get("/debug/prometheus", promHandler.ServeHTTP)
|
||||
|
||||
return r
|
||||
}
|
||||
|
|
28
cli/agent.go
28
cli/agent.go
|
@ -18,10 +18,8 @@ import (
|
|||
"cloud.google.com/go/compute/metadata"
|
||||
"golang.org/x/xerrors"
|
||||
"gopkg.in/natefinch/lumberjack.v2"
|
||||
"tailscale.com/util/clientmetric"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/expfmt"
|
||||
|
||||
"cdr.dev/slog"
|
||||
"cdr.dev/slog/sloggers/sloghuman"
|
||||
|
@ -315,7 +313,8 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
|
|||
ModifiedProcesses: nil,
|
||||
})
|
||||
|
||||
prometheusSrvClose := ServeHandler(ctx, logger, prometheusMetricsHandler(prometheusRegistry, logger), prometheusAddress, "prometheus")
|
||||
promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger)
|
||||
prometheusSrvClose := ServeHandler(ctx, logger, promHandler, prometheusAddress, "prometheus")
|
||||
defer prometheusSrvClose()
|
||||
|
||||
debugSrvClose := ServeHandler(ctx, logger, agnt.HTTPDebug(), debugAddress, "debug")
|
||||
|
@ -501,26 +500,3 @@ func urlPort(u string) (int, error) {
|
|||
}
|
||||
return -1, xerrors.Errorf("invalid port: %s", u)
|
||||
}
|
||||
|
||||
func prometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger slog.Logger) http.Handler {
|
||||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
|
||||
// Based on: https://github.com/tailscale/tailscale/blob/280255acae604796a1113861f5a84e6fa2dc6121/ipn/localapi/localapi.go#L489
|
||||
clientmetric.WritePrometheusExpositionFormat(w)
|
||||
|
||||
metricFamilies, err := prometheusRegistry.Gather()
|
||||
if err != nil {
|
||||
logger.Error(context.Background(), "Prometheus handler can't gather metric families", slog.Error(err))
|
||||
return
|
||||
}
|
||||
|
||||
for _, metricFamily := range metricFamilies {
|
||||
_, err = expfmt.MetricFamilyToText(w, metricFamily)
|
||||
if err != nil {
|
||||
logger.Error(context.Background(), "expfmt.MetricFamilyToText failed", slog.Error(err))
|
||||
return
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
@ -176,8 +176,10 @@ func writeBundle(src *support.Bundle, dest *zip.Writer) error {
|
|||
"network/tailnet_debug.html": src.Network.TailnetDebug,
|
||||
"workspace/build_logs.txt": humanizeBuildLogs(src.Workspace.BuildLogs),
|
||||
"agent/logs.txt": string(src.Agent.Logs),
|
||||
"agent/magicsock.html": string(src.Agent.MagicsockHTML),
|
||||
"agent/agent_magicsock.html": string(src.Agent.AgentMagicsockHTML),
|
||||
"agent/client_magicsock.html": string(src.Agent.ClientMagicsockHTML),
|
||||
"agent/startup_logs.txt": humanizeAgentLogs(src.Agent.StartupLogs),
|
||||
"agent/prometheus.txt": string(src.Agent.Prometheus),
|
||||
"workspace/template_file.zip": string(templateVersionBytes),
|
||||
"logs.txt": strings.Join(src.Logs, "\n"),
|
||||
} {
|
||||
|
|
|
@ -177,9 +177,12 @@ func assertBundleContents(t *testing.T, path string) {
|
|||
case "agent/logs.txt":
|
||||
bs := readBytesFromZip(t, f)
|
||||
require.NotEmpty(t, bs, "logs should not be empty")
|
||||
case "agent/magicsock.html":
|
||||
case "agent/agent_magicsock.html":
|
||||
bs := readBytesFromZip(t, f)
|
||||
require.NotEmpty(t, bs, "agent magicsock should not be empty")
|
||||
case "agent/client_magicsock.html":
|
||||
bs := readBytesFromZip(t, f)
|
||||
require.NotEmpty(t, bs, "client magicsock should not be empty")
|
||||
case "agent/manifest.json":
|
||||
var v agentsdk.Manifest
|
||||
decodeJSONFromZip(t, f, &v)
|
||||
|
@ -192,6 +195,9 @@ func assertBundleContents(t *testing.T, path string) {
|
|||
var v *ipnstate.PingResult
|
||||
decodeJSONFromZip(t, f, &v)
|
||||
require.NotEmpty(t, v, "ping result should not be empty")
|
||||
case "agent/prometheus.txt":
|
||||
bs := readBytesFromZip(t, f)
|
||||
require.NotEmpty(t, bs, "agent prometheus metrics should not be empty")
|
||||
case "agent/startup_logs.txt":
|
||||
bs := readBytesFromZip(t, f)
|
||||
require.Contains(t, string(bs), "started up")
|
||||
|
|
|
@ -364,6 +364,9 @@ func (c *WorkspaceAgentConn) DebugMagicsock(ctx context.Context) ([]byte, error)
|
|||
if err != nil {
|
||||
return nil, xerrors.Errorf("do request: %w", err)
|
||||
}
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return nil, ReadBodyAsError(res)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
bs, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
|
@ -382,6 +385,9 @@ func (c *WorkspaceAgentConn) DebugManifest(ctx context.Context) ([]byte, error)
|
|||
return nil, xerrors.Errorf("do request: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return nil, ReadBodyAsError(res)
|
||||
}
|
||||
bs, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("read response body: %w", err)
|
||||
|
@ -398,6 +404,28 @@ func (c *WorkspaceAgentConn) DebugLogs(ctx context.Context) ([]byte, error) {
|
|||
return nil, xerrors.Errorf("do request: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return nil, ReadBodyAsError(res)
|
||||
}
|
||||
bs, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("read response body: %w", err)
|
||||
}
|
||||
return bs, nil
|
||||
}
|
||||
|
||||
// PrometheusMetrics returns a response from the agent's prometheus metrics endpoint
|
||||
func (c *WorkspaceAgentConn) PrometheusMetrics(ctx context.Context) ([]byte, error) {
|
||||
ctx, span := tracing.StartSpan(ctx)
|
||||
defer span.End()
|
||||
res, err := c.apiRequest(ctx, http.MethodGet, "/debug/prometheus", nil)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("do request: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return nil, ReadBodyAsError(res)
|
||||
}
|
||||
bs, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("read response body: %w", err)
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
@ -57,14 +58,16 @@ type Workspace struct {
|
|||
}
|
||||
|
||||
type Agent struct {
|
||||
Agent *codersdk.WorkspaceAgent `json:"agent"`
|
||||
ListeningPorts *codersdk.WorkspaceAgentListeningPortsResponse `json:"listening_ports"`
|
||||
Logs []byte `json:"logs"`
|
||||
MagicsockHTML []byte `json:"magicsock_html"`
|
||||
Manifest *agentsdk.Manifest `json:"manifest"`
|
||||
PeerDiagnostics *tailnet.PeerDiagnostics `json:"peer_diagnostics"`
|
||||
PingResult *ipnstate.PingResult `json:"ping_result"`
|
||||
StartupLogs []codersdk.WorkspaceAgentLog `json:"startup_logs"`
|
||||
Agent *codersdk.WorkspaceAgent `json:"agent"`
|
||||
ListeningPorts *codersdk.WorkspaceAgentListeningPortsResponse `json:"listening_ports"`
|
||||
Logs []byte `json:"logs"`
|
||||
ClientMagicsockHTML []byte `json:"client_magicsock_html"`
|
||||
AgentMagicsockHTML []byte `json:"agent_magicsock_html"`
|
||||
Manifest *agentsdk.Manifest `json:"manifest"`
|
||||
PeerDiagnostics *tailnet.PeerDiagnostics `json:"peer_diagnostics"`
|
||||
PingResult *ipnstate.PingResult `json:"ping_result"`
|
||||
Prometheus []byte `json:"prometheus"`
|
||||
StartupLogs []codersdk.WorkspaceAgentLog `json:"startup_logs"`
|
||||
}
|
||||
|
||||
// Deps is a set of dependencies for discovering information
|
||||
|
@ -313,77 +316,10 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag
|
|||
return nil
|
||||
})
|
||||
|
||||
conn, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{
|
||||
Logger: log.Named("dial-agent"),
|
||||
BlockEndpoints: false,
|
||||
})
|
||||
if err != nil {
|
||||
log.Error(ctx, "dial agent", slog.Error(err))
|
||||
} else {
|
||||
defer func() {
|
||||
if err := conn.Close(); err != nil {
|
||||
log.Error(ctx, "failed to close agent connection", slog.Error(err))
|
||||
}
|
||||
<-conn.Closed()
|
||||
}()
|
||||
if !conn.AwaitReachable(ctx) {
|
||||
log.Error(ctx, "timed out waiting for agent")
|
||||
} else {
|
||||
eg.Go(func() error {
|
||||
_, _, pingRes, err := conn.Ping(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("ping agent: %w", err)
|
||||
}
|
||||
a.PingResult = pingRes
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
pds := conn.GetPeerDiagnostics()
|
||||
a.PeerDiagnostics = &pds
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
msBytes, err := conn.DebugMagicsock(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("get agent magicsock page: %w", err)
|
||||
}
|
||||
a.MagicsockHTML = msBytes
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
manifestRes, err := conn.DebugManifest(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("fetch manifest: %w", err)
|
||||
}
|
||||
if err := json.NewDecoder(bytes.NewReader(manifestRes)).Decode(&a.Manifest); err != nil {
|
||||
return xerrors.Errorf("decode agent manifest: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
logBytes, err := conn.DebugLogs(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("fetch coder agent logs: %w", err)
|
||||
}
|
||||
a.Logs = logBytes
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
lps, err := conn.ListeningPorts(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("get listening ports: %w", err)
|
||||
}
|
||||
a.ListeningPorts = &lps
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
// to simplify control flow, fetching information directly from
|
||||
// the agent is handled in a separate function
|
||||
closer := connectedAgentInfo(ctx, client, log, agentID, &eg, &a)
|
||||
defer closer()
|
||||
|
||||
if err := eg.Wait(); err != nil {
|
||||
log.Error(ctx, "fetch agent information", slog.Error(err))
|
||||
|
@ -392,6 +328,108 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag
|
|||
return a
|
||||
}
|
||||
|
||||
func connectedAgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, agentID uuid.UUID, eg *errgroup.Group, a *Agent) (closer func()) {
|
||||
conn, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{
|
||||
Logger: log.Named("dial-agent"),
|
||||
BlockEndpoints: false,
|
||||
})
|
||||
|
||||
closer = func() {}
|
||||
|
||||
if err != nil {
|
||||
log.Error(ctx, "dial agent", slog.Error(err))
|
||||
return closer
|
||||
}
|
||||
|
||||
if !conn.AwaitReachable(ctx) {
|
||||
log.Error(ctx, "timed out waiting for agent")
|
||||
return closer
|
||||
}
|
||||
|
||||
closer = func() {
|
||||
if err := conn.Close(); err != nil {
|
||||
log.Error(ctx, "failed to close agent connection", slog.Error(err))
|
||||
}
|
||||
<-conn.Closed()
|
||||
}
|
||||
|
||||
eg.Go(func() error {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/", nil)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("create request: %w", err)
|
||||
}
|
||||
rr := httptest.NewRecorder()
|
||||
conn.MagicsockServeHTTPDebug(rr, req)
|
||||
a.ClientMagicsockHTML = rr.Body.Bytes()
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
promRes, err := conn.PrometheusMetrics(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("fetch agent prometheus metrics: %w", err)
|
||||
}
|
||||
a.Prometheus = promRes
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
_, _, pingRes, err := conn.Ping(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("ping agent: %w", err)
|
||||
}
|
||||
a.PingResult = pingRes
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
pds := conn.GetPeerDiagnostics()
|
||||
a.PeerDiagnostics = &pds
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
msBytes, err := conn.DebugMagicsock(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("get agent magicsock page: %w", err)
|
||||
}
|
||||
a.AgentMagicsockHTML = msBytes
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
manifestRes, err := conn.DebugManifest(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("fetch manifest: %w", err)
|
||||
}
|
||||
if err := json.NewDecoder(bytes.NewReader(manifestRes)).Decode(&a.Manifest); err != nil {
|
||||
return xerrors.Errorf("decode agent manifest: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
logBytes, err := conn.DebugLogs(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("fetch coder agent logs: %w", err)
|
||||
}
|
||||
a.Logs = logBytes
|
||||
return nil
|
||||
})
|
||||
|
||||
eg.Go(func() error {
|
||||
lps, err := conn.ListeningPorts(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("get listening ports: %w", err)
|
||||
}
|
||||
a.ListeningPorts = &lps
|
||||
return nil
|
||||
})
|
||||
|
||||
return closer
|
||||
}
|
||||
|
||||
// Run generates a support bundle with the given dependencies.
|
||||
func Run(ctx context.Context, d *Deps) (*Bundle, error) {
|
||||
var b Bundle
|
||||
|
|
|
@ -75,9 +75,11 @@ func TestRun(t *testing.T) {
|
|||
assertNotNilNotEmpty(t, bun.Agent.Agent, "agent should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.ListeningPorts, "agent listening ports should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.Logs, "agent logs should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.MagicsockHTML, "agent magicsock should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.AgentMagicsockHTML, "agent magicsock should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.ClientMagicsockHTML, "client magicsock should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.PeerDiagnostics, "agent peer diagnostics should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.PingResult, "agent ping result should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.Prometheus, "agent prometheus metrics should be present")
|
||||
assertNotNilNotEmpty(t, bun.Agent.StartupLogs, "agent startup logs should be present")
|
||||
assertNotNilNotEmpty(t, bun.Logs, "bundle logs should be present")
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue