feat(support): add client magicsock and agent prometheus metrics to support bundle (#12604)

* feat(codersdk): add ability to fetch prometheus metrics directly from agent
* feat(support): add client magicsock and agent prometheus metrics to support bundle
* refactor(support): simplify AgentInfo control flow

Co-authored-by: Mathias Fredriksson <mafredri@gmail.com>
This commit is contained in:
Cian Johnston 2024-03-15 15:33:49 +00:00 committed by GitHub
parent 4d9e6c0134
commit b0c4e7504c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 187 additions and 108 deletions

View File

@ -25,6 +25,7 @@ import (
"github.com/go-chi/chi/v5"
"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"github.com/spf13/afero"
"go.uber.org/atomic"
"golang.org/x/exp/slices"
@ -34,6 +35,7 @@ import (
"tailscale.com/net/speedtest"
"tailscale.com/tailcfg"
"tailscale.com/types/netlogtype"
"tailscale.com/util/clientmetric"
"cdr.dev/slog"
"github.com/coder/retry"
@ -1980,3 +1982,26 @@ func (a *apiConnRoutineManager) start(name string, b gracefulShutdownBehavior, f
func (a *apiConnRoutineManager) wait() error {
return a.eg.Wait()
}
// PrometheusMetricsHandler returns an http.Handler that writes metrics as
// plain text: first the output of tailscale's clientmetric package, followed
// by every metric family gathered from prometheusRegistry, each encoded with
// expfmt.MetricFamilyToText.
//
// Failures while gathering or encoding are reported through logger and end
// the response early; anything already written to the response is left as is.
func PrometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger slog.Logger) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Log with the request's own context instead of context.Background()
		// so that values attached to the request are visible to the logger.
		ctx := r.Context()

		w.Header().Set("Content-Type", "text/plain")

		// Based on: https://github.com/tailscale/tailscale/blob/280255acae604796a1113861f5a84e6fa2dc6121/ipn/localapi/localapi.go#L489
		clientmetric.WritePrometheusExpositionFormat(w)

		metricFamilies, err := prometheusRegistry.Gather()
		if err != nil {
			logger.Error(ctx, "prometheus handler failed to gather metric families", slog.Error(err))
			return
		}

		for _, metricFamily := range metricFamilies {
			if _, err := expfmt.MetricFamilyToText(w, metricFamily); err != nil {
				logger.Error(ctx, "expfmt.MetricFamilyToText failed", slog.Error(err))
				return
			}
		}
	})
}

View File

@ -35,11 +35,13 @@ func (a *agent) apiHandler() http.Handler {
ignorePorts: cpy,
cacheDuration: cacheDuration,
}
promHandler := PrometheusMetricsHandler(a.prometheusRegistry, a.logger)
r.Get("/api/v0/listening-ports", lp.handler)
r.Get("/debug/logs", a.HandleHTTPDebugLogs)
r.Get("/debug/magicsock", a.HandleHTTPDebugMagicsock)
r.Get("/debug/magicsock/debug-logging/{state}", a.HandleHTTPMagicsockDebugLoggingState)
r.Get("/debug/manifest", a.HandleHTTPDebugManifest)
r.Get("/debug/prometheus", promHandler.ServeHTTP)
return r
}

View File

@ -18,10 +18,8 @@ import (
"cloud.google.com/go/compute/metadata"
"golang.org/x/xerrors"
"gopkg.in/natefinch/lumberjack.v2"
"tailscale.com/util/clientmetric"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"cdr.dev/slog"
"cdr.dev/slog/sloggers/sloghuman"
@ -315,7 +313,8 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
ModifiedProcesses: nil,
})
prometheusSrvClose := ServeHandler(ctx, logger, prometheusMetricsHandler(prometheusRegistry, logger), prometheusAddress, "prometheus")
promHandler := agent.PrometheusMetricsHandler(prometheusRegistry, logger)
prometheusSrvClose := ServeHandler(ctx, logger, promHandler, prometheusAddress, "prometheus")
defer prometheusSrvClose()
debugSrvClose := ServeHandler(ctx, logger, agnt.HTTPDebug(), debugAddress, "debug")
@ -501,26 +500,3 @@ func urlPort(u string) (int, error) {
}
return -1, xerrors.Errorf("invalid port: %s", u)
}
// prometheusMetricsHandler builds an http.Handler that responds with plain
// text: the output of tailscale's clientmetric package first, then each
// metric family gathered from prometheusRegistry. Gather and encoding errors
// are logged through logger and stop the response at that point.
func prometheusMetricsHandler(prometheusRegistry *prometheus.Registry, logger slog.Logger) http.Handler {
	serve := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/plain")

		// Based on: https://github.com/tailscale/tailscale/blob/280255acae604796a1113861f5a84e6fa2dc6121/ipn/localapi/localapi.go#L489
		clientmetric.WritePrometheusExpositionFormat(w)

		families, gatherErr := prometheusRegistry.Gather()
		if gatherErr != nil {
			logger.Error(context.Background(), "Prometheus handler can't gather metric families", slog.Error(gatherErr))
			return
		}

		// Emit the families in the order Gather returned them; bail out on
		// the first encoding failure.
		for i := range families {
			if _, writeErr := expfmt.MetricFamilyToText(w, families[i]); writeErr != nil {
				logger.Error(context.Background(), "expfmt.MetricFamilyToText failed", slog.Error(writeErr))
				return
			}
		}
	}
	return http.HandlerFunc(serve)
}

View File

@ -176,8 +176,10 @@ func writeBundle(src *support.Bundle, dest *zip.Writer) error {
"network/tailnet_debug.html": src.Network.TailnetDebug,
"workspace/build_logs.txt": humanizeBuildLogs(src.Workspace.BuildLogs),
"agent/logs.txt": string(src.Agent.Logs),
"agent/magicsock.html": string(src.Agent.MagicsockHTML),
"agent/agent_magicsock.html": string(src.Agent.AgentMagicsockHTML),
"agent/client_magicsock.html": string(src.Agent.ClientMagicsockHTML),
"agent/startup_logs.txt": humanizeAgentLogs(src.Agent.StartupLogs),
"agent/prometheus.txt": string(src.Agent.Prometheus),
"workspace/template_file.zip": string(templateVersionBytes),
"logs.txt": strings.Join(src.Logs, "\n"),
} {

View File

@ -177,9 +177,12 @@ func assertBundleContents(t *testing.T, path string) {
case "agent/logs.txt":
bs := readBytesFromZip(t, f)
require.NotEmpty(t, bs, "logs should not be empty")
case "agent/magicsock.html":
case "agent/agent_magicsock.html":
bs := readBytesFromZip(t, f)
require.NotEmpty(t, bs, "agent magicsock should not be empty")
case "agent/client_magicsock.html":
bs := readBytesFromZip(t, f)
require.NotEmpty(t, bs, "client magicsock should not be empty")
case "agent/manifest.json":
var v agentsdk.Manifest
decodeJSONFromZip(t, f, &v)
@ -192,6 +195,9 @@ func assertBundleContents(t *testing.T, path string) {
var v *ipnstate.PingResult
decodeJSONFromZip(t, f, &v)
require.NotEmpty(t, v, "ping result should not be empty")
case "agent/prometheus.txt":
bs := readBytesFromZip(t, f)
require.NotEmpty(t, bs, "agent prometheus metrics should not be empty")
case "agent/startup_logs.txt":
bs := readBytesFromZip(t, f)
require.Contains(t, string(bs), "started up")

View File

@ -364,6 +364,9 @@ func (c *WorkspaceAgentConn) DebugMagicsock(ctx context.Context) ([]byte, error)
if err != nil {
return nil, xerrors.Errorf("do request: %w", err)
}
if res.StatusCode != http.StatusOK {
return nil, ReadBodyAsError(res)
}
defer res.Body.Close()
bs, err := io.ReadAll(res.Body)
if err != nil {
@ -382,6 +385,9 @@ func (c *WorkspaceAgentConn) DebugManifest(ctx context.Context) ([]byte, error)
return nil, xerrors.Errorf("do request: %w", err)
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, ReadBodyAsError(res)
}
bs, err := io.ReadAll(res.Body)
if err != nil {
return nil, xerrors.Errorf("read response body: %w", err)
@ -398,6 +404,28 @@ func (c *WorkspaceAgentConn) DebugLogs(ctx context.Context) ([]byte, error) {
return nil, xerrors.Errorf("do request: %w", err)
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, ReadBodyAsError(res)
}
bs, err := io.ReadAll(res.Body)
if err != nil {
return nil, xerrors.Errorf("read response body: %w", err)
}
return bs, nil
}
// PrometheusMetrics returns a response from the agent's Prometheus metrics endpoint.
func (c *WorkspaceAgentConn) PrometheusMetrics(ctx context.Context) ([]byte, error) {
ctx, span := tracing.StartSpan(ctx)
defer span.End()
res, err := c.apiRequest(ctx, http.MethodGet, "/debug/prometheus", nil)
if err != nil {
return nil, xerrors.Errorf("do request: %w", err)
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, ReadBodyAsError(res)
}
bs, err := io.ReadAll(res.Body)
if err != nil {
return nil, xerrors.Errorf("read response body: %w", err)

View File

@ -7,6 +7,7 @@ import (
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strings"
"golang.org/x/sync/errgroup"
@ -60,10 +61,12 @@ type Agent struct {
Agent *codersdk.WorkspaceAgent `json:"agent"`
ListeningPorts *codersdk.WorkspaceAgentListeningPortsResponse `json:"listening_ports"`
Logs []byte `json:"logs"`
MagicsockHTML []byte `json:"magicsock_html"`
ClientMagicsockHTML []byte `json:"client_magicsock_html"`
AgentMagicsockHTML []byte `json:"agent_magicsock_html"`
Manifest *agentsdk.Manifest `json:"manifest"`
PeerDiagnostics *tailnet.PeerDiagnostics `json:"peer_diagnostics"`
PingResult *ipnstate.PingResult `json:"ping_result"`
Prometheus []byte `json:"prometheus"`
StartupLogs []codersdk.WorkspaceAgentLog `json:"startup_logs"`
}
@ -313,22 +316,63 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag
return nil
})
// to simplify control flow, fetching information directly from
// the agent is handled in a separate function
closer := connectedAgentInfo(ctx, client, log, agentID, &eg, &a)
defer closer()
if err := eg.Wait(); err != nil {
log.Error(ctx, "fetch agent information", slog.Error(err))
}
return a
}
func connectedAgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, agentID uuid.UUID, eg *errgroup.Group, a *Agent) (closer func()) {
conn, err := client.DialWorkspaceAgent(ctx, agentID, &codersdk.DialWorkspaceAgentOptions{
Logger: log.Named("dial-agent"),
BlockEndpoints: false,
})
closer = func() {}
if err != nil {
log.Error(ctx, "dial agent", slog.Error(err))
} else {
defer func() {
return closer
}
if !conn.AwaitReachable(ctx) {
log.Error(ctx, "timed out waiting for agent")
return closer
}
closer = func() {
if err := conn.Close(); err != nil {
log.Error(ctx, "failed to close agent connection", slog.Error(err))
}
<-conn.Closed()
}()
if !conn.AwaitReachable(ctx) {
log.Error(ctx, "timed out waiting for agent")
} else {
}
eg.Go(func() error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/", nil)
if err != nil {
return xerrors.Errorf("create request: %w", err)
}
rr := httptest.NewRecorder()
conn.MagicsockServeHTTPDebug(rr, req)
a.ClientMagicsockHTML = rr.Body.Bytes()
return nil
})
eg.Go(func() error {
promRes, err := conn.PrometheusMetrics(ctx)
if err != nil {
return xerrors.Errorf("fetch agent prometheus metrics: %w", err)
}
a.Prometheus = promRes
return nil
})
eg.Go(func() error {
_, _, pingRes, err := conn.Ping(ctx)
if err != nil {
@ -349,7 +393,7 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag
if err != nil {
return xerrors.Errorf("get agent magicsock page: %w", err)
}
a.MagicsockHTML = msBytes
a.AgentMagicsockHTML = msBytes
return nil
})
@ -382,14 +426,8 @@ func AgentInfo(ctx context.Context, client *codersdk.Client, log slog.Logger, ag
a.ListeningPorts = &lps
return nil
})
}
}
if err := eg.Wait(); err != nil {
log.Error(ctx, "fetch agent information", slog.Error(err))
}
return a
return closer
}
// Run generates a support bundle with the given dependencies.

View File

@ -75,9 +75,11 @@ func TestRun(t *testing.T) {
assertNotNilNotEmpty(t, bun.Agent.Agent, "agent should be present")
assertNotNilNotEmpty(t, bun.Agent.ListeningPorts, "agent listening ports should be present")
assertNotNilNotEmpty(t, bun.Agent.Logs, "agent logs should be present")
assertNotNilNotEmpty(t, bun.Agent.MagicsockHTML, "agent magicsock should be present")
assertNotNilNotEmpty(t, bun.Agent.AgentMagicsockHTML, "agent magicsock should be present")
assertNotNilNotEmpty(t, bun.Agent.ClientMagicsockHTML, "client magicsock should be present")
assertNotNilNotEmpty(t, bun.Agent.PeerDiagnostics, "agent peer diagnostics should be present")
assertNotNilNotEmpty(t, bun.Agent.PingResult, "agent ping result should be present")
assertNotNilNotEmpty(t, bun.Agent.Prometheus, "agent prometheus metrics should be present")
assertNotNilNotEmpty(t, bun.Agent.StartupLogs, "agent startup logs should be present")
assertNotNilNotEmpty(t, bun.Logs, "bundle logs should be present")
})