coder/enterprise/wsproxy/wsproxy.go

551 lines
18 KiB
Go

package wsproxy
import (
"context"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"net/http"
"net/url"
"reflect"
"regexp"
"strings"
"sync/atomic"
"time"
"github.com/go-chi/chi/v5"
"github.com/google/uuid"
"github.com/hashicorp/go-multierror"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/otel/trace"
"golang.org/x/xerrors"
"tailscale.com/derp"
"tailscale.com/derp/derphttp"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"cdr.dev/slog"
"github.com/coder/coder/v2/buildinfo"
"github.com/coder/coder/v2/cli/cliutil"
"github.com/coder/coder/v2/coderd"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/coderd/httpmw"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/coderd/workspaceapps"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/enterprise/derpmesh"
"github.com/coder/coder/v2/enterprise/wsproxy/wsproxysdk"
"github.com/coder/coder/v2/site"
"github.com/coder/coder/v2/tailnet"
)
type Options struct {
Logger slog.Logger
Experiments codersdk.Experiments
HTTPClient *http.Client
// DashboardURL is the URL of the primary coderd instance.
DashboardURL *url.URL
// AccessURL is the URL of the WorkspaceProxy.
AccessURL *url.URL
// TODO: @emyrk We use these two fields in many places with this comment.
// Maybe we should make some shared options struct?
// AppHostname should be the wildcard hostname to use for workspace
// applications INCLUDING the asterisk, (optional) suffix and leading dot.
// It will use the same scheme and port number as the access URL.
// E.g. "*.apps.coder.com" or "*-apps.coder.com".
AppHostname string
// AppHostnameRegex contains the regex version of options.AppHostname as
// generated by appurl.CompileHostnamePattern(). It MUST be set if
// options.AppHostname is set.
AppHostnameRegex *regexp.Regexp
RealIPConfig *httpmw.RealIPConfig
Tracing trace.TracerProvider
PrometheusRegistry *prometheus.Registry
TLSCertificates []tls.Certificate
APIRateLimit int
SecureAuthCookie bool
DisablePathApps bool
DERPEnabled bool
DERPServerRelayAddress string
// DERPOnly determines whether this proxy only provides DERP and does not
// provide access to workspace apps/terminal.
DERPOnly bool
ProxySessionToken string
// AllowAllCors will set all CORs headers to '*'.
// By default, CORs is set to accept external requests
// from the dashboardURL. This should only be used in development.
AllowAllCors bool
StatsCollectorOptions workspaceapps.StatsCollectorOptions
}
func (o *Options) Validate() error {
var errs optErrors
errs.Required("Logger", o.Logger)
errs.Required("DashboardURL", o.DashboardURL)
errs.Required("AccessURL", o.AccessURL)
errs.Required("RealIPConfig", o.RealIPConfig)
errs.Required("PrometheusRegistry", o.PrometheusRegistry)
errs.NotEmpty("ProxySessionToken", o.ProxySessionToken)
if len(errs) > 0 {
return errs
}
return nil
}
// Server is an external workspace proxy server. This server can communicate
// directly with a workspace. It requires a primary coderd to establish a said
// connection.
type Server struct {
Options *Options
Handler chi.Router
DashboardURL *url.URL
AppServer *workspaceapps.Server
// Logging/Metrics
Logger slog.Logger
TracerProvider trace.TracerProvider
PrometheusRegistry *prometheus.Registry
// SDKClient is a client to the primary coderd instance authenticated with
// the moon's token.
SDKClient *wsproxysdk.Client
// DERP
derpMesh *derpmesh.Mesh
latestDERPMap atomic.Pointer[tailcfg.DERPMap]
// Used for graceful shutdown. Required for the dialer.
ctx context.Context
cancel context.CancelFunc
derpCloseFunc func()
registerLoop *wsproxysdk.RegisterWorkspaceProxyLoop
}
// New creates a new workspace proxy server. This requires a primary coderd
// instance to be reachable and the correct authorization access token to be
// provided. If the proxy cannot authenticate with the primary, this will fail.
func New(ctx context.Context, opts *Options) (*Server, error) {
if opts.PrometheusRegistry == nil {
opts.PrometheusRegistry = prometheus.NewRegistry()
}
if err := opts.Validate(); err != nil {
return nil, err
}
client := wsproxysdk.New(opts.DashboardURL)
err := client.SetSessionToken(opts.ProxySessionToken)
if err != nil {
return nil, xerrors.Errorf("set client token: %w", err)
}
// Use the configured client if provided.
if opts.HTTPClient != nil {
client.SDKClient.HTTPClient = opts.HTTPClient
}
// TODO: Probably do some version checking here
info, err := client.SDKClient.BuildInfo(ctx)
if err != nil {
return nil, xerrors.Errorf("buildinfo: %w", errors.Join(
xerrors.Errorf("unable to fetch build info from primary coderd. Are you sure %q is a coderd instance?", opts.DashboardURL),
err,
))
}
if info.WorkspaceProxy {
return nil, xerrors.Errorf("%q is a workspace proxy, not a primary coderd instance", opts.DashboardURL)
}
meshRootCA := x509.NewCertPool()
for _, certificate := range opts.TLSCertificates {
for _, certificatePart := range certificate.Certificate {
certificate, err := x509.ParseCertificate(certificatePart)
if err != nil {
return nil, xerrors.Errorf("parse certificate %s: %w", certificate.Subject.CommonName, err)
}
meshRootCA.AddCert(certificate)
}
}
// This TLS configuration spoofs access from the access URL hostname
// assuming that the certificates provided will cover that hostname.
//
// Replica sync and DERP meshing require accessing replicas via their
// internal IP addresses, and if TLS is configured we use the same
// certificates.
meshTLSConfig := &tls.Config{
MinVersion: tls.VersionTLS12,
Certificates: opts.TLSCertificates,
RootCAs: meshRootCA,
ServerName: opts.AccessURL.Hostname(),
}
derpServer := derp.NewServer(key.NewNode(), tailnet.Logger(opts.Logger.Named("net.derp")))
ctx, cancel := context.WithCancel(context.Background())
r := chi.NewRouter()
s := &Server{
Options: opts,
Handler: r,
DashboardURL: opts.DashboardURL,
Logger: opts.Logger.Named("net.workspace-proxy"),
TracerProvider: opts.Tracing,
PrometheusRegistry: opts.PrometheusRegistry,
SDKClient: client,
derpMesh: derpmesh.New(opts.Logger.Named("net.derpmesh"), derpServer, meshTLSConfig),
ctx: ctx,
cancel: cancel,
}
// Register the workspace proxy with the primary coderd instance and start a
// goroutine to periodically re-register.
replicaID := uuid.New()
osHostname := cliutil.Hostname()
registerLoop, regResp, err := client.RegisterWorkspaceProxyLoop(ctx, wsproxysdk.RegisterWorkspaceProxyLoopOpts{
Logger: opts.Logger,
Request: wsproxysdk.RegisterWorkspaceProxyRequest{
AccessURL: opts.AccessURL.String(),
WildcardHostname: opts.AppHostname,
DerpEnabled: opts.DERPEnabled,
DerpOnly: opts.DERPOnly,
ReplicaID: replicaID,
ReplicaHostname: osHostname,
ReplicaError: "",
ReplicaRelayAddress: opts.DERPServerRelayAddress,
Version: buildinfo.Version(),
},
MutateFn: s.mutateRegister,
CallbackFn: s.handleRegister,
FailureFn: s.handleRegisterFailure,
})
if err != nil {
return nil, xerrors.Errorf("register proxy: %w", err)
}
s.registerLoop = registerLoop
derpServer.SetMeshKey(regResp.DERPMeshKey)
err = s.handleRegister(regResp)
if err != nil {
return nil, xerrors.Errorf("handle register: %w", err)
}
secKey, err := workspaceapps.KeyFromString(regResp.AppSecurityKey)
if err != nil {
return nil, xerrors.Errorf("parse app security key: %w", err)
}
agentProvider, err := coderd.NewServerTailnet(ctx,
s.Logger,
nil,
func() *tailcfg.DERPMap {
return s.latestDERPMap.Load()
},
regResp.DERPForceWebSockets,
s.DialCoordinator,
false, // TODO: this will be covered in a subsequent pr.
s.TracerProvider,
)
if err != nil {
return nil, xerrors.Errorf("create server tailnet: %w", err)
}
workspaceAppsLogger := opts.Logger.Named("workspaceapps")
if opts.StatsCollectorOptions.Logger == nil {
named := workspaceAppsLogger.Named("stats_collector")
opts.StatsCollectorOptions.Logger = &named
}
if opts.StatsCollectorOptions.Reporter == nil {
opts.StatsCollectorOptions.Reporter = &appStatsReporter{Client: client}
}
s.AppServer = &workspaceapps.Server{
Logger: workspaceAppsLogger,
DashboardURL: opts.DashboardURL,
AccessURL: opts.AccessURL,
Hostname: opts.AppHostname,
HostnameRegex: opts.AppHostnameRegex,
RealIPConfig: opts.RealIPConfig,
SignedTokenProvider: &TokenProvider{
DashboardURL: opts.DashboardURL,
AccessURL: opts.AccessURL,
AppHostname: opts.AppHostname,
Client: client,
SecurityKey: secKey,
Logger: s.Logger.Named("proxy_token_provider"),
},
AppSecurityKey: secKey,
DisablePathApps: opts.DisablePathApps,
SecureAuthCookie: opts.SecureAuthCookie,
AgentProvider: agentProvider,
StatsCollector: workspaceapps.NewStatsCollector(opts.StatsCollectorOptions),
}
derpHandler := derphttp.Handler(derpServer)
derpHandler, s.derpCloseFunc = tailnet.WithWebsocketSupport(derpServer, derpHandler)
// The primary coderd dashboard needs to make some GET requests to
// the workspace proxies to check latency.
corsMW := httpmw.Cors(opts.AllowAllCors, opts.DashboardURL.String())
prometheusMW := httpmw.Prometheus(s.PrometheusRegistry)
// Routes
apiRateLimiter := httpmw.RateLimit(opts.APIRateLimit, time.Minute)
// Persistent middlewares to all routes
r.Use(
// TODO: @emyrk Should we standardize these in some other package?
httpmw.Recover(s.Logger),
tracing.StatusWriterMiddleware,
tracing.Middleware(s.TracerProvider),
httpmw.AttachRequestID,
httpmw.ExtractRealIP(s.Options.RealIPConfig),
httpmw.Logger(s.Logger),
prometheusMW,
corsMW,
// HandleSubdomain is a middleware that handles all requests to the
// subdomain-based workspace apps.
s.AppServer.HandleSubdomain(apiRateLimiter),
// Build-Version is helpful for debugging.
func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Add(codersdk.BuildVersionHeader, buildinfo.Version())
next.ServeHTTP(w, r)
})
},
// This header stops a browser from trying to MIME-sniff the content type and
// forces it to stick with the declared content-type. This is the only valid
// value for this header.
// See: https://github.com/coder/security/issues/12
func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("X-Content-Type-Options", "nosniff")
next.ServeHTTP(w, r)
})
},
// CSRF is required here because we need to set the CSRF cookies on
// responses.
httpmw.CSRF(s.Options.SecureAuthCookie),
)
// Attach workspace apps routes.
if !opts.DERPOnly {
r.Group(func(r chi.Router) {
r.Use(apiRateLimiter)
s.AppServer.Attach(r)
})
} else {
r.Group(func(r chi.Router) {
derpOnlyHandler := func(rw http.ResponseWriter, r *http.Request) {
site.RenderStaticErrorPage(rw, r, site.ErrorPageData{
Title: "Head to the Dashboard",
Status: http.StatusBadRequest,
HideStatus: true,
Description: "This workspace proxy is DERP-only and cannot be used for browser connections. " +
"Please use a different region directly from the dashboard. Click to be redirected!",
RetryEnabled: false,
DashboardURL: opts.DashboardURL.String(),
})
}
serveDerpOnlyHandler := func(r chi.Router) {
r.HandleFunc("/*", derpOnlyHandler)
}
r.Route("/%40{user}/{workspace_and_agent}/apps/{workspaceapp}", serveDerpOnlyHandler)
r.Route("/@{user}/{workspace_and_agent}/apps/{workspaceapp}", serveDerpOnlyHandler)
r.Get("/api/v2/workspaceagents/{workspaceagent}/pty", derpOnlyHandler)
})
}
if opts.DERPEnabled {
r.Route("/derp", func(r chi.Router) {
r.Get("/", derpHandler.ServeHTTP)
// This is used when UDP is blocked, and latency must be checked via HTTP(s).
r.Get("/latency-check", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
})
})
} else {
r.Route("/derp", func(r chi.Router) {
r.HandleFunc("/*", func(rw http.ResponseWriter, r *http.Request) {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "DERP is disabled on this proxy.",
})
})
})
}
r.Get("/api/v2/buildinfo", s.buildInfo)
r.Get("/healthz", func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte("OK")) })
// TODO: @emyrk should this be authenticated or debounced?
r.Get("/healthz-report", s.healthReport)
r.NotFound(func(rw http.ResponseWriter, r *http.Request) {
site.RenderStaticErrorPage(rw, r, site.ErrorPageData{
Title: "Head to the Dashboard",
Status: http.StatusBadRequest,
HideStatus: true,
Description: "Workspace Proxies route traffic in terminals and apps directly to your workspace. " +
"This page must be loaded from the dashboard. Click to be redirected!",
RetryEnabled: false,
DashboardURL: opts.DashboardURL.String(),
})
})
// See coderd/coderd.go for why we need this.
rootRouter := chi.NewRouter()
// Make sure to add the cors middleware to the latency check route.
rootRouter.Get("/latency-check", tracing.StatusWriterMiddleware(prometheusMW(coderd.LatencyCheck())).ServeHTTP)
rootRouter.Mount("/", r)
s.Handler = rootRouter
return s, nil
}
func (s *Server) RegisterNow() error {
_, err := s.registerLoop.RegisterNow()
return err
}
func (s *Server) Close() error {
s.cancel()
var err error
s.registerLoop.Close()
s.derpCloseFunc()
appServerErr := s.AppServer.Close()
if appServerErr != nil {
err = multierror.Append(err, appServerErr)
}
agentProviderErr := s.AppServer.AgentProvider.Close()
if agentProviderErr != nil {
err = multierror.Append(err, agentProviderErr)
}
s.SDKClient.SDKClient.HTTPClient.CloseIdleConnections()
return err
}
func (*Server) mutateRegister(_ *wsproxysdk.RegisterWorkspaceProxyRequest) {
// TODO: we should probably ping replicas similarly to the replicasync
// package in the primary and update req.ReplicaError accordingly.
}
func (s *Server) handleRegister(res wsproxysdk.RegisterWorkspaceProxyResponse) error {
addresses := make([]string, len(res.SiblingReplicas))
for i, replica := range res.SiblingReplicas {
addresses[i] = replica.RelayAddress
}
s.Logger.Debug(s.ctx, "setting DERP mesh sibling addresses", slog.F("addresses", addresses))
s.derpMesh.SetAddresses(addresses, false)
s.latestDERPMap.Store(res.DERPMap)
return nil
}
func (s *Server) handleRegisterFailure(err error) {
if s.ctx.Err() != nil {
return
}
s.Logger.Fatal(s.ctx,
"failed to periodically re-register workspace proxy with primary Coder deployment",
slog.Error(err),
)
}
func (s *Server) DialCoordinator(ctx context.Context) (tailnet.MultiAgentConn, error) {
return s.SDKClient.DialCoordinator(ctx)
}
func (s *Server) buildInfo(rw http.ResponseWriter, r *http.Request) {
httpapi.Write(r.Context(), rw, http.StatusOK, codersdk.BuildInfoResponse{
ExternalURL: buildinfo.ExternalURL(),
Version: buildinfo.Version(),
AgentAPIVersion: coderd.AgentAPIVersionREST,
DashboardURL: s.DashboardURL.String(),
WorkspaceProxy: true,
})
}
// healthReport is a more thorough health check than the '/healthz' endpoint.
// This endpoint not only responds if the server is running, but can do some
// internal diagnostics to ensure that the server is running correctly. The
// primary coderd will use this to determine if this workspace proxy can be used
// by the users. This endpoint will take longer to respond than the '/healthz'.
// Checks:
// - Can communicate with primary coderd
//
// TODO: Config checks to ensure consistent with primary
func (s *Server) healthReport(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
var report codersdk.ProxyHealthReport
// This is to catch edge cases where the server is shutting down, but might
// still serve a web request that returns "healthy". This is mainly just for
// unit tests, as shutting down the test webserver is tied to the lifecycle
// of the test. In practice, the webserver is tied to the lifecycle of the
// app, so the webserver AND the proxy will be shut down at the same time.
if s.ctx.Err() != nil {
httpapi.Write(r.Context(), rw, http.StatusInternalServerError, "workspace proxy in middle of shutting down")
return
}
// Hit the build info to do basic version checking.
primaryBuild, err := s.SDKClient.SDKClient.BuildInfo(ctx)
if err != nil {
report.Errors = append(report.Errors, fmt.Sprintf("failed to get build info: %s", err.Error()))
httpapi.Write(r.Context(), rw, http.StatusOK, report)
return
}
if primaryBuild.WorkspaceProxy {
// This could be a simple mistake of using a proxy url as the dashboard url.
report.Errors = append(report.Errors,
fmt.Sprintf("dashboard url (%s) is a workspace proxy, must be a primary coderd", s.DashboardURL.String()))
}
// If we are in dev mode, never check versions.
if !buildinfo.IsDev() && !buildinfo.VersionsMatch(primaryBuild.Version, buildinfo.Version()) {
// Version mismatches are not fatal, but should be reported.
report.Warnings = append(report.Warnings,
fmt.Sprintf("version mismatch: primary coderd (%s) != workspace proxy (%s)", primaryBuild.Version, buildinfo.Version()))
}
// TODO: We should hit the deployment config endpoint and do some config
// checks. We can check the version from the X-CODER-BUILD-VERSION header
httpapi.Write(r.Context(), rw, http.StatusOK, report)
}
type optErrors []error
func (e optErrors) Error() string {
var b strings.Builder
for _, err := range e {
_, _ = b.WriteString(err.Error())
_, _ = b.WriteString("\n")
}
return b.String()
}
func (e *optErrors) Required(name string, v any) {
if v == nil {
*e = append(*e, xerrors.Errorf("%s is required, got <nil>", name))
}
}
func (e *optErrors) NotEmpty(name string, v any) {
if reflect.ValueOf(v).IsZero() {
*e = append(*e, xerrors.Errorf("%s is required, got the zero value", name))
}
}