feat(coderd/healthcheck): improve detection of STUN issues (#12951)

Adds checks to coderd/healthcheck/derphealth for STUN issues:
- Alerts if there is not least one healthy STUN server,
- Alerts if we see variable port mapping.
This commit is contained in:
Cian Johnston 2024-04-15 17:10:49 +01:00 committed by GitHub
parent c13909a1a2
commit 9a4703a311
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 215 additions and 3 deletions

View File

@ -101,7 +101,7 @@ func (r *RootCmd) supportBundle() *serpent.Command {
// Check if we're running inside a workspace
if val, found := os.LookupEnv("CODER"); found && val == "true" {
_, _ = fmt.Fprintln(inv.Stderr, "Running inside Coder workspace; this can affect results!")
cliui.Warn(inv.Stderr, "Running inside Coder workspace; this can affect results!")
cliLog.Debug(inv.Context(), "running inside coder workspace")
}
@ -122,7 +122,7 @@ func (r *RootCmd) supportBundle() *serpent.Command {
if len(inv.Args) == 0 {
cliLog.Warn(inv.Context(), "no workspace specified")
_, _ = fmt.Fprintln(inv.Stderr, "Warning: no workspace specified. This will result in incomplete information.")
cliui.Warn(inv.Stderr, "No workspace specified. This will result in incomplete information.")
} else {
ws, err := namedWorkspace(inv.Context(), client, inv.Args[0])
if err != nil {
@ -191,6 +191,7 @@ func (r *RootCmd) supportBundle() *serpent.Command {
return xerrors.Errorf("write support bundle to %s: %w", outputPath, err)
}
_, _ = fmt.Fprintln(inv.Stderr, "Wrote support bundle to "+outputPath)
return nil
},
}

View File

@ -32,6 +32,8 @@ const (
warningNodeUsesWebsocket = `Node uses WebSockets because the "Upgrade: DERP" header may be blocked on the load balancer.`
oneNodeUnhealthy = "Region is operational, but performance might be degraded as one node is unhealthy."
missingNodeReport = "Missing node health report, probably a developer error."
noSTUN = "No STUN servers are available."
stunMapVaryDest = "STUN returned different addresses; you may be behind a hard NAT."
)
type ReportOptions struct {
@ -107,9 +109,30 @@ func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
ncReport, netcheckErr := nc.GetReport(ctx, opts.DERPMap)
r.Netcheck = ncReport
r.NetcheckErr = convertError(netcheckErr)
if mapVaryDest, _ := r.Netcheck.MappingVariesByDestIP.Get(); mapVaryDest {
r.Warnings = append(r.Warnings, health.Messagef(health.CodeSTUNMapVaryDest, stunMapVaryDest))
}
wg.Wait()
// Count the number of STUN-capable nodes.
var stunCapableNodes int
var stunTotalNodes int
for _, region := range r.Regions {
for _, node := range region.NodeReports {
if node.STUN.Enabled {
stunTotalNodes++
}
if node.STUN.CanSTUN {
stunCapableNodes++
}
}
}
if stunCapableNodes == 0 && stunTotalNodes > 0 {
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, health.Messagef(health.CodeSTUNNoNodes, noSTUN))
}
// Review region reports and select the highest severity.
for _, regionReport := range r.Regions {
if regionReport.Severity.Value() > r.Severity.Value() {

View File

@ -129,9 +129,67 @@ func TestDERP(t *testing.T) {
assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.True(t, report.Dismissed)
if assert.NotEmpty(t, report.Warnings) {
if assert.Len(t, report.Warnings, 1) {
assert.Contains(t, report.Warnings[0].Code, health.CodeDERPOneNodeUnhealthy)
}
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.True(t, region.NodeReports[0].Healthy)
assert.Empty(t, region.NodeReports[0].Warnings)
assert.Equal(t, health.SeverityOK, region.NodeReports[0].Severity)
assert.False(t, region.NodeReports[1].Healthy)
assert.Equal(t, health.SeverityError, region.NodeReports[1].Severity)
assert.Len(t, region.Warnings, 1)
}
})
t.Run("HealthyWithNoSTUN", func(t *testing.T) {
t.Parallel()
healthyDerpSrv := derp.NewServer(key.NewNode(), func(format string, args ...any) { t.Logf(format, args...) })
defer healthyDerpSrv.Close()
healthySrv := httptest.NewServer(derphttp.Handler(healthyDerpSrv))
defer healthySrv.Close()
var (
ctx = context.Background()
report = derphealth.Report{}
derpURL, _ = url.Parse(healthySrv.URL)
opts = &derphealth.ReportOptions{
DERPMap: &tailcfg.DERPMap{Regions: map[int]*tailcfg.DERPRegion{
1: {
EmbeddedRelay: true,
RegionID: 999,
Nodes: []*tailcfg.DERPNode{{
Name: "1a",
RegionID: 999,
HostName: derpURL.Host,
IPv4: derpURL.Host,
STUNPort: -1,
InsecureForTests: true,
ForceHTTP: true,
}, {
Name: "badstun",
RegionID: 999,
HostName: derpURL.Host,
STUNPort: 19302,
STUNOnly: true,
InsecureForTests: true,
ForceHTTP: true,
}},
},
}},
}
)
report.Run(ctx, opts)
assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
if assert.Len(t, report.Warnings, 2) {
assert.EqualValues(t, report.Warnings[1].Code, health.CodeSTUNNoNodes)
assert.EqualValues(t, report.Warnings[0].Code, health.CodeDERPOneNodeUnhealthy)
}
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.True(t, region.NodeReports[0].Healthy)
@ -291,8 +349,10 @@ func TestDERP(t *testing.T) {
report.Run(ctx, opts)
assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityOK, report.Severity)
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.Equal(t, health.SeverityOK, region.Severity)
for _, node := range region.NodeReports {
assert.True(t, node.Healthy)
assert.False(t, node.CanExchangeMessages)
@ -304,6 +364,107 @@ func TestDERP(t *testing.T) {
}
}
})
t.Run("STUNOnly/OneBadOneGood", func(t *testing.T) {
t.Parallel()
var (
ctx = context.Background()
report = derphealth.Report{}
opts = &derphealth.ReportOptions{
DERPMap: &tailcfg.DERPMap{
Regions: map[int]*tailcfg.DERPRegion{
1: {
EmbeddedRelay: true,
RegionID: 999,
Nodes: []*tailcfg.DERPNode{{
Name: "badstun",
RegionID: 999,
HostName: "badstun.example.com",
STUNPort: 19302,
STUNOnly: true,
InsecureForTests: true,
ForceHTTP: true,
}, {
Name: "goodstun",
RegionID: 999,
HostName: "stun.l.google.com",
STUNPort: 19302,
STUNOnly: true,
InsecureForTests: true,
ForceHTTP: true,
}},
},
},
},
}
)
report.Run(ctx, opts)
assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
if assert.Len(t, report.Warnings, 1) {
assert.Equal(t, health.CodeDERPOneNodeUnhealthy, report.Warnings[0].Code)
}
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.Equal(t, health.SeverityWarning, region.Severity)
// badstun
assert.False(t, region.NodeReports[0].Healthy)
assert.True(t, region.NodeReports[0].STUN.Enabled)
assert.False(t, region.NodeReports[0].STUN.CanSTUN)
assert.NotNil(t, region.NodeReports[0].STUN.Error)
// goodstun
assert.True(t, region.NodeReports[1].Healthy)
assert.True(t, region.NodeReports[1].STUN.Enabled)
assert.True(t, region.NodeReports[1].STUN.CanSTUN)
assert.Nil(t, region.NodeReports[1].STUN.Error)
}
})
t.Run("STUNOnly/NoStun", func(t *testing.T) {
t.Parallel()
var (
ctx = context.Background()
report = derphealth.Report{}
opts = &derphealth.ReportOptions{
DERPMap: &tailcfg.DERPMap{
Regions: map[int]*tailcfg.DERPRegion{
1: {
EmbeddedRelay: true,
RegionID: 999,
Nodes: []*tailcfg.DERPNode{{
Name: "badstun",
RegionID: 999,
HostName: "badstun.example.com",
STUNPort: 19302,
STUNOnly: true,
InsecureForTests: true,
ForceHTTP: true,
}},
},
},
},
}
)
report.Run(ctx, opts)
assert.False(t, report.Healthy)
assert.Equal(t, health.SeverityError, report.Severity)
for _, region := range report.Regions {
assert.False(t, region.Healthy)
assert.Equal(t, health.SeverityError, region.Severity)
for _, node := range region.NodeReports {
assert.False(t, node.Healthy)
assert.False(t, node.CanExchangeMessages)
assert.Empty(t, node.ClientLogs)
assert.True(t, node.STUN.Enabled)
assert.False(t, node.STUN.CanSTUN)
assert.NotNil(t, node.STUN.Error)
}
}
})
}
func tsDERPMap(ctx context.Context, t testing.TB) *tailcfg.DERPMap {

View File

@ -36,6 +36,8 @@ const (
CodeDERPNodeUsesWebsocket Code = `EDERP01`
CodeDERPOneNodeUnhealthy Code = `EDERP02`
CodeSTUNNoNodes = `ESTUN01`
CodeSTUNMapVaryDest = `ESTUN02`
CodeProvisionerDaemonsNoProvisionerDaemons Code = `EPD01`
CodeProvisionerDaemonVersionMismatch Code = `EPD02`

View File

@ -170,6 +170,31 @@ curl -v "https://coder.company.com/derp"
# DERP requires connection upgrade
```
### ESTUN01
_No STUN servers available._
**Problem:** This is shown if no STUN servers are available. Coder will use STUN
to establish [direct connections](../networking/stun.md). Without at least one
working STUN server, direct connections may not be possible.
**Solution:** Ensure that the
[configured STUN severs](../cli/server.md#derp-server-stun-addresses) are
reachable from Coder and that UDP traffic can be sent/received on the configured
port.
### ESTUN02
_STUN returned different addresses; you may be behind a hard NAT._
**Problem:** This is a warning shown when multiple attempts to determine our
public IP address/port via STUN resulted in different `ip:port` combinations.
This is a sign that you are behind a "hard NAT", and may result in difficulty
establishing direct connections. However, it does not mean that direct
connections are impossible.
**Solution:** Engage with your network administrator.
## Websocket
Coder makes heavy use of [WebSockets](https://datatracker.ietf.org/doc/rfc6455/)