mirror of https://github.com/coder/coder.git
feat(coderd/healthcheck): improve detection of STUN issues (#12951)
Adds checks to coderd/healthcheck/derphealth for STUN issues: - Alerts if there is not least one healthy STUN server, - Alerts if we see variable port mapping.
This commit is contained in:
parent
c13909a1a2
commit
9a4703a311
|
@ -101,7 +101,7 @@ func (r *RootCmd) supportBundle() *serpent.Command {
|
|||
|
||||
// Check if we're running inside a workspace
|
||||
if val, found := os.LookupEnv("CODER"); found && val == "true" {
|
||||
_, _ = fmt.Fprintln(inv.Stderr, "Running inside Coder workspace; this can affect results!")
|
||||
cliui.Warn(inv.Stderr, "Running inside Coder workspace; this can affect results!")
|
||||
cliLog.Debug(inv.Context(), "running inside coder workspace")
|
||||
}
|
||||
|
||||
|
@ -122,7 +122,7 @@ func (r *RootCmd) supportBundle() *serpent.Command {
|
|||
|
||||
if len(inv.Args) == 0 {
|
||||
cliLog.Warn(inv.Context(), "no workspace specified")
|
||||
_, _ = fmt.Fprintln(inv.Stderr, "Warning: no workspace specified. This will result in incomplete information.")
|
||||
cliui.Warn(inv.Stderr, "No workspace specified. This will result in incomplete information.")
|
||||
} else {
|
||||
ws, err := namedWorkspace(inv.Context(), client, inv.Args[0])
|
||||
if err != nil {
|
||||
|
@ -191,6 +191,7 @@ func (r *RootCmd) supportBundle() *serpent.Command {
|
|||
return xerrors.Errorf("write support bundle to %s: %w", outputPath, err)
|
||||
}
|
||||
_, _ = fmt.Fprintln(inv.Stderr, "Wrote support bundle to "+outputPath)
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
|
|
@ -32,6 +32,8 @@ const (
|
|||
warningNodeUsesWebsocket = `Node uses WebSockets because the "Upgrade: DERP" header may be blocked on the load balancer.`
|
||||
oneNodeUnhealthy = "Region is operational, but performance might be degraded as one node is unhealthy."
|
||||
missingNodeReport = "Missing node health report, probably a developer error."
|
||||
noSTUN = "No STUN servers are available."
|
||||
stunMapVaryDest = "STUN returned different addresses; you may be behind a hard NAT."
|
||||
)
|
||||
|
||||
type ReportOptions struct {
|
||||
|
@ -107,9 +109,30 @@ func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
|
|||
ncReport, netcheckErr := nc.GetReport(ctx, opts.DERPMap)
|
||||
r.Netcheck = ncReport
|
||||
r.NetcheckErr = convertError(netcheckErr)
|
||||
if mapVaryDest, _ := r.Netcheck.MappingVariesByDestIP.Get(); mapVaryDest {
|
||||
r.Warnings = append(r.Warnings, health.Messagef(health.CodeSTUNMapVaryDest, stunMapVaryDest))
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// Count the number of STUN-capable nodes.
|
||||
var stunCapableNodes int
|
||||
var stunTotalNodes int
|
||||
for _, region := range r.Regions {
|
||||
for _, node := range region.NodeReports {
|
||||
if node.STUN.Enabled {
|
||||
stunTotalNodes++
|
||||
}
|
||||
if node.STUN.CanSTUN {
|
||||
stunCapableNodes++
|
||||
}
|
||||
}
|
||||
}
|
||||
if stunCapableNodes == 0 && stunTotalNodes > 0 {
|
||||
r.Severity = health.SeverityWarning
|
||||
r.Warnings = append(r.Warnings, health.Messagef(health.CodeSTUNNoNodes, noSTUN))
|
||||
}
|
||||
|
||||
// Review region reports and select the highest severity.
|
||||
for _, regionReport := range r.Regions {
|
||||
if regionReport.Severity.Value() > r.Severity.Value() {
|
||||
|
|
|
@ -129,9 +129,67 @@ func TestDERP(t *testing.T) {
|
|||
assert.True(t, report.Healthy)
|
||||
assert.Equal(t, health.SeverityWarning, report.Severity)
|
||||
assert.True(t, report.Dismissed)
|
||||
if assert.NotEmpty(t, report.Warnings) {
|
||||
if assert.Len(t, report.Warnings, 1) {
|
||||
assert.Contains(t, report.Warnings[0].Code, health.CodeDERPOneNodeUnhealthy)
|
||||
}
|
||||
for _, region := range report.Regions {
|
||||
assert.True(t, region.Healthy)
|
||||
assert.True(t, region.NodeReports[0].Healthy)
|
||||
assert.Empty(t, region.NodeReports[0].Warnings)
|
||||
assert.Equal(t, health.SeverityOK, region.NodeReports[0].Severity)
|
||||
assert.False(t, region.NodeReports[1].Healthy)
|
||||
assert.Equal(t, health.SeverityError, region.NodeReports[1].Severity)
|
||||
assert.Len(t, region.Warnings, 1)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("HealthyWithNoSTUN", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
healthyDerpSrv := derp.NewServer(key.NewNode(), func(format string, args ...any) { t.Logf(format, args...) })
|
||||
defer healthyDerpSrv.Close()
|
||||
healthySrv := httptest.NewServer(derphttp.Handler(healthyDerpSrv))
|
||||
defer healthySrv.Close()
|
||||
|
||||
var (
|
||||
ctx = context.Background()
|
||||
report = derphealth.Report{}
|
||||
derpURL, _ = url.Parse(healthySrv.URL)
|
||||
opts = &derphealth.ReportOptions{
|
||||
DERPMap: &tailcfg.DERPMap{Regions: map[int]*tailcfg.DERPRegion{
|
||||
1: {
|
||||
EmbeddedRelay: true,
|
||||
RegionID: 999,
|
||||
Nodes: []*tailcfg.DERPNode{{
|
||||
Name: "1a",
|
||||
RegionID: 999,
|
||||
HostName: derpURL.Host,
|
||||
IPv4: derpURL.Host,
|
||||
STUNPort: -1,
|
||||
InsecureForTests: true,
|
||||
ForceHTTP: true,
|
||||
}, {
|
||||
Name: "badstun",
|
||||
RegionID: 999,
|
||||
HostName: derpURL.Host,
|
||||
STUNPort: 19302,
|
||||
STUNOnly: true,
|
||||
InsecureForTests: true,
|
||||
ForceHTTP: true,
|
||||
}},
|
||||
},
|
||||
}},
|
||||
}
|
||||
)
|
||||
|
||||
report.Run(ctx, opts)
|
||||
|
||||
assert.True(t, report.Healthy)
|
||||
assert.Equal(t, health.SeverityWarning, report.Severity)
|
||||
if assert.Len(t, report.Warnings, 2) {
|
||||
assert.EqualValues(t, report.Warnings[1].Code, health.CodeSTUNNoNodes)
|
||||
assert.EqualValues(t, report.Warnings[0].Code, health.CodeDERPOneNodeUnhealthy)
|
||||
}
|
||||
for _, region := range report.Regions {
|
||||
assert.True(t, region.Healthy)
|
||||
assert.True(t, region.NodeReports[0].Healthy)
|
||||
|
@ -291,8 +349,10 @@ func TestDERP(t *testing.T) {
|
|||
report.Run(ctx, opts)
|
||||
|
||||
assert.True(t, report.Healthy)
|
||||
assert.Equal(t, health.SeverityOK, report.Severity)
|
||||
for _, region := range report.Regions {
|
||||
assert.True(t, region.Healthy)
|
||||
assert.Equal(t, health.SeverityOK, region.Severity)
|
||||
for _, node := range region.NodeReports {
|
||||
assert.True(t, node.Healthy)
|
||||
assert.False(t, node.CanExchangeMessages)
|
||||
|
@ -304,6 +364,107 @@ func TestDERP(t *testing.T) {
|
|||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("STUNOnly/OneBadOneGood", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var (
|
||||
ctx = context.Background()
|
||||
report = derphealth.Report{}
|
||||
opts = &derphealth.ReportOptions{
|
||||
DERPMap: &tailcfg.DERPMap{
|
||||
Regions: map[int]*tailcfg.DERPRegion{
|
||||
1: {
|
||||
EmbeddedRelay: true,
|
||||
RegionID: 999,
|
||||
Nodes: []*tailcfg.DERPNode{{
|
||||
Name: "badstun",
|
||||
RegionID: 999,
|
||||
HostName: "badstun.example.com",
|
||||
STUNPort: 19302,
|
||||
STUNOnly: true,
|
||||
InsecureForTests: true,
|
||||
ForceHTTP: true,
|
||||
}, {
|
||||
Name: "goodstun",
|
||||
RegionID: 999,
|
||||
HostName: "stun.l.google.com",
|
||||
STUNPort: 19302,
|
||||
STUNOnly: true,
|
||||
InsecureForTests: true,
|
||||
ForceHTTP: true,
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
report.Run(ctx, opts)
|
||||
assert.True(t, report.Healthy)
|
||||
assert.Equal(t, health.SeverityWarning, report.Severity)
|
||||
if assert.Len(t, report.Warnings, 1) {
|
||||
assert.Equal(t, health.CodeDERPOneNodeUnhealthy, report.Warnings[0].Code)
|
||||
}
|
||||
for _, region := range report.Regions {
|
||||
assert.True(t, region.Healthy)
|
||||
assert.Equal(t, health.SeverityWarning, region.Severity)
|
||||
// badstun
|
||||
assert.False(t, region.NodeReports[0].Healthy)
|
||||
assert.True(t, region.NodeReports[0].STUN.Enabled)
|
||||
assert.False(t, region.NodeReports[0].STUN.CanSTUN)
|
||||
assert.NotNil(t, region.NodeReports[0].STUN.Error)
|
||||
// goodstun
|
||||
assert.True(t, region.NodeReports[1].Healthy)
|
||||
assert.True(t, region.NodeReports[1].STUN.Enabled)
|
||||
assert.True(t, region.NodeReports[1].STUN.CanSTUN)
|
||||
assert.Nil(t, region.NodeReports[1].STUN.Error)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("STUNOnly/NoStun", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var (
|
||||
ctx = context.Background()
|
||||
report = derphealth.Report{}
|
||||
opts = &derphealth.ReportOptions{
|
||||
DERPMap: &tailcfg.DERPMap{
|
||||
Regions: map[int]*tailcfg.DERPRegion{
|
||||
1: {
|
||||
EmbeddedRelay: true,
|
||||
RegionID: 999,
|
||||
Nodes: []*tailcfg.DERPNode{{
|
||||
Name: "badstun",
|
||||
RegionID: 999,
|
||||
HostName: "badstun.example.com",
|
||||
STUNPort: 19302,
|
||||
STUNOnly: true,
|
||||
InsecureForTests: true,
|
||||
ForceHTTP: true,
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
report.Run(ctx, opts)
|
||||
assert.False(t, report.Healthy)
|
||||
assert.Equal(t, health.SeverityError, report.Severity)
|
||||
for _, region := range report.Regions {
|
||||
assert.False(t, region.Healthy)
|
||||
assert.Equal(t, health.SeverityError, region.Severity)
|
||||
for _, node := range region.NodeReports {
|
||||
assert.False(t, node.Healthy)
|
||||
assert.False(t, node.CanExchangeMessages)
|
||||
assert.Empty(t, node.ClientLogs)
|
||||
assert.True(t, node.STUN.Enabled)
|
||||
assert.False(t, node.STUN.CanSTUN)
|
||||
assert.NotNil(t, node.STUN.Error)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func tsDERPMap(ctx context.Context, t testing.TB) *tailcfg.DERPMap {
|
||||
|
|
|
@ -36,6 +36,8 @@ const (
|
|||
|
||||
CodeDERPNodeUsesWebsocket Code = `EDERP01`
|
||||
CodeDERPOneNodeUnhealthy Code = `EDERP02`
|
||||
CodeSTUNNoNodes = `ESTUN01`
|
||||
CodeSTUNMapVaryDest = `ESTUN02`
|
||||
|
||||
CodeProvisionerDaemonsNoProvisionerDaemons Code = `EPD01`
|
||||
CodeProvisionerDaemonVersionMismatch Code = `EPD02`
|
||||
|
|
|
@ -170,6 +170,31 @@ curl -v "https://coder.company.com/derp"
|
|||
# DERP requires connection upgrade
|
||||
```
|
||||
|
||||
### ESTUN01
|
||||
|
||||
_No STUN servers available._
|
||||
|
||||
**Problem:** This is shown if no STUN servers are available. Coder will use STUN
|
||||
to establish [direct connections](../networking/stun.md). Without at least one
|
||||
working STUN server, direct connections may not be possible.
|
||||
|
||||
**Solution:** Ensure that the
|
||||
[configured STUN severs](../cli/server.md#derp-server-stun-addresses) are
|
||||
reachable from Coder and that UDP traffic can be sent/received on the configured
|
||||
port.
|
||||
|
||||
### ESTUN02
|
||||
|
||||
_STUN returned different addresses; you may be behind a hard NAT._
|
||||
|
||||
**Problem:** This is a warning shown when multiple attempts to determine our
|
||||
public IP address/port via STUN resulted in different `ip:port` combinations.
|
||||
This is a sign that you are behind a "hard NAT", and may result in difficulty
|
||||
establishing direct connections. However, it does not mean that direct
|
||||
connections are impossible.
|
||||
|
||||
**Solution:** Engage with your network administrator.
|
||||
|
||||
## Websocket
|
||||
|
||||
Coder makes heavy use of [WebSockets](https://datatracker.ietf.org/doc/rfc6455/)
|
||||
|
|
Loading…
Reference in New Issue