feat: ensure coder remains healthy with single degraded DERP server (#10813)

This commit is contained in:
Marcin Tojek 2023-11-21 12:58:25 +01:00 committed by GitHub
parent abafc0863c
commit 048dc0450f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 61 additions and 3 deletions

View File

@ -26,6 +26,7 @@ import (
const (
// warningNodeUsesWebsocket is the warning text for a node that uses
// WebSockets; per the message itself, the likely cause is a load balancer
// blocking the "Upgrade: DERP" header.
warningNodeUsesWebsocket = `Node uses WebSockets because the "Upgrade: DERP" header may be blocked on the load balancer.`
// oneNodeUnhealthy is the warning RegionReport.Run appends to the region's
// Warnings when the region does not consist of exactly one node and fewer
// nodes are healthy than the region contains.
// NOTE(review): the wording says "one node", but the condition in Run also
// holds when several nodes are unhealthy — confirm the intended threshold.
oneNodeUnhealthy = "Region is operational, but performance might be degraded as one node is unhealthy."
)
// @typescript-generate Report
@ -146,6 +147,7 @@ func (r *RegionReport) Run(ctx context.Context) {
r.NodeReports = []*NodeReport{}
wg := &sync.WaitGroup{}
var healthyNodes int // atomic.Int64 is not mandatory as we depend on RegionReport mutex.
wg.Add(len(r.Region.Nodes))
for _, node := range r.Region.Nodes {
@ -169,8 +171,8 @@ func (r *RegionReport) Run(ctx context.Context) {
r.mu.Lock()
r.NodeReports = append(r.NodeReports, &nodeReport)
if !nodeReport.Healthy {
r.Healthy = false
if nodeReport.Healthy {
healthyNodes++
}
for _, w := range nodeReport.Warnings {
@ -179,8 +181,14 @@ func (r *RegionReport) Run(ctx context.Context) {
r.mu.Unlock()
}()
}
wg.Wait()
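// Coder allows for 1 unhealthy node in the region, unless there is only 1 node.
// NOTE(review): the multi-node branch below only appends a warning and never
// sets r.Healthy to false, so a region with two or more unhealthy nodes (even
// all of them) keeps whatever Healthy value it had before — confirm whether
// more than one unhealthy node should mark the region unhealthy.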
if len(r.Region.Nodes) == 1 {
r.Healthy = healthyNodes == len(r.Region.Nodes)
} else if healthyNodes < len(r.Region.Nodes) {
r.Warnings = append(r.Warnings, oneNodeUnhealthy)
}
}
func (r *NodeReport) derpURL() *url.URL {

View File

@ -81,6 +81,56 @@ func TestDERP(t *testing.T) {
}
})
// HealthyWithNodeDegraded verifies that a region made of one reachable and
// one unreachable DERP node is still reported as healthy, and that exactly
// one warning is attached to the region.
t.Run("HealthyWithNodeDegraded", func(t *testing.T) {
	t.Parallel()

	// Only node "1a" is backed by a real in-process DERP server.
	healthyDerpSrv := derp.NewServer(key.NewNode(), func(format string, args ...any) { t.Logf(format, args...) })
	defer healthyDerpSrv.Close()
	healthySrv := httptest.NewServer(derphttp.Handler(healthyDerpSrv))
	defer healthySrv.Close()

	// Fail fast instead of dereferencing a nil URL below.
	derpURL, err := url.Parse(healthySrv.URL)
	if err != nil {
		t.Fatalf("parse test server URL %q: %v", healthySrv.URL, err)
	}

	var (
		ctx    = context.Background()
		report = derphealth.Report{}
		opts   = &derphealth.ReportOptions{
			DERPMap: &tailcfg.DERPMap{Regions: map[int]*tailcfg.DERPRegion{
				1: {
					EmbeddedRelay: true,
					RegionID:      999,
					Nodes: []*tailcfg.DERPNode{{
						Name:             "1a",
						RegionID:         999,
						HostName:         derpURL.Host,
						IPv4:             derpURL.Host,
						STUNPort:         -1,
						InsecureForTests: true,
						ForceHTTP:        true,
					}, {
						// Node "1b" points at a hostname that is expected
						// to be unreachable, so its report must be unhealthy.
						Name:             "1b",
						RegionID:         999,
						HostName:         "derp.is.dead.tld",
						IPv4:             "derp.is.dead.tld",
						STUNPort:         -1,
						InsecureForTests: true,
						ForceHTTP:        true,
					}},
				},
			}},
		}
	)

	report.Run(ctx, opts)

	assert.True(t, report.Healthy)
	for _, region := range report.Regions {
		assert.True(t, region.Healthy)

		// Node reports are appended as each node check finishes, so their
		// order is not tied to the order of Nodes. Count by health status
		// instead of indexing, which also avoids an out-of-range panic if
		// fewer reports than expected were produced.
		var healthy, unhealthy int
		for _, nodeReport := range region.NodeReports {
			if nodeReport.Healthy {
				healthy++
			} else {
				unhealthy++
			}
		}
		assert.Equal(t, 1, healthy)
		assert.Equal(t, 1, unhealthy)
		assert.Len(t, region.Warnings, 1)
	}
})
t.Run("Tailscale/Dallas/OK", func(t *testing.T) {
t.Parallel()