feat(healthcheck): add failing sections to report (#7789)

This commit is contained in:
Colin Adler 2023-06-01 19:21:24 -05:00 committed by GitHub
parent 9b8e5c2d8a
commit 2b63492649
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 217 additions and 38 deletions

6
coderd/apidoc/docs.go generated
View File

@ -10440,6 +10440,12 @@ const docTemplate = `{
"derp": {
"$ref": "#/definitions/healthcheck.DERPReport"
},
"failing_sections": {
"type": "array",
"items": {
"type": "string"
}
},
"healthy": {
"description": "Healthy is true if the report returns no errors.",
"type": "boolean"

View File

@ -9425,6 +9425,12 @@
"derp": {
"$ref": "#/definitions/healthcheck.DERPReport"
},
"failing_sections": {
"type": "array",
"items": {
"type": "string"
}
},
"healthy": {
"description": "Healthy is true if the report returns no errors.",
"type": "boolean"

View File

@ -130,7 +130,7 @@ type Options struct {
// AppSecurityKey is the crypto key used to sign and encrypt tokens related to
// workspace applications. It consists of both a signing and encryption key.
AppSecurityKey workspaceapps.SecurityKey
HealthcheckFunc func(ctx context.Context, apiKey string) (*healthcheck.Report, error)
HealthcheckFunc func(ctx context.Context, apiKey string) *healthcheck.Report
HealthcheckTimeout time.Duration
HealthcheckRefresh time.Duration
@ -266,7 +266,7 @@ func New(options *Options) *API {
options.TemplateScheduleStore.Store(&v)
}
if options.HealthcheckFunc == nil {
options.HealthcheckFunc = func(ctx context.Context, apiKey string) (*healthcheck.Report, error) {
options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report {
return healthcheck.Run(ctx, &healthcheck.ReportOptions{
AccessURL: options.AccessURL,
DERPMap: options.DERPMap.Clone(),

View File

@ -107,7 +107,7 @@ type Options struct {
TrialGenerator func(context.Context, string) error
TemplateScheduleStore schedule.TemplateScheduleStore
HealthcheckFunc func(ctx context.Context, apiKey string) (*healthcheck.Report, error)
HealthcheckFunc func(ctx context.Context, apiKey string) *healthcheck.Report
HealthcheckTimeout time.Duration
HealthcheckRefresh time.Duration

View File

@ -47,11 +47,9 @@ func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithTimeout(context.Background(), api.HealthcheckTimeout)
defer cancel()
report, err := api.HealthcheckFunc(ctx, apiKey)
if err == nil {
api.healthCheckCache.Store(report)
}
return report, err
report := api.HealthcheckFunc(ctx, apiKey)
api.healthCheckCache.Store(report)
return report, nil
})
select {

View File

@ -24,9 +24,9 @@ func TestDebugHealth(t *testing.T) {
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
sessionToken string
client = coderdtest.New(t, &coderdtest.Options{
HealthcheckFunc: func(_ context.Context, apiKey string) (*healthcheck.Report, error) {
HealthcheckFunc: func(_ context.Context, apiKey string) *healthcheck.Report {
assert.Equal(t, sessionToken, apiKey)
return &healthcheck.Report{}, nil
return &healthcheck.Report{}
},
})
_ = coderdtest.CreateFirstUser(t, client)
@ -48,15 +48,15 @@ func TestDebugHealth(t *testing.T) {
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
client = coderdtest.New(t, &coderdtest.Options{
HealthcheckTimeout: time.Microsecond,
HealthcheckFunc: func(context.Context, string) (*healthcheck.Report, error) {
HealthcheckFunc: func(context.Context, string) *healthcheck.Report {
t := time.NewTimer(time.Second)
defer t.Stop()
select {
case <-ctx.Done():
return nil, ctx.Err()
return &healthcheck.Report{}
case <-t.C:
return &healthcheck.Report{}, nil
return &healthcheck.Report{}
}
},
})
@ -80,11 +80,11 @@ func TestDebugHealth(t *testing.T) {
client = coderdtest.New(t, &coderdtest.Options{
HealthcheckRefresh: time.Hour,
HealthcheckTimeout: time.Hour,
HealthcheckFunc: func(context.Context, string) (*healthcheck.Report, error) {
HealthcheckFunc: func(context.Context, string) *healthcheck.Report {
calls++
return &healthcheck.Report{
Time: time.Now(),
}, nil
}
},
})
_ = coderdtest.CreateFirstUser(t, client)

View File

@ -25,7 +25,6 @@ import (
)
type DERPReport struct {
mu sync.Mutex
Healthy bool `json:"healthy"`
Regions map[int]*DERPRegionReport `json:"regions"`
@ -78,6 +77,7 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
r.Regions = map[int]*DERPRegionReport{}
wg := &sync.WaitGroup{}
mu := sync.Mutex{}
wg.Add(len(opts.DERPMap.Regions))
for _, region := range opts.DERPMap.Regions {
@ -97,19 +97,19 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
regionReport.Run(ctx)
r.mu.Lock()
mu.Lock()
r.Regions[region.RegionID] = &regionReport
if !regionReport.Healthy {
r.Healthy = false
}
r.mu.Unlock()
mu.Unlock()
}()
}
ncLogf := func(format string, args ...interface{}) {
r.mu.Lock()
mu.Lock()
r.NetcheckLogs = append(r.NetcheckLogs, fmt.Sprintf(format, args...))
r.mu.Unlock()
mu.Unlock()
}
nc := &netcheck.Client{
PortMapper: portmapper.NewClient(tslogger.WithPrefix(ncLogf, "portmap: "), nil),

View File

@ -11,11 +11,24 @@ import (
"tailscale.com/tailcfg"
)
const (
SectionDERP string = "DERP"
SectionAccessURL string = "AccessURL"
SectionWebsocket string = "Websocket"
)
type Checker interface {
DERP(ctx context.Context, opts *DERPReportOptions) DERPReport
AccessURL(ctx context.Context, opts *AccessURLOptions) AccessURLReport
Websocket(ctx context.Context, opts *WebsocketReportOptions) WebsocketReport
}
type Report struct {
// Time is the time the report was generated at.
Time time.Time `json:"time"`
// Healthy is true if the report returns no errors.
Healthy bool `json:"healthy"`
Healthy bool `json:"healthy"`
FailingSections []string `json:"failing_sections"`
DERP DERPReport `json:"derp"`
AccessURL AccessURLReport `json:"access_url"`
@ -28,12 +41,36 @@ type ReportOptions struct {
AccessURL *url.URL
Client *http.Client
APIKey string
Checker Checker
}
func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
var report Report
type defaultChecker struct{}
wg := &sync.WaitGroup{}
func (defaultChecker) DERP(ctx context.Context, opts *DERPReportOptions) (report DERPReport) {
report.Run(ctx, opts)
return report
}
func (defaultChecker) AccessURL(ctx context.Context, opts *AccessURLOptions) (report AccessURLReport) {
report.Run(ctx, opts)
return report
}
func (defaultChecker) Websocket(ctx context.Context, opts *WebsocketReportOptions) (report WebsocketReport) {
report.Run(ctx, opts)
return report
}
func Run(ctx context.Context, opts *ReportOptions) *Report {
var (
wg sync.WaitGroup
report Report
)
if opts.Checker == nil {
opts.Checker = defaultChecker{}
}
wg.Add(1)
go func() {
@ -44,7 +81,7 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
}
}()
report.DERP.Run(ctx, &DERPReportOptions{
report.DERP = opts.Checker.DERP(ctx, &DERPReportOptions{
DERPMap: opts.DERPMap,
})
}()
@ -58,7 +95,7 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
}
}()
report.AccessURL.Run(ctx, &AccessURLOptions{
report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLOptions{
AccessURL: opts.AccessURL,
Client: opts.Client,
})
@ -72,7 +109,8 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
report.Websocket.Error = xerrors.Errorf("%v", err)
}
}()
report.Websocket.Run(ctx, &WebsocketReportOptions{
report.Websocket = opts.Checker.Websocket(ctx, &WebsocketReportOptions{
APIKey: opts.APIKey,
AccessURL: opts.AccessURL,
})
@ -80,8 +118,16 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
wg.Wait()
report.Time = time.Now()
report.Healthy = report.DERP.Healthy &&
report.AccessURL.Healthy &&
report.Websocket.Healthy
return &report, nil
if !report.DERP.Healthy {
report.FailingSections = append(report.FailingSections, SectionDERP)
}
if !report.AccessURL.Healthy {
report.FailingSections = append(report.FailingSections, SectionAccessURL)
}
if !report.Websocket.Healthy {
report.FailingSections = append(report.FailingSections, SectionWebsocket)
}
report.Healthy = len(report.FailingSections) == 0
return &report
}

View File

@ -0,0 +1,120 @@
package healthcheck_test
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/coder/coder/coderd/healthcheck"
)
type testChecker struct {
DERPReport healthcheck.DERPReport
AccessURLReport healthcheck.AccessURLReport
WebsocketReport healthcheck.WebsocketReport
}
func (c *testChecker) DERP(context.Context, *healthcheck.DERPReportOptions) healthcheck.DERPReport {
return c.DERPReport
}
func (c *testChecker) AccessURL(context.Context, *healthcheck.AccessURLOptions) healthcheck.AccessURLReport {
return c.AccessURLReport
}
func (c *testChecker) Websocket(context.Context, *healthcheck.WebsocketReportOptions) healthcheck.WebsocketReport {
return c.WebsocketReport
}
func TestHealthcheck(t *testing.T) {
t.Parallel()
for _, c := range []struct {
name string
checker *testChecker
healthy bool
failingSections []string
}{{
name: "OK",
checker: &testChecker{
DERPReport: healthcheck.DERPReport{
Healthy: true,
},
AccessURLReport: healthcheck.AccessURLReport{
Healthy: true,
},
WebsocketReport: healthcheck.WebsocketReport{
Healthy: true,
},
},
healthy: true,
failingSections: nil,
}, {
name: "DERPFail",
checker: &testChecker{
DERPReport: healthcheck.DERPReport{
Healthy: false,
},
AccessURLReport: healthcheck.AccessURLReport{
Healthy: true,
},
WebsocketReport: healthcheck.WebsocketReport{
Healthy: true,
},
},
healthy: false,
failingSections: []string{healthcheck.SectionDERP},
}, {
name: "AccessURLFail",
checker: &testChecker{
DERPReport: healthcheck.DERPReport{
Healthy: true,
},
AccessURLReport: healthcheck.AccessURLReport{
Healthy: false,
},
WebsocketReport: healthcheck.WebsocketReport{
Healthy: true,
},
},
healthy: false,
failingSections: []string{healthcheck.SectionAccessURL},
}, {
name: "WebsocketFail",
checker: &testChecker{
DERPReport: healthcheck.DERPReport{
Healthy: true,
},
AccessURLReport: healthcheck.AccessURLReport{
Healthy: true,
},
WebsocketReport: healthcheck.WebsocketReport{
Healthy: false,
},
},
healthy: false,
failingSections: []string{healthcheck.SectionWebsocket},
}, {
name: "AllFail",
checker: &testChecker{},
healthy: false,
failingSections: []string{healthcheck.SectionDERP, healthcheck.SectionAccessURL, healthcheck.SectionWebsocket},
}} {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
report := healthcheck.Run(context.Background(), &healthcheck.ReportOptions{
Checker: c.checker,
})
assert.Equal(t, c.healthy, report.Healthy)
assert.Equal(t, c.failingSections, report.FailingSections)
assert.Equal(t, c.checker.DERPReport.Healthy, report.DERP.Healthy)
assert.Equal(t, c.checker.AccessURLReport.Healthy, report.AccessURL.Healthy)
assert.Equal(t, c.checker.WebsocketReport.Healthy, report.Websocket.Healthy)
assert.NotZero(t, report.Time)
})
}
}

View File

@ -206,6 +206,7 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
}
}
},
"failing_sections": ["string"],
"healthy": true,
"time": "string",
"websocket": {

View File

@ -6376,6 +6376,7 @@ Parameter represents a set value for the scope.
}
}
},
"failing_sections": ["string"],
"healthy": true,
"time": "string",
"websocket": {
@ -6391,13 +6392,14 @@ Parameter represents a set value for the scope.
### Properties
| Name | Type | Required | Restrictions | Description |
| ------------ | ---------------------------------------------------------- | -------- | ------------ | ------------------------------------------------ |
| `access_url` | [healthcheck.AccessURLReport](#healthcheckaccessurlreport) | false | | |
| `derp` | [healthcheck.DERPReport](#healthcheckderpreport) | false | | |
| `healthy` | boolean | false | | Healthy is true if the report returns no errors. |
| `time` | string | false | | Time is the time the report was generated at. |
| `websocket` | [healthcheck.WebsocketReport](#healthcheckwebsocketreport) | false | | |
| Name | Type | Required | Restrictions | Description |
| ------------------ | ---------------------------------------------------------- | -------- | ------------ | ------------------------------------------------ |
| `access_url` | [healthcheck.AccessURLReport](#healthcheckaccessurlreport) | false | | |
| `derp` | [healthcheck.DERPReport](#healthcheckderpreport) | false | | |
| `failing_sections` | array of string | false | | |
| `healthy` | boolean | false | | Healthy is true if the report returns no errors. |
| `time` | string | false | | Time is the time the report was generated at. |
| `websocket` | [healthcheck.WebsocketReport](#healthcheckwebsocketreport) | false | | |
## healthcheck.WebsocketReport