fix(healthcheck): don't allow panics to exit coderd (#7276)

This commit is contained in:
Colin Adler 2023-04-25 10:11:45 -05:00 committed by GitHub
parent a98341612c
commit b62b6af0eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 88 additions and 24 deletions

5
coderd/apidoc/docs.go generated
View File

@ -10030,7 +10030,7 @@ const docTemplate = `{
"healthcheck.AccessURLReport": {
"type": "object",
"properties": {
"err": {},
"error": {},
"healthy": {
"type": "boolean"
},
@ -10067,6 +10067,7 @@ const docTemplate = `{
}
}
},
"error": {},
"healthy": {
"type": "boolean"
},
@ -10090,6 +10091,7 @@ const docTemplate = `{
"healthcheck.DERPRegionReport": {
"type": "object",
"properties": {
"error": {},
"healthy": {
"type": "boolean"
},
@ -10107,6 +10109,7 @@ const docTemplate = `{
"healthcheck.DERPReport": {
"type": "object",
"properties": {
"error": {},
"healthy": {
"type": "boolean"
},

View File

@ -9065,7 +9065,7 @@
"healthcheck.AccessURLReport": {
"type": "object",
"properties": {
"err": {},
"error": {},
"healthy": {
"type": "boolean"
},
@ -9102,6 +9102,7 @@
}
}
},
"error": {},
"healthy": {
"type": "boolean"
},
@ -9125,6 +9126,7 @@
"healthcheck.DERPRegionReport": {
"type": "object",
"properties": {
"error": {},
"healthy": {
"type": "boolean"
},
@ -9142,6 +9144,7 @@
"healthcheck.DERPReport": {
"type": "object",
"properties": {
"error": {},
"healthy": {
"type": "boolean"
},

View File

@ -250,7 +250,8 @@ func New(options *Options) *API {
if options.HealthcheckFunc == nil {
options.HealthcheckFunc = func(ctx context.Context) (*healthcheck.Report, error) {
return healthcheck.Run(ctx, &healthcheck.ReportOptions{
DERPMap: options.DERPMap.Clone(),
AccessURL: options.AccessURL,
DERPMap: options.DERPMap.Clone(),
})
}
}

View File

@ -15,7 +15,7 @@ type AccessURLReport struct {
Reachable bool
StatusCode int
HealthzResponse string
Err error
Error error
}
type AccessURLOptions struct {
@ -27,32 +27,37 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLOptions) {
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
if opts.AccessURL == nil {
r.Error = xerrors.New("access URL is nil")
return
}
if opts.Client == nil {
opts.Client = http.DefaultClient
}
accessURL, err := opts.AccessURL.Parse("/healthz")
if err != nil {
r.Err = xerrors.Errorf("parse healthz endpoint: %w", err)
r.Error = xerrors.Errorf("parse healthz endpoint: %w", err)
return
}
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
if err != nil {
r.Err = xerrors.Errorf("create healthz request: %w", err)
r.Error = xerrors.Errorf("create healthz request: %w", err)
return
}
res, err := opts.Client.Do(req)
if err != nil {
r.Err = xerrors.Errorf("get healthz endpoint: %w", err)
r.Error = xerrors.Errorf("get healthz endpoint: %w", err)
return
}
defer res.Body.Close()
body, err := io.ReadAll(res.Body)
if err != nil {
r.Err = xerrors.Errorf("read healthz response: %w", err)
r.Error = xerrors.Errorf("read healthz response: %w", err)
return
}

View File

@ -36,7 +36,7 @@ func TestAccessURL(t *testing.T) {
assert.True(t, report.Reachable)
assert.Equal(t, http.StatusOK, report.StatusCode)
assert.Equal(t, "OK", report.HealthzResponse)
assert.NoError(t, report.Err)
assert.NoError(t, report.Error)
})
t.Run("404", func(t *testing.T) {
@ -66,7 +66,7 @@ func TestAccessURL(t *testing.T) {
assert.True(t, report.Reachable)
assert.Equal(t, http.StatusNotFound, report.StatusCode)
assert.Equal(t, string(resp), report.HealthzResponse)
assert.NoError(t, report.Err)
assert.NoError(t, report.Error)
})
t.Run("ClientErr", func(t *testing.T) {
@ -102,7 +102,7 @@ func TestAccessURL(t *testing.T) {
assert.False(t, report.Reachable)
assert.Equal(t, 0, report.StatusCode)
assert.Equal(t, "", report.HealthzResponse)
assert.ErrorIs(t, report.Err, expErr)
assert.ErrorIs(t, report.Error, expErr)
})
}

View File

@ -33,6 +33,8 @@ type DERPReport struct {
Netcheck *netcheck.Report `json:"netcheck"`
NetcheckErr error `json:"netcheck_err"`
NetcheckLogs []string `json:"netcheck_logs"`
Error error `json:"error"`
}
type DERPRegionReport struct {
@ -41,6 +43,7 @@ type DERPRegionReport struct {
Region *tailcfg.DERPRegion `json:"region"`
NodeReports []*DERPNodeReport `json:"node_reports"`
Error error `json:"error"`
}
type DERPNodeReport struct {
mu sync.Mutex
@ -55,6 +58,7 @@ type DERPNodeReport struct {
UsesWebsocket bool `json:"uses_websocket"`
ClientLogs [][]string `json:"client_logs"`
ClientErrs [][]error `json:"client_errs"`
Error error `json:"error"`
STUN DERPStunReport `json:"stun"`
}
@ -77,12 +81,19 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
wg.Add(len(opts.DERPMap.Regions))
for _, region := range opts.DERPMap.Regions {
region := region
go func() {
defer wg.Done()
regionReport := DERPRegionReport{
var (
region = region
regionReport = DERPRegionReport{
Region: region,
}
)
go func() {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
regionReport.Error = xerrors.Errorf("%v", err)
}
}()
regionReport.Run(ctx)
@ -117,14 +128,21 @@ func (r *DERPRegionReport) Run(ctx context.Context) {
wg.Add(len(r.Region.Nodes))
for _, node := range r.Region.Nodes {
node := node
go func() {
defer wg.Done()
nodeReport := DERPNodeReport{
var (
node = node
nodeReport = DERPNodeReport{
Node: node,
Healthy: true,
}
)
go func() {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
nodeReport.Error = xerrors.Errorf("%v", err)
}
}()
nodeReport.Run(ctx)

View File

@ -7,6 +7,7 @@ import (
"sync"
"time"
"golang.org/x/xerrors"
"tailscale.com/tailcfg"
)
@ -38,6 +39,12 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
wg.Add(1)
go func() {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
report.DERP.Error = xerrors.Errorf("%v", err)
}
}()
report.DERP.Run(ctx, &DERPReportOptions{
DERPMap: opts.DERPMap,
})
@ -46,6 +53,12 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
wg.Add(1)
go func() {
defer wg.Done()
defer func() {
if err := recover(); err != nil {
report.AccessURL.Error = xerrors.Errorf("%v", err)
}
}()
report.AccessURL.Run(ctx, &AccessURLOptions{
AccessURL: opts.AccessURL,
Client: opts.Client,

View File

@ -40,13 +40,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
```json
{
"access_url": {
"err": null,
"error": null,
"healthy": true,
"healthzResponse": "string",
"reachable": true,
"statusCode": 0
},
"derp": {
"error": null,
"healthy": true,
"netcheck": {
"captivePortal": "string",
@ -82,12 +83,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
"netcheck_logs": ["string"],
"regions": {
"property1": {
"error": null,
"healthy": true,
"node_reports": [
{
"can_exchange_messages": true,
"client_errs": [[null]],
"client_logs": [["string"]],
"error": null,
"healthy": true,
"node": {
"certName": "string",
@ -141,12 +144,14 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
}
},
"property2": {
"error": null,
"healthy": true,
"node_reports": [
{
"can_exchange_messages": true,
"client_errs": [[null]],
"client_logs": [["string"]],
"error": null,
"healthy": true,
"node": {
"certName": "string",

View File

@ -5724,7 +5724,7 @@ Parameter represents a set value for the scope.
```json
{
"err": null,
"error": null,
"healthy": true,
"healthzResponse": "string",
"reachable": true,
@ -5736,7 +5736,7 @@ Parameter represents a set value for the scope.
| Name | Type | Required | Restrictions | Description |
| ----------------- | ------- | -------- | ------------ | ----------- |
| `err` | any | false | | |
| `error` | any | false | | |
| `healthy` | boolean | false | | |
| `healthzResponse` | string | false | | |
| `reachable` | boolean | false | | |
@ -5749,6 +5749,7 @@ Parameter represents a set value for the scope.
"can_exchange_messages": true,
"client_errs": [[null]],
"client_logs": [["string"]],
"error": null,
"healthy": true,
"node": {
"certName": "string",
@ -5785,6 +5786,7 @@ Parameter represents a set value for the scope.
| `can_exchange_messages` | boolean | false | | |
| `client_errs` | array of array | false | | |
| `client_logs` | array of array | false | | |
| `error` | any | false | | |
| `healthy` | boolean | false | | |
| `node` | [tailcfg.DERPNode](#tailcfgderpnode) | false | | |
| `node_info` | [derp.ServerInfoMessage](#derpserverinfomessage) | false | | |
@ -5796,12 +5798,14 @@ Parameter represents a set value for the scope.
```json
{
"error": null,
"healthy": true,
"node_reports": [
{
"can_exchange_messages": true,
"client_errs": [[null]],
"client_logs": [["string"]],
"error": null,
"healthy": true,
"node": {
"certName": "string",
@ -5860,6 +5864,7 @@ Parameter represents a set value for the scope.
| Name | Type | Required | Restrictions | Description |
| -------------- | ----------------------------------------------------------------- | -------- | ------------ | ----------- |
| `error` | any | false | | |
| `healthy` | boolean | false | | |
| `node_reports` | array of [healthcheck.DERPNodeReport](#healthcheckderpnodereport) | false | | |
| `region` | [tailcfg.DERPRegion](#tailcfgderpregion) | false | | |
@ -5868,6 +5873,7 @@ Parameter represents a set value for the scope.
```json
{
"error": null,
"healthy": true,
"netcheck": {
"captivePortal": "string",
@ -5903,12 +5909,14 @@ Parameter represents a set value for the scope.
"netcheck_logs": ["string"],
"regions": {
"property1": {
"error": null,
"healthy": true,
"node_reports": [
{
"can_exchange_messages": true,
"client_errs": [[null]],
"client_logs": [["string"]],
"error": null,
"healthy": true,
"node": {
"certName": "string",
@ -5962,12 +5970,14 @@ Parameter represents a set value for the scope.
}
},
"property2": {
"error": null,
"healthy": true,
"node_reports": [
{
"can_exchange_messages": true,
"client_errs": [[null]],
"client_logs": [["string"]],
"error": null,
"healthy": true,
"node": {
"certName": "string",
@ -6028,6 +6038,7 @@ Parameter represents a set value for the scope.
| Name | Type | Required | Restrictions | Description |
| ------------------ | ------------------------------------------------------------ | -------- | ------------ | ----------- |
| `error` | any | false | | |
| `healthy` | boolean | false | | |
| `netcheck` | [netcheck.Report](#netcheckreport) | false | | |
| `netcheck_err` | any | false | | |
@ -6058,13 +6069,14 @@ Parameter represents a set value for the scope.
```json
{
"access_url": {
"err": null,
"error": null,
"healthy": true,
"healthzResponse": "string",
"reachable": true,
"statusCode": 0
},
"derp": {
"error": null,
"healthy": true,
"netcheck": {
"captivePortal": "string",
@ -6100,12 +6112,14 @@ Parameter represents a set value for the scope.
"netcheck_logs": ["string"],
"regions": {
"property1": {
"error": null,
"healthy": true,
"node_reports": [
{
"can_exchange_messages": true,
"client_errs": [[null]],
"client_logs": [["string"]],
"error": null,
"healthy": true,
"node": {
"certName": "string",
@ -6159,12 +6173,14 @@ Parameter represents a set value for the scope.
}
},
"property2": {
"error": null,
"healthy": true,
"node_reports": [
{
"can_exchange_messages": true,
"client_errs": [[null]],
"client_logs": [["string"]],
"error": null,
"healthy": true,
"node": {
"certName": "string",