mirror of https://github.com/coder/coder.git
fix: routinely ping agent websocket to ensure liveness (#5824)
This commit is contained in:
parent
ba8dd496c3
commit
d2ae16dd22
|
@ -430,6 +430,9 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
|
|||
// runCoordinator runs a coordinator and returns whether a reconnect
|
||||
// should occur.
|
||||
func (a *agent) runCoordinator(ctx context.Context, network *tailnet.Conn) error {
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
coordinator, err := a.client.ListenWorkspaceAgent(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
|
@ -83,6 +83,7 @@ func workspaceAgent() *cobra.Command {
|
|||
slog.F("version", version),
|
||||
)
|
||||
client := codersdk.New(coderURL)
|
||||
client.Logger = logger
|
||||
// Set a reasonable timeout so requests can't hang forever!
|
||||
client.HTTPClient.Timeout = 10 * time.Second
|
||||
|
||||
|
|
|
@ -340,6 +340,42 @@ func (c *Client) ListenWorkspaceAgent(ctx context.Context) (net.Conn, error) {
|
|||
return nil, readBodyAsError(res)
|
||||
}
|
||||
|
||||
// Ping once every 30 seconds to ensure that the websocket is alive. If we
|
||||
// don't get a response within 30s we close the websocket so that a reconnect can occur.
|
||||
// See: https://github.com/coder/coder/pull/5824
|
||||
go func() {
|
||||
tick := 30 * time.Second
|
||||
ticker := time.NewTicker(tick)
|
||||
defer ticker.Stop()
|
||||
defer func() {
|
||||
c.Logger.Debug(ctx, "coordinate pinger exited")
|
||||
}()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case start := <-ticker.C:
|
||||
ctx, cancel := context.WithTimeout(ctx, tick)
|
||||
|
||||
err := conn.Ping(ctx)
|
||||
if err != nil {
|
||||
c.Logger.Error(ctx, "workspace agent coordinate ping", slog.Error(err))
|
||||
|
||||
err := conn.Close(websocket.StatusGoingAway, "Ping failed")
|
||||
if err != nil {
|
||||
c.Logger.Error(ctx, "close workspace agent coordinate websocket", slog.Error(err))
|
||||
}
|
||||
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
|
||||
c.Logger.Debug(ctx, "got coordinate pong", slog.F("took", time.Since(start)))
|
||||
cancel()
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return websocket.NetConn(ctx, conn, websocket.MessageBinary), nil
|
||||
}
|
||||
|
||||
|
|
|
@ -12,9 +12,6 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/coder/coder/provisionerd/runner"
|
||||
"github.com/coder/coder/testutil"
|
||||
|
||||
"github.com/hashicorp/yamux"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
@ -26,11 +23,12 @@ import (
|
|||
|
||||
"cdr.dev/slog"
|
||||
"cdr.dev/slog/sloggers/slogtest"
|
||||
|
||||
"github.com/coder/coder/provisionerd"
|
||||
"github.com/coder/coder/provisionerd/proto"
|
||||
"github.com/coder/coder/provisionerd/runner"
|
||||
"github.com/coder/coder/provisionersdk"
|
||||
sdkproto "github.com/coder/coder/provisionersdk/proto"
|
||||
"github.com/coder/coder/testutil"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
|
|
Loading…
Reference in New Issue