mirror of https://github.com/coder/coder.git
fix: use TSMP for pings and checking reachability (#11306)
We're seeing some flaky tests related to agent connectivity, e.g. https://github.com/coder/coder/actions/runs/7286675441/job/19856270998. I'm pretty sure what happened in this one is that the client opened a connection while the wgengine was in the process of reconfiguring the wireguard device, so the fact that the peer became "active" as a result of traffic being sent was not noticed. The test calls `AwaitReachable()`, but this only tests the disco layer, so it doesn't wait for wireguard to come up. I think we should be using TSMP for pinging and reachability, since it operates at the IP layer and therefore requires wireguard to come up before it can succeed. This should also help with the problems we have seen where a TCP connection starts before wireguard is up and the initial round trip has to wait for the 5-second wireguard handshake retry. Fixes: #11294
This commit is contained in:
parent
58e40f6cd6
commit
520c3a8ff7
|
@ -174,10 +174,10 @@ func TestAgent_Stats_Magic(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
err = session.Shell()
|
||||
require.NoError(t, err)
|
||||
var s *agentsdk.Stats
|
||||
require.Eventuallyf(t, func() bool {
|
||||
var ok bool
|
||||
s, ok = <-stats
|
||||
s, ok := <-stats
|
||||
t.Logf("got stats: ok=%t, ConnectionCount=%d, RxBytes=%d, TxBytes=%d, SessionCountVSCode=%d, ConnectionMedianLatencyMS=%f",
|
||||
ok, s.ConnectionCount, s.RxBytes, s.TxBytes, s.SessionCountVSCode, s.ConnectionMedianLatencyMS)
|
||||
return ok && s.ConnectionCount > 0 && s.RxBytes > 0 && s.TxBytes > 0 &&
|
||||
// Ensure that the connection didn't count as a "normal" SSH session.
|
||||
// This was a special one, so it should be labeled specially in the stats!
|
||||
|
@ -186,7 +186,7 @@ func TestAgent_Stats_Magic(t *testing.T) {
|
|||
// If it isn't, it's set to -1.
|
||||
s.ConnectionMedianLatencyMS >= 0
|
||||
}, testutil.WaitLong, testutil.IntervalFast,
|
||||
"never saw stats: %+v", s,
|
||||
"never saw stats",
|
||||
)
|
||||
// The shell will automatically exit if there is no stdin!
|
||||
_ = stdin.Close()
|
||||
|
@ -240,14 +240,14 @@ func TestAgent_Stats_Magic(t *testing.T) {
|
|||
_ = tunneledConn.Close()
|
||||
})
|
||||
|
||||
var s *agentsdk.Stats
|
||||
require.Eventuallyf(t, func() bool {
|
||||
var ok bool
|
||||
s, ok = <-stats
|
||||
s, ok := <-stats
|
||||
t.Logf("got stats with conn open: ok=%t, ConnectionCount=%d, SessionCountJetBrains=%d",
|
||||
ok, s.ConnectionCount, s.SessionCountJetBrains)
|
||||
return ok && s.ConnectionCount > 0 &&
|
||||
s.SessionCountJetBrains == 1
|
||||
}, testutil.WaitLong, testutil.IntervalFast,
|
||||
"never saw stats with conn open: %+v", s,
|
||||
"never saw stats with conn open",
|
||||
)
|
||||
|
||||
// Kill the server and connection after checking for the echo.
|
||||
|
@ -256,12 +256,13 @@ func TestAgent_Stats_Magic(t *testing.T) {
|
|||
_ = tunneledConn.Close()
|
||||
|
||||
require.Eventuallyf(t, func() bool {
|
||||
var ok bool
|
||||
s, ok = <-stats
|
||||
return ok && s.ConnectionCount == 0 &&
|
||||
s, ok := <-stats
|
||||
t.Logf("got stats after disconnect %t, %d",
|
||||
ok, s.SessionCountJetBrains)
|
||||
return ok &&
|
||||
s.SessionCountJetBrains == 0
|
||||
}, testutil.WaitLong, testutil.IntervalFast,
|
||||
"never saw stats after conn closes: %+v", s,
|
||||
"never saw stats after conn closes",
|
||||
)
|
||||
})
|
||||
}
|
||||
|
|
|
@ -670,12 +670,12 @@ func (c *Conn) Status() *ipnstate.Status {
|
|||
return sb.Status()
|
||||
}
|
||||
|
||||
// Ping sends a Disco ping to the Wireguard engine.
|
||||
// Ping sends a ping to the Wireguard engine.
|
||||
// The bool returned is true if the ping was performed P2P.
|
||||
func (c *Conn) Ping(ctx context.Context, ip netip.Addr) (time.Duration, bool, *ipnstate.PingResult, error) {
|
||||
errCh := make(chan error, 1)
|
||||
prChan := make(chan *ipnstate.PingResult, 1)
|
||||
go c.wireguardEngine.Ping(ip, tailcfg.PingDisco, func(pr *ipnstate.PingResult) {
|
||||
go c.wireguardEngine.Ping(ip, tailcfg.PingTSMP, func(pr *ipnstate.PingResult) {
|
||||
if pr.Err != "" {
|
||||
errCh <- xerrors.New(pr.Err)
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue