diff --git a/cli/cliui/agent.go b/cli/cliui/agent.go index ab2217095e..ee8285dcf7 100644 --- a/cli/cliui/agent.go +++ b/cli/cliui/agent.go @@ -2,13 +2,17 @@ package cliui import ( "context" + "fmt" "io" + "strconv" + "strings" "time" "github.com/google/uuid" "golang.org/x/xerrors" "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/tailnet" ) var errAgentShuttingDown = xerrors.New("agent is shutting down") @@ -281,3 +285,55 @@ type closeFunc func() error func (c closeFunc) Close() error { return c() } + +func PeerDiagnostics(w io.Writer, d tailnet.PeerDiagnostics) { + if d.PreferredDERP > 0 { + rn, ok := d.DERPRegionNames[d.PreferredDERP] + if !ok { + rn = "unknown" + } + _, _ = fmt.Fprintf(w, "✔ preferred DERP region: %d (%s)\n", d.PreferredDERP, rn) + } else { + _, _ = fmt.Fprint(w, "✘ not connected to DERP\n") + } + if d.SentNode { + _, _ = fmt.Fprint(w, "✔ sent local data to Coder networking coodinator\n") + } else { + _, _ = fmt.Fprint(w, "✘ have not sent local data to Coder networking coordinator\n") + } + if d.ReceivedNode != nil { + dp := d.ReceivedNode.DERP + dn := "" + // should be 127.3.3.40:N where N is the DERP region + ap := strings.Split(dp, ":") + if len(ap) == 2 { + dp = ap[1] + di, err := strconv.Atoi(dp) + if err == nil { + var ok bool + dn, ok = d.DERPRegionNames[di] + if ok { + dn = fmt.Sprintf("(%s)", dn) + } else { + dn = "(unknown)" + } + } + } + _, _ = fmt.Fprintf(w, + "✔ received remote agent data from Coder networking coordinator\n preferred DERP region: %s %s\n endpoints: %s\n", + dp, dn, strings.Join(d.ReceivedNode.Endpoints, ", ")) + } else { + _, _ = fmt.Fprint(w, "✘ have not received remote agent data from Coder networking coordinator\n") + } + if !d.LastWireguardHandshake.IsZero() { + ago := time.Since(d.LastWireguardHandshake) + symbol := "✔" + // wireguard is supposed to refresh handshake on 5 minute intervals + if ago > 5*time.Minute { + symbol = "⚠" + } + _, _ = fmt.Fprintf(w, "%s Wireguard handshake %s ago\n", symbol, ago.Round(time.Second)) + } else { + _, _ = fmt.Fprint(w, "✘ Wireguard is not connected\n") + } +} diff --git a/cli/cliui/agent_test.go b/cli/cliui/agent_test.go index 4cb10d8ec0..2cd592b2c8 100644 --- a/cli/cliui/agent_test.go +++ b/cli/cliui/agent_test.go @@ -6,6 +6,7 @@ import ( "context" "io" "os" + "regexp" "strings" "sync/atomic" "testing" @@ -15,12 +16,14 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/xerrors" + "tailscale.com/tailcfg" "github.com/coder/coder/v2/cli/clibase" "github.com/coder/coder/v2/cli/clitest" "github.com/coder/coder/v2/cli/cliui" "github.com/coder/coder/v2/coderd/util/ptr" "github.com/coder/coder/v2/codersdk" + "github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/testutil" ) @@ -476,3 +479,191 @@ func TestAgent(t *testing.T) { require.NoError(t, cmd.Invoke().Run()) }) } + +func TestPeerDiagnostics(t *testing.T) { + t.Parallel() + testCases := []struct { + name string + diags tailnet.PeerDiagnostics + want []*regexp.Regexp // must be ordered, can omit lines + }{ + { + name: "noPreferredDERP", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: make(map[int]string), + SentNode: true, + ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"}, + LastWireguardHandshake: time.Now(), + }, + want: []*regexp.Regexp{ + regexp.MustCompile("^✘ not connected to DERP$"), + }, + }, + { + name: "preferredDERP", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 23, + DERPRegionNames: map[int]string{ + 23: "testo", + }, + SentNode: 
true, + ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"}, + LastWireguardHandshake: time.Now(), + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✔ preferred DERP region: 23 \(testo\)$`), + }, + }, + { + name: "sentNode", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{}, + SentNode: true, + ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"}, + LastWireguardHandshake: time.Time{}, + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✔ sent local data to Coder networking coodinator$`), + }, + }, + { + name: "didntSendNode", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{}, + SentNode: false, + ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"}, + LastWireguardHandshake: time.Time{}, + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✘ have not sent local data to Coder networking coordinator$`), + }, + }, + { + name: "receivedNodeDERPOKNoEndpoints", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{999: "Embedded"}, + SentNode: true, + ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"}, + LastWireguardHandshake: time.Time{}, + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✔ received remote agent data from Coder networking coordinator$`), + regexp.MustCompile(`preferred DERP region: 999 \(Embedded\)$`), + regexp.MustCompile(`endpoints: $`), + }, + }, + { + name: "receivedNodeDERPUnknownNoEndpoints", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{}, + SentNode: true, + ReceivedNode: &tailcfg.Node{DERP: "127.3.3.40:999"}, + LastWireguardHandshake: time.Time{}, + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✔ received remote agent data from Coder networking coordinator$`), + regexp.MustCompile(`preferred DERP region: 999 \(unknown\)$`), + regexp.MustCompile(`endpoints: $`), + }, + }, + { + name: "receivedNodeEndpointsNoDERP", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{999: "Embedded"}, + SentNode: true, + ReceivedNode: &tailcfg.Node{Endpoints: []string{"99.88.77.66:4555", "33.22.11.0:3444"}}, + LastWireguardHandshake: time.Time{}, + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✔ received remote agent data from Coder networking coordinator$`), + regexp.MustCompile(`preferred DERP region:\s*$`), + regexp.MustCompile(`endpoints: 99\.88\.77\.66:4555, 33\.22\.11\.0:3444$`), + }, + }, + { + name: "didntReceiveNode", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{}, + SentNode: false, + ReceivedNode: nil, + LastWireguardHandshake: time.Time{}, + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✘ have not received remote agent data from Coder networking coordinator$`), + }, + }, + { + name: "noWireguardHandshake", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{}, + SentNode: false, + ReceivedNode: nil, + LastWireguardHandshake: time.Time{}, + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✘ Wireguard is not connected$`), + }, + }, + { + name: "wireguardHandshakeRecent", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{}, + SentNode: false, + ReceivedNode: nil, + LastWireguardHandshake: time.Now().Add(-5 * time.Second), + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^✔ Wireguard handshake \d+s ago$`), + }, + }, + { + name: "wireguardHandshakeOld", + diags: tailnet.PeerDiagnostics{ + PreferredDERP: 0, + DERPRegionNames: map[int]string{}, 
+ SentNode: false, + ReceivedNode: nil, + LastWireguardHandshake: time.Now().Add(-450 * time.Second), // 7m30s + }, + want: []*regexp.Regexp{ + regexp.MustCompile(`^⚠ Wireguard handshake 7m\d+s ago$`), + }, + }, + } + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + r, w := io.Pipe() + go func() { + defer w.Close() + cliui.PeerDiagnostics(w, tc.diags) + }() + s := bufio.NewScanner(r) + i := 0 + got := make([]string, 0) + for s.Scan() { + got = append(got, s.Text()) + if i < len(tc.want) { + reg := tc.want[i] + if reg.Match(s.Bytes()) { + i++ + } + } + } + if i < len(tc.want) { + t.Logf("failed to match regexp: %s\ngot:\n%s", tc.want[i].String(), strings.Join(got, "\n")) + t.FailNow() + } + }) + } +} diff --git a/cli/ping.go b/cli/ping.go index fa0d0472a0..da8a53a782 100644 --- a/cli/ping.go +++ b/cli/ping.go @@ -135,6 +135,8 @@ func (r *RootCmd) ping() *clibase.Cmd { ) if n == int(pingNum) { + diags := conn.GetPeerDiagnostics() + cliui.PeerDiagnostics(inv.Stdout, diags) return nil } } diff --git a/cli/ping_test.go b/cli/ping_test.go index 55787bb39b..7a4dcee5e1 100644 --- a/cli/ping_test.go +++ b/cli/ping_test.go @@ -42,4 +42,32 @@ func TestPing(t *testing.T) { cancel() <-cmdDone }) + + t.Run("1Ping", func(t *testing.T) { + t.Parallel() + + client, workspace, agentToken := setupWorkspaceForAgent(t) + inv, root := clitest.New(t, "ping", "-n", "1", workspace.Name) + clitest.SetupConfig(t, client, root) + pty := ptytest.New(t) + inv.Stdin = pty.Input() + inv.Stderr = pty.Output() + inv.Stdout = pty.Output() + + _ = agenttest.New(t, client.URL, agentToken) + _ = coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID) + + ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong) + defer cancel() + + cmdDone := tGo(t, func() { + err := inv.WithContext(ctx).Run() + assert.NoError(t, err) + }) + + pty.ExpectMatch("pong from " + workspace.Name) + pty.ExpectMatch("✔ received remote agent data from Coder networking coordinator") + cancel() + <-cmdDone + }) } diff --git a/cli/ssh.go b/cli/ssh.go index bdc5d98b3c..990b972309 100644 --- a/cli/ssh.go +++ b/cli/ssh.go @@ -563,7 +563,7 @@ func getWorkspaceAndAgent(ctx context.Context, inv *clibase.Invocation, client * if workspace.LatestBuild.Transition != codersdk.WorkspaceTransitionStart { if !autostart { - return codersdk.Workspace{}, codersdk.WorkspaceAgent{}, xerrors.New("workspace must be in start transition to ssh") + return codersdk.Workspace{}, codersdk.WorkspaceAgent{}, xerrors.New("workspace must be started") } // Autostart the workspace for the user. // For some failure modes, return a better message. @@ -579,7 +579,7 @@ func getWorkspaceAndAgent(ctx context.Context, inv *clibase.Invocation, client * // It cannot be in any pending or failed state. 
if workspace.LatestBuild.Status != codersdk.WorkspaceStatusStopped { return codersdk.Workspace{}, codersdk.WorkspaceAgent{}, - xerrors.Errorf("workspace must be in start transition to ssh, was unable to autostart as the last build job is %q, expected %q", + xerrors.Errorf("workspace must be started; was unable to autostart as the last build job is %q, expected %q", workspace.LatestBuild.Status, codersdk.WorkspaceStatusStopped, ) diff --git a/codersdk/workspaceagentconn.go b/codersdk/workspaceagentconn.go index fe49158e46..9fe229293d 100644 --- a/codersdk/workspaceagentconn.go +++ b/codersdk/workspaceagentconn.go @@ -414,3 +414,7 @@ func (c *WorkspaceAgentConn) apiClient() *http.Client { }, } } + +func (c *WorkspaceAgentConn) GetPeerDiagnostics() tailnet.PeerDiagnostics { + return c.Conn.GetPeerDiagnostics(c.opts.AgentID) +} diff --git a/tailnet/configmaps.go b/tailnet/configmaps.go index a149984f2a..b273a5b75c 100644 --- a/tailnet/configmaps.go +++ b/tailnet/configmaps.go @@ -521,6 +521,27 @@ func (c *configMaps) nodeAddresses(publicKey key.NodePublic) ([]netip.Prefix, bo return nil, false } +func (c *configMaps) fillPeerDiagnostics(d *PeerDiagnostics, peerID uuid.UUID) { + status := c.status() + c.L.Lock() + defer c.L.Unlock() + if c.derpMap != nil { + for j, r := range c.derpMap.Regions { + d.DERPRegionNames[j] = r.RegionName + } + } + lc, ok := c.peers[peerID] + if !ok { + return + } + d.ReceivedNode = lc.node + ps, ok := status.Peer[lc.node.Key] + if !ok { + return + } + d.LastWireguardHandshake = ps.LastHandshake +} + type peerLifecycle struct { peerID uuid.UUID node *tailcfg.Node diff --git a/tailnet/configmaps_internal_test.go b/tailnet/configmaps_internal_test.go index f842a710a6..1008562904 100644 --- a/tailnet/configmaps_internal_test.go +++ b/tailnet/configmaps_internal_test.go @@ -773,6 +773,78 @@ func TestConfigMaps_setDERPMap_same(t *testing.T) { _ = testutil.RequireRecvCtx(ctx, t, done) } +func TestConfigMaps_fillPeerDiagnostics(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + fEng := newFakeEngineConfigurable() + nodePrivateKey := key.NewNode() + nodeID := tailcfg.NodeID(5) + discoKey := key.NewDisco() + uut := newConfigMaps(logger, fEng, nodeID, nodePrivateKey, discoKey.Public()) + defer uut.close() + + // Given: DERP Map and peer already set + derpMap := &tailcfg.DERPMap{ + HomeParams: &tailcfg.DERPHomeParams{RegionScore: map[int]float64{ + 1: 0.025, + 1001: 0.111, + }}, + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionCode: "AUH", + RegionName: "AUH", + Nodes: []*tailcfg.DERPNode{ + {Name: "AUH0"}, + }, + }, + 1001: { + RegionCode: "DXB", + RegionName: "DXB", + Nodes: []*tailcfg.DERPNode{ + {Name: "DXB0"}, + }, + }, + }, + } + p1ID := uuid.UUID{1} + p1Node := newTestNode(1) + p1n, err := NodeToProto(p1Node) + require.NoError(t, err) + p1tcn, err := uut.protoNodeToTailcfg(p1n) + p1tcn.KeepAlive = true + require.NoError(t, err) + + hst := time.Date(2024, 1, 7, 12, 0, 10, 0, time.UTC) + uut.L.Lock() + uut.derpMap = derpMap + uut.peers[p1ID] = &peerLifecycle{ + peerID: p1ID, + node: p1tcn, + lastHandshake: hst, + } + uut.L.Unlock() + + s0 := expectStatusWithHandshake(ctx, t, fEng, p1Node.Key, hst) + + // When: call fillPeerDiagnostics + d := PeerDiagnostics{DERPRegionNames: make(map[int]string)} + uut.fillPeerDiagnostics(&d, p1ID) + testutil.RequireRecvCtx(ctx, t, s0) + + // Then: + require.Equal(t, map[int]string{1: "AUH", 1001: "DXB"}, d.DERPRegionNames) + require.Equal(t, 
p1tcn, d.ReceivedNode) + require.Equal(t, hst, d.LastWireguardHandshake) + + done := make(chan struct{}) + go func() { + defer close(done) + uut.close() + }() + _ = testutil.RequireRecvCtx(ctx, t, done) +} + func expectStatusWithHandshake( ctx context.Context, t testing.TB, fEng *fakeEngineConfigurable, k key.NodePublic, lastHandshake time.Time, ) <-chan struct{} { diff --git a/tailnet/conn.go b/tailnet/conn.go index 11c821d6b6..b6f792fa5b 100644 --- a/tailnet/conn.go +++ b/tailnet/conn.go @@ -645,6 +645,30 @@ func (c *Conn) MagicsockServeHTTPDebug(w http.ResponseWriter, r *http.Request) { c.magicConn.ServeHTTPDebug(w, r) } +// PeerDiagnostics is a checklist of human-readable conditions necessary to establish an encrypted +// tunnel to a peer via a Conn +type PeerDiagnostics struct { + // PreferredDERP is 0 if we are not connected to a DERP region. If non-zero, we are connected to + // the given region as our home or "preferred" DERP. + PreferredDERP int + DERPRegionNames map[int]string + // SentNode is true if we have successfully transmitted our local Node via the most recently set + // NodeCallback. + SentNode bool + // ReceivedNode is the last Node we received for the peer, or nil if we haven't received the node. + ReceivedNode *tailcfg.Node + // LastWireguardHandshake is the last time we completed a wireguard handshake + LastWireguardHandshake time.Time + // TODO: surface Discovery (disco) protocol problems +} + +func (c *Conn) GetPeerDiagnostics(peerID uuid.UUID) PeerDiagnostics { + d := PeerDiagnostics{DERPRegionNames: make(map[int]string)} + c.nodeUpdater.fillPeerDiagnostics(&d) + c.configMaps.fillPeerDiagnostics(&d, peerID) + return d +} + type listenKey struct { network string host string diff --git a/tailnet/node.go b/tailnet/node.go index e7e83b6690..ce9e9cd9fc 100644 --- a/tailnet/node.go +++ b/tailnet/node.go @@ -36,6 +36,7 @@ type nodeUpdater struct { addresses []netip.Prefix lastStatus time.Time blockEndpoints bool + sentNode bool // for PeerDiagnostics } // updateLoop waits until the config is dirty and then calls the callback with the newest node. 
@@ -79,6 +80,7 @@ func (u *nodeUpdater) updateLoop() { u.logger.Debug(context.Background(), "calling nodeUpdater callback", slog.F("node", node)) callback(node) u.L.Lock() + u.sentNode = true } } @@ -212,6 +214,7 @@ func (u *nodeUpdater) setCallback(callback func(node *Node)) { defer u.L.Unlock() u.callback = callback u.dirty = true + u.sentNode = false u.Broadcast() } @@ -228,3 +231,11 @@ func (u *nodeUpdater) setBlockEndpoints(blockEndpoints bool) { u.blockEndpoints = blockEndpoints u.Broadcast() } + +// fillPeerDiagnostics fills out the PeerDiagnostics with PreferredDERP and SentNode +func (u *nodeUpdater) fillPeerDiagnostics(d *PeerDiagnostics) { + u.L.Lock() + defer u.L.Unlock() + d.PreferredDERP = u.preferredDERP + d.SentNode = u.sentNode +} diff --git a/tailnet/node_internal_test.go b/tailnet/node_internal_test.go index aa933de4be..577ce55a83 100644 --- a/tailnet/node_internal_test.go +++ b/tailnet/node_internal_test.go @@ -5,16 +5,14 @@ import ( "testing" "time" - "golang.org/x/xerrors" - - "golang.org/x/exp/slices" - - "tailscale.com/wgengine" - + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/exp/maps" + "golang.org/x/exp/slices" + "golang.org/x/xerrors" "tailscale.com/tailcfg" "tailscale.com/types/key" + "tailscale.com/wgengine" "cdr.dev/slog" "cdr.dev/slog/sloggers/slogtest" @@ -569,3 +567,91 @@ func TestNodeUpdater_setBlockEndpoints_same(t *testing.T) { }() _ = testutil.RequireRecvCtx(ctx, t, done) } + +func TestNodeUpdater_fillPeerDiagnostics(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + id := tailcfg.NodeID(1) + nodeKey := key.NewNode().Public() + discoKey := key.NewDisco().Public() + uut := newNodeUpdater( + logger, + func(n *Node) {}, + id, nodeKey, discoKey, + ) + defer uut.close() + + // at start of day, filling diagnostics will not have derp and sentNode is false + d := PeerDiagnostics{} + uut.fillPeerDiagnostics(&d) + require.Equal(t, 0, d.PreferredDERP) + require.False(t, d.SentNode) + + dl := map[string]float64{"1": 0.025} + uut.setNetInfo(&tailcfg.NetInfo{ + PreferredDERP: 1, + DERPLatency: dl, + }) + + // after node callback, we should get the derp and SentNode is true. + // Use eventually since, there is a race between the callback completing + // and the test checking + require.Eventually(t, func() bool { + d := PeerDiagnostics{} + uut.fillPeerDiagnostics(&d) + // preferred DERP should be set right away, even if the callback is not + // complete. 
+ if !assert.Equal(t, 1, d.PreferredDERP) { + return false + } + return d.SentNode + }, testutil.WaitShort, testutil.IntervalFast) + + done := make(chan struct{}) + go func() { + defer close(done) + uut.close() + }() + _ = testutil.RequireRecvCtx(ctx, t, done) +} + +func TestNodeUpdater_fillPeerDiagnostics_noCallback(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + id := tailcfg.NodeID(1) + nodeKey := key.NewNode().Public() + discoKey := key.NewDisco().Public() + uut := newNodeUpdater( + logger, + nil, + id, nodeKey, discoKey, + ) + defer uut.close() + + // at start of day, filling diagnostics will not have derp and sentNode is false + d := PeerDiagnostics{} + uut.fillPeerDiagnostics(&d) + require.Equal(t, 0, d.PreferredDERP) + require.False(t, d.SentNode) + + dl := map[string]float64{"1": 0.025} + uut.setNetInfo(&tailcfg.NetInfo{ + PreferredDERP: 1, + DERPLatency: dl, + }) + + // since there is no callback, SentNode should not be true, but we get + // the preferred DERP + uut.fillPeerDiagnostics(&d) + require.Equal(t, 1, d.PreferredDERP) + require.False(t, d.SentNode) + + done := make(chan struct{}) + go func() { + defer close(done) + uut.close() + }() + _ = testutil.RequireRecvCtx(ctx, t, done) +}
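
The change above wires the new diagnostics into `coder ping`, but the two building blocks are reusable on their own. Below is a minimal sketch (not part of the diff) of how a caller that already holds an established agent connection could print the same checklist. The helper name printAgentDiagnostics and the package name are assumptions for illustration; WorkspaceAgentConn.GetPeerDiagnostics and cliui.PeerDiagnostics are the functions this diff introduces.

// Minimal sketch, assuming the caller already has a *codersdk.WorkspaceAgentConn.
package example

import (
	"io"

	"github.com/coder/coder/v2/cli/cliui"
	"github.com/coder/coder/v2/codersdk"
)

// printAgentDiagnostics is a hypothetical helper. GetPeerDiagnostics snapshots
// DERP, node-exchange, and WireGuard handshake state for the agent peer, and
// cliui.PeerDiagnostics renders that snapshot as the ✔/✘/⚠ checklist.
func printAgentDiagnostics(w io.Writer, conn *codersdk.WorkspaceAgentConn) {
	diags := conn.GetPeerDiagnostics()
	cliui.PeerDiagnostics(w, diags)
}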
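
As the format strings in cli/cliui/agent.go show, a healthy connection yields four ✔ lines: the preferred DERP region, local node data sent to the coordinator, remote agent node data received (with the agent's DERP region, parsed from the port of its synthetic "127.3.3.40:N" address, and its endpoints), and the time since the last WireGuard handshake. A handshake older than five minutes is rendered with ⚠ instead of ✔, since WireGuard refreshes handshakes on roughly five-minute intervals; a zero handshake time, a nil received node, unsent local data, or PreferredDERP of 0 each produce a ✘ line instead.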