package cli

import (
	"context"
	"net/url"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/xerrors"

	"cdr.dev/slog"
	"cdr.dev/slog/sloggers/slogtest"

	"github.com/coder/coder/v2/codersdk"
	"github.com/coder/coder/v2/testutil"
)

const (
	fakeOwnerName     = "fake-owner-name"
	fakeServerURL     = "https://fake-foo-url"
	fakeWorkspaceName = "fake-workspace-name"
)

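// TestVerifyWorkspaceOutdated checks that verifyWorkspaceOutdated only reports
// the workspace as outdated, and only produces an update banner, when the
// workspace's Outdated flag is set.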
func TestVerifyWorkspaceOutdated(t *testing.T) {
	t.Parallel()

	serverURL, err := url.Parse(fakeServerURL)
	require.NoError(t, err)

	client := codersdk.Client{URL: serverURL}

	t.Run("Up-to-date", func(t *testing.T) {
		t.Parallel()

		workspace := codersdk.Workspace{Name: fakeWorkspaceName, OwnerName: fakeOwnerName}

		_, outdated := verifyWorkspaceOutdated(&client, workspace)

		assert.False(t, outdated, "workspace should be up-to-date")
	})
	t.Run("Outdated", func(t *testing.T) {
		t.Parallel()

		workspace := codersdk.Workspace{Name: fakeWorkspaceName, OwnerName: fakeOwnerName, Outdated: true}

		updateWorkspaceBanner, outdated := verifyWorkspaceOutdated(&client, workspace)

		assert.True(t, outdated, "workspace should be outdated")
		assert.NotEmpty(t, updateWorkspaceBanner, "workspace banner should be present")
	})
}

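// TestBuildWorkspaceLink verifies that the generated link points at the
// workspace page on the deployment, i.e. <server URL>/@<owner>/<workspace>.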
func TestBuildWorkspaceLink(t *testing.T) {
	t.Parallel()

	serverURL, err := url.Parse(fakeServerURL)
	require.NoError(t, err)

	workspace := codersdk.Workspace{Name: fakeWorkspaceName, OwnerName: fakeOwnerName}
	workspaceLink := buildWorkspaceLink(serverURL, workspace)

	assert.Equal(t, workspaceLink.String(), fakeServerURL+"/@"+fakeOwnerName+"/"+fakeWorkspaceName)
}

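// The closerStack tests below cover its teardown behavior: pushed closers are
// closed in reverse (LIFO) order, the stack closes itself when its context is
// canceled, and a push after close fails and immediately closes the pushed
// closer.

// TestCloserStack_Mainline pushes two closers and verifies that they are
// closed in reverse order of being pushed.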
func TestCloserStack_Mainline(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
	uut := newCloserStack(ctx, logger)
	closes := new([]*fakeCloser)
	fc0 := &fakeCloser{closes: closes}
	fc1 := &fakeCloser{closes: closes}

	func() {
		defer uut.close(nil)
		err := uut.push("fc0", fc0)
		require.NoError(t, err)
		err = uut.push("fc1", fc1)
		require.NoError(t, err)
	}()
	// order reversed
	require.Equal(t, []*fakeCloser{fc1, fc0}, *closes)
}

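// TestCloserStack_Context verifies that canceling the stack's context closes
// the stack without an explicit call to close().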
func TestCloserStack_Context(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitShort)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
	uut := newCloserStack(ctx, logger)
	closes := new([]*fakeCloser)
	fc0 := &fakeCloser{closes: closes}
	fc1 := &fakeCloser{closes: closes}

	err := uut.push("fc0", fc0)
	require.NoError(t, err)
	err = uut.push("fc1", fc1)
	require.NoError(t, err)
	cancel()
	require.Eventually(t, func() bool {
		uut.Lock()
		defer uut.Unlock()
		return uut.closed
	}, testutil.WaitShort, testutil.IntervalFast)
}

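// TestCloserStack_PushAfterClose verifies that pushing onto an already-closed
// stack returns the error the stack was closed with and immediately closes the
// newly pushed closer.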
func TestCloserStack_PushAfterClose(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	uut := newCloserStack(ctx, logger)
	closes := new([]*fakeCloser)
	fc0 := &fakeCloser{closes: closes}
	fc1 := &fakeCloser{closes: closes}

	err := uut.push("fc0", fc0)
	require.NoError(t, err)

	exErr := xerrors.New("test")
	uut.close(exErr)
	require.Equal(t, []*fakeCloser{fc0}, *closes)

	err = uut.push("fc1", fc1)
	require.ErrorIs(t, err, exErr)
	require.Equal(t, []*fakeCloser{fc1, fc0}, *closes, "should close fc1")
}

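// TestCloserStack_CloseAfterContext verifies that close() blocks until every
// pushed closer has finished, even when the stack's context was canceled while
// a closer was still in flight.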
func TestCloserStack_CloseAfterContext(t *testing.T) {
	t.Parallel()
	testCtx := testutil.Context(t, testutil.WaitShort)
	ctx, cancel := context.WithCancel(testCtx)
	defer cancel()
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	uut := newCloserStack(ctx, logger)
	ac := &asyncCloser{
		t:        t,
		ctx:      testCtx,
		complete: make(chan struct{}),
		started:  make(chan struct{}),
	}
	err := uut.push("async", ac)
	require.NoError(t, err)
	cancel()
	testutil.RequireRecvCtx(testCtx, t, ac.started)

	closed := make(chan struct{})
	go func() {
		defer close(closed)
		uut.close(nil)
	}()

	// since the asyncCloser is still waiting, we shouldn't complete uut.close()
	select {
	case <-time.After(testutil.IntervalFast):
		// OK!
	case <-closed:
		t.Fatal("closed before stack was finished")
	}

	// complete the asyncCloser
	close(ac.complete)
	testutil.RequireRecvCtx(testCtx, t, closed)
}

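// fakeCloser records the order in which closers are closed by appending itself
// to the shared closes slice, and returns err from Close.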
type fakeCloser struct {
	closes *[]*fakeCloser
	err    error
}

func (c *fakeCloser) Close() error {
	*c.closes = append(*c.closes, c)
	return c.err
}

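// asyncCloser is a closer whose Close blocks: it signals started when Close
// begins and does not return until complete is closed (or the test context
// expires, which fails the test).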
type asyncCloser struct {
	t        *testing.T
	ctx      context.Context
	started  chan struct{}
	complete chan struct{}
}

func (c *asyncCloser) Close() error {
	close(c.started)
	select {
	case <-c.ctx.Done():
		c.t.Error("timed out")
		return c.ctx.Err()
	case <-c.complete:
		return nil
	}
}

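// exampleCloserStackUsage is an illustrative sketch, not a test, and is not
// run anywhere: it shows the push/close call pattern the tests above exercise,
// using only calls those tests already demonstrate (newCloserStack, push,
// close). The resource names and the use of fakeCloser as the pushed closers
// are placeholders for whatever a real caller would push.
//
//nolint:unused
func exampleCloserStackUsage(ctx context.Context, logger slog.Logger, c0, c1 *fakeCloser) {
	stack := newCloserStack(ctx, logger)
	// close(nil) pops and closes in reverse order, so c1 closes before c0, as
	// TestCloserStack_Mainline verifies.
	defer stack.close(nil)
	_ = stack.push("first", c0)
	_ = stack.push("second", c1)
}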