coder/agent/agent_test.go

2814 lines
80 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package agent_test
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"net/netip"
"os"
"os/exec"
"os/user"
"path"
"path/filepath"
"regexp"
"runtime"
"strings"
"sync"
"sync/atomic"
"syscall"
"testing"
"time"
"github.com/bramvdbogaerde/go-scp"
"github.com/google/uuid"
"github.com/pion/udp"
"github.com/pkg/sftp"
"github.com/prometheus/client_golang/prometheus"
promgo "github.com/prometheus/client_model/go"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"go.uber.org/mock/gomock"
"golang.org/x/crypto/ssh"
"golang.org/x/exp/slices"
"golang.org/x/xerrors"
"tailscale.com/net/speedtest"
"tailscale.com/tailcfg"
"cdr.dev/slog"
"cdr.dev/slog/sloggers/sloghuman"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/agent"
"github.com/coder/coder/v2/agent/agentproc"
"github.com/coder/coder/v2/agent/agentproc/agentproctest"
"github.com/coder/coder/v2/agent/agentssh"
"github.com/coder/coder/v2/agent/agenttest"
"github.com/coder/coder/v2/agent/proto"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/cryptorand"
"github.com/coder/coder/v2/pty/ptytest"
"github.com/coder/coder/v2/tailnet"
"github.com/coder/coder/v2/tailnet/tailnettest"
"github.com/coder/coder/v2/testutil"
)
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
}
// NOTE: These tests only work when your default shell is bash for some reason.
func TestAgent_Stats_SSH(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, stats, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
defer sshClient.Close()
session, err := sshClient.NewSession()
require.NoError(t, err)
defer session.Close()
stdin, err := session.StdinPipe()
require.NoError(t, err)
err = session.Shell()
require.NoError(t, err)
var s *proto.Stats
require.Eventuallyf(t, func() bool {
var ok bool
s, ok = <-stats
return ok && s.ConnectionCount > 0 && s.RxBytes > 0 && s.TxBytes > 0 && s.SessionCountSsh == 1
}, testutil.WaitLong, testutil.IntervalFast,
"never saw stats: %+v", s,
)
_ = stdin.Close()
err = session.Wait()
require.NoError(t, err)
}
func TestAgent_Stats_ReconnectingPTY(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, stats, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
ptyConn, err := conn.ReconnectingPTY(ctx, uuid.New(), 128, 128, "bash")
require.NoError(t, err)
defer ptyConn.Close()
data, err := json.Marshal(workspacesdk.ReconnectingPTYRequest{
Data: "echo test\r\n",
})
require.NoError(t, err)
_, err = ptyConn.Write(data)
require.NoError(t, err)
var s *proto.Stats
require.Eventuallyf(t, func() bool {
var ok bool
s, ok = <-stats
return ok && s.ConnectionCount > 0 && s.RxBytes > 0 && s.TxBytes > 0 && s.SessionCountReconnectingPty == 1
}, testutil.WaitLong, testutil.IntervalFast,
"never saw stats: %+v", s,
)
}
func TestAgent_Stats_Magic(t *testing.T) {
t.Parallel()
t.Run("StripsEnvironmentVariable", func(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
defer sshClient.Close()
session, err := sshClient.NewSession()
require.NoError(t, err)
session.Setenv(agentssh.MagicSessionTypeEnvironmentVariable, agentssh.MagicSessionTypeVSCode)
defer session.Close()
command := "sh -c 'echo $" + agentssh.MagicSessionTypeEnvironmentVariable + "'"
expected := ""
if runtime.GOOS == "windows" {
expected = "%" + agentssh.MagicSessionTypeEnvironmentVariable + "%"
command = "cmd.exe /c echo " + expected
}
output, err := session.Output(command)
require.NoError(t, err)
require.Equal(t, expected, strings.TrimSpace(string(output)))
})
t.Run("TracksVSCode", func(t *testing.T) {
t.Parallel()
if runtime.GOOS == "window" {
t.Skip("Sleeping for infinity doesn't work on Windows")
}
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, stats, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
defer sshClient.Close()
session, err := sshClient.NewSession()
require.NoError(t, err)
session.Setenv(agentssh.MagicSessionTypeEnvironmentVariable, agentssh.MagicSessionTypeVSCode)
defer session.Close()
stdin, err := session.StdinPipe()
require.NoError(t, err)
err = session.Shell()
require.NoError(t, err)
require.Eventuallyf(t, func() bool {
s, ok := <-stats
t.Logf("got stats: ok=%t, ConnectionCount=%d, RxBytes=%d, TxBytes=%d, SessionCountVSCode=%d, ConnectionMedianLatencyMS=%f",
ok, s.ConnectionCount, s.RxBytes, s.TxBytes, s.SessionCountVscode, s.ConnectionMedianLatencyMs)
return ok && s.ConnectionCount > 0 && s.RxBytes > 0 && s.TxBytes > 0 &&
// Ensure that the connection didn't count as a "normal" SSH session.
// This was a special one, so it should be labeled specially in the stats!
s.SessionCountVscode == 1 &&
// Ensure that connection latency is being counted!
// If it isn't, it's set to -1.
s.ConnectionMedianLatencyMs >= 0
}, testutil.WaitLong, testutil.IntervalFast,
"never saw stats",
)
// The shell will automatically exit if there is no stdin!
_ = stdin.Close()
err = session.Wait()
require.NoError(t, err)
})
t.Run("TracksJetBrains", func(t *testing.T) {
t.Parallel()
if runtime.GOOS != "linux" {
t.Skip("JetBrains tracking is only supported on Linux")
}
ctx := testutil.Context(t, testutil.WaitLong)
// JetBrains tracking works by looking at the process name listening on the
// forwarded port. If the process's command line includes the magic string
// we are looking for, then we assume it is a JetBrains editor. So when we
// connect to the port we must ensure the process includes that magic string
// to fool the agent into thinking this is JetBrains. To do this we need to
// spawn an external process (in this case a simple echo server) so we can
// control the process name. The -D here is just to mimic how Java options
// are set but is not necessary as the agent looks only for the magic
// string itself anywhere in the command.
_, b, _, ok := runtime.Caller(0)
require.True(t, ok)
dir := filepath.Join(filepath.Dir(b), "../scripts/echoserver/main.go")
echoServerCmd := exec.Command("go", "run", dir,
"-D", agentssh.MagicProcessCmdlineJetBrains)
stdout, err := echoServerCmd.StdoutPipe()
require.NoError(t, err)
err = echoServerCmd.Start()
require.NoError(t, err)
defer echoServerCmd.Process.Kill()
// The echo server prints its port as the first line.
sc := bufio.NewScanner(stdout)
sc.Scan()
remotePort := sc.Text()
//nolint:dogsled
conn, _, stats, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
tunneledConn, err := sshClient.Dial("tcp", fmt.Sprintf("127.0.0.1:%s", remotePort))
require.NoError(t, err)
t.Cleanup(func() {
// always close on failure of test
_ = conn.Close()
_ = tunneledConn.Close()
})
require.Eventuallyf(t, func() bool {
s, ok := <-stats
t.Logf("got stats with conn open: ok=%t, ConnectionCount=%d, SessionCountJetBrains=%d",
ok, s.ConnectionCount, s.SessionCountJetbrains)
return ok && s.ConnectionCount > 0 &&
s.SessionCountJetbrains == 1
}, testutil.WaitLong, testutil.IntervalFast,
"never saw stats with conn open",
)
// Kill the server and connection after checking for the echo.
requireEcho(t, tunneledConn)
_ = echoServerCmd.Process.Kill()
_ = tunneledConn.Close()
require.Eventuallyf(t, func() bool {
s, ok := <-stats
t.Logf("got stats after disconnect %t, %d",
ok, s.SessionCountJetbrains)
return ok &&
s.SessionCountJetbrains == 0
}, testutil.WaitLong, testutil.IntervalFast,
"never saw stats after conn closes",
)
})
}
func TestAgent_SessionExec(t *testing.T) {
t.Parallel()
session := setupSSHSession(t, agentsdk.Manifest{}, codersdk.ServiceBannerConfig{}, nil)
command := "echo test"
if runtime.GOOS == "windows" {
command = "cmd.exe /c echo test"
}
output, err := session.Output(command)
require.NoError(t, err)
require.Equal(t, "test", strings.TrimSpace(string(output)))
}
//nolint:tparallel // Sub tests need to run sequentially.
func TestAgent_Session_EnvironmentVariables(t *testing.T) {
t.Parallel()
tmpdir := t.TempDir()
// Defined by the coder script runner, hardcoded here since we don't
// have a reference to it.
scriptBinDir := filepath.Join(tmpdir, "coder-script-data", "bin")
manifest := agentsdk.Manifest{
EnvironmentVariables: map[string]string{
"MY_MANIFEST": "true",
"MY_OVERRIDE": "false",
"MY_SESSION_MANIFEST": "false",
},
}
banner := codersdk.ServiceBannerConfig{}
session := setupSSHSession(t, manifest, banner, nil, func(_ *agenttest.Client, opts *agent.Options) {
opts.ScriptDataDir = tmpdir
opts.EnvironmentVariables["MY_OVERRIDE"] = "true"
})
err := session.Setenv("MY_SESSION_MANIFEST", "true")
require.NoError(t, err)
err = session.Setenv("MY_SESSION", "true")
require.NoError(t, err)
command := "sh"
echoEnv := func(t *testing.T, w io.Writer, env string) {
if runtime.GOOS == "windows" {
_, err := fmt.Fprintf(w, "echo %%%s%%\r\n", env)
require.NoError(t, err)
} else {
_, err := fmt.Fprintf(w, "echo $%s\n", env)
require.NoError(t, err)
}
}
if runtime.GOOS == "windows" {
command = "cmd.exe"
}
stdin, err := session.StdinPipe()
require.NoError(t, err)
defer stdin.Close()
stdout, err := session.StdoutPipe()
require.NoError(t, err)
err = session.Start(command)
require.NoError(t, err)
// Context is fine here since we're not doing a parallel subtest.
ctx := testutil.Context(t, testutil.WaitLong)
go func() {
<-ctx.Done()
_ = session.Close()
}()
s := bufio.NewScanner(stdout)
//nolint:paralleltest // These tests need to run sequentially.
for k, partialV := range map[string]string{
"CODER": "true", // From the agent.
"MY_MANIFEST": "true", // From the manifest.
"MY_OVERRIDE": "true", // From the agent environment variables option, overrides manifest.
"MY_SESSION_MANIFEST": "false", // From the manifest, overrides session env.
"MY_SESSION": "true", // From the session.
"PATH": scriptBinDir + string(filepath.ListSeparator),
} {
t.Run(k, func(t *testing.T) {
echoEnv(t, stdin, k)
// Windows is unreliable, so keep scanning until we find a match.
for s.Scan() {
got := strings.TrimSpace(s.Text())
t.Logf("%s=%s", k, got)
if strings.Contains(got, partialV) {
break
}
}
if err := s.Err(); !errors.Is(err, io.EOF) {
require.NoError(t, err)
}
})
}
}
func TestAgent_GitSSH(t *testing.T) {
t.Parallel()
session := setupSSHSession(t, agentsdk.Manifest{}, codersdk.ServiceBannerConfig{}, nil)
command := "sh -c 'echo $GIT_SSH_COMMAND'"
if runtime.GOOS == "windows" {
command = "cmd.exe /c echo %GIT_SSH_COMMAND%"
}
output, err := session.Output(command)
require.NoError(t, err)
require.True(t, strings.HasSuffix(strings.TrimSpace(string(output)), "gitssh --"))
}
func TestAgent_SessionTTYShell(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
t.Cleanup(cancel)
if runtime.GOOS == "windows" {
// This might be our implementation, or ConPTY itself.
// It's difficult to find extensive tests for it, so
// it seems like it could be either.
t.Skip("ConPTY appears to be inconsistent on Windows.")
}
session := setupSSHSession(t, agentsdk.Manifest{}, codersdk.ServiceBannerConfig{}, nil)
command := "sh"
if runtime.GOOS == "windows" {
command = "cmd.exe"
}
err := session.RequestPty("xterm", 128, 128, ssh.TerminalModes{})
require.NoError(t, err)
ptty := ptytest.New(t)
session.Stdout = ptty.Output()
session.Stderr = ptty.Output()
session.Stdin = ptty.Input()
err = session.Start(command)
require.NoError(t, err)
_ = ptty.Peek(ctx, 1) // wait for the prompt
ptty.WriteLine("echo test")
ptty.ExpectMatch("test")
ptty.WriteLine("exit")
err = session.Wait()
require.NoError(t, err)
}
func TestAgent_SessionTTYExitCode(t *testing.T) {
t.Parallel()
session := setupSSHSession(t, agentsdk.Manifest{}, codersdk.ServiceBannerConfig{}, nil)
command := "areallynotrealcommand"
err := session.RequestPty("xterm", 128, 128, ssh.TerminalModes{})
require.NoError(t, err)
ptty := ptytest.New(t)
session.Stdout = ptty.Output()
session.Stderr = ptty.Output()
session.Stdin = ptty.Input()
err = session.Start(command)
require.NoError(t, err)
err = session.Wait()
exitErr := &ssh.ExitError{}
require.True(t, xerrors.As(err, &exitErr))
if runtime.GOOS == "windows" {
assert.Equal(t, 1, exitErr.ExitStatus())
} else {
assert.Equal(t, 127, exitErr.ExitStatus())
}
}
func TestAgent_Session_TTY_MOTD(t *testing.T) {
t.Parallel()
if runtime.GOOS == "windows" {
// This might be our implementation, or ConPTY itself.
// It's difficult to find extensive tests for it, so
// it seems like it could be either.
t.Skip("ConPTY appears to be inconsistent on Windows.")
}
u, err := user.Current()
require.NoError(t, err, "get current user")
name := filepath.Join(u.HomeDir, "motd")
wantMOTD := "Welcome to your Coder workspace!"
wantServiceBanner := "Service banner text goes here"
tests := []struct {
name string
manifest agentsdk.Manifest
banner codersdk.ServiceBannerConfig
expected []string
unexpected []string
expectedRe *regexp.Regexp
}{
{
name: "WithoutServiceBanner",
manifest: agentsdk.Manifest{MOTDFile: name},
banner: codersdk.ServiceBannerConfig{},
expected: []string{wantMOTD},
unexpected: []string{wantServiceBanner},
},
{
name: "WithServiceBanner",
manifest: agentsdk.Manifest{MOTDFile: name},
banner: codersdk.ServiceBannerConfig{
Enabled: true,
Message: wantServiceBanner,
},
expected: []string{wantMOTD, wantServiceBanner},
},
{
name: "ServiceBannerDisabled",
manifest: agentsdk.Manifest{MOTDFile: name},
banner: codersdk.ServiceBannerConfig{
Enabled: false,
Message: wantServiceBanner,
},
expected: []string{wantMOTD},
unexpected: []string{wantServiceBanner},
},
{
name: "ServiceBannerOnly",
manifest: agentsdk.Manifest{},
banner: codersdk.ServiceBannerConfig{
Enabled: true,
Message: wantServiceBanner,
},
expected: []string{wantServiceBanner},
unexpected: []string{wantMOTD},
},
{
name: "None",
manifest: agentsdk.Manifest{},
banner: codersdk.ServiceBannerConfig{},
unexpected: []string{wantServiceBanner, wantMOTD},
},
{
name: "CarriageReturns",
manifest: agentsdk.Manifest{},
banner: codersdk.ServiceBannerConfig{
Enabled: true,
Message: "service\n\nbanner\nhere",
},
expected: []string{"service\r\n\r\nbanner\r\nhere\r\n\r\n"},
unexpected: []string{},
},
{
name: "Trim",
// Enable motd since it will be printed after the banner,
// this ensures that we can test for an exact mount of
// newlines.
manifest: agentsdk.Manifest{
MOTDFile: name,
},
banner: codersdk.ServiceBannerConfig{
Enabled: true,
Message: "\n\n\n\n\n\nbanner\n\n\n\n\n\n",
},
expectedRe: regexp.MustCompile(`([^\n\r]|^)banner\r\n\r\n[^\r\n]`),
},
}
for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {
t.Parallel()
session := setupSSHSession(t, test.manifest, test.banner, func(fs afero.Fs) {
err := fs.MkdirAll(filepath.Dir(name), 0o700)
require.NoError(t, err)
err = afero.WriteFile(fs, name, []byte(wantMOTD), 0o600)
require.NoError(t, err)
})
testSessionOutput(t, session, test.expected, test.unexpected, test.expectedRe)
})
}
}
//nolint:tparallel // Sub tests need to run sequentially.
func TestAgent_Session_TTY_MOTD_Update(t *testing.T) {
t.Parallel()
if runtime.GOOS == "windows" {
// This might be our implementation, or ConPTY itself.
// It's difficult to find extensive tests for it, so
// it seems like it could be either.
t.Skip("ConPTY appears to be inconsistent on Windows.")
}
// Only the banner updates dynamically; the MOTD file does not.
wantServiceBanner := "Service banner text goes here"
tests := []struct {
banner codersdk.ServiceBannerConfig
expected []string
unexpected []string
}{
{
banner: codersdk.ServiceBannerConfig{},
expected: []string{},
unexpected: []string{wantServiceBanner},
},
{
banner: codersdk.ServiceBannerConfig{
Enabled: true,
Message: wantServiceBanner,
},
expected: []string{wantServiceBanner},
},
{
banner: codersdk.ServiceBannerConfig{
Enabled: false,
Message: wantServiceBanner,
},
expected: []string{},
unexpected: []string{wantServiceBanner},
},
{
banner: codersdk.ServiceBannerConfig{
Enabled: true,
Message: wantServiceBanner,
},
expected: []string{wantServiceBanner},
unexpected: []string{},
},
{
banner: codersdk.ServiceBannerConfig{},
unexpected: []string{wantServiceBanner},
},
}
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
setSBInterval := func(_ *agenttest.Client, opts *agent.Options) {
opts.ServiceBannerRefreshInterval = 5 * time.Millisecond
}
//nolint:dogsled // Allow the blank identifiers.
conn, client, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, setSBInterval)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
t.Cleanup(func() {
_ = sshClient.Close()
})
//nolint:paralleltest // These tests need to swap the banner func.
for i, test := range tests {
test := test
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
// Set new banner func and wait for the agent to call it to update the
// banner.
ready := make(chan struct{}, 2)
client.SetServiceBannerFunc(func() (codersdk.ServiceBannerConfig, error) {
select {
case ready <- struct{}{}:
default:
}
return test.banner, nil
})
<-ready
<-ready // Wait for two updates to ensure the value has propagated.
session, err := sshClient.NewSession()
require.NoError(t, err)
t.Cleanup(func() {
_ = session.Close()
})
testSessionOutput(t, session, test.expected, test.unexpected, nil)
})
}
}
//nolint:paralleltest // This test sets an environment variable.
func TestAgent_Session_TTY_QuietLogin(t *testing.T) {
if runtime.GOOS == "windows" {
// This might be our implementation, or ConPTY itself.
// It's difficult to find extensive tests for it, so
// it seems like it could be either.
t.Skip("ConPTY appears to be inconsistent on Windows.")
}
wantNotMOTD := "Welcome to your Coder workspace!"
wantMaybeServiceBanner := "Service banner text goes here"
u, err := user.Current()
require.NoError(t, err, "get current user")
name := filepath.Join(u.HomeDir, "motd")
// Neither banner nor MOTD should show if not a login shell.
t.Run("NotLogin", func(t *testing.T) {
session := setupSSHSession(t, agentsdk.Manifest{
MOTDFile: name,
}, codersdk.ServiceBannerConfig{
Enabled: true,
Message: wantMaybeServiceBanner,
}, func(fs afero.Fs) {
err := afero.WriteFile(fs, name, []byte(wantNotMOTD), 0o600)
require.NoError(t, err, "write motd file")
})
err = session.RequestPty("xterm", 128, 128, ssh.TerminalModes{})
require.NoError(t, err)
wantEcho := "foobar"
command := "echo " + wantEcho
output, err := session.Output(command)
require.NoError(t, err)
require.Contains(t, string(output), wantEcho, "should show echo")
require.NotContains(t, string(output), wantNotMOTD, "should not show motd")
require.NotContains(t, string(output), wantMaybeServiceBanner, "should not show service banner")
})
// Only the MOTD should be silenced when hushlogin is present.
t.Run("Hushlogin", func(t *testing.T) {
session := setupSSHSession(t, agentsdk.Manifest{
MOTDFile: name,
}, codersdk.ServiceBannerConfig{
Enabled: true,
Message: wantMaybeServiceBanner,
}, func(fs afero.Fs) {
err := afero.WriteFile(fs, name, []byte(wantNotMOTD), 0o600)
require.NoError(t, err, "write motd file")
// Create hushlogin to silence motd.
err = afero.WriteFile(fs, name, []byte{}, 0o600)
require.NoError(t, err, "write hushlogin file")
})
err = session.RequestPty("xterm", 128, 128, ssh.TerminalModes{})
require.NoError(t, err)
ptty := ptytest.New(t)
var stdout bytes.Buffer
session.Stdout = &stdout
session.Stderr = ptty.Output()
session.Stdin = ptty.Input()
err = session.Shell()
require.NoError(t, err)
ptty.WriteLine("exit 0")
err = session.Wait()
require.NoError(t, err)
require.NotContains(t, stdout.String(), wantNotMOTD, "should not show motd")
require.Contains(t, stdout.String(), wantMaybeServiceBanner, "should show service banner")
})
}
func TestAgent_Session_TTY_FastCommandHasOutput(t *testing.T) {
t.Parallel()
// This test is here to prevent regressions where quickly executing
// commands (with TTY) don't sync their output to the SSH session.
//
// See: https://github.com/coder/coder/issues/6656
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
defer sshClient.Close()
ptty := ptytest.New(t)
var stdout bytes.Buffer
// NOTE(mafredri): Increase iterations to increase chance of failure,
// assuming bug is present. Limiting GOMAXPROCS further
// increases the chance of failure.
// Using 1000 iterations is basically a guaranteed failure (but let's
// not increase test times needlessly).
// Limit GOMAXPROCS (e.g. `export GOMAXPROCS=1`) to further increase
// chance of failure. Also -race helps.
for i := 0; i < 5; i++ {
func() {
stdout.Reset()
session, err := sshClient.NewSession()
require.NoError(t, err)
defer session.Close()
err = session.RequestPty("xterm", 128, 128, ssh.TerminalModes{})
require.NoError(t, err)
session.Stdout = &stdout
session.Stderr = ptty.Output()
session.Stdin = ptty.Input()
err = session.Start("echo wazzup")
require.NoError(t, err)
err = session.Wait()
require.NoError(t, err)
require.Contains(t, stdout.String(), "wazzup", "should output greeting")
}()
}
}
func TestAgent_Session_TTY_HugeOutputIsNotLost(t *testing.T) {
t.Parallel()
// This test is here to prevent regressions where a command (with or
// without) a large amount of output would not be fully copied to the
// SSH session. On unix systems, this was fixed by duplicating the file
// descriptor of the PTY master and using it for copying the output.
//
// See: https://github.com/coder/coder/issues/6656
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
defer sshClient.Close()
ptty := ptytest.New(t)
var stdout bytes.Buffer
// NOTE(mafredri): Increase iterations to increase chance of failure,
// assuming bug is present.
// Using 10 iterations is basically a guaranteed failure (but let's
// not increase test times needlessly). Run with -race and do not
// limit parallelism (`export GOMAXPROCS=10`) to increase the chance
// of failure.
for i := 0; i < 1; i++ {
func() {
stdout.Reset()
session, err := sshClient.NewSession()
require.NoError(t, err)
defer session.Close()
err = session.RequestPty("xterm", 128, 128, ssh.TerminalModes{})
require.NoError(t, err)
session.Stdout = &stdout
session.Stderr = ptty.Output()
session.Stdin = ptty.Input()
want := strings.Repeat("wazzup", 1024+1) // ~6KB, +1 because 1024 is a common buffer size.
err = session.Start("echo " + want)
require.NoError(t, err)
err = session.Wait()
require.NoError(t, err)
require.Contains(t, stdout.String(), want, "should output entire greeting")
}()
}
}
func TestAgent_TCPLocalForwarding(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
rl, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
defer rl.Close()
tcpAddr, valid := rl.Addr().(*net.TCPAddr)
require.True(t, valid)
remotePort := tcpAddr.Port
go echoOnce(t, rl)
sshClient := setupAgentSSHClient(ctx, t)
conn, err := sshClient.Dial("tcp", fmt.Sprintf("127.0.0.1:%d", remotePort))
require.NoError(t, err)
defer conn.Close()
requireEcho(t, conn)
}
func TestAgent_TCPRemoteForwarding(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
sshClient := setupAgentSSHClient(ctx, t)
localhost := netip.MustParseAddr("127.0.0.1")
var randomPort uint16
var ll net.Listener
var err error
for {
randomPort = testutil.RandomPortNoListen(t)
addr := net.TCPAddrFromAddrPort(netip.AddrPortFrom(localhost, randomPort))
ll, err = sshClient.ListenTCP(addr)
if err != nil {
t.Logf("error remote forwarding: %s", err.Error())
select {
case <-ctx.Done():
t.Fatal("timed out getting random listener")
default:
continue
}
}
break
}
defer ll.Close()
go echoOnce(t, ll)
conn, err := net.Dial("tcp", fmt.Sprintf("127.0.0.1:%d", randomPort))
require.NoError(t, err)
defer conn.Close()
requireEcho(t, conn)
}
func TestAgent_UnixLocalForwarding(t *testing.T) {
t.Parallel()
if runtime.GOOS == "windows" {
t.Skip("unix domain sockets are not fully supported on Windows")
}
ctx := testutil.Context(t, testutil.WaitLong)
tmpdir := tempDirUnixSocket(t)
remoteSocketPath := filepath.Join(tmpdir, "remote-socket")
l, err := net.Listen("unix", remoteSocketPath)
require.NoError(t, err)
defer l.Close()
go echoOnce(t, l)
sshClient := setupAgentSSHClient(ctx, t)
conn, err := sshClient.Dial("unix", remoteSocketPath)
require.NoError(t, err)
defer conn.Close()
_, err = conn.Write([]byte("test"))
require.NoError(t, err)
b := make([]byte, 4)
_, err = conn.Read(b)
require.NoError(t, err)
require.Equal(t, "test", string(b))
_ = conn.Close()
}
func TestAgent_UnixRemoteForwarding(t *testing.T) {
t.Parallel()
if runtime.GOOS == "windows" {
t.Skip("unix domain sockets are not fully supported on Windows")
}
tmpdir := tempDirUnixSocket(t)
remoteSocketPath := filepath.Join(tmpdir, "remote-socket")
ctx := testutil.Context(t, testutil.WaitLong)
sshClient := setupAgentSSHClient(ctx, t)
l, err := sshClient.ListenUnix(remoteSocketPath)
require.NoError(t, err)
defer l.Close()
go echoOnce(t, l)
conn, err := net.Dial("unix", remoteSocketPath)
require.NoError(t, err)
defer conn.Close()
requireEcho(t, conn)
}
func TestAgent_SFTP(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
u, err := user.Current()
require.NoError(t, err, "get current user")
home := u.HomeDir
if runtime.GOOS == "windows" {
home = "/" + strings.ReplaceAll(home, "\\", "/")
}
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
defer sshClient.Close()
client, err := sftp.NewClient(sshClient)
require.NoError(t, err)
defer client.Close()
wd, err := client.Getwd()
require.NoError(t, err, "get working directory")
require.Equal(t, home, wd, "working directory should be home user home")
tempFile := filepath.Join(t.TempDir(), "sftp")
// SFTP only accepts unix-y paths.
remoteFile := filepath.ToSlash(tempFile)
if !path.IsAbs(remoteFile) {
// On Windows, e.g. "/C:/Users/...".
remoteFile = path.Join("/", remoteFile)
}
file, err := client.Create(remoteFile)
require.NoError(t, err)
err = file.Close()
require.NoError(t, err)
_, err = os.Stat(tempFile)
require.NoError(t, err)
}
func TestAgent_SCP(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
defer sshClient.Close()
scpClient, err := scp.NewClientBySSH(sshClient)
require.NoError(t, err)
defer scpClient.Close()
tempFile := filepath.Join(t.TempDir(), "scp")
content := "hello world"
err = scpClient.CopyFile(context.Background(), strings.NewReader(content), tempFile, "0755")
require.NoError(t, err)
_, err = os.Stat(tempFile)
require.NoError(t, err)
}
func TestAgent_EnvironmentVariables(t *testing.T) {
t.Parallel()
key := "EXAMPLE"
value := "value"
session := setupSSHSession(t, agentsdk.Manifest{
EnvironmentVariables: map[string]string{
key: value,
},
}, codersdk.ServiceBannerConfig{}, nil)
command := "sh -c 'echo $" + key + "'"
if runtime.GOOS == "windows" {
command = "cmd.exe /c echo %" + key + "%"
}
output, err := session.Output(command)
require.NoError(t, err)
require.Equal(t, value, strings.TrimSpace(string(output)))
}
func TestAgent_EnvironmentVariableExpansion(t *testing.T) {
t.Parallel()
key := "EXAMPLE"
session := setupSSHSession(t, agentsdk.Manifest{
EnvironmentVariables: map[string]string{
key: "$SOMETHINGNOTSET",
},
}, codersdk.ServiceBannerConfig{}, nil)
command := "sh -c 'echo $" + key + "'"
if runtime.GOOS == "windows" {
command = "cmd.exe /c echo %" + key + "%"
}
output, err := session.Output(command)
require.NoError(t, err)
expect := ""
if runtime.GOOS == "windows" {
expect = "%EXAMPLE%"
}
// Output should be empty, because the variable is not set!
require.Equal(t, expect, strings.TrimSpace(string(output)))
}
func TestAgent_CoderEnvVars(t *testing.T) {
t.Parallel()
for _, key := range []string{"CODER", "CODER_WORKSPACE_NAME", "CODER_WORKSPACE_AGENT_NAME"} {
key := key
t.Run(key, func(t *testing.T) {
t.Parallel()
session := setupSSHSession(t, agentsdk.Manifest{}, codersdk.ServiceBannerConfig{}, nil)
command := "sh -c 'echo $" + key + "'"
if runtime.GOOS == "windows" {
command = "cmd.exe /c echo %" + key + "%"
}
output, err := session.Output(command)
require.NoError(t, err)
require.NotEmpty(t, strings.TrimSpace(string(output)))
})
}
}
func TestAgent_SSHConnectionEnvVars(t *testing.T) {
t.Parallel()
// Note: the SSH_TTY environment variable should only be set for TTYs.
// For some reason this test produces a TTY locally and a non-TTY in CI
// so we don't test for the absence of SSH_TTY.
for _, key := range []string{"SSH_CONNECTION", "SSH_CLIENT"} {
key := key
t.Run(key, func(t *testing.T) {
t.Parallel()
session := setupSSHSession(t, agentsdk.Manifest{}, codersdk.ServiceBannerConfig{}, nil)
command := "sh -c 'echo $" + key + "'"
if runtime.GOOS == "windows" {
command = "cmd.exe /c echo %" + key + "%"
}
output, err := session.Output(command)
require.NoError(t, err)
require.NotEmpty(t, strings.TrimSpace(string(output)))
})
}
}
func TestAgent_Metadata(t *testing.T) {
t.Parallel()
echoHello := "echo 'hello'"
t.Run("Once", func(t *testing.T) {
t.Parallel()
//nolint:dogsled
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
{
Key: "greeting1",
Interval: 0,
Script: echoHello,
},
{
Key: "greeting2",
Interval: 1,
Script: echoHello,
},
},
}, 0, func(_ *agenttest.Client, opts *agent.Options) {
opts.ReportMetadataInterval = testutil.IntervalFast
})
var gotMd map[string]agentsdk.Metadata
require.Eventually(t, func() bool {
gotMd = client.GetMetadata()
return len(gotMd) == 2
}, testutil.WaitShort, testutil.IntervalFast/2)
collectedAt1 := gotMd["greeting1"].CollectedAt
collectedAt2 := gotMd["greeting2"].CollectedAt
require.Eventually(t, func() bool {
gotMd = client.GetMetadata()
if len(gotMd) != 2 {
panic("unexpected number of metadata")
}
return !gotMd["greeting2"].CollectedAt.Equal(collectedAt2)
}, testutil.WaitShort, testutil.IntervalFast/2)
require.Equal(t, gotMd["greeting1"].CollectedAt, collectedAt1, "metadata should not be collected again")
})
t.Run("Many", func(t *testing.T) {
t.Parallel()
//nolint:dogsled
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
{
Key: "greeting",
Interval: 1,
Timeout: 100,
Script: echoHello,
},
},
}, 0, func(_ *agenttest.Client, opts *agent.Options) {
opts.ReportMetadataInterval = testutil.IntervalFast
})
var gotMd map[string]agentsdk.Metadata
require.Eventually(t, func() bool {
gotMd = client.GetMetadata()
return len(gotMd) == 1
}, testutil.WaitShort, testutil.IntervalFast/2)
collectedAt1 := gotMd["greeting"].CollectedAt
require.Equal(t, "hello", strings.TrimSpace(gotMd["greeting"].Value))
if !assert.Eventually(t, func() bool {
gotMd = client.GetMetadata()
return gotMd["greeting"].CollectedAt.After(collectedAt1)
}, testutil.WaitShort, testutil.IntervalFast/2) {
t.Fatalf("expected metadata to be collected again")
}
})
}
func TestAgentMetadata_Timing(t *testing.T) {
if runtime.GOOS == "windows" {
// Shell scripting in Windows is a pain, and we have already tested
// that the OS logic works in the simpler tests.
t.SkipNow()
}
testutil.SkipIfNotTiming(t)
t.Parallel()
dir := t.TempDir()
const reportInterval = 2
const intervalUnit = 100 * time.Millisecond
var (
greetingPath = filepath.Join(dir, "greeting")
script = "echo hello | tee -a " + greetingPath
)
//nolint:dogsled
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
{
Key: "greeting",
Interval: reportInterval,
Script: script,
},
{
Key: "bad",
Interval: reportInterval,
Script: "exit 1",
},
},
}, 0, func(_ *agenttest.Client, opts *agent.Options) {
opts.ReportMetadataInterval = intervalUnit
})
require.Eventually(t, func() bool {
return len(client.GetMetadata()) == 2
}, testutil.WaitShort, testutil.IntervalMedium)
for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
md := client.GetMetadata()
require.Len(t, md, 2, "got: %+v", md)
require.Equal(t, "hello\n", md["greeting"].Value)
require.Equal(t, "run cmd: exit status 1", md["bad"].Error)
greetingByt, err := os.ReadFile(greetingPath)
require.NoError(t, err)
var (
numGreetings = bytes.Count(greetingByt, []byte("hello"))
idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
// We allow a 50% error margin because the report loop may backlog
// in CI and other toasters. In production, there is no hard
// guarantee on timing either, and the frontend gives similar
// wiggle room to the staleness of the value.
upperBound = int(idealNumGreetings) + 1
lowerBound = (int(idealNumGreetings) / 2)
)
if idealNumGreetings < 50 {
// There is an insufficient sample size.
continue
}
t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
// The report loop may slow down on load, but it should never, ever
// speed up.
if numGreetings > upperBound {
t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
} else if numGreetings < lowerBound {
t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
}
}
}
func TestAgent_Lifecycle(t *testing.T) {
t.Parallel()
t.Run("StartTimeout", func(t *testing.T) {
t.Parallel()
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Scripts: []codersdk.WorkspaceAgentScript{{
Script: "sleep 3",
Timeout: time.Millisecond,
RunOnStart: true,
}},
}, 0)
want := []codersdk.WorkspaceAgentLifecycle{
codersdk.WorkspaceAgentLifecycleStarting,
codersdk.WorkspaceAgentLifecycleStartTimeout,
}
var got []codersdk.WorkspaceAgentLifecycle
assert.Eventually(t, func() bool {
got = client.GetLifecycleStates()
return slices.Contains(got, want[len(want)-1])
}, testutil.WaitShort, testutil.IntervalMedium)
require.Equal(t, want, got[:len(want)])
})
t.Run("StartError", func(t *testing.T) {
t.Parallel()
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Scripts: []codersdk.WorkspaceAgentScript{{
Script: "false",
Timeout: 30 * time.Second,
RunOnStart: true,
}},
}, 0)
want := []codersdk.WorkspaceAgentLifecycle{
codersdk.WorkspaceAgentLifecycleStarting,
codersdk.WorkspaceAgentLifecycleStartError,
}
var got []codersdk.WorkspaceAgentLifecycle
assert.Eventually(t, func() bool {
got = client.GetLifecycleStates()
return slices.Contains(got, want[len(want)-1])
}, testutil.WaitShort, testutil.IntervalMedium)
require.Equal(t, want, got[:len(want)])
})
t.Run("Ready", func(t *testing.T) {
t.Parallel()
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Scripts: []codersdk.WorkspaceAgentScript{{
Script: "true",
Timeout: 30 * time.Second,
RunOnStart: true,
}},
}, 0)
want := []codersdk.WorkspaceAgentLifecycle{
codersdk.WorkspaceAgentLifecycleStarting,
codersdk.WorkspaceAgentLifecycleReady,
}
var got []codersdk.WorkspaceAgentLifecycle
assert.Eventually(t, func() bool {
got = client.GetLifecycleStates()
return len(got) > 0 && got[len(got)-1] == want[len(want)-1]
}, testutil.WaitShort, testutil.IntervalMedium)
require.Equal(t, want, got)
})
t.Run("ShuttingDown", func(t *testing.T) {
t.Parallel()
_, client, _, _, closer := setupAgent(t, agentsdk.Manifest{
Scripts: []codersdk.WorkspaceAgentScript{{
Script: "sleep 3",
Timeout: 30 * time.Second,
RunOnStop: true,
}},
}, 0)
assert.Eventually(t, func() bool {
return slices.Contains(client.GetLifecycleStates(), codersdk.WorkspaceAgentLifecycleReady)
}, testutil.WaitShort, testutil.IntervalMedium)
// Start close asynchronously so that we an inspect the state.
done := make(chan struct{})
go func() {
defer close(done)
err := closer.Close()
assert.NoError(t, err)
}()
t.Cleanup(func() {
<-done
})
want := []codersdk.WorkspaceAgentLifecycle{
codersdk.WorkspaceAgentLifecycleStarting,
codersdk.WorkspaceAgentLifecycleReady,
codersdk.WorkspaceAgentLifecycleShuttingDown,
}
var got []codersdk.WorkspaceAgentLifecycle
assert.Eventually(t, func() bool {
got = client.GetLifecycleStates()
return slices.Contains(got, want[len(want)-1])
}, testutil.WaitShort, testutil.IntervalMedium)
require.Equal(t, want, got[:len(want)])
})
t.Run("ShutdownTimeout", func(t *testing.T) {
t.Parallel()
_, client, _, _, closer := setupAgent(t, agentsdk.Manifest{
Scripts: []codersdk.WorkspaceAgentScript{{
Script: "sleep 3",
Timeout: time.Millisecond,
RunOnStop: true,
}},
}, 0)
assert.Eventually(t, func() bool {
return slices.Contains(client.GetLifecycleStates(), codersdk.WorkspaceAgentLifecycleReady)
}, testutil.WaitShort, testutil.IntervalMedium)
// Start close asynchronously so that we an inspect the state.
done := make(chan struct{})
go func() {
defer close(done)
err := closer.Close()
assert.NoError(t, err)
}()
t.Cleanup(func() {
<-done
})
want := []codersdk.WorkspaceAgentLifecycle{
codersdk.WorkspaceAgentLifecycleStarting,
codersdk.WorkspaceAgentLifecycleReady,
codersdk.WorkspaceAgentLifecycleShuttingDown,
codersdk.WorkspaceAgentLifecycleShutdownTimeout,
}
var got []codersdk.WorkspaceAgentLifecycle
assert.Eventually(t, func() bool {
got = client.GetLifecycleStates()
return slices.Contains(got, want[len(want)-1])
}, testutil.WaitShort, testutil.IntervalMedium)
require.Equal(t, want, got[:len(want)])
})
t.Run("ShutdownError", func(t *testing.T) {
t.Parallel()
_, client, _, _, closer := setupAgent(t, agentsdk.Manifest{
Scripts: []codersdk.WorkspaceAgentScript{{
Script: "false",
Timeout: 30 * time.Second,
RunOnStop: true,
}},
}, 0)
assert.Eventually(t, func() bool {
return slices.Contains(client.GetLifecycleStates(), codersdk.WorkspaceAgentLifecycleReady)
}, testutil.WaitShort, testutil.IntervalMedium)
// Start close asynchronously so that we an inspect the state.
done := make(chan struct{})
go func() {
defer close(done)
err := closer.Close()
assert.NoError(t, err)
}()
t.Cleanup(func() {
<-done
})
want := []codersdk.WorkspaceAgentLifecycle{
codersdk.WorkspaceAgentLifecycleStarting,
codersdk.WorkspaceAgentLifecycleReady,
codersdk.WorkspaceAgentLifecycleShuttingDown,
codersdk.WorkspaceAgentLifecycleShutdownError,
}
var got []codersdk.WorkspaceAgentLifecycle
assert.Eventually(t, func() bool {
got = client.GetLifecycleStates()
return slices.Contains(got, want[len(want)-1])
}, testutil.WaitShort, testutil.IntervalMedium)
require.Equal(t, want, got[:len(want)])
})
t.Run("ShutdownScriptOnce", func(t *testing.T) {
t.Parallel()
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
expected := "this-is-shutdown"
derpMap, _ := tailnettest.RunDERPAndSTUN(t)
client := agenttest.NewClient(t,
logger,
uuid.New(),
agentsdk.Manifest{
DERPMap: derpMap,
Scripts: []codersdk.WorkspaceAgentScript{{
LogPath: "coder-startup-script.log",
Script: "echo 1",
RunOnStart: true,
}, {
LogPath: "coder-shutdown-script.log",
Script: "echo " + expected,
RunOnStop: true,
}},
},
make(chan *proto.Stats, 50),
tailnet.NewCoordinator(logger),
)
defer client.Close()
fs := afero.NewMemMapFs()
agent := agent.New(agent.Options{
Client: client,
Logger: logger.Named("agent"),
Filesystem: fs,
})
// agent.Close() loads the shutdown script from the agent metadata.
// The metadata is populated just before execution of the startup script, so it's mandatory to wait
// until the startup starts.
require.Eventually(t, func() bool {
outputPath := filepath.Join(os.TempDir(), "coder-startup-script.log")
content, err := afero.ReadFile(fs, outputPath)
if err != nil {
t.Logf("read file %q: %s", outputPath, err)
return false
}
return len(content) > 0 // something is in the startup log file
}, testutil.WaitShort, testutil.IntervalMedium)
err := agent.Close()
require.NoError(t, err, "agent should be closed successfully")
outputPath := filepath.Join(os.TempDir(), "coder-shutdown-script.log")
logFirstRead, err := afero.ReadFile(fs, outputPath)
require.NoError(t, err, "log file should be present")
require.Equal(t, expected, string(bytes.TrimSpace(logFirstRead)))
// Make sure that script can't be executed twice.
err = agent.Close()
require.NoError(t, err, "don't need to close the agent twice, no effect")
logSecondRead, err := afero.ReadFile(fs, outputPath)
require.NoError(t, err, "log file should be present")
require.Equal(t, string(bytes.TrimSpace(logFirstRead)), string(bytes.TrimSpace(logSecondRead)))
})
}
func TestAgent_Startup(t *testing.T) {
t.Parallel()
t.Run("EmptyDirectory", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitShort)
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Directory: "",
}, 0)
startup := testutil.RequireRecvCtx(ctx, t, client.GetStartup())
require.Equal(t, "", startup.GetExpandedDirectory())
})
t.Run("HomeDirectory", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitShort)
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Directory: "~",
}, 0)
startup := testutil.RequireRecvCtx(ctx, t, client.GetStartup())
homeDir, err := os.UserHomeDir()
require.NoError(t, err)
require.Equal(t, homeDir, startup.GetExpandedDirectory())
})
t.Run("NotAbsoluteDirectory", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitShort)
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Directory: "coder/coder",
}, 0)
startup := testutil.RequireRecvCtx(ctx, t, client.GetStartup())
homeDir, err := os.UserHomeDir()
require.NoError(t, err)
require.Equal(t, filepath.Join(homeDir, "coder/coder"), startup.GetExpandedDirectory())
})
t.Run("HomeEnvironmentVariable", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitShort)
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Directory: "$HOME",
}, 0)
startup := testutil.RequireRecvCtx(ctx, t, client.GetStartup())
homeDir, err := os.UserHomeDir()
require.NoError(t, err)
require.Equal(t, homeDir, startup.GetExpandedDirectory())
})
}
//nolint:paralleltest // This test sets an environment variable.
func TestAgent_ReconnectingPTY(t *testing.T) {
if runtime.GOOS == "windows" {
// This might be our implementation, or ConPTY itself.
// It's difficult to find extensive tests for it, so
// it seems like it could be either.
t.Skip("ConPTY appears to be inconsistent on Windows.")
}
backends := []string{"Buffered", "Screen"}
_, err := exec.LookPath("screen")
hasScreen := err == nil
// Make sure UTF-8 works even with LANG set to something like C.
t.Setenv("LANG", "C")
for _, backendType := range backends {
backendType := backendType
t.Run(backendType, func(t *testing.T) {
if backendType == "Screen" {
if runtime.GOOS != "linux" {
t.Skipf("`screen` is not supported on %s", runtime.GOOS)
} else if !hasScreen {
t.Skip("`screen` not found")
}
} else if hasScreen && runtime.GOOS == "linux" {
// Set up a PATH that does not have screen in it.
bashPath, err := exec.LookPath("bash")
require.NoError(t, err)
dir, err := os.MkdirTemp("/tmp", "coder-test-reconnecting-pty-PATH")
require.NoError(t, err, "create temp dir for reconnecting pty PATH")
err = os.Symlink(bashPath, filepath.Join(dir, "bash"))
require.NoError(t, err, "symlink bash into reconnecting pty PATH")
t.Setenv("PATH", dir)
}
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
id := uuid.New()
// --norc disables executing .bashrc, which is often used to customize the bash prompt
netConn1, err := conn.ReconnectingPTY(ctx, id, 80, 80, "bash --norc")
require.NoError(t, err)
defer netConn1.Close()
tr1 := testutil.NewTerminalReader(t, netConn1)
// A second simultaneous connection.
netConn2, err := conn.ReconnectingPTY(ctx, id, 80, 80, "bash --norc")
require.NoError(t, err)
defer netConn2.Close()
tr2 := testutil.NewTerminalReader(t, netConn2)
matchPrompt := func(line string) bool {
return strings.Contains(line, "$ ") || strings.Contains(line, "# ")
}
matchEchoCommand := func(line string) bool {
return strings.Contains(line, "echo test")
}
matchEchoOutput := func(line string) bool {
return strings.Contains(line, "test") && !strings.Contains(line, "echo")
}
matchExitCommand := func(line string) bool {
return strings.Contains(line, "exit")
}
matchExitOutput := func(line string) bool {
return strings.Contains(line, "exit") || strings.Contains(line, "logout")
}
// Wait for the prompt before writing commands. If the command arrives before the prompt is written, screen
// will sometimes put the command output on the same line as the command and the test will flake
require.NoError(t, tr1.ReadUntil(ctx, matchPrompt), "find prompt")
require.NoError(t, tr2.ReadUntil(ctx, matchPrompt), "find prompt")
data, err := json.Marshal(workspacesdk.ReconnectingPTYRequest{
Data: "echo test\r",
})
require.NoError(t, err)
_, err = netConn1.Write(data)
require.NoError(t, err)
// Once for typing the command...
require.NoError(t, tr1.ReadUntil(ctx, matchEchoCommand), "find echo command")
// And another time for the actual output.
require.NoError(t, tr1.ReadUntil(ctx, matchEchoOutput), "find echo output")
// Same for the other connection.
require.NoError(t, tr2.ReadUntil(ctx, matchEchoCommand), "find echo command")
require.NoError(t, tr2.ReadUntil(ctx, matchEchoOutput), "find echo output")
_ = netConn1.Close()
_ = netConn2.Close()
netConn3, err := conn.ReconnectingPTY(ctx, id, 80, 80, "bash --norc")
require.NoError(t, err)
defer netConn3.Close()
tr3 := testutil.NewTerminalReader(t, netConn3)
// Same output again!
require.NoError(t, tr3.ReadUntil(ctx, matchEchoCommand), "find echo command")
require.NoError(t, tr3.ReadUntil(ctx, matchEchoOutput), "find echo output")
// Exit should cause the connection to close.
data, err = json.Marshal(workspacesdk.ReconnectingPTYRequest{
Data: "exit\r",
})
require.NoError(t, err)
_, err = netConn3.Write(data)
require.NoError(t, err)
// Once for the input and again for the output.
require.NoError(t, tr3.ReadUntil(ctx, matchExitCommand), "find exit command")
require.NoError(t, tr3.ReadUntil(ctx, matchExitOutput), "find exit output")
// Wait for the connection to close.
require.ErrorIs(t, tr3.ReadUntil(ctx, nil), io.EOF)
// Try a non-shell command. It should output then immediately exit.
netConn4, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "echo test")
require.NoError(t, err)
defer netConn4.Close()
tr4 := testutil.NewTerminalReader(t, netConn4)
require.NoError(t, tr4.ReadUntil(ctx, matchEchoOutput), "find echo output")
require.ErrorIs(t, tr4.ReadUntil(ctx, nil), io.EOF)
// Ensure that UTF-8 is supported. Avoid the terminal emulator because it
// does not appear to support UTF-8, just make sure the bytes that come
// back have the character in it.
netConn5, err := conn.ReconnectingPTY(ctx, uuid.New(), 80, 80, "echo ")
require.NoError(t, err)
defer netConn5.Close()
bytes, err := io.ReadAll(netConn5)
require.NoError(t, err)
require.Contains(t, string(bytes), "")
})
}
}
func TestAgent_Dial(t *testing.T) {
t.Parallel()
cases := []struct {
name string
setup func(t *testing.T) net.Listener
}{
{
name: "TCP",
setup: func(t *testing.T) net.Listener {
l, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err, "create TCP listener")
return l
},
},
{
name: "UDP",
setup: func(t *testing.T) net.Listener {
addr := net.UDPAddr{
IP: net.ParseIP("127.0.0.1"),
Port: 0,
}
l, err := udp.Listen("udp", &addr)
require.NoError(t, err, "create UDP listener")
return l
},
},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
// The purpose of this test is to ensure that a client can dial a
// listener in the workspace over tailnet.
l := c.setup(t)
done := make(chan struct{})
defer func() {
l.Close()
<-done
}()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
go func() {
defer close(done)
c, err := l.Accept()
if assert.NoError(t, err, "accept connection") {
defer c.Close()
testAccept(ctx, t, c)
}
}()
//nolint:dogsled
agentConn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
require.True(t, agentConn.AwaitReachable(ctx))
conn, err := agentConn.DialContext(ctx, l.Addr().Network(), l.Addr().String())
require.NoError(t, err)
defer conn.Close()
testDial(ctx, t, conn)
})
}
}
// TestAgent_UpdatedDERP checks that agents can handle their DERP map being
// updated, and that clients can also handle it.
func TestAgent_UpdatedDERP(t *testing.T) {
t.Parallel()
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
originalDerpMap, _ := tailnettest.RunDERPAndSTUN(t)
require.NotNil(t, originalDerpMap)
coordinator := tailnet.NewCoordinator(logger)
// use t.Cleanup so the coordinator closing doesn't deadlock with in-memory
// coordination
t.Cleanup(func() {
_ = coordinator.Close()
})
agentID := uuid.New()
statsCh := make(chan *proto.Stats, 50)
fs := afero.NewMemMapFs()
client := agenttest.NewClient(t,
logger.Named("agent"),
agentID,
agentsdk.Manifest{
DERPMap: originalDerpMap,
// Force DERP.
DisableDirectConnections: true,
},
statsCh,
coordinator,
)
t.Cleanup(func() {
t.Log("closing client")
client.Close()
})
uut := agent.New(agent.Options{
Client: client,
Filesystem: fs,
Logger: logger.Named("agent"),
ReconnectingPTYTimeout: time.Minute,
})
t.Cleanup(func() {
t.Log("closing agent")
_ = uut.Close()
})
// Setup a client connection.
newClientConn := func(derpMap *tailcfg.DERPMap, name string) *workspacesdk.AgentConn {
conn, err := tailnet.NewConn(&tailnet.Options{
Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)},
DERPMap: derpMap,
Logger: logger.Named(name),
})
require.NoError(t, err)
t.Cleanup(func() {
t.Logf("closing conn %s", name)
_ = conn.Close()
})
testCtx, testCtxCancel := context.WithCancel(context.Background())
t.Cleanup(testCtxCancel)
clientID := uuid.New()
coordination := tailnet.NewInMemoryCoordination(
testCtx, logger,
clientID, agentID,
coordinator, conn)
t.Cleanup(func() {
t.Logf("closing coordination %s", name)
err := coordination.Close()
if err != nil {
t.Logf("error closing in-memory coordination: %s", err.Error())
}
t.Logf("closed coordination %s", name)
})
// Force DERP.
conn.SetBlockEndpoints(true)
sdkConn := workspacesdk.NewAgentConn(conn, workspacesdk.AgentConnOptions{
AgentID: agentID,
CloseFunc: func() error { return workspacesdk.ErrSkipClose },
})
t.Cleanup(func() {
t.Logf("closing sdkConn %s", name)
_ = sdkConn.Close()
})
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
if !sdkConn.AwaitReachable(ctx) {
t.Fatal("agent not reachable")
}
return sdkConn
}
conn1 := newClientConn(originalDerpMap, "client1")
// Change the DERP map.
newDerpMap, _ := tailnettest.RunDERPAndSTUN(t)
require.NotNil(t, newDerpMap)
// Change the region ID.
newDerpMap.Regions[2] = newDerpMap.Regions[1]
delete(newDerpMap.Regions, 1)
newDerpMap.Regions[2].RegionID = 2
for _, node := range newDerpMap.Regions[2].Nodes {
node.RegionID = 2
}
// Push a new DERP map to the agent.
err := client.PushDERPMapUpdate(newDerpMap)
require.NoError(t, err)
t.Logf("pushed DERPMap update to agent")
require.Eventually(t, func() bool {
conn := uut.TailnetConn()
if conn == nil {
return false
}
regionIDs := conn.DERPMap().RegionIDs()
preferredDERP := conn.Node().PreferredDERP
t.Logf("agent Conn DERPMap with regionIDs %v, PreferredDERP %d", regionIDs, preferredDERP)
return len(regionIDs) == 1 && regionIDs[0] == 2 && preferredDERP == 2
}, testutil.WaitLong, testutil.IntervalFast)
t.Logf("agent got the new DERPMap")
// Connect from a second client and make sure it uses the new DERP map.
conn2 := newClientConn(newDerpMap, "client2")
require.Equal(t, []int{2}, conn2.DERPMap().RegionIDs())
t.Log("conn2 got the new DERPMap")
// If the first client gets a DERP map update, it should be able to
// reconnect just fine.
conn1.SetDERPMap(newDerpMap)
require.Equal(t, []int{2}, conn1.DERPMap().RegionIDs())
t.Log("set the new DERPMap on conn1")
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
require.True(t, conn1.AwaitReachable(ctx))
t.Log("conn1 reached agent with new DERP")
}
func TestAgent_Speedtest(t *testing.T) {
t.Parallel()
t.Skip("This test is relatively flakey because of Tailscale's speedtest code...")
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
derpMap, _ := tailnettest.RunDERPAndSTUN(t)
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{
DERPMap: derpMap,
}, 0, func(client *agenttest.Client, options *agent.Options) {
options.Logger = logger.Named("agent")
})
defer conn.Close()
res, err := conn.Speedtest(ctx, speedtest.Upload, 250*time.Millisecond)
require.NoError(t, err)
t.Logf("%.2f MBits/s", res[len(res)-1].MBitsPerSecond())
}
func TestAgent_Reconnect(t *testing.T) {
t.Parallel()
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
// After the agent is disconnected from a coordinator, it's supposed
// to reconnect!
coordinator := tailnet.NewCoordinator(logger)
defer coordinator.Close()
agentID := uuid.New()
statsCh := make(chan *proto.Stats, 50)
derpMap, _ := tailnettest.RunDERPAndSTUN(t)
client := agenttest.NewClient(t,
logger,
agentID,
agentsdk.Manifest{
DERPMap: derpMap,
},
statsCh,
coordinator,
)
defer client.Close()
initialized := atomic.Int32{}
closer := agent.New(agent.Options{
ExchangeToken: func(ctx context.Context) (string, error) {
initialized.Add(1)
return "", nil
},
Client: client,
Logger: logger.Named("agent"),
})
defer closer.Close()
require.Eventually(t, func() bool {
return coordinator.Node(agentID) != nil
}, testutil.WaitShort, testutil.IntervalFast)
client.LastWorkspaceAgent()
require.Eventually(t, func() bool {
return initialized.Load() == 2
}, testutil.WaitShort, testutil.IntervalFast)
}
func TestAgent_WriteVSCodeConfigs(t *testing.T) {
t.Parallel()
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
coordinator := tailnet.NewCoordinator(logger)
defer coordinator.Close()
client := agenttest.NewClient(t,
logger,
uuid.New(),
agentsdk.Manifest{
GitAuthConfigs: 1,
DERPMap: &tailcfg.DERPMap{},
},
make(chan *proto.Stats, 50),
coordinator,
)
defer client.Close()
filesystem := afero.NewMemMapFs()
closer := agent.New(agent.Options{
ExchangeToken: func(ctx context.Context) (string, error) {
return "", nil
},
Client: client,
Logger: logger.Named("agent"),
Filesystem: filesystem,
})
defer closer.Close()
home, err := os.UserHomeDir()
require.NoError(t, err)
name := filepath.Join(home, ".vscode-server", "data", "Machine", "settings.json")
require.Eventually(t, func() bool {
_, err := filesystem.Stat(name)
return err == nil
}, testutil.WaitShort, testutil.IntervalFast)
}
func TestAgent_DebugServer(t *testing.T) {
t.Parallel()
logDir := t.TempDir()
logPath := filepath.Join(logDir, "coder-agent.log")
randLogStr, err := cryptorand.String(32)
require.NoError(t, err)
require.NoError(t, os.WriteFile(logPath, []byte(randLogStr), 0o600))
derpMap, _ := tailnettest.RunDERPAndSTUN(t)
//nolint:dogsled
conn, _, _, _, agnt := setupAgent(t, agentsdk.Manifest{
DERPMap: derpMap,
}, 0, func(c *agenttest.Client, o *agent.Options) {
o.ExchangeToken = func(context.Context) (string, error) {
return "token", nil
}
o.LogDir = logDir
})
awaitReachableCtx := testutil.Context(t, testutil.WaitLong)
ok := conn.AwaitReachable(awaitReachableCtx)
require.True(t, ok)
_ = conn.Close()
srv := httptest.NewServer(agnt.HTTPDebug())
t.Cleanup(srv.Close)
t.Run("MagicsockDebug", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, srv.URL+"/debug/magicsock", nil)
require.NoError(t, err)
res, err := srv.Client().Do(req)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
resBody, err := io.ReadAll(res.Body)
require.NoError(t, err)
require.Contains(t, string(resBody), "<h1>magicsock</h1>")
})
t.Run("MagicsockDebugLogging", func(t *testing.T) {
t.Parallel()
t.Run("Enable", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, srv.URL+"/debug/magicsock/debug-logging/t", nil)
require.NoError(t, err)
res, err := srv.Client().Do(req)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
resBody, err := io.ReadAll(res.Body)
require.NoError(t, err)
require.Contains(t, string(resBody), "updated magicsock debug logging to true")
})
t.Run("Disable", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, srv.URL+"/debug/magicsock/debug-logging/0", nil)
require.NoError(t, err)
res, err := srv.Client().Do(req)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
resBody, err := io.ReadAll(res.Body)
require.NoError(t, err)
require.Contains(t, string(resBody), "updated magicsock debug logging to false")
})
t.Run("Invalid", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, srv.URL+"/debug/magicsock/debug-logging/blah", nil)
require.NoError(t, err)
res, err := srv.Client().Do(req)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusBadRequest, res.StatusCode)
resBody, err := io.ReadAll(res.Body)
require.NoError(t, err)
require.Contains(t, string(resBody), `invalid state "blah", must be a boolean`)
})
})
t.Run("Manifest", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, srv.URL+"/debug/manifest", nil)
require.NoError(t, err)
res, err := srv.Client().Do(req)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
var v agentsdk.Manifest
require.NoError(t, json.NewDecoder(res.Body).Decode(&v))
require.NotNil(t, v)
})
t.Run("Logs", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitLong)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, srv.URL+"/debug/logs", nil)
require.NoError(t, err)
res, err := srv.Client().Do(req)
require.NoError(t, err)
require.Equal(t, http.StatusOK, res.StatusCode)
defer res.Body.Close()
resBody, err := io.ReadAll(res.Body)
require.NoError(t, err)
require.NotEmpty(t, string(resBody))
require.Contains(t, string(resBody), randLogStr)
})
}
func TestAgent_ScriptLogging(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("bash scripts only")
}
t.Parallel()
ctx := testutil.Context(t, testutil.WaitMedium)
derpMap, _ := tailnettest.RunDERPAndSTUN(t)
logsCh := make(chan *proto.BatchCreateLogsRequest, 100)
lsStart := uuid.UUID{0x11}
lsStop := uuid.UUID{0x22}
//nolint:dogsled
_, _, _, _, agnt := setupAgent(
t,
agentsdk.Manifest{
DERPMap: derpMap,
Scripts: []codersdk.WorkspaceAgentScript{
{
LogSourceID: lsStart,
RunOnStart: true,
Script: `#!/bin/sh
i=0
while [ $i -ne 5 ]
do
i=$(($i+1))
echo "start $i"
done
`,
},
{
LogSourceID: lsStop,
RunOnStop: true,
Script: `#!/bin/sh
i=0
while [ $i -ne 3000 ]
do
i=$(($i+1))
echo "stop $i"
done
`, // send a lot of stop logs to make sure we don't truncate shutdown logs before closing the API conn
},
},
},
0,
func(cl *agenttest.Client, _ *agent.Options) {
cl.SetLogsChannel(logsCh)
},
)
n := 1
for n <= 5 {
logs := testutil.RequireRecvCtx(ctx, t, logsCh)
require.NotNil(t, logs)
for _, l := range logs.GetLogs() {
require.Equal(t, fmt.Sprintf("start %d", n), l.GetOutput())
n++
}
}
err := agnt.Close()
require.NoError(t, err)
n = 1
for n <= 3000 {
logs := testutil.RequireRecvCtx(ctx, t, logsCh)
require.NotNil(t, logs)
for _, l := range logs.GetLogs() {
require.Equal(t, fmt.Sprintf("stop %d", n), l.GetOutput())
n++
}
t.Logf("got %d stop logs", n-1)
}
}
// setupAgentSSHClient creates an agent, dials it, and sets up an ssh.Client for it
func setupAgentSSHClient(ctx context.Context, t *testing.T) *ssh.Client {
//nolint: dogsled
agentConn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0)
sshClient, err := agentConn.SSHClient(ctx)
require.NoError(t, err)
t.Cleanup(func() { sshClient.Close() })
return sshClient
}
func setupSSHSession(
t *testing.T,
manifest agentsdk.Manifest,
serviceBanner codersdk.ServiceBannerConfig,
prepareFS func(fs afero.Fs),
opts ...func(*agenttest.Client, *agent.Options),
) *ssh.Session {
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
opts = append(opts, func(c *agenttest.Client, o *agent.Options) {
c.SetServiceBannerFunc(func() (codersdk.ServiceBannerConfig, error) {
return serviceBanner, nil
})
})
//nolint:dogsled
conn, _, _, fs, _ := setupAgent(t, manifest, 0, opts...)
if prepareFS != nil {
prepareFS(fs)
}
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
t.Cleanup(func() {
_ = sshClient.Close()
})
session, err := sshClient.NewSession()
require.NoError(t, err)
t.Cleanup(func() {
_ = session.Close()
})
return session
}
func setupAgent(t *testing.T, metadata agentsdk.Manifest, ptyTimeout time.Duration, opts ...func(*agenttest.Client, *agent.Options)) (
*workspacesdk.AgentConn,
*agenttest.Client,
<-chan *proto.Stats,
afero.Fs,
agent.Agent,
) {
logger := slogtest.Make(t, &slogtest.Options{
// Agent can drop errors when shutting down, and some, like the
// fasthttplistener connection closed error, are unexported.
IgnoreErrors: true,
}).Leveled(slog.LevelDebug)
if metadata.DERPMap == nil {
metadata.DERPMap, _ = tailnettest.RunDERPAndSTUN(t)
}
if metadata.AgentID == uuid.Nil {
metadata.AgentID = uuid.New()
}
if metadata.AgentName == "" {
metadata.AgentName = "test-agent"
}
if metadata.WorkspaceName == "" {
metadata.WorkspaceName = "test-workspace"
}
if metadata.WorkspaceID == uuid.Nil {
metadata.WorkspaceID = uuid.New()
}
coordinator := tailnet.NewCoordinator(logger)
t.Cleanup(func() {
_ = coordinator.Close()
})
statsCh := make(chan *proto.Stats, 50)
fs := afero.NewMemMapFs()
c := agenttest.NewClient(t, logger.Named("agenttest"), metadata.AgentID, metadata, statsCh, coordinator)
t.Cleanup(c.Close)
options := agent.Options{
Client: c,
Filesystem: fs,
Logger: logger.Named("agent"),
ReconnectingPTYTimeout: ptyTimeout,
EnvironmentVariables: map[string]string{},
}
for _, opt := range opts {
opt(c, &options)
}
agnt := agent.New(options)
t.Cleanup(func() {
_ = agnt.Close()
})
conn, err := tailnet.NewConn(&tailnet.Options{
Addresses: []netip.Prefix{netip.PrefixFrom(tailnet.IP(), 128)},
DERPMap: metadata.DERPMap,
Logger: logger.Named("client"),
})
require.NoError(t, err)
t.Cleanup(func() {
_ = conn.Close()
})
testCtx, testCtxCancel := context.WithCancel(context.Background())
t.Cleanup(testCtxCancel)
clientID := uuid.New()
coordination := tailnet.NewInMemoryCoordination(
testCtx, logger,
clientID, metadata.AgentID,
coordinator, conn)
t.Cleanup(func() {
err := coordination.Close()
if err != nil {
t.Logf("error closing in-mem coordination: %s", err.Error())
}
})
agentConn := workspacesdk.NewAgentConn(conn, workspacesdk.AgentConnOptions{
AgentID: metadata.AgentID,
})
t.Cleanup(func() {
_ = agentConn.Close()
})
// Ideally we wouldn't wait too long here, but sometimes the the
// networking needs more time to resolve itself.
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
if !agentConn.AwaitReachable(ctx) {
t.Fatal("agent not reachable")
}
return agentConn, c, statsCh, fs, agnt
}
var dialTestPayload = []byte("dean-was-here123")
func testDial(ctx context.Context, t *testing.T, c net.Conn) {
t.Helper()
if deadline, ok := ctx.Deadline(); ok {
err := c.SetDeadline(deadline)
assert.NoError(t, err)
defer func() {
err := c.SetDeadline(time.Time{})
assert.NoError(t, err)
}()
}
assertWritePayload(t, c, dialTestPayload)
assertReadPayload(t, c, dialTestPayload)
}
func testAccept(ctx context.Context, t *testing.T, c net.Conn) {
t.Helper()
defer c.Close()
if deadline, ok := ctx.Deadline(); ok {
err := c.SetDeadline(deadline)
assert.NoError(t, err)
defer func() {
err := c.SetDeadline(time.Time{})
assert.NoError(t, err)
}()
}
assertReadPayload(t, c, dialTestPayload)
assertWritePayload(t, c, dialTestPayload)
}
func assertReadPayload(t *testing.T, r io.Reader, payload []byte) {
t.Helper()
b := make([]byte, len(payload)+16)
n, err := r.Read(b)
assert.NoError(t, err, "read payload")
assert.Equal(t, len(payload), n, "read payload length does not match")
assert.Equal(t, payload, b[:n])
}
func assertWritePayload(t *testing.T, w io.Writer, payload []byte) {
t.Helper()
n, err := w.Write(payload)
assert.NoError(t, err, "write payload")
assert.Equal(t, len(payload), n, "payload length does not match")
}
func testSessionOutput(t *testing.T, session *ssh.Session, expected, unexpected []string, expectedRe *regexp.Regexp) {
t.Helper()
err := session.RequestPty("xterm", 128, 128, ssh.TerminalModes{})
require.NoError(t, err)
ptty := ptytest.New(t)
var stdout bytes.Buffer
session.Stdout = &stdout
session.Stderr = ptty.Output()
session.Stdin = ptty.Input()
err = session.Shell()
require.NoError(t, err)
ptty.WriteLine("exit 0")
err = session.Wait()
require.NoError(t, err)
for _, unexpected := range unexpected {
require.NotContains(t, stdout.String(), unexpected, "should not show output")
}
for _, expect := range expected {
require.Contains(t, stdout.String(), expect, "should show output")
}
if expectedRe != nil {
require.Regexp(t, expectedRe, stdout.String())
}
}
// tempDirUnixSocket returns a temporary directory that can safely hold unix
// sockets (probably).
//
// During tests on darwin we hit the max path length limit for unix sockets
// pretty easily in the default location, so this function uses /tmp instead to
// get shorter paths.
func tempDirUnixSocket(t *testing.T) string {
t.Helper()
if runtime.GOOS == "darwin" {
testName := strings.ReplaceAll(t.Name(), "/", "_")
dir, err := os.MkdirTemp("/tmp", fmt.Sprintf("coder-test-%s-", testName))
require.NoError(t, err, "create temp dir for gpg test")
t.Cleanup(func() {
err := os.RemoveAll(dir)
assert.NoError(t, err, "remove temp dir", dir)
})
return dir
}
return t.TempDir()
}
func TestAgent_Metrics_SSH(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
registry := prometheus.NewRegistry()
//nolint:dogsled
conn, _, _, _, _ := setupAgent(t, agentsdk.Manifest{}, 0, func(_ *agenttest.Client, o *agent.Options) {
o.PrometheusRegistry = registry
})
sshClient, err := conn.SSHClient(ctx)
require.NoError(t, err)
defer sshClient.Close()
session, err := sshClient.NewSession()
require.NoError(t, err)
defer session.Close()
stdin, err := session.StdinPipe()
require.NoError(t, err)
err = session.Shell()
require.NoError(t, err)
expected := []agentsdk.AgentMetric{
{
Name: "agent_reconnecting_pty_connections_total",
Type: agentsdk.AgentMetricTypeCounter,
Value: 0,
},
{
Name: "agent_sessions_total",
Type: agentsdk.AgentMetricTypeCounter,
Value: 1,
Labels: []agentsdk.AgentMetricLabel{
{
Name: "magic_type",
Value: "ssh",
},
{
Name: "pty",
Value: "no",
},
},
},
{
Name: "agent_ssh_server_failed_connections_total",
Type: agentsdk.AgentMetricTypeCounter,
Value: 0,
},
{
Name: "agent_ssh_server_sftp_connections_total",
Type: agentsdk.AgentMetricTypeCounter,
Value: 0,
},
{
Name: "agent_ssh_server_sftp_server_errors_total",
Type: agentsdk.AgentMetricTypeCounter,
Value: 0,
},
{
Name: "coderd_agentstats_startup_script_seconds",
Type: agentsdk.AgentMetricTypeGauge,
Value: 0,
Labels: []agentsdk.AgentMetricLabel{
{
Name: "success",
Value: "true",
},
},
},
}
var actual []*promgo.MetricFamily
assert.Eventually(t, func() bool {
actual, err = registry.Gather()
if err != nil {
return false
}
if len(expected) != len(actual) {
return false
}
return verifyCollectedMetrics(t, expected, actual)
}, testutil.WaitLong, testutil.IntervalFast)
require.Len(t, actual, len(expected))
collected := verifyCollectedMetrics(t, expected, actual)
require.True(t, collected, "expected metrics were not collected")
_ = stdin.Close()
err = session.Wait()
require.NoError(t, err)
}
func TestAgent_ManageProcessPriority(t *testing.T) {
t.Parallel()
t.Run("OK", func(t *testing.T) {
t.Parallel()
if runtime.GOOS != "linux" {
t.Skip("Skipping non-linux environment")
}
var (
expectedProcs = map[int32]agentproc.Process{}
fs = afero.NewMemMapFs()
syscaller = agentproctest.NewMockSyscaller(gomock.NewController(t))
ticker = make(chan time.Time)
modProcs = make(chan []*agentproc.Process)
logger = slog.Make(sloghuman.Sink(io.Discard))
)
requireFileWrite(t, fs, "/proc/self/oom_score_adj", "-500")
// Create some processes.
for i := 0; i < 4; i++ {
// Create a prioritized process.
var proc agentproc.Process
if i == 0 {
proc = agentproctest.GenerateProcess(t, fs,
func(p *agentproc.Process) {
p.CmdLine = "./coder\x00agent\x00--no-reap"
p.PID = int32(i)
},
)
} else {
proc = agentproctest.GenerateProcess(t, fs,
func(p *agentproc.Process) {
// Make the cmd something similar to a prioritized
// process but differentiate the arguments.
p.CmdLine = "./coder\x00stat"
},
)
syscaller.EXPECT().GetPriority(proc.PID).Return(20, nil)
syscaller.EXPECT().SetPriority(proc.PID, 10).Return(nil)
}
syscaller.EXPECT().
Kill(proc.PID, syscall.Signal(0)).
Return(nil)
expectedProcs[proc.PID] = proc
}
_, _, _, _, _ = setupAgent(t, agentsdk.Manifest{}, 0, func(c *agenttest.Client, o *agent.Options) {
o.Syscaller = syscaller
o.ModifiedProcesses = modProcs
o.EnvironmentVariables = map[string]string{agent.EnvProcPrioMgmt: "1"}
o.Filesystem = fs
o.Logger = logger
o.ProcessManagementTick = ticker
})
actualProcs := <-modProcs
require.Len(t, actualProcs, len(expectedProcs)-1)
for _, proc := range actualProcs {
requireFileEquals(t, fs, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), "0")
}
})
t.Run("IgnoreCustomNice", func(t *testing.T) {
t.Parallel()
if runtime.GOOS != "linux" {
t.Skip("Skipping non-linux environment")
}
var (
expectedProcs = map[int32]agentproc.Process{}
fs = afero.NewMemMapFs()
ticker = make(chan time.Time)
syscaller = agentproctest.NewMockSyscaller(gomock.NewController(t))
modProcs = make(chan []*agentproc.Process)
logger = slog.Make(sloghuman.Sink(io.Discard))
)
err := afero.WriteFile(fs, "/proc/self/oom_score_adj", []byte("0"), 0o644)
require.NoError(t, err)
// Create some processes.
for i := 0; i < 3; i++ {
proc := agentproctest.GenerateProcess(t, fs)
syscaller.EXPECT().
Kill(proc.PID, syscall.Signal(0)).
Return(nil)
if i == 0 {
// Set a random nice score. This one should not be adjusted by
// our management loop.
syscaller.EXPECT().GetPriority(proc.PID).Return(25, nil)
} else {
syscaller.EXPECT().GetPriority(proc.PID).Return(20, nil)
syscaller.EXPECT().SetPriority(proc.PID, 10).Return(nil)
}
expectedProcs[proc.PID] = proc
}
_, _, _, _, _ = setupAgent(t, agentsdk.Manifest{}, 0, func(c *agenttest.Client, o *agent.Options) {
o.Syscaller = syscaller
o.ModifiedProcesses = modProcs
o.EnvironmentVariables = map[string]string{agent.EnvProcPrioMgmt: "1"}
o.Filesystem = fs
o.Logger = logger
o.ProcessManagementTick = ticker
})
actualProcs := <-modProcs
// We should ignore the process with a custom nice score.
require.Len(t, actualProcs, 2)
for _, proc := range actualProcs {
_, ok := expectedProcs[proc.PID]
require.True(t, ok)
requireFileEquals(t, fs, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), "998")
}
})
t.Run("CustomOOMScore", func(t *testing.T) {
t.Parallel()
if runtime.GOOS != "linux" {
t.Skip("Skipping non-linux environment")
}
var (
fs = afero.NewMemMapFs()
ticker = make(chan time.Time)
syscaller = agentproctest.NewMockSyscaller(gomock.NewController(t))
modProcs = make(chan []*agentproc.Process)
logger = slog.Make(sloghuman.Sink(io.Discard))
)
err := afero.WriteFile(fs, "/proc/self/oom_score_adj", []byte("0"), 0o644)
require.NoError(t, err)
// Create some processes.
for i := 0; i < 3; i++ {
proc := agentproctest.GenerateProcess(t, fs)
syscaller.EXPECT().
Kill(proc.PID, syscall.Signal(0)).
Return(nil)
syscaller.EXPECT().GetPriority(proc.PID).Return(20, nil)
syscaller.EXPECT().SetPriority(proc.PID, 10).Return(nil)
}
_, _, _, _, _ = setupAgent(t, agentsdk.Manifest{}, 0, func(c *agenttest.Client, o *agent.Options) {
o.Syscaller = syscaller
o.ModifiedProcesses = modProcs
o.EnvironmentVariables = map[string]string{
agent.EnvProcPrioMgmt: "1",
agent.EnvProcOOMScore: "-567",
}
o.Filesystem = fs
o.Logger = logger
o.ProcessManagementTick = ticker
})
actualProcs := <-modProcs
// We should ignore the process with a custom nice score.
require.Len(t, actualProcs, 3)
for _, proc := range actualProcs {
requireFileEquals(t, fs, fmt.Sprintf("/proc/%d/oom_score_adj", proc.PID), "-567")
}
})
t.Run("DisabledByDefault", func(t *testing.T) {
t.Parallel()
if runtime.GOOS != "linux" {
t.Skip("Skipping non-linux environment")
}
var (
buf bytes.Buffer
wr = &syncWriter{
w: &buf,
}
)
log := slog.Make(sloghuman.Sink(wr)).Leveled(slog.LevelDebug)
_, _, _, _, _ = setupAgent(t, agentsdk.Manifest{}, 0, func(c *agenttest.Client, o *agent.Options) {
o.Logger = log
})
require.Eventually(t, func() bool {
wr.mu.Lock()
defer wr.mu.Unlock()
return strings.Contains(buf.String(), "process priority not enabled")
}, testutil.WaitLong, testutil.IntervalFast)
})
t.Run("DisabledForNonLinux", func(t *testing.T) {
t.Parallel()
if runtime.GOOS == "linux" {
t.Skip("Skipping linux environment")
}
var (
buf bytes.Buffer
wr = &syncWriter{
w: &buf,
}
)
log := slog.Make(sloghuman.Sink(wr)).Leveled(slog.LevelDebug)
_, _, _, _, _ = setupAgent(t, agentsdk.Manifest{}, 0, func(c *agenttest.Client, o *agent.Options) {
o.Logger = log
// Try to enable it so that we can assert that non-linux
// environments are truly disabled.
o.EnvironmentVariables = map[string]string{agent.EnvProcPrioMgmt: "1"}
})
require.Eventually(t, func() bool {
wr.mu.Lock()
defer wr.mu.Unlock()
return strings.Contains(buf.String(), "process priority not enabled")
}, testutil.WaitLong, testutil.IntervalFast)
})
}
func verifyCollectedMetrics(t *testing.T, expected []agentsdk.AgentMetric, actual []*promgo.MetricFamily) bool {
t.Helper()
for i, e := range expected {
assert.Equal(t, e.Name, actual[i].GetName())
assert.Equal(t, string(e.Type), strings.ToLower(actual[i].GetType().String()))
for _, m := range actual[i].GetMetric() {
assert.Equal(t, e.Value, m.Counter.GetValue())
if len(m.GetLabel()) > 0 {
for j, lbl := range m.GetLabel() {
assert.Equal(t, e.Labels[j].Name, lbl.GetName())
assert.Equal(t, e.Labels[j].Value, lbl.GetValue())
}
}
m.GetLabel()
}
}
return true
}
type syncWriter struct {
mu sync.Mutex
w io.Writer
}
func (s *syncWriter) Write(p []byte) (int, error) {
s.mu.Lock()
defer s.mu.Unlock()
return s.w.Write(p)
}
// echoOnce accepts a single connection, reads 4 bytes and echos them back
func echoOnce(t *testing.T, ll net.Listener) {
t.Helper()
conn, err := ll.Accept()
if err != nil {
return
}
defer conn.Close()
b := make([]byte, 4)
_, err = conn.Read(b)
if !assert.NoError(t, err) {
return
}
_, err = conn.Write(b)
if !assert.NoError(t, err) {
return
}
}
// requireEcho sends 4 bytes and requires the read response to match what was sent.
func requireEcho(t *testing.T, conn net.Conn) {
t.Helper()
_, err := conn.Write([]byte("test"))
require.NoError(t, err)
b := make([]byte, 4)
_, err = conn.Read(b)
require.NoError(t, err)
require.Equal(t, "test", string(b))
}
func requireFileWrite(t testing.TB, fs afero.Fs, fp, data string) {
t.Helper()
err := afero.WriteFile(fs, fp, []byte(data), 0o600)
require.NoError(t, err)
}
func requireFileEquals(t testing.TB, fs afero.Fs, fp, expect string) {
t.Helper()
actual, err := afero.ReadFile(fs, fp)
require.NoError(t, err)
require.Equal(t, expect, string(actual))
}