feat: replace ssh maxTimeout with keep-alive mechanism (#8062)

* Bump up coder/ssh

* feat: Set default agent timeout to ~72h

* Address PR comments

* Fix
This commit is contained in:
Marcin Tojek 2023-06-16 15:22:18 +02:00 committed by GitHub
parent 751c0505bf
commit 247f8a973f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 30 additions and 13 deletions

View File

@ -105,7 +105,7 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom
metrics: metrics,
}
s.srv = &ssh.Server{
srv := &ssh.Server{
ChannelHandlers: map[string]ssh.ChannelHandler{
"direct-tcpip": ssh.DirectTCPIPHandler,
"direct-streamlocal@openssh.com": directStreamLocalHandler,
@ -149,9 +149,19 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom
SubsystemHandlers: map[string]ssh.SubsystemHandler{
"sftp": s.sessionHandler,
},
MaxTimeout: maxTimeout,
}
// The MaxTimeout functionality has been substituted with the introduction of the KeepAlive feature.
// In cases where very short timeouts are set, the SSH server will automatically switch to the connection timeout for both read and write operations.
if maxTimeout >= 3*time.Second {
srv.ClientAliveCountMax = 3
srv.ClientAliveInterval = maxTimeout / time.Duration(srv.ClientAliveCountMax)
srv.MaxTimeout = 0
} else {
srv.MaxTimeout = maxTimeout
}
s.srv = srv
return s, nil
}

View File

@ -191,3 +191,7 @@ func (testSSHContext) Permissions() *gliderssh.Permissions {
func (testSSHContext) SetValue(_, _ interface{}) {
panic("not implemented")
}
func (testSSHContext) KeepAlive() *gliderssh.SessionKeepAlive {
panic("not implemented")
}

View File

@ -323,10 +323,11 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
Value: clibase.BoolOf(&noReap),
},
{
Flag: "ssh-max-timeout",
Default: "0",
Flag: "ssh-max-timeout",
// tcpip.KeepaliveIdleOption = 72h + 1min (forwardTCPSockOpts() in tailnet/conn.go)
Default: "72h",
Env: "CODER_AGENT_SSH_MAX_TIMEOUT",
Description: "Specify the max timeout for a SSH connection.",
Description: "Specify the max timeout for a SSH connection, it is advisable to set it to a minimum of 60s, but no more than 72h.",
Value: clibase.DurationOf(&sshMaxTimeout),
},
{

View File

@ -30,8 +30,9 @@ Starts the Coder workspace agent.
--prometheus-address string, $CODER_AGENT_PROMETHEUS_ADDRESS (default: 127.0.0.1:2112)
The bind address to serve Prometheus metrics.
--ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 0)
Specify the max timeout for a SSH connection.
--ssh-max-timeout duration, $CODER_AGENT_SSH_MAX_TIMEOUT (default: 72h)
Specify the max timeout for a SSH connection, it is advisable to set
it to a minimum of 60s, but no more than 72h.
--tailnet-listen-port int, $CODER_AGENT_TAILNET_LISTEN_PORT (default: 0)
Specify a static port for Tailscale to use for listening.

4
go.mod
View File

@ -45,7 +45,7 @@ replace tailscale.com => github.com/coder/tailscale v0.0.0-20230522123520-747122
// repo as tailscale.com/tempfork/gliderlabs/ssh, however, we can't replace the
// subpath and it includes changes to golang.org/x/crypto/ssh as well which
// makes importing it directly a bit messy.
replace github.com/gliderlabs/ssh => github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1
replace github.com/gliderlabs/ssh => github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688
// Waiting on https://github.com/imulab/go-scim/pull/95 to merge.
replace github.com/imulab/go-scim/pkg/v2 => github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136
@ -140,7 +140,7 @@ require (
github.com/robfig/cron/v3 v3.0.1
github.com/spf13/afero v1.9.3
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.3
github.com/stretchr/testify v1.8.4
github.com/swaggo/http-swagger/v2 v2.0.1
github.com/swaggo/swag v1.8.6
github.com/tabbed/pqtype v0.1.1

7
go.sum
View File

@ -187,8 +187,8 @@ github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136 h1:0RgB61LcNs
github.com/coder/go-scim/pkg/v2 v2.0.0-20230221055123-1d63c1222136/go.mod h1:VkD1P761nykiq75dz+4iFqIQIZka189tx1BQLOp0Skc=
github.com/coder/retry v1.4.0 h1:g0fojHFxcdgM3sBULqgjFDxw1UIvaCqk4ngUDu0EWag=
github.com/coder/retry v1.4.0/go.mod h1:blHMk9vs6LkoRT9ZHyuZo360cufXEhrxqvEzeMtRGoY=
github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1 h1:LBw76rEDuhNJyohve11mbvYv5CmCLmcuUQGiz7Guk50=
github.com/coder/ssh v0.0.0-20230421140225-04bb837133e1/go.mod h1:ZSS+CUoKHDrqVakTfTWUlKSr9MtMFkC4UvtQKD7O914=
github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688 h1:udcMVKmo37Jv6Nq+Z2gCsDcF5F6zDvwArRGgUdVFD8s=
github.com/coder/ssh v0.0.0-20230615124436-fc6e4b009688/go.mod h1:aGQbuCLyhRLMzZF067xc84Lh7JDs1FKwCmF1Crl9dxQ=
github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f h1:F0Xr1d8h8dAHn7tab1HXuzYFkcjmCydnEfdMbkOhlVk=
github.com/coder/tailscale v0.0.0-20230522123520-74712221d00f/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
github.com/coder/terraform-provider-coder v0.8.2 h1:EPhkdpsNd8fcg6eqpAQr+W1eRrEAMtugoqujoTK4O6o=
@ -786,8 +786,9 @@ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1F
github.com/stretchr/testify v1.7.4/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/swaggest/assertjson v1.7.0 h1:SKw5Rn0LQs6UvmGrIdaKQbMR1R3ncXm5KNon+QJ7jtw=
github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw=
github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM=

View File

@ -769,7 +769,7 @@ func (*Conn) forwardTCPSockOpts(port uint16) []tcpip.SettableSocketOption {
// See: https://github.com/tailscale/tailscale/blob/c7cea825aea39a00aca71ea02bab7266afc03e7c/wgengine/netstack/netstack.go#L888
if port == WorkspaceAgentSSHPort || port == 22 {
opt := tcpip.KeepaliveIdleOption(72 * time.Hour)
opt := tcpip.KeepaliveIdleOption(72*time.Hour + time.Minute) // Default ssh-max-timeout is 72h, so let's add some extra time.
opts = append(opts, &opt)
}