fix(tailnet): set TCP keepalive idle to 72 hours for SSH conns (#7196)

This commit is contained in:
Colin Adler 2023-04-18 17:53:11 -05:00 committed by GitHub
parent 57c4de4647
commit fbf329fbb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 36 deletions

View File

@ -30,9 +30,9 @@ import (
var WorkspaceAgentIP = netip.MustParseAddr("fd7a:115c:a1e0:49d6:b259:b7ac:b1b2:48f4")
const (
WorkspaceAgentSSHPort = 1
WorkspaceAgentReconnectingPTYPort = 2
WorkspaceAgentSpeedtestPort = 3
WorkspaceAgentSSHPort = tailnet.WorkspaceAgentSSHPort
WorkspaceAgentReconnectingPTYPort = tailnet.WorkspaceAgentReconnectingPTYPort
WorkspaceAgentSpeedtestPort = tailnet.WorkspaceAgentSpeedtestPort
// WorkspaceAgentHTTPAPIServerPort serves an HTTP server with endpoints for, e.g.,
// gathering agent statistics.
WorkspaceAgentHTTPAPIServerPort = 4

2
go.mod
View File

@ -36,7 +36,7 @@ replace github.com/dlclark/regexp2 => github.com/dlclark/regexp2 v1.7.0
// There are a few minor changes we make to Tailscale that we're slowly upstreaming. Compare here:
// https://github.com/tailscale/tailscale/compare/main...coder:tailscale:main
replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20230411160749-27a458a0ac0a
replace tailscale.com => github.com/coder/tailscale v1.1.1-0.20230418202606-ed9307cf1b22
// Switch to our fork that imports fixes from http://github.com/tailscale/ssh.
// See: https://github.com/coder/coder/issues/3371

4
go.sum
View File

@ -380,8 +380,8 @@ github.com/coder/retry v1.3.1-0.20230210155434-e90a2e1e091d h1:09JG37IgTB6n3ouX9
github.com/coder/retry v1.3.1-0.20230210155434-e90a2e1e091d/go.mod h1:r+1J5i/989wt6CUeNSuvFKKA9hHuKKPMxdzDbTuvwwk=
github.com/coder/ssh v0.0.0-20220811105153-fcea99919338 h1:tN5GKFT68YLVzJoA8AHuiMNJ0qlhoD3pGN3JY9gxSko=
github.com/coder/ssh v0.0.0-20220811105153-fcea99919338/go.mod h1:ZSS+CUoKHDrqVakTfTWUlKSr9MtMFkC4UvtQKD7O914=
github.com/coder/tailscale v1.1.1-0.20230411160749-27a458a0ac0a h1:kgfkNHT0yiDAfs5AKwxICqsFWeiHD/pR+bd0w20LXYI=
github.com/coder/tailscale v1.1.1-0.20230411160749-27a458a0ac0a/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
github.com/coder/tailscale v1.1.1-0.20230418202606-ed9307cf1b22 h1:bvGOqnI0ITbwOZFQ0SZ4MBw/8LLUEjxmNu57XEujrfQ=
github.com/coder/tailscale v1.1.1-0.20230418202606-ed9307cf1b22/go.mod h1:jpg+77g19FpXL43U1VoIqoSg1K/Vh5CVxycGldQ8KhA=
github.com/coder/terraform-provider-coder v0.6.23 h1:O2Rcj0umez4DfVdGnKZi63z1Xzxd0IQOn9VQDB8YU8g=
github.com/coder/terraform-provider-coder v0.6.23/go.mod h1:UIfU3bYNeSzJJvHyJ30tEKjD6Z9utloI+HUM/7n94CY=
github.com/coder/wgtunnel v0.1.5 h1:WP3sCj/3iJ34eKvpMQEp1oJHvm24RYh0NHbj1kfUKfs=

View File

@ -17,6 +17,7 @@ import (
"github.com/google/uuid"
"go4.org/netipx"
"golang.org/x/xerrors"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/adapters/gonet"
"tailscale.com/hostinfo"
"tailscale.com/ipn/ipnstate"
@ -44,6 +45,12 @@ import (
"github.com/coder/coder/cryptorand"
)
// Port numbers assigned to workspace agent services. They are untyped
// constants, so they can be compared directly against uint16 port values.
const (
// WorkspaceAgentSSHPort is the agent's SSH port. Inbound TCP connections to
// it are given a long keepalive idle time (see forwardTCPSockOpts).
WorkspaceAgentSSHPort = 1
// WorkspaceAgentReconnectingPTYPort is presumably the port of the agent's
// reconnecting PTY service (inferred from the name; usage is not shown here).
WorkspaceAgentReconnectingPTYPort = 2
// WorkspaceAgentSpeedtestPort is presumably the port of the agent's
// speedtest service (inferred from the name; usage is not shown here).
WorkspaceAgentSpeedtestPort = 3
)
func init() {
// Globally disable network namespacing. All networking happens in
// userspace.
@ -267,6 +274,7 @@ func NewConn(options *Options) (conn *Conn, err error) {
server.sendNode()
})
netStack.ForwardTCPIn = server.forwardTCP
netStack.ForwardTCPSockOpts = server.forwardTCPSockOpts
err = netStack.Start(nil)
if err != nil {
@ -301,17 +309,16 @@ type Conn struct {
logger slog.Logger
blockEndpoints bool
dialer *tsdial.Dialer
tunDevice *tstun.Wrapper
peerMap map[tailcfg.NodeID]*tailcfg.Node
netMap *netmap.NetworkMap
netStack *netstack.Impl
magicConn *magicsock.Conn
wireguardMonitor *monitor.Mon
wireguardRouter *router.Config
wireguardEngine wgengine.Engine
listeners map[listenKey]*listener
forwardTCPCallback func(conn net.Conn, listenerExists bool) net.Conn
dialer *tsdial.Dialer
tunDevice *tstun.Wrapper
peerMap map[tailcfg.NodeID]*tailcfg.Node
netMap *netmap.NetworkMap
netStack *netstack.Impl
magicConn *magicsock.Conn
wireguardMonitor *monitor.Mon
wireguardRouter *router.Config
wireguardEngine wgengine.Engine
listeners map[listenKey]*listener
lastMutex sync.Mutex
nodeSending bool
@ -327,17 +334,6 @@ type Conn struct {
trafficStats *connstats.Statistics
}
// SetForwardTCPCallback registers a hook that is invoked for every inbound TCP
// connection. The hook receives the connection together with listenerExists,
// which reports whether a listener is registered for the destination port;
// when none is, the traffic is forwarded to the local listening port.
//
// The hook returns the conn to use from then on, so it may hand back a wrapper
// that, for example, tracks reads and writes.
func (c *Conn) SetForwardTCPCallback(callback func(conn net.Conn, listenerExists bool) net.Conn) {
	// The critical section is a single assignment, so unlock explicitly.
	c.mutex.Lock()
	c.forwardTCPCallback = callback
	c.mutex.Unlock()
}
func (c *Conn) SetNodeCallback(callback func(node *Node)) {
c.lastMutex.Lock()
c.nodeCallback = callback
@ -699,12 +695,11 @@ func (c *Conn) selfNode() *Node {
// This and below is taken _mostly_ verbatim from Tailscale:
// https://github.com/tailscale/tailscale/blob/c88bd53b1b7b2fcf7ba302f2e53dd1ce8c32dad4/tsnet/tsnet.go#L459-L494
// Listen announces only on the Tailscale network.
// It will start the server if it has not been started yet.
// Listen listens for connections only on the Tailscale network.
func (c *Conn) Listen(network, addr string) (net.Listener, error) {
host, port, err := net.SplitHostPort(addr)
if err != nil {
return nil, xerrors.Errorf("wgnet: %w", err)
return nil, xerrors.Errorf("tailnet: split host port for listen: %w", err)
}
lk := listenKey{network, host, port}
ln := &listener{
@ -725,7 +720,7 @@ func (c *Conn) Listen(network, addr string) (net.Listener, error) {
}
if _, ok := c.listeners[lk]; ok {
c.mutex.Unlock()
return nil, xerrors.Errorf("wgnet: listener already open for %s, %s", network, addr)
return nil, xerrors.Errorf("tailnet: listener already open for %s, %s", network, addr)
}
c.listeners[lk] = ln
c.mutex.Unlock()
@ -743,14 +738,12 @@ func (c *Conn) DialContextUDP(ctx context.Context, ipp netip.AddrPort) (*gonet.U
func (c *Conn) forwardTCP(conn net.Conn, port uint16) {
c.mutex.Lock()
ln, ok := c.listeners[listenKey{"tcp", "", fmt.Sprint(port)}]
if c.forwardTCPCallback != nil {
conn = c.forwardTCPCallback(conn, ok)
}
c.mutex.Unlock()
if !ok {
c.forwardTCPToLocal(conn, port)
return
}
t := time.NewTimer(time.Second)
defer t.Stop()
select {
@ -763,6 +756,18 @@ func (c *Conn) forwardTCP(conn net.Conn, port uint16) {
_ = conn.Close()
}
// forwardTCPSockOpts returns the additional socket options to apply to an
// inbound TCP connection destined for port. The returned slice is never nil;
// it is empty for ports that need no special treatment.
func (*Conn) forwardTCPSockOpts(port uint16) []tcpip.SettableSocketOption {
	sockOpts := []tcpip.SettableSocketOption{}

	// Only SSH traffic is special-cased: the agent's SSH port and the
	// standard SSH port 22.
	isSSH := port == WorkspaceAgentSSHPort || port == 22
	if !isSSH {
		return sockOpts
	}

	// Give SSH connections a 72 hour TCP keepalive idle time.
	// See: https://github.com/tailscale/tailscale/blob/c7cea825aea39a00aca71ea02bab7266afc03e7c/wgengine/netstack/netstack.go#L888
	keepaliveIdle := tcpip.KeepaliveIdleOption(72 * time.Hour)
	return append(sockOpts, &keepaliveIdle)
}
func (c *Conn) forwardTCPToLocal(conn net.Conn, port uint16) {
defer conn.Close()
dialAddrStr := net.JoinHostPort("127.0.0.1", strconv.Itoa(int(port)))
@ -842,7 +847,7 @@ func (ln *listener) Accept() (net.Conn, error) {
select {
case c = <-ln.conn:
case <-ln.closed:
return nil, xerrors.Errorf("wgnet: %w", net.ErrClosed)
return nil, xerrors.Errorf("tailnet: %w", net.ErrClosed)
}
return c, nil
}