fix(wsconncache): only allow one peer per connection (#5886)

If an agent went away and reconnected, the wsconncache connection would
be polluted for about 10 minutes because there would be two peers with
the same IP. The old peer always had priority, which caused the dashboard
to keep trying to dial the old peer until it was removed.

Fixes: https://github.com/coder/coder/issues/5292
This commit is contained in:
Colin Adler 2023-01-26 16:23:35 -06:00 committed by GitHub
parent b0a16150a3
commit 52ecd35c8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 6 deletions

View File

@ -402,11 +402,6 @@ func (api *API) workspaceAgentListeningPorts(rw http.ResponseWriter, r *http.Req
func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (*codersdk.AgentConn, error) {
clientConn, serverConn := net.Pipe()
go func() {
<-r.Context().Done()
_ = clientConn.Close()
_ = serverConn.Close()
}()
derpMap := api.DERPMap.Clone()
for _, region := range derpMap.Regions {
@ -453,7 +448,16 @@ func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (*
}
sendNodes, _ := tailnet.ServeCoordinator(clientConn, func(node []*tailnet.Node) error {
return conn.UpdateNodes(node)
err := conn.RemoveAllPeers()
if err != nil {
return xerrors.Errorf("remove all peers: %w", err)
}
err = conn.UpdateNodes(node)
if err != nil {
return xerrors.Errorf("update nodes: %w", err)
}
return nil
})
conn.SetNodeCallback(sendNodes)
go func() {
@ -465,6 +469,10 @@ func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (*
}()
return &codersdk.AgentConn{
Conn: conn,
CloseFunc: func() {
_ = clientConn.Close()
_ = serverConn.Close()
},
}, nil
}

View File

@ -325,6 +325,31 @@ func (c *Conn) SetDERPMap(derpMap *tailcfg.DERPMap) {
c.wireguardEngine.SetDERPMap(derpMap)
}
// RemoveAllPeers clears every peer from the connection's network map and
// peer map, hands the emptied network map to the wireguard engine, and then
// reconfigures the engine with a wireguard config generated from that map.
//
// An error from the reconfigure step is suppressed when the connection
// reports itself closed or when the engine reports wgengine.ErrNoChanges;
// any other failure is returned wrapped.
func (c *Conn) RemoveAllPeers() error {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	// Reset both views of the peer set before notifying the engine.
	c.netMap.Peers = []*tailcfg.Node{}
	c.peerMap = map[tailcfg.NodeID]*tailcfg.Node{}

	// The engine receives a copy of the map rather than the shared pointer.
	snapshot := *c.netMap
	c.wireguardEngine.SetNetworkMap(&snapshot)

	wgConfig, err := nmcfg.WGCfg(c.netMap, Logger(c.logger.Named("wgconfig")), netmap.AllowSingleHosts, "")
	if err != nil {
		return xerrors.Errorf("update wireguard config: %w", err)
	}

	err = c.wireguardEngine.Reconfig(wgConfig, c.wireguardRouter, &dns.Config{}, &tailcfg.Debug{})
	switch {
	case err == nil:
		return nil
	case c.isClosed():
		// Only consulted once Reconfig has failed.
		return nil
	case errors.Is(err, wgengine.ErrNoChanges):
		return nil
	default:
		return xerrors.Errorf("reconfig: %w", err)
	}
}
// UpdateNodes connects with a set of peers. This can be constantly updated,
// and peers will continually be reconnected as necessary.
func (c *Conn) UpdateNodes(nodes []*Node) error {