fix: cache disconnected agent names in tailnet coordinator debug (#5870)

This commit is contained in:
Colin Adler 2023-01-25 21:23:14 -06:00 committed by GitHub
parent 16d8cc4176
commit dd8eab5675
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 35 additions and 2 deletions

View File

@ -531,6 +531,15 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
return
}
owner, err := api.Database.GetUserByID(ctx, workspace.OwnerID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Internal error fetching user.",
Detail: err.Error(),
})
return
}
// Ensure the resource is still valid!
// We only accept agents for resources on the latest build.
ensureLatestBuild := func() error {
@ -628,7 +637,9 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
closeChan := make(chan struct{})
go func() {
defer close(closeChan)
err := (*api.TailnetCoordinator.Load()).ServeAgent(wsNetConn, workspaceAgent.ID, fmt.Sprintf("%s-%s", workspace.Name, workspaceAgent.Name))
err := (*api.TailnetCoordinator.Load()).ServeAgent(wsNetConn, workspaceAgent.ID,
fmt.Sprintf("%s-%s-%s", owner.Username, workspace.Name, workspaceAgent.Name),
)
if err != nil {
api.Logger.Warn(ctx, "tailnet coordinator agent error", slog.Error(err))
_ = conn.Close(websocket.StatusInternalError, err.Error())

1
go.mod
View File

@ -96,6 +96,7 @@ require (
github.com/google/uuid v1.3.0
github.com/hashicorp/go-reap v0.0.0-20170704170343-bf58d8a43e7b
github.com/hashicorp/go-version v1.6.0
github.com/hashicorp/golang-lru/v2 v2.0.1
github.com/hashicorp/hc-install v0.4.1-0.20220912074615-4487b02cbcbb
github.com/hashicorp/hcl/v2 v2.14.0
github.com/hashicorp/terraform-config-inspect v0.0.0-20211115214459-90acf1ca460f

2
go.sum
View File

@ -1020,6 +1020,8 @@ github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.1 h1:5pv5N1lT1fjLg2VQ5KWc7kmucp2x/kvFOnxuVTqZ6x4=
github.com/hashicorp/golang-lru/v2 v2.0.1/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/hc-install v0.4.1-0.20220912074615-4487b02cbcbb h1:0AmumMAu6gi5zXEyXvLKDu/HALK+rIcVBZU5XJNyjRM=
github.com/hashicorp/hc-install v0.4.1-0.20220912074615-4487b02cbcbb/go.mod h1:b3vG+IG40BBISnWiQb9/nHqZI/N3oiunwTtyTDaMGOA=
github.com/hashicorp/hcl v0.0.0-20170504190234-a4b07c25de5f/go.mod h1:oZtUIOe8dh44I2q6ScRibXws4Ajl+d+nod3AaR9vL5w=

View File

@ -14,6 +14,7 @@ import (
"time"
"github.com/google/uuid"
lru "github.com/hashicorp/golang-lru/v2"
"golang.org/x/exp/slices"
"golang.org/x/xerrors"
"tailscale.com/tailcfg"
@ -109,11 +110,17 @@ func ServeCoordinator(conn net.Conn, updateNodes func(node []*Node) error) (func
// coordinator is incompatible with multiple Coder replicas as all node data is
// in-memory.
func NewCoordinator() Coordinator {
	// Maximum number of agent ID -> name entries retained by the LRU cache.
	const agentNameCacheSize = 512

	names, err := lru.New[uuid.UUID, string](agentNameCacheSize)
	if err != nil {
		// The size is a fixed constant, so a constructor failure is treated
		// as a programming error rather than something callers can handle.
		panic("make lru cache: " + err.Error())
	}

	// The closed flag is left at its zero value (false); every map starts
	// empty and is filled in as agents and connections are served.
	coord := &coordinator{
		agentNameCache:           names,
		agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]*trackedConn{},
		agentSockets:             map[uuid.UUID]*trackedConn{},
		nodes:                    map[uuid.UUID]*Node{},
	}
	return coord
}
@ -135,6 +142,10 @@ type coordinator struct {
// agentToConnectionSockets maps agent IDs to connection IDs of conns that
// are subscribed to updates for that agent.
agentToConnectionSockets map[uuid.UUID]map[uuid.UUID]*trackedConn
// agentNameCache holds a cache of agent names. If one of them disappears,
// it's helpful to have a name cached for debugging.
agentNameCache *lru.Cache[uuid.UUID, string]
}
type trackedConn struct {
@ -288,6 +299,8 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID, name string) error
return xerrors.New("coordinator is closed")
}
c.agentNameCache.Add(id, name)
sockets, ok := c.agentToConnectionSockets[id]
if ok {
// Publish all nodes that want to connect to the
@ -532,7 +545,13 @@ func (c *coordinator) ServeHTTPDebug(w http.ResponseWriter, _ *http.Request) {
fmt.Fprintln(w, "<ul>")
for _, agentConns := range missingAgents {
fmt.Fprintf(w, "<li style=\"margin-top:4px\"><b>unknown</b> (<code>%s</code>): created ? ago, write ? ago, overwrites ? </li>\n",
agentName, ok := c.agentNameCache.Get(agentConns.id)
if !ok {
agentName = "unknown"
}
fmt.Fprintf(w, "<li style=\"margin-top:4px\"><b>%s</b> (<code>%s</code>): created ? ago, write ? ago, overwrites ? </li>\n",
agentName,
agentConns.id.String(),
)