mirror of https://github.com/coder/coder.git
feat: add debug server for tailnet coordinators (#5861)
Implements a Tailscale-like debug server for our in-memory coordinator. This should provide some visibility into why connections could be failing. Resolves: https://github.com/coder/coder/issues/5845 ![image](https://user-images.githubusercontent.com/6332295/214680832-2724d633-2d54-44d6-a7ce-5841e5824ee5.png)
This commit is contained in:
parent
8830ddfd56
commit
1cd5f38cb0
|
@ -1193,7 +1193,7 @@ func (c *client) ListenWorkspaceAgent(_ context.Context) (net.Conn, error) {
|
||||||
}
|
}
|
||||||
c.t.Cleanup(c.lastWorkspaceAgent)
|
c.t.Cleanup(c.lastWorkspaceAgent)
|
||||||
go func() {
|
go func() {
|
||||||
_ = c.coordinator.ServeAgent(serverConn, c.agentID)
|
_ = c.coordinator.ServeAgent(serverConn, c.agentID, "")
|
||||||
close(closed)
|
close(closed)
|
||||||
}()
|
}()
|
||||||
return clientConn, nil
|
return clientConn, nil
|
||||||
|
|
|
@ -362,6 +362,28 @@ const docTemplate = `{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/debug/coordinator": {
|
||||||
|
"get": {
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"CoderSessionToken": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"produces": [
|
||||||
|
"text/html"
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
"Debug"
|
||||||
|
],
|
||||||
|
"summary": "Debug Info Wireguard Coordinator",
|
||||||
|
"operationId": "debug-info-wireguard-coordinator",
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/entitlements": {
|
"/entitlements": {
|
||||||
"get": {
|
"get": {
|
||||||
"security": [
|
"security": [
|
||||||
|
|
|
@ -308,6 +308,24 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/debug/coordinator": {
|
||||||
|
"get": {
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"CoderSessionToken": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"produces": ["text/html"],
|
||||||
|
"tags": ["Debug"],
|
||||||
|
"summary": "Debug Info Wireguard Coordinator",
|
||||||
|
"operationId": "debug-info-wireguard-coordinator",
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/entitlements": {
|
"/entitlements": {
|
||||||
"get": {
|
"get": {
|
||||||
"security": [
|
"security": [
|
||||||
|
|
|
@ -613,6 +613,25 @@ func New(options *Options) *API {
|
||||||
r.Get("/", api.workspaceApplicationAuth)
|
r.Get("/", api.workspaceApplicationAuth)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
r.Route("/debug", func(r chi.Router) {
|
||||||
|
r.Use(
|
||||||
|
apiKeyMiddleware,
|
||||||
|
// Ensure only owners can access debug endpoints.
|
||||||
|
func(next http.Handler) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
if !api.Authorize(r, rbac.ActionRead, rbac.ResourceDebugInfo) {
|
||||||
|
httpapi.ResourceNotFound(rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
next.ServeHTTP(rw, r)
|
||||||
|
})
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
r.Get("/coordinator", api.debugCoordinator)
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
if options.SwaggerEndpoint {
|
if options.SwaggerEndpoint {
|
||||||
|
|
|
@ -272,6 +272,11 @@ func AGPLRoutes(a *AuthTester) (map[string]string, map[string]RouteCheck) {
|
||||||
AssertAction: rbac.ActionRead,
|
AssertAction: rbac.ActionRead,
|
||||||
AssertObject: rbac.ResourceTemplate,
|
AssertObject: rbac.ResourceTemplate,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
"GET:/api/v2/debug/coordinator": {
|
||||||
|
AssertAction: rbac.ActionRead,
|
||||||
|
AssertObject: rbac.ResourceDebugInfo,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// Routes like proxy routes support all HTTP methods. A helper func to expand
|
// Routes like proxy routes support all HTTP methods. A helper func to expand
|
||||||
|
|
|
@ -327,7 +327,7 @@ func assertAccept(t *testing.T, comment SwaggerComment) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var allowedProduceTypes = []string{"json", "text/event-stream"}
|
var allowedProduceTypes = []string{"json", "text/event-stream", "text/html"}
|
||||||
|
|
||||||
func assertProduce(t *testing.T, comment SwaggerComment) {
|
func assertProduce(t *testing.T, comment SwaggerComment) {
|
||||||
var hasResponseModel bool
|
var hasResponseModel bool
|
||||||
|
@ -344,7 +344,8 @@ func assertProduce(t *testing.T, comment SwaggerComment) {
|
||||||
} else {
|
} else {
|
||||||
if (comment.router == "/workspaceagents/me/app-health" && comment.method == "post") ||
|
if (comment.router == "/workspaceagents/me/app-health" && comment.method == "post") ||
|
||||||
(comment.router == "/workspaceagents/me/version" && comment.method == "post") ||
|
(comment.router == "/workspaceagents/me/version" && comment.method == "post") ||
|
||||||
(comment.router == "/licenses/{id}" && comment.method == "delete") {
|
(comment.router == "/licenses/{id}" && comment.method == "delete") ||
|
||||||
|
(comment.router == "/debug/coordinator" && comment.method == "get") {
|
||||||
return // Exception: HTTP 200 is returned without response entity
|
return // Exception: HTTP 200 is returned without response entity
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
package coderd
|
||||||
|
|
||||||
|
import "net/http"
|
||||||
|
|
||||||
|
// @Summary Debug Info Wireguard Coordinator
|
||||||
|
// @ID debug-info-wireguard-coordinator
|
||||||
|
// @Security CoderSessionToken
|
||||||
|
// @Produce text/html
|
||||||
|
// @Tags Debug
|
||||||
|
// @Success 200
|
||||||
|
// @Router /debug/coordinator [get]
|
||||||
|
func (api *API) debugCoordinator(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
(*api.TailnetCoordinator.Load()).ServeHTTPDebug(rw, r)
|
||||||
|
}
|
|
@ -150,6 +150,11 @@ var (
|
||||||
ResourceReplicas = Object{
|
ResourceReplicas = Object{
|
||||||
Type: "replicas",
|
Type: "replicas",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ResourceDebugInfo controls access to the debug routes `/api/v2/debug/*`.
|
||||||
|
ResourceDebugInfo = Object{
|
||||||
|
Type: "debug_info",
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// Object is used to create objects for authz checks when you have none in
|
// Object is used to create objects for authz checks when you have none in
|
||||||
|
|
|
@ -521,6 +521,16 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
workspace, err := api.Database.GetWorkspaceByID(ctx, build.WorkspaceID)
|
||||||
|
if err != nil {
|
||||||
|
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
|
||||||
|
Message: "Internal error fetching workspace.",
|
||||||
|
Detail: err.Error(),
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Ensure the resource is still valid!
|
// Ensure the resource is still valid!
|
||||||
// We only accept agents for resources on the latest build.
|
// We only accept agents for resources on the latest build.
|
||||||
ensureLatestBuild := func() error {
|
ensureLatestBuild := func() error {
|
||||||
|
@ -618,7 +628,7 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
|
||||||
closeChan := make(chan struct{})
|
closeChan := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
defer close(closeChan)
|
defer close(closeChan)
|
||||||
err := (*api.TailnetCoordinator.Load()).ServeAgent(wsNetConn, workspaceAgent.ID)
|
err := (*api.TailnetCoordinator.Load()).ServeAgent(wsNetConn, workspaceAgent.ID, fmt.Sprintf("%s-%s", workspace.Name, workspaceAgent.Name))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
api.Logger.Warn(ctx, "tailnet coordinator agent error", slog.Error(err))
|
api.Logger.Warn(ctx, "tailnet coordinator agent error", slog.Error(err))
|
||||||
_ = conn.Close(websocket.StatusInternalError, err.Error())
|
_ = conn.Close(websocket.StatusInternalError, err.Error())
|
||||||
|
|
|
@ -207,7 +207,7 @@ func (c *client) ListenWorkspaceAgent(_ context.Context) (net.Conn, error) {
|
||||||
<-closed
|
<-closed
|
||||||
})
|
})
|
||||||
go func() {
|
go func() {
|
||||||
_ = c.coordinator.ServeAgent(serverConn, c.agentID)
|
_ = c.coordinator.ServeAgent(serverConn, c.agentID, "")
|
||||||
close(closed)
|
close(closed)
|
||||||
}()
|
}()
|
||||||
return clientConn, nil
|
return clientConn, nil
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
# Debug
|
||||||
|
|
||||||
|
## Debug Info Wireguard Coordinator
|
||||||
|
|
||||||
|
### Code samples
|
||||||
|
|
||||||
|
```shell
|
||||||
|
# Example request using curl
|
||||||
|
curl -X GET http://coder-server:8080/api/v2/debug/coordinator \
|
||||||
|
-H 'Coder-Session-Token: API_KEY'
|
||||||
|
```
|
||||||
|
|
||||||
|
`GET /debug/coordinator`
|
||||||
|
|
||||||
|
### Responses
|
||||||
|
|
||||||
|
| Status | Meaning | Description | Schema |
|
||||||
|
| ------ | ------------------------------------------------------- | ----------- | ------ |
|
||||||
|
| 200 | [OK](https://tools.ietf.org/html/rfc7231#section-6.3.1) | OK | |
|
||||||
|
|
||||||
|
To perform this operation, you must be authenticated. [Learn more](authentication.md).
|
|
@ -364,6 +364,10 @@
|
||||||
"title": "Builds",
|
"title": "Builds",
|
||||||
"path": "./api/builds.md"
|
"path": "./api/builds.md"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"title": "Debug",
|
||||||
|
"path": "./api/debug.md"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"title": "Enterprise",
|
"title": "Enterprise",
|
||||||
"path": "./api/enterprise.md"
|
"path": "./api/enterprise.md"
|
||||||
|
|
|
@ -5,8 +5,10 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
|
"net/http"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -174,7 +176,7 @@ func (c *haCoordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *js
|
||||||
|
|
||||||
// ServeAgent accepts a WebSocket connection to an agent that listens to
|
// ServeAgent accepts a WebSocket connection to an agent that listens to
|
||||||
// incoming connections and publishes node updates.
|
// incoming connections and publishes node updates.
|
||||||
func (c *haCoordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
|
func (c *haCoordinator) ServeAgent(conn net.Conn, id uuid.UUID, _ string) error {
|
||||||
// Tell clients on other instances to send a callmemaybe to us.
|
// Tell clients on other instances to send a callmemaybe to us.
|
||||||
err := c.publishAgentHello(id)
|
err := c.publishAgentHello(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -573,3 +575,9 @@ func (c *haCoordinator) formatAgentUpdate(id uuid.UUID, node *agpl.Node) ([]byte
|
||||||
|
|
||||||
return buf.Bytes(), nil
|
return buf.Bytes(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (*haCoordinator) ServeHTTPDebug(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||||
|
fmt.Fprintf(w, "<h1>coordinator</h1>")
|
||||||
|
fmt.Fprintf(w, "<h2>ha debug coming soon</h2>")
|
||||||
|
}
|
||||||
|
|
|
@ -60,7 +60,7 @@ func TestCoordinatorSingle(t *testing.T) {
|
||||||
id := uuid.New()
|
id := uuid.New()
|
||||||
closeChan := make(chan struct{})
|
closeChan := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator.ServeAgent(server, id)
|
err := coordinator.ServeAgent(server, id, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeChan)
|
close(closeChan)
|
||||||
}()
|
}()
|
||||||
|
@ -91,7 +91,7 @@ func TestCoordinatorSingle(t *testing.T) {
|
||||||
agentID := uuid.New()
|
agentID := uuid.New()
|
||||||
closeAgentChan := make(chan struct{})
|
closeAgentChan := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator.ServeAgent(agentServerWS, agentID)
|
err := coordinator.ServeAgent(agentServerWS, agentID, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeAgentChan)
|
close(closeAgentChan)
|
||||||
}()
|
}()
|
||||||
|
@ -142,7 +142,7 @@ func TestCoordinatorSingle(t *testing.T) {
|
||||||
})
|
})
|
||||||
closeAgentChan = make(chan struct{})
|
closeAgentChan = make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator.ServeAgent(agentServerWS, agentID)
|
err := coordinator.ServeAgent(agentServerWS, agentID, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeAgentChan)
|
close(closeAgentChan)
|
||||||
}()
|
}()
|
||||||
|
@ -184,7 +184,7 @@ func TestCoordinatorHA(t *testing.T) {
|
||||||
agentID := uuid.New()
|
agentID := uuid.New()
|
||||||
closeAgentChan := make(chan struct{})
|
closeAgentChan := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator1.ServeAgent(agentServerWS, agentID)
|
err := coordinator1.ServeAgent(agentServerWS, agentID, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeAgentChan)
|
close(closeAgentChan)
|
||||||
}()
|
}()
|
||||||
|
@ -240,7 +240,7 @@ func TestCoordinatorHA(t *testing.T) {
|
||||||
})
|
})
|
||||||
closeAgentChan = make(chan struct{})
|
closeAgentChan = make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator1.ServeAgent(agentServerWS, agentID)
|
err := coordinator1.ServeAgent(agentServerWS, agentID, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeAgentChan)
|
close(closeAgentChan)
|
||||||
}()
|
}()
|
||||||
|
|
|
@ -4,10 +4,13 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
"html"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
|
"net/http"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
@ -22,6 +25,9 @@ import (
|
||||||
// └──────────────────┘ └────────────────────┘ └───────────────────┘ └──────────────────┘
|
// └──────────────────┘ └────────────────────┘ └───────────────────┘ └──────────────────┘
|
||||||
// Coordinators have different guarantees for HA support.
|
// Coordinators have different guarantees for HA support.
|
||||||
type Coordinator interface {
|
type Coordinator interface {
|
||||||
|
// ServeHTTPDebug serves a debug webpage that shows the internal state of
|
||||||
|
// the coordinator.
|
||||||
|
ServeHTTPDebug(w http.ResponseWriter, r *http.Request)
|
||||||
// Node returns an in-memory node by ID.
|
// Node returns an in-memory node by ID.
|
||||||
Node(id uuid.UUID) *Node
|
Node(id uuid.UUID) *Node
|
||||||
// ServeClient accepts a WebSocket connection that wants to connect to an agent
|
// ServeClient accepts a WebSocket connection that wants to connect to an agent
|
||||||
|
@ -29,7 +35,8 @@ type Coordinator interface {
|
||||||
ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID) error
|
ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID) error
|
||||||
// ServeAgent accepts a WebSocket connection to an agent that listens to
|
// ServeAgent accepts a WebSocket connection to an agent that listens to
|
||||||
// incoming connections and publishes node updates.
|
// incoming connections and publishes node updates.
|
||||||
ServeAgent(conn net.Conn, id uuid.UUID) error
|
// Name is just used for debug information. It can be left blank.
|
||||||
|
ServeAgent(conn net.Conn, id uuid.UUID, name string) error
|
||||||
// Close closes the coordinator.
|
// Close closes the coordinator.
|
||||||
Close() error
|
Close() error
|
||||||
}
|
}
|
||||||
|
@ -104,8 +111,8 @@ func NewCoordinator() Coordinator {
|
||||||
return &coordinator{
|
return &coordinator{
|
||||||
closed: false,
|
closed: false,
|
||||||
nodes: map[uuid.UUID]*Node{},
|
nodes: map[uuid.UUID]*Node{},
|
||||||
agentSockets: map[uuid.UUID]idConn{},
|
agentSockets: map[uuid.UUID]*trackedConn{},
|
||||||
agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]net.Conn{},
|
agentToConnectionSockets: map[uuid.UUID]map[uuid.UUID]*trackedConn{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -117,23 +124,34 @@ func NewCoordinator() Coordinator {
|
||||||
// This coordinator is incompatible with multiple Coder
|
// This coordinator is incompatible with multiple Coder
|
||||||
// replicas as all node data is in-memory.
|
// replicas as all node data is in-memory.
|
||||||
type coordinator struct {
|
type coordinator struct {
|
||||||
mutex sync.Mutex
|
mutex sync.RWMutex
|
||||||
closed bool
|
closed bool
|
||||||
|
|
||||||
// nodes maps agent and connection IDs their respective node.
|
// nodes maps agent and connection IDs their respective node.
|
||||||
nodes map[uuid.UUID]*Node
|
nodes map[uuid.UUID]*Node
|
||||||
// agentSockets maps agent IDs to their open websocket.
|
// agentSockets maps agent IDs to their open websocket.
|
||||||
agentSockets map[uuid.UUID]idConn
|
agentSockets map[uuid.UUID]*trackedConn
|
||||||
// agentToConnectionSockets maps agent IDs to connection IDs of conns that
|
// agentToConnectionSockets maps agent IDs to connection IDs of conns that
|
||||||
// are subscribed to updates for that agent.
|
// are subscribed to updates for that agent.
|
||||||
agentToConnectionSockets map[uuid.UUID]map[uuid.UUID]net.Conn
|
agentToConnectionSockets map[uuid.UUID]map[uuid.UUID]*trackedConn
|
||||||
}
|
}
|
||||||
|
|
||||||
type idConn struct {
|
type trackedConn struct {
|
||||||
|
net.Conn
|
||||||
|
|
||||||
// id is an ephemeral UUID used to uniquely identify the owner of the
|
// id is an ephemeral UUID used to uniquely identify the owner of the
|
||||||
// connection.
|
// connection.
|
||||||
id uuid.UUID
|
id uuid.UUID
|
||||||
conn net.Conn
|
|
||||||
|
name string
|
||||||
|
start int64
|
||||||
|
lastWrite int64
|
||||||
|
overwrites int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *trackedConn) Write(b []byte) (n int, err error) {
|
||||||
|
atomic.StoreInt64(&t.lastWrite, time.Now().Unix())
|
||||||
|
return t.Conn.Write(b)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Node returns an in-memory node by ID.
|
// Node returns an in-memory node by ID.
|
||||||
|
@ -182,12 +200,18 @@ func (c *coordinator) ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID)
|
||||||
c.mutex.Lock()
|
c.mutex.Lock()
|
||||||
connectionSockets, ok := c.agentToConnectionSockets[agent]
|
connectionSockets, ok := c.agentToConnectionSockets[agent]
|
||||||
if !ok {
|
if !ok {
|
||||||
connectionSockets = map[uuid.UUID]net.Conn{}
|
connectionSockets = map[uuid.UUID]*trackedConn{}
|
||||||
c.agentToConnectionSockets[agent] = connectionSockets
|
c.agentToConnectionSockets[agent] = connectionSockets
|
||||||
}
|
}
|
||||||
|
|
||||||
|
now := time.Now().Unix()
|
||||||
// Insert this connection into a map so the agent
|
// Insert this connection into a map so the agent
|
||||||
// can publish node updates.
|
// can publish node updates.
|
||||||
connectionSockets[id] = conn
|
connectionSockets[id] = &trackedConn{
|
||||||
|
Conn: conn,
|
||||||
|
start: now,
|
||||||
|
lastWrite: now,
|
||||||
|
}
|
||||||
c.mutex.Unlock()
|
c.mutex.Unlock()
|
||||||
defer func() {
|
defer func() {
|
||||||
c.mutex.Lock()
|
c.mutex.Lock()
|
||||||
|
@ -243,7 +267,7 @@ func (c *coordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *json
|
||||||
return xerrors.Errorf("marshal nodes: %w", err)
|
return xerrors.Errorf("marshal nodes: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = agentSocket.conn.Write(data)
|
_, err = agentSocket.Write(data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrClosedPipe) || errors.Is(err, context.Canceled) {
|
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrClosedPipe) || errors.Is(err, context.Canceled) {
|
||||||
return nil
|
return nil
|
||||||
|
@ -256,7 +280,7 @@ func (c *coordinator) handleNextClientMessage(id, agent uuid.UUID, decoder *json
|
||||||
|
|
||||||
// ServeAgent accepts a WebSocket connection to an agent that
|
// ServeAgent accepts a WebSocket connection to an agent that
|
||||||
// listens to incoming connections and publishes node updates.
|
// listens to incoming connections and publishes node updates.
|
||||||
func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
|
func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID, name string) error {
|
||||||
c.mutex.Lock()
|
c.mutex.Lock()
|
||||||
if c.closed {
|
if c.closed {
|
||||||
c.mutex.Unlock()
|
c.mutex.Unlock()
|
||||||
|
@ -289,6 +313,8 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
|
||||||
|
|
||||||
// This uniquely identifies a connection that belongs to this goroutine.
|
// This uniquely identifies a connection that belongs to this goroutine.
|
||||||
unique := uuid.New()
|
unique := uuid.New()
|
||||||
|
now := time.Now().Unix()
|
||||||
|
overwrites := int64(0)
|
||||||
|
|
||||||
// If an old agent socket is connected, we close it to avoid any leaks. This
|
// If an old agent socket is connected, we close it to avoid any leaks. This
|
||||||
// shouldn't ever occur because we expect one agent to be running, but it's
|
// shouldn't ever occur because we expect one agent to be running, but it's
|
||||||
|
@ -297,11 +323,17 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
|
||||||
// dead.
|
// dead.
|
||||||
oldAgentSocket, ok := c.agentSockets[id]
|
oldAgentSocket, ok := c.agentSockets[id]
|
||||||
if ok {
|
if ok {
|
||||||
_ = oldAgentSocket.conn.Close()
|
overwrites = oldAgentSocket.overwrites + 1
|
||||||
|
_ = oldAgentSocket.Close()
|
||||||
}
|
}
|
||||||
c.agentSockets[id] = idConn{
|
c.agentSockets[id] = &trackedConn{
|
||||||
id: unique,
|
id: unique,
|
||||||
conn: conn,
|
Conn: conn,
|
||||||
|
|
||||||
|
name: name,
|
||||||
|
start: now,
|
||||||
|
lastWrite: now,
|
||||||
|
overwrites: overwrites,
|
||||||
}
|
}
|
||||||
|
|
||||||
c.mutex.Unlock()
|
c.mutex.Unlock()
|
||||||
|
@ -311,7 +343,7 @@ func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID) error {
|
||||||
|
|
||||||
// Only delete the connection if it's ours. It could have been
|
// Only delete the connection if it's ours. It could have been
|
||||||
// overwritten.
|
// overwritten.
|
||||||
if idConn := c.agentSockets[id]; idConn.id == unique {
|
if idConn, ok := c.agentSockets[id]; ok && idConn.id == unique {
|
||||||
delete(c.agentSockets, id)
|
delete(c.agentSockets, id)
|
||||||
delete(c.nodes, id)
|
delete(c.nodes, id)
|
||||||
}
|
}
|
||||||
|
@ -382,7 +414,7 @@ func (c *coordinator) Close() error {
|
||||||
for _, socket := range c.agentSockets {
|
for _, socket := range c.agentSockets {
|
||||||
socket := socket
|
socket := socket
|
||||||
go func() {
|
go func() {
|
||||||
_ = socket.conn.Close()
|
_ = socket.Close()
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
@ -403,3 +435,71 @@ func (c *coordinator) Close() error {
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *coordinator) ServeHTTPDebug(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
c.mutex.RLock()
|
||||||
|
defer c.mutex.RUnlock()
|
||||||
|
|
||||||
|
fmt.Fprintln(w, "<h1>in-memory wireguard coordinator debug</h1>")
|
||||||
|
fmt.Fprintf(w, "<h2 id=agents><a href=#agents>#</a> agents: total %d</h2>\n", len(c.agentSockets))
|
||||||
|
fmt.Fprintln(w, "<ul>")
|
||||||
|
for id, conn := range c.agentSockets {
|
||||||
|
fmt.Fprintf(w, "<li><b>%s</b> (%s): created %v ago, write %v ago, overwrites %d </li>\n",
|
||||||
|
conn.name,
|
||||||
|
id.String(),
|
||||||
|
now.Sub(time.Unix(conn.start, 0)).Round(time.Second),
|
||||||
|
now.Sub(time.Unix(conn.lastWrite, 0)).Round(time.Second),
|
||||||
|
conn.overwrites,
|
||||||
|
)
|
||||||
|
|
||||||
|
if connCount := len(c.agentToConnectionSockets[id]); connCount > 0 {
|
||||||
|
fmt.Fprintf(w, "<h3>connections: total %d</h3>\n", connCount)
|
||||||
|
fmt.Fprintln(w, "<ul>")
|
||||||
|
for id, conn := range c.agentToConnectionSockets[id] {
|
||||||
|
fmt.Fprintf(w, "<li><b>%s</b> (%s): created %v ago, write %v ago </li>\n",
|
||||||
|
conn.name,
|
||||||
|
id.String(),
|
||||||
|
now.Sub(time.Unix(conn.start, 0)).Round(time.Second),
|
||||||
|
now.Sub(time.Unix(conn.lastWrite, 0)).Round(time.Second),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
fmt.Fprintln(w, "</ul>")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Fprintln(w, "</ul>")
|
||||||
|
|
||||||
|
missingAgents := map[uuid.UUID]map[uuid.UUID]*trackedConn{}
|
||||||
|
for agentID, conns := range c.agentToConnectionSockets {
|
||||||
|
if len(conns) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := c.agentSockets[agentID]; !ok {
|
||||||
|
missingAgents[agentID] = conns
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "<h2 id=missing-agents><a href=#missing-agents>#</a> missing agents: total %d</h2>\n", len(missingAgents))
|
||||||
|
fmt.Fprintln(w, "<ul>")
|
||||||
|
for agentID, conns := range missingAgents {
|
||||||
|
fmt.Fprintf(w, "<li><b>unknown</b> (%s): created ? ago, write ? ago, overwrites ? </li>\n",
|
||||||
|
agentID.String(),
|
||||||
|
)
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "<h3>connections: total %d</h3>\n", len(conns))
|
||||||
|
fmt.Fprintln(w, "<ul>")
|
||||||
|
for id, conn := range conns {
|
||||||
|
fmt.Fprintf(w, "<li><b>%s</b> (%s): created %v ago, write %v ago </li>\n",
|
||||||
|
conn.name,
|
||||||
|
id.String(),
|
||||||
|
now.Sub(time.Unix(conn.start, 0)).Round(time.Second),
|
||||||
|
now.Sub(time.Unix(conn.lastWrite, 0)).Round(time.Second),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
fmt.Fprintln(w, "</ul>")
|
||||||
|
}
|
||||||
|
fmt.Fprintln(w, "</ul>")
|
||||||
|
}
|
||||||
|
|
|
@ -48,7 +48,7 @@ func TestCoordinator(t *testing.T) {
|
||||||
id := uuid.New()
|
id := uuid.New()
|
||||||
closeChan := make(chan struct{})
|
closeChan := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator.ServeAgent(server, id)
|
err := coordinator.ServeAgent(server, id, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeChan)
|
close(closeChan)
|
||||||
}()
|
}()
|
||||||
|
@ -76,7 +76,7 @@ func TestCoordinator(t *testing.T) {
|
||||||
agentID := uuid.New()
|
agentID := uuid.New()
|
||||||
closeAgentChan := make(chan struct{})
|
closeAgentChan := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator.ServeAgent(agentServerWS, agentID)
|
err := coordinator.ServeAgent(agentServerWS, agentID, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeAgentChan)
|
close(closeAgentChan)
|
||||||
}()
|
}()
|
||||||
|
@ -127,7 +127,7 @@ func TestCoordinator(t *testing.T) {
|
||||||
})
|
})
|
||||||
closeAgentChan = make(chan struct{})
|
closeAgentChan = make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator.ServeAgent(agentServerWS, agentID)
|
err := coordinator.ServeAgent(agentServerWS, agentID, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeAgentChan)
|
close(closeAgentChan)
|
||||||
}()
|
}()
|
||||||
|
@ -160,7 +160,7 @@ func TestCoordinator(t *testing.T) {
|
||||||
agentID := uuid.New()
|
agentID := uuid.New()
|
||||||
closeAgentChan1 := make(chan struct{})
|
closeAgentChan1 := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator.ServeAgent(agentServerWS1, agentID)
|
err := coordinator.ServeAgent(agentServerWS1, agentID, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeAgentChan1)
|
close(closeAgentChan1)
|
||||||
}()
|
}()
|
||||||
|
@ -205,7 +205,7 @@ func TestCoordinator(t *testing.T) {
|
||||||
})
|
})
|
||||||
closeAgentChan2 := make(chan struct{})
|
closeAgentChan2 := make(chan struct{})
|
||||||
go func() {
|
go func() {
|
||||||
err := coordinator.ServeAgent(agentServerWS2, agentID)
|
err := coordinator.ServeAgent(agentServerWS2, agentID, "")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
close(closeAgentChan2)
|
close(closeAgentChan2)
|
||||||
}()
|
}()
|
||||||
|
|
Loading…
Reference in New Issue