coder/tailnet/coordinator.go

package tailnet
import (
"context"
"encoding/json"
"fmt"
"html/template"
"io"
"net"
"net/http"
"net/netip"
"sync"
"time"
"github.com/google/uuid"
"golang.org/x/xerrors"
"nhooyr.io/websocket"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"cdr.dev/slog"
"github.com/coder/coder/v2/tailnet/proto"
)
// Coordinator exchanges nodes with agents to establish connections.
// ┌──────────────────┐   ┌────────────────────┐   ┌───────────────────┐   ┌──────────────────┐
// │tailnet.Coordinate├──►│tailnet.AcceptClient│◄─►│tailnet.AcceptAgent│◄──┤tailnet.Coordinate│
// └──────────────────┘   └────────────────────┘   └───────────────────┘   └──────────────────┘
// Coordinators have different guarantees for HA support.
type Coordinator interface {
CoordinatorV1
CoordinatorV2
}
type CoordinatorV1 interface {
// ServeHTTPDebug serves a debug webpage that shows the internal state of
// the coordinator.
ServeHTTPDebug(w http.ResponseWriter, r *http.Request)
// Node returns an in-memory node by ID.
Node(id uuid.UUID) *Node
// ServeClient accepts a WebSocket connection that wants to connect to an agent
// with the specified ID.
ServeClient(conn net.Conn, id uuid.UUID, agent uuid.UUID) error
// ServeAgent accepts a WebSocket connection to an agent that listens to
// incoming connections and publishes node updates.
// Name is just used for debug information. It can be left blank.
ServeAgent(conn net.Conn, id uuid.UUID, name string) error
// Close closes the coordinator.
Close() error
ServeMultiAgent(id uuid.UUID) MultiAgentConn
}
// CoordinatorV2 is the interface for interacting with the coordinator via the 2.0 tailnet API.
type CoordinatorV2 interface {
// ServeHTTPDebug serves a debug webpage that shows the internal state of
// the coordinator.
ServeHTTPDebug(w http.ResponseWriter, r *http.Request)
// Node returns a node by peer ID, if known to the coordinator. Returns nil if unknown.
Node(id uuid.UUID) *Node
Close() error
Coordinate(ctx context.Context, id uuid.UUID, name string, a CoordinateeAuth) (chan<- *proto.CoordinateRequest, <-chan *proto.CoordinateResponse)
}
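
// A minimal sketch of driving the v2 API directly (coord, ctx, agentID, and
// protoNode are illustrative values; the types are the ones referenced in this
// package):
//
//	reqs, resps := coord.Coordinate(ctx, agentID, "example-agent", AgentCoordinateeAuth{ID: agentID})
//	defer close(reqs) // closing the request channel tells the coordinator this peer is done
//	reqs <- &proto.CoordinateRequest{UpdateSelf: &proto.CoordinateRequest_UpdateSelf{Node: protoNode}}
//	for resp := range resps {
//		_ = resp.GetPeerUpdates() // apply these to the local Coordinatee
//	}
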
// Node represents a node in the network.
type Node struct {
// ID is used to identify the connection.
ID tailcfg.NodeID `json:"id"`
// AsOf is the time the node was created.
AsOf time.Time `json:"as_of"`
// Key is the Wireguard public key of the node.
Key key.NodePublic `json:"key"`
// DiscoKey is used for discovery messages over DERP to establish
// peer-to-peer connections.
DiscoKey key.DiscoPublic `json:"disco"`
// PreferredDERP is the DERP server that peered connections should meet at
// to establish a connection.
PreferredDERP int `json:"preferred_derp"`
// DERPLatency is the latency in seconds to each DERP server.
DERPLatency map[string]float64 `json:"derp_latency"`
// DERPForcedWebsocket contains a mapping of DERP regions to
// error messages that caused the connection to be forced to
// use WebSockets. We don't use WebSockets by default because
// they are less performant.
DERPForcedWebsocket map[int]string `json:"derp_forced_websockets"`
// Addresses are the IP address ranges this connection exposes.
Addresses []netip.Prefix `json:"addresses"`
// AllowedIPs specify what addresses can dial the connection. We allow all
// by default.
AllowedIPs []netip.Prefix `json:"allowed_ips"`
// Endpoints are ip:port combinations that can be used to establish
// peer-to-peer connections.
Endpoints []string `json:"endpoints"`
}
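
// An illustrative Node value (all field values below are placeholders; in
// practice the keys and endpoints come from the local WireGuard engine):
//
//	node := &Node{
//		ID:            12345,
//		AsOf:          time.Now(),
//		Key:           key.NewNode().Public(),
//		DiscoKey:      key.NewDisco().Public(),
//		PreferredDERP: 1,
//		DERPLatency:   map[string]float64{"1": 0.05},
//		Addresses:     []netip.Prefix{netip.MustParsePrefix("fd7a:115c:a1e0::1/128")},
//		AllowedIPs:    []netip.Prefix{netip.MustParsePrefix("fd7a:115c:a1e0::1/128")},
//		Endpoints:     []string{"192.0.2.1:41641"},
//	}
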
// Coordinatee is something that can be coordinated over the Coordinate protocol. Usually this is a
// Conn.
type Coordinatee interface {
UpdatePeers([]*proto.CoordinateResponse_PeerUpdate) error
SetAllPeersLost()
SetNodeCallback(func(*Node))
// SetTunnelDestination indicates to tailnet that the peer id is a
// destination.
SetTunnelDestination(id uuid.UUID)
}
type Coordination interface {
io.Closer
Error() <-chan error
}
type remoteCoordination struct {
sync.Mutex
closed bool
errChan chan error
coordinatee Coordinatee
tgt uuid.UUID
logger slog.Logger
protocol proto.DRPCTailnet_CoordinateClient
respLoopDone chan struct{}
}
func (c *remoteCoordination) Close() (retErr error) {
c.Lock()
defer c.Unlock()
if c.closed {
return nil
}
c.closed = true
defer func() {
protoErr := c.protocol.Close()
<-c.respLoopDone
if retErr == nil {
retErr = protoErr
}
}()
err := c.protocol.Send(&proto.CoordinateRequest{Disconnect: &proto.CoordinateRequest_Disconnect{}})
if err != nil && !xerrors.Is(err, io.EOF) {
// Coordinator RPC hangs up when it gets disconnect, so EOF is expected.
return xerrors.Errorf("send disconnect: %w", err)
}
c.logger.Debug(context.Background(), "sent disconnect")
return nil
}
func (c *remoteCoordination) Error() <-chan error {
return c.errChan
}
func (c *remoteCoordination) sendErr(err error) {
select {
case c.errChan <- err:
default:
}
}
func (c *remoteCoordination) respLoop() {
defer func() {
c.coordinatee.SetAllPeersLost()
close(c.respLoopDone)
}()
for {
resp, err := c.protocol.Recv()
if err != nil {
c.sendErr(xerrors.Errorf("read: %w", err))
return
}
err = c.coordinatee.UpdatePeers(resp.GetPeerUpdates())
if err != nil {
c.sendErr(xerrors.Errorf("update peers: %w", err))
return
}
// Only send acks from peers without a target.
if c.tgt == uuid.Nil {
// Send an ack back for all received peers. This could
// potentially be smarter to only send an ACK once per client,
// but there's nothing currently stopping clients from reusing
// IDs.
rfh := []*proto.CoordinateRequest_ReadyForHandshake{}
for _, peer := range resp.GetPeerUpdates() {
if peer.Kind != proto.CoordinateResponse_PeerUpdate_NODE {
continue
}
rfh = append(rfh, &proto.CoordinateRequest_ReadyForHandshake{Id: peer.Id})
}
if len(rfh) > 0 {
err := c.protocol.Send(&proto.CoordinateRequest{
ReadyForHandshake: rfh,
})
if err != nil {
c.sendErr(xerrors.Errorf("send: %w", err))
return
}
}
}
}
}
// NewRemoteCoordination uses the provided protocol to coordinate the provided coordinatee (usually a
// Conn). If the tunnelTarget is not uuid.Nil, then we add a tunnel to the peer (i.e. we are acting as
// a client; agents should NOT set this!).
func NewRemoteCoordination(logger slog.Logger,
protocol proto.DRPCTailnet_CoordinateClient, coordinatee Coordinatee,
tunnelTarget uuid.UUID,
) Coordination {
c := &remoteCoordination{
errChan: make(chan error, 1),
coordinatee: coordinatee,
tgt: tunnelTarget,
logger: logger,
protocol: protocol,
respLoopDone: make(chan struct{}),
}
if tunnelTarget != uuid.Nil {
c.coordinatee.SetTunnelDestination(tunnelTarget)
c.Lock()
err := c.protocol.Send(&proto.CoordinateRequest{AddTunnel: &proto.CoordinateRequest_Tunnel{Id: tunnelTarget[:]}})
c.Unlock()
if err != nil {
c.sendErr(err)
}
}
coordinatee.SetNodeCallback(func(node *Node) {
pn, err := NodeToProto(node)
if err != nil {
c.logger.Critical(context.Background(), "failed to convert node", slog.Error(err))
c.sendErr(err)
return
}
c.Lock()
defer c.Unlock()
if c.closed {
c.logger.Debug(context.Background(), "ignored node update because coordination is closed")
return
}
err = c.protocol.Send(&proto.CoordinateRequest{UpdateSelf: &proto.CoordinateRequest_UpdateSelf{Node: pn}})
if err != nil {
c.sendErr(xerrors.Errorf("write: %w", err))
}
})
go c.respLoop()
return c
}
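
// A sketch of typical client-side use, assuming a DRPC Tailnet client whose
// Coordinate call returns the stream (client, conn, and agentID are
// illustrative):
//
//	stream, err := client.Coordinate(ctx)
//	if err != nil {
//		return err
//	}
//	coordination := NewRemoteCoordination(logger, stream, conn, agentID)
//	defer coordination.Close()
//	select {
//	case err := <-coordination.Error():
//		return err
//	case <-ctx.Done():
//		return ctx.Err()
//	}
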
type inMemoryCoordination struct {
sync.Mutex
ctx context.Context
errChan chan error
closed bool
closedCh chan struct{}
respLoopDone chan struct{}
coordinatee Coordinatee
logger slog.Logger
resps <-chan *proto.CoordinateResponse
reqs chan<- *proto.CoordinateRequest
}
func (c *inMemoryCoordination) sendErr(err error) {
select {
case c.errChan <- err:
default:
}
}
func (c *inMemoryCoordination) Error() <-chan error {
return c.errChan
}
// NewInMemoryCoordination connects a Coordinatee (usually a Conn) to an in-memory Coordinator, for testing
// or local clients. Set ClientID to uuid.Nil for an agent.
func NewInMemoryCoordination(
ctx context.Context, logger slog.Logger,
clientID, agentID uuid.UUID,
coordinator Coordinator, coordinatee Coordinatee,
) Coordination {
thisID := agentID
logger = logger.With(slog.F("agent_id", agentID))
var auth CoordinateeAuth = AgentCoordinateeAuth{ID: agentID}
if clientID != uuid.Nil {
// this is a client connection
auth = ClientCoordinateeAuth{AgentID: agentID}
logger = logger.With(slog.F("client_id", clientID))
thisID = clientID
}
c := &inMemoryCoordination{
ctx: ctx,
errChan: make(chan error, 1),
coordinatee: coordinatee,
logger: logger,
closedCh: make(chan struct{}),
respLoopDone: make(chan struct{}),
}
// use the background context since we will depend exclusively on closing the req channel to
// tell the coordinator we are done.
c.reqs, c.resps = coordinator.Coordinate(context.Background(),
thisID, fmt.Sprintf("inmemory%s", thisID),
auth,
)
go c.respLoop()
if agentID != uuid.Nil {
select {
case <-ctx.Done():
c.logger.Warn(ctx, "context expired before we could add tunnel", slog.Error(ctx.Err()))
return c
case c.reqs <- &proto.CoordinateRequest{AddTunnel: &proto.CoordinateRequest_Tunnel{Id: agentID[:]}}:
// OK!
}
}
coordinatee.SetNodeCallback(func(n *Node) {
pn, err := NodeToProto(n)
if err != nil {
c.logger.Critical(ctx, "failed to convert node", slog.Error(err))
c.sendErr(err)
return
}
c.Lock()
defer c.Unlock()
if c.closed {
return
}
select {
case <-ctx.Done():
c.logger.Info(ctx, "context expired before sending node update")
return
case c.reqs <- &proto.CoordinateRequest{UpdateSelf: &proto.CoordinateRequest_UpdateSelf{Node: pn}}:
c.logger.Debug(ctx, "sent node in-memory to coordinator")
}
})
return c
}
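
// For example, a test can join a client and an agent through the same
// in-memory coordinator (clientConn and agentConn are illustrative
// Coordinatee implementations, e.g. *Conn values):
//
//	coord := NewCoordinator(logger)
//	defer coord.Close()
//	agentCoordination := NewInMemoryCoordination(ctx, logger, uuid.Nil, agentID, coord, agentConn)
//	defer agentCoordination.Close()
//	clientCoordination := NewInMemoryCoordination(ctx, logger, clientID, agentID, coord, clientConn)
//	defer clientCoordination.Close()
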
func (c *inMemoryCoordination) respLoop() {
defer func() {
c.coordinatee.SetAllPeersLost()
close(c.respLoopDone)
}()
for {
select {
case <-c.closedCh:
c.logger.Debug(context.Background(), "in-memory coordination closed")
return
case resp, ok := <-c.resps:
if !ok {
c.logger.Debug(context.Background(), "in-memory response channel closed")
return
}
c.logger.Debug(context.Background(), "got in-memory response from coordinator", slog.F("resp", resp))
err := c.coordinatee.UpdatePeers(resp.GetPeerUpdates())
if err != nil {
c.sendErr(xerrors.Errorf("failed to update peers: %w", err))
return
}
}
}
}
func (*inMemoryCoordination) AwaitAck() <-chan struct{} {
// This is only used for tests, so just return a closed channel.
ch := make(chan struct{})
close(ch)
return ch
}
func (c *inMemoryCoordination) Close() error {
c.Lock()
defer c.Unlock()
c.logger.Debug(context.Background(), "closing in-memory coordination")
if c.closed {
return nil
}
defer close(c.reqs)
c.closed = true
close(c.closedCh)
<-c.respLoopDone
select {
case <-c.ctx.Done():
return xerrors.Errorf("failed to gracefully disconnect: %w", c.ctx.Err())
case c.reqs <- &proto.CoordinateRequest{Disconnect: &proto.CoordinateRequest_Disconnect{}}:
c.logger.Debug(context.Background(), "sent graceful disconnect in-memory")
return nil
}
}
// ServeCoordinator matches the RW structure of a coordinator to exchange node messages.
func ServeCoordinator(conn net.Conn, updateNodes func(node []*Node) error) (func(node *Node), <-chan error) {
errChan := make(chan error, 1)
sendErr := func(err error) {
select {
case errChan <- err:
default:
}
}
go func() {
decoder := json.NewDecoder(conn)
for {
var nodes []*Node
err := decoder.Decode(&nodes)
if err != nil {
sendErr(xerrors.Errorf("read: %w", err))
return
}
err = updateNodes(nodes)
if err != nil {
sendErr(xerrors.Errorf("update nodes: %w", err))
}
}
}()
return func(node *Node) {
data, err := json.Marshal(node)
if err != nil {
sendErr(xerrors.Errorf("marshal node: %w", err))
return
}
_, err = conn.Write(data)
if err != nil {
sendErr(xerrors.Errorf("write: %w", err))
}
}, errChan
}
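
// A sketch of wiring a local connection to the v1 protocol (wsNetConn,
// tailnetConn, and applyNodes are illustrative; wsNetConn could come from
// websocket.NetConn over an accepted WebSocket):
//
//	sendNode, errChan := ServeCoordinator(wsNetConn, func(nodes []*Node) error {
//		// apply the coordinator's view of the peers to the local tailnet connection
//		return applyNodes(tailnetConn, nodes)
//	})
//	tailnetConn.SetNodeCallback(sendNode)
//	return <-errChan
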
const LoggerName = "coord"
var (
ErrClosed = xerrors.New("coordinator is closed")
ErrWouldBlock = xerrors.New("would block")
ErrAlreadyRemoved = xerrors.New("already removed")
)
// NewCoordinator constructs a new in-memory connection coordinator. This
// coordinator is incompatible with multiple Coder replicas as all node data is
// in-memory.
func NewCoordinator(logger slog.Logger) Coordinator {
return &coordinator{
core: newCore(logger.Named(LoggerName)),
closedChan: make(chan struct{}),
}
}
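// A minimal usage sketch (hypothetical caller, not part of this file): one coordinator is constructed
// per process and closed on shutdown. The logger, clientConn, clientID, and agentID names below are
// illustrative assumptions; Close is the Coordinator method implied by the closedChan set above.
//
//	coord := tailnet.NewCoordinator(logger)
//	defer func() { _ = coord.Close() }()
//	// Bridge a client's connection (any net.Conn) to the agent it wants to reach.
//	go func() { _ = coord.ServeClient(clientConn, clientID, agentID) }()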
// coordinator exchanges nodes with agents to establish connections entirely in-memory.
// The Enterprise implementation provides this for high availability.
// ┌──────────────────┐ ┌────────────────────┐ ┌───────────────────┐ ┌──────────────────┐
// │tailnet.Coordinate├──►│tailnet.AcceptClient│◄─►│tailnet.AcceptAgent│◄──┤tailnet.Coordinate│
// └──────────────────┘ └────────────────────┘ └───────────────────┘ └──────────────────┘
// This coordinator is incompatible with multiple Coder replicas as all node
// data is in-memory.
type coordinator struct {
core *core
mu sync.Mutex
closed bool
wg sync.WaitGroup
closedChan chan struct{}
}
func (c *coordinator) Coordinate(
ctx context.Context, id uuid.UUID, name string, a CoordinateeAuth,
) (
chan<- *proto.CoordinateRequest, <-chan *proto.CoordinateResponse,
) {
logger := c.core.logger.With(
slog.F("peer_id", id),
slog.F("peer_name", name),
)
reqs := make(chan *proto.CoordinateRequest, RequestBufferSize)
resps := make(chan *proto.CoordinateResponse, ResponseBufferSize)
p := &peer{
logger: logger,
id: id,
name: name,
resps: resps,
reqs: reqs,
auth: a,
sent: make(map[uuid.UUID]*proto.Node),
}
err := c.core.initPeer(p)
if err != nil {
if xerrors.Is(err, ErrClosed) {
logger.Debug(ctx, "coordinate failed: Coordinator is closed")
} else {
logger.Critical(ctx, "coordinate failed: %s", err.Error())
}
}
c.mu.Lock()
defer c.mu.Unlock()
if c.closed {
// don't start the readLoop if we are closed.
return reqs, resps
}
c.wg.Add(1)
go func() {
defer c.wg.Done()
p.reqLoop(ctx, logger, c.core.handleRequest)
err := c.core.lostPeer(p)
if xerrors.Is(err, ErrClosed) || xerrors.Is(err, ErrAlreadyRemoved) {
return
}
if err != nil {
logger.Error(context.Background(), "failed to process lost peer", slog.Error(err))
}
}()
return reqs, resps
}
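// A hedged sketch of driving Coordinate directly from another package (ctx, myID, myNode, and agentID
// are illustrative assumptions; SendCtx, UUIDToByteSlice, and SingleTailnetCoordinateeAuth are the
// helpers used elsewhere in this file):
//
//	reqs, resps := coord.Coordinate(ctx, myID, myID.String(), tailnet.SingleTailnetCoordinateeAuth{})
//	// Ask for updates about agentID and publish our own node.
//	_ = tailnet.SendCtx(ctx, reqs, &proto.CoordinateRequest{
//		AddTunnel: &proto.CoordinateRequest_Tunnel{Id: tailnet.UUIDToByteSlice(agentID)},
//	})
//	_ = tailnet.SendCtx(ctx, reqs, &proto.CoordinateRequest{
//		UpdateSelf: &proto.CoordinateRequest_UpdateSelf{Node: myNode},
//	})
//	// Apply peer updates until the coordinator closes the response channel or ctx ends.
//	for resp := range resps {
//		_ = resp.PeerUpdates
//	}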
func (c *coordinator) ServeMultiAgent(id uuid.UUID) MultiAgentConn {
return ServeMultiAgent(c, c.core.logger, id)
}
func ServeMultiAgent(c CoordinatorV2, logger slog.Logger, id uuid.UUID) MultiAgentConn {
logger = logger.With(slog.F("client_id", id)).Named("multiagent")
ctx, cancel := context.WithCancel(context.Background())
reqs, resps := c.Coordinate(ctx, id, id.String(), SingleTailnetCoordinateeAuth{})
m := (&MultiAgent{
ID: id,
OnSubscribe: func(enq Queue, agent uuid.UUID) error {
err := SendCtx(ctx, reqs, &proto.CoordinateRequest{AddTunnel: &proto.CoordinateRequest_Tunnel{Id: UUIDToByteSlice(agent)}})
return err
},
OnUnsubscribe: func(enq Queue, agent uuid.UUID) error {
err := SendCtx(ctx, reqs, &proto.CoordinateRequest{RemoveTunnel: &proto.CoordinateRequest_Tunnel{Id: UUIDToByteSlice(agent)}})
return err
},
OnNodeUpdate: func(id uuid.UUID, node *proto.Node) error {
return SendCtx(ctx, reqs, &proto.CoordinateRequest{UpdateSelf: &proto.CoordinateRequest_UpdateSelf{
Node: node,
}})
},
OnRemove: func(_ Queue) {
_ = SendCtx(ctx, reqs, &proto.CoordinateRequest{Disconnect: &proto.CoordinateRequest_Disconnect{}})
cancel()
},
}).Init()
go v1RespLoop(ctx, cancel, logger, m, resps)
return m
}
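// A hedged sketch of consuming the returned MultiAgentConn (the SubscribeAgent, UpdateSelf, NextUpdate,
// and Close method names are assumptions based on typical callers, not definitions made in this file):
//
//	ma := coord.ServeMultiAgent(uuid.New())
//	defer func() { _ = ma.Close() }()
//	_ = ma.SubscribeAgent(agentID) // becomes an AddTunnel request via OnSubscribe above
//	_ = ma.UpdateSelf(myNode)      // becomes an UpdateSelf request via OnNodeUpdate above
//	resp, ok := ma.NextUpdate(ctx) // peer updates fanned out by v1RespLoop
//	_, _ = resp, ok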
// core is the in-memory store of peer and tunnel mappings. Its methods may be called from multiple
// goroutines; it is protected by a mutex to keep its data consistent.
type core struct {
logger slog.Logger
mutex sync.RWMutex
closed bool
peers map[uuid.UUID]*peer
tunnels *tunnelStore
}
type QueueKind int
const (
QueueKindClient QueueKind = 1 + iota
QueueKindAgent
)
type Queue interface {
UniqueID() uuid.UUID
Kind() QueueKind
Enqueue(resp *proto.CoordinateResponse) error
Name() string
Stats() (start, lastWrite int64)
Overwrites() int64
// CoordinatorClose is used by the coordinator when closing a Queue. It
// should skip removing itself from the coordinator.
CoordinatorClose() error
Done() <-chan struct{}
Close() error
}
func newCore(logger slog.Logger) *core {
return &core{
logger: logger,
closed: false,
peers: make(map[uuid.UUID]*peer),
tunnels: newTunnelStore(),
}
}
// Node returns an in-memory node by ID.
// If the node does not exist, nil is returned.
func (c *coordinator) Node(id uuid.UUID) *Node {
return c.core.node(id)
}
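// Illustrative check (agentID is an assumption for the example): a nil return simply means the peer
// is not connected or has not yet sent a node update.
//
//	if node := coord.Node(agentID); node != nil {
//		// the peer's most recent node state is available here
//	}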
func (c *core) node(id uuid.UUID) *Node {
c.mutex.Lock()
defer c.mutex.Unlock()
p := c.peers[id]
if p == nil || p.node == nil {
return nil
}
v1Node, err := ProtoToNode(p.node)
if err != nil {
c.logger.Critical(context.Background(),
"failed to convert node", slog.Error(err), slog.F("node", p.node))
return nil
}
return v1Node
}
// ServeClient accepts a WebSocket connection that wants to connect to an agent
// with the specified ID.
func (c *coordinator) ServeClient(conn net.Conn, id, agentID uuid.UUID) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
return ServeClientV1(ctx, c.core.logger, c, conn, id, agentID)
}
// ServeClientV1 adapts a v1 Client to a v2 Coordinator
func ServeClientV1(ctx context.Context, logger slog.Logger, c CoordinatorV2, conn net.Conn, id uuid.UUID, agent uuid.UUID) error {
logger = logger.With(slog.F("client_id", id), slog.F("agent_id", agent))
defer func() {
err := conn.Close()
if err != nil {
logger.Debug(ctx, "closing client connection", slog.Error(err))
}
}()
ctx, cancel := context.WithCancel(ctx)
defer cancel()
reqs, resps := c.Coordinate(ctx, id, id.String(), ClientCoordinateeAuth{AgentID: agent})
err := SendCtx(ctx, reqs, &proto.CoordinateRequest{
AddTunnel: &proto.CoordinateRequest_Tunnel{Id: UUIDToByteSlice(agent)},
})
if err != nil {
// can only be a context error, no need to log here.
return err
}
tc := NewTrackedConn(ctx, cancel, conn, id, logger, id.String(), 0, QueueKindClient)
go tc.SendUpdates()
go v1RespLoop(ctx, cancel, logger, tc, resps)
go v1ReqLoop(ctx, cancel, logger, conn, reqs)
<-ctx.Done()
return nil
}
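// A minimal sketch of serving an in-memory v1 client (net.Pipe is from the standard library; coordV2,
// clientID, and agentID are illustrative assumptions). The client side of the pipe then speaks the v1
// protocol handled by v1ReqLoop and v1RespLoop:
//
//	clientSide, coordSide := net.Pipe()
//	go func() {
//		_ = tailnet.ServeClientV1(ctx, logger, coordV2, coordSide, clientID, agentID)
//	}()
//	// hand clientSide to the v1 client implementation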
func (c *core) handleRequest(p *peer, req *proto.CoordinateRequest) error {
c.mutex.Lock()
defer c.mutex.Unlock()
if c.closed {
return ErrClosed
}
pr, ok := c.peers[p.id]
if !ok || pr != p {
return ErrAlreadyRemoved
}
if err := pr.auth.Authorize(req); err != nil {
return xerrors.Errorf("authorize request: %w", err)
}
if req.UpdateSelf != nil {
err := c.nodeUpdateLocked(p, req.UpdateSelf.Node)
if xerrors.Is(err, ErrAlreadyRemoved) || xerrors.Is(err, ErrClosed) {
return nil
}
if err != nil {
return xerrors.Errorf("node update failed: %w", err)
}
}
if req.AddTunnel != nil {
dstID, err := uuid.FromBytes(req.AddTunnel.Id)
if err != nil {
// this shouldn't happen unless there is a client error. Close the connection so the client
// doesn't just happily continue thinking everything is fine.
return xerrors.Errorf("unable to convert bytes to UUID: %w", err)
}
err = c.addTunnelLocked(p, dstID)
if xerrors.Is(err, ErrAlreadyRemoved) || xerrors.Is(err, ErrClosed) {
return nil
}
if err != nil {
return xerrors.Errorf("add tunnel failed: %w", err)
}
}
if req.RemoveTunnel != nil {
dstID, err := uuid.FromBytes(req.RemoveTunnel.Id)
if err != nil {
// this shouldn't happen unless there is a client error. Close the connection so the client
// doesn't just happily continue thinking everything is fine.
return xerrors.Errorf("unable to convert bytes to UUID: %w", err)
}
err = c.removeTunnelLocked(p, dstID)
if xerrors.Is(err, ErrAlreadyRemoved) || xerrors.Is(err, ErrClosed) {
return nil
}
if err != nil {
return xerrors.Errorf("remove tunnel failed: %w", err)
}
}
if req.Disconnect != nil {
c.removePeerLocked(p.id, proto.CoordinateResponse_PeerUpdate_DISCONNECTED, "graceful disconnect")
}
if rfhs := req.ReadyForHandshake; rfhs != nil {
err := c.handleReadyForHandshakeLocked(pr, rfhs)
if err != nil {
return xerrors.Errorf("handle ack: %w", err)
}
}
return nil
}
func (c *core) handleReadyForHandshakeLocked(src *peer, rfhs []*proto.CoordinateRequest_ReadyForHandshake) error {
for _, rfh := range rfhs {
dstID, err := uuid.FromBytes(rfh.Id)
if err != nil {
// this shouldn't happen unless there is a client error. Close the connection so the client
// doesn't just happily continue thinking everything is fine.
return xerrors.Errorf("unable to convert bytes to UUID: %w", err)
}
if !c.tunnels.tunnelExists(src.id, dstID) {
// We intentionally do not return an error here, since it's
// inherently racy. It's possible for a source to connect, then
// subsequently disconnect before the agent has sent back the RFH.
// Since this could potentially happen to a non-malicious agent, we
// don't want to kill its connection.
select {
case src.resps <- &proto.CoordinateResponse{
Error: fmt.Sprintf("you do not share a tunnel with %q", dstID.String()),
}:
default:
return ErrWouldBlock
}
continue
}
dst, ok := c.peers[dstID]
if ok {
select {
case dst.resps <- &proto.CoordinateResponse{
PeerUpdates: []*proto.CoordinateResponse_PeerUpdate{{
Id: src.id[:],
Kind: proto.CoordinateResponse_PeerUpdate_READY_FOR_HANDSHAKE,
}},
}:
default:
return ErrWouldBlock
}
}
}
return nil
}
func (c *core) nodeUpdateLocked(p *peer, node *proto.Node) error {
c.logger.Debug(context.Background(), "processing node update",
slog.F("peer_id", p.id),
slog.F("node", node.String()))
p.node = node
c.updateTunnelPeersLocked(p.id, node, proto.CoordinateResponse_PeerUpdate_NODE, "node update")
return nil
}
func (c *core) updateTunnelPeersLocked(id uuid.UUID, n *proto.Node, k proto.CoordinateResponse_PeerUpdate_Kind, reason string) {
tp := c.tunnels.findTunnelPeers(id)
c.logger.Debug(context.Background(), "got tunnel peers", slog.F("peer_id", id), slog.F("tunnel_peers", tp))
for _, otherID := range tp {
other, ok := c.peers[otherID]
if !ok {
continue
}
err := other.updateMappingLocked(id, n, k, reason)
if err != nil {
other.logger.Error(context.Background(), "failed to update mapping", slog.Error(err))
c.removePeerLocked(other.id, proto.CoordinateResponse_PeerUpdate_DISCONNECTED, "failed update")
}
}
}
func (c *core) addTunnelLocked(src *peer, dstID uuid.UUID) error {
c.tunnels.add(src.id, dstID)
c.logger.Debug(context.Background(), "adding tunnel",
slog.F("src_id", src.id),
slog.F("dst_id", dstID))
dst, ok := c.peers[dstID]
if ok {
if dst.node != nil {
err := src.updateMappingLocked(dstID, dst.node, proto.CoordinateResponse_PeerUpdate_NODE, "add tunnel")
if err != nil {
src.logger.Error(context.Background(), "failed update of tunnel src", slog.Error(err))
c.removePeerLocked(src.id, proto.CoordinateResponse_PeerUpdate_DISCONNECTED, "failed update")
// if the source fails, then the tunnel is also removed and there is no reason to continue
// processing.
return err
}
}
if src.node != nil {
err := dst.updateMappingLocked(src.id, src.node, proto.CoordinateResponse_PeerUpdate_NODE, "add tunnel")
if err != nil {
dst.logger.Error(context.Background(), "failed update of tunnel dst", slog.Error(err))
c.removePeerLocked(dst.id, proto.CoordinateResponse_PeerUpdate_DISCONNECTED, "failed update")
}
}
}
return nil
}
func (c *core) removeTunnelLocked(src *peer, dstID uuid.UUID) error {
err := src.updateMappingLocked(dstID, nil, proto.CoordinateResponse_PeerUpdate_DISCONNECTED, "remove tunnel")
if err != nil {
src.logger.Error(context.Background(), "failed to update", slog.Error(err))
c.removePeerLocked(src.id, proto.CoordinateResponse_PeerUpdate_DISCONNECTED, "failed update")
// removing the peer also removes all other tunnels and notifies destinations, so it's safe to
// return here.
return err
}
dst, ok := c.peers[dstID]
if ok {
err = dst.updateMappingLocked(src.id, nil, proto.CoordinateResponse_PeerUpdate_DISCONNECTED, "remove tunnel")
if err != nil {
dst.logger.Error(context.Background(), "failed to update", slog.Error(err))
c.removePeerLocked(dst.id, proto.CoordinateResponse_PeerUpdate_DISCONNECTED, "failed update")
// don't return here because we still want to remove the tunnel, and an error at the
// destination doesn't count as an error removing the tunnel at the source.
}
}
c.tunnels.remove(src.id, dstID)
return nil
}
func (c *core) initPeer(p *peer) error {
c.mutex.Lock()
defer c.mutex.Unlock()
p.logger.Debug(context.Background(), "initPeer")
if c.closed {
return ErrClosed
}
if p.node != nil {
return xerrors.Errorf("peer (%s) must be initialized with nil node", p.id)
}
if old, ok := c.peers[p.id]; ok {
// rare and interesting enough to log at Info, but it isn't an error per se
old.logger.Info(context.Background(), "overwritten by new connection")
close(old.resps)
p.overwrites = old.overwrites + 1
}
now := time.Now()
p.start = now
p.lastWrite = now
c.peers[p.id] = p
tp := c.tunnels.findTunnelPeers(p.id)
p.logger.Debug(context.Background(), "initial tunnel peers", slog.F("tunnel_peers", tp))
var others []*peer
for _, otherID := range tp {
// ok to append nil here because the batch call below filters them out
others = append(others, c.peers[otherID])
}
return p.batchUpdateMappingLocked(others, proto.CoordinateResponse_PeerUpdate_NODE, "init")
}
// lostPeer removes and cleans up a lost peer. It sends a LOST update to every
// peer it shares a tunnel with and deletes all tunnels for which the lost peer
// is the source.
func (c *core) lostPeer(p *peer) error {
c.mutex.Lock()
defer c.mutex.Unlock()
c.logger.Debug(context.Background(), "lostPeer", slog.F("peer_id", p.id))
if c.closed {
return ErrClosed
}
if existing, ok := c.peers[p.id]; !ok || existing != p {
return ErrAlreadyRemoved
}
c.removePeerLocked(p.id, proto.CoordinateResponse_PeerUpdate_LOST, "lost")
return nil
}
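
// removePeerLocked removes the peer with the given ID from the coordinator:
// it notifies the peers it shares tunnels with using the given update kind and
// reason, drops all of its tunnels, closes its response channel, and deletes
// it from the peer map. c.mutex must be held by the caller.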
func (c *core) removePeerLocked(id uuid.UUID, kind proto.CoordinateResponse_PeerUpdate_Kind, reason string) {
p, ok := c.peers[id]
if !ok {
c.logger.Critical(context.Background(), "removed non-existent peer", slog.F("peer_id", id))
return
}
c.updateTunnelPeersLocked(id, nil, kind, reason)
c.tunnels.removeAll(id)
close(p.resps)
delete(c.peers, id)
}
// ServeAgent accepts a WebSocket connection from an agent, which listens for
// incoming connections and publishes updates for its own node.
func (c *coordinator) ServeAgent(conn net.Conn, id uuid.UUID, name string) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
return ServeAgentV1(ctx, c.core.logger, c, conn, id, name)
}
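
// ServeAgentV1 bridges a legacy v1 agent connection (JSON Node updates over a
// net.Conn) onto the v2 Coordinate API. It spawns goroutines that decode Node
// updates from conn into CoordinateRequests and write CoordinateResponses back
// to the connection, then blocks until the context is canceled or either loop
// exits. A minimal calling sketch, assuming conn is an already-accepted agent
// connection (the variable names are illustrative only):
//
//	err := ServeAgentV1(ctx, logger, coord, conn, agentID, "main")
//	if err != nil {
//		logger.Error(ctx, "agent coordination ended", slog.Error(err))
//	}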
func ServeAgentV1(ctx context.Context, logger slog.Logger, c CoordinatorV2, conn net.Conn, id uuid.UUID, name string) error {
logger = logger.With(slog.F("agent_id", id), slog.F("name", name))
defer func() {
logger.Debug(ctx, "closing agent connection")
err := conn.Close()
logger.Debug(ctx, "closed agent connection", slog.Error(err))
}()
ctx, cancel := context.WithCancel(ctx)
defer cancel()
logger.Debug(ctx, "starting new agent connection")
reqs, resps := c.Coordinate(ctx, id, name, AgentCoordinateeAuth{ID: id})
tc := NewTrackedConn(ctx, cancel, conn, id, logger, name, 0, QueueKindAgent)
go tc.SendUpdates()
go v1RespLoop(ctx, cancel, logger, tc, resps)
go v1ReqLoop(ctx, cancel, logger, conn, reqs)
<-ctx.Done()
logger.Debug(ctx, "ending agent connection")
return nil
}
// Close closes all of the open connections in the coordinator and stops the
// coordinator from accepting new connections.
func (c *coordinator) Close() error {
c.mu.Lock()
if c.closed {
c.mu.Unlock()
return nil
}
c.closed = true
close(c.closedChan)
c.mu.Unlock()
err := c.core.close()
// wait for all request loops to complete
c.wg.Wait()
return err
}
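
// close marks the core as closed and removes every remaining peer, marking
// each as LOST so that in-progress connections between peers are not torn
// down.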
func (c *core) close() error {
c.mutex.Lock()
defer c.mutex.Unlock()
if c.closed {
return nil
}
c.closed = true
for id := range c.peers {
// when closing, mark them as LOST so that we don't disrupt in-progress
// connections.
c.removePeerLocked(id, proto.CoordinateResponse_PeerUpdate_LOST, "coordinator close")
}
return nil
}
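
// ServeHTTPDebug renders an HTML summary of the peers and tunnels the
// coordinator currently tracks. It satisfies http.HandlerFunc, so it can be
// mounted directly on a mux; a minimal sketch, assuming a plain net/http mux
// (the route path is illustrative only):
//
//	mux := http.NewServeMux()
//	mux.HandleFunc("/debug/coordinator", coord.ServeHTTPDebug)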
func (c *coordinator) ServeHTTPDebug(w http.ResponseWriter, r *http.Request) {
c.core.serveHTTPDebug(w, r)
}
func (c *core) serveHTTPDebug(w http.ResponseWriter, _ *http.Request) {
debug := c.getHTMLDebug()
w.Header().Set("Content-Type", "text/html; charset=utf-8")
err := debugTempl.Execute(w, debug)
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
_, _ = w.Write([]byte(err.Error()))
return
}
}
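
// getHTMLDebug snapshots the current peers and tunnels under a read lock and
// returns them as an HTMLDebug value for template rendering.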
func (c *core) getHTMLDebug() HTMLDebug {
c.mutex.RLock()
defer c.mutex.RUnlock()
debug := HTMLDebug{Tunnels: c.tunnels.htmlDebug()}
for _, p := range c.peers {
debug.Peers = append(debug.Peers, p.htmlDebug())
}
return debug
}
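
// HTMLDebug is the data passed to the coordinator debug page template.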
type HTMLDebug struct {
Peers []HTMLPeer
Tunnels []HTMLTunnel
}
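
// HTMLPeer is the rendered view of a single peer on the debug page.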
type HTMLPeer struct {
ID uuid.UUID
Name string
CreatedAge time.Duration
LastWriteAge time.Duration
Overwrites int
Node string
}
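
// HTMLTunnel is the rendered view of a single source-to-destination tunnel on
// the debug page.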
type HTMLTunnel struct {
Src, Dst uuid.UUID
}
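
// coordinatorDebugTmpl is the template source behind ServeHTTPDebug, listing
// every peer and tunnel known to the in-memory coordinator.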
var coordinatorDebugTmpl = `
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
th, td {
padding-top: 6px;
padding-bottom: 6px;
padding-left: 10px;
padding-right: 10px;
text-align: left;
}
tr {
border-bottom: 1px solid #ddd;
}
</style>
</head>
<body>
<h1>in-memory wireguard coordinator debug</h1>
<h2 id=peers> <a href=#peers>#</a> peers: total {{ len .Peers }} </h2>
<table>
<tr style="margin-top:4px">
<th>Name</th>
<th>ID</th>
<th>Created Age</th>
<th>Last Write Age</th>
<th>Overwrites</th>
<th>Node</th>
</tr>
{{- range .Peers }}
<tr style="margin-top:4px">
<td>{{ .Name }}</td>
<td>{{ .ID }}</td>
<td>{{ .CreatedAge }}</td>
<td>{{ .LastWriteAge }} ago</td>
<td>{{ .Overwrites }}</td>
<td style="white-space: pre;"><code>{{ .Node }}</code></td>
</tr>
{{- end }}
</table>
<h2 id=tunnels><a href=#tunnels>#</a> tunnels: total {{ len .Tunnels }}</h2>
<table>
<tr style="margin-top:4px">
<th>SrcID</th>
<th>DstID</th>
</tr>
{{- range .Tunnels }}
<tr style="margin-top:4px">
<td>{{ .Src }}</td>
<td>{{ .Dst }}</td>
</tr>
{{- end }}
</table>
</body>
</html>
`
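
// debugTempl is coordinatorDebugTmpl parsed once at package initialization;
// template.Must panics if the template source is malformed.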
var debugTempl = template.Must(template.New("coordinator_debug").Parse(coordinatorDebugTmpl))
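
// SendCtx sends a on c, giving up and returning the context error if ctx is
// done first. It is a small generic helper for channel sends that must not
// outlive a request; for example (mirroring its use in v1ReqLoop):
//
//	if err := SendCtx(ctx, reqs, req); err != nil {
//		return // context canceled or deadline exceeded
//	}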
func SendCtx[A any](ctx context.Context, c chan<- A, a A) (err error) {
select {
case <-ctx.Done():
return ctx.Err()
case c <- a:
return nil
}
}
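
// RecvCtx receives a value from c, returning the context error if ctx is done
// first and io.EOF if c has been closed.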
func RecvCtx[A any](ctx context.Context, c <-chan A) (a A, err error) {
select {
case <-ctx.Done():
return a, ctx.Err()
case a, ok := <-c:
if ok {
return a, nil
}
return a, io.EOF
}
}
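
// v1ReqLoop decodes legacy v1 JSON Node updates from conn and forwards each as
// a v2 CoordinateRequest on reqs until the connection closes or ctx is
// canceled. It closes reqs and cancels the context when it returns.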
func v1ReqLoop(ctx context.Context, cancel context.CancelFunc, logger slog.Logger,
conn net.Conn, reqs chan<- *proto.CoordinateRequest,
) {
defer close(reqs)
defer cancel()
decoder := json.NewDecoder(conn)
for {
var node Node
err := decoder.Decode(&node)
if err != nil {
if xerrors.Is(err, io.EOF) ||
xerrors.Is(err, io.ErrClosedPipe) ||
xerrors.Is(err, context.Canceled) ||
xerrors.Is(err, context.DeadlineExceeded) ||
websocket.CloseStatus(err) > 0 {
logger.Debug(ctx, "v1ReqLoop exiting", slog.Error(err))
} else {
logger.Info(ctx, "v1ReqLoop failed to decode Node update", slog.Error(err))
}
return
}
logger.Debug(ctx, "v1ReqLoop got node update", slog.F("node", node))
pn, err := NodeToProto(&node)
if err != nil {
logger.Critical(ctx, "v1ReqLoop failed to convert v1 node", slog.F("node", node), slog.Error(err))
return
}
req := &proto.CoordinateRequest{UpdateSelf: &proto.CoordinateRequest_UpdateSelf{
Node: pn,
}}
if err := SendCtx(ctx, reqs, req); err != nil {
logger.Debug(ctx, "v1ReqLoop ctx expired", slog.Error(err))
return
}
}
}
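
// v1RespLoop receives v2 CoordinateResponses and enqueues them on the v1 Queue
// (the tracked connection) until ctx is canceled or the response channel is
// closed. It closes the Queue and cancels the context when it returns.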
func v1RespLoop(ctx context.Context, cancel context.CancelFunc, logger slog.Logger, q Queue, resps <-chan *proto.CoordinateResponse) {
defer func() {
cErr := q.Close()
if cErr != nil {
logger.Info(ctx, "error closing response Queue", slog.Error(cErr))
}
cancel()
}()
for {
resp, err := RecvCtx(ctx, resps)
if err != nil {
logger.Debug(ctx, "v1RespLoop done reading responses", slog.Error(err))
return
}
logger.Debug(ctx, "v1RespLoop got response", slog.F("resp", resp))
err = q.Enqueue(resp)
if err != nil && !xerrors.Is(err, context.Canceled) {
logger.Error(ctx, "v1RespLoop failed to enqueue v1 update", slog.Error(err))
}
}
}