mirror of https://github.com/coder/coder.git
feat: use v2 API for agent metadata updates (#12281)
Switches the agent to report metadata over the v2 API. Fixes #10534
This commit is contained in:
parent
7a245e61b1
commit
b0afffbafb
|
@ -90,7 +90,6 @@ type Options struct {
|
||||||
|
|
||||||
type Client interface {
|
type Client interface {
|
||||||
ConnectRPC(ctx context.Context) (drpc.Conn, error)
|
ConnectRPC(ctx context.Context) (drpc.Conn, error)
|
||||||
PostMetadata(ctx context.Context, req agentsdk.PostMetadataRequest) error
|
|
||||||
RewriteDERPMap(derpMap *tailcfg.DERPMap)
|
RewriteDERPMap(derpMap *tailcfg.DERPMap)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -298,7 +297,6 @@ func (a *agent) init() {
|
||||||
// may be happening, but regardless after the intermittent
|
// may be happening, but regardless after the intermittent
|
||||||
// failure, you'll want the agent to reconnect.
|
// failure, you'll want the agent to reconnect.
|
||||||
func (a *agent) runLoop() {
|
func (a *agent) runLoop() {
|
||||||
go a.reportMetadataUntilGracefulShutdown()
|
|
||||||
go a.manageProcessPriorityUntilGracefulShutdown()
|
go a.manageProcessPriorityUntilGracefulShutdown()
|
||||||
|
|
||||||
// need to keep retrying up to the hardCtx so that we can send graceful shutdown-related
|
// need to keep retrying up to the hardCtx so that we can send graceful shutdown-related
|
||||||
|
@ -405,9 +403,7 @@ func (t *trySingleflight) Do(key string, fn func()) {
|
||||||
fn()
|
fn()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *agent) reportMetadataUntilGracefulShutdown() {
|
func (a *agent) reportMetadata(ctx context.Context, conn drpc.Conn) error {
|
||||||
// metadata reporting can cease as soon as we start gracefully shutting down.
|
|
||||||
ctx := a.gracefulCtx
|
|
||||||
tickerDone := make(chan struct{})
|
tickerDone := make(chan struct{})
|
||||||
collectDone := make(chan struct{})
|
collectDone := make(chan struct{})
|
||||||
ctx, cancel := context.WithCancel(ctx)
|
ctx, cancel := context.WithCancel(ctx)
|
||||||
|
@ -567,51 +563,55 @@ func (a *agent) reportMetadataUntilGracefulShutdown() {
|
||||||
var (
|
var (
|
||||||
updatedMetadata = make(map[string]*codersdk.WorkspaceAgentMetadataResult)
|
updatedMetadata = make(map[string]*codersdk.WorkspaceAgentMetadataResult)
|
||||||
reportTimeout = 30 * time.Second
|
reportTimeout = 30 * time.Second
|
||||||
reportSemaphore = make(chan struct{}, 1)
|
reportError = make(chan error, 1)
|
||||||
|
reportInFlight = false
|
||||||
|
aAPI = proto.NewDRPCAgentClient(conn)
|
||||||
)
|
)
|
||||||
reportSemaphore <- struct{}{}
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return ctx.Err()
|
||||||
case mr := <-metadataResults:
|
case mr := <-metadataResults:
|
||||||
// This can overwrite unsent values, but that's fine because
|
// This can overwrite unsent values, but that's fine because
|
||||||
// we're only interested in up-to-date values.
|
// we're only interested in up-to-date values.
|
||||||
updatedMetadata[mr.key] = mr.result
|
updatedMetadata[mr.key] = mr.result
|
||||||
continue
|
continue
|
||||||
case <-report:
|
case err := <-reportError:
|
||||||
if len(updatedMetadata) > 0 {
|
a.logger.Debug(ctx, "batch update metadata complete", slog.Error(err))
|
||||||
select {
|
if err != nil {
|
||||||
case <-reportSemaphore:
|
return xerrors.Errorf("failed to report metadata: %w", err)
|
||||||
default:
|
|
||||||
// If there's already a report in flight, don't send
|
|
||||||
// another one, wait for next tick instead.
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
metadata := make([]agentsdk.Metadata, 0, len(updatedMetadata))
|
|
||||||
for key, result := range updatedMetadata {
|
|
||||||
metadata = append(metadata, agentsdk.Metadata{
|
|
||||||
Key: key,
|
|
||||||
WorkspaceAgentMetadataResult: *result,
|
|
||||||
})
|
|
||||||
delete(updatedMetadata, key)
|
|
||||||
}
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
ctx, cancel := context.WithTimeout(ctx, reportTimeout)
|
|
||||||
defer func() {
|
|
||||||
cancel()
|
|
||||||
reportSemaphore <- struct{}{}
|
|
||||||
}()
|
|
||||||
|
|
||||||
err := a.client.PostMetadata(ctx, agentsdk.PostMetadataRequest{Metadata: metadata})
|
|
||||||
if err != nil {
|
|
||||||
a.logger.Error(ctx, "agent failed to report metadata", slog.Error(err))
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
}
|
||||||
|
reportInFlight = false
|
||||||
|
case <-report:
|
||||||
|
if len(updatedMetadata) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if reportInFlight {
|
||||||
|
// If there's already a report in flight, don't send
|
||||||
|
// another one, wait for next tick instead.
|
||||||
|
a.logger.Debug(ctx, "skipped metadata report tick because report is in flight")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
metadata := make([]*proto.Metadata, 0, len(updatedMetadata))
|
||||||
|
for key, result := range updatedMetadata {
|
||||||
|
pr := agentsdk.ProtoFromMetadataResult(*result)
|
||||||
|
metadata = append(metadata, &proto.Metadata{
|
||||||
|
Key: key,
|
||||||
|
Result: pr,
|
||||||
|
})
|
||||||
|
delete(updatedMetadata, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
reportInFlight = true
|
||||||
|
go func() {
|
||||||
|
a.logger.Debug(ctx, "batch updating metadata")
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, reportTimeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
_, err := aAPI.BatchUpdateMetadata(ctx, &proto.BatchUpdateMetadataRequest{Metadata: metadata})
|
||||||
|
reportError <- err
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -783,6 +783,9 @@ func (a *agent) run() (retErr error) {
|
||||||
// lifecycle reporting has to be via gracefulShutdownBehaviorRemain
|
// lifecycle reporting has to be via gracefulShutdownBehaviorRemain
|
||||||
connMan.start("report lifecycle", gracefulShutdownBehaviorRemain, a.reportLifecycle)
|
connMan.start("report lifecycle", gracefulShutdownBehaviorRemain, a.reportLifecycle)
|
||||||
|
|
||||||
|
// metadata reporting can cease as soon as we start gracefully shutting down
|
||||||
|
connMan.start("report metadata", gracefulShutdownBehaviorStop, a.reportMetadata)
|
||||||
|
|
||||||
// channels to sync goroutines below
|
// channels to sync goroutines below
|
||||||
// handle manifest
|
// handle manifest
|
||||||
// |
|
// |
|
||||||
|
|
|
@ -82,7 +82,6 @@ type Client struct {
|
||||||
t testing.TB
|
t testing.TB
|
||||||
logger slog.Logger
|
logger slog.Logger
|
||||||
agentID uuid.UUID
|
agentID uuid.UUID
|
||||||
metadata map[string]agentsdk.Metadata
|
|
||||||
coordinator tailnet.Coordinator
|
coordinator tailnet.Coordinator
|
||||||
server *drpcserver.Server
|
server *drpcserver.Server
|
||||||
fakeAgentAPI *FakeAgentAPI
|
fakeAgentAPI *FakeAgentAPI
|
||||||
|
@ -131,22 +130,7 @@ func (c *Client) GetStartup() <-chan *agentproto.Startup {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) GetMetadata() map[string]agentsdk.Metadata {
|
func (c *Client) GetMetadata() map[string]agentsdk.Metadata {
|
||||||
c.mu.Lock()
|
return c.fakeAgentAPI.GetMetadata()
|
||||||
defer c.mu.Unlock()
|
|
||||||
return maps.Clone(c.metadata)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Client) PostMetadata(ctx context.Context, req agentsdk.PostMetadataRequest) error {
|
|
||||||
c.mu.Lock()
|
|
||||||
defer c.mu.Unlock()
|
|
||||||
if c.metadata == nil {
|
|
||||||
c.metadata = make(map[string]agentsdk.Metadata)
|
|
||||||
}
|
|
||||||
for _, md := range req.Metadata {
|
|
||||||
c.metadata[md.Key] = md
|
|
||||||
c.logger.Debug(ctx, "post metadata", slog.F("key", md.Key), slog.F("md", md))
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) GetStartupLogs() []agentsdk.Log {
|
func (c *Client) GetStartupLogs() []agentsdk.Log {
|
||||||
|
@ -186,6 +170,7 @@ type FakeAgentAPI struct {
|
||||||
appHealthCh chan *agentproto.BatchUpdateAppHealthRequest
|
appHealthCh chan *agentproto.BatchUpdateAppHealthRequest
|
||||||
logsCh chan<- *agentproto.BatchCreateLogsRequest
|
logsCh chan<- *agentproto.BatchCreateLogsRequest
|
||||||
lifecycleStates []codersdk.WorkspaceAgentLifecycle
|
lifecycleStates []codersdk.WorkspaceAgentLifecycle
|
||||||
|
metadata map[string]agentsdk.Metadata
|
||||||
|
|
||||||
getServiceBannerFunc func() (codersdk.ServiceBannerConfig, error)
|
getServiceBannerFunc func() (codersdk.ServiceBannerConfig, error)
|
||||||
}
|
}
|
||||||
|
@ -254,9 +239,24 @@ func (f *FakeAgentAPI) UpdateStartup(_ context.Context, req *agentproto.UpdateSt
|
||||||
return req.GetStartup(), nil
|
return req.GetStartup(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*FakeAgentAPI) BatchUpdateMetadata(context.Context, *agentproto.BatchUpdateMetadataRequest) (*agentproto.BatchUpdateMetadataResponse, error) {
|
func (f *FakeAgentAPI) GetMetadata() map[string]agentsdk.Metadata {
|
||||||
// TODO implement me
|
f.Lock()
|
||||||
panic("implement me")
|
defer f.Unlock()
|
||||||
|
return maps.Clone(f.metadata)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *FakeAgentAPI) BatchUpdateMetadata(ctx context.Context, req *agentproto.BatchUpdateMetadataRequest) (*agentproto.BatchUpdateMetadataResponse, error) {
|
||||||
|
f.Lock()
|
||||||
|
defer f.Unlock()
|
||||||
|
if f.metadata == nil {
|
||||||
|
f.metadata = make(map[string]agentsdk.Metadata)
|
||||||
|
}
|
||||||
|
for _, md := range req.Metadata {
|
||||||
|
smd := agentsdk.MetadataFromProto(md)
|
||||||
|
f.metadata[md.Key] = smd
|
||||||
|
f.logger.Debug(ctx, "post metadata", slog.F("key", md.Key), slog.F("md", md))
|
||||||
|
}
|
||||||
|
return &agentproto.BatchUpdateMetadataResponse{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *FakeAgentAPI) SetLogsChannel(ch chan<- *agentproto.BatchCreateLogsRequest) {
|
func (f *FakeAgentAPI) SetLogsChannel(ch chan<- *agentproto.BatchCreateLogsRequest) {
|
||||||
|
|
|
@ -85,6 +85,9 @@ type PostMetadataRequest struct {
|
||||||
// performance.
|
// performance.
|
||||||
type PostMetadataRequestDeprecated = codersdk.WorkspaceAgentMetadataResult
|
type PostMetadataRequestDeprecated = codersdk.WorkspaceAgentMetadataResult
|
||||||
|
|
||||||
|
// PostMetadata posts agent metadata to the Coder server.
|
||||||
|
//
|
||||||
|
// Deprecated: use BatchUpdateMetadata on the agent dRPC API instead.
|
||||||
func (c *Client) PostMetadata(ctx context.Context, req PostMetadataRequest) error {
|
func (c *Client) PostMetadata(ctx context.Context, req PostMetadataRequest) error {
|
||||||
res, err := c.SDK.Request(ctx, http.MethodPost, "/api/v2/workspaceagents/me/metadata", req)
|
res, err := c.SDK.Request(ctx, http.MethodPost, "/api/v2/workspaceagents/me/metadata", req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -112,6 +112,31 @@ func ProtoFromMetadataDescription(d codersdk.WorkspaceAgentMetadataDescription)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ProtoFromMetadataResult(r codersdk.WorkspaceAgentMetadataResult) *proto.WorkspaceAgentMetadata_Result {
|
||||||
|
return &proto.WorkspaceAgentMetadata_Result{
|
||||||
|
CollectedAt: timestamppb.New(r.CollectedAt),
|
||||||
|
Age: r.Age,
|
||||||
|
Value: r.Value,
|
||||||
|
Error: r.Error,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func MetadataResultFromProto(r *proto.WorkspaceAgentMetadata_Result) codersdk.WorkspaceAgentMetadataResult {
|
||||||
|
return codersdk.WorkspaceAgentMetadataResult{
|
||||||
|
CollectedAt: r.GetCollectedAt().AsTime(),
|
||||||
|
Age: r.GetAge(),
|
||||||
|
Value: r.GetValue(),
|
||||||
|
Error: r.GetError(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func MetadataFromProto(m *proto.Metadata) Metadata {
|
||||||
|
return Metadata{
|
||||||
|
Key: m.GetKey(),
|
||||||
|
WorkspaceAgentMetadataResult: MetadataResultFromProto(m.GetResult()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func AgentScriptsFromProto(protoScripts []*proto.WorkspaceAgentScript) ([]codersdk.WorkspaceAgentScript, error) {
|
func AgentScriptsFromProto(protoScripts []*proto.WorkspaceAgentScript) ([]codersdk.WorkspaceAgentScript, error) {
|
||||||
ret := make([]codersdk.WorkspaceAgentScript, len(protoScripts))
|
ret := make([]codersdk.WorkspaceAgentScript, len(protoScripts))
|
||||||
for i, protoScript := range protoScripts {
|
for i, protoScript := range protoScripts {
|
||||||
|
|
|
@ -6,6 +6,7 @@ import (
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
"google.golang.org/protobuf/types/known/timestamppb"
|
||||||
"tailscale.com/tailcfg"
|
"tailscale.com/tailcfg"
|
||||||
|
|
||||||
"github.com/coder/coder/v2/agent/proto"
|
"github.com/coder/coder/v2/agent/proto"
|
||||||
|
@ -176,3 +177,42 @@ func TestProtoFromLifecycle(t *testing.T) {
|
||||||
require.Equal(t, s, state)
|
require.Equal(t, s, state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestProtoFromMetadataResult(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
now := dbtime.Now()
|
||||||
|
result := codersdk.WorkspaceAgentMetadataResult{
|
||||||
|
CollectedAt: now,
|
||||||
|
Age: 4,
|
||||||
|
Value: "lemons",
|
||||||
|
Error: "rats",
|
||||||
|
}
|
||||||
|
pr := agentsdk.ProtoFromMetadataResult(result)
|
||||||
|
require.NotNil(t, pr)
|
||||||
|
require.Equal(t, now, pr.CollectedAt.AsTime())
|
||||||
|
require.EqualValues(t, 4, pr.Age)
|
||||||
|
require.Equal(t, "lemons", pr.Value)
|
||||||
|
require.Equal(t, "rats", pr.Error)
|
||||||
|
result2 := agentsdk.MetadataResultFromProto(pr)
|
||||||
|
require.Equal(t, result, result2)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetadataFromProto(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
now := dbtime.Now()
|
||||||
|
pmd := &proto.Metadata{
|
||||||
|
Key: "a flat",
|
||||||
|
Result: &proto.WorkspaceAgentMetadata_Result{
|
||||||
|
CollectedAt: timestamppb.New(now),
|
||||||
|
Age: 88,
|
||||||
|
Value: "lemons",
|
||||||
|
Error: "rats",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
smd := agentsdk.MetadataFromProto(pmd)
|
||||||
|
require.Equal(t, "a flat", smd.Key)
|
||||||
|
require.Equal(t, now, smd.CollectedAt)
|
||||||
|
require.EqualValues(t, 88, smd.Age)
|
||||||
|
require.Equal(t, "lemons", smd.Value)
|
||||||
|
require.Equal(t, "rats", smd.Error)
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue