diff --git a/agent/agent.go b/agent/agent.go
index 10ecdfb5d5..34af2b59f3 100644
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -210,25 +210,31 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
 	var out bytes.Buffer
 	result := &codersdk.WorkspaceAgentMetadataResult{
 		// CollectedAt is set here for testing purposes and overrode by
-		// the server to the time the server received the result to protect
-		// against clock skew.
+		// coderd to the time of server receipt to solve clock skew.
 		//
 		// In the future, the server may accept the timestamp from the agent
-		// if it is certain the clocks are in sync.
+		// if it can guarantee the clocks are synchronized.
 		CollectedAt: time.Now(),
 	}
 	cmd, err := a.sshServer.CreateCommand(ctx, md.Script, nil)
 	if err != nil {
-		result.Error = err.Error()
+		result.Error = fmt.Sprintf("create cmd: %+v", err)
 		return result
 	}
 
 	cmd.Stdout = &out
 	cmd.Stderr = &out
+	cmd.Stdin = io.LimitReader(nil, 0)
 
-	// The error isn't mutually exclusive with useful output.
-	err = cmd.Run()
+	// We split up Start and Wait instead of calling Run so that we can return a more precise error.
+	err = cmd.Start()
+	if err != nil {
+		result.Error = fmt.Sprintf("start cmd: %+v", err)
+		return result
+	}
 
+	// This error isn't mutually exclusive with useful output.
+	err = cmd.Wait()
 	const bufLimit = 10 << 10
 	if out.Len() > bufLimit {
 		err = errors.Join(
@@ -238,8 +244,12 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
 		out.Truncate(bufLimit)
 	}
 
+	// Important: if the command times out, we may see a misleading error like
+	// "exit status 1", so it's important to include the context error.
+	err = errors.Join(err, ctx.Err())
+
 	if err != nil {
-		result.Error = err.Error()
+		result.Error = fmt.Sprintf("run cmd: %+v", err)
 	}
 	result.Value = out.String()
 	return result
diff --git a/agent/agent_test.go b/agent/agent_test.go
index 1d5a852f7d..ee135a05ce 100644
--- a/agent/agent_test.go
+++ b/agent/agent_test.go
@@ -951,19 +951,17 @@ func TestAgent_StartupScript(t *testing.T) {
 func TestAgent_Metadata(t *testing.T) {
 	t.Parallel()
 
+	echoHello := "echo 'hello'"
+
 	t.Run("Once", func(t *testing.T) {
 		t.Parallel()
-		script := "echo -n hello"
-		if runtime.GOOS == "windows" {
-			script = "powershell " + script
-		}
 		//nolint:dogsled
 		_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
 			Metadata: []codersdk.WorkspaceAgentMetadataDescription{
 				{
 					Key:      "greeting",
 					Interval: 0,
-					Script:   script,
+					Script:   echoHello,
 				},
 			},
 		}, 0)
@@ -986,80 +984,113 @@ func TestAgent_Metadata(t *testing.T) {
 	})
 
 	t.Run("Many", func(t *testing.T) {
-		if runtime.GOOS == "windows" {
-			// Shell scripting in Windows is a pain, and we have already tested
-			// that the OS logic works in the simpler "Once" test above.
-			t.Skip()
-		}
 		t.Parallel()
-
-		dir := t.TempDir()
-
-		const reportInterval = 2
-		const intervalUnit = 100 * time.Millisecond
-		var (
-			greetingPath = filepath.Join(dir, "greeting")
-			script       = "echo hello | tee -a " + greetingPath
-		)
+		//nolint:dogsled
 		_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
 			Metadata: []codersdk.WorkspaceAgentMetadataDescription{
 				{
 					Key:      "greeting",
-					Interval: reportInterval,
-					Script:   script,
-				},
-				{
-					Key:      "bad",
-					Interval: reportInterval,
-					Script:   "exit 1",
+					Interval: 1,
+					Timeout:  100,
+					Script:   echoHello,
 				},
 			},
 		}, 0)
 
+		var gotMd map[string]agentsdk.PostMetadataRequest
 		require.Eventually(t, func() bool {
-			return len(client.getMetadata()) == 2
+			gotMd = client.getMetadata()
+			return len(gotMd) == 1
 		}, testutil.WaitShort, testutil.IntervalMedium)
 
-		for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
-			md := client.getMetadata()
-			if len(md) != 2 {
-				panic("unexpected number of metadata entries")
-			}
+		collectedAt1 := gotMd["greeting"].CollectedAt
+		if !assert.Equal(t, "hello", strings.TrimSpace(gotMd["greeting"].Value)) {
+			t.Errorf("got: %+v", gotMd)
+		}
 
-			require.Equal(t, "hello\n", md["greeting"].Value)
-			require.Equal(t, "exit status 1", md["bad"].Error)
-
-			greetingByt, err := os.ReadFile(greetingPath)
-			require.NoError(t, err)
-
-			var (
-				numGreetings      = bytes.Count(greetingByt, []byte("hello"))
-				idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
-				// We allow a 50% error margin because the report loop may backlog
-				// in CI and other toasters. In production, there is no hard
-				// guarantee on timing either, and the frontend gives similar
-				// wiggle room to the staleness of the value.
-				upperBound = int(idealNumGreetings) + 1
-				lowerBound = (int(idealNumGreetings) / 2)
-			)
-
-			if idealNumGreetings < 50 {
-				// There is an insufficient sample size.
-				continue
-			}
-
-			t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
-			// The report loop may slow down on load, but it should never, ever
-			// speed up.
-			if numGreetings > upperBound {
-				t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
-			} else if numGreetings < lowerBound {
-				t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
-			}
+		if !assert.Eventually(t, func() bool {
+			gotMd = client.getMetadata()
+			return gotMd["greeting"].CollectedAt.After(collectedAt1)
+		}, testutil.WaitShort, testutil.IntervalMedium) {
+			t.Fatalf("expected metadata to be collected again")
 		}
 	})
 }
 
+func TestAgentMetadata_Timing(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		// Shell scripting in Windows is a pain, and we have already tested
+		// that the OS logic works in the simpler tests.
+		t.Skip()
+	}
+	testutil.SkipIfNotTiming(t)
+	t.Parallel()
+
+	dir := t.TempDir()
+
+	const reportInterval = 2
+	const intervalUnit = 100 * time.Millisecond
+	var (
+		greetingPath = filepath.Join(dir, "greeting")
+		script       = "echo hello | tee -a " + greetingPath
+	)
+	//nolint:dogsled
+	_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
+		Metadata: []codersdk.WorkspaceAgentMetadataDescription{
+			{
+				Key:      "greeting",
+				Interval: reportInterval,
+				Script:   script,
+			},
+			{
+				Key:      "bad",
+				Interval: reportInterval,
+				Script:   "exit 1",
+			},
+		},
+	}, 0)
+
+	require.Eventually(t, func() bool {
+		return len(client.getMetadata()) == 2
+	}, testutil.WaitShort, testutil.IntervalMedium)
+
+	for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
+		md := client.getMetadata()
+		require.Len(t, md, 2, "got: %+v", md)
+
+		require.Equal(t, "hello\n", md["greeting"].Value)
+		require.Equal(t, "run cmd: exit status 1", md["bad"].Error)
+
+		greetingByt, err := os.ReadFile(greetingPath)
+		require.NoError(t, err)
+
+		var (
+			numGreetings      = bytes.Count(greetingByt, []byte("hello"))
+			idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
+			// We allow a 50% error margin because the report loop may backlog
+			// in CI and other toasters. In production, there is no hard
+			// guarantee on timing either, and the frontend gives similar
+			// wiggle room to the staleness of the value.
+			upperBound = int(idealNumGreetings) + 1
+			lowerBound = (int(idealNumGreetings) / 2)
+		)
+
+		if idealNumGreetings < 50 {
+			// There is an insufficient sample size.
+			continue
+		}
+
+		t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
+		// The report loop may slow down on load, but it should never, ever
+		// speed up.
+		if numGreetings > upperBound {
+			t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
+		} else if numGreetings < lowerBound {
+			t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
+		}
+	}
+}
+
 func TestAgent_Lifecycle(t *testing.T) {
 	t.Parallel()
 
diff --git a/testutil/enable_timing.go b/testutil/enable_timing.go
new file mode 100644
index 0000000000..d9fffafd95
--- /dev/null
+++ b/testutil/enable_timing.go
@@ -0,0 +1,8 @@
+//go:build timing
+
+package testutil
+
+var _ = func() any {
+	timing = true
+	return nil
+}()
diff --git a/testutil/timing.go b/testutil/timing.go
new file mode 100644
index 0000000000..9cdd3bd8f6
--- /dev/null
+++ b/testutil/timing.go
@@ -0,0 +1,20 @@
+package testutil
+
+import (
+	"testing"
+)
+
+// We can't run timing-sensitive tests in CI because of the
+// great variance in runner performance. Instead of not testing timing at all,
+// we relegate it to humans manually running certain tests with the "-timing"
+// flag from time to time.
+//
+// Eventually, we should run all timing tests in a self-hosted runner.
+
+var timing bool
+
+func SkipIfNotTiming(t *testing.T) {
+	if !timing {
+		t.Skip("skipping timing-sensitive test")
+	}
+}