chore: skip timing-sensistive AgentMetadata test in the standard suite (#7237)

* chore: skip timing-sensistive AgentMetadata test in the standard suite

* Add test-timing target

* fix windows?

* Works on my Windows desktop?

* Use tag system

* fixup! Use tag system
This commit is contained in:
Ammar Bandukwala 2023-05-02 05:41:41 -05:00 committed by GitHub
parent 398d08a0cf
commit 465fe8658d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 138 additions and 69 deletions

View File

@ -210,25 +210,31 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
var out bytes.Buffer
result := &codersdk.WorkspaceAgentMetadataResult{
// CollectedAt is set here for testing purposes and overrode by
// the server to the time the server received the result to protect
// against clock skew.
// coderd to the time of server receipt to solve clock skew.
//
// In the future, the server may accept the timestamp from the agent
// if it is certain the clocks are in sync.
// if it can guarantee the clocks are synchronized.
CollectedAt: time.Now(),
}
cmd, err := a.sshServer.CreateCommand(ctx, md.Script, nil)
if err != nil {
result.Error = err.Error()
result.Error = fmt.Sprintf("create cmd: %+v", err)
return result
}
cmd.Stdout = &out
cmd.Stderr = &out
cmd.Stdin = io.LimitReader(nil, 0)
// The error isn't mutually exclusive with useful output.
err = cmd.Run()
// We split up Start and Wait instead of calling Run so that we can return a more precise error.
err = cmd.Start()
if err != nil {
result.Error = fmt.Sprintf("start cmd: %+v", err)
return result
}
// This error isn't mutually exclusive with useful output.
err = cmd.Wait()
const bufLimit = 10 << 10
if out.Len() > bufLimit {
err = errors.Join(
@ -238,8 +244,12 @@ func (a *agent) collectMetadata(ctx context.Context, md codersdk.WorkspaceAgentM
out.Truncate(bufLimit)
}
// Important: if the command times out, we may see a misleading error like
// "exit status 1", so it's important to include the context error.
err = errors.Join(err, ctx.Err())
if err != nil {
result.Error = err.Error()
result.Error = fmt.Sprintf("run cmd: %+v", err)
}
result.Value = out.String()
return result

View File

@ -951,19 +951,17 @@ func TestAgent_StartupScript(t *testing.T) {
func TestAgent_Metadata(t *testing.T) {
t.Parallel()
echoHello := "echo 'hello'"
t.Run("Once", func(t *testing.T) {
t.Parallel()
script := "echo -n hello"
if runtime.GOOS == "windows" {
script = "powershell " + script
}
//nolint:dogsled
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
{
Key: "greeting",
Interval: 0,
Script: script,
Script: echoHello,
},
},
}, 0)
@ -986,80 +984,113 @@ func TestAgent_Metadata(t *testing.T) {
})
t.Run("Many", func(t *testing.T) {
if runtime.GOOS == "windows" {
// Shell scripting in Windows is a pain, and we have already tested
// that the OS logic works in the simpler "Once" test above.
t.Skip()
}
t.Parallel()
dir := t.TempDir()
const reportInterval = 2
const intervalUnit = 100 * time.Millisecond
var (
greetingPath = filepath.Join(dir, "greeting")
script = "echo hello | tee -a " + greetingPath
)
//nolint:dogsled
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
{
Key: "greeting",
Interval: reportInterval,
Script: script,
},
{
Key: "bad",
Interval: reportInterval,
Script: "exit 1",
Interval: 1,
Timeout: 100,
Script: echoHello,
},
},
}, 0)
var gotMd map[string]agentsdk.PostMetadataRequest
require.Eventually(t, func() bool {
return len(client.getMetadata()) == 2
gotMd = client.getMetadata()
return len(gotMd) == 1
}, testutil.WaitShort, testutil.IntervalMedium)
for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
md := client.getMetadata()
if len(md) != 2 {
panic("unexpected number of metadata entries")
}
collectedAt1 := gotMd["greeting"].CollectedAt
if !assert.Equal(t, "hello", strings.TrimSpace(gotMd["greeting"].Value)) {
t.Errorf("got: %+v", gotMd)
}
require.Equal(t, "hello\n", md["greeting"].Value)
require.Equal(t, "exit status 1", md["bad"].Error)
greetingByt, err := os.ReadFile(greetingPath)
require.NoError(t, err)
var (
numGreetings = bytes.Count(greetingByt, []byte("hello"))
idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
// We allow a 50% error margin because the report loop may backlog
// in CI and other toasters. In production, there is no hard
// guarantee on timing either, and the frontend gives similar
// wiggle room to the staleness of the value.
upperBound = int(idealNumGreetings) + 1
lowerBound = (int(idealNumGreetings) / 2)
)
if idealNumGreetings < 50 {
// There is an insufficient sample size.
continue
}
t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
// The report loop may slow down on load, but it should never, ever
// speed up.
if numGreetings > upperBound {
t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
} else if numGreetings < lowerBound {
t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
}
if !assert.Eventually(t, func() bool {
gotMd = client.getMetadata()
return gotMd["greeting"].CollectedAt.After(collectedAt1)
}, testutil.WaitShort, testutil.IntervalMedium) {
t.Fatalf("expected metadata to be collected again")
}
})
}
func TestAgentMetadata_Timing(t *testing.T) {
if runtime.GOOS == "windows" {
// Shell scripting in Windows is a pain, and we have already tested
// that the OS logic works in the simpler tests.
t.Skip()
}
testutil.SkipIfNotTiming(t)
t.Parallel()
dir := t.TempDir()
const reportInterval = 2
const intervalUnit = 100 * time.Millisecond
var (
greetingPath = filepath.Join(dir, "greeting")
script = "echo hello | tee -a " + greetingPath
)
//nolint:dogsled
_, client, _, _, _ := setupAgent(t, agentsdk.Manifest{
Metadata: []codersdk.WorkspaceAgentMetadataDescription{
{
Key: "greeting",
Interval: reportInterval,
Script: script,
},
{
Key: "bad",
Interval: reportInterval,
Script: "exit 1",
},
},
}, 0)
require.Eventually(t, func() bool {
return len(client.getMetadata()) == 2
}, testutil.WaitShort, testutil.IntervalMedium)
for start := time.Now(); time.Since(start) < testutil.WaitMedium; time.Sleep(testutil.IntervalMedium) {
md := client.getMetadata()
require.Len(t, md, 2, "got: %+v", md)
require.Equal(t, "hello\n", md["greeting"].Value)
require.Equal(t, "run cmd: exit status 1", md["bad"].Error)
greetingByt, err := os.ReadFile(greetingPath)
require.NoError(t, err)
var (
numGreetings = bytes.Count(greetingByt, []byte("hello"))
idealNumGreetings = time.Since(start) / (reportInterval * intervalUnit)
// We allow a 50% error margin because the report loop may backlog
// in CI and other toasters. In production, there is no hard
// guarantee on timing either, and the frontend gives similar
// wiggle room to the staleness of the value.
upperBound = int(idealNumGreetings) + 1
lowerBound = (int(idealNumGreetings) / 2)
)
if idealNumGreetings < 50 {
// There is an insufficient sample size.
continue
}
t.Logf("numGreetings: %d, idealNumGreetings: %d", numGreetings, idealNumGreetings)
// The report loop may slow down on load, but it should never, ever
// speed up.
if numGreetings > upperBound {
t.Fatalf("too many greetings: %d > %d in %v", numGreetings, upperBound, time.Since(start))
} else if numGreetings < lowerBound {
t.Fatalf("too few greetings: %d < %d", numGreetings, lowerBound)
}
}
}
func TestAgent_Lifecycle(t *testing.T) {
t.Parallel()

View File

@ -0,0 +1,8 @@
//go:build timing
package testutil
var _ = func() any {
timing = true
return nil
}()

20
testutil/timing.go Normal file
View File

@ -0,0 +1,20 @@
package testutil
import (
"testing"
)
// We can't run timing-sensitive tests in CI because of the
// great variance in runner performance. Instead of not testing timing at all,
// we relegate it to humans manually running certain tests with the "-timing"
// flag from time to time.
//
// Eventually, we should run all timing tests in a self-hosted runner.
var timing bool
func SkipIfNotTiming(t *testing.T) {
if !timing {
t.Skip("skipping timing-sensitive test")
}
}