mirror of https://github.com/coder/coder.git
chore: add prometheus monitoring of workspace traffic generation (#7583)
- Exposes reads/writes from scaletest traffic generation (default: 0.0.0.0:21112)
- Adds self-hosted prometheus with remote_write to loadtest terraform
- Adds convenience script to run a traffic generation test
This commit is contained in:
parent
0fd2ea4044
commit
795050bba3
|
@ -59,3 +59,4 @@ site/stats/
|
||||||
./scaletest/terraform/.terraform
|
./scaletest/terraform/.terraform
|
||||||
./scaletest/terraform/.terraform.lock.hcl
|
./scaletest/terraform/.terraform.lock.hcl
|
||||||
terraform.tfstate.*
|
terraform.tfstate.*
|
||||||
|
**/*.tfvars
|
||||||
|
|
|
@ -62,6 +62,7 @@ site/stats/
|
||||||
./scaletest/terraform/.terraform
|
./scaletest/terraform/.terraform
|
||||||
./scaletest/terraform/.terraform.lock.hcl
|
./scaletest/terraform/.terraform.lock.hcl
|
||||||
terraform.tfstate.*
|
terraform.tfstate.*
|
||||||
|
**/*.tfvars
|
||||||
# .prettierignore.include:
|
# .prettierignore.include:
|
||||||
# Helm templates contain variables that are invalid YAML and can't be formatted
|
# Helm templates contain variables that are invalid YAML and can't be formatted
|
||||||
# by Prettier.
|
# by Prettier.
|
||||||
|
|
|
@ -14,9 +14,14 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
"go.opentelemetry.io/otel/trace"
|
"go.opentelemetry.io/otel/trace"
|
||||||
"golang.org/x/xerrors"
|
"golang.org/x/xerrors"
|
||||||
|
|
||||||
|
"cdr.dev/slog"
|
||||||
|
"cdr.dev/slog/sloggers/sloghuman"
|
||||||
|
|
||||||
"github.com/coder/coder/cli/clibase"
|
"github.com/coder/coder/cli/clibase"
|
||||||
"github.com/coder/coder/cli/cliui"
|
"github.com/coder/coder/cli/cliui"
|
||||||
"github.com/coder/coder/coderd/httpapi"
|
"github.com/coder/coder/coderd/httpapi"
|
||||||
|
@ -896,8 +901,11 @@ func (r *RootCmd) scaletestCreateWorkspaces() *clibase.Cmd {
|
||||||
|
|
||||||
func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
||||||
var (
|
var (
|
||||||
tickInterval time.Duration
|
tickInterval time.Duration
|
||||||
bytesPerTick int64
|
bytesPerTick int64
|
||||||
|
scaletestPrometheusAddress string
|
||||||
|
scaletestPrometheusWait time.Duration
|
||||||
|
|
||||||
client = &codersdk.Client{}
|
client = &codersdk.Client{}
|
||||||
tracingFlags = &scaletestTracingFlags{}
|
tracingFlags = &scaletestTracingFlags{}
|
||||||
strategy = &scaletestStrategyFlags{}
|
strategy = &scaletestStrategyFlags{}
|
||||||
|
@ -913,6 +921,12 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
||||||
),
|
),
|
||||||
Handler: func(inv *clibase.Invocation) error {
|
Handler: func(inv *clibase.Invocation) error {
|
||||||
ctx := inv.Context()
|
ctx := inv.Context()
|
||||||
|
reg := prometheus.NewRegistry()
|
||||||
|
metrics := workspacetraffic.NewMetrics(reg, "username", "workspace_name", "agent_name")
|
||||||
|
|
||||||
|
logger := slog.Make(sloghuman.Sink(io.Discard))
|
||||||
|
prometheusSrvClose := ServeHandler(ctx, logger, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}), scaletestPrometheusAddress, "prometheus")
|
||||||
|
defer prometheusSrvClose()
|
||||||
|
|
||||||
// Bypass rate limiting
|
// Bypass rate limiting
|
||||||
client.HTTPClient = &http.Client{
|
client.HTTPClient = &http.Client{
|
||||||
|
@ -943,6 +957,9 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
||||||
_, _ = fmt.Fprintln(inv.Stderr, "\nUploading traces...")
|
_, _ = fmt.Fprintln(inv.Stderr, "\nUploading traces...")
|
||||||
if err := closeTracing(ctx); err != nil {
|
if err := closeTracing(ctx); err != nil {
|
||||||
_, _ = fmt.Fprintf(inv.Stderr, "\nError uploading traces: %+v\n", err)
|
_, _ = fmt.Fprintf(inv.Stderr, "\nError uploading traces: %+v\n", err)
|
||||||
|
// Wait for prometheus metrics to be scraped
|
||||||
|
_, _ = fmt.Fprintf(inv.Stderr, "Waiting %s for prometheus metrics to be scraped\n", scaletestPrometheusWait)
|
||||||
|
<-time.After(scaletestPrometheusWait)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
tracer := tracerProvider.Tracer(scaletestTracerName)
|
tracer := tracerProvider.Tracer(scaletestTracerName)
|
||||||
|
@ -955,9 +972,10 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
||||||
th := harness.NewTestHarness(strategy.toStrategy(), cleanupStrategy.toStrategy())
|
th := harness.NewTestHarness(strategy.toStrategy(), cleanupStrategy.toStrategy())
|
||||||
for idx, ws := range workspaces {
|
for idx, ws := range workspaces {
|
||||||
var (
|
var (
|
||||||
agentID uuid.UUID
|
agentID uuid.UUID
|
||||||
name = "workspace-traffic"
|
agentName string
|
||||||
id = strconv.Itoa(idx)
|
name = "workspace-traffic"
|
||||||
|
id = strconv.Itoa(idx)
|
||||||
)
|
)
|
||||||
|
|
||||||
for _, res := range ws.LatestBuild.Resources {
|
for _, res := range ws.LatestBuild.Resources {
|
||||||
|
@ -965,6 +983,7 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
agentID = res.Agents[0].ID
|
agentID = res.Agents[0].ID
|
||||||
|
agentName = res.Agents[0].Name
|
||||||
}
|
}
|
||||||
|
|
||||||
if agentID == uuid.Nil {
|
if agentID == uuid.Nil {
|
||||||
|
@ -974,16 +993,20 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
||||||
|
|
||||||
// Setup our workspace agent connection.
|
// Setup our workspace agent connection.
|
||||||
config := workspacetraffic.Config{
|
config := workspacetraffic.Config{
|
||||||
AgentID: agentID,
|
AgentID: agentID,
|
||||||
BytesPerTick: bytesPerTick,
|
AgentName: agentName,
|
||||||
Duration: strategy.timeout,
|
BytesPerTick: bytesPerTick,
|
||||||
TickInterval: tickInterval,
|
Duration: strategy.timeout,
|
||||||
|
TickInterval: tickInterval,
|
||||||
|
WorkspaceName: ws.Name,
|
||||||
|
WorkspaceOwner: ws.OwnerName,
|
||||||
|
Registry: reg,
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := config.Validate(); err != nil {
|
if err := config.Validate(); err != nil {
|
||||||
return xerrors.Errorf("validate config: %w", err)
|
return xerrors.Errorf("validate config: %w", err)
|
||||||
}
|
}
|
||||||
var runner harness.Runnable = workspacetraffic.NewRunner(client, config)
|
var runner harness.Runnable = workspacetraffic.NewRunner(client, config, metrics)
|
||||||
if tracingEnabled {
|
if tracingEnabled {
|
||||||
runner = &runnableTraceWrapper{
|
runner = &runnableTraceWrapper{
|
||||||
tracer: tracer,
|
tracer: tracer,
|
||||||
|
@ -1034,6 +1057,20 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
|
||||||
Description: "How often to send traffic.",
|
Description: "How often to send traffic.",
|
||||||
Value: clibase.DurationOf(&tickInterval),
|
Value: clibase.DurationOf(&tickInterval),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Flag: "scaletest-prometheus-address",
|
||||||
|
Env: "CODER_SCALETEST_PROMETHEUS_ADDRESS",
|
||||||
|
Default: "0.0.0.0:21112",
|
||||||
|
Description: "Address on which to expose scaletest Prometheus metrics.",
|
||||||
|
Value: clibase.StringOf(&scaletestPrometheusAddress),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Flag: "scaletest-prometheus-wait",
|
||||||
|
Env: "CODER_SCALETEST_PROMETHEUS_WAIT",
|
||||||
|
Default: "5s",
|
||||||
|
Description: "How long to wait before exiting in order to allow Prometheus metrics to be scraped.",
|
||||||
|
Value: clibase.DurationOf(&scaletestPrometheusWait),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
tracingFlags.attach(&cmd.Options)
|
tracingFlags.attach(&cmd.Options)
|
||||||
|
|
|
@ -8,17 +8,12 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/coder/coder/agent"
|
|
||||||
"github.com/coder/coder/cli/clitest"
|
"github.com/coder/coder/cli/clitest"
|
||||||
"github.com/coder/coder/coderd/coderdtest"
|
"github.com/coder/coder/coderd/coderdtest"
|
||||||
"github.com/coder/coder/codersdk"
|
"github.com/coder/coder/codersdk"
|
||||||
"github.com/coder/coder/codersdk/agentsdk"
|
|
||||||
"github.com/coder/coder/provisioner/echo"
|
|
||||||
"github.com/coder/coder/provisionersdk/proto"
|
|
||||||
"github.com/coder/coder/pty/ptytest"
|
"github.com/coder/coder/pty/ptytest"
|
||||||
"github.com/coder/coder/scaletest/harness"
|
"github.com/coder/coder/scaletest/harness"
|
||||||
"github.com/coder/coder/testutil"
|
"github.com/coder/coder/testutil"
|
||||||
|
@ -205,70 +200,28 @@ param3: 1
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// This test pretends to stand up a workspace and run a no-op traffic generation test.
|
// This test just validates that the CLI command accepts its known arguments.
|
||||||
// It's not a real test, but it's useful for debugging.
|
// A more comprehensive test is performed in workspacetraffic/run_test.go
|
||||||
// We do not perform any cleanup.
|
|
||||||
func TestScaleTestWorkspaceTraffic(t *testing.T) {
|
func TestScaleTestWorkspaceTraffic(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
ctx, cancelFunc := context.WithTimeout(context.Background(), testutil.WaitMedium)
|
ctx, cancelFunc := context.WithTimeout(context.Background(), testutil.WaitMedium)
|
||||||
defer cancelFunc()
|
defer cancelFunc()
|
||||||
|
|
||||||
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
|
client := coderdtest.New(t, nil)
|
||||||
user := coderdtest.CreateFirstUser(t, client)
|
_ = coderdtest.CreateFirstUser(t, client)
|
||||||
|
|
||||||
authToken := uuid.NewString()
|
|
||||||
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
|
|
||||||
Parse: echo.ParseComplete,
|
|
||||||
ProvisionPlan: echo.ProvisionComplete,
|
|
||||||
ProvisionApply: []*proto.Provision_Response{{
|
|
||||||
Type: &proto.Provision_Response_Complete{
|
|
||||||
Complete: &proto.Provision_Complete{
|
|
||||||
Resources: []*proto.Resource{{
|
|
||||||
Name: "example",
|
|
||||||
Type: "aws_instance",
|
|
||||||
Agents: []*proto.Agent{{
|
|
||||||
Id: uuid.NewString(),
|
|
||||||
Name: "agent",
|
|
||||||
Auth: &proto.Agent_Token{
|
|
||||||
Token: authToken,
|
|
||||||
},
|
|
||||||
Apps: []*proto.App{},
|
|
||||||
}},
|
|
||||||
}},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}},
|
|
||||||
})
|
|
||||||
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
|
|
||||||
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
|
|
||||||
|
|
||||||
ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID, func(cwr *codersdk.CreateWorkspaceRequest) {
|
|
||||||
cwr.Name = "scaletest-test"
|
|
||||||
})
|
|
||||||
coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
|
|
||||||
|
|
||||||
agentClient := agentsdk.New(client.URL)
|
|
||||||
agentClient.SetSessionToken(authToken)
|
|
||||||
agentCloser := agent.New(agent.Options{
|
|
||||||
Client: agentClient,
|
|
||||||
})
|
|
||||||
t.Cleanup(func() {
|
|
||||||
_ = agentCloser.Close()
|
|
||||||
})
|
|
||||||
|
|
||||||
coderdtest.AwaitWorkspaceAgents(t, client, ws.ID)
|
|
||||||
|
|
||||||
inv, root := clitest.New(t, "scaletest", "workspace-traffic",
|
inv, root := clitest.New(t, "scaletest", "workspace-traffic",
|
||||||
"--timeout", "1s",
|
"--timeout", "1s",
|
||||||
"--bytes-per-tick", "1024",
|
"--bytes-per-tick", "1024",
|
||||||
"--tick-interval", "100ms",
|
"--tick-interval", "100ms",
|
||||||
|
"--scaletest-prometheus-address", "127.0.0.1:0",
|
||||||
|
"--scaletest-prometheus-wait", "0s",
|
||||||
)
|
)
|
||||||
clitest.SetupConfig(t, client, root)
|
clitest.SetupConfig(t, client, root)
|
||||||
var stdout, stderr bytes.Buffer
|
var stdout, stderr bytes.Buffer
|
||||||
inv.Stdout = &stdout
|
inv.Stdout = &stdout
|
||||||
inv.Stderr = &stderr
|
inv.Stderr = &stderr
|
||||||
err := inv.WithContext(ctx).Run()
|
err := inv.WithContext(ctx).Run()
|
||||||
require.NoError(t, err)
|
require.ErrorContains(t, err, "no scaletest workspaces exist")
|
||||||
require.Contains(t, stdout.String(), "Pass: 1")
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,13 @@ Generate traffic to scaletest workspaces through coderd
|
||||||
Output format specs in the format "<format>[:<path>]". Not specifying
|
Output format specs in the format "<format>[:<path>]". Not specifying
|
||||||
a path will default to stdout. Available formats: text, json.
|
a path will default to stdout. Available formats: text, json.
|
||||||
|
|
||||||
|
--scaletest-prometheus-address string, $CODER_SCALETEST_PROMETHEUS_ADDRESS (default: 0.0.0.0:21112)
|
||||||
|
Address on which to expose scaletest Prometheus metrics.
|
||||||
|
|
||||||
|
--scaletest-prometheus-wait duration, $CODER_SCALETEST_PROMETHEUS_WAIT (default: 5s)
|
||||||
|
How long to wait before exiting in order to allow Prometheus metrics
|
||||||
|
to be scraped.
|
||||||
|
|
||||||
--tick-interval duration, $CODER_SCALETEST_WORKSPACE_TRAFFIC_TICK_INTERVAL (default: 100ms)
|
--tick-interval duration, $CODER_SCALETEST_WORKSPACE_TRAFFIC_TICK_INTERVAL (default: 100ms)
|
||||||
How often to send traffic.
|
How often to send traffic.
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,26 @@ Timeout per job. Jobs may take longer to complete under higher concurrency limit
|
||||||
|
|
||||||
Output format specs in the format "<format>[:<path>]". Not specifying a path will default to stdout. Available formats: text, json.
|
Output format specs in the format "<format>[:<path>]". Not specifying a path will default to stdout. Available formats: text, json.
|
||||||
|
|
||||||
|
### --scaletest-prometheus-address
|
||||||
|
|
||||||
|
| | |
|
||||||
|
| ----------- | ------------------------------------------------ |
|
||||||
|
| Type | <code>string</code> |
|
||||||
|
| Environment | <code>$CODER_SCALETEST_PROMETHEUS_ADDRESS</code> |
|
||||||
|
| Default | <code>0.0.0.0:21112</code> |
|
||||||
|
|
||||||
|
Address on which to expose scaletest Prometheus metrics.
|
||||||
|
|
||||||
|
### --scaletest-prometheus-wait
|
||||||
|
|
||||||
|
| | |
|
||||||
|
| ----------- | --------------------------------------------- |
|
||||||
|
| Type | <code>duration</code> |
|
||||||
|
| Environment | <code>$CODER_SCALETEST_PROMETHEUS_WAIT</code> |
|
||||||
|
| Default | <code>5s</code> |
|
||||||
|
|
||||||
|
How long to wait before exiting in order to allow Prometheus metrics to be scraped.
|
||||||
|
|
||||||
### --tick-interval
|
### --tick-interval
|
||||||
|
|
||||||
| | |
|
| | |
|
||||||
|
|
|
@ -32,9 +32,12 @@ project_id = "some_google_project_id"
|
||||||
1. Run `coder_init.sh <coder_url>` to setup an initial user and a pre-configured Kubernetes
|
1. Run `coder_init.sh <coder_url>` to setup an initial user and a pre-configured Kubernetes
|
||||||
template. It will also download the Coder CLI from the Coder instance locally.
|
template. It will also download the Coder CLI from the Coder instance locally.
|
||||||
|
|
||||||
1. Do whatever you need to do with the Coder instance.
|
1. Do whatever you need to do with the Coder instance:
|
||||||
|
|
||||||
> To run Coder commands against the instance, you can use `coder_shim.sh <command>`.
|
> Note: To run Coder commands against the instance, you can use `coder_shim.sh <command>`.
|
||||||
> You don't need to run `coder login` yourself.
|
> You don't need to run `coder login` yourself.
|
||||||
|
|
||||||
|
- To create workspaces, run `./coder_shim.sh scaletest create-workspaces --template="kubernetes" --count=N`
|
||||||
|
- To generate workspace traffic, run `./coder_trafficgen.sh <name of loadtest from your Terraform vars>`. This will keep running until you delete the pod `coder-scaletest-workspace-traffic`.
|
||||||
|
|
||||||
1. When you are finished, you can run `terraform destroy -var-file=override.tfvars`.
|
1. When you are finished, you can run `terraform destroy -var-file=override.tfvars`.
|
||||||
|
|
|
@ -128,34 +128,6 @@ EOF
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "local_file" "coder-monitoring-manifest" {
|
|
||||||
filename = "${path.module}/.coderv2/coder-monitoring.yaml"
|
|
||||||
content = <<EOF
|
|
||||||
apiVersion: monitoring.googleapis.com/v1
|
|
||||||
kind: PodMonitoring
|
|
||||||
metadata:
|
|
||||||
namespace: ${kubernetes_namespace.coder_namespace.metadata.0.name}
|
|
||||||
name: coder-monitoring
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: coder
|
|
||||||
endpoints:
|
|
||||||
- port: prometheus-http
|
|
||||||
interval: 30s
|
|
||||||
EOF
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "null_resource" "coder-monitoring-manifest_apply" {
|
|
||||||
provisioner "local-exec" {
|
|
||||||
working_dir = "${abspath(path.module)}/.coderv2"
|
|
||||||
command = <<EOF
|
|
||||||
KUBECONFIG=${var.name}-cluster.kubeconfig gcloud container clusters get-credentials ${var.name}-cluster --project=${var.project_id} --zone=${var.zone} && \
|
|
||||||
KUBECONFIG=${var.name}-cluster.kubeconfig kubectl apply -f ${abspath(local_file.coder-monitoring-manifest.filename)}
|
|
||||||
EOF
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "local_file" "kubernetes_template" {
|
resource "local_file" "kubernetes_template" {
|
||||||
filename = "${path.module}/.coderv2/templates/kubernetes/main.tf"
|
filename = "${path.module}/.coderv2/templates/kubernetes/main.tf"
|
||||||
content = <<EOF
|
content = <<EOF
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
#!/usr/bin/env bash

# Applies two Kubernetes objects to the loadtest cluster:
#   1. a Pod that downloads the Coder CLI from the in-cluster Coder service and
#      runs `coder scaletest workspace-traffic`, and
#   2. a PodMonitor selecting that Pod's `prometheus-http` port.
#
# Usage: coder_trafficgen.sh <loadtest name>
# Set VERBOSE to any non-empty value to trace the commands being run.

set -euo pipefail

if [[ $# -lt 1 ]]; then
	echo "Usage: $0 <loadtest name>"
	exit 1
fi

# Allow toggling verbose output
[[ -n ${VERBOSE:-} ]] && set -x

LOADTEST_NAME="$1"
# Both the shim and the kubeconfig below are resolved relative to the current
# working directory, so run this script from the directory that contains them.
CODER_TOKEN=$(./coder_shim.sh tokens create)
CODER_URL="http://coder.coder-${LOADTEST_NAME}.svc.cluster.local"
export KUBECONFIG="${PWD}/.coderv2/${LOADTEST_NAME}-cluster.kubeconfig"

# The heredoc delimiter is unquoted, so ${LOADTEST_NAME}, $CODER_URL and
# $CODER_TOKEN are expanded by this shell before the manifest reaches kubectl.
# NOTE(review): the token therefore ends up in plain text in the Pod spec.
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: coder-scaletest-workspace-traffic
  namespace: coder-${LOADTEST_NAME}
  labels:
    app.kubernetes.io/name: coder-scaletest-workspace-traffic
spec:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: cloud.google.com/gke-nodepool
                operator: In
                values:
                  - ${LOADTEST_NAME}-misc
  containers:
    - command:
        - sh
        - -c
        - "curl -fsSL $CODER_URL/bin/coder-linux-amd64 -o /tmp/coder && chmod +x /tmp/coder && /tmp/coder --url=$CODER_URL --token=$CODER_TOKEN scaletest workspace-traffic"
      env:
        - name: CODER_URL
          value: $CODER_URL
        - name: CODER_TOKEN
          value: $CODER_TOKEN
        - name: CODER_SCALETEST_PROMETHEUS_ADDRESS
          value: "0.0.0.0:21112"
        - name: CODER_SCALETEST_JOB_TIMEOUT
          value: "30m"
        - name: CODER_SCALETEST_CONCURRENCY
          value: "0"
        - name: CODER_SCALETEST_WORKSPACE_TRAFFIC_BYTES_PER_TICK
          value: "2048"
      ports:
        - containerPort: 21112
          name: prometheus-http
          protocol: TCP
      name: cli
      image: docker.io/codercom/enterprise-minimal:ubuntu
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: coder-${LOADTEST_NAME}
  name: coder-workspacetraffic-monitoring
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: coder-scaletest-workspace-traffic
  podMetricsEndpoints:
    - port: prometheus-http
      interval: 15s
EOF
|
|
@ -26,7 +26,7 @@ resource "google_container_cluster" "primary" {
|
||||||
monitoring_config {
|
monitoring_config {
|
||||||
enable_components = ["SYSTEM_COMPONENTS"]
|
enable_components = ["SYSTEM_COMPONENTS"]
|
||||||
managed_prometheus {
|
managed_prometheus {
|
||||||
enabled = true
|
enabled = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
workload_identity_config {
|
workload_identity_config {
|
||||||
|
|
|
@ -0,0 +1,132 @@
|
||||||
|
# Settings for the self-hosted Prometheus deployment: which Helm chart to
# install, the release and namespace names to use, and whether remote write is
# configured. Remote write counts as enabled only when a non-empty
# prometheus_remote_write_password is supplied.
locals {
  prometheus_helm_repo            = "https://charts.bitnami.com/bitnami"
  prometheus_helm_chart           = "kube-prometheus"
  prometheus_helm_version         = null // just use latest
  prometheus_release_name         = "prometheus"
  prometheus_namespace            = "prometheus"
  prometheus_remote_write_enabled = var.prometheus_remote_write_password != ""
}
|
||||||
|
|
||||||
|
# Create a namespace to hold our Prometheus deployment.
# The explicit depends_on orders this after the "misc" node pool, which is the
# pool the Prometheus components are pinned to via node affinity.
resource "kubernetes_namespace" "prometheus_namespace" {
  metadata {
    name = local.prometheus_namespace
  }
  depends_on = [
    google_container_node_pool.misc
  ]
}
|
||||||
|
|
||||||
|
# Create a secret to store the remote write key
# The secret holds the basic-auth username and password for remote write. It is
# created only when remote write is enabled (count is 0 otherwise), so every
# reference to it must be indexed with [0] and guarded by the same condition.
resource "kubernetes_secret" "prometheus-credentials" {
  count = local.prometheus_remote_write_enabled ? 1 : 0
  type  = "kubernetes.io/basic-auth"
  metadata {
    name      = "prometheus-credentials"
    namespace = kubernetes_namespace.prometheus_namespace.metadata.0.name
  }

  data = {
    username = var.prometheus_remote_write_user
    password = var.prometheus_remote_write_password
  }
}
|
||||||
|
|
||||||
|
# Install Prometheus using the Bitnami Prometheus helm chart.
#
# The chart values below:
#   - disable Alertmanager,
#   - pin the blackbox exporter, the operator and Prometheus itself to the
#     "misc" node pool via required node affinity,
#   - add the cluster name as an external label and enable persistence,
#   - and, only when remote write is enabled, append a remoteWrite entry that
#     reads its basic-auth credentials from the prometheus-credentials secret.
#
# The %{if}/%{endif} template directives sit at column 0 and use "~" to strip
# the newline that follows them, so they leave no stray whitespace in the
# rendered YAML.
resource "helm_release" "prometheus-chart" {
  repository = local.prometheus_helm_repo
  chart      = local.prometheus_helm_chart
  name       = local.prometheus_release_name
  version    = local.prometheus_helm_version
  namespace  = kubernetes_namespace.prometheus_namespace.metadata.0.name
  values = [<<EOF
alertmanager:
  enabled: false
blackboxExporter:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: "cloud.google.com/gke-nodepool"
            operator: "In"
            values: ["${google_container_node_pool.misc.name}"]
operator:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: "cloud.google.com/gke-nodepool"
            operator: "In"
            values: ["${google_container_node_pool.misc.name}"]
prometheus:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          - key: "cloud.google.com/gke-nodepool"
            operator: "In"
            values: ["${google_container_node_pool.misc.name}"]
  externalLabels:
    cluster: "${google_container_cluster.primary.name}"
  persistence:
    enabled: true
    storageClass: standard
%{if local.prometheus_remote_write_enabled~}
  remoteWrite:
    - url: "${var.prometheus_remote_write_url}"
      basicAuth:
        username:
          name: "${kubernetes_secret.prometheus-credentials[0].metadata[0].name}"
          key: username
        password:
          name: "${kubernetes_secret.prometheus-credentials[0].metadata[0].name}"
          key: password
      tlsConfig:
        insecureSkipVerify: ${var.prometheus_remote_write_insecure_skip_verify}
      writeRelabelConfigs:
        - sourceLabels: [__name__]
          regex: "${var.prometheus_remote_write_metrics_regex}"
          action: keep
      metadataConfig:
        sendInterval: "${var.prometheus_remote_write_send_interval}"
%{endif~}
EOF
  ]
}
|
||||||
|
|
||||||
|
# NOTE: this is created as a local file before being applied
# as the kubernetes_manifest resource needs to be run separately
# after creating a cluster, and we want this to be brought up
# with a single command.
#
# The manifest is a PodMonitor in the Coder namespace that selects pods
# labelled app.kubernetes.io/name=coder and scrapes their prometheus-http port
# every 30s. It depends on the Helm release because that release is what
# installs Prometheus (and, presumably, the PodMonitor CRD — verify).
resource "local_file" "coder-monitoring-manifest" {
  filename   = "${path.module}/.coderv2/coder-monitoring.yaml"
  depends_on = [helm_release.prometheus-chart]
  content    = <<EOF
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  namespace: ${kubernetes_namespace.coder_namespace.metadata.0.name}
  name: coder-monitoring
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: coder
  podMetricsEndpoints:
  - port: prometheus-http
    interval: 30s
EOF
}
|
||||||
|
|
||||||
|
# Applies the generated coder-monitoring manifest with kubectl.
# The command first fetches cluster credentials into a kubeconfig file inside
# .coderv2 (the working directory), then runs `kubectl apply` with that same
# kubeconfig. It runs only after the Prometheus Helm release exists.
# NOTE(review): no `triggers` are set, so a later change to the manifest
# content will presumably not re-run this provisioner — confirm that is
# intended.
resource "null_resource" "coder-monitoring-manifest_apply" {
  provisioner "local-exec" {
    working_dir = "${abspath(path.module)}/.coderv2"
    command     = <<EOF
KUBECONFIG=${var.name}-cluster.kubeconfig gcloud container clusters get-credentials ${google_container_cluster.primary.name} --project=${var.project_id} --zone=${var.zone} && \
KUBECONFIG=${var.name}-cluster.kubeconfig kubectl apply -f ${abspath(local_file.coder-monitoring-manifest.filename)}
EOF
  }
  depends_on = [helm_release.prometheus-chart]
}
|
|
@ -127,3 +127,33 @@ variable "workspace_image" {
|
||||||
description = "Image and tag to use for workspaces."
|
description = "Image and tag to use for workspaces."
|
||||||
default = "docker.io/codercom/enterprise-minimal:ubuntu"
|
default = "docker.io/codercom/enterprise-minimal:ubuntu"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Basic-auth username stored in the prometheus-credentials secret.
variable "prometheus_remote_write_user" {
  description = "Username for Prometheus remote write."
  default     = ""
}

# Basic-auth password stored in the prometheus-credentials secret. Leaving this
# empty disables remote write altogether.
# NOTE(review): not marked `sensitive`; consider whether it should be.
variable "prometheus_remote_write_password" {
  description = "Password for Prometheus remote write."
  default     = ""
}

# Endpoint that receives remote-written samples.
variable "prometheus_remote_write_url" {
  description = "URL for Prometheus remote write. Defaults to stats.dev.c8s.io"
  default     = "https://stats.dev.c8s.io:9443/api/v1/write"
}

# Passed through as tlsConfig.insecureSkipVerify on the remote write entry.
# NOTE(review): defaults to true, i.e. TLS certificate verification is off
# unless this is overridden.
variable "prometheus_remote_write_insecure_skip_verify" {
  description = "Skip TLS verification for Prometheus remote write."
  default     = true
}

# Used as the `keep` regex on __name__ in writeRelabelConfigs; the default
# forwards every metric.
variable "prometheus_remote_write_metrics_regex" {
  description = "Allowlist regex of metrics for Prometheus remote write."
  default     = ".*"
}

# Passed through as metadataConfig.sendInterval on the remote write entry.
variable "prometheus_remote_write_send_interval" {
  description = "Prometheus remote write interval."
  default     = "15s"
}
|
||||||
|
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"golang.org/x/xerrors"
|
"golang.org/x/xerrors"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -11,6 +12,15 @@ type Config struct {
|
||||||
// AgentID is the workspace agent ID to which to connect.
|
// AgentID is the workspace agent ID to which to connect.
|
||||||
AgentID uuid.UUID `json:"agent_id"`
|
AgentID uuid.UUID `json:"agent_id"`
|
||||||
|
|
||||||
|
// AgentName is the name of the agent. Used for metrics.
|
||||||
|
AgentName string `json:"agent_name"`
|
||||||
|
|
||||||
|
// WorkspaceName is the name of the workspace. Used for metrics.
|
||||||
|
WorkspaceName string `json:"workspace_name"`
|
||||||
|
|
||||||
|
// WorkspaceOwner is the owner of the workspace. Used for metrics.
|
||||||
|
WorkspaceOwner string `json:"workspace_owner"`
|
||||||
|
|
||||||
// BytesPerTick is the number of bytes to send to the agent per tick.
|
// BytesPerTick is the number of bytes to send to the agent per tick.
|
||||||
BytesPerTick int64 `json:"bytes_per_tick"`
|
BytesPerTick int64 `json:"bytes_per_tick"`
|
||||||
|
|
||||||
|
@ -20,6 +30,9 @@ type Config struct {
|
||||||
// TickInterval specifies the interval between ticks (that is, attempts to
|
// TickInterval specifies the interval between ticks (that is, attempts to
|
||||||
// send data to workspace agents).
|
// send data to workspace agents).
|
||||||
TickInterval time.Duration `json:"tick_interval"`
|
TickInterval time.Duration `json:"tick_interval"`
|
||||||
|
|
||||||
|
// Registry is a prometheus.Registerer for logging metrics
|
||||||
|
Registry prometheus.Registerer
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c Config) Validate() error {
|
func (c Config) Validate() error {
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
package workspacetraffic
|
||||||
|
|
||||||
|
import "github.com/prometheus/client_golang/prometheus"
|
||||||
|
|
||||||
|
type Metrics struct {
|
||||||
|
BytesReadTotal prometheus.CounterVec
|
||||||
|
BytesWrittenTotal prometheus.CounterVec
|
||||||
|
ReadErrorsTotal prometheus.CounterVec
|
||||||
|
WriteErrorsTotal prometheus.CounterVec
|
||||||
|
ReadLatencySeconds prometheus.HistogramVec
|
||||||
|
WriteLatencySeconds prometheus.HistogramVec
|
||||||
|
LabelNames []string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMetrics(reg prometheus.Registerer, labelNames ...string) *Metrics {
|
||||||
|
m := &Metrics{
|
||||||
|
BytesReadTotal: *prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Subsystem: "scaletest",
|
||||||
|
Name: "bytes_read_total",
|
||||||
|
}, labelNames),
|
||||||
|
BytesWrittenTotal: *prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Subsystem: "scaletest",
|
||||||
|
Name: "bytes_written_total",
|
||||||
|
}, labelNames),
|
||||||
|
ReadErrorsTotal: *prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Subsystem: "scaletest",
|
||||||
|
Name: "read_errors_total",
|
||||||
|
}, labelNames),
|
||||||
|
WriteErrorsTotal: *prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Subsystem: "scaletest",
|
||||||
|
Name: "write_errors_total",
|
||||||
|
}, labelNames),
|
||||||
|
ReadLatencySeconds: *prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Subsystem: "scaletest",
|
||||||
|
Name: "read_latency_seconds",
|
||||||
|
}, labelNames),
|
||||||
|
WriteLatencySeconds: *prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Subsystem: "scaletest",
|
||||||
|
Name: "write_latency_seconds",
|
||||||
|
}, labelNames),
|
||||||
|
}
|
||||||
|
|
||||||
|
reg.MustRegister(m.BytesReadTotal)
|
||||||
|
reg.MustRegister(m.BytesWrittenTotal)
|
||||||
|
reg.MustRegister(m.ReadErrorsTotal)
|
||||||
|
reg.MustRegister(m.WriteErrorsTotal)
|
||||||
|
reg.MustRegister(m.ReadLatencySeconds)
|
||||||
|
reg.MustRegister(m.WriteLatencySeconds)
|
||||||
|
return m
|
||||||
|
}
|
|
@ -3,8 +3,8 @@ package workspacetraffic
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"sync/atomic"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
@ -19,11 +19,14 @@ import (
|
||||||
"github.com/coder/coder/cryptorand"
|
"github.com/coder/coder/cryptorand"
|
||||||
"github.com/coder/coder/scaletest/harness"
|
"github.com/coder/coder/scaletest/harness"
|
||||||
"github.com/coder/coder/scaletest/loadtestutil"
|
"github.com/coder/coder/scaletest/loadtestutil"
|
||||||
|
|
||||||
|
promtest "github.com/prometheus/client_golang/prometheus/testutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Runner struct {
|
type Runner struct {
|
||||||
client *codersdk.Client
|
client *codersdk.Client
|
||||||
cfg Config
|
cfg Config
|
||||||
|
metrics *Metrics
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -31,10 +34,11 @@ var (
|
||||||
_ harness.Cleanable = &Runner{}
|
_ harness.Cleanable = &Runner{}
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewRunner(client *codersdk.Client, cfg Config) *Runner {
|
func NewRunner(client *codersdk.Client, cfg Config, metrics *Metrics) *Runner {
|
||||||
return &Runner{
|
return &Runner{
|
||||||
client: client,
|
client: client,
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
|
metrics: metrics,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,6 +51,16 @@ func (r *Runner) Run(ctx context.Context, _ string, logs io.Writer) error {
|
||||||
r.client.Logger = logger
|
r.client.Logger = logger
|
||||||
r.client.LogBodies = true
|
r.client.LogBodies = true
|
||||||
|
|
||||||
|
// Initialize our metrics eagerly. This is mainly so that we can test for the
|
||||||
|
// presence of a zero-valued metric as opposed to the absence of a metric.
|
||||||
|
lvs := []string{r.cfg.WorkspaceOwner, r.cfg.WorkspaceName, r.cfg.AgentName}
|
||||||
|
r.metrics.BytesReadTotal.WithLabelValues(lvs...).Add(0)
|
||||||
|
r.metrics.BytesWrittenTotal.WithLabelValues(lvs...).Add(0)
|
||||||
|
r.metrics.ReadErrorsTotal.WithLabelValues(lvs...).Add(0)
|
||||||
|
r.metrics.WriteErrorsTotal.WithLabelValues(lvs...).Add(0)
|
||||||
|
r.metrics.ReadLatencySeconds.WithLabelValues(lvs...).Observe(0)
|
||||||
|
r.metrics.WriteLatencySeconds.WithLabelValues(lvs...).Observe(0)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
agentID = r.cfg.AgentID
|
agentID = r.cfg.AgentID
|
||||||
reconnect = uuid.New()
|
reconnect = uuid.New()
|
||||||
|
@ -90,7 +104,7 @@ func (r *Runner) Run(ctx context.Context, _ string, logs io.Writer) error {
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Wrap the conn in a countReadWriter so we can monitor bytes sent/rcvd.
|
// Wrap the conn in a countReadWriter so we can monitor bytes sent/rcvd.
|
||||||
crw := countReadWriter{ReadWriter: conn}
|
crw := countReadWriter{ReadWriter: conn, metrics: r.metrics, labels: lvs}
|
||||||
|
|
||||||
// Create a ticker for sending data to the PTY.
|
// Create a ticker for sending data to the PTY.
|
||||||
tick := time.NewTicker(tickInterval)
|
tick := time.NewTicker(tickInterval)
|
||||||
|
@ -131,11 +145,12 @@ func (r *Runner) Run(ctx context.Context, _ string, logs io.Writer) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
duration := time.Since(start)
|
duration := time.Since(start)
|
||||||
|
logger.Info(ctx, "Test Results",
|
||||||
logger.Info(ctx, "results",
|
|
||||||
slog.F("duration", duration),
|
slog.F("duration", duration),
|
||||||
slog.F("sent", crw.BytesWritten()),
|
slog.F("bytes_read_total", promtest.ToFloat64(r.metrics.BytesReadTotal)),
|
||||||
slog.F("rcvd", crw.BytesRead()),
|
slog.F("bytes_written_total", promtest.ToFloat64(r.metrics.BytesWrittenTotal)),
|
||||||
|
slog.F("read_errors_total", promtest.ToFloat64(r.metrics.ReadErrorsTotal)),
|
||||||
|
slog.F("write_errors_total", promtest.ToFloat64(r.metrics.WriteErrorsTotal)),
|
||||||
)
|
)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -184,34 +199,36 @@ func writeRandomData(dst io.Writer, size int64, tick <-chan time.Time) error {
|
||||||
// countReadWriter wraps an io.ReadWriter and counts the number of bytes read and written.
|
// countReadWriter wraps an io.ReadWriter and counts the number of bytes read and written.
|
||||||
type countReadWriter struct {
|
type countReadWriter struct {
|
||||||
io.ReadWriter
|
io.ReadWriter
|
||||||
bytesRead atomic.Int64
|
metrics *Metrics
|
||||||
bytesWritten atomic.Int64
|
labels []string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *countReadWriter) Read(p []byte) (int, error) {
|
func (w *countReadWriter) Read(p []byte) (int, error) {
|
||||||
|
start := time.Now()
|
||||||
n, err := w.ReadWriter.Read(p)
|
n, err := w.ReadWriter.Read(p)
|
||||||
if err == nil {
|
if reportableErr(err) {
|
||||||
w.bytesRead.Add(int64(n))
|
w.metrics.ReadErrorsTotal.WithLabelValues(w.labels...).Inc()
|
||||||
|
}
|
||||||
|
w.metrics.ReadLatencySeconds.WithLabelValues(w.labels...).Observe(time.Since(start).Seconds())
|
||||||
|
if n > 0 {
|
||||||
|
w.metrics.BytesReadTotal.WithLabelValues(w.labels...).Add(float64(n))
|
||||||
}
|
}
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *countReadWriter) Write(p []byte) (int, error) {
|
func (w *countReadWriter) Write(p []byte) (int, error) {
|
||||||
|
start := time.Now()
|
||||||
n, err := w.ReadWriter.Write(p)
|
n, err := w.ReadWriter.Write(p)
|
||||||
if err == nil {
|
if reportableErr(err) {
|
||||||
w.bytesWritten.Add(int64(n))
|
w.metrics.WriteErrorsTotal.WithLabelValues(w.labels...).Inc()
|
||||||
|
}
|
||||||
|
w.metrics.WriteLatencySeconds.WithLabelValues(w.labels...).Observe(time.Since(start).Seconds())
|
||||||
|
if n > 0 {
|
||||||
|
w.metrics.BytesWrittenTotal.WithLabelValues(w.labels...).Add(float64(n))
|
||||||
}
|
}
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *countReadWriter) BytesRead() int64 {
|
|
||||||
return w.bytesRead.Load()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *countReadWriter) BytesWritten() int64 {
|
|
||||||
return w.bytesWritten.Load()
|
|
||||||
}
|
|
||||||
|
|
||||||
func mustRandStr(l int64) string {
|
func mustRandStr(l int64) string {
|
||||||
if l < 1 {
|
if l < 1 {
|
||||||
l = 1
|
l = 1
|
||||||
|
@ -222,3 +239,19 @@ func mustRandStr(l int64) string {
|
||||||
}
|
}
|
||||||
return randStr
|
return randStr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// some errors we want to report in metrics; others we want to ignore
|
||||||
|
// such as websocket.StatusNormalClosure or context.Canceled
|
||||||
|
func reportableErr(err error) bool {
|
||||||
|
if err == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if xerrors.Is(err, context.Canceled) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
var wsErr websocket.CloseError
|
||||||
|
if errors.As(err, &wsErr) {
|
||||||
|
return wsErr.Code != websocket.StatusNormalClosure
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,177 @@
|
||||||
|
package workspacetraffic_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/coder/coder/agent"
|
||||||
|
"github.com/coder/coder/coderd/coderdtest"
|
||||||
|
"github.com/coder/coder/codersdk"
|
||||||
|
"github.com/coder/coder/codersdk/agentsdk"
|
||||||
|
"github.com/coder/coder/provisioner/echo"
|
||||||
|
"github.com/coder/coder/provisionersdk/proto"
|
||||||
|
"github.com/coder/coder/scaletest/workspacetraffic"
|
||||||
|
"github.com/coder/coder/testutil"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
dto "github.com/prometheus/client_model/go"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRun(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// We need to stand up an in-memory coderd and run a fake workspace.
|
||||||
|
var (
|
||||||
|
client = coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
|
||||||
|
firstUser = coderdtest.CreateFirstUser(t, client)
|
||||||
|
authToken = uuid.NewString()
|
||||||
|
agentName = "agent"
|
||||||
|
version = coderdtest.CreateTemplateVersion(t, client, firstUser.OrganizationID, &echo.Responses{
|
||||||
|
Parse: echo.ParseComplete,
|
||||||
|
ProvisionPlan: echo.ProvisionComplete,
|
||||||
|
ProvisionApply: []*proto.Provision_Response{{
|
||||||
|
Type: &proto.Provision_Response_Complete{
|
||||||
|
Complete: &proto.Provision_Complete{
|
||||||
|
Resources: []*proto.Resource{{
|
||||||
|
Name: "example",
|
||||||
|
Type: "aws_instance",
|
||||||
|
Agents: []*proto.Agent{{
|
||||||
|
// Agent ID gets generated no matter what we say ¯\_(ツ)_/¯
|
||||||
|
Name: agentName,
|
||||||
|
Auth: &proto.Agent_Token{
|
||||||
|
Token: authToken,
|
||||||
|
},
|
||||||
|
Apps: []*proto.App{},
|
||||||
|
}},
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}},
|
||||||
|
})
|
||||||
|
template = coderdtest.CreateTemplate(t, client, firstUser.OrganizationID, version.ID)
|
||||||
|
_ = coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
|
||||||
|
// In order to be picked up as a scaletest workspace, the workspace must be named specifically
|
||||||
|
ws = coderdtest.CreateWorkspace(t, client, firstUser.OrganizationID, template.ID, func(cwr *codersdk.CreateWorkspaceRequest) {
|
||||||
|
cwr.Name = "scaletest-test"
|
||||||
|
})
|
||||||
|
_ = coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID)
|
||||||
|
)
|
||||||
|
|
||||||
|
// We also need a running agent to run this test.
|
||||||
|
agentClient := agentsdk.New(client.URL)
|
||||||
|
agentClient.SetSessionToken(authToken)
|
||||||
|
agentCloser := agent.New(agent.Options{
|
||||||
|
Client: agentClient,
|
||||||
|
})
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
t.Cleanup(cancel)
|
||||||
|
t.Cleanup(func() {
|
||||||
|
_ = agentCloser.Close()
|
||||||
|
})
|
||||||
|
// We actually need to know the full user and not just the UserID / OrgID
|
||||||
|
user, err := client.User(ctx, firstUser.UserID.String())
|
||||||
|
require.NoError(t, err, "get first user")
|
||||||
|
|
||||||
|
// Make sure the agent is connected before we go any further.
|
||||||
|
resources := coderdtest.AwaitWorkspaceAgents(t, client, ws.ID)
|
||||||
|
var agentID uuid.UUID
|
||||||
|
for _, res := range resources {
|
||||||
|
for _, agt := range res.Agents {
|
||||||
|
agentID = agt.ID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
require.NotEqual(t, uuid.Nil, agentID, "did not expect agentID to be nil")
|
||||||
|
|
||||||
|
// Now we can start the runner.
|
||||||
|
var (
|
||||||
|
bytesPerTick = 1024
|
||||||
|
tickInterval = 1000 * time.Millisecond
|
||||||
|
cancelAfter = 1500 * time.Millisecond
|
||||||
|
fudgeWrite = 12 // The ReconnectingPTY payload incurs some overhead
|
||||||
|
)
|
||||||
|
reg := prometheus.NewRegistry()
|
||||||
|
metrics := workspacetraffic.NewMetrics(reg, "username", "workspace_name", "agent_name")
|
||||||
|
runner := workspacetraffic.NewRunner(client, workspacetraffic.Config{
|
||||||
|
AgentID: agentID,
|
||||||
|
AgentName: agentName,
|
||||||
|
WorkspaceName: ws.Name,
|
||||||
|
WorkspaceOwner: ws.OwnerName,
|
||||||
|
BytesPerTick: int64(bytesPerTick),
|
||||||
|
TickInterval: tickInterval,
|
||||||
|
Duration: testutil.WaitLong,
|
||||||
|
Registry: reg,
|
||||||
|
}, metrics)
|
||||||
|
|
||||||
|
var logs strings.Builder
|
||||||
|
// Stop the test after one 'tick'. This will cause an EOF.
|
||||||
|
go func() {
|
||||||
|
<-time.After(cancelAfter)
|
||||||
|
cancel()
|
||||||
|
}()
|
||||||
|
require.NoError(t, runner.Run(ctx, "", &logs), "unexpected error calling Run()")
|
||||||
|
|
||||||
|
// We want to ensure the metrics are somewhat accurate.
|
||||||
|
lvs := []string{user.Username, ws.Name, agentName}
|
||||||
|
assert.InDelta(t, bytesPerTick+fudgeWrite, toFloat64(t, metrics.BytesWrittenTotal.WithLabelValues(lvs...)), 0.1)
|
||||||
|
// Read is highly variable, depending on how far we read before stopping.
|
||||||
|
// Just ensure it's not zero.
|
||||||
|
assert.NotZero(t, bytesPerTick, toFloat64(t, metrics.BytesReadTotal.WithLabelValues(lvs...)))
|
||||||
|
// Latency should report non-zero values.
|
||||||
|
assert.NotZero(t, toFloat64(t, metrics.ReadLatencySeconds))
|
||||||
|
assert.NotZero(t, toFloat64(t, metrics.WriteLatencySeconds))
|
||||||
|
// Should not report any errors!
|
||||||
|
assert.Zero(t, toFloat64(t, metrics.ReadErrorsTotal.WithLabelValues(lvs...)))
|
||||||
|
assert.Zero(t, toFloat64(t, metrics.ReadErrorsTotal.WithLabelValues(lvs...)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// toFloat64 version of Prometheus' testutil.ToFloat64 that integrates with
|
||||||
|
// github.com/stretchr/testify/require and handles histograms (somewhat)
|
||||||
|
func toFloat64(t testing.TB, c prometheus.Collector) float64 {
|
||||||
|
var (
|
||||||
|
m prometheus.Metric
|
||||||
|
mCount int
|
||||||
|
mChan = make(chan prometheus.Metric)
|
||||||
|
done = make(chan struct{})
|
||||||
|
)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for m = range mChan {
|
||||||
|
mCount++
|
||||||
|
}
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
c.Collect(mChan)
|
||||||
|
close(mChan)
|
||||||
|
<-done
|
||||||
|
|
||||||
|
require.Equal(t, 1, mCount, "expected exactly 1 metric but got %d", mCount)
|
||||||
|
|
||||||
|
pb := &dto.Metric{}
|
||||||
|
require.NoError(t, m.Write(pb), "unexpected error collecting metrics")
|
||||||
|
|
||||||
|
if pb.Gauge != nil {
|
||||||
|
return pb.Gauge.GetValue()
|
||||||
|
}
|
||||||
|
if pb.Counter != nil {
|
||||||
|
return pb.Counter.GetValue()
|
||||||
|
}
|
||||||
|
if pb.Untyped != nil {
|
||||||
|
return pb.Untyped.GetValue()
|
||||||
|
}
|
||||||
|
if pb.Histogram != nil {
|
||||||
|
// If no samples, just return zero.
|
||||||
|
if pb.Histogram.GetSampleCount() == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
// Average is sufficient for testing purposes.
|
||||||
|
return pb.Histogram.GetSampleSum() / pb.Histogram.GetSampleCountFloat()
|
||||||
|
}
|
||||||
|
require.Fail(t, "collected a non-gauge/counter/untyped/histogram metric: %s", pb)
|
||||||
|
return 0
|
||||||
|
}
|
|
@ -62,6 +62,7 @@ stats/
|
||||||
.././scaletest/terraform/.terraform
|
.././scaletest/terraform/.terraform
|
||||||
.././scaletest/terraform/.terraform.lock.hcl
|
.././scaletest/terraform/.terraform.lock.hcl
|
||||||
terraform.tfstate.*
|
terraform.tfstate.*
|
||||||
|
**/*.tfvars
|
||||||
# .prettierignore.include:
|
# .prettierignore.include:
|
||||||
# Helm templates contain variables that are invalid YAML and can't be formatted
|
# Helm templates contain variables that are invalid YAML and can't be formatted
|
||||||
# by Prettier.
|
# by Prettier.
|
||||||
|
|
|
@ -62,6 +62,7 @@ stats/
|
||||||
.././scaletest/terraform/.terraform
|
.././scaletest/terraform/.terraform
|
||||||
.././scaletest/terraform/.terraform.lock.hcl
|
.././scaletest/terraform/.terraform.lock.hcl
|
||||||
terraform.tfstate.*
|
terraform.tfstate.*
|
||||||
|
**/*.tfvars
|
||||||
# .prettierignore.include:
|
# .prettierignore.include:
|
||||||
# Helm templates contain variables that are invalid YAML and can't be formatted
|
# Helm templates contain variables that are invalid YAML and can't be formatted
|
||||||
# by Prettier.
|
# by Prettier.
|
||||||
|
|
Loading…
Reference in New Issue