mirror of https://github.com/coder/coder.git
parent
d0d64bbdca
commit
bc97eaa41b
|
@ -857,7 +857,7 @@ func (r *RootCmd) scaletestCreateWorkspaces() *clibase.Cmd {
|
||||||
Flag: "use-host-login",
|
Flag: "use-host-login",
|
||||||
Env: "CODER_SCALETEST_USE_HOST_LOGIN",
|
Env: "CODER_SCALETEST_USE_HOST_LOGIN",
|
||||||
Default: "false",
|
Default: "false",
|
||||||
Description: "Use the use logged in on the host machine, instead of creating users.",
|
Description: "Use the user logged in on the host machine, instead of creating users.",
|
||||||
Value: clibase.BoolOf(&useHostUser),
|
Value: clibase.BoolOf(&useHostUser),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: scaletest-sa
|
||||||
|
namespace: coder-big
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: scaletest-role
|
||||||
|
namespace: coder-big
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources: ["*"]
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- create
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- delete
|
||||||
|
- deletecollection
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: scaletest-rolebinding
|
||||||
|
namespace: coder-big
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: scaletest-sa
|
||||||
|
roleRef:
|
||||||
|
kind: Role
|
||||||
|
name: scaletest-role
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: coder-provisioner-podmonitor-rolebinding
|
||||||
|
namespace: coder-big
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: coder-provisioner
|
||||||
|
roleRef:
|
||||||
|
kind: Role
|
||||||
|
name: coder-podmonitor
|
||||||
|
---
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
# This image is used to run scaletest jobs and, although it is inside
|
||||||
|
# the template directory, it is built separately and pushed to
|
||||||
|
# gcr.io/coder-dev-1/scaletest-runner:latest.
|
||||||
|
#
|
||||||
|
# Future improvements will include versioning and including the version
|
||||||
|
# in the template push.
|
||||||
|
|
||||||
|
FROM codercom/enterprise-base:ubuntu
|
||||||
|
|
||||||
|
ARG DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
USER root
|
||||||
|
|
||||||
|
# TODO(mafredri): Remove unneeded dependencies once we have a clear idea of what's needed.
|
||||||
|
RUN wget --quiet -O /tmp/terraform.zip https://releases.hashicorp.com/terraform/1.5.7/terraform_1.5.7_linux_amd64.zip \
|
||||||
|
&& unzip /tmp/terraform.zip -d /usr/local/bin \
|
||||||
|
&& rm /tmp/terraform.zip \
|
||||||
|
&& terraform --version
|
||||||
|
|
||||||
|
RUN wget --quiet -O /tmp/envsubst "https://github.com/a8m/envsubst/releases/download/v1.2.0/envsubst-$(uname -s)-$(uname -m)" \
|
||||||
|
&& chmod +x /tmp/envsubst \
|
||||||
|
&& mv /tmp/envsubst /usr/local/bin
|
||||||
|
|
||||||
|
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
|
||||||
|
&& curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - \
|
||||||
|
&& apt-get update \
|
||||||
|
&& apt-get install --yes \
|
||||||
|
google-cloud-cli \
|
||||||
|
jq \
|
||||||
|
kubectl \
|
||||||
|
zstd \
|
||||||
|
&& gcloud --version \
|
||||||
|
&& kubectl version --client \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
USER coder
|
|
@ -0,0 +1,9 @@
|
||||||
|
---
|
||||||
|
name: Scaletest Runner
|
||||||
|
description: Run a scaletest.
|
||||||
|
tags: [local]
|
||||||
|
---
|
||||||
|
|
||||||
|
# Scaletest Runner
|
||||||
|
|
||||||
|
Run a scaletest.
|
|
@ -0,0 +1,531 @@
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
coder = {
|
||||||
|
source = "coder/coder"
|
||||||
|
version = "~> 0.11"
|
||||||
|
}
|
||||||
|
kubernetes = {
|
||||||
|
source = "hashicorp/kubernetes"
|
||||||
|
version = "~> 2.22"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "time_static" "start_time" {
|
||||||
|
# We can't set `count = data.coder_workspace.me.start_count` here because then
|
||||||
|
# we can't use this value in `locals`. The permission check is recreated on
|
||||||
|
# start, which will update the timestamp.
|
||||||
|
triggers = {
|
||||||
|
count : length(null_resource.permission_check)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "null_resource" "permission_check" {
|
||||||
|
count = data.coder_workspace.me.start_count
|
||||||
|
|
||||||
|
# Limit which users can create a workspace in this template.
|
||||||
|
# The "default" user and workspace are present because they are needed
|
||||||
|
# for the plan, and consequently, updating the template.
|
||||||
|
lifecycle {
|
||||||
|
precondition {
|
||||||
|
condition = can(regex("^(default/default|scaletest/runner)$", "${data.coder_workspace.me.owner}/${data.coder_workspace.me.name}"))
|
||||||
|
error_message = "User and workspace name is not allowed, expected 'scaletest/runner'."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
workspace_pod_name = "coder-scaletest-runner-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
|
||||||
|
workspace_pod_instance = "coder-workspace-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
|
||||||
|
service_account_name = "scaletest-sa"
|
||||||
|
cpu = 2
|
||||||
|
memory = 2
|
||||||
|
home_disk_size = 10
|
||||||
|
scaletest_run_id = "scaletest-${time_static.start_time.rfc3339}"
|
||||||
|
scaletest_run_dir = "/home/coder/${local.scaletest_run_id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_provisioner" "me" {
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_workspace" "me" {
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_parameter" "verbose" {
|
||||||
|
order = 1
|
||||||
|
type = "bool"
|
||||||
|
name = "Verbose"
|
||||||
|
default = false
|
||||||
|
description = "Show debug output."
|
||||||
|
mutable = true
|
||||||
|
ephemeral = true
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_parameter" "dry_run" {
|
||||||
|
order = 2
|
||||||
|
type = "bool"
|
||||||
|
name = "Dry-run"
|
||||||
|
default = true
|
||||||
|
description = "Perform a dry-run to see what would happen."
|
||||||
|
mutable = true
|
||||||
|
ephemeral = true
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_parameter" "create_concurrency" {
|
||||||
|
order = 10
|
||||||
|
type = "number"
|
||||||
|
name = "Create concurrency"
|
||||||
|
default = 10
|
||||||
|
description = "The number of workspaces to create concurrently."
|
||||||
|
mutable = true
|
||||||
|
|
||||||
|
# Setting zero = unlimited, but perhaps not a good idea,
|
||||||
|
# we can raise this limit instead.
|
||||||
|
validation {
|
||||||
|
min = 1
|
||||||
|
max = 100
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_parameter" "job_concurrency" {
|
||||||
|
order = 11
|
||||||
|
type = "number"
|
||||||
|
name = "Job concurrency"
|
||||||
|
default = 10
|
||||||
|
description = "The number of concurrent jobs (e.g. when producing workspace traffic)."
|
||||||
|
mutable = true
|
||||||
|
|
||||||
|
# Setting zero = unlimited, but perhaps not a good idea,
|
||||||
|
# we can raise this limit instead.
|
||||||
|
validation {
|
||||||
|
min = 1
|
||||||
|
max = 100
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_parameter" "cleanup_concurrency" {
|
||||||
|
order = 12
|
||||||
|
type = "number"
|
||||||
|
name = "Cleanup concurrency"
|
||||||
|
default = 10
|
||||||
|
description = "The number of concurrent cleanup jobs."
|
||||||
|
mutable = true
|
||||||
|
|
||||||
|
# Setting zero = unlimited, but perhaps not a good idea,
|
||||||
|
# we can raise this limit instead.
|
||||||
|
validation {
|
||||||
|
min = 1
|
||||||
|
max = 100
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_parameter" "cleanup_strategy" {
|
||||||
|
order = 13
|
||||||
|
name = "Cleanup strategy"
|
||||||
|
default = "always"
|
||||||
|
description = "The strategy used to cleanup workspaces after the scaletest is complete."
|
||||||
|
mutable = true
|
||||||
|
ephemeral = true
|
||||||
|
option {
|
||||||
|
name = "Always"
|
||||||
|
value = "always"
|
||||||
|
description = "Automatically cleanup workspaces after the scaletest ends."
|
||||||
|
}
|
||||||
|
option {
|
||||||
|
name = "On stop"
|
||||||
|
value = "on_stop"
|
||||||
|
description = "Cleanup workspaces when the workspace is stopped."
|
||||||
|
}
|
||||||
|
option {
|
||||||
|
name = "On success"
|
||||||
|
value = "on_success"
|
||||||
|
description = "Automatically cleanup workspaces after the scaletest is complete if no error occurs."
|
||||||
|
}
|
||||||
|
option {
|
||||||
|
name = "On error"
|
||||||
|
value = "on_error"
|
||||||
|
description = "Automatically cleanup workspaces after the scaletest is complete if an error occurs."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
data "coder_parameter" "workspace_template" {
|
||||||
|
order = 20
|
||||||
|
name = "workspace_template"
|
||||||
|
display_name = "Workspace Template"
|
||||||
|
description = "The template used for workspace creation."
|
||||||
|
default = "kubernetes-minimal"
|
||||||
|
icon = "/emojis/1f4dc.png" # Scroll.
|
||||||
|
mutable = true
|
||||||
|
option {
|
||||||
|
name = "Minimal"
|
||||||
|
value = "kubernetes-minimal"
|
||||||
|
icon = "/emojis/1fab6.png" # Feather.
|
||||||
|
description = "Sized to fit approx. 32 per t2d-standard-8 instance."
|
||||||
|
}
|
||||||
|
option {
|
||||||
|
name = "Small"
|
||||||
|
value = "kubernetes-small"
|
||||||
|
icon = "/emojis/1f42d.png" # Mouse.
|
||||||
|
description = "Provisions a small-sized workspace with no persistent storage."
|
||||||
|
}
|
||||||
|
option {
|
||||||
|
name = "Medium"
|
||||||
|
value = "kubernetes-medium"
|
||||||
|
icon = "/emojis/1f436.png" # Dog.
|
||||||
|
description = "Provisions a medium-sized workspace with no persistent storage."
|
||||||
|
}
|
||||||
|
option {
|
||||||
|
name = "Large"
|
||||||
|
value = "kubernetes-large"
|
||||||
|
icon = "/emojis/1f434.png" # Horse.
|
||||||
|
description = "Provisions a large-sized workspace with no persistent storage."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_parameter" "num_workspaces" {
|
||||||
|
order = 21
|
||||||
|
type = "number"
|
||||||
|
name = "Number of workspaces to create"
|
||||||
|
default = 100
|
||||||
|
description = "The scaletest suite will create this number of workspaces."
|
||||||
|
mutable = true
|
||||||
|
|
||||||
|
validation {
|
||||||
|
min = 0
|
||||||
|
max = 1000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "coder_parameter" "namespace" {
|
||||||
|
order = 999
|
||||||
|
type = "string"
|
||||||
|
name = "Namespace"
|
||||||
|
default = "coder-big"
|
||||||
|
description = "The Kubernetes namespace to create the scaletest runner resources in."
|
||||||
|
}
|
||||||
|
|
||||||
|
data "archive_file" "scripts_zip" {
|
||||||
|
type = "zip"
|
||||||
|
output_path = "${path.module}/scripts.zip"
|
||||||
|
source_dir = "${path.module}/scripts"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "coder_agent" "main" {
|
||||||
|
arch = data.coder_provisioner.me.arch
|
||||||
|
dir = local.scaletest_run_dir
|
||||||
|
os = "linux"
|
||||||
|
env = {
|
||||||
|
VERBOSE : data.coder_parameter.verbose.value ? "1" : "0",
|
||||||
|
DRY_RUN : data.coder_parameter.dry_run.value ? "1" : "0",
|
||||||
|
CODER_CONFIG_DIR : "/home/coder/.config/coderv2",
|
||||||
|
CODER_USER_TOKEN : data.coder_workspace.me.owner_session_token,
|
||||||
|
CODER_URL : data.coder_workspace.me.access_url,
|
||||||
|
|
||||||
|
# Global scaletest envs that may affect each `coder exp scaletest` invocation.
|
||||||
|
CODER_SCALETEST_PROMETHEUS_ADDRESS : "0.0.0.0:21112",
|
||||||
|
CODER_SCALETEST_PROMETHEUS_WAIT : "60s",
|
||||||
|
CODER_SCALETEST_CONCURRENCY : "${data.coder_parameter.job_concurrency.value}",
|
||||||
|
CODER_SCALETEST_CLEANUP_CONCURRENCY : "${data.coder_parameter.cleanup_concurrency.value}",
|
||||||
|
|
||||||
|
# Local envs passed as arguments to `coder exp scaletest` invocations.
|
||||||
|
SCALETEST_RUN_ID : local.scaletest_run_id,
|
||||||
|
SCALETEST_RUN_DIR : local.scaletest_run_dir,
|
||||||
|
SCALETEST_TEMPLATE : data.coder_parameter.workspace_template.value,
|
||||||
|
SCALETEST_SKIP_CLEANUP : "1",
|
||||||
|
SCALETEST_NUM_WORKSPACES : data.coder_parameter.num_workspaces.value,
|
||||||
|
SCALETEST_CREATE_CONCURRENCY : "${data.coder_parameter.create_concurrency.value}",
|
||||||
|
SCALETEST_CLEANUP_STRATEGY : data.coder_parameter.cleanup_strategy.value,
|
||||||
|
|
||||||
|
SCRIPTS_ZIP : filebase64(data.archive_file.scripts_zip.output_path),
|
||||||
|
SCRIPTS_DIR : "/tmp/scripts",
|
||||||
|
}
|
||||||
|
display_apps {
|
||||||
|
vscode = false
|
||||||
|
ssh_helper = false
|
||||||
|
}
|
||||||
|
startup_script_timeout = 3600
|
||||||
|
shutdown_script_timeout = 1800
|
||||||
|
startup_script_behavior = "blocking"
|
||||||
|
startup_script = file("startup.sh")
|
||||||
|
shutdown_script = file("shutdown.sh")
|
||||||
|
|
||||||
|
# Scaletest metadata.
|
||||||
|
metadata {
|
||||||
|
display_name = "Scaletest status"
|
||||||
|
key = "00_scaletest_status"
|
||||||
|
script = file("metadata_status.sh")
|
||||||
|
interval = 1
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata {
|
||||||
|
display_name = "Scaletest phase"
|
||||||
|
key = "01_scaletest_phase"
|
||||||
|
script = file("metadata_phase.sh")
|
||||||
|
interval = 1
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata {
|
||||||
|
display_name = "Scaletest phase (previous)"
|
||||||
|
key = "02_scaletest_previous_phase"
|
||||||
|
script = file("metadata_previous_phase.sh")
|
||||||
|
interval = 1
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Misc workspace metadata.
|
||||||
|
metadata {
|
||||||
|
display_name = "CPU Usage"
|
||||||
|
key = "80_cpu_usage"
|
||||||
|
script = "coder stat cpu"
|
||||||
|
interval = 10
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata {
|
||||||
|
display_name = "RAM Usage"
|
||||||
|
key = "81_ram_usage"
|
||||||
|
script = "coder stat mem"
|
||||||
|
interval = 10
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata {
|
||||||
|
display_name = "Home Disk"
|
||||||
|
key = "82_home_disk"
|
||||||
|
script = "coder stat disk --path $${HOME}"
|
||||||
|
interval = 60
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata {
|
||||||
|
display_name = "CPU Usage (Host)"
|
||||||
|
key = "83_cpu_usage_host"
|
||||||
|
script = "coder stat cpu --host"
|
||||||
|
interval = 10
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata {
|
||||||
|
display_name = "Memory Usage (Host)"
|
||||||
|
key = "84_mem_usage_host"
|
||||||
|
script = "coder stat mem --host"
|
||||||
|
interval = 10
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata {
|
||||||
|
display_name = "Load Average (Host)"
|
||||||
|
key = "85_load_host"
|
||||||
|
# Get load avg scaled by number of cores.
|
||||||
|
script = <<-EOS
|
||||||
|
echo "`cat /proc/loadavg | awk '{ print $1 }'` `nproc`" | awk '{ printf "%0.2f", $1/$2 }'
|
||||||
|
EOS
|
||||||
|
interval = 60
|
||||||
|
timeout = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "coder_app" "grafana" {
|
||||||
|
agent_id = coder_agent.main.id
|
||||||
|
slug = "00-grafana"
|
||||||
|
display_name = "Grafana"
|
||||||
|
url = "https://stats.dev.c8s.io/d/qLVSTR-Vz/coderv2-loadtest-dashboard?orgId=1&from=${time_static.start_time.unix * 1000}&to=now"
|
||||||
|
icon = "https://grafana.com/static/assets/img/fav32.png"
|
||||||
|
external = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "coder_app" "prometheus" {
|
||||||
|
agent_id = coder_agent.main.id
|
||||||
|
slug = "01-prometheus"
|
||||||
|
display_name = "Prometheus"
|
||||||
|
// https://stats.dev.c8s.io:9443/classic/graph?g0.range_input=2h&g0.end_input=2023-09-08%2015%3A58&g0.stacked=0&g0.expr=rate(pg_stat_database_xact_commit%7Bcluster%3D%22big%22%2Cdatname%3D%22big-coder%22%7D%5B1m%5D)&g0.tab=0
|
||||||
|
url = "https://stats.dev.c8s.io:9443"
|
||||||
|
icon = "https://prometheus.io/assets/favicons/favicon-32x32.png"
|
||||||
|
external = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "coder_app" "manual_cleanup" {
|
||||||
|
agent_id = coder_agent.main.id
|
||||||
|
slug = "02-manual-cleanup"
|
||||||
|
display_name = "Manual cleanup"
|
||||||
|
icon = "/emojis/1f9f9.png"
|
||||||
|
command = "/tmp/scripts/cleanup.sh manual"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "kubernetes_persistent_volume_claim" "home" {
|
||||||
|
depends_on = [null_resource.permission_check]
|
||||||
|
metadata {
|
||||||
|
name = "${local.workspace_pod_name}-home"
|
||||||
|
namespace = data.coder_parameter.namespace.value
|
||||||
|
labels = {
|
||||||
|
"app.kubernetes.io/name" = "coder-pvc"
|
||||||
|
"app.kubernetes.io/instance" = "coder-pvc-${lower(data.coder_workspace.me.owner)}-${lower(data.coder_workspace.me.name)}"
|
||||||
|
"app.kubernetes.io/part-of" = "coder"
|
||||||
|
// Coder specific labels.
|
||||||
|
"com.coder.resource" = "true"
|
||||||
|
"com.coder.workspace.id" = data.coder_workspace.me.id
|
||||||
|
"com.coder.workspace.name" = data.coder_workspace.me.name
|
||||||
|
"com.coder.user.id" = data.coder_workspace.me.owner_id
|
||||||
|
"com.coder.user.username" = data.coder_workspace.me.owner
|
||||||
|
}
|
||||||
|
annotations = {
|
||||||
|
"com.coder.user.email" = data.coder_workspace.me.owner_email
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wait_until_bound = false
|
||||||
|
spec {
|
||||||
|
access_modes = ["ReadWriteOnce"]
|
||||||
|
resources {
|
||||||
|
requests = {
|
||||||
|
storage = "${local.home_disk_size}Gi"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "kubernetes_pod" "main" {
|
||||||
|
depends_on = [null_resource.permission_check]
|
||||||
|
count = data.coder_workspace.me.start_count
|
||||||
|
metadata {
|
||||||
|
name = local.workspace_pod_name
|
||||||
|
namespace = data.coder_parameter.namespace.value
|
||||||
|
labels = {
|
||||||
|
"app.kubernetes.io/name" = "coder-workspace"
|
||||||
|
"app.kubernetes.io/instance" = local.workspace_pod_instance
|
||||||
|
"app.kubernetes.io/part-of" = "coder"
|
||||||
|
// Coder specific labels.
|
||||||
|
"com.coder.resource" = "true"
|
||||||
|
"com.coder.workspace.id" = data.coder_workspace.me.id
|
||||||
|
"com.coder.workspace.name" = data.coder_workspace.me.name
|
||||||
|
"com.coder.user.id" = data.coder_workspace.me.owner_id
|
||||||
|
"com.coder.user.username" = data.coder_workspace.me.owner
|
||||||
|
}
|
||||||
|
annotations = {
|
||||||
|
"com.coder.user.email" = data.coder_workspace.me.owner_email
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Set the pod delete timeout to termination_grace_period_seconds + 1m.
|
||||||
|
timeouts {
|
||||||
|
delete = "32m"
|
||||||
|
}
|
||||||
|
spec {
|
||||||
|
security_context {
|
||||||
|
run_as_user = "1000"
|
||||||
|
fs_group = "1000"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow this pod to perform scale tests.
|
||||||
|
service_account_name = local.service_account_name
|
||||||
|
|
||||||
|
# Allow the coder agent to perform graceful shutdown and cleanup of
|
||||||
|
# scaletest resources, 30 minutes (cleanup timeout) + 1 minute.
|
||||||
|
termination_grace_period_seconds = 1860
|
||||||
|
|
||||||
|
container {
|
||||||
|
name = "dev"
|
||||||
|
image = "gcr.io/coder-dev-1/scaletest-runner:latest"
|
||||||
|
image_pull_policy = "Always"
|
||||||
|
command = ["sh", "-c", coder_agent.main.init_script]
|
||||||
|
security_context {
|
||||||
|
run_as_user = "1000"
|
||||||
|
}
|
||||||
|
env {
|
||||||
|
name = "CODER_AGENT_TOKEN"
|
||||||
|
value = coder_agent.main.token
|
||||||
|
}
|
||||||
|
env {
|
||||||
|
name = "CODER_AGENT_LOG_DIR"
|
||||||
|
value = "${local.scaletest_run_dir}/logs"
|
||||||
|
}
|
||||||
|
resources {
|
||||||
|
# Set requests and limits values such that we can do performant
|
||||||
|
# execution of `coder scaletest` commands.
|
||||||
|
requests = {
|
||||||
|
"cpu" = "250m"
|
||||||
|
"memory" = "512Mi"
|
||||||
|
}
|
||||||
|
limits = {
|
||||||
|
"cpu" = "${local.cpu}"
|
||||||
|
"memory" = "${local.memory}Gi"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
volume_mount {
|
||||||
|
mount_path = "/home/coder"
|
||||||
|
name = "home"
|
||||||
|
read_only = false
|
||||||
|
}
|
||||||
|
port {
|
||||||
|
container_port = 21112
|
||||||
|
name = "prometheus-http"
|
||||||
|
protocol = "TCP"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
volume {
|
||||||
|
name = "home"
|
||||||
|
persistent_volume_claim {
|
||||||
|
claim_name = kubernetes_persistent_volume_claim.home.metadata.0.name
|
||||||
|
read_only = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
affinity {
|
||||||
|
pod_anti_affinity {
|
||||||
|
// This affinity attempts to spread out all workspace pods evenly across
|
||||||
|
// nodes.
|
||||||
|
preferred_during_scheduling_ignored_during_execution {
|
||||||
|
weight = 1
|
||||||
|
pod_affinity_term {
|
||||||
|
topology_key = "kubernetes.io/hostname"
|
||||||
|
label_selector {
|
||||||
|
match_expressions {
|
||||||
|
key = "app.kubernetes.io/name"
|
||||||
|
operator = "In"
|
||||||
|
values = ["coder-workspace"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
node_affinity {
|
||||||
|
required_during_scheduling_ignored_during_execution {
|
||||||
|
node_selector_term {
|
||||||
|
match_expressions {
|
||||||
|
key = "cloud.google.com/gke-nodepool"
|
||||||
|
operator = "In"
|
||||||
|
values = ["big-misc"] # Avoid placing on the same nodes as scaletest workspaces.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "kubernetes_manifest" "pod_monitor" {
|
||||||
|
count = data.coder_workspace.me.start_count
|
||||||
|
manifest = {
|
||||||
|
apiVersion = "monitoring.coreos.com/v1"
|
||||||
|
kind = "PodMonitor"
|
||||||
|
metadata = {
|
||||||
|
namespace = data.coder_parameter.namespace.value
|
||||||
|
name = "podmonitor-${local.workspace_pod_name}"
|
||||||
|
}
|
||||||
|
spec = {
|
||||||
|
selector = {
|
||||||
|
matchLabels = {
|
||||||
|
"app.kubernetes.io/instance" : local.workspace_pod_instance
|
||||||
|
}
|
||||||
|
}
|
||||||
|
podMetricsEndpoints = [
|
||||||
|
{
|
||||||
|
port = "prometheus-http"
|
||||||
|
interval = "15s"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# shellcheck disable=SC2153 source=scaletest/templates/scaletest-runner/scripts/lib.sh
|
||||||
|
. "${SCRIPTS_DIR}/lib.sh"
|
||||||
|
|
||||||
|
get_phase
|
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# shellcheck disable=SC2153 source=scaletest/templates/scaletest-runner/scripts/lib.sh
|
||||||
|
. "${SCRIPTS_DIR}/lib.sh" 2>/dev/null || return
|
||||||
|
|
||||||
|
get_previous_phase
|
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# shellcheck disable=SC2153 source=scaletest/templates/scaletest-runner/scripts/lib.sh
|
||||||
|
. "${SCRIPTS_DIR}/lib.sh" 2>/dev/null || return
|
||||||
|
|
||||||
|
get_status
|
|
@ -0,0 +1,34 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
[[ $VERBOSE == 1 ]] && set -x
|
||||||
|
|
||||||
|
# shellcheck disable=SC2153 source=scaletest/templates/scaletest-runner/scripts/lib.sh
|
||||||
|
. "${SCRIPTS_DIR}/lib.sh"
|
||||||
|
|
||||||
|
event=${1:-}
|
||||||
|
|
||||||
|
if [[ -z $event ]]; then
|
||||||
|
event=manual
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ $event = manual ]]; then
|
||||||
|
echo -n 'WARNING: This will clean up all scaletest resources, continue? (y/n) '
|
||||||
|
read -r -n 1
|
||||||
|
if [[ $REPLY != [yY] ]]; then
|
||||||
|
echo $'\nAborting...'
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
start_phase "Cleanup (${event})"
|
||||||
|
coder exp scaletest cleanup \
|
||||||
|
--cleanup-job-timeout 15m \
|
||||||
|
--cleanup-timeout 30m |
|
||||||
|
tee "${SCALETEST_RESULTS_DIR}/cleanup-${event}.txt"
|
||||||
|
end_phase
|
||||||
|
|
||||||
|
if [[ $event = manual ]]; then
|
||||||
|
echo 'Press any key to continue...'
|
||||||
|
read -s -r -n 1
|
||||||
|
fi
|
|
@ -0,0 +1,94 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Only source this script once, this env comes from sourcing
|
||||||
|
# scripts/lib.sh from coder/coder below.
|
||||||
|
if [[ ${SCRIPTS_LIB_IS_SOURCED:-0} == 1 ]]; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Source scripts/lib.sh from coder/coder for common functions.
|
||||||
|
# shellcheck source=scripts/lib.sh
|
||||||
|
. "${HOME}/coder/scripts/lib.sh"
|
||||||
|
|
||||||
|
# Make shellcheck happy.
|
||||||
|
DRY_RUN=${DRY_RUN:-0}
|
||||||
|
|
||||||
|
# Environment variables shared between scripts.
|
||||||
|
SCALETEST_STATE_DIR="${SCALETEST_RUN_DIR}/state"
|
||||||
|
SCALETEST_PHASE_FILE="${SCALETEST_STATE_DIR}/phase"
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
SCALETEST_RESULTS_DIR="${SCALETEST_RUN_DIR}/results"
|
||||||
|
|
||||||
|
coder() {
|
||||||
|
maybedryrun "${DRY_RUN}" command coder "${@}"
|
||||||
|
}
|
||||||
|
|
||||||
|
show_json() {
|
||||||
|
maybedryrun "${DRY_RUN}" jq 'del(.. | .logs?)' "${1}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# set_status appends a timestamped status line to
# "${SCALETEST_STATE_DIR}/status". All arguments are joined into the
# status text and, when DRY_RUN is 1, the text is suffixed with
# " (dry-run)" so the status shown to the user makes the mode obvious.
set_status() {
	dry_run=
	if [[ ${DRY_RUN} == 1 ]]; then
		dry_run=" (dry-run)"
	fi
	# The timestamp is the first space-separated field; get_status strips
	# it by printing everything from the second field onwards.
	echo "$(date -Ins) ${*}${dry_run}" >>"${SCALETEST_STATE_DIR}/status"
}
|
||||||
|
lock_status() {
|
||||||
|
chmod 0440 "${SCALETEST_STATE_DIR}/status"
|
||||||
|
}
|
||||||
|
get_status() {
|
||||||
|
# Order of importance (reverse of creation).
|
||||||
|
if [[ -f "${SCALETEST_STATE_DIR}/status" ]]; then
|
||||||
|
tail -n1 "${SCALETEST_STATE_DIR}/status" | cut -d' ' -f2-
|
||||||
|
else
|
||||||
|
echo "Not started"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
phase_num=0
|
||||||
|
start_phase() {
|
||||||
|
# This may be incremented from another script, so we read it every time.
|
||||||
|
if [[ -f "${SCALETEST_PHASE_FILE}" ]]; then
|
||||||
|
phase_num="$(grep -c START: "${SCALETEST_PHASE_FILE}")"
|
||||||
|
fi
|
||||||
|
phase_num=$((phase_num + 1))
|
||||||
|
log "Start phase ${phase_num}: ${*}"
|
||||||
|
echo "$(date -Ins) START:${phase_num}: ${*}" >>"${SCALETEST_PHASE_FILE}"
|
||||||
|
}
|
||||||
|
# end_phase marks the phase most recently started by start_phase as
# finished by appending an "END:<num>: <name>" line to
# SCALETEST_PHASE_FILE. The last line of the phase file must be the
# matching "START:<num>:" entry for the current phase_num; otherwise a
# diagnostic is logged and the script exits with status 1.
end_phase() {
	# This file runs under `set -euo pipefail`, so a non-matching grep (or
	# a missing phase file) would abort the script right here, before the
	# diagnostic below could be logged. `|| true` lets the empty-result
	# check handle that case instead.
	phase="$(tail -n 1 "${SCALETEST_PHASE_FILE}" | grep "START:${phase_num}:" | cut -d' ' -f3- || true)"
	if [[ -z ${phase} ]]; then
		log "BUG: Could not find start phase ${phase_num} in ${SCALETEST_PHASE_FILE}"
		exit 1
	fi
	log "End phase ${phase_num}: ${phase}"
	echo "$(date -Ins) END:${phase_num}: ${phase}" >>"${SCALETEST_PHASE_FILE}"
}
|
||||||
|
get_phase() {
|
||||||
|
if [[ -f "${SCALETEST_PHASE_FILE}" ]]; then
|
||||||
|
phase_raw="$(tail -n1 "${SCALETEST_PHASE_FILE}")"
|
||||||
|
phase="$(echo "${phase_raw}" | cut -d' ' -f3-)"
|
||||||
|
if [[ ${phase_raw} == *"END:"* ]]; then
|
||||||
|
phase+=" [done]"
|
||||||
|
fi
|
||||||
|
echo "${phase}"
|
||||||
|
else
|
||||||
|
echo "None"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
get_previous_phase() {
|
||||||
|
if [[ -f "${SCALETEST_PHASE_FILE}" ]] && [[ $(grep -c START: "${SCALETEST_PHASE_FILE}") -gt 1 ]]; then
|
||||||
|
grep START: "${SCALETEST_PHASE_FILE}" | tail -n2 | head -n1 | cut -d' ' -f3-
|
||||||
|
else
|
||||||
|
echo "None"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_baseline() {
|
||||||
|
s=${1:-2}
|
||||||
|
start_phase "Waiting ${s}m to establish baseline"
|
||||||
|
maybedryrun "$DRY_RUN" sleep $((s * 60))
|
||||||
|
end_phase
|
||||||
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
[[ $VERBOSE == 1 ]] && set -x
|
||||||
|
|
||||||
|
# shellcheck disable=SC2153 source=scaletest/templates/scaletest-runner/scripts/lib.sh
|
||||||
|
. "${SCRIPTS_DIR}/lib.sh"
|
||||||
|
|
||||||
|
mkdir -p "${SCALETEST_STATE_DIR}"
|
||||||
|
mkdir -p "${SCALETEST_RESULTS_DIR}"
|
||||||
|
|
||||||
|
log "Preparing scaletest workspace environment..."
|
||||||
|
set_status Preparing
|
||||||
|
|
||||||
|
log "Compressing previous run logs (if applicable)..."
|
||||||
|
mkdir -p "${HOME}/archive"
|
||||||
|
for dir in "${HOME}/scaletest-"*; do
|
||||||
|
if [[ ${dir} = "${SCALETEST_RUN_DIR}" ]]; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
if [[ -d ${dir} ]]; then
|
||||||
|
name="$(basename "${dir}")"
|
||||||
|
(
|
||||||
|
cd "$(dirname "${dir}")"
|
||||||
|
ZSTD_CLEVEL=12 maybedryrun "$DRY_RUN" tar --zstd -cf "${HOME}/archive/${name}.tar.zst" "${name}"
|
||||||
|
)
|
||||||
|
maybedryrun "$DRY_RUN" rm -rf "${dir}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
log "Cloning coder/coder repo..."
|
||||||
|
|
||||||
|
if [[ ! -d "${HOME}/coder" ]]; then
|
||||||
|
git clone https://github.com/coder/coder.git "${HOME}/coder"
|
||||||
|
fi
|
||||||
|
(cd "${HOME}/coder" && git pull)
|
||||||
|
|
||||||
|
log "Creating coder CLI token (needed for cleanup during shutdown)..."
|
||||||
|
|
||||||
|
mkdir -p "${CODER_CONFIG_DIR}"
|
||||||
|
echo -n "${CODER_URL}" >"${CODER_CONFIG_DIR}/url"
|
||||||
|
|
||||||
|
set +x # Avoid logging the token.
|
||||||
|
# Persist configuration for shutdown script too since the
|
||||||
|
# owner token is invalidated immediately on workspace stop.
|
||||||
|
export CODER_SESSION_TOKEN=$CODER_USER_TOKEN
|
||||||
|
coder tokens delete scaletest_runner >/dev/null 2>&1 || true
|
||||||
|
# TODO(mafredri): Set TTL? This could interfere with delayed stop though.
|
||||||
|
token=$(coder tokens create --name scaletest_runner)
|
||||||
|
unset CODER_SESSION_TOKEN
|
||||||
|
echo -n "${token}" >"${CODER_CONFIG_DIR}/session"
|
||||||
|
[[ $VERBOSE == 1 ]] && set -x # Restore logging (if enabled).
|
||||||
|
|
||||||
|
log "Cleaning up from previous runs (if applicable)..."
|
||||||
|
"${SCRIPTS_DIR}/cleanup.sh" "prepare"
|
||||||
|
|
||||||
|
log "Preparation complete!"
|
|
@ -0,0 +1,59 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
[[ $VERBOSE == 1 ]] && set -x
|
||||||
|
|
||||||
|
# shellcheck disable=SC2153 source=scaletest/templates/scaletest-runner/scripts/lib.sh
|
||||||
|
. "${SCRIPTS_DIR}/lib.sh"
|
||||||
|
|
||||||
|
log "Running scaletest..."
|
||||||
|
set_status Running
|
||||||
|
|
||||||
|
start_phase "Creating workspaces"
|
||||||
|
coder exp scaletest create-workspaces \
|
||||||
|
--count "${SCALETEST_NUM_WORKSPACES}" \
|
||||||
|
--template "${SCALETEST_TEMPLATE}" \
|
||||||
|
--concurrency "${SCALETEST_CREATE_CONCURRENCY}" \
|
||||||
|
--job-timeout 15m \
|
||||||
|
--no-cleanup \
|
||||||
|
--output json:"${SCALETEST_RESULTS_DIR}/create-workspaces.json"
|
||||||
|
show_json "${SCALETEST_RESULTS_DIR}/create-workspaces.json"
|
||||||
|
end_phase
|
||||||
|
|
||||||
|
wait_baseline 5
|
||||||
|
|
||||||
|
start_phase "SSH traffic"
|
||||||
|
coder exp scaletest workspace-traffic \
|
||||||
|
--ssh \
|
||||||
|
--bytes-per-tick 10240 \
|
||||||
|
--tick-interval 1s \
|
||||||
|
--timeout 5m \
|
||||||
|
--output json:"${SCALETEST_RESULTS_DIR}/traffic-ssh.json"
|
||||||
|
show_json "${SCALETEST_RESULTS_DIR}/traffic-ssh.json"
|
||||||
|
end_phase
|
||||||
|
|
||||||
|
wait_baseline 5
|
||||||
|
|
||||||
|
start_phase "ReconnectingPTY traffic"
|
||||||
|
coder exp scaletest workspace-traffic \
|
||||||
|
--bytes-per-tick 10240 \
|
||||||
|
--tick-interval 1s \
|
||||||
|
--timeout 5m \
|
||||||
|
--output json:"${SCALETEST_RESULTS_DIR}/traffic-reconnectingpty.json"
|
||||||
|
show_json "${SCALETEST_RESULTS_DIR}/traffic-reconnectingpty.json"
|
||||||
|
end_phase
|
||||||
|
|
||||||
|
wait_baseline 5
|
||||||
|
|
||||||
|
start_phase "Dashboard traffic"
|
||||||
|
coder exp scaletest dashboard \
|
||||||
|
--count "${SCALETEST_NUM_WORKSPACES}" \
|
||||||
|
--job-timeout 5m \
|
||||||
|
--output json:"${SCALETEST_RESULTS_DIR}/traffic-dashboard.json"
|
||||||
|
show_json "${SCALETEST_RESULTS_DIR}/traffic-dashboard.json"
|
||||||
|
end_phase
|
||||||
|
|
||||||
|
wait_baseline 5
|
||||||
|
|
||||||
|
log "Scaletest complete!"
|
||||||
|
set_status Complete
|
|
@ -0,0 +1,14 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
|
[[ $VERBOSE == 1 ]] && set -x
|
||||||
|
|
||||||
|
# shellcheck disable=SC2153 source=scaletest/templates/scaletest-runner/scripts/lib.sh
|
||||||
|
. "${SCRIPTS_DIR}/lib.sh"
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
coder tokens remove scaletest_runner >/dev/null 2>&1 || true
|
||||||
|
}
|
||||||
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
"${SCRIPTS_DIR}/cleanup.sh" shutdown
|
|
@ -0,0 +1,52 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
[[ $VERBOSE == 1 ]] && set -x
|
||||||
|
|
||||||
|
# Unzip the scaletest scripts into SCRIPTS_DIR.
|
||||||
|
# shellcheck disable=SC2153
|
||||||
|
echo "Extracting scaletest scripts into ${SCRIPTS_DIR}..."
|
||||||
|
base64 -d <<<"${SCRIPTS_ZIP}" >/tmp/scripts.zip
|
||||||
|
rm -rf "${SCRIPTS_DIR}" || true
|
||||||
|
mkdir -p "${SCRIPTS_DIR}"
|
||||||
|
unzip -o /tmp/scripts.zip -d "${SCRIPTS_DIR}"
|
||||||
|
rm /tmp/scripts.zip
|
||||||
|
|
||||||
|
# shellcheck disable=SC2153 source=scaletest/templates/scaletest-runner/scripts/lib.sh
|
||||||
|
. "${SCRIPTS_DIR}/lib.sh"
|
||||||
|
|
||||||
|
# Show failure in the UI if script exits with error.
|
||||||
|
failed_status=Failed
|
||||||
|
# on_exit is the EXIT trap handler. It first clears the ERR and EXIT
# traps, then decides whether to invoke cleanup.sh based on
# SCALETEST_CLEANUP_STRATEGY:
#   on_stop    - nothing here; the shutdown script handles it.
#   on_success - clean up only if the last status is not a failure.
#   on_error   - clean up only if the last status is a failure.
#   (other)    - always clean up (e.g. "always").
on_exit() {
	trap - ERR EXIT

	# set_status appends a suffix to the status text when DRY_RUN is 1, so
	# get_status can return "Failed (...)" rather than exactly "Failed".
	# Match on the prefix so dry-run failures are still seen as failures.
	case "${SCALETEST_CLEANUP_STRATEGY}" in
	on_stop)
		# Handled by shutdown script.
		;;
	on_success)
		if [[ $(get_status) != "${failed_status}"* ]]; then
			"${SCRIPTS_DIR}/cleanup.sh" "${SCALETEST_CLEANUP_STRATEGY}"
		fi
		;;
	on_error)
		if [[ $(get_status) = "${failed_status}"* ]]; then
			"${SCRIPTS_DIR}/cleanup.sh" "${SCALETEST_CLEANUP_STRATEGY}"
		fi
		;;
	*)
		"${SCRIPTS_DIR}/cleanup.sh" "${SCALETEST_CLEANUP_STRATEGY}"
		;;
	esac
}
|
||||||
|
trap on_exit EXIT
|
||||||
|
|
||||||
|
on_err() {
|
||||||
|
log "Scaletest failed!"
|
||||||
|
set_status "${failed_status}"
|
||||||
|
lock_status # Ensure we never rewrite the status after a failure.
|
||||||
|
}
|
||||||
|
trap on_err ERR
|
||||||
|
|
||||||
|
"${SCRIPTS_DIR}/prepare.sh"
|
||||||
|
"${SCRIPTS_DIR}/run.sh"
|
Loading…
Reference in New Issue