feat(scaletest): add greedy agent test to runner (#10559)

This commit is contained in:
Mathias Fredriksson 2023-12-05 12:37:10 +02:00 committed by GitHub
parent dca8125263
commit e300b036be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 153 additions and 4 deletions

View File

@ -198,6 +198,12 @@ data "coder_parameter" "workspace_template" {
icon = "/emojis/1f436.png" # Dog.
description = "Provisions a medium-sized workspace with no persistent storage."
}
option {
name = "Medium (Greedy)"
value = "kubernetes-medium-greedy"
icon = "/emojis/1f436.png" # Dog.
description = "Provisions a medium-sized workspace with no persistent storage. Greedy agent variant."
}
option {
name = "Large"
value = "kubernetes-large"
@ -216,7 +222,7 @@ data "coder_parameter" "num_workspaces" {
validation {
min = 0
max = 1000
max = 2000
}
}
@ -335,6 +341,56 @@ data "coder_parameter" "load_scenario_baseline_duration" {
}
}
# Boolean toggle for the greedy agent test. It defaults to off and is
# passed to the runner agent as SCALETEST_PARAM_GREEDY_AGENT ("1" when
# true, "0" otherwise).
data "coder_parameter" "greedy_agent" {
order = 30
type = "bool"
name = "Greedy Agent"
default = false
description = "If true, the agent will attempt to consume all available resources."
mutable = true
ephemeral = true
}
# Template used to create the greedy agent workspace. The options mirror
# the sizes offered for the regular workspace template; the default is the
# dedicated "kubernetes-medium-greedy" variant. The value is passed to the
# runner agent as SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE.
data "coder_parameter" "greedy_agent_template" {
order = 31
name = "Greedy Agent Template"
display_name = "Greedy Agent Template"
description = "The template used for the greedy agent workspace (must not be same as workspace template)."
default = "kubernetes-medium-greedy"
icon = "/emojis/1f4dc.png" # Scroll.
mutable = true
option {
name = "Minimal"
value = "kubernetes-minimal"
icon = "/emojis/1fab6.png" # Feather.
description = "Sized to fit approx. 32 per t2d-standard-8 instance."
}
option {
name = "Small"
value = "kubernetes-small"
icon = "/emojis/1f42d.png" # Mouse.
description = "Provisions a small-sized workspace with no persistent storage."
}
option {
name = "Medium"
value = "kubernetes-medium"
icon = "/emojis/1f436.png" # Dog.
description = "Provisions a medium-sized workspace with no persistent storage."
}
option {
name = "Medium (Greedy)"
value = "kubernetes-medium-greedy"
icon = "/emojis/1f436.png" # Dog.
description = "Provisions a medium-sized workspace with no persistent storage. Greedy agent variant."
}
option {
name = "Large"
value = "kubernetes-large"
icon = "/emojis/1f434.png" # Horse.
description = "Provisions a large-sized workspace with no persistent storage."
}
}
data "coder_parameter" "namespace" {
order = 999
type = "string"
@ -395,6 +451,8 @@ resource "coder_agent" "main" {
SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL : "${data.coder_parameter.load_scenario_web_terminal_tick_interval.value}",
SCALETEST_PARAM_LOAD_SCENARIO_DASHBOARD_TRAFFIC_DURATION : "${data.coder_parameter.load_scenario_dashboard_traffic_duration.value}",
SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION : "${data.coder_parameter.load_scenario_baseline_duration.value}",
SCALETEST_PARAM_GREEDY_AGENT : data.coder_parameter.greedy_agent.value ? "1" : "0",
SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE : data.coder_parameter.greedy_agent_template.value,
GRAFANA_URL : local.grafana_url,
@ -584,7 +642,7 @@ resource "kubernetes_pod" "main" {
}
# Set the pod delete timeout to termination_grace_period_seconds + 2m (120s).
timeouts {
delete = "${(local.workspace_pod_termination_grace_period_seconds + 120) / 60}s"
delete = "${(local.workspace_pod_termination_grace_period_seconds + 120)}s"
}
spec {
security_context {

View File

@ -26,6 +26,70 @@ end_phase
wait_baseline "${SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION}"
# Extra arguments appended to the regular (non-greedy) workspace-traffic
# commands. Empty unless the greedy agent branch below populates it.
non_greedy_agent_traffic_args=()
if [[ ${SCALETEST_PARAM_GREEDY_AGENT} != 1 ]]; then
# Greedy agent disabled: define a no-op so the load scenarios can call
# greedy_agent_traffic without checking the parameter themselves.
greedy_agent_traffic() { :; }
else
echo "WARNING: Greedy agent enabled, this may cause the load tests to fail." >&2
non_greedy_agent_traffic_args=(
# Let the greedy agent traffic command be scraped.
# --scaletest-prometheus-address 0.0.0.0:21113
# --trace=false
# NOTE: both flags above are commented out, so this array stays empty.
)
annotate_grafana greedy_agent "Create greedy agent"
# Create exactly one workspace (--count 1) from the greedy agent template
# and record the result as JSON in the results directory.
coder exp scaletest create-workspaces \
--count 1 \
--template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \
--concurrency 1 \
--timeout 5h \
--job-timeout 5h \
--no-cleanup \
--output json:"${SCALETEST_RESULTS_DIR}/create-workspaces-greedy-agent.json"
# Run the baseline wait again now that the greedy agent workspace exists.
wait_baseline "${SCALETEST_PARAM_LOAD_SCENARIO_BASELINE_DURATION}"
#######################################
# Generate heavy traffic against the greedy agent workspace partway through
# a load scenario: sleep for a third of the scenario duration, then produce
# load for another third.
# Globals:
#   SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE (read)
#   SCALETEST_RESULTS_DIR (read)
#   GRAFANA_ADD_TAGS (exported; "error" when the traffic command failed,
#                     empty otherwise)
# Arguments:
#   $1 - scenario duration in minutes
#   $2 - scenario name; "SSH Traffic" selects SSH traffic, anything else
#        uses web terminal traffic
# Returns:
#   Exit status of the `coder exp scaletest workspace-traffic` command.
#   NOTE(review): when this function is run as a background job, the caller
#   must `wait` on its PID to observe this status; a bare `wait` (no
#   arguments) always returns 0.
#######################################
greedy_agent_traffic() {
	local timeout=${1} scenario=${2}
	# Keep all working variables local so a foreground call does not clobber
	# same-named variables (e.g. `status`, `args`) in the calling scope.
	local delay status
	local -a args=()

	# Run the greedy test for ~1/3 of the timeout (minutes -> seconds / 3).
	delay=$((timeout * 60 / 3))

	local type=web-terminal
	if [[ ${scenario} == "SSH Traffic" ]]; then
		type=ssh
		args+=(--ssh)
	fi

	sleep "${delay}"
	annotate_grafana greedy_agent "${scenario}: Greedy agent traffic"

	# Produce load at about 625MB/s (25MB every 40ms, i.e. 25 ticks/s).
	# Disable errexit so a failing traffic command is reported below instead
	# of aborting. Errexit is not restored here, so the setting persists in
	# the calling shell when the function is not run in a subshell.
	set +e
	coder exp scaletest workspace-traffic \
		--template "${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE}" \
		--bytes-per-tick $((1024 * 1024 * 25)) \
		--tick-interval 40ms \
		--timeout "$((delay))s" \
		--job-timeout "$((delay))s" \
		--output json:"${SCALETEST_RESULTS_DIR}/traffic-${type}-greedy-agent.json" \
		--scaletest-prometheus-address 0.0.0.0:21113 \
		--trace=false \
		"${args[@]}"
	status=${?}
	show_json "${SCALETEST_RESULTS_DIR}/traffic-${type}-greedy-agent.json"

	# Tag the closing Grafana annotation as an error when the run failed.
	export GRAFANA_ADD_TAGS=
	if [[ ${status} != 0 ]]; then
		GRAFANA_ADD_TAGS=error
	fi
	annotate_grafana_end greedy_agent "${scenario}: Greedy agent traffic"

	return "${status}"
}
fi
declare -A failed=()
for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
start_phase "Load scenario: ${scenario}"
@ -34,24 +98,40 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
status=0
case "${scenario}" in
"SSH Traffic")
greedy_agent_traffic "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}" "${scenario}" &
coder exp scaletest workspace-traffic \
--template "${SCALETEST_PARAM_TEMPLATE}" \
--ssh \
--bytes-per-tick "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_BYTES_PER_TICK}" \
--tick-interval "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_TICK_INTERVAL}ms" \
--timeout "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}m" \
--job-timeout "${SCALETEST_PARAM_LOAD_SCENARIO_SSH_TRAFFIC_DURATION}m30s" \
--output json:"${SCALETEST_RESULTS_DIR}/traffic-ssh.json"
--output json:"${SCALETEST_RESULTS_DIR}/traffic-ssh.json" \
"${non_greedy_agent_traffic_args[@]}"
status=$?
wait
status2=$?
if [[ ${status} == 0 ]]; then
status=${status2}
fi
show_json "${SCALETEST_RESULTS_DIR}/traffic-ssh.json"
;;
"Web Terminal Traffic")
greedy_agent_traffic "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}" "${scenario}" &
coder exp scaletest workspace-traffic \
--template "${SCALETEST_PARAM_TEMPLATE}" \
--bytes-per-tick "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_BYTES_PER_TICK}" \
--tick-interval "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_TICK_INTERVAL}ms" \
--timeout "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}m" \
--job-timeout "${SCALETEST_PARAM_LOAD_SCENARIO_WEB_TERMINAL_TRAFFIC_DURATION}m30s" \
--output json:"${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json"
--output json:"${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json" \
"${non_greedy_agent_traffic_args[@]}"
status=$?
wait
status2=$?
if [[ ${status} == 0 ]]; then
status=${status2}
fi
show_json "${SCALETEST_RESULTS_DIR}/traffic-web-terminal.json"
;;
"Dashboard Traffic")
@ -65,6 +145,10 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
;;
# Debug scenarios, for testing the runner.
"debug:greedy_agent_traffic")
greedy_agent_traffic 10 "${scenario}"
status=$?
;;
"debug:success")
maybedryrun "$DRY_RUN" sleep 10
status=0

View File

@ -3,6 +3,11 @@ set -euo pipefail
[[ $VERBOSE == 1 ]] && set -x
# Refuse to start when the greedy agent template and the scaletest template
# are the same; the two must be distinct templates.
[[ ${SCALETEST_PARAM_GREEDY_AGENT_TEMPLATE} != "${SCALETEST_PARAM_TEMPLATE}" ]] || {
	echo "ERROR: Greedy agent template must be different from the scaletest template." >&2
	exit 1
}
# Unzip scripts and add to path.
# shellcheck disable=SC2153
echo "Extracting scaletest scripts into ${SCRIPTS_DIR}..."
@ -10,6 +15,8 @@ base64 -d <<<"${SCRIPTS_ZIP}" >/tmp/scripts.zip
# Start from an empty scripts directory; a failed removal is tolerated.
rm -rf "${SCRIPTS_DIR}" || true
mkdir -p "${SCRIPTS_DIR}"
# Extract the archive into the scripts directory, overwriting existing files.
unzip -o /tmp/scripts.zip -d "${SCRIPTS_DIR}"
# Chmod to work around https://github.com/coder/coder/issues/10034
chmod +x "${SCRIPTS_DIR}"/*.sh
# The archive is no longer needed once extracted.
rm /tmp/scripts.zip
echo "Cloning coder/coder repo..."