feat(scaletest/templates): gather pod logs at the end of a scale test (#10288)

Mathias Fredriksson 2023-10-16 16:50:04 +03:00 committed by GitHub
parent 8efa1239e7
commit 8ffe0e22b6
3 changed files with 35 additions and 3 deletions

View File

@@ -42,8 +42,9 @@ locals {
cpu = 16
memory = 64
home_disk_size = 10
scaletest_run_id = "scaletest-${time_static.start_time.rfc3339}"
scaletest_run_id = "scaletest-${replace(time_static.start_time.rfc3339, ":", "-")}"
scaletest_run_dir = "/home/coder/${local.scaletest_run_id}"
+scaletest_run_start_time = time_static.start_time.rfc3339
grafana_url = "https://stats.dev.c8s.io"
grafana_dashboard_uid = "qLVSTR-Vz"
grafana_dashboard_name = "coderv2-loadtest-dashboard"
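
As an aside (not part of this commit), the colon-to-dash substitution in the run ID matters because the ID is reused in directory and file names below; a rough bash equivalent of the new expression, using a hypothetical start time, looks like this:

# Rough bash equivalent of the new scaletest_run_id expression (hypothetical value):
start_time="2023-10-16T13:50:04Z"               # time_static.start_time.rfc3339
scaletest_run_id="scaletest-${start_time//:/-}" # Terraform: replace(..., ":", "-")
echo "${scaletest_run_id}"                      # scaletest-2023-10-16T13-50-04Z
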
@@ -374,6 +375,7 @@ resource "coder_agent" "main" {
# Local envs passed as arguments to `coder exp scaletest` invocations.
SCALETEST_RUN_ID : local.scaletest_run_id,
SCALETEST_RUN_DIR : local.scaletest_run_dir,
+SCALETEST_RUN_START_TIME : local.scaletest_run_start_time,
# Comment is a scaletest param, but we want to surface it separately from
# the rest, so we use a different name.

View File

@@ -19,11 +19,12 @@ SCALETEST_STATE_DIR="${SCALETEST_RUN_DIR}/state"
SCALETEST_PHASE_FILE="${SCALETEST_STATE_DIR}/phase"
# shellcheck disable=SC2034
SCALETEST_RESULTS_DIR="${SCALETEST_RUN_DIR}/results"
+SCALETEST_LOGS_DIR="${SCALETEST_RUN_DIR}/logs"
SCALETEST_PPROF_DIR="${SCALETEST_RUN_DIR}/pprof"
# https://github.com/kubernetes/kubernetes/issues/72501 :-(
SCALETEST_CODER_BINARY="/tmp/coder-full-${SCALETEST_RUN_ID//:/-}"
SCALETEST_CODER_BINARY="/tmp/coder-full-${SCALETEST_RUN_ID}"
mkdir -p "${SCALETEST_STATE_DIR}" "${SCALETEST_RESULTS_DIR}" "${SCALETEST_PPROF_DIR}"
mkdir -p "${SCALETEST_STATE_DIR}" "${SCALETEST_RESULTS_DIR}" "${SCALETEST_LOGS_DIR}" "${SCALETEST_PPROF_DIR}"
coder() {
if [[ ! -x "${SCALETEST_CODER_BINARY}" ]]; then

View File

@@ -63,6 +63,28 @@ annotate_grafana "workspace" "Agent running" # Ended in shutdown.sh.
} &
pprof_pid=$!
+logs_gathered=0
+gather_logs() {
+	if ((logs_gathered == 1)); then
+		return
+	fi
+	logs_gathered=1
+	# Gather logs from all coderd and provisioner instances, and all workspaces.
+	annotate_grafana "logs" "Gather logs"
+	podsraw="$(
+		kubectl -n coder-big get pods -l app.kubernetes.io/name=coder -o name
+		kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name
+		kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-"
+	)"
+	mapfile -t pods <<<"${podsraw}"
+	for pod in "${pods[@]}"; do
+		pod_name="${pod#pod/}"
+		kubectl -n coder-big logs "${pod}" --since-time="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt"
+	done
+	annotate_grafana_end "logs" "Gather logs"
+}
set_appearance "${appearance_json}" "${service_banner_color}" "${service_banner_message} | Scaletest running: [${CODER_USER}/${CODER_WORKSPACE}](${CODER_URL}/@${CODER_USER}/${CODER_WORKSPACE})!"
# Show failure in the UI if script exits with error.
@@ -80,6 +102,10 @@ on_exit() {
message_status=FAILED
fi
+# In case the test failed before gathering logs, gather them before
+# cleaning up, whilst the workspaces are still present.
+gather_logs
case "${SCALETEST_PARAM_CLEANUP_STRATEGY}" in
on_stop)
# Handled by shutdown script.
@@ -130,4 +156,7 @@ annotate_grafana "" "Start scaletest: ${SCALETEST_COMMENT}"
"${SCRIPTS_DIR}/run.sh"
+# Gather logs before ending the test.
+gather_logs
"${SCRIPTS_DIR}/report.sh" completed