fix(scaletest/templates): fix bugs and improve debugging (#10316)

This commit is contained in:
Mathias Fredriksson 2023-10-17 18:18:49 +03:00 committed by GitHub
parent 2f4ca0f566
commit 8f39ec5cc3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 24 additions and 17 deletions

View File

@ -82,7 +82,7 @@ end_phase() {
phase=$(tail -n 1 "${SCALETEST_PHASE_FILE}" | grep "START:${phase_num}:" | cut -d' ' -f3-)
if [[ -z ${phase} ]]; then
log "BUG: Could not find start phase ${phase_num} in ${SCALETEST_PHASE_FILE}"
exit 1
return 1
fi
log "End phase ${phase_num}: ${phase}"
echo "$(date -Ins) END:${phase_num}: ${phase}" >>"${SCALETEST_PHASE_FILE}"
@ -132,6 +132,7 @@ annotate_grafana() {
'{time: $time, tags: $tags | split(","), text: $text}' <<<'{}'
)"
if [[ ${DRY_RUN} == 1 ]]; then
echo "FAKEID:${tags}:${text}:${start}" >>"${SCALETEST_STATE_DIR}/grafana-annotations"
log "Would have annotated Grafana, data=${json}"
return 0
fi
@ -171,23 +172,18 @@ annotate_grafana_end() {
tags="${tags},${GRAFANA_EXTRA_TAGS}"
fi
if [[ ${DRY_RUN} == 1 ]]; then
log "Would have updated Grafana annotation (end=${end}): ${text} [${tags}]"
return 0
fi
if ! id=$(grep ":${tags}:${text}:${start}" "${SCALETEST_STATE_DIR}/grafana-annotations" | sort -n | tail -n1 | cut -d: -f1); then
log "NOTICE: Could not find Grafana annotation to end: '${tags}:${text}:${start}', skipping..."
return 0
fi
log "Annotating Grafana (end=${end}): ${text} [${tags}]"
log "Updating Grafana annotation (end=${end}): ${text} [${tags}, add=${GRAFANA_ADD_TAGS:-}]"
if [[ -n ${GRAFANA_ADD_TAGS:-} ]]; then
json="$(
jq -n \
--argjson timeEnd "${end}" \
--argjson tags "${tags},${GRAFANA_ADD_TAGS}" \
--arg tags "${tags},${GRAFANA_ADD_TAGS}" \
'{timeEnd: $timeEnd, tags: $tags | split(",")}'
)"
else
@ -275,7 +271,7 @@ coder_pods() {
fetch_coder_full() {
if [[ -x "${SCALETEST_CODER_BINARY}" ]]; then
log "Full Coder binary already exists at ${SCALETEST_CODER_BINARY}"
return
return 0
fi
ns=$(namespace)
if [[ -z "${ns}" ]]; then
@ -286,12 +282,12 @@ fetch_coder_full() {
pods=$(coder_pods)
if [[ -z ${pods} ]]; then
log "Could not find coder pods!"
return
return 1
fi
pod=$(cut -d ' ' -f 1 <<<"${pods}")
if [[ -z ${pod} ]]; then
log "Could not find coder pod!"
return
return 1
fi
log "Fetching full Coder binary from ${pod}"
# We need --retries due to https://github.com/kubernetes/kubernetes/issues/60140 :(
@ -309,8 +305,8 @@ fetch_coder_full() {
# com.coder.scaletest.status. It will overwrite the previous status.
set_pod_status_annotation() {
if [[ $# -ne 1 ]]; then
log "must specify an annotation value"
return
log "BUG: Must specify an annotation value"
return 1
else
maybedryrun "${DRY_RUN}" kubectl --namespace "$(namespace)" annotate pod "$(hostname)" "com.coder.scaletest.status=$1" --overwrite
fi

View File

@ -73,6 +73,10 @@ for scenario in "${SCALETEST_PARAM_LOAD_SCENARIOS[@]}"; do
maybedryrun "$DRY_RUN" sleep 10
status=1
;;
*)
log "WARNING: Unknown load scenario: ${scenario}, skipping..."
;;
esac
set -e
if ((status > 0)); then

View File

@ -47,7 +47,11 @@ annotate_grafana "workspace" "Agent running" # Ended in shutdown.sh.
trap 'trap - EXIT; kill -INT "${pids[@]}"; exit 1' INT EXIT
while :; do
sleep 285 # ~300 when accounting for profile and trace.
# Sleep for short periods of time so that we can exit quickly.
# This adds up to ~300 when accounting for profile and trace.
for ((i = 0; i < 285; i++)); do
sleep 1
done
log "Grabbing pprof dumps"
start="$(date +%s)"
annotate_grafana "pprof" "Grab pprof dumps (start=${start})"
@ -74,13 +78,13 @@ gather_logs() {
annotate_grafana "logs" "Gather logs"
podsraw="$(
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder -o name
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-"
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-provisioner -o name || true
kubectl -n coder-big get pods -l app.kubernetes.io/name=coder-workspace -o name | grep "^pod/scaletest-" || true
)"
mapfile -t pods <<<"${podsraw}"
for pod in "${pods[@]}"; do
pod_name="${pod#pod/}"
kubectl -n coder-big logs "${pod}" --since="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt"
kubectl -n coder-big logs "${pod}" --since-time="${SCALETEST_RUN_START_TIME}" >"${SCALETEST_LOGS_DIR}/${pod_name}.txt"
done
annotate_grafana_end "logs" "Gather logs"
}
@ -131,6 +135,9 @@ on_exit() {
set_appearance "${appearance_json}" "${message_color}" "${service_banner_message} | Scaletest ${message_status}: [${CODER_USER}/${CODER_WORKSPACE}](${CODER_URL}/@${CODER_USER}/${CODER_WORKSPACE})!"
annotate_grafana_end "" "Start scaletest: ${SCALETEST_COMMENT}"
wait "${pprof_pid}"
exit "${code}"
}
trap on_exit EXIT