diff --git a/scripts/ci-report/fetch_stats_from_ci.sh b/scripts/ci-report/fetch_stats_from_ci.sh
index 2ca8f586b5..4e155f1955 100755
--- a/scripts/ci-report/fetch_stats_from_ci.sh
+++ b/scripts/ci-report/fetch_stats_from_ci.sh
@@ -12,6 +12,9 @@ set -euo pipefail
 dir="$(dirname "$0")"/ci-stats
 mkdir -p "${dir}"
 
+# Disable gh run view logs, it's unreliable.
+USE_GH_RUN_VIEW_LOGS=0
+
 pushd "${dir}" >/dev/null
 
 # Stats step name, used for filtering log.
@@ -75,35 +78,111 @@ while read -r run; do
 		job_log=run-"${database_id}"-job-"${job_database_id}"-"${job_name}".log
 		if [[ ! -f "${job_log}" ]]; then
 			echo "Fetching log for: ${job_name} (${job_database_id}, ${job_url})"
-			# Example log (partial).
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4063489Z ##[group]Run # Artifacts are not available after rerunning a job,
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4063872Z # Artifacts are not available after rerunning a job,
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4064188Z # so we need to print the test stats to the log.
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4064642Z go run ./scripts/ci-report/main.go gotests.json | tee gotests_stats.json
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4110112Z shell: /usr/bin/bash -e {0}
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4110364Z ##[endgroup]
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3440469Z {
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3441078Z   "packages": [
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3441448Z     {
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3442927Z       "name": "agent",
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3443311Z       "time": 17.538
-			# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3444048Z     },
-			# ...
-			gh run view --job "${job_database_id}" --log >"${job_log}" || {
-				# Sometimes gh fails to extract ZIP, etc. :'(
-				rm -f "${job_log}"
-				echo "Failed to fetch log for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
-				continue
-			}
-			log_lines="$(wc -l "${job_log}" | awk '{print $1}')"
-			if [[ ${log_lines} -lt 2 ]]; then
-				# Sometimes gh returns nothing and gives no error :'(
-				rm -f "${job_log}"
-				echo "Log is empty for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
-				continue
+
+			if [[ ${USE_GH_RUN_VIEW_LOGS} -eq 0 ]]; then
+				# Since gh run view is unreliable, we will fetch the logs via API
+				# instead, however, unfortunately the API does not provide the job
+				# name in the log output.
+				#
+				# TODO(mafredri): This would be more reliably fetched from the following URL:
+				# https://github.com/coder/coder/commit/${head_sha}/checks/${job_database_id}/logs/${job_step_number}
+				# but it requires browser-level authentication(?).
+				#
+				# Example output:
+				#
+				# 2023-04-14T05:43:34.4763012Z ##[group]Run # Artifacts are not available after rerunning a job,
+				# 2023-04-14T05:43:34.4763385Z # Artifacts are not available after rerunning a job,
+				# 2023-04-14T05:43:34.4763815Z # so we need to print the test stats to the log.
+				# 2023-04-14T05:43:34.4764149Z go run ./scripts/ci-report/main.go gotests.json | tee gotests_stats.json
+				# 2023-04-14T05:43:34.4809056Z shell: /usr/bin/bash -e {0}
+				# 2023-04-14T05:43:34.4809308Z ##[endgroup]
+				# 2023-04-14T05:43:35.5934784Z {
+				# 2023-04-14T05:43:35.5935419Z   "packages": [
+				# 2023-04-14T05:43:35.5936020Z     {
+				# 2023-04-14T05:43:35.5936585Z       "name": "agent",
+				# 2023-04-14T05:43:35.5937105Z       "time": 17.044
+				# 2023-04-14T05:43:35.5937631Z     },
+				gh api "/repos/coder/coder/actions/jobs/${job_database_id}/logs" >"${job_log}" || {
+					# gh api can fail as well (e.g. network error or missing log) :'(
+					rm -f "${job_log}"
+					echo "Failed to fetch log for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
+					continue
+				}
+
+				# Find the step: buffer each "##[group]" header until one contains the
+				# go run command, print that header and then every line up to the next
+				# "##[group]". IFS= keeps whitespace intact and the || guard handles a
+				# last line without trailing newline.
+				log_buffer=()
+				found_step=0
+				while IFS= read -r line || [[ -n ${line} ]]; do
+					if [[ ${found_step} -eq 1 ]] && [[ ${#log_buffer[@]} -eq 0 ]]; then
+						if [[ ${line} == *"##[group]"* ]]; then
+							break
+						fi
+						# Mimic output from gh run view.
+						echo "${job_name}"$'\t'"${job_step_name}"$'\t'"${line}"
+					fi
+					if [[ ${found_step} -eq 0 ]] && [[ ${#log_buffer[@]} -eq 0 ]] && [[ ${line} != *"##[group]"* ]]; then
+						continue
+					fi
+					if [[ ${line} == *"##[group]"* ]]; then
+						log_buffer=("${line}")
+						continue
+					fi
+					if [[ ${#log_buffer[@]} -gt 0 ]]; then
+						log_buffer+=("${line}")
+					fi
+					if [[ ${line} == *"##[endgroup]"* ]]; then
+						if [[ ${found_step} -eq 1 ]]; then
+							for bufline in "${log_buffer[@]}"; do
+								# Mimic output from gh run view.
+								echo "${job_name}"$'\t'"${job_step_name}"$'\t'"${bufline}"
+							done
+						fi
+						log_buffer=()
+						continue
+					fi
+					# The buffered group header mentions the stats command: this is our step.
+					if [[ ${line} == *"go run ./scripts/ci-report/main.go"* ]]; then
+						found_step=1
+					fi
+				done <"${job_log}" >"${job_log}.parsed"
+				mv "${job_log}.parsed" "${job_log}"
+			else
+				# Example log (partial).
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4063489Z ##[group]Run # Artifacts are not available after rerunning a job,
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4063872Z # Artifacts are not available after rerunning a job,
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4064188Z # so we need to print the test stats to the log.
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4064642Z go run ./scripts/ci-report/main.go gotests.json | tee gotests_stats.json
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4110112Z shell: /usr/bin/bash -e {0}
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:18.4110364Z ##[endgroup]
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3440469Z {
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3441078Z   "packages": [
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3441448Z     {
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3442927Z       "name": "agent",
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3443311Z       "time": 17.538
+				# test-go (ubuntu-latest)	Print test stats	2023-04-11T03:02:19.3444048Z     },
+				# ...
+				gh run view --job "${job_database_id}" --log >"${job_log}" || {
+					# Sometimes gh fails to extract ZIP, etc. :'(
+					rm -f "${job_log}"
+					echo "Failed to fetch log for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
+					continue
+				}
 			fi
 		fi
 
+		log_lines="$(wc -l "${job_log}" | awk '{print $1}')"
+		if [[ ${log_lines} -lt 7 ]]; then
+			# Sanity check in case something went wrong, the ##[group]
+			# and ##[endgroup] header is 6 lines and start of JSON ("{")
+			# makes the 7th.
+			rm -f "${job_log}"
+			echo "Log is empty for: ${job_name} (${job_database_id}, ${job_url}), skipping..."
+			continue
+		fi
+
 		if ! job_stats="$(
 			# Extract the stats job output (JSON) from the job log,
 			# discarding the timestamp and non-JSON header.