fix: make agent scripts easier to troubleshoot (#2922)

- Adds distinct exit statuses to the bootstrap scripts
- Makes the bootstrap scripts loop forever trying to download the coder agent
- Surfaces and logs the status codes returned by the download tool
This commit is contained in:
Cian Johnston 2022-07-13 10:17:40 +01:00 committed by GitHub
parent 6f34cbff1e
commit 0f5f30b6f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 93 additions and 20 deletions

View File

@ -1,11 +1,34 @@
#!/usr/bin/env sh
# Shell options for the bootstrap script:
#   -e  stop at the first unhandled command failure
#   -u  treat expansion of an unset variable as an error
#   -x  trace every command before it runs, which helps when reading logs
# `pipefail` is deliberately not listed. Written as a bare word after the
# flags it is not an option at all: `set` stores it as positional parameter
# $1. The real spelling, `set -o pipefail`, is not available in every
# /bin/sh, and no pipeline appears in this script.
set -eux
# Inline EXIT handler: when the shell exits, print a notice and then sleep for
# 86400 seconds (24h). The handler is attached to EXIT, so it runs on every
# exit path regardless of the exit status. Installing another EXIT trap later
# in the script replaces this one.
trap "echo === Agent script exited with non-zero code. Sleeping 24h to preserve logs... && sleep 86400" EXIT
# waitonexit: hook that keeps a failed workspace around for inspection.
# Prints a banner and then blocks for 24 hours (86400 seconds) so that someone
# can exec into the workspace and look around before it goes away.
# Takes no arguments; its status is that of the final `sleep`.
waitonexit() {
  printf '%s\n' "=== Agent script exited with non-zero code. Sleeping 24h to preserve logs..."
  sleep 86400
}
# Run waitonexit whenever the shell exits.
trap waitonexit EXIT
# Work inside a freshly created temporary directory (template coder.XXXXXX).
BINARY_DIR=$(mktemp -d -t coder.XXXXXX)
BINARY_NAME=coder
# ACCESS_URL and ARCH are not assigned anywhere in this script; presumably they
# are filled in before it runs -- confirm against whatever renders it.
# No "/" is inserted before "bin/", so ACCESS_URL is expected to end with one.
BINARY_URL=${ACCESS_URL}bin/coder-darwin-${ARCH}
cd "$BINARY_DIR"
# NOTE(review): one-shot download with no retry, followed by an unguarded
# chmod. Under `set -e` a failure of either command ends the script before the
# retry loop further down is reached. These two lines look like the pre-change
# side of the diff shown next to their replacement -- confirm.
curl -fsSL --compressed "${ACCESS_URL}bin/coder-darwin-${ARCH}" -o "${BINARY_NAME}"
chmod +x $BINARY_NAME
# Fetch the coder agent binary, retrying until it succeeds.
# A download can fail for many reasons and most of them are likely transient,
# so instead of giving up the loop reports curl's exit code, waits 30 seconds
# and tries again, forever. Only a successful download leaves the loop.
while :; do
  if curl -fsSL --compressed "${BINARY_URL}" -o "${BINARY_NAME}"; then
    break
  else
    # In the else branch $? still holds curl's exit status.
    status=$?
  fi
  echo "error: failed to download coder agent using curl"
  echo "curl exit code: ${status}"
  echo "Trying again in 30 seconds..."
  sleep 30
done
# Mark the downloaded agent executable. If that fails there is nothing useful
# left to do, so report it and exit with status 1.
# Expansions are quoted so the name is always passed as exactly one word.
if ! chmod +x "${BINARY_NAME}"; then
  echo "Failed to make ${BINARY_NAME} executable"
  exit 1
fi

# Hand the agent its settings through the environment. AUTH_TYPE and
# ACCESS_URL are not assigned in this script; presumably they are supplied
# before it runs -- confirm.
export CODER_AGENT_AUTH="${AUTH_TYPE}"
export CODER_AGENT_URL="${ACCESS_URL}"

# Replace this shell with the agent process; exec does not return on success.
exec "./${BINARY_NAME}" agent

View File

@ -1,21 +1,48 @@
#!/usr/bin/env sh
# Shell options for the bootstrap script:
#   -e  stop at the first unhandled command failure
#   -u  treat expansion of an unset variable as an error
#   -x  trace every command before it runs, which helps when reading logs
# `pipefail` is deliberately not listed. Written as a bare word after the
# flags it is not an option at all: `set` stores it as positional parameter
# $1. The real spelling, `set -o pipefail`, is not available in every
# /bin/sh, and no pipeline appears in this script.
set -eux
# Inline EXIT handler: when the shell exits, print a notice and then sleep for
# 86400 seconds (24h). The handler is attached to EXIT, so it runs on every
# exit path regardless of the exit status. Installing another EXIT trap later
# in the script replaces this one.
trap "echo === Agent script exited with non-zero code. Sleeping 24h to preserve logs... && sleep 86400" EXIT
# waitonexit: hook that keeps a failed workspace around for inspection.
# Prints a banner and then blocks for 24 hours (86400 seconds) so that someone
# can exec into the workspace and look around before it goes away.
# Takes no arguments; its status is that of the final `sleep`.
waitonexit() {
  printf '%s\n' "=== Agent script exited with non-zero code. Sleeping 24h to preserve logs..."
  sleep 86400
}
# Run waitonexit whenever the shell exits.
trap waitonexit EXIT
# Work inside a freshly created temporary directory (template coder.XXXXXX).
BINARY_DIR=$(mktemp -d -t coder.XXXXXX)
BINARY_NAME=coder
# ACCESS_URL and ARCH are not assigned anywhere in this script; presumably they
# are filled in before it runs -- confirm against whatever renders it.
# No "/" is inserted before "bin/", so ACCESS_URL is expected to end with one.
BINARY_URL=${ACCESS_URL}bin/coder-linux-${ARCH}
cd "$BINARY_DIR"
# NOTE(review): single download attempt using the first tool found (curl, then
# wget, then busybox wget), with no retry. No closing `fi` for this chain is
# visible before the retry loop that follows, which suggests these lines are
# the pre-change side of the diff shown next to their replacement -- confirm
# before treating this listing as a runnable script.
if command -v curl >/dev/null 2>&1; then
curl -fsSL --compressed "${BINARY_URL}" -o "${BINARY_NAME}"
elif command -v wget >/dev/null 2>&1; then
wget -q "${BINARY_URL}" -O "${BINARY_NAME}"
elif command -v busybox >/dev/null 2>&1; then
busybox wget -q "${BINARY_URL}" -O "${BINARY_NAME}"
else
echo "error: no download tool found, please install curl, wget or busybox wget"
# Fetch the coder agent binary, retrying until it succeeds.
# A download can fail for many reasons and most of them are likely transient,
# so a failed attempt is reported and repeated after 30 seconds, forever.
# The only other way out is having no download tool at all (exit 127).
while :; do
  # Which downloader is installed is not known in advance: probe for curl,
  # then wget, then busybox (for its wget applet) and use the first one found.
  # In each else branch $? still holds the exit status of the failed download.
  status=""
  if command -v curl >/dev/null 2>&1; then
    if curl -fsSL --compressed "${BINARY_URL}" -o "${BINARY_NAME}"; then
      break
    else
      status=$?
    fi
  elif command -v wget >/dev/null 2>&1; then
    if wget -q "${BINARY_URL}" -O "${BINARY_NAME}"; then
      break
    else
      status=$?
    fi
  elif command -v busybox >/dev/null 2>&1; then
    if busybox wget -q "${BINARY_URL}" -O "${BINARY_NAME}"; then
      break
    else
      status=$?
    fi
  else
    # Retrying cannot help when there is nothing to download with.
    echo "error: no download tool found, please install curl, wget or busybox wget"
    exit 127
  fi
  echo "error: failed to download coder agent"
  echo " command returned: ${status}"
  echo "Trying again in 30 seconds..."
  sleep 30
done
# Mark the downloaded agent executable. If that fails there is nothing useful
# left to do, so report it and exit with status 1.
# Expansions are quoted so the name is always passed as exactly one word.
if ! chmod +x "${BINARY_NAME}"; then
  echo "Failed to make ${BINARY_NAME} executable"
  exit 1
fi
# NOTE(review): repeats the chmod that the guarded `if` directly above has
# already performed, this time without a failure message. It looks like the
# pre-change side of the diff shown next to its replacement -- confirm.
chmod +x $BINARY_NAME
# Hand the agent its settings through the environment. AUTH_TYPE and
# ACCESS_URL are not assigned in this script; presumably they are supplied
# before it runs -- confirm.
export CODER_AGENT_AUTH="${AUTH_TYPE}"
export CODER_AGENT_URL="${ACCESS_URL}"
# Replace this shell with the agent process; exec does not return on success.
exec ./$BINARY_NAME agent

View File

@ -1,8 +1,31 @@
# On Windows, VS Code Remote requires a parent process of the
# executing shell to be named "sshd", otherwise it fails. See:
# https://github.com/microsoft/vscode-remote-release/issues/5699
# That is why the agent binary is saved under the name sshd.exe below.
# Suppress the progress display for the download.
$ProgressPreference = "SilentlyContinue"
# NOTE(review): single download attempt with no retry. It looks like the
# pre-change side of the diff, shown next to the retry loop that replaces
# it -- confirm. ACCESS_URL is joined to "bin/" without a separator here, so
# it is expected to end with "/".
Invoke-WebRequest -Uri ${ACCESS_URL}bin/coder-windows-${ARCH}.exe -OutFile $env:TEMP\sshd.exe
# Script-wide error handler. When a terminating error reaches this trap, write
# a banner to the error stream and then hold for 24 hours (86400 seconds), in
# case the underlying provider deletes the resource on error.
trap {
    Write-Error -Message '=== Agent script exited with non-zero code. Sleeping 24h to preserve logs...'
    Start-Sleep -Seconds 86400
}
# Attempt to download the coder agent.
# This could fail for a number of reasons, many of which are likely transient,
# so keep trying: on any exception, log it, wait 30 seconds and start over.
# The loop only ends once the download has succeeded.
while ($true) {
    try {
        # Suppress the progress display for the download.
        $ProgressPreference = "SilentlyContinue"
        # On Windows, VS Code Remote requires a parent process of the
        # executing shell to be named "sshd", otherwise it fails. See:
        # https://github.com/microsoft/vscode-remote-release/issues/5699
        # Hence the binary is saved as sshd.exe.
        #
        # ACCESS_URL is expected to end with "/" already (the sh bootstrap
        # scripts build their URL as ${ACCESS_URL}bin/...), so no extra
        # slash is inserted here; adding one would yield ".../​/bin/...".
        $BINARY_URL = "${ACCESS_URL}bin/coder-windows-${ARCH}.exe"
        Write-Output "Fetching coder agent from ${BINARY_URL}"
        Invoke-WebRequest -Uri "${BINARY_URL}" -OutFile $env:TEMP\sshd.exe
        break
    } catch {
        Write-Output "error: unhandled exception fetching coder agent:"
        Write-Output $_
        Write-Output "trying again in 30 seconds..."
        Start-Sleep -Seconds 30
    }
}
# If the below fails, retrying probably won't help.
# Turn off Defender real-time monitoring and add the downloaded binary to the
# exclusion paths.
Set-MpPreference -DisableRealtimeMonitoring $true -ExclusionPath $env:TEMP\sshd.exe
# Settings exposed as environment variables. Presumably they are read by the
# agent process started later; that part is not visible in this excerpt.
$env:CODER_AGENT_AUTH = "${AUTH_TYPE}"
$env:CODER_AGENT_URL = "${ACCESS_URL}"

View File

@ -5,7 +5,7 @@
# Strict mode: exit on error, error on unset variables, and fail a pipeline
# when any stage fails.
set -euo pipefail
# Directory containing this script, as reported by BASH_SOURCE.
SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")
# The sourced path is built at runtime, so ShellCheck cannot follow it; the
# directives below silence SC1091 and SC1090 for the `source` line.
# shellcheck disable=SC1091
# shellcheck disable=SC1091,SC1090
source "${SCRIPT_DIR}/lib.sh"
# Repository top level, resolved by git from inside SCRIPT_DIR.
PROJECT_ROOT=$(cd "$SCRIPT_DIR" && git rev-parse --show-toplevel)

View File

@ -5,7 +5,7 @@
# Strict mode: exit on error, error on unset variables, and fail a pipeline
# when any stage fails.
set -euo pipefail
# Directory containing this script, as reported by BASH_SOURCE.
SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")
# The sourced path is built at runtime, so ShellCheck cannot follow it; the
# directives below silence SC1091 and SC1090 for the `source` line.
# shellcheck disable=SC1091
# shellcheck disable=SC1091,SC1090
source "${SCRIPT_DIR}/lib.sh"
# Repository top level, resolved by git from inside SCRIPT_DIR.
PROJECT_ROOT=$(cd "$SCRIPT_DIR" && git rev-parse --show-toplevel)
# Path to .coderv2/coder under the repository root.
CODER_DEV_BIN="${PROJECT_ROOT}/.coderv2/coder"