feat(agent): Handle signals and shutdown gracefully (#5914)

This change allows the agent to handle common shutdown signals like
interrupt, hangup and terminate and initiate a graceful shutdown.

As long as terraform providers initiate graceful shutdowns via the
aforementioned signals, things like SSH connections will be closed
immediately on shutdown instead of being left hanging/timing out due to
the agent being abruptly killed.

Refs: #4677, #5901
This commit is contained in:
Mathias Fredriksson 2023-01-30 15:59:13 +02:00 committed by GitHub
parent 91ef8d90d5
commit cf8d4029fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 31 additions and 10 deletions

View File

@ -7,6 +7,7 @@ import (
"net/http/pprof"
"net/url"
"os"
"os/signal"
"path/filepath"
"runtime"
"time"
@ -35,12 +36,10 @@ func workspaceAgent() *cobra.Command {
Use: "agent",
// This command isn't useful to manually execute.
Hidden: true,
RunE: func(cmd *cobra.Command, args []string) error {
RunE: func(cmd *cobra.Command, _ []string) error {
ctx, cancel := context.WithCancel(cmd.Context())
defer cancel()
go dumpHandler(ctx)
rawURL, err := cmd.Flags().GetString(varAgentURL)
if err != nil {
return xerrors.Errorf("CODER_AGENT_URL must be set: %w", err)
@ -50,18 +49,18 @@ func workspaceAgent() *cobra.Command {
return xerrors.Errorf("parse %q: %w", rawURL, err)
}
logWriter := &lumberjack.Logger{
Filename: filepath.Join(os.TempDir(), "coder-agent.log"),
MaxSize: 5, // MB
}
defer logWriter.Close()
logger := slog.Make(sloghuman.Sink(cmd.ErrOrStderr()), sloghuman.Sink(logWriter)).Leveled(slog.LevelDebug)
isLinux := runtime.GOOS == "linux"
// Spawn a reaper so that we don't accumulate a ton
// of zombie processes.
if reaper.IsInitProcess() && !noReap && isLinux {
logWriter := &lumberjack.Logger{
Filename: filepath.Join(os.TempDir(), "coder-agent-init.log"),
MaxSize: 5, // MB
}
defer logWriter.Close()
logger := slog.Make(sloghuman.Sink(cmd.ErrOrStderr()), sloghuman.Sink(logWriter)).Leveled(slog.LevelDebug)
logger.Info(ctx, "spawning reaper process")
// Do not start a reaper on the child process. It's important
// to do this else we fork bomb ourselves.
@ -76,6 +75,28 @@ func workspaceAgent() *cobra.Command {
return nil
}
// Handle interrupt signals to allow for graceful shutdown,
// note that calling stopNotify disables the signal handler
// and the next interrupt will terminate the program (you
// probably want cancel instead).
//
// Note that we don't want to handle these signals in the
// process that runs as PID 1, that's why we do this after
// the reaper forked.
ctx, stopNotify := signal.NotifyContext(ctx, InterruptSignals...)
defer stopNotify()
// dumpHandler does signal handling, so we call it after the
// reaper.
go dumpHandler(ctx)
logWriter := &lumberjack.Logger{
Filename: filepath.Join(os.TempDir(), "coder-agent.log"),
MaxSize: 5, // MB
}
defer logWriter.Close()
logger := slog.Make(sloghuman.Sink(cmd.ErrOrStderr()), sloghuman.Sink(logWriter)).Leveled(slog.LevelDebug)
version := buildinfo.Version()
logger.Info(ctx, "starting agent",
slog.F("url", coderURL),