mirror of https://github.com/coder/coder.git
feat(coderd/database): keep only 1 day of `workspace_agent_stats` after rollup (#12674)
This commit is contained in:
parent
4a6693a171
commit
e17e8aa3c9
|
@ -965,7 +965,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
|
||||||
defer shutdownConns()
|
defer shutdownConns()
|
||||||
|
|
||||||
// Ensures that old database entries are cleaned up over time!
|
// Ensures that old database entries are cleaned up over time!
|
||||||
purger := dbpurge.New(ctx, logger, options.Database)
|
purger := dbpurge.New(ctx, logger.Named("dbpurge"), options.Database)
|
||||||
defer purger.Close()
|
defer purger.Close()
|
||||||
|
|
||||||
// Updates workspace usage
|
// Updates workspace usage
|
||||||
|
|
|
@ -1506,13 +1506,65 @@ func (q *FakeQuerier) DeleteOldWorkspaceAgentStats(_ context.Context) error {
|
||||||
q.mutex.Lock()
|
q.mutex.Lock()
|
||||||
defer q.mutex.Unlock()
|
defer q.mutex.Unlock()
|
||||||
|
|
||||||
|
/*
|
||||||
|
DELETE FROM
|
||||||
|
workspace_agent_stats
|
||||||
|
WHERE
|
||||||
|
created_at < (
|
||||||
|
SELECT
|
||||||
|
COALESCE(
|
||||||
|
-- When generating initial template usage stats, all the
|
||||||
|
-- raw agent stats are needed, after that only ~30 mins
|
||||||
|
-- from last rollup is needed. Deployment stats seem to
|
||||||
|
-- use between 15 mins and 1 hour of data. We keep a
|
||||||
|
-- little bit more (1 day) just in case.
|
||||||
|
MAX(start_time) - '1 days'::interval,
|
||||||
|
-- Fall back to 6 months ago if there are no template
|
||||||
|
-- usage stats so that we don't delete the data before
|
||||||
|
-- it's rolled up.
|
||||||
|
NOW() - '6 months'::interval
|
||||||
|
)
|
||||||
|
FROM
|
||||||
|
template_usage_stats
|
||||||
|
)
|
||||||
|
AND created_at < (
|
||||||
|
-- Delete at most in batches of 3 days (with a batch size of 3 days, we
|
||||||
|
-- can clear out the previous 6 months of data in ~60 iterations) whilst
|
||||||
|
-- keeping the DB load relatively low.
|
||||||
|
SELECT
|
||||||
|
COALESCE(MIN(created_at) + '3 days'::interval, NOW())
|
||||||
|
FROM
|
||||||
|
workspace_agent_stats
|
||||||
|
);
|
||||||
|
*/
|
||||||
|
|
||||||
now := dbtime.Now()
|
now := dbtime.Now()
|
||||||
sixMonthInterval := 6 * 30 * 24 * time.Hour
|
var limit time.Time
|
||||||
sixMonthsAgo := now.Add(-sixMonthInterval)
|
// MAX
|
||||||
|
for _, stat := range q.templateUsageStats {
|
||||||
|
if stat.StartTime.After(limit) {
|
||||||
|
limit = stat.StartTime.AddDate(0, 0, -1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// COALESCE
|
||||||
|
if limit.IsZero() {
|
||||||
|
limit = now.AddDate(0, -6, 0)
|
||||||
|
}
|
||||||
|
|
||||||
var validStats []database.WorkspaceAgentStat
|
var validStats []database.WorkspaceAgentStat
|
||||||
|
var batchLimit time.Time
|
||||||
for _, stat := range q.workspaceAgentStats {
|
for _, stat := range q.workspaceAgentStats {
|
||||||
if stat.CreatedAt.Before(sixMonthsAgo) {
|
if batchLimit.IsZero() || stat.CreatedAt.Before(batchLimit) {
|
||||||
|
batchLimit = stat.CreatedAt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if batchLimit.IsZero() {
|
||||||
|
batchLimit = time.Now()
|
||||||
|
} else {
|
||||||
|
batchLimit = batchLimit.AddDate(0, 0, 3)
|
||||||
|
}
|
||||||
|
for _, stat := range q.workspaceAgentStats {
|
||||||
|
if stat.CreatedAt.Before(limit) && stat.CreatedAt.Before(batchLimit) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
validStats = append(validStats, stat)
|
validStats = append(validStats, stat)
|
||||||
|
|
|
@ -24,7 +24,6 @@ const (
|
||||||
// This is for cleaning up old, unused resources from the database that take up space.
|
// This is for cleaning up old, unused resources from the database that take up space.
|
||||||
func New(ctx context.Context, logger slog.Logger, db database.Store) io.Closer {
|
func New(ctx context.Context, logger slog.Logger, db database.Store) io.Closer {
|
||||||
closed := make(chan struct{})
|
closed := make(chan struct{})
|
||||||
logger = logger.Named("dbpurge")
|
|
||||||
|
|
||||||
ctx, cancelFunc := context.WithCancel(ctx)
|
ctx, cancelFunc := context.WithCancel(ctx)
|
||||||
//nolint:gocritic // The system purges old db records without user input.
|
//nolint:gocritic // The system purges old db records without user input.
|
||||||
|
|
|
@ -11,12 +11,14 @@ import (
|
||||||
"go.uber.org/goleak"
|
"go.uber.org/goleak"
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
|
|
||||||
|
"cdr.dev/slog"
|
||||||
"cdr.dev/slog/sloggers/slogtest"
|
"cdr.dev/slog/sloggers/slogtest"
|
||||||
|
|
||||||
"github.com/coder/coder/v2/coderd/database"
|
"github.com/coder/coder/v2/coderd/database"
|
||||||
"github.com/coder/coder/v2/coderd/database/dbgen"
|
"github.com/coder/coder/v2/coderd/database/dbgen"
|
||||||
"github.com/coder/coder/v2/coderd/database/dbmem"
|
"github.com/coder/coder/v2/coderd/database/dbmem"
|
||||||
"github.com/coder/coder/v2/coderd/database/dbpurge"
|
"github.com/coder/coder/v2/coderd/database/dbpurge"
|
||||||
|
"github.com/coder/coder/v2/coderd/database/dbrollup"
|
||||||
"github.com/coder/coder/v2/coderd/database/dbtestutil"
|
"github.com/coder/coder/v2/coderd/database/dbtestutil"
|
||||||
"github.com/coder/coder/v2/coderd/database/dbtime"
|
"github.com/coder/coder/v2/coderd/database/dbtime"
|
||||||
"github.com/coder/coder/v2/provisionerd/proto"
|
"github.com/coder/coder/v2/provisionerd/proto"
|
||||||
|
@ -40,27 +42,62 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
db, _ := dbtestutil.NewDB(t)
|
db, _ := dbtestutil.NewDB(t)
|
||||||
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
|
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
|
||||||
|
|
||||||
|
now := dbtime.Now()
|
||||||
|
|
||||||
|
defer func() {
|
||||||
|
if t.Failed() {
|
||||||
|
t.Logf("Test failed, printing rows...")
|
||||||
|
ctx := testutil.Context(t, testutil.WaitShort)
|
||||||
|
wasRows, err := db.GetWorkspaceAgentStats(ctx, now.AddDate(0, -7, 0))
|
||||||
|
if err == nil {
|
||||||
|
for _, row := range wasRows {
|
||||||
|
t.Logf("workspace agent stat: %v", row)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tusRows, err := db.GetTemplateUsageStats(context.Background(), database.GetTemplateUsageStatsParams{
|
||||||
|
StartTime: now.AddDate(0, -7, 0),
|
||||||
|
EndTime: now,
|
||||||
|
})
|
||||||
|
if err == nil {
|
||||||
|
for _, row := range tusRows {
|
||||||
|
t.Logf("template usage stat: %v", row)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
now := dbtime.Now()
|
|
||||||
|
|
||||||
// given
|
// given
|
||||||
// Let's use RxBytes to identify stat entries.
|
// Let's use RxBytes to identify stat entries.
|
||||||
// Stat inserted 6 months + 1 hour ago, should be deleted.
|
// Stat inserted 6 months + 1 hour ago, should be deleted.
|
||||||
first := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{
|
first := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{
|
||||||
CreatedAt: now.Add(-6*30*24*time.Hour - time.Hour),
|
CreatedAt: now.AddDate(0, -6, 0).Add(-time.Hour),
|
||||||
|
ConnectionCount: 1,
|
||||||
ConnectionMedianLatencyMS: 1,
|
ConnectionMedianLatencyMS: 1,
|
||||||
RxBytes: 1111,
|
RxBytes: 1111,
|
||||||
|
SessionCountSSH: 1,
|
||||||
})
|
})
|
||||||
|
|
||||||
// Stat inserted 6 months - 1 hour ago, should not be deleted.
|
// Stat inserted 6 months - 1 hour ago, should not be deleted before rollup.
|
||||||
second := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{
|
second := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{
|
||||||
CreatedAt: now.Add(-5*30*24*time.Hour + time.Hour),
|
CreatedAt: now.AddDate(0, -6, 0).Add(time.Hour),
|
||||||
|
ConnectionCount: 1,
|
||||||
ConnectionMedianLatencyMS: 1,
|
ConnectionMedianLatencyMS: 1,
|
||||||
RxBytes: 2222,
|
RxBytes: 2222,
|
||||||
|
SessionCountSSH: 1,
|
||||||
|
})
|
||||||
|
|
||||||
|
// Stat inserted 6 months - 1 day - 2 hours ago, should not be deleted at all.
|
||||||
|
third := dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{
|
||||||
|
CreatedAt: now.AddDate(0, -6, 0).AddDate(0, 0, 1).Add(2 * time.Hour),
|
||||||
|
ConnectionCount: 1,
|
||||||
|
ConnectionMedianLatencyMS: 1,
|
||||||
|
RxBytes: 3333,
|
||||||
|
SessionCountSSH: 1,
|
||||||
})
|
})
|
||||||
|
|
||||||
// when
|
// when
|
||||||
|
@ -70,15 +107,39 @@ func TestDeleteOldWorkspaceAgentStats(t *testing.T) {
|
||||||
// then
|
// then
|
||||||
var stats []database.GetWorkspaceAgentStatsRow
|
var stats []database.GetWorkspaceAgentStatsRow
|
||||||
var err error
|
var err error
|
||||||
require.Eventually(t, func() bool {
|
require.Eventuallyf(t, func() bool {
|
||||||
// Query all stats created not earlier than 7 months ago
|
// Query all stats created not earlier than 7 months ago
|
||||||
stats, err = db.GetWorkspaceAgentStats(ctx, now.Add(-7*30*24*time.Hour))
|
stats, err = db.GetWorkspaceAgentStats(ctx, now.AddDate(0, -7, 0))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return !containsWorkspaceAgentStat(stats, first) &&
|
return !containsWorkspaceAgentStat(stats, first) &&
|
||||||
containsWorkspaceAgentStat(stats, second)
|
containsWorkspaceAgentStat(stats, second)
|
||||||
}, testutil.WaitShort, testutil.IntervalFast, stats)
|
}, testutil.WaitShort, testutil.IntervalFast, "it should delete old stats: %v", stats)
|
||||||
|
|
||||||
|
// when
|
||||||
|
events := make(chan dbrollup.Event)
|
||||||
|
rolluper := dbrollup.New(logger, db, dbrollup.WithEventChannel(events))
|
||||||
|
defer rolluper.Close()
|
||||||
|
|
||||||
|
_, _ = <-events, <-events
|
||||||
|
|
||||||
|
// Start a new purger to immediately trigger delete after rollup.
|
||||||
|
_ = closer.Close()
|
||||||
|
closer = dbpurge.New(ctx, logger, db)
|
||||||
|
defer closer.Close()
|
||||||
|
|
||||||
|
// then
|
||||||
|
require.Eventuallyf(t, func() bool {
|
||||||
|
// Query all stats created not earlier than 7 months ago
|
||||||
|
stats, err = db.GetWorkspaceAgentStats(ctx, now.AddDate(0, -7, 0))
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return !containsWorkspaceAgentStat(stats, first) &&
|
||||||
|
!containsWorkspaceAgentStat(stats, second) &&
|
||||||
|
containsWorkspaceAgentStat(stats, third)
|
||||||
|
}, testutil.WaitShort, testutil.IntervalFast, "it should delete old stats after rollup: %v", stats)
|
||||||
}
|
}
|
||||||
|
|
||||||
func containsWorkspaceAgentStat(stats []database.GetWorkspaceAgentStatsRow, needle database.WorkspaceAgentStat) bool {
|
func containsWorkspaceAgentStat(stats []database.GetWorkspaceAgentStatsRow, needle database.WorkspaceAgentStat) bool {
|
||||||
|
|
|
@ -10111,7 +10111,35 @@ func (q *sqlQuerier) UpdateWorkspaceAgentStartupByID(ctx context.Context, arg Up
|
||||||
}
|
}
|
||||||
|
|
||||||
const deleteOldWorkspaceAgentStats = `-- name: DeleteOldWorkspaceAgentStats :exec
|
const deleteOldWorkspaceAgentStats = `-- name: DeleteOldWorkspaceAgentStats :exec
|
||||||
DELETE FROM workspace_agent_stats WHERE created_at < NOW() - INTERVAL '180 days'
|
DELETE FROM
|
||||||
|
workspace_agent_stats
|
||||||
|
WHERE
|
||||||
|
created_at < (
|
||||||
|
SELECT
|
||||||
|
COALESCE(
|
||||||
|
-- When generating initial template usage stats, all the
|
||||||
|
-- raw agent stats are needed, after that only ~30 mins
|
||||||
|
-- from last rollup is needed. Deployment stats seem to
|
||||||
|
-- use between 15 mins and 1 hour of data. We keep a
|
||||||
|
-- little bit more (1 day) just in case.
|
||||||
|
MAX(start_time) - '1 days'::interval,
|
||||||
|
-- Fall back to 6 months ago if there are no template
|
||||||
|
-- usage stats so that we don't delete the data before
|
||||||
|
-- it's rolled up.
|
||||||
|
NOW() - '6 months'::interval
|
||||||
|
)
|
||||||
|
FROM
|
||||||
|
template_usage_stats
|
||||||
|
)
|
||||||
|
AND created_at < (
|
||||||
|
-- Delete at most in batches of 3 days (with a batch size of 3 days, we
|
||||||
|
-- can clear out the previous 6 months of data in ~60 iterations) whilst
|
||||||
|
-- keeping the DB load relatively low.
|
||||||
|
SELECT
|
||||||
|
COALESCE(MIN(created_at) + '3 days'::interval, NOW())
|
||||||
|
FROM
|
||||||
|
workspace_agent_stats
|
||||||
|
)
|
||||||
`
|
`
|
||||||
|
|
||||||
func (q *sqlQuerier) DeleteOldWorkspaceAgentStats(ctx context.Context) error {
|
func (q *sqlQuerier) DeleteOldWorkspaceAgentStats(ctx context.Context) error {
|
||||||
|
|
|
@ -66,7 +66,35 @@ ORDER BY
|
||||||
date ASC;
|
date ASC;
|
||||||
|
|
||||||
-- name: DeleteOldWorkspaceAgentStats :exec
|
-- name: DeleteOldWorkspaceAgentStats :exec
|
||||||
DELETE FROM workspace_agent_stats WHERE created_at < NOW() - INTERVAL '180 days';
|
DELETE FROM
|
||||||
|
workspace_agent_stats
|
||||||
|
WHERE
|
||||||
|
created_at < (
|
||||||
|
SELECT
|
||||||
|
COALESCE(
|
||||||
|
-- When generating initial template usage stats, all the
|
||||||
|
-- raw agent stats are needed, after that only ~30 mins
|
||||||
|
-- from last rollup is needed. Deployment stats seem to
|
||||||
|
-- use between 15 mins and 1 hour of data. We keep a
|
||||||
|
-- little bit more (1 day) just in case.
|
||||||
|
MAX(start_time) - '1 days'::interval,
|
||||||
|
-- Fall back to 6 months ago if there are no template
|
||||||
|
-- usage stats so that we don't delete the data before
|
||||||
|
-- it's rolled up.
|
||||||
|
NOW() - '6 months'::interval
|
||||||
|
)
|
||||||
|
FROM
|
||||||
|
template_usage_stats
|
||||||
|
)
|
||||||
|
AND created_at < (
|
||||||
|
-- Delete at most in batches of 3 days (with a batch size of 3 days, we
|
||||||
|
-- can clear out the previous 6 months of data in ~60 iterations) whilst
|
||||||
|
-- keeping the DB load relatively low.
|
||||||
|
SELECT
|
||||||
|
COALESCE(MIN(created_at) + '3 days'::interval, NOW())
|
||||||
|
FROM
|
||||||
|
workspace_agent_stats
|
||||||
|
);
|
||||||
|
|
||||||
-- name: GetDeploymentWorkspaceAgentStats :one
|
-- name: GetDeploymentWorkspaceAgentStats :one
|
||||||
WITH agent_stats AS (
|
WITH agent_stats AS (
|
||||||
|
|
Loading…
Reference in New Issue