From 04f0510b095c042111a888cf2bac35a7f351d14d Mon Sep 17 00:00:00 2001
From: Mathias Fredriksson
Date: Fri, 22 Mar 2024 18:33:34 +0200
Subject: [PATCH] feat(coderd/database): add `template_usage_stats` table and
 rollup query (#12664)

Add `template_usage_stats` table for aggregating template usage data.
Data is rolled up by the `UpsertTemplateUsageStats` query, which fetches
data from the `workspace_agent_stats` and `workspace_app_stats` tables.
---
 coderd/database/dbauthz/dbauthz.go       |  30 +
 coderd/database/dbauthz/dbauthz_test.go  |   6 +
 coderd/database/dbmem/dbmem.go           | 621 ++++++++++++++++++
 coderd/database/dbmetrics/dbmetrics.go   |  14 +
 coderd/database/dbmock/dbmock.go         |  29 +
 coderd/database/dump.sql                 |  52 ++
 .../000203_template_usage_stats.down.sql |   1 +
 .../000203_template_usage_stats.up.sql   |  36 +
 .../000203_template_usage_stats.up.sql   |  30 +
 coderd/database/models.go                |  28 +
 coderd/database/querier.go               |   6 +
 coderd/database/queries.sql.go           | 314 +++
 coderd/database/queries/insights.sql     | 265 ++++
 coderd/database/sqlc.yaml                |   3 +
 coderd/database/types.go                 |  22 +
 coderd/database/unique_constraint.go     |   2 +
 16 files changed, 1459 insertions(+)
 create mode 100644 coderd/database/migrations/000203_template_usage_stats.down.sql
 create mode 100644 coderd/database/migrations/000203_template_usage_stats.up.sql
 create mode 100644 coderd/database/migrations/testdata/fixtures/000203_template_usage_stats.up.sql

diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go
index 2dd15e073b..93dd6617c4 100644
--- a/coderd/database/dbauthz/dbauthz.go
+++ b/coderd/database/dbauthz/dbauthz.go
@@ -1617,6 +1617,29 @@ func (q *querier) GetTemplateParameterInsights(ctx context.Context, arg database
 	return q.db.GetTemplateParameterInsights(ctx, arg)
 }
 
+func (q *querier) GetTemplateUsageStats(ctx context.Context, arg database.GetTemplateUsageStatsParams) ([]database.TemplateUsageStat, error) {
+	// Used by dbrollup tests, use the same safeguard as other insights endpoints.
+	// For auditors, check read template_insights, and fall back to update template.
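+	// Without read access to template insights, the caller must instead have
+	// update access to every requested template (or to all templates when no
+	// template IDs are given), as enforced below.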
+	if err := q.authorizeContext(ctx, rbac.ActionRead, rbac.ResourceTemplateInsights); err != nil {
+		for _, templateID := range arg.TemplateIDs {
+			template, err := q.db.GetTemplateByID(ctx, templateID)
+			if err != nil {
+				return nil, err
+			}
+
+			if err := q.authorizeContext(ctx, rbac.ActionUpdate, template); err != nil {
+				return nil, err
+			}
+		}
+		if len(arg.TemplateIDs) == 0 {
+			if err := q.authorizeContext(ctx, rbac.ActionUpdate, rbac.ResourceTemplate.All()); err != nil {
+				return nil, err
+			}
+		}
+	}
+	return q.db.GetTemplateUsageStats(ctx, arg)
+}
+
 func (q *querier) GetTemplateVersionByID(ctx context.Context, tvid uuid.UUID) (database.TemplateVersion, error) {
 	tv, err := q.db.GetTemplateVersionByID(ctx, tvid)
 	if err != nil {
@@ -3413,6 +3436,13 @@ func (q *querier) UpsertTailnetTunnel(ctx context.Context, arg database.UpsertTa
 	return q.db.UpsertTailnetTunnel(ctx, arg)
 }
 
+func (q *querier) UpsertTemplateUsageStats(ctx context.Context) error {
+	if err := q.authorizeContext(ctx, rbac.ActionUpdate, rbac.ResourceSystem); err != nil {
+		return err
+	}
+	return q.db.UpsertTemplateUsageStats(ctx)
+}
+
 func (q *querier) UpsertWorkspaceAgentPortShare(ctx context.Context, arg database.UpsertWorkspaceAgentPortShareParams) (database.WorkspaceAgentPortShare, error) {
 	workspace, err := q.db.GetWorkspaceByID(ctx, arg.WorkspaceID)
 	if err != nil {
diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go
index 8840fe5525..19f970d2af 100644
--- a/coderd/database/dbauthz/dbauthz_test.go
+++ b/coderd/database/dbauthz/dbauthz_test.go
@@ -956,6 +956,12 @@ func (s *MethodTestSuite) TestTemplate() {
 	s.Run("GetTemplateAppInsightsByTemplate", s.Subtest(func(db database.Store, check *expects) {
 		check.Args(database.GetTemplateAppInsightsByTemplateParams{}).Asserts(rbac.ResourceTemplateInsights, rbac.ActionRead)
 	}))
+	s.Run("GetTemplateUsageStats", s.Subtest(func(db database.Store, check *expects) {
+		check.Args(database.GetTemplateUsageStatsParams{}).Asserts(rbac.ResourceTemplateInsights, rbac.ActionRead).Errors(sql.ErrNoRows)
+	}))
+	s.Run("UpsertTemplateUsageStats", s.Subtest(func(db database.Store, check *expects) {
+		check.Asserts(rbac.ResourceSystem, rbac.ActionUpdate)
+	}))
 }
 
 func (s *MethodTestSuite) TestUser() {
diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go
index ac53fa586f..dd18e313ce 100644
--- a/coderd/database/dbmem/dbmem.go
+++ b/coderd/database/dbmem/dbmem.go
@@ -162,6 +162,7 @@ type data struct {
 	templateVersionParameters []database.TemplateVersionParameter
 	templateVersionVariables  []database.TemplateVersionVariable
 	templates                 []database.TemplateTable
+	templateUsageStats        []database.TemplateUsageStat
 	workspaceAgents           []database.WorkspaceAgent
 	workspaceAgentMetadata    []database.WorkspaceAgentMetadatum
 	workspaceAgentLogs        []database.WorkspaceAgentLog
@@ -3584,6 +3585,34 @@ func (q *FakeQuerier) GetTemplateParameterInsights(ctx context.Context, arg data
 	return rows, nil
 }
 
+func (q *FakeQuerier) GetTemplateUsageStats(_ context.Context, arg database.GetTemplateUsageStatsParams) ([]database.TemplateUsageStat, error) {
+	err := validateDatabaseType(arg)
+	if err != nil {
+		return nil, err
+	}
+
+	q.mutex.RLock()
+	defer q.mutex.RUnlock()
+
+	var stats []database.TemplateUsageStat
+	for _, stat := range q.templateUsageStats {
+		// Exclude all chunks that don't fall entirely within the range.
+		if stat.StartTime.Before(arg.StartTime) || stat.EndTime.After(arg.EndTime) {
+			continue
+		}
+		if len(arg.TemplateIDs) > 0 && !slices.Contains(arg.TemplateIDs, stat.TemplateID) {
+			continue
+		}
+		stats = append(stats, stat)
+	}
+
+	if len(stats) == 0 {
+		return nil, sql.ErrNoRows
+	}
+
+	return stats, nil
+}
+
 func (q *FakeQuerier) GetTemplateVersionByID(ctx context.Context, templateVersionID uuid.UUID) (database.TemplateVersion, error) {
 	q.mutex.RLock()
 	defer q.mutex.RUnlock()
@@ -7923,6 +7952,598 @@ func (*FakeQuerier) UpsertTailnetTunnel(_ context.Context, arg database.UpsertTa
 	return database.TailnetTunnel{}, ErrUnimplemented
 }
 
+func (q *FakeQuerier) UpsertTemplateUsageStats(ctx context.Context) error {
+	q.mutex.Lock()
+	defer q.mutex.Unlock()
+
+	/*
+		WITH
+	*/
+
+	/*
+		latest_start AS (
+			SELECT
+				-- Truncate to hour so that we always look at even ranges of data.
+				date_trunc('hour', COALESCE(
+					MAX(start_time) - '1 hour'::interval,
+					-- Fallback when there are no template usage stats yet.
+					-- App stats can exist before this, but not agent stats,
+					-- limit the lookback to avoid inconsistency.
+					(SELECT MIN(created_at) FROM workspace_agent_stats)
+				)) AS t
+			FROM
+				template_usage_stats
+		),
+	*/
+
+	now := time.Now()
+	latestStart := time.Time{}
+	for _, stat := range q.templateUsageStats {
+		if stat.StartTime.After(latestStart) {
+			latestStart = stat.StartTime.Add(-time.Hour)
+		}
+	}
+	if latestStart.IsZero() {
+		for _, stat := range q.workspaceAgentStats {
+			if latestStart.IsZero() || stat.CreatedAt.Before(latestStart) {
+				latestStart = stat.CreatedAt
+			}
+		}
+	}
+	if latestStart.IsZero() {
+		return nil
+	}
+	latestStart = latestStart.Truncate(time.Hour)
+
+	/*
+		workspace_app_stat_buckets AS (
+			SELECT
+				-- Truncate the minute to the nearest half hour, this is the bucket size
+				-- for the data.
+				date_trunc('hour', s.minute_bucket) + trunc(date_part('minute', s.minute_bucket) / 30) * 30 * '1 minute'::interval AS time_bucket,
+				w.template_id,
+				was.user_id,
+				-- Both app stats and agent stats track web terminal usage, but
+				-- by different means. The app stats value should be more
+				-- accurate so we don't want to discard it just yet.
+				CASE
+					WHEN was.access_method = 'terminal'
+					THEN '[terminal]' -- Unique name, app names can't contain brackets.
+					ELSE was.slug_or_port
+				END AS app_name,
+				COUNT(DISTINCT s.minute_bucket) AS app_minutes,
+				-- Store each unique minute bucket for later merge between datasets.
+				array_agg(DISTINCT s.minute_bucket) AS minute_buckets
+			FROM
+				workspace_app_stats AS was
+			JOIN
+				workspaces AS w
+			ON
+				w.id = was.workspace_id
+			-- Generate a series of minute buckets for each session for computing the
+			-- minutes/bucket.
+			CROSS JOIN
+				generate_series(
+					date_trunc('minute', was.session_started_at),
+					-- Subtract 1 microsecond to avoid creating an extra series.
+					date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
+					'1 minute'::interval
+				) AS s(minute_bucket)
+			WHERE
+				-- s.minute_bucket >= @start_time::timestamptz
+				-- AND s.minute_bucket < @end_time::timestamptz
+				s.minute_bucket >= (SELECT t FROM latest_start)
+				AND s.minute_bucket < NOW()
+			GROUP BY
+				time_bucket, w.template_id, was.user_id, was.access_method, was.slug_or_port
+		),
+	*/
+
+	type workspaceAppStatGroupBy struct {
+		TimeBucket   time.Time
+		TemplateID   uuid.UUID
+		UserID       uuid.UUID
+		AccessMethod string
+		SlugOrPort   string
+	}
+	type workspaceAppStatRow struct {
+		workspaceAppStatGroupBy
+		AppName       string
+		AppMinutes    int
+		MinuteBuckets map[time.Time]struct{}
+	}
+	workspaceAppStatRows := make(map[workspaceAppStatGroupBy]workspaceAppStatRow)
+	for _, was := range q.workspaceAppStats {
+		// Preflight: s.minute_bucket >= (SELECT t FROM latest_start)
+		if was.SessionEndedAt.Before(latestStart) {
+			continue
+		}
+		// JOIN workspaces
+		w, err := q.getWorkspaceByIDNoLock(ctx, was.WorkspaceID)
+		if err != nil {
+			return err
+		}
+		// CROSS JOIN generate_series
+		for t := was.SessionStartedAt; t.Before(was.SessionEndedAt); t = t.Add(time.Minute) {
+			// WHERE
+			if t.Before(latestStart) || t.After(now) || t.Equal(now) {
+				continue
+			}
+
+			bucket := t.Truncate(30 * time.Minute)
+			// GROUP BY
+			key := workspaceAppStatGroupBy{
+				TimeBucket:   bucket,
+				TemplateID:   w.TemplateID,
+				UserID:       was.UserID,
+				AccessMethod: was.AccessMethod,
+				SlugOrPort:   was.SlugOrPort,
+			}
+			// SELECT
+			row, ok := workspaceAppStatRows[key]
+			if !ok {
+				row = workspaceAppStatRow{
+					workspaceAppStatGroupBy: key,
+					AppName:                 was.SlugOrPort,
+					AppMinutes:              0,
+					MinuteBuckets:           make(map[time.Time]struct{}),
+				}
+				if was.AccessMethod == "terminal" {
+					row.AppName = "[terminal]"
+				}
+			}
+			row.MinuteBuckets[t] = struct{}{}
+			row.AppMinutes = len(row.MinuteBuckets)
+			workspaceAppStatRows[key] = row
+		}
+	}
+
+	/*
+		agent_stats_buckets AS (
+			SELECT
+				-- Truncate the minute to the nearest half hour, this is the bucket size
+				-- for the data.
+				date_trunc('hour', created_at) + trunc(date_part('minute', created_at) / 30) * 30 * '1 minute'::interval AS time_bucket,
+				template_id,
+				user_id,
+				-- Store each unique minute bucket for later merge between datasets.
+				array_agg(
+					DISTINCT CASE
+					WHEN
+						session_count_ssh > 0
+						-- TODO(mafredri): Enable when we have the column.
+						-- OR session_count_sftp > 0
+						OR session_count_reconnecting_pty > 0
+						OR session_count_vscode > 0
+						OR session_count_jetbrains > 0
+					THEN
+						date_trunc('minute', created_at)
+					ELSE
+						NULL
+					END
+				) AS minute_buckets,
+				COUNT(DISTINCT CASE WHEN session_count_ssh > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS ssh_mins,
+				-- TODO(mafredri): Enable when we have the column.
+				-- COUNT(DISTINCT CASE WHEN session_count_sftp > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS sftp_mins,
+				COUNT(DISTINCT CASE WHEN session_count_reconnecting_pty > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS reconnecting_pty_mins,
+				COUNT(DISTINCT CASE WHEN session_count_vscode > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS vscode_mins,
+				COUNT(DISTINCT CASE WHEN session_count_jetbrains > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS jetbrains_mins,
+				-- NOTE(mafredri): The agent stats are currently very unreliable, and
+				-- sometimes the connections are missing, even during active sessions.
+				-- Since we can't fully rely on this, we check for "any connection
+				-- during this half-hour". A better solution here would be preferable.
+				MAX(connection_count) > 0 AS has_connection
+			FROM
+				workspace_agent_stats
+			WHERE
+				-- created_at >= @start_time::timestamptz
+				-- AND created_at < @end_time::timestamptz
+				created_at >= (SELECT t FROM latest_start)
+				AND created_at < NOW()
+				-- Inclusion criteria to filter out empty results.
+				AND (
+					session_count_ssh > 0
+					-- TODO(mafredri): Enable when we have the column.
+					-- OR session_count_sftp > 0
+					OR session_count_reconnecting_pty > 0
+					OR session_count_vscode > 0
+					OR session_count_jetbrains > 0
+				)
+			GROUP BY
+				time_bucket, template_id, user_id
+		),
+	*/
+
+	type agentStatGroupBy struct {
+		TimeBucket time.Time
+		TemplateID uuid.UUID
+		UserID     uuid.UUID
+	}
+	type agentStatRow struct {
+		agentStatGroupBy
+		MinuteBuckets                map[time.Time]struct{}
+		SSHMinuteBuckets             map[time.Time]struct{}
+		SSHMins                      int
+		SFTPMinuteBuckets            map[time.Time]struct{}
+		SFTPMins                     int
+		ReconnectingPTYMinuteBuckets map[time.Time]struct{}
+		ReconnectingPTYMins          int
+		VSCodeMinuteBuckets          map[time.Time]struct{}
+		VSCodeMins                   int
+		JetBrainsMinuteBuckets       map[time.Time]struct{}
+		JetBrainsMins                int
+		HasConnection                bool
+	}
+	agentStatRows := make(map[agentStatGroupBy]agentStatRow)
+	for _, was := range q.workspaceAgentStats {
+		// WHERE
+		if was.CreatedAt.Before(latestStart) || was.CreatedAt.After(now) || was.CreatedAt.Equal(now) {
+			continue
+		}
+		if was.SessionCountSSH == 0 && was.SessionCountReconnectingPTY == 0 && was.SessionCountVSCode == 0 && was.SessionCountJetBrains == 0 {
+			continue
+		}
+		// GROUP BY
+		key := agentStatGroupBy{
+			TimeBucket: was.CreatedAt.Truncate(30 * time.Minute),
+			TemplateID: was.TemplateID,
+			UserID:     was.UserID,
+		}
+		// SELECT
+		row, ok := agentStatRows[key]
+		if !ok {
+			row = agentStatRow{
+				agentStatGroupBy:             key,
+				MinuteBuckets:                make(map[time.Time]struct{}),
+				SSHMinuteBuckets:             make(map[time.Time]struct{}),
+				SFTPMinuteBuckets:            make(map[time.Time]struct{}),
+				ReconnectingPTYMinuteBuckets: make(map[time.Time]struct{}),
+				VSCodeMinuteBuckets:          make(map[time.Time]struct{}),
+				JetBrainsMinuteBuckets:       make(map[time.Time]struct{}),
+			}
+		}
+		minute := was.CreatedAt.Truncate(time.Minute)
+		row.MinuteBuckets[minute] = struct{}{}
+		if was.SessionCountSSH > 0 {
+			row.SSHMinuteBuckets[minute] = struct{}{}
+			row.SSHMins = len(row.SSHMinuteBuckets)
+		}
+		// TODO(mafredri): Enable when we have the column.
+		// if was.SessionCountSFTP > 0 {
+		// 	row.SFTPMinuteBuckets[minute] = struct{}{}
+		// 	row.SFTPMins = len(row.SFTPMinuteBuckets)
+		// }
+		_ = row.SFTPMinuteBuckets
+		if was.SessionCountReconnectingPTY > 0 {
+			row.ReconnectingPTYMinuteBuckets[minute] = struct{}{}
+			row.ReconnectingPTYMins = len(row.ReconnectingPTYMinuteBuckets)
+		}
+		if was.SessionCountVSCode > 0 {
+			row.VSCodeMinuteBuckets[minute] = struct{}{}
+			row.VSCodeMins = len(row.VSCodeMinuteBuckets)
+		}
+		if was.SessionCountJetBrains > 0 {
+			row.JetBrainsMinuteBuckets[minute] = struct{}{}
+			row.JetBrainsMins = len(row.JetBrainsMinuteBuckets)
+		}
+		if !row.HasConnection {
+			row.HasConnection = was.ConnectionCount > 0
+		}
+		agentStatRows[key] = row
+	}
+
+	/*
+		stats AS (
+			SELECT
+				stats.time_bucket AS start_time,
+				stats.time_bucket + '30 minutes'::interval AS end_time,
+				stats.template_id,
+				stats.user_id,
+				-- Sum/distinct to handle zero/duplicate values due to union and unnest.
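+				-- (App rows carry zeroes for the agent columns and unnest repeats
+				-- each row once per minute, so a plain SUM would overcount.)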
+				COUNT(DISTINCT minute_bucket) AS usage_mins,
+				array_agg(DISTINCT minute_bucket) AS minute_buckets,
+				SUM(DISTINCT stats.ssh_mins) AS ssh_mins,
+				SUM(DISTINCT stats.sftp_mins) AS sftp_mins,
+				SUM(DISTINCT stats.reconnecting_pty_mins) AS reconnecting_pty_mins,
+				SUM(DISTINCT stats.vscode_mins) AS vscode_mins,
+				SUM(DISTINCT stats.jetbrains_mins) AS jetbrains_mins,
+				-- This is what we unnested, re-nest as json.
+				jsonb_object_agg(stats.app_name, stats.app_minutes) FILTER (WHERE stats.app_name IS NOT NULL) AS app_usage_mins
+			FROM (
+				SELECT
+					time_bucket,
+					template_id,
+					user_id,
+					0 AS ssh_mins,
+					0 AS sftp_mins,
+					0 AS reconnecting_pty_mins,
+					0 AS vscode_mins,
+					0 AS jetbrains_mins,
+					app_name,
+					app_minutes,
+					minute_buckets
+				FROM
+					workspace_app_stat_buckets
+
+				UNION ALL
+
+				SELECT
+					time_bucket,
+					template_id,
+					user_id,
+					ssh_mins,
+					-- TODO(mafredri): Enable when we have the column.
+					0 AS sftp_mins,
+					reconnecting_pty_mins,
+					vscode_mins,
+					jetbrains_mins,
+					NULL AS app_name,
+					NULL AS app_minutes,
+					minute_buckets
+				FROM
+					agent_stats_buckets
+				WHERE
+					-- See note in the agent_stats_buckets CTE.
+					has_connection
+			) AS stats, unnest(minute_buckets) AS minute_bucket
+			GROUP BY
+				stats.time_bucket, stats.template_id, stats.user_id
+		),
+	*/
+
+	type statsGroupBy struct {
+		TimeBucket time.Time
+		TemplateID uuid.UUID
+		UserID     uuid.UUID
+	}
+	type statsRow struct {
+		statsGroupBy
+		UsageMinuteBuckets  map[time.Time]struct{}
+		UsageMins           int
+		SSHMins             int
+		SFTPMins            int
+		ReconnectingPTYMins int
+		VSCodeMins          int
+		JetBrainsMins       int
+		AppUsageMinutes     map[string]int
+	}
+	statsRows := make(map[statsGroupBy]statsRow)
+	for _, was := range workspaceAppStatRows {
+		// GROUP BY
+		key := statsGroupBy{
+			TimeBucket: was.TimeBucket,
+			TemplateID: was.TemplateID,
+			UserID:     was.UserID,
+		}
+		// SELECT
+		row, ok := statsRows[key]
+		if !ok {
+			row = statsRow{
+				statsGroupBy:       key,
+				UsageMinuteBuckets: make(map[time.Time]struct{}),
+				AppUsageMinutes:    make(map[string]int),
+			}
+		}
+		for t := range was.MinuteBuckets {
+			row.UsageMinuteBuckets[t] = struct{}{}
+		}
+		row.UsageMins = len(row.UsageMinuteBuckets)
+		row.AppUsageMinutes[was.AppName] = was.AppMinutes
+		statsRows[key] = row
+	}
+	for _, was := range agentStatRows {
+		// GROUP BY
+		key := statsGroupBy{
+			TimeBucket: was.TimeBucket,
+			TemplateID: was.TemplateID,
+			UserID:     was.UserID,
+		}
+		// SELECT
+		row, ok := statsRows[key]
+		if !ok {
+			row = statsRow{
+				statsGroupBy:       key,
+				UsageMinuteBuckets: make(map[time.Time]struct{}),
+				AppUsageMinutes:    make(map[string]int),
+			}
+		}
+		for t := range was.MinuteBuckets {
+			row.UsageMinuteBuckets[t] = struct{}{}
+		}
+		row.UsageMins = len(row.UsageMinuteBuckets)
+		row.SSHMins += was.SSHMins
+		row.SFTPMins += was.SFTPMins
+		row.ReconnectingPTYMins += was.ReconnectingPTYMins
+		row.VSCodeMins += was.VSCodeMins
+		row.JetBrainsMins += was.JetBrainsMins
+		statsRows[key] = row
+	}
+
+	/*
+		minute_buckets AS (
+			-- Create distinct minute buckets for user-activity, so we can filter out
+			-- irrelevant latencies.
+			SELECT DISTINCT ON (stats.start_time, stats.template_id, stats.user_id, minute_bucket)
+				stats.start_time,
+				stats.template_id,
+				stats.user_id,
+				minute_bucket
+			FROM
+				stats, unnest(minute_buckets) AS minute_bucket
+		),
+		latencies AS (
+			-- Select all non-zero latencies for all the minutes that a user used the
+			-- workspace in some way.
+			SELECT
+				mb.start_time,
+				mb.template_id,
+				mb.user_id,
+				-- TODO(mafredri): We're doing medians on medians here, we may want to
+				-- improve upon this at some point.
+				PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY was.connection_median_latency_ms)::real AS median_latency_ms
+			FROM
+				minute_buckets AS mb
+			JOIN
+				workspace_agent_stats AS was
+			ON
+				date_trunc('minute', was.created_at) = mb.minute_bucket
+				AND was.template_id = mb.template_id
+				AND was.user_id = mb.user_id
+				AND was.connection_median_latency_ms >= 0
+			GROUP BY
+				mb.start_time, mb.template_id, mb.user_id
+		)
+	*/
+
+	type latenciesGroupBy struct {
+		StartTime  time.Time
+		TemplateID uuid.UUID
+		UserID     uuid.UUID
+	}
+	type latenciesRow struct {
+		latenciesGroupBy
+		Latencies       []float64
+		MedianLatencyMS float64
+	}
+	latenciesRows := make(map[latenciesGroupBy]latenciesRow)
+	for _, stat := range statsRows {
+		for t := range stat.UsageMinuteBuckets {
+			// GROUP BY
+			key := latenciesGroupBy{
+				StartTime:  stat.TimeBucket,
+				TemplateID: stat.TemplateID,
+				UserID:     stat.UserID,
+			}
+			// JOIN
+			for _, was := range q.workspaceAgentStats {
+				if !t.Equal(was.CreatedAt.Truncate(time.Minute)) {
+					continue
+				}
+				if was.TemplateID != stat.TemplateID || was.UserID != stat.UserID {
+					continue
+				}
+				if was.ConnectionMedianLatencyMS < 0 {
+					continue
+				}
+				// SELECT
+				row, ok := latenciesRows[key]
+				if !ok {
+					row = latenciesRow{
+						latenciesGroupBy: key,
+					}
+				}
+				row.Latencies = append(row.Latencies, was.ConnectionMedianLatencyMS)
+				sort.Float64s(row.Latencies)
+				if len(row.Latencies) == 1 {
+					row.MedianLatencyMS = was.ConnectionMedianLatencyMS
+				} else if len(row.Latencies)%2 == 0 {
+					row.MedianLatencyMS = (row.Latencies[len(row.Latencies)/2-1] + row.Latencies[len(row.Latencies)/2]) / 2
+				} else {
+					row.MedianLatencyMS = row.Latencies[len(row.Latencies)/2]
+				}
+				latenciesRows[key] = row
+			}
+		}
+	}
+
+	/*
+		INSERT INTO template_usage_stats AS tus (
+			start_time,
+			end_time,
+			template_id,
+			user_id,
+			usage_mins,
+			median_latency_ms,
+			ssh_mins,
+			sftp_mins,
+			reconnecting_pty_mins,
+			vscode_mins,
+			jetbrains_mins,
+			app_usage_mins
+		) (
+			SELECT
+				stats.start_time,
+				stats.end_time,
+				stats.template_id,
+				stats.user_id,
+				stats.usage_mins,
+				latencies.median_latency_ms,
+				stats.ssh_mins,
+				stats.sftp_mins,
+				stats.reconnecting_pty_mins,
+				stats.vscode_mins,
+				stats.jetbrains_mins,
+				stats.app_usage_mins
+			FROM
+				stats
+			LEFT JOIN
+				latencies
+			ON
+				-- The latencies group-by ensures there is at most one row.
+				latencies.start_time = stats.start_time
+				AND latencies.template_id = stats.template_id
+				AND latencies.user_id = stats.user_id
+		)
+		ON CONFLICT
+			(start_time, template_id, user_id)
+		DO UPDATE
+		SET
+			usage_mins = EXCLUDED.usage_mins,
+			median_latency_ms = EXCLUDED.median_latency_ms,
+			ssh_mins = EXCLUDED.ssh_mins,
+			sftp_mins = EXCLUDED.sftp_mins,
+			reconnecting_pty_mins = EXCLUDED.reconnecting_pty_mins,
+			vscode_mins = EXCLUDED.vscode_mins,
+			jetbrains_mins = EXCLUDED.jetbrains_mins,
+			app_usage_mins = EXCLUDED.app_usage_mins
+		WHERE
+			(tus.*) IS DISTINCT FROM (EXCLUDED.*);
+	*/
+
+TemplateUsageStatsInsertLoop:
+	for _, stat := range statsRows {
+		// LEFT JOIN latencies
+		latency, latencyOk := latenciesRows[latenciesGroupBy{
+			StartTime:  stat.TimeBucket,
+			TemplateID: stat.TemplateID,
+			UserID:     stat.UserID,
+		}]
+
+		// SELECT
+		tus := database.TemplateUsageStat{
+			StartTime:           stat.TimeBucket,
+			EndTime:             stat.TimeBucket.Add(30 * time.Minute),
+			TemplateID:          stat.TemplateID,
+			UserID:              stat.UserID,
+			UsageMins:           int16(stat.UsageMins),
+			MedianLatencyMs:     sql.NullFloat64{Float64: latency.MedianLatencyMS, Valid: latencyOk},
+			SshMins:             int16(stat.SSHMins),
+			SftpMins:            int16(stat.SFTPMins),
+			ReconnectingPtyMins: int16(stat.ReconnectingPTYMins),
+			VscodeMins:          int16(stat.VSCodeMins),
+			JetbrainsMins:       int16(stat.JetBrainsMins),
+		}
+		if len(stat.AppUsageMinutes) > 0 {
+			tus.AppUsageMins = make(map[string]int64, len(stat.AppUsageMinutes))
+			for k, v := range stat.AppUsageMinutes {
+				tus.AppUsageMins[k] = int64(v)
+			}
+		}
+
+		// ON CONFLICT
+		for i, existing := range q.templateUsageStats {
+			if existing.StartTime.Equal(tus.StartTime) && existing.TemplateID == tus.TemplateID && existing.UserID == tus.UserID {
+				q.templateUsageStats[i] = tus
+				continue TemplateUsageStatsInsertLoop
+			}
+		}
+		// INSERT INTO
+		q.templateUsageStats = append(q.templateUsageStats, tus)
+	}
+
+	return nil
+}
+
 func (q *FakeQuerier) UpsertWorkspaceAgentPortShare(_ context.Context, arg database.UpsertWorkspaceAgentPortShareParams) (database.WorkspaceAgentPortShare, error) {
 	err := validateDatabaseType(arg)
 	if err != nil {
diff --git a/coderd/database/dbmetrics/dbmetrics.go b/coderd/database/dbmetrics/dbmetrics.go
index 2350588511..5cd452d328 100644
--- a/coderd/database/dbmetrics/dbmetrics.go
+++ b/coderd/database/dbmetrics/dbmetrics.go
@@ -949,6 +949,13 @@ func (m metricsStore) GetTemplateParameterInsights(ctx context.Context, arg data
 	return r0, r1
 }
 
+func (m metricsStore) GetTemplateUsageStats(ctx context.Context, arg database.GetTemplateUsageStatsParams) ([]database.TemplateUsageStat, error) {
+	start := time.Now()
+	r0, r1 := m.s.GetTemplateUsageStats(ctx, arg)
+	m.queryLatencies.WithLabelValues("GetTemplateUsageStats").Observe(time.Since(start).Seconds())
+	return r0, r1
+}
+
 func (m metricsStore) GetTemplateVersionByID(ctx context.Context, id uuid.UUID) (database.TemplateVersion, error) {
 	start := time.Now()
 	version, err := m.s.GetTemplateVersionByID(ctx, id)
@@ -2234,6 +2241,13 @@ func (m metricsStore) UpsertTailnetTunnel(ctx context.Context, arg database.Upse
 	return r0, r1
 }
 
+func (m metricsStore) UpsertTemplateUsageStats(ctx context.Context) error {
+	start := time.Now()
+	r0 := m.s.UpsertTemplateUsageStats(ctx)
+	m.queryLatencies.WithLabelValues("UpsertTemplateUsageStats").Observe(time.Since(start).Seconds())
+	return r0
+}
+
 func (m metricsStore) UpsertWorkspaceAgentPortShare(ctx context.Context, arg database.UpsertWorkspaceAgentPortShareParams) (database.WorkspaceAgentPortShare, error) {
 	start := time.Now()
 	r0, r1 := m.s.UpsertWorkspaceAgentPortShare(ctx, arg)
diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go
index ba5b3c965c..32049ba072 100644
--- a/coderd/database/dbmock/dbmock.go
+++ b/coderd/database/dbmock/dbmock.go
@@ -1950,6 +1950,21 @@ func (mr *MockStoreMockRecorder) GetTemplateParameterInsights(arg0, arg1 any) *g
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTemplateParameterInsights", reflect.TypeOf((*MockStore)(nil).GetTemplateParameterInsights), arg0, arg1)
 }
 
+// GetTemplateUsageStats mocks base method.
+func (m *MockStore) GetTemplateUsageStats(arg0 context.Context, arg1 database.GetTemplateUsageStatsParams) ([]database.TemplateUsageStat, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetTemplateUsageStats", arg0, arg1)
+	ret0, _ := ret[0].([]database.TemplateUsageStat)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetTemplateUsageStats indicates an expected call of GetTemplateUsageStats.
+func (mr *MockStoreMockRecorder) GetTemplateUsageStats(arg0, arg1 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTemplateUsageStats", reflect.TypeOf((*MockStore)(nil).GetTemplateUsageStats), arg0, arg1)
+}
+
 // GetTemplateUserRoles mocks base method.
 func (m *MockStore) GetTemplateUserRoles(arg0 context.Context, arg1 uuid.UUID) ([]database.TemplateUser, error) {
 	m.ctrl.T.Helper()
@@ -4694,6 +4709,20 @@ func (mr *MockStoreMockRecorder) UpsertTailnetTunnel(arg0, arg1 any) *gomock.Cal
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpsertTailnetTunnel", reflect.TypeOf((*MockStore)(nil).UpsertTailnetTunnel), arg0, arg1)
 }
 
+// UpsertTemplateUsageStats mocks base method.
+func (m *MockStore) UpsertTemplateUsageStats(arg0 context.Context) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "UpsertTemplateUsageStats", arg0)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// UpsertTemplateUsageStats indicates an expected call of UpsertTemplateUsageStats.
+func (mr *MockStoreMockRecorder) UpsertTemplateUsageStats(arg0 any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpsertTemplateUsageStats", reflect.TypeOf((*MockStore)(nil).UpsertTemplateUsageStats), arg0)
+}
+
 // UpsertWorkspaceAgentPortShare mocks base method.
 func (m *MockStore) UpsertWorkspaceAgentPortShare(arg0 context.Context, arg1 database.UpsertWorkspaceAgentPortShareParams) (database.WorkspaceAgentPortShare, error) {
 	m.ctrl.T.Helper()
diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql
index 93e6ef23b2..da2707b589 100644
--- a/coderd/database/dump.sql
+++ b/coderd/database/dump.sql
@@ -741,6 +741,47 @@ CREATE TABLE tailnet_tunnels (
     updated_at timestamp with time zone NOT NULL
 );
 
+CREATE TABLE template_usage_stats (
+    start_time timestamp with time zone NOT NULL,
+    end_time timestamp with time zone NOT NULL,
+    template_id uuid NOT NULL,
+    user_id uuid NOT NULL,
+    median_latency_ms real,
+    usage_mins smallint NOT NULL,
+    ssh_mins smallint NOT NULL,
+    sftp_mins smallint NOT NULL,
+    reconnecting_pty_mins smallint NOT NULL,
+    vscode_mins smallint NOT NULL,
+    jetbrains_mins smallint NOT NULL,
+    app_usage_mins jsonb
+);
+
+COMMENT ON TABLE template_usage_stats IS 'Records aggregated usage statistics for templates/users. All usage is rounded up to the nearest minute.';
+
+COMMENT ON COLUMN template_usage_stats.start_time IS 'Start time of the usage period.';
+
+COMMENT ON COLUMN template_usage_stats.end_time IS 'End time of the usage period.';
+
+COMMENT ON COLUMN template_usage_stats.template_id IS 'ID of the template being used.';
+
+COMMENT ON COLUMN template_usage_stats.user_id IS 'ID of the user using the template.';
+
+COMMENT ON COLUMN template_usage_stats.median_latency_ms IS 'Median latency the user is experiencing, in milliseconds. Null means no value was recorded.';
+
+COMMENT ON COLUMN template_usage_stats.usage_mins IS 'Total minutes the user has been using the template.';
+
+COMMENT ON COLUMN template_usage_stats.ssh_mins IS 'Total minutes the user has been using SSH.';
+
+COMMENT ON COLUMN template_usage_stats.sftp_mins IS 'Total minutes the user has been using SFTP.';
+
+COMMENT ON COLUMN template_usage_stats.reconnecting_pty_mins IS 'Total minutes the user has been using the reconnecting PTY.';
+
+COMMENT ON COLUMN template_usage_stats.vscode_mins IS 'Total minutes the user has been using VSCode.';
+
+COMMENT ON COLUMN template_usage_stats.jetbrains_mins IS 'Total minutes the user has been using JetBrains.';
+
+COMMENT ON COLUMN template_usage_stats.app_usage_mins IS 'Object with app names as keys and total minutes used as values. Null means no app usage was recorded.';
+
 CREATE TABLE template_version_parameters (
     template_version_id uuid NOT NULL,
     name text NOT NULL,
@@ -1468,6 +1509,9 @@ ALTER TABLE ONLY tailnet_peers
 ALTER TABLE ONLY tailnet_tunnels
     ADD CONSTRAINT tailnet_tunnels_pkey PRIMARY KEY (coordinator_id, src_id, dst_id);
 
+ALTER TABLE ONLY template_usage_stats
+    ADD CONSTRAINT template_usage_stats_pkey PRIMARY KEY (start_time, template_id, user_id);
+
 ALTER TABLE ONLY template_version_parameters
     ADD CONSTRAINT template_version_parameters_template_version_id_name_key UNIQUE (template_version_id, name);
 
@@ -1590,6 +1634,14 @@ CREATE INDEX provisioner_job_logs_id_job_id_idx ON provisioner_job_logs USING bt
 
 CREATE INDEX provisioner_jobs_started_at_idx ON provisioner_jobs USING btree (started_at) WHERE (started_at IS NULL);
 
+CREATE INDEX template_usage_stats_start_time_idx ON template_usage_stats USING btree (start_time DESC);
+
+COMMENT ON INDEX template_usage_stats_start_time_idx IS 'Index for querying MAX(start_time).';
+
+CREATE UNIQUE INDEX template_usage_stats_start_time_template_id_user_id_idx ON template_usage_stats USING btree (start_time, template_id, user_id);
+
+COMMENT ON INDEX template_usage_stats_start_time_template_id_user_id_idx IS 'Index for primary key.';
+
 CREATE UNIQUE INDEX templates_organization_id_name_idx ON templates USING btree (organization_id, lower((name)::text)) WHERE (deleted = false);
 
 CREATE UNIQUE INDEX users_email_lower_idx ON users USING btree (lower(email)) WHERE (deleted = false);
diff --git a/coderd/database/migrations/000203_template_usage_stats.down.sql b/coderd/database/migrations/000203_template_usage_stats.down.sql
new file mode 100644
index 0000000000..728b27a611
--- /dev/null
+++ b/coderd/database/migrations/000203_template_usage_stats.down.sql
@@ -0,0 +1 @@
+DROP TABLE template_usage_stats;
diff --git a/coderd/database/migrations/000203_template_usage_stats.up.sql b/coderd/database/migrations/000203_template_usage_stats.up.sql
new file mode 100644
index 0000000000..1508bbbd63
--- /dev/null
+++ b/coderd/database/migrations/000203_template_usage_stats.up.sql
@@ -0,0 +1,36 @@
+CREATE TABLE template_usage_stats (
+	start_time timestamptz NOT NULL,
+	end_time timestamptz NOT NULL,
+	template_id uuid NOT NULL,
+	user_id uuid NOT NULL,
+	median_latency_ms real NULL,
+	usage_mins smallint NOT NULL,
+	ssh_mins smallint NOT NULL,
+	sftp_mins smallint NOT NULL,
+	reconnecting_pty_mins smallint NOT NULL,
+	vscode_mins smallint NOT NULL,
+	jetbrains_mins smallint NOT NULL,
+	app_usage_mins jsonb NULL,
+
+	PRIMARY KEY (start_time, template_id, user_id)
+);
+
+COMMENT ON TABLE template_usage_stats IS 'Records aggregated usage statistics for templates/users. All usage is rounded up to the nearest minute.';
+COMMENT ON COLUMN template_usage_stats.start_time IS 'Start time of the usage period.';
+COMMENT ON COLUMN template_usage_stats.end_time IS 'End time of the usage period.';
+COMMENT ON COLUMN template_usage_stats.template_id IS 'ID of the template being used.';
+COMMENT ON COLUMN template_usage_stats.user_id IS 'ID of the user using the template.';
+COMMENT ON COLUMN template_usage_stats.median_latency_ms IS 'Median latency the user is experiencing, in milliseconds. Null means no value was recorded.';
+COMMENT ON COLUMN template_usage_stats.usage_mins IS 'Total minutes the user has been using the template.';
+COMMENT ON COLUMN template_usage_stats.ssh_mins IS 'Total minutes the user has been using SSH.';
+COMMENT ON COLUMN template_usage_stats.sftp_mins IS 'Total minutes the user has been using SFTP.';
+COMMENT ON COLUMN template_usage_stats.reconnecting_pty_mins IS 'Total minutes the user has been using the reconnecting PTY.';
+COMMENT ON COLUMN template_usage_stats.vscode_mins IS 'Total minutes the user has been using VSCode.';
+COMMENT ON COLUMN template_usage_stats.jetbrains_mins IS 'Total minutes the user has been using JetBrains.';
+COMMENT ON COLUMN template_usage_stats.app_usage_mins IS 'Object with app names as keys and total minutes used as values. Null means no app usage was recorded.';
+
+CREATE UNIQUE INDEX ON template_usage_stats (start_time, template_id, user_id);
+CREATE INDEX ON template_usage_stats (start_time DESC);
+
+COMMENT ON INDEX template_usage_stats_start_time_template_id_user_id_idx IS 'Index for primary key.';
+COMMENT ON INDEX template_usage_stats_start_time_idx IS 'Index for querying MAX(start_time).';
diff --git a/coderd/database/migrations/testdata/fixtures/000203_template_usage_stats.up.sql b/coderd/database/migrations/testdata/fixtures/000203_template_usage_stats.up.sql
new file mode 100644
index 0000000000..38b273f89e
--- /dev/null
+++ b/coderd/database/migrations/testdata/fixtures/000203_template_usage_stats.up.sql
@@ -0,0 +1,30 @@
+INSERT INTO
+	template_usage_stats (
+		start_time,
+		end_time,
+		template_id,
+		user_id,
+		median_latency_ms,
+		usage_mins,
+		ssh_mins,
+		sftp_mins,
+		reconnecting_pty_mins,
+		vscode_mins,
+		jetbrains_mins,
+		app_usage_mins
+	)
+VALUES
+	(
+		date_trunc('hour', NOW()),
+		date_trunc('hour', NOW()) + '30 minute'::interval,
+		gen_random_uuid(),
+		gen_random_uuid(),
+		45.342::real,
+		30, -- usage
+		30, -- ssh
+		5, -- sftp
+		2, -- reconnecting_pty
+		10, -- vscode
+		10, -- jetbrains
+		'{"[terminal]": 2, "code-server": 30}'::jsonb
+	);
diff --git a/coderd/database/models.go b/coderd/database/models.go
index 49eb5bf811..18587b05ad 100644
--- a/coderd/database/models.go
+++ b/coderd/database/models.go
@@ -2139,6 +2139,34 @@ type TemplateTable struct {
 	MaxPortSharingLevel AppSharingLevel `db:"max_port_sharing_level" json:"max_port_sharing_level"`
 }
 
+// Records aggregated usage statistics for templates/users. All usage is rounded up to the nearest minute.
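+// Each row covers one half-hour bucket; the primary key is (start_time, template_id, user_id).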
+type TemplateUsageStat struct {
+	// Start time of the usage period.
+	StartTime time.Time `db:"start_time" json:"start_time"`
+	// End time of the usage period.
+	EndTime time.Time `db:"end_time" json:"end_time"`
+	// ID of the template being used.
+	TemplateID uuid.UUID `db:"template_id" json:"template_id"`
+	// ID of the user using the template.
+	UserID uuid.UUID `db:"user_id" json:"user_id"`
+	// Median latency the user is experiencing, in milliseconds. Null means no value was recorded.
+	MedianLatencyMs sql.NullFloat64 `db:"median_latency_ms" json:"median_latency_ms"`
+	// Total minutes the user has been using the template.
+	UsageMins int16 `db:"usage_mins" json:"usage_mins"`
+	// Total minutes the user has been using SSH.
+	SshMins int16 `db:"ssh_mins" json:"ssh_mins"`
+	// Total minutes the user has been using SFTP.
+	SftpMins int16 `db:"sftp_mins" json:"sftp_mins"`
+	// Total minutes the user has been using the reconnecting PTY.
+	ReconnectingPtyMins int16 `db:"reconnecting_pty_mins" json:"reconnecting_pty_mins"`
+	// Total minutes the user has been using VSCode.
+	VscodeMins int16 `db:"vscode_mins" json:"vscode_mins"`
+	// Total minutes the user has been using JetBrains.
+	JetbrainsMins int16 `db:"jetbrains_mins" json:"jetbrains_mins"`
+	// Object with app names as keys and total minutes used as values. Null means no app usage was recorded.
+	AppUsageMins StringMapOfInt `db:"app_usage_mins" json:"app_usage_mins"`
+}
+
 // Joins in the username + avatar url of the created by user.
 type TemplateVersion struct {
 	ID uuid.UUID `db:"id" json:"id"`
diff --git a/coderd/database/querier.go b/coderd/database/querier.go
index fc47f58204..87d5d77711 100644
--- a/coderd/database/querier.go
+++ b/coderd/database/querier.go
@@ -193,6 +193,7 @@ type sqlcQuerier interface {
 	// created in the timeframe and return the aggregate usage counts of parameter
 	// values.
 	GetTemplateParameterInsights(ctx context.Context, arg GetTemplateParameterInsightsParams) ([]GetTemplateParameterInsightsRow, error)
+	GetTemplateUsageStats(ctx context.Context, arg GetTemplateUsageStatsParams) ([]TemplateUsageStat, error)
 	GetTemplateVersionByID(ctx context.Context, id uuid.UUID) (TemplateVersion, error)
 	GetTemplateVersionByJobID(ctx context.Context, jobID uuid.UUID) (TemplateVersion, error)
 	GetTemplateVersionByTemplateIDAndName(ctx context.Context, arg GetTemplateVersionByTemplateIDAndNameParams) (TemplateVersion, error)
@@ -416,6 +417,11 @@ type sqlcQuerier interface {
 	UpsertTailnetCoordinator(ctx context.Context, id uuid.UUID) (TailnetCoordinator, error)
 	UpsertTailnetPeer(ctx context.Context, arg UpsertTailnetPeerParams) (TailnetPeer, error)
 	UpsertTailnetTunnel(ctx context.Context, arg UpsertTailnetTunnelParams) (TailnetTunnel, error)
+	// This query aggregates the workspace_agent_stats and workspace_app_stats data
+	// into a single table for efficient storage and querying. Half-hour buckets are
+	// used to store the data, and the minutes are summed for each user and template
+	// combination. The result is stored in the template_usage_stats table.
+	UpsertTemplateUsageStats(ctx context.Context) error
 	UpsertWorkspaceAgentPortShare(ctx context.Context, arg UpsertWorkspaceAgentPortShareParams) (WorkspaceAgentPortShare, error)
 }
 
diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go
index 057c10d267..5852308025 100644
--- a/coderd/database/queries.sql.go
+++ b/coderd/database/queries.sql.go
@@ -2250,6 +2250,59 @@ func (q *sqlQuerier) GetTemplateParameterInsights(ctx context.Context, arg GetTe
 	return items, nil
 }
 
+const getTemplateUsageStats = `-- name: GetTemplateUsageStats :many
+SELECT
+	start_time, end_time, template_id, user_id, median_latency_ms, usage_mins, ssh_mins, sftp_mins, reconnecting_pty_mins, vscode_mins, jetbrains_mins, app_usage_mins
+FROM
+	template_usage_stats
+WHERE
+	start_time >= $1::timestamptz
+	AND end_time <= $2::timestamptz
+	AND CASE WHEN COALESCE(array_length($3::uuid[], 1), 0) > 0 THEN template_id = ANY($3::uuid[]) ELSE TRUE END
+`
+
+type GetTemplateUsageStatsParams struct {
+	StartTime   time.Time   `db:"start_time" json:"start_time"`
+	EndTime     time.Time   `db:"end_time" json:"end_time"`
+	TemplateIDs []uuid.UUID `db:"template_ids" json:"template_ids"`
+}
+
+func (q *sqlQuerier) GetTemplateUsageStats(ctx context.Context, arg GetTemplateUsageStatsParams) ([]TemplateUsageStat, error) {
+	rows, err := q.db.QueryContext(ctx, getTemplateUsageStats, arg.StartTime, arg.EndTime, pq.Array(arg.TemplateIDs))
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+	var items []TemplateUsageStat
+	for rows.Next() {
+		var i TemplateUsageStat
+		if err := rows.Scan(
+			&i.StartTime,
+			&i.EndTime,
+			&i.TemplateID,
+			&i.UserID,
+			&i.MedianLatencyMs,
+			&i.UsageMins,
+			&i.SshMins,
+			&i.SftpMins,
+			&i.ReconnectingPtyMins,
+			&i.VscodeMins,
+			&i.JetbrainsMins,
+			&i.AppUsageMins,
+		); err != nil {
+			return nil, err
+		}
+		items = append(items, i)
+	}
+	if err := rows.Close(); err != nil {
+		return nil, err
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return items, nil
+}
+
 const getUserActivityInsights = `-- name: GetUserActivityInsights :many
 WITH app_stats AS (
 	SELECT
@@ -2440,6 +2493,267 @@ func (q *sqlQuerier) GetUserLatencyInsights(ctx context.Context, arg GetUserLate
 	return items, nil
 }
 
+const upsertTemplateUsageStats = `-- name: UpsertTemplateUsageStats :exec
+WITH
+	latest_start AS (
+		SELECT
+			-- Truncate to hour so that we always look at even ranges of data.
+			date_trunc('hour', COALESCE(
+				MAX(start_time) - '1 hour'::interval,
+				-- Fallback when there are no template usage stats yet.
+				-- App stats can exist before this, but not agent stats,
+				-- limit the lookback to avoid inconsistency.
+				(SELECT MIN(created_at) FROM workspace_agent_stats)
+			)) AS t
+		FROM
+			template_usage_stats
+	),
+	workspace_app_stat_buckets AS (
+		SELECT
+			-- Truncate the minute to the nearest half hour, this is the bucket size
+			-- for the data.
+			date_trunc('hour', s.minute_bucket) + trunc(date_part('minute', s.minute_bucket) / 30) * 30 * '1 minute'::interval AS time_bucket,
+			w.template_id,
+			was.user_id,
+			-- Both app stats and agent stats track web terminal usage, but
+			-- by different means. The app stats value should be more
+			-- accurate so we don't want to discard it just yet.
+			CASE
+				WHEN was.access_method = 'terminal'
+				THEN '[terminal]' -- Unique name, app names can't contain brackets.
+				ELSE was.slug_or_port
+			END AS app_name,
+			COUNT(DISTINCT s.minute_bucket) AS app_minutes,
+			-- Store each unique minute bucket for later merge between datasets.
+			array_agg(DISTINCT s.minute_bucket) AS minute_buckets
+		FROM
+			workspace_app_stats AS was
+		JOIN
+			workspaces AS w
+		ON
+			w.id = was.workspace_id
+		-- Generate a series of minute buckets for each session for computing the
+		-- minutes/bucket.
+		CROSS JOIN
+			generate_series(
+				date_trunc('minute', was.session_started_at),
+				-- Subtract 1 microsecond to avoid creating an extra series.
+				date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
+				'1 minute'::interval
+			) AS s(minute_bucket)
+		WHERE
+			-- s.minute_bucket >= @start_time::timestamptz
+			-- AND s.minute_bucket < @end_time::timestamptz
+			s.minute_bucket >= (SELECT t FROM latest_start)
+			AND s.minute_bucket < NOW()
+		GROUP BY
+			time_bucket, w.template_id, was.user_id, was.access_method, was.slug_or_port
+	),
+	agent_stats_buckets AS (
+		SELECT
+			-- Truncate the minute to the nearest half hour, this is the bucket size
+			-- for the data.
+			date_trunc('hour', created_at) + trunc(date_part('minute', created_at) / 30) * 30 * '1 minute'::interval AS time_bucket,
+			template_id,
+			user_id,
+			-- Store each unique minute bucket for later merge between datasets.
+			array_agg(
+				DISTINCT CASE
+				WHEN
+					session_count_ssh > 0
+					-- TODO(mafredri): Enable when we have the column.
+					-- OR session_count_sftp > 0
+					OR session_count_reconnecting_pty > 0
+					OR session_count_vscode > 0
+					OR session_count_jetbrains > 0
+				THEN
+					date_trunc('minute', created_at)
+				ELSE
+					NULL
+				END
+			) AS minute_buckets,
+			COUNT(DISTINCT CASE WHEN session_count_ssh > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS ssh_mins,
+			-- TODO(mafredri): Enable when we have the column.
+			-- COUNT(DISTINCT CASE WHEN session_count_sftp > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS sftp_mins,
+			COUNT(DISTINCT CASE WHEN session_count_reconnecting_pty > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS reconnecting_pty_mins,
+			COUNT(DISTINCT CASE WHEN session_count_vscode > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS vscode_mins,
+			COUNT(DISTINCT CASE WHEN session_count_jetbrains > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS jetbrains_mins,
+			-- NOTE(mafredri): The agent stats are currently very unreliable, and
+			-- sometimes the connections are missing, even during active sessions.
+			-- Since we can't fully rely on this, we check for "any connection
+			-- during this half-hour". A better solution here would be preferable.
+			MAX(connection_count) > 0 AS has_connection
+		FROM
+			workspace_agent_stats
+		WHERE
+			-- created_at >= @start_time::timestamptz
+			-- AND created_at < @end_time::timestamptz
+			created_at >= (SELECT t FROM latest_start)
+			AND created_at < NOW()
+			-- Inclusion criteria to filter out empty results.
+			AND (
+				session_count_ssh > 0
+				-- TODO(mafredri): Enable when we have the column.
+				-- OR session_count_sftp > 0
+				OR session_count_reconnecting_pty > 0
+				OR session_count_vscode > 0
+				OR session_count_jetbrains > 0
+			)
+		GROUP BY
+			time_bucket, template_id, user_id
+	),
+	stats AS (
+		SELECT
+			stats.time_bucket AS start_time,
+			stats.time_bucket + '30 minutes'::interval AS end_time,
+			stats.template_id,
+			stats.user_id,
+			-- Sum/distinct to handle zero/duplicate values due to union and unnest.
+			COUNT(DISTINCT minute_bucket) AS usage_mins,
+			array_agg(DISTINCT minute_bucket) AS minute_buckets,
+			SUM(DISTINCT stats.ssh_mins) AS ssh_mins,
+			SUM(DISTINCT stats.sftp_mins) AS sftp_mins,
+			SUM(DISTINCT stats.reconnecting_pty_mins) AS reconnecting_pty_mins,
+			SUM(DISTINCT stats.vscode_mins) AS vscode_mins,
+			SUM(DISTINCT stats.jetbrains_mins) AS jetbrains_mins,
+			-- This is what we unnested, re-nest as json.
+			jsonb_object_agg(stats.app_name, stats.app_minutes) FILTER (WHERE stats.app_name IS NOT NULL) AS app_usage_mins
+		FROM (
+			SELECT
+				time_bucket,
+				template_id,
+				user_id,
+				0 AS ssh_mins,
+				0 AS sftp_mins,
+				0 AS reconnecting_pty_mins,
+				0 AS vscode_mins,
+				0 AS jetbrains_mins,
+				app_name,
+				app_minutes,
+				minute_buckets
+			FROM
+				workspace_app_stat_buckets
+
+			UNION ALL
+
+			SELECT
+				time_bucket,
+				template_id,
+				user_id,
+				ssh_mins,
+				-- TODO(mafredri): Enable when we have the column.
+				0 AS sftp_mins,
+				reconnecting_pty_mins,
+				vscode_mins,
+				jetbrains_mins,
+				NULL AS app_name,
+				NULL AS app_minutes,
+				minute_buckets
+			FROM
+				agent_stats_buckets
+			WHERE
+				-- See note in the agent_stats_buckets CTE.
+				has_connection
+		) AS stats, unnest(minute_buckets) AS minute_bucket
+		GROUP BY
+			stats.time_bucket, stats.template_id, stats.user_id
+	),
+	minute_buckets AS (
+		-- Create distinct minute buckets for user-activity, so we can filter out
+		-- irrelevant latencies.
+		SELECT DISTINCT ON (stats.start_time, stats.template_id, stats.user_id, minute_bucket)
+			stats.start_time,
+			stats.template_id,
+			stats.user_id,
+			minute_bucket
+		FROM
+			stats, unnest(minute_buckets) AS minute_bucket
+	),
+	latencies AS (
+		-- Select all non-zero latencies for all the minutes that a user used the
+		-- workspace in some way.
+		SELECT
+			mb.start_time,
+			mb.template_id,
+			mb.user_id,
+			-- TODO(mafredri): We're doing medians on medians here, we may want to
+			-- improve upon this at some point.
+			PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY was.connection_median_latency_ms)::real AS median_latency_ms
+		FROM
+			minute_buckets AS mb
+		JOIN
+			workspace_agent_stats AS was
+		ON
+			date_trunc('minute', was.created_at) = mb.minute_bucket
+			AND was.template_id = mb.template_id
+			AND was.user_id = mb.user_id
+			AND was.connection_median_latency_ms >= 0
+		GROUP BY
+			mb.start_time, mb.template_id, mb.user_id
+	)
+
+INSERT INTO template_usage_stats AS tus (
+	start_time,
+	end_time,
+	template_id,
+	user_id,
+	usage_mins,
+	median_latency_ms,
+	ssh_mins,
+	sftp_mins,
+	reconnecting_pty_mins,
+	vscode_mins,
+	jetbrains_mins,
+	app_usage_mins
+) (
+	SELECT
+		stats.start_time,
+		stats.end_time,
+		stats.template_id,
+		stats.user_id,
+		stats.usage_mins,
+		latencies.median_latency_ms,
+		stats.ssh_mins,
+		stats.sftp_mins,
+		stats.reconnecting_pty_mins,
+		stats.vscode_mins,
+		stats.jetbrains_mins,
+		stats.app_usage_mins
+	FROM
+		stats
+	LEFT JOIN
+		latencies
+	ON
+		-- The latencies group-by ensures there is at most one row.
+		latencies.start_time = stats.start_time
+		AND latencies.template_id = stats.template_id
+		AND latencies.user_id = stats.user_id
+)
+ON CONFLICT
+	(start_time, template_id, user_id)
+DO UPDATE
+SET
+	usage_mins = EXCLUDED.usage_mins,
+	median_latency_ms = EXCLUDED.median_latency_ms,
+	ssh_mins = EXCLUDED.ssh_mins,
+	sftp_mins = EXCLUDED.sftp_mins,
+	reconnecting_pty_mins = EXCLUDED.reconnecting_pty_mins,
+	vscode_mins = EXCLUDED.vscode_mins,
+	jetbrains_mins = EXCLUDED.jetbrains_mins,
+	app_usage_mins = EXCLUDED.app_usage_mins
+WHERE
+	(tus.*) IS DISTINCT FROM (EXCLUDED.*)
+`
+
+// This query aggregates the workspace_agent_stats and workspace_app_stats data
+// into a single table for efficient storage and querying. Half-hour buckets are
+// used to store the data, and the minutes are summed for each user and template
+// combination. The result is stored in the template_usage_stats table.
+func (q *sqlQuerier) UpsertTemplateUsageStats(ctx context.Context) error {
+	_, err := q.db.ExecContext(ctx, upsertTemplateUsageStats)
+	return err
+}
+
 const getJFrogXrayScanByWorkspaceAndAgentID = `-- name: GetJFrogXrayScanByWorkspaceAndAgentID :one
 SELECT
 	agent_id, workspace_id, critical, high, medium, results_url
diff --git a/coderd/database/queries/insights.sql b/coderd/database/queries/insights.sql
index f81141d06f..540950d451 100644
--- a/coderd/database/queries/insights.sql
+++ b/coderd/database/queries/insights.sql
@@ -329,6 +329,271 @@ SELECT
 FROM unflattened_usage_by_interval
 GROUP BY from_, to_;
 
+-- name: GetTemplateUsageStats :many
+SELECT
+	*
+FROM
+	template_usage_stats
+WHERE
+	start_time >= @start_time::timestamptz
+	AND end_time <= @end_time::timestamptz
+	AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN template_id = ANY(@template_ids::uuid[]) ELSE TRUE END;
+
+-- name: UpsertTemplateUsageStats :exec
+-- This query aggregates the workspace_agent_stats and workspace_app_stats data
+-- into a single table for efficient storage and querying. Half-hour buckets are
+-- used to store the data, and the minutes are summed for each user and template
+-- combination. The result is stored in the template_usage_stats table.
+WITH
+	latest_start AS (
+		SELECT
+			-- Truncate to hour so that we always look at even ranges of data.
+			date_trunc('hour', COALESCE(
+				MAX(start_time) - '1 hour'::interval,
+				-- Fallback when there are no template usage stats yet.
+				-- App stats can exist before this, but not agent stats,
+				-- limit the lookback to avoid inconsistency.
+				(SELECT MIN(created_at) FROM workspace_agent_stats)
+			)) AS t
+		FROM
+			template_usage_stats
+	),
+	workspace_app_stat_buckets AS (
+		SELECT
+			-- Truncate the minute to the nearest half hour, this is the bucket size
+			-- for the data.
+			date_trunc('hour', s.minute_bucket) + trunc(date_part('minute', s.minute_bucket) / 30) * 30 * '1 minute'::interval AS time_bucket,
+			w.template_id,
+			was.user_id,
+			-- Both app stats and agent stats track web terminal usage, but
+			-- by different means. The app stats value should be more
+			-- accurate so we don't want to discard it just yet.
+			CASE
+				WHEN was.access_method = 'terminal'
+				THEN '[terminal]' -- Unique name, app names can't contain brackets.
+				ELSE was.slug_or_port
+			END AS app_name,
+			COUNT(DISTINCT s.minute_bucket) AS app_minutes,
+			-- Store each unique minute bucket for later merge between datasets.
+			array_agg(DISTINCT s.minute_bucket) AS minute_buckets
+		FROM
+			workspace_app_stats AS was
+		JOIN
+			workspaces AS w
+		ON
+			w.id = was.workspace_id
+		-- Generate a series of minute buckets for each session for computing the
+		-- minutes/bucket.
+		CROSS JOIN
+			generate_series(
+				date_trunc('minute', was.session_started_at),
+				-- Subtract 1 microsecond to avoid creating an extra series.
+				date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
+				'1 minute'::interval
+			) AS s(minute_bucket)
+		WHERE
+			-- s.minute_bucket >= @start_time::timestamptz
+			-- AND s.minute_bucket < @end_time::timestamptz
+			s.minute_bucket >= (SELECT t FROM latest_start)
+			AND s.minute_bucket < NOW()
+		GROUP BY
+			time_bucket, w.template_id, was.user_id, was.access_method, was.slug_or_port
+	),
+	agent_stats_buckets AS (
+		SELECT
+			-- Truncate the minute to the nearest half hour, this is the bucket size
+			-- for the data.
+			date_trunc('hour', created_at) + trunc(date_part('minute', created_at) / 30) * 30 * '1 minute'::interval AS time_bucket,
+			template_id,
+			user_id,
+			-- Store each unique minute bucket for later merge between datasets.
+			array_agg(
+				DISTINCT CASE
+				WHEN
+					session_count_ssh > 0
+					-- TODO(mafredri): Enable when we have the column.
+					-- OR session_count_sftp > 0
+					OR session_count_reconnecting_pty > 0
+					OR session_count_vscode > 0
+					OR session_count_jetbrains > 0
+				THEN
+					date_trunc('minute', created_at)
+				ELSE
+					NULL
+				END
+			) AS minute_buckets,
+			COUNT(DISTINCT CASE WHEN session_count_ssh > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS ssh_mins,
+			-- TODO(mafredri): Enable when we have the column.
+			-- COUNT(DISTINCT CASE WHEN session_count_sftp > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS sftp_mins,
+			COUNT(DISTINCT CASE WHEN session_count_reconnecting_pty > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS reconnecting_pty_mins,
+			COUNT(DISTINCT CASE WHEN session_count_vscode > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS vscode_mins,
+			COUNT(DISTINCT CASE WHEN session_count_jetbrains > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS jetbrains_mins,
+			-- NOTE(mafredri): The agent stats are currently very unreliable, and
+			-- sometimes the connections are missing, even during active sessions.
+			-- Since we can't fully rely on this, we check for "any connection
+			-- during this half-hour". A better solution here would be preferable.
+			MAX(connection_count) > 0 AS has_connection
+		FROM
+			workspace_agent_stats
+		WHERE
+			-- created_at >= @start_time::timestamptz
+			-- AND created_at < @end_time::timestamptz
+			created_at >= (SELECT t FROM latest_start)
+			AND created_at < NOW()
+			-- Inclusion criteria to filter out empty results.
+			AND (
+				session_count_ssh > 0
+				-- TODO(mafredri): Enable when we have the column.
+				-- OR session_count_sftp > 0
+				OR session_count_reconnecting_pty > 0
+				OR session_count_vscode > 0
+				OR session_count_jetbrains > 0
+			)
+		GROUP BY
+			time_bucket, template_id, user_id
+	),
+	stats AS (
+		SELECT
+			stats.time_bucket AS start_time,
+			stats.time_bucket + '30 minutes'::interval AS end_time,
+			stats.template_id,
+			stats.user_id,
+			-- Sum/distinct to handle zero/duplicate values due to union and unnest.
+			COUNT(DISTINCT minute_bucket) AS usage_mins,
+			array_agg(DISTINCT minute_bucket) AS minute_buckets,
+			SUM(DISTINCT stats.ssh_mins) AS ssh_mins,
+			SUM(DISTINCT stats.sftp_mins) AS sftp_mins,
+			SUM(DISTINCT stats.reconnecting_pty_mins) AS reconnecting_pty_mins,
+			SUM(DISTINCT stats.vscode_mins) AS vscode_mins,
+			SUM(DISTINCT stats.jetbrains_mins) AS jetbrains_mins,
+			-- This is what we unnested, re-nest as json.
+			jsonb_object_agg(stats.app_name, stats.app_minutes) FILTER (WHERE stats.app_name IS NOT NULL) AS app_usage_mins
+		FROM (
+			SELECT
+				time_bucket,
+				template_id,
+				user_id,
+				0 AS ssh_mins,
+				0 AS sftp_mins,
+				0 AS reconnecting_pty_mins,
+				0 AS vscode_mins,
+				0 AS jetbrains_mins,
+				app_name,
+				app_minutes,
+				minute_buckets
+			FROM
+				workspace_app_stat_buckets
+
+			UNION ALL
+
+			SELECT
+				time_bucket,
+				template_id,
+				user_id,
+				ssh_mins,
+				-- TODO(mafredri): Enable when we have the column.
+				0 AS sftp_mins,
+				reconnecting_pty_mins,
+				vscode_mins,
+				jetbrains_mins,
+				NULL AS app_name,
+				NULL AS app_minutes,
+				minute_buckets
+			FROM
+				agent_stats_buckets
+			WHERE
+				-- See note in the agent_stats_buckets CTE.
+				has_connection
+		) AS stats, unnest(minute_buckets) AS minute_bucket
+		GROUP BY
+			stats.time_bucket, stats.template_id, stats.user_id
+	),
+	minute_buckets AS (
+		-- Create distinct minute buckets for user-activity, so we can filter out
+		-- irrelevant latencies.
+		SELECT DISTINCT ON (stats.start_time, stats.template_id, stats.user_id, minute_bucket)
+			stats.start_time,
+			stats.template_id,
+			stats.user_id,
+			minute_bucket
+		FROM
+			stats, unnest(minute_buckets) AS minute_bucket
+	),
+	latencies AS (
+		-- Select all non-zero latencies for all the minutes that a user used the
+		-- workspace in some way.
+		SELECT
+			mb.start_time,
+			mb.template_id,
+			mb.user_id,
+			-- TODO(mafredri): We're doing medians on medians here, we may want to
+			-- improve upon this at some point.
+			PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY was.connection_median_latency_ms)::real AS median_latency_ms
+		FROM
+			minute_buckets AS mb
+		JOIN
+			workspace_agent_stats AS was
+		ON
+			date_trunc('minute', was.created_at) = mb.minute_bucket
+			AND was.template_id = mb.template_id
+			AND was.user_id = mb.user_id
+			AND was.connection_median_latency_ms >= 0
+		GROUP BY
+			mb.start_time, mb.template_id, mb.user_id
+	)
+
+INSERT INTO template_usage_stats AS tus (
+	start_time,
+	end_time,
+	template_id,
+	user_id,
+	usage_mins,
+	median_latency_ms,
+	ssh_mins,
+	sftp_mins,
+	reconnecting_pty_mins,
+	vscode_mins,
+	jetbrains_mins,
+	app_usage_mins
+) (
+	SELECT
+		stats.start_time,
+		stats.end_time,
+		stats.template_id,
+		stats.user_id,
+		stats.usage_mins,
+		latencies.median_latency_ms,
+		stats.ssh_mins,
+		stats.sftp_mins,
+		stats.reconnecting_pty_mins,
+		stats.vscode_mins,
+		stats.jetbrains_mins,
+		stats.app_usage_mins
+	FROM
+		stats
+	LEFT JOIN
+		latencies
+	ON
+		-- The latencies group-by ensures there is at most one row.
+		latencies.start_time = stats.start_time
+		AND latencies.template_id = stats.template_id
+		AND latencies.user_id = stats.user_id
+)
+ON CONFLICT
+	(start_time, template_id, user_id)
+DO UPDATE
+SET
+	usage_mins = EXCLUDED.usage_mins,
+	median_latency_ms = EXCLUDED.median_latency_ms,
+	ssh_mins = EXCLUDED.ssh_mins,
+	sftp_mins = EXCLUDED.sftp_mins,
+	reconnecting_pty_mins = EXCLUDED.reconnecting_pty_mins,
+	vscode_mins = EXCLUDED.vscode_mins,
+	jetbrains_mins = EXCLUDED.jetbrains_mins,
+	app_usage_mins = EXCLUDED.app_usage_mins
+WHERE
+	(tus.*) IS DISTINCT FROM (EXCLUDED.*);
+
 -- name: GetTemplateParameterInsights :many
 -- GetTemplateParameterInsights does for each template in a given timeframe,
 -- look for the latest workspace build (for every workspace) that has been
diff --git a/coderd/database/sqlc.yaml b/coderd/database/sqlc.yaml
index 2884ff76b0..7913a9acf1 100644
--- a/coderd/database/sqlc.yaml
+++ b/coderd/database/sqlc.yaml
@@ -48,6 +48,9 @@ sql:
       - column: "template_with_users.group_acl"
         go_type:
          type: "TemplateACL"
+      - column: "template_usage_stats.app_usage_mins"
+        go_type:
+          type: "StringMapOfInt"
     rename:
       template: TemplateTable
       template_with_user: Template
diff --git a/coderd/database/types.go b/coderd/database/types.go
index f21a3d46f5..2beb74c868 100644
--- a/coderd/database/types.go
+++ b/coderd/database/types.go
@@ -90,3 +90,25 @@ func (m *StringMap) Scan(src interface{}) error {
 func (m StringMap) Value() (driver.Value, error) {
 	return json.Marshal(m)
 }
+
+type StringMapOfInt map[string]int64
+
+func (m *StringMapOfInt) Scan(src interface{}) error {
+	if src == nil {
+		return nil
+	}
+	switch src := src.(type) {
+	case []byte:
+		err := json.Unmarshal(src, m)
+		if err != nil {
+			return err
+		}
+	default:
+		return xerrors.Errorf("unsupported Scan, storing driver.Value type %T into type %T", src, m)
+	}
+	return nil
+}
+
+func (m StringMapOfInt) Value() (driver.Value, error) {
+	return json.Marshal(m)
+}
diff --git a/coderd/database/unique_constraint.go b/coderd/database/unique_constraint.go
index 498fc24a80..52de0a50f6 100644
--- a/coderd/database/unique_constraint.go
+++ b/coderd/database/unique_constraint.go
@@ -46,6 +46,7 @@ const (
 	UniqueTailnetCoordinatorsPkey                           UniqueConstraint = "tailnet_coordinators_pkey"                                // ALTER TABLE ONLY tailnet_coordinators ADD CONSTRAINT tailnet_coordinators_pkey PRIMARY KEY (id);
 	UniqueTailnetPeersPkey                                  UniqueConstraint = "tailnet_peers_pkey"                                       // ALTER TABLE ONLY tailnet_peers ADD CONSTRAINT tailnet_peers_pkey PRIMARY KEY (id, coordinator_id);
 	UniqueTailnetTunnelsPkey                                UniqueConstraint = "tailnet_tunnels_pkey"                                     // ALTER TABLE ONLY tailnet_tunnels ADD CONSTRAINT tailnet_tunnels_pkey PRIMARY KEY (coordinator_id, src_id, dst_id);
+	UniqueTemplateUsageStatsPkey                            UniqueConstraint = "template_usage_stats_pkey"                                // ALTER TABLE ONLY template_usage_stats ADD CONSTRAINT template_usage_stats_pkey PRIMARY KEY (start_time, template_id, user_id);
 	UniqueTemplateVersionParametersTemplateVersionIDNameKey UniqueConstraint = "template_version_parameters_template_version_id_name_key" // ALTER TABLE ONLY template_version_parameters ADD CONSTRAINT template_version_parameters_template_version_id_name_key UNIQUE (template_version_id, name);
 	UniqueTemplateVersionVariablesTemplateVersionIDNameKey  UniqueConstraint = "template_version_variables_template_version_id_name_key"  // ALTER TABLE ONLY template_version_variables ADD CONSTRAINT template_version_variables_template_version_id_name_key UNIQUE (template_version_id, name);
 	UniqueTemplateVersionsPkey                              UniqueConstraint = "template_versions_pkey"                                   // ALTER TABLE ONLY template_versions ADD CONSTRAINT template_versions_pkey PRIMARY KEY (id);
"template_versions_pkey" // ALTER TABLE ONLY template_versions ADD CONSTRAINT template_versions_pkey PRIMARY KEY (id); @@ -79,6 +80,7 @@ const ( UniqueIndexUsersEmail UniqueConstraint = "idx_users_email" // CREATE UNIQUE INDEX idx_users_email ON users USING btree (email) WHERE (deleted = false); UniqueIndexUsersUsername UniqueConstraint = "idx_users_username" // CREATE UNIQUE INDEX idx_users_username ON users USING btree (username) WHERE (deleted = false); UniqueOrganizationsSingleDefaultOrg UniqueConstraint = "organizations_single_default_org" // CREATE UNIQUE INDEX organizations_single_default_org ON organizations USING btree (is_default) WHERE (is_default = true); + UniqueTemplateUsageStatsStartTimeTemplateIDUserIDIndex UniqueConstraint = "template_usage_stats_start_time_template_id_user_id_idx" // CREATE UNIQUE INDEX template_usage_stats_start_time_template_id_user_id_idx ON template_usage_stats USING btree (start_time, template_id, user_id); UniqueTemplatesOrganizationIDNameIndex UniqueConstraint = "templates_organization_id_name_idx" // CREATE UNIQUE INDEX templates_organization_id_name_idx ON templates USING btree (organization_id, lower((name)::text)) WHERE (deleted = false); UniqueUsersEmailLowerIndex UniqueConstraint = "users_email_lower_idx" // CREATE UNIQUE INDEX users_email_lower_idx ON users USING btree (lower(email)) WHERE (deleted = false); UniqueUsersUsernameLowerIndex UniqueConstraint = "users_username_lower_idx" // CREATE UNIQUE INDEX users_username_lower_idx ON users USING btree (lower(username)) WHERE (deleted = false);