chore(scaletest): update scaletest terraform to match big.cdr.dev (#9860)

- Removes usage of null_resource as it is no longer required (pattern sketched below)
- Updates Terraform in both infra/ and k8s/ to match
- Updates the provisionerd deployment to use the Helm chart
Cian Johnston 2023-09-27 09:02:35 +01:00 committed by GitHub
parent 726a4dadf2
commit 399b428149
7 changed files with 274 additions and 262 deletions
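
For context, here is the null_resource pattern being removed and its provider-native replacement, condensed from the k8s/ diff below. This is a minimal sketch, assuming a configured kubernetes provider; it is not the full resource definitions from the commit.

# Before: namespace created out-of-band via a null_resource and kubectl
resource "null_resource" "coder_namespace" {
  triggers = {
    namespace       = local.coder_namespace
    kubeconfig_path = var.kubernetes_kubeconfig_path
  }
  provisioner "local-exec" {
    when    = create
    command = "KUBECONFIG=${self.triggers.kubeconfig_path} kubectl create namespace ${self.triggers.namespace}"
  }
}

# After: namespace managed directly by the kubernetes provider, so
# downstream resources can reference it and Terraform owns its lifecycle.
resource "kubernetes_namespace" "coder_namespace" {
  metadata {
    name = local.coder_namespace
  }
}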

View File

@@ -41,16 +41,25 @@ resource "google_container_cluster" "primary" {
workload_identity_config {
workload_pool = "${data.google_project.project.project_id}.svc.id.goog"
}
lifecycle {
ignore_changes = [
maintenance_policy,
release_channel,
remove_default_node_pool
]
}
}
resource "google_container_node_pool" "coder" {
name = "${var.name}-coder"
location = var.zone
project = var.project_id
cluster = google_container_cluster.primary.name
node_count = var.state == "stopped" ? 0 : var.nodepool_size_coder
management {
auto_upgrade = false
name = "${var.name}-coder"
location = var.zone
project = var.project_id
cluster = google_container_cluster.primary.name
autoscaling {
min_node_count = 1
max_node_count = var.nodepool_size_coder
}
node_config {
oauth_scopes = [
@@ -74,14 +83,20 @@ resource "google_container_node_pool" "coder" {
disable-legacy-endpoints = "true"
}
}
lifecycle {
ignore_changes = [management[0].auto_repair, management[0].auto_upgrade, timeouts]
}
}
resource "google_container_node_pool" "workspaces" {
name = "${var.name}-workspaces"
location = var.zone
project = var.project_id
cluster = google_container_cluster.primary.name
node_count = var.state == "stopped" ? 0 : var.nodepool_size_workspaces
name = "${var.name}-workspaces"
location = var.zone
project = var.project_id
cluster = google_container_cluster.primary.name
autoscaling {
min_node_count = 0
total_max_node_count = var.nodepool_size_workspaces
}
management {
auto_upgrade = false
}
@@ -107,6 +122,9 @@ resource "google_container_node_pool" "workspaces" {
disable-legacy-endpoints = "true"
}
}
lifecycle {
ignore_changes = [management[0].auto_repair, management[0].auto_upgrade, timeouts]
}
}
resource "google_container_node_pool" "misc" {
@@ -140,6 +158,9 @@ resource "google_container_node_pool" "misc" {
disable-legacy-endpoints = "true"
}
}
lifecycle {
ignore_changes = [management[0].auto_repair, management[0].auto_upgrade, timeouts]
}
}
resource "null_resource" "cluster_kubeconfig" {

View File

@@ -32,6 +32,10 @@ resource "google_sql_database_instance" "db" {
record_client_address = false
}
}
lifecycle {
ignore_changes = [deletion_protection, timeouts]
}
}
resource "google_sql_database" "coder" {
@@ -40,6 +44,9 @@ resource "google_sql_database" "coder" {
name = "${var.name}-coder"
# required for postgres, otherwise db fails to delete
deletion_policy = "ABANDON"
lifecycle {
ignore_changes = [deletion_policy]
}
}
resource "random_password" "coder-postgres-password" {
@@ -58,6 +65,9 @@ resource "google_sql_user" "coder" {
password = random_password.coder-postgres-password.result
# required for postgres, otherwise user fails to delete
deletion_policy = "ABANDON"
lifecycle {
ignore_changes = [deletion_policy, password]
}
}
resource "google_sql_user" "prometheus" {
@@ -68,6 +78,9 @@ resource "google_sql_user" "prometheus" {
password = random_password.prometheus-postgres-password.result
# required for postgres, otherwise user fails to delete
deletion_policy = "ABANDON"
lifecycle {
ignore_changes = [deletion_policy, password]
}
}
locals {

View File

@@ -12,7 +12,7 @@ resource "google_compute_subnetwork" "subnet" {
project = var.project_id
region = var.region
network = google_compute_network.vpc.name
ip_cidr_range = "10.200.0.0/24"
ip_cidr_range = var.subnet_cidr
}
resource "google_compute_global_address" "sql_peering" {

View File

@@ -25,6 +25,11 @@ variable "zone" {
default = "us-east1-c"
}
variable "subnet_cidr" {
description = "CIDR range for the subnet."
default = "10.200.0.0/24"
}
variable "k8s_version" {
description = "Kubernetes version to provision."
default = "1.24"

View File

@@ -1,42 +1,80 @@
data "google_client_config" "default" {}
locals {
coder_helm_repo = "https://helm.coder.com/v2"
coder_helm_chart = "coder"
coder_release_name = var.name
coder_namespace = "coder-${var.name}"
coder_admin_email = "admin@coder.com"
coder_admin_user = "coder"
coder_access_url = "http://${var.coder_address}"
coder_url = var.coder_access_url == "" ? "http://${var.coder_address}" : var.coder_access_url
coder_admin_email = "admin@coder.com"
coder_admin_user = "coder"
coder_helm_repo = "https://helm.coder.com/v2"
coder_helm_chart = "coder"
coder_namespace = "coder-${var.name}"
coder_release_name = var.name
provisionerd_helm_chart = "coder-provisioner"
provisionerd_release_name = "${var.name}-provisionerd"
}
resource "null_resource" "coder_namespace" {
triggers = {
namespace = local.coder_namespace
kubeconfig_path = var.kubernetes_kubeconfig_path
resource "kubernetes_namespace" "coder_namespace" {
metadata {
name = local.coder_namespace
}
provisioner "local-exec" {
when = create
command = <<EOF
KUBECONFIG=${self.triggers.kubeconfig_path} kubectl create namespace ${self.triggers.namespace}
EOF
}
provisioner "local-exec" {
when = destroy
command = "true"
lifecycle {
ignore_changes = [timeouts, wait_for_default_service_account]
}
}
resource "random_password" "provisionerd_psk" {
length = 26
}
resource "kubernetes_secret" "coder-db" {
type = "Opaque"
metadata {
name = "coder-db-url"
namespace = local.coder_namespace
namespace = kubernetes_namespace.coder_namespace.metadata.0.name
}
depends_on = [null_resource.coder_namespace]
data = {
url = var.coder_db_url
}
lifecycle {
ignore_changes = [timeouts, wait_for_service_account_token]
}
}
resource "kubernetes_secret" "provisionerd_psk" {
type = "Opaque"
metadata {
name = "coder-provisioner-psk"
namespace = kubernetes_namespace.coder_namespace.metadata.0.name
}
data = {
psk = random_password.provisionerd_psk.result
}
lifecycle {
ignore_changes = [timeouts, wait_for_service_account_token]
}
}
# OIDC secret needs to be manually provisioned for now.
data "kubernetes_secret" "coder_oidc" {
metadata {
namespace = kubernetes_namespace.coder_namespace.metadata.0.name
name = "coder-oidc"
}
}
# TLS needs to be provisioned manually for now.
data "kubernetes_secret" "coder_tls" {
metadata {
namespace = kubernetes_namespace.coder_namespace.metadata.0.name
name = "${var.name}-tls"
}
}
# Also need an OTEL collector deployed. Manual for now.
data "kubernetes_service" "otel_collector" {
metadata {
namespace = kubernetes_namespace.coder_namespace.metadata.0.name
name = "otel-collector"
}
}
resource "helm_release" "coder-chart" {
@@ -44,10 +82,7 @@ resource "helm_release" "coder-chart" {
chart = local.coder_helm_chart
name = local.coder_release_name
version = var.coder_chart_version
namespace = local.coder_namespace
depends_on = [
null_resource.coder_namespace
]
namespace = kubernetes_namespace.coder_namespace.metadata.0.name
values = [<<EOF
coder:
affinity:
@@ -70,10 +105,10 @@ coder:
values: ["${local.coder_release_name}"]
env:
- name: "CODER_ACCESS_URL"
value: "${local.coder_access_url}"
value: "${local.coder_url}"
- name: "CODER_CACHE_DIRECTORY"
value: "/tmp/coder"
- name: "CODER_ENABLE_TELEMETRY"
- name: "CODER_TELEMETRY_ENABLE"
value: "false"
- name: "CODER_LOGGING_HUMAN"
value: "/dev/null"
@@ -101,6 +136,39 @@ coder:
# Disabling built-in provisioner daemons
- name: "CODER_PROVISIONER_DAEMONS"
value: "0"
- name: CODER_PROVISIONER_DAEMON_PSK
valueFrom:
secretKeyRef:
key: psk
name: "${kubernetes_secret.provisionerd_psk.metadata.0.name}"
# Enable OIDC
- name: "CODER_OIDC_ISSUER_URL"
valueFrom:
secretKeyRef:
key: issuer-url
name: "${data.kubernetes_secret.coder_oidc.metadata.0.name}"
- name: "CODER_OIDC_EMAIL_DOMAIN"
valueFrom:
secretKeyRef:
key: email-domain
name: "${data.kubernetes_secret.coder_oidc.metadata.0.name}"
- name: "CODER_OIDC_CLIENT_ID"
valueFrom:
secretKeyRef:
key: client-id
name: "${data.kubernetes_secret.coder_oidc.metadata.0.name}"
- name: "CODER_OIDC_CLIENT_SECRET"
valueFrom:
secretKeyRef:
key: client-secret
name: "${data.kubernetes_secret.coder_oidc.metadata.0.name}"
# Send OTEL traces to the cluster-local collector, sampling 10% of traces
- name: "OTEL_EXPORTER_OTLP_ENDPOINT"
value: "http://${data.kubernetes_service.otel_collector.metadata.0.name}.${kubernetes_namespace.coder_namespace.metadata.0.name}.svc.cluster.local:4317"
- name: "OTEL_TRACES_SAMPLER"
value: parentbased_traceidratio
- name: "OTEL_TRACES_SAMPLER_ARG"
value: "0.1"
image:
repo: ${var.coder_image_repo}
tag: ${var.coder_image_tag}
@@ -130,6 +198,74 @@ EOF
]
}
resource "helm_release" "provisionerd-chart" {
repository = local.coder_helm_repo
chart = local.provisionerd_helm_chart
name = local.provisionerd_release_name
version = var.provisionerd_chart_version
namespace = kubernetes_namespace.coder_namespace.metadata.0.name
values = [<<EOF
coder:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: "cloud.google.com/gke-nodepool"
operator: "In"
values: ["${var.kubernetes_nodepool_coder}"]
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 1
podAffinityTerm:
topologyKey: "kubernetes.io/hostname"
labelSelector:
matchExpressions:
- key: "app.kubernetes.io/instance"
operator: "In"
values: ["${local.coder_release_name}"]
env:
- name: "CODER_URL"
value: "${local.coder_url}"
- name: "CODER_VERBOSE"
value: "true"
- name: "CODER_CACHE_DIRECTORY"
value: "/tmp/coder"
- name: "CODER_TELEMETRY_ENABLE"
value: "false"
- name: "CODER_LOGGING_HUMAN"
value: "/dev/null"
- name: "CODER_LOGGING_STACKDRIVER"
value: "/dev/stderr"
- name: "CODER_PROMETHEUS_ENABLE"
value: "true"
- name: "CODER_PROVISIONERD_TAGS"
value = "socpe=organization"
image:
repo: ${var.provisionerd_image_repo}
tag: ${var.provisionerd_image_tag}
replicaCount: "${var.provisionerd_replicas}"
resources:
requests:
cpu: "${var.provisionerd_cpu_request}"
memory: "${var.provisionerd_mem_request}"
limits:
cpu: "${var.provisionerd_cpu_limit}"
memory: "${var.provisionerd_mem_limit}"
securityContext:
readOnlyRootFilesystem: true
volumeMounts:
- mountPath: "/tmp"
name: cache
readOnly: false
volumes:
- emptyDir:
sizeLimit: 1024Mi
name: cache
EOF
]
}
resource "local_file" "kubernetes_template" {
filename = "${path.module}/../.coderv2/templates/kubernetes/main.tf"
content = <<EOF
@@ -218,174 +354,12 @@ resource "local_file" "kubernetes_template" {
EOF
}
# TODO(cian): Remove this when we have support in the Helm chart.
# Ref: https://github.com/coder/coder/issues/8243
resource "local_file" "provisionerd_deployment" {
filename = "${path.module}/../.coderv2/provisionerd-deployment.yaml"
content = <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/instance: ${var.name}
app.kubernetes.io/name: provisionerd
name: provisionerd
namespace: ${local.coder_namespace}
spec:
replicas: ${var.provisionerd_replicas}
selector:
matchLabels:
app.kubernetes.io/instance: ${var.name}
app.kubernetes.io/name: provisionerd
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 25%
type: RollingUpdate
template:
metadata:
creationTimestamp: null
labels:
app.kubernetes.io/instance: ${var.name}
app.kubernetes.io/name: provisionerd
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- ${var.kubernetes_nodepool_coder}
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/instance
operator: In
values:
- ${var.name}
topologyKey: kubernetes.io/hostname
weight: 1
containers:
- args:
- server
command:
- /opt/coder
env:
- name: CODER_HTTP_ADDRESS
value: 0.0.0.0:8080
- name: CODER_PROMETHEUS_ADDRESS
value: 0.0.0.0:2112
- name: CODER_ACCESS_URL
value: ${local.coder_access_url}
- name: CODER_CACHE_DIRECTORY
value: /tmp/coder
- name: CODER_ENABLE_TELEMETRY
value: "false"
- name: CODER_LOGGING_HUMAN
value: /dev/null
- name: CODER_LOGGING_STACKDRIVER
value: /dev/stderr
- name: CODER_PG_CONNECTION_URL
valueFrom:
secretKeyRef:
key: url
name: coder-db-url
- name: CODER_PPROF_ENABLE
value: "true"
- name: CODER_PROMETHEUS_ENABLE
value: "true"
- name: CODER_PROMETHEUS_COLLECT_AGENT_STATS
value: "true"
- name: CODER_PROMETHEUS_COLLECT_DB_METRICS
value: "true"
- name: CODER_VERBOSE
value: "true"
- name: CODER_PROVISIONER_DAEMONS
value: "${var.provisionerd_concurrency}"
image: "${var.coder_image_repo}:${var.coder_image_tag}"
imagePullPolicy: IfNotPresent
lifecycle: {}
livenessProbe:
failureThreshold: 3
httpGet:
path: /api/v2/buildinfo
port: http
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
name: provisionerd
ports:
- containerPort: 8080
name: http
protocol: TCP
- containerPort: 2112
name: prometheus-http
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /api/v2/buildinfo
port: http
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources:
limits:
cpu: "${var.provisionerd_cpu_limit}"
memory: "${var.provisionerd_mem_limit}"
requests:
cpu: "${var.provisionerd_cpu_request}"
memory: "${var.provisionerd_mem_request}"
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsGroup: 1000
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /tmp
name: cache
dnsPolicy: ClusterFirst
restartPolicy: Always
serviceAccount: coder
serviceAccountName: coder
terminationGracePeriodSeconds: 60
volumes:
- emptyDir:
sizeLimit: 10Gi
name: cache
EOF
}
resource "null_resource" "provisionerd_deployment_apply" {
depends_on = [helm_release.coder-chart, local_file.provisionerd_deployment]
triggers = {
kubeconfig_path = var.kubernetes_kubeconfig_path
manifest_path = local_file.provisionerd_deployment.filename
}
provisioner "local-exec" {
command = <<EOF
KUBECONFIG=${self.triggers.kubeconfig_path} kubectl apply -f ${self.triggers.manifest_path}
EOF
}
}
resource "local_file" "output_vars" {
filename = "${path.module}/../../.coderv2/url"
content = local.coder_access_url
content = local.coder_url
}
output "coder_url" {
description = "URL of the Coder deployment"
value = local.coder_access_url
value = local.coder_url
}
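
The coder Helm values above read OIDC settings from a pre-existing coder-oidc secret (surfaced via the kubernetes_secret data source). For reference only, a hedged sketch of the shape that secret would take if it were managed in Terraform rather than provisioned by hand; the key names come from the secretKeyRef entries above, while every value here is a placeholder.

resource "kubernetes_secret" "coder_oidc" {
  metadata {
    name      = "coder-oidc"
    namespace = kubernetes_namespace.coder_namespace.metadata.0.name
  }
  data = {
    "issuer-url"    = "https://idp.example.com" # placeholder: your IdP issuer URL
    "email-domain"  = "example.com"             # placeholder: allowed email domain
    "client-id"     = "replace-me"              # placeholder
    "client-secret" = "replace-me"              # placeholder
  }
}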

View File

@@ -10,38 +10,31 @@ locals {
}
# Create a namespace to hold our Prometheus deployment.
resource "null_resource" "prometheus_namespace" {
triggers = {
namespace = local.prometheus_namespace
kubeconfig_path = var.kubernetes_kubeconfig_path
resource "kubernetes_namespace" "prometheus_namespace" {
metadata {
name = local.prometheus_namespace
}
depends_on = []
provisioner "local-exec" {
when = create
command = <<EOF
KUBECONFIG=${self.triggers.kubeconfig_path} kubectl create namespace ${self.triggers.namespace}
EOF
}
provisioner "local-exec" {
when = destroy
command = "true"
lifecycle {
ignore_changes = [timeouts, wait_for_default_service_account]
}
}
# Create a secret to store the remote write key
resource "kubernetes_secret" "prometheus-credentials" {
count = local.prometheus_remote_write_enabled ? 1 : 0
type = "kubernetes.io/basic-auth"
depends_on = [null_resource.prometheus_namespace]
count = local.prometheus_remote_write_enabled ? 1 : 0
type = "kubernetes.io/basic-auth"
metadata {
name = "prometheus-credentials"
namespace = local.prometheus_namespace
namespace = kubernetes_namespace.prometheus_namespace.metadata.0.name
}
data = {
username = var.prometheus_remote_write_user
password = var.prometheus_remote_write_password
}
lifecycle {
ignore_changes = [timeouts, wait_for_service_account_token]
}
}
# Install Prometheus using the Bitnami Prometheus helm chart.
@@ -49,8 +42,7 @@ resource "helm_release" "prometheus-chart" {
repository = local.prometheus_helm_repo
chart = local.prometheus_helm_chart
name = local.prometheus_release_name
namespace = local.prometheus_namespace
depends_on = [null_resource.prometheus_namespace]
namespace = kubernetes_namespace.prometheus_namespace.metadata.0.name
values = [<<EOF
alertmanager:
enabled: false
@@ -113,13 +105,15 @@ resource "kubernetes_secret" "prometheus-postgres-password" {
type = "kubernetes.io/basic-auth"
metadata {
name = "prometheus-postgres"
namespace = local.prometheus_namespace
namespace = kubernetes_namespace.prometheus_namespace.metadata.0.name
}
depends_on = [null_resource.prometheus_namespace]
data = {
username = var.prometheus_postgres_user
password = var.prometheus_postgres_password
}
lifecycle {
ignore_changes = [timeouts, wait_for_service_account_token]
}
}
# Install Prometheus Postgres exporter helm chart
@@ -153,35 +147,27 @@ serviceMonitor:
]
}
# NOTE: this is created as a local file before being applied
# as the kubernetes_manifest resource needs to be run separately
# after creating a cluster, and we want this to be brought up
# with a single command.
resource "local_file" "coder-monitoring-manifest" {
filename = "${path.module}/../.coderv2/coder-monitoring.yaml"
resource "kubernetes_manifest" "coder_monitoring" {
depends_on = [helm_release.prometheus-chart]
content = <<EOF
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
namespace: ${local.coder_namespace}
name: coder-monitoring
spec:
selector:
matchLabels:
app.kubernetes.io/name: coder
podMetricsEndpoints:
- port: prometheus-http
interval: 30s
EOF
}
resource "null_resource" "coder-monitoring-manifest_apply" {
provisioner "local-exec" {
working_dir = "${abspath(path.module)}/../.coderv2"
command = <<EOF
KUBECONFIG=${var.kubernetes_kubeconfig_path} kubectl apply -f ${abspath(local_file.coder-monitoring-manifest.filename)}
EOF
manifest = {
apiVersion = "monitoring.coreos.com/v1"
kind = "PodMonitor"
metadata = {
namespace = kubernetes_namespace.coder_namespace.metadata.0.name
name = "coder-monitoring"
}
spec = {
selector = {
matchLabels = {
"app.kubernetes.io/name" : "coder"
}
}
podMetricsEndpoints = [
{
port = "prometheus-http"
interval = "30s"
}
]
}
}
depends_on = [helm_release.prometheus-chart]
}

View File

@@ -28,6 +28,9 @@ variable "kubernetes_nodepool_misc" {
}
// These variables control the Coder deployment.
variable "coder_access_url" {
description = "Access URL for the Coder deployment."
}
variable "coder_replicas" {
description = "Number of Coder replicas to provision."
default = 1
@@ -68,12 +71,12 @@ variable "coder_mem_limit" {
// Allow independently scaling provisionerd resources
variable "provisionerd_cpu_request" {
description = "CPU request to allocate to provisionerd."
default = "500m"
default = "100m"
}
variable "provisionerd_mem_request" {
description = "Memory request to allocate to provisionerd."
default = "512Mi"
default = "1Gi"
}
variable "provisionerd_cpu_limit" {
@@ -83,7 +86,7 @@ variable "provisionerd_cpu_limit" {
variable "provisionerd_mem_limit" {
description = "Memory limit to allocate to provisionerd."
default = "1024Mi"
default = "1Gi"
}
variable "provisionerd_replicas" {
@@ -91,9 +94,19 @@ variable "provisionerd_replicas" {
default = 1
}
variable "provisionerd_concurrency" {
description = "Number of concurrent provisioner jobs per provisionerd instance."
default = 3
variable "provisionerd_chart_version" {
description = "Version of the Provisionerd Helm chart to install. Defaults to latest."
default = null
}
variable "provisionerd_image_repo" {
description = "Repository to use for Provisionerd image."
default = "ghcr.io/coder/coder"
}
variable "provisionerd_image_tag" {
description = "Tag to use for Provisionerd image."
default = "latest"
}
variable "coder_chart_version" {