fix(scaletest): deploy external provisionerd (#8618)

* scaletest: stop kubernetes_secret from being constantly recreated
* scaletest: ensure we do not get auto-upgraded
* scaletest: add external provisionerd deployment, the lazy way
Cian Johnston 2023-07-20 11:38:46 +01:00 committed by GitHub
parent 9689bca5d2
commit 68a46198d3
3 changed files with 199 additions and 2 deletions

@@ -54,7 +54,7 @@ resource "random_password" "prometheus-postgres-password" {
}

resource "kubernetes_secret" "coder-db" {
  type = "" # Opaque
  type = "Opaque"
  metadata {
    name      = "coder-db-url"
    namespace = local.coder_namespace
@@ -125,6 +125,9 @@ coder:
      value: "${var.coder_experiments}"
    - name: "CODER_DANGEROUS_DISABLE_RATE_LIMITS"
      value: "true"
    # Disabling built-in provisioner daemons
    - name: "CODER_PROVISIONER_DAEMONS"
      value: "0"
  image:
    repo: ${var.coder_image_repo}
    tag: ${var.coder_image_tag}
@@ -242,6 +245,168 @@ resource "local_file" "kubernetes_template" {
EOF
}
# TODO(cian): Remove this when we have support in the Helm chart.
# Ref: https://github.com/coder/coder/issues/8243
resource "local_file" "provisionerd_deployment" {
filename = "${path.module}/../.coderv2/provisionerd-deployment.yaml"
content = <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/instance: ${var.name}
app.kubernetes.io/name: provisionerd
name: provisionerd
namespace: ${local.coder_namespace}
spec:
replicas: ${var.provisionerd_replicas}
selector:
matchLabels:
app.kubernetes.io/instance: ${var.name}
app.kubernetes.io/name: provisionerd
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 25%
type: RollingUpdate
template:
metadata:
creationTimestamp: null
labels:
app.kubernetes.io/instance: ${var.name}
app.kubernetes.io/name: provisionerd
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- ${google_container_node_pool.coder.name}
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
matchExpressions:
- key: app.kubernetes.io/instance
operator: In
values:
- ${var.name}
topologyKey: kubernetes.io/hostname
weight: 1
containers:
- args:
- server
command:
- /opt/coder
env:
- name: CODER_HTTP_ADDRESS
value: 0.0.0.0:8080
- name: CODER_PROMETHEUS_ADDRESS
value: 0.0.0.0:2112
- name: CODER_ACCESS_URL
value: ${local.coder_url}
- name: CODER_CACHE_DIRECTORY
value: /tmp/coder
- name: CODER_ENABLE_TELEMETRY
value: "false"
- name: CODER_LOGGING_HUMAN
value: /dev/null
- name: CODER_LOGGING_STACKDRIVER
value: /dev/stderr
- name: CODER_PG_CONNECTION_URL
valueFrom:
secretKeyRef:
key: url
name: coder-db-url
- name: CODER_PPROF_ENABLE
value: "true"
- name: CODER_PROMETHEUS_ENABLE
value: "true"
- name: CODER_PROMETHEUS_COLLECT_AGENT_STATS
value: "true"
- name: CODER_PROMETHEUS_COLLECT_DB_METRICS
value: "true"
- name: CODER_VERBOSE
value: "true"
- name: CODER_PROVISIONER_DAEMONS
value: "${var.provisionerd_concurrency}"
image: "${var.coder_image_repo}:${var.coder_image_tag}"
imagePullPolicy: IfNotPresent
lifecycle: {}
livenessProbe:
failureThreshold: 3
httpGet:
path: /api/v2/buildinfo
port: http
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
name: provisionerd
ports:
- containerPort: 8080
name: http
protocol: TCP
- containerPort: 2112
name: prometheus-http
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /api/v2/buildinfo
port: http
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources:
limits:
cpu: "${var.provisionerd_cpu_limit}"
memory: "${var.provisionerd_mem_limit}"
requests:
cpu: "${var.provisionerd_cpu_request}"
memory: "${var.provisionerd_mem_request}"
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsGroup: 1000
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /tmp
name: cache
dnsPolicy: ClusterFirst
restartPolicy: Always
serviceAccount: coder
serviceAccountName: coder
terminationGracePeriodSeconds: 60
volumes:
- emptyDir:
sizeLimit: 10Gi
name: cache
EOF
}
resource "null_resource" "provisionerd_deployment_apply" {
depends_on = [helm_release.coder-chart, local_file.provisionerd_deployment, null_resource.cluster_kubeconfig]
triggers = {
kubeconfig_path = local.cluster_kubeconfig_path
manifest_path = local_file.provisionerd_deployment.filename
}
provisioner "local-exec" {
command = <<EOF
KUBECONFIG=${self.triggers.kubeconfig_path} kubectl apply -f ${self.triggers.manifest_path}
EOF
}
}
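
Not part of this commit: since the manifest is applied with a one-shot local-exec, nothing removes it on teardown. A minimal sketch of a companion destroy-time resource, assuming the same kubeconfig and manifest paths; the resource name provisionerd_deployment_delete is hypothetical:

# Sketch only (not in this change): delete the manifest on terraform destroy,
# mirroring the apply step above. Destroy provisioners may only reference self,
# so the paths are carried in triggers.
resource "null_resource" "provisionerd_deployment_delete" {
  triggers = {
    kubeconfig_path = local.cluster_kubeconfig_path
    manifest_path   = local_file.provisionerd_deployment.filename
  }
  provisioner "local-exec" {
    when    = destroy
    command = "KUBECONFIG=${self.triggers.kubeconfig_path} kubectl delete --ignore-not-found -f ${self.triggers.manifest_path}"
  }
}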
resource "local_file" "output_vars" {
filename = "${path.module}/../.coderv2/url"
content = local.coder_url

@@ -18,7 +18,8 @@ resource "google_container_cluster" "primary" {
  }
  release_channel {
    channel = "STABLE"
    # Setting the release channel to STABLE can cause unexpected cluster upgrades.
    channel = "UNSPECIFIED"
  }
  initial_node_count       = 1
  remove_default_node_pool = true

@@ -130,6 +130,37 @@ variable "coder_mem_limit" {
  default = "1024Mi"
}

// Allow independently scaling provisionerd resources
variable "provisionerd_cpu_request" {
  description = "CPU request to allocate to provisionerd."
  default     = "500m"
}

variable "provisionerd_mem_request" {
  description = "Memory request to allocate to provisionerd."
  default     = "512Mi"
}

variable "provisionerd_cpu_limit" {
  description = "CPU limit to allocate to provisionerd."
  default     = "1000m"
}

variable "provisionerd_mem_limit" {
  description = "Memory limit to allocate to provisionerd."
  default     = "1024Mi"
}

variable "provisionerd_replicas" {
  description = "Number of provisionerd replicas."
  default     = 1
}

variable "provisionerd_concurrency" {
  description = "Number of concurrent provisioner jobs per provisionerd instance."
  default     = 3
}
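
These variables let a scale test size provisionerd independently of coderd. For illustration, they could be overridden in a tfvars file; the values below are hypothetical examples, not defaults from this change:

# Illustrative terraform.tfvars overrides (example values only):
provisionerd_replicas    = 4
provisionerd_concurrency = 5
provisionerd_cpu_limit   = "2000m"
provisionerd_mem_limit   = "2048Mi"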
variable "coder_chart_version" {
description = "Version of the Coder Helm chart to install. Defaults to latest."
default = null