update

2023-02-05 21:42:11 +01:00 · 2023-02-05 21:42:11 +01:00 · 6b152c1404
parent fb75aaea58
commit 6b152c1404
1 changed files with 198 additions and 27 deletions
--- a/prometheus_grafana/readme.md
+++ b/prometheus_grafana/readme.md
@ -20,8 +20,8 @@ So maybe just go play with that.
 Setup here starts off with the basics and then theres chapters how to add features

 * **[Core prometheus+grafana](#Overview)** - to get nice dashboards with metrics from docker host and containers
-* **Pushgateway** - how to use it to allow pushing metrics in to prometheus from anywhere
-* **Alertmanager** - how to use it for notifications
+* **[Pushgateway](#Pushgateway)** - how to use it to allow pushing metrics in to prometheus from anywhere
+* **[Alertmanager](#Alertmanager)** - how to use it for notifications
 * **Loki** - how to do the above things but for logs, not just metrics
 * **Caddy** - adding dashboard for reverse proxy info

@ -111,6 +111,7 @@ services:
    container_name: prometheus
    hostname: prometheus
    restart: unless-stopped
+    user: root
    depends_on:
      - cadvisor
    command:
@ -211,9 +212,7 @@ If one does not exist yet: `docker network create caddy_net`

 [Official documentation.](https://prometheus.io/docs/prometheus/latest/configuration/configuration/)

-A config file for prometheus, bind mounted in to prometheus container.</br>
-Contains the bare minimum setup of targets from where metrics are to be pulled.
-
+Contains the bare minimum setup of targets from where metrics are to be pulled.<br>
 Stefanprodan [gives](https://github.com/stefanprodan/dockprom/blob/master/prometheus/prometheus.yml)
 a custom shorter scrape intervals, but I feel thats not really
 [necessary](https://www.robustperception.io/keep-it-simple-scrape_interval-id/).
@ -224,10 +223,6 @@ global:
  scrape_interval:     15s
  evaluation_interval: 15s

-rule_files:
-  - "/etc/prometheus/rules/*.rules"
-
-# A scrape configuration containing exactly one endpoint to scrape.
 scrape_configs:
  - job_name: 'nodeexporter'
    static_configs:
@ -256,10 +251,6 @@ graf.{$MY_DOMAIN} {
 prom.{$MY_DOMAIN} {
  reverse_proxy prometheus:9090
 }
-
-push.{$MY_DOMAIN} {
-  reverse_proxy pushgateway:9091
-}
 ```

 # First run and Grafana configuration
@ -283,16 +274,151 @@ the time interval is set to show last 1h instead of last 15m
 * **monitoring_services.json** - dashboar showing docker containers metrics
  of containers that are labeled `monitoring`

---
-
 ![interface-pic](https://i.imgur.com/wzwgBkp.png)

-# Alertmanager
+---

-[ntfy](https://github.com/DoTheEvo/selfhosted-apps-docker/tree/master/gotify-ntfy-signal)
-will be used to notify about prometheus alerts.
+<details>
+<summary><h1>Pushgateway</h1></summary>

-`alertmanager.yml`
+The setup and real world use of pushgateway, along with small steps 
+when learning it are in the repo - 
+[Veeam Prometheus Grafana](https://github.com/DoTheEvo/veeam-prometheus-grafana)
+
+Including pushing information from windows powershell.
+
+</details>
+
+---
+---
+
+<details>
+<summary><h1>Alertmanager</h1></summary>
+
+Several changes are needed
+
+- New container - `alertmanager` added to the compose file.
+- New file - `alertmanager.yml` bind mounted in the alertmanager container.<br>
+  This file contains configuration about where and how to deliver alerts.<br>
+  A selfhosted
+  [ntfy](https://github.com/DoTheEvo/selfhosted-apps-docker/tree/master/gotify-ntfy-signal)
+  webhook is used that gets alerts to a phone app.
+- New file - `alert.rules` mounted in to prometheus container<br>
+  This files defines when value of some metric becomes an alert event.
+- Changed file - `prometheus.yml` added `alerting` section
+  and the path to the `rule_files`
+
+<details>
+<summary>`docker-compose.yml`</summary>
+```yml
+services:
+
+  # MONITORING SYSTEM AND THE METRICS DATABASE
+  prometheus:
+    image: prom/prometheus:v2.42.0
+    container_name: prometheus
+    hostname: prometheus
+    restart: unless-stopped
+    user: root
+    depends_on:
+      - cadvisor
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/etc/prometheus/console_libraries'
+      - '--web.console.templates=/etc/prometheus/consoles'
+      - '--storage.tsdb.retention.time=500h'
+      - '--web.enable-lifecycle'
+    volumes:
+      - ./prometheus_data:/prometheus
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+      - ./alert.rules:/etc/prometheus/rules/alert.rules
+    expose:
+      - 9090:9090
+    labels:
+      org.label-schema.group: "monitoring"
+
+  # WEB BASED UI VISUALISATION OF METRICS
+  grafana:
+    image: grafana/grafana:9.3.6
+    container_name: grafana
+    hostname: grafana
+    restart: unless-stopped
+    env_file: .env
+    user: root
+    volumes:
+      - ./grafana_data:/var/lib/grafana
+    expose:
+      - 3000
+    labels:
+      org.label-schema.group: "monitoring"
+
+  # HOST LINUX MACHINE METRICS EXPORTER
+  nodeexporter:
+    image: prom/node-exporter:v1.5.0
+    container_name: nodeexporter
+    hostname: nodeexporter
+    restart: unless-stopped
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.rootfs=/rootfs'
+      - '--path.sysfs=/host/sys'
+      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
+    volumes:
+      - /proc:/host/proc:ro
+      - /sys:/host/sys:ro
+      - /:/rootfs:ro
+    expose:
+      - 9100
+    labels:
+      org.label-schema.group: "monitoring"
+
+  # DOCKER CONTAINERS METRICS EXPORTER
+  cadvisor:
+    image: gcr.io/cadvisor/cadvisor:v0.47.1
+    container_name: cadvisor
+    hostname: cadvisor
+    restart: unless-stopped
+    privileged: true
+    devices:
+      - /dev/kmsg:/dev/kmsg
+    volumes:
+      - /:/rootfs:ro
+      - /var/run:/var/run:ro
+      - /sys:/sys:ro
+      - /var/lib/docker:/var/lib/docker:ro
+      - /cgroup:/cgroup:ro #doesn't work on MacOS only for Linux
+    expose:
+      - 3000
+    labels:
+      org.label-schema.group: "monitoring"
+
+  # ALERT MANAGMENT BY PROMETHEUS
+  alertmanager:
+    image: prom/alertmanager:v0.25.0
+    container_name: alertmanager
+    hostname: alertmanager
+    restart: unless-stopped
+    volumes:
+      - ./alertmanager.yml:/etc/alertmanager.yml
+      - ./alertmanager_data:/alertmanager
+    command:
+      - '--config.file=/etc/alertmanager.yml'
+      - '--storage.path=/alertmanager'
+    expose:
+      - 9093
+    labels:
+      org.label-schema.group: "monitoring"
+
+networks:
+  default:
+    name: $DOCKER_MY_NETWORK
+    external: true
+```
+</details>
+
+<details>
+<summary>`alertmanager.yml`</summary>
 ```yml
 route:
    receiver: 'ntfy'
@ -303,15 +429,10 @@ receivers:
  - url: 'https://ntfy.example.com/alertmanager'
    send_resolved: true
 ```
+</details>

-test:<br>
-`curl -H 'Content-Type: application/json' -d '[{"labels":{"alertname":"blabla"}}]' https://alert.example.com/api/v1/alerts`
-
-reload rules
-`curl -X POST http://admin:admin@<host-ip>:9090/-/reload`
-
-
-`alert.rules`
+<details>
+<summary>`alert.rules`</summary>
 ```yml
 groups:
  - name: host
@ -324,8 +445,58 @@ groups:
        annotations:
          description: "Diskspace is low!"
 ```
+</details>


+
+<details>
+<summary>`prometheus.yml`</summary>
+```yml
+global:
+  scrape_interval:     15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  - job_name: 'nodeexporter'
+    static_configs:
+      - targets: ['nodeexporter:9100']
+
+  - job_name: 'cadvisor'
+    static_configs:
+      - targets: ['cadvisor:8080']
+
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+
+alerting:
+  alertmanagers:
+  - scheme: http
+    static_configs:
+    - targets: 
+      - 'alertmanager:9093'
+
+rule_files:
+  - '/etc/prometheus/rules/alert.rules'
+```
+</details>
+
+
+[ntfy](https://github.com/DoTheEvo/selfhosted-apps-docker/tree/master/gotify-ntfy-signal)
+will be used to notify about prometheus alerts.
+
+test:<br>
+`curl -H 'Content-Type: application/json' -d '[{"labels":{"alertname":"blabla"}}]' https://alert.example.com/api/v1/alerts`
+
+reload rules
+`curl -X POST http://admin:admin@<host-ip>:9090/-/reload`
+
+
+</details>
+
+---
+---
+
 # Update

 Manual image update: