update
This commit is contained in:
parent
fb75aaea58
commit
6b152c1404
|
@ -20,8 +20,8 @@ So maybe just go play with that.
|
|||
Setup here starts off with the basics and then theres chapters how to add features
|
||||
|
||||
* **[Core prometheus+grafana](#Overview)** - to get nice dashboards with metrics from docker host and containers
|
||||
* **Pushgateway** - how to use it to allow pushing metrics in to prometheus from anywhere
|
||||
* **Alertmanager** - how to use it for notifications
|
||||
* **[Pushgateway](#Pushgateway)** - how to use it to allow pushing metrics in to prometheus from anywhere
|
||||
* **[Alertmanager](#Alertmanager)** - how to use it for notifications
|
||||
* **Loki** - how to do the above things but for logs, not just metrics
|
||||
* **Caddy** - adding dashboard for reverse proxy info
|
||||
|
||||
|
@ -111,6 +111,7 @@ services:
|
|||
container_name: prometheus
|
||||
hostname: prometheus
|
||||
restart: unless-stopped
|
||||
user: root
|
||||
depends_on:
|
||||
- cadvisor
|
||||
command:
|
||||
|
@ -211,9 +212,7 @@ If one does not exist yet: `docker network create caddy_net`
|
|||
|
||||
[Official documentation.](https://prometheus.io/docs/prometheus/latest/configuration/configuration/)
|
||||
|
||||
A config file for prometheus, bind mounted in to prometheus container.</br>
|
||||
Contains the bare minimum setup of targets from where metrics are to be pulled.
|
||||
|
||||
Contains the bare minimum setup of targets from where metrics are to be pulled.<br>
|
||||
Stefanprodan [gives](https://github.com/stefanprodan/dockprom/blob/master/prometheus/prometheus.yml)
|
||||
a custom shorter scrape intervals, but I feel thats not really
|
||||
[necessary](https://www.robustperception.io/keep-it-simple-scrape_interval-id/).
|
||||
|
@ -224,10 +223,6 @@ global:
|
|||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
rule_files:
|
||||
- "/etc/prometheus/rules/*.rules"
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape.
|
||||
scrape_configs:
|
||||
- job_name: 'nodeexporter'
|
||||
static_configs:
|
||||
|
@ -256,10 +251,6 @@ graf.{$MY_DOMAIN} {
|
|||
prom.{$MY_DOMAIN} {
|
||||
reverse_proxy prometheus:9090
|
||||
}
|
||||
|
||||
push.{$MY_DOMAIN} {
|
||||
reverse_proxy pushgateway:9091
|
||||
}
|
||||
```
|
||||
|
||||
# First run and Grafana configuration
|
||||
|
@ -283,16 +274,151 @@ the time interval is set to show last 1h instead of last 15m
|
|||
* **monitoring_services.json** - dashboar showing docker containers metrics
|
||||
of containers that are labeled `monitoring`
|
||||
|
||||
---
|
||||
|
||||
![interface-pic](https://i.imgur.com/wzwgBkp.png)
|
||||
|
||||
# Alertmanager
|
||||
---
|
||||
|
||||
[ntfy](https://github.com/DoTheEvo/selfhosted-apps-docker/tree/master/gotify-ntfy-signal)
|
||||
will be used to notify about prometheus alerts.
|
||||
<details>
|
||||
<summary><h1>Pushgateway</h1></summary>
|
||||
|
||||
`alertmanager.yml`
|
||||
The setup and real world use of pushgateway, along with small steps
|
||||
when learning it are in the repo -
|
||||
[Veeam Prometheus Grafana](https://github.com/DoTheEvo/veeam-prometheus-grafana)
|
||||
|
||||
Including pushing information from windows powershell.
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
---
|
||||
|
||||
<details>
|
||||
<summary><h1>Alertmanager</h1></summary>
|
||||
|
||||
Several changes are needed
|
||||
|
||||
- New container - `alertmanager` added to the compose file.
|
||||
- New file - `alertmanager.yml` bind mounted in the alertmanager container.<br>
|
||||
This file contains configuration about where and how to deliver alerts.<br>
|
||||
A selfhosted
|
||||
[ntfy](https://github.com/DoTheEvo/selfhosted-apps-docker/tree/master/gotify-ntfy-signal)
|
||||
webhook is used that gets alerts to a phone app.
|
||||
- New file - `alert.rules` mounted in to prometheus container<br>
|
||||
This files defines when value of some metric becomes an alert event.
|
||||
- Changed file - `prometheus.yml` added `alerting` section
|
||||
and the path to the `rule_files`
|
||||
|
||||
<details>
|
||||
<summary>`docker-compose.yml`</summary>
|
||||
```yml
|
||||
services:
|
||||
|
||||
# MONITORING SYSTEM AND THE METRICS DATABASE
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.42.0
|
||||
container_name: prometheus
|
||||
hostname: prometheus
|
||||
restart: unless-stopped
|
||||
user: root
|
||||
depends_on:
|
||||
- cadvisor
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
- '--storage.tsdb.path=/prometheus'
|
||||
- '--web.console.libraries=/etc/prometheus/console_libraries'
|
||||
- '--web.console.templates=/etc/prometheus/consoles'
|
||||
- '--storage.tsdb.retention.time=500h'
|
||||
- '--web.enable-lifecycle'
|
||||
volumes:
|
||||
- ./prometheus_data:/prometheus
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
- ./alert.rules:/etc/prometheus/rules/alert.rules
|
||||
expose:
|
||||
- 9090:9090
|
||||
labels:
|
||||
org.label-schema.group: "monitoring"
|
||||
|
||||
# WEB BASED UI VISUALISATION OF METRICS
|
||||
grafana:
|
||||
image: grafana/grafana:9.3.6
|
||||
container_name: grafana
|
||||
hostname: grafana
|
||||
restart: unless-stopped
|
||||
env_file: .env
|
||||
user: root
|
||||
volumes:
|
||||
- ./grafana_data:/var/lib/grafana
|
||||
expose:
|
||||
- 3000
|
||||
labels:
|
||||
org.label-schema.group: "monitoring"
|
||||
|
||||
# HOST LINUX MACHINE METRICS EXPORTER
|
||||
nodeexporter:
|
||||
image: prom/node-exporter:v1.5.0
|
||||
container_name: nodeexporter
|
||||
hostname: nodeexporter
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- '--path.procfs=/host/proc'
|
||||
- '--path.rootfs=/rootfs'
|
||||
- '--path.sysfs=/host/sys'
|
||||
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
expose:
|
||||
- 9100
|
||||
labels:
|
||||
org.label-schema.group: "monitoring"
|
||||
|
||||
# DOCKER CONTAINERS METRICS EXPORTER
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:v0.47.1
|
||||
container_name: cadvisor
|
||||
hostname: cadvisor
|
||||
restart: unless-stopped
|
||||
privileged: true
|
||||
devices:
|
||||
- /dev/kmsg:/dev/kmsg
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:ro
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker:/var/lib/docker:ro
|
||||
- /cgroup:/cgroup:ro #doesn't work on MacOS only for Linux
|
||||
expose:
|
||||
- 3000
|
||||
labels:
|
||||
org.label-schema.group: "monitoring"
|
||||
|
||||
# ALERT MANAGMENT BY PROMETHEUS
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.25.0
|
||||
container_name: alertmanager
|
||||
hostname: alertmanager
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./alertmanager.yml:/etc/alertmanager.yml
|
||||
- ./alertmanager_data:/alertmanager
|
||||
command:
|
||||
- '--config.file=/etc/alertmanager.yml'
|
||||
- '--storage.path=/alertmanager'
|
||||
expose:
|
||||
- 9093
|
||||
labels:
|
||||
org.label-schema.group: "monitoring"
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: $DOCKER_MY_NETWORK
|
||||
external: true
|
||||
```
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>`alertmanager.yml`</summary>
|
||||
```yml
|
||||
route:
|
||||
receiver: 'ntfy'
|
||||
|
@ -303,15 +429,10 @@ receivers:
|
|||
- url: 'https://ntfy.example.com/alertmanager'
|
||||
send_resolved: true
|
||||
```
|
||||
</details>
|
||||
|
||||
test:<br>
|
||||
`curl -H 'Content-Type: application/json' -d '[{"labels":{"alertname":"blabla"}}]' https://alert.example.com/api/v1/alerts`
|
||||
|
||||
reload rules
|
||||
`curl -X POST http://admin:admin@<host-ip>:9090/-/reload`
|
||||
|
||||
|
||||
`alert.rules`
|
||||
<details>
|
||||
<summary>`alert.rules`</summary>
|
||||
```yml
|
||||
groups:
|
||||
- name: host
|
||||
|
@ -324,8 +445,58 @@ groups:
|
|||
annotations:
|
||||
description: "Diskspace is low!"
|
||||
```
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
<details>
|
||||
<summary>`prometheus.yml`</summary>
|
||||
```yml
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'nodeexporter'
|
||||
static_configs:
|
||||
- targets: ['nodeexporter:9100']
|
||||
|
||||
- job_name: 'cadvisor'
|
||||
static_configs:
|
||||
- targets: ['cadvisor:8080']
|
||||
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- scheme: http
|
||||
static_configs:
|
||||
- targets:
|
||||
- 'alertmanager:9093'
|
||||
|
||||
rule_files:
|
||||
- '/etc/prometheus/rules/alert.rules'
|
||||
```
|
||||
</details>
|
||||
|
||||
|
||||
[ntfy](https://github.com/DoTheEvo/selfhosted-apps-docker/tree/master/gotify-ntfy-signal)
|
||||
will be used to notify about prometheus alerts.
|
||||
|
||||
test:<br>
|
||||
`curl -H 'Content-Type: application/json' -d '[{"labels":{"alertname":"blabla"}}]' https://alert.example.com/api/v1/alerts`
|
||||
|
||||
reload rules
|
||||
`curl -X POST http://admin:admin@<host-ip>:9090/-/reload`
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
---
|
||||
|
||||
# Update
|
||||
|
||||
Manual image update:
|
||||
|
|
Loading…
Reference in New Issue