---
## Prometheus server: pod resources, extra static scrape jobs, and ingress.
prometheus:
  prometheusSpec:
    ## CPU/memory requests and limits for the Prometheus container.
    resources:
      requests:
        cpu: 1
        memory: 256Mi
      limits:
        cpu: 1
        memory: 512Mi

    ## Additional scrape jobs, all with statically configured targets.
    ##
    ## Several jobs below share the same three relabel rules: the original
    ## target address is copied into the `target` URL parameter, kept as the
    ## `instance` label, and the scrape itself is redirected to an exporter
    ## address via `replacement`.
    additionalScrapeConfigs:
      ## Scrape is redirected to 192.168.0.145:9101 with the listed address
      ## passed as the `target` parameter.
      - job_name: tp-link
        scrape_interval: 10s
        static_configs:
          - targets:
              - 192.168.0.1
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: 192.168.0.145:9101

      - job_name: smartctl
        scrape_interval: 10m
        static_configs:
          - targets:
              - 192.168.0.145:9633

      - job_name: bird
        scrape_interval: 1m
        static_configs:
          - targets:
              - vm1.wugi.info:9324

      - job_name: exim
        scrape_interval: 1m
        static_configs:
          - targets:
              - vm1.wugi.info:9636

      - job_name: node
        scrape_interval: 5s
        static_configs:
          - targets:
              - vm1.wugi.info:9100
              - vm2.wugi.info:9100
              - notebook.wugi.info:9100

      ## honor_labels keeps labels exposed by the target on conflict.
      - job_name: dnsmasq
        honor_labels: true
        static_configs:
          - targets:
              - 192.168.0.145:9153

      ## Probe jobs served by 192.168.0.145:9115 under /probe
      ## (presumably a blackbox-style exporter; confirm).
      - job_name: http
        scrape_interval: 30s
        metrics_path: /probe
        params:
          module: [http_2xx]
        static_configs:
          - targets:
              - https://wugi.info/
              - https://guix.wugi.info/
              - https://blog.wugi.info/
              - https://peertube.home.wugi.info/
              - http://ci.guix.gnu.org.intr
              - http://ci.guix.gnu.org.wugi.info
              - https://ci.guix.gnu.org
              - https://bordeaux.guix.gnu.org
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: 192.168.0.145:9115

      - job_name: icmp
        scrape_interval: 30s
        metrics_path: /probe
        params:
          module: [icmp]
        static_configs:
          - targets:
              - 192.168.0.1
              - 81.95.28.27
              - 78.108.80.230
              - 78.108.87.250
              - 78.108.91.250
              - 172.16.103.1
              - ci.guix.gnu.org
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: 192.168.0.145:9115

      - job_name: smtps
        scrape_interval: 30s
        metrics_path: /probe
        params:
          module: [smtp_starttls]
        static_configs:
          - targets:
              - smtp.wugi.info:25
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: 192.168.0.145:9115

      - job_name: imaps
        scrape_interval: 30s
        metrics_path: /probe
        params:
          module: [imap_starttls]
        static_configs:
          - targets:
              - imap.wugi.info:143
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: 192.168.0.145:9115

      - job_name: dns
        scrape_interval: 10m
        metrics_path: /probe
        params:
          module: [dns_udp_mjru_wugi_info]
        static_configs:
          - targets:
              - 8.8.8.8
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: 192.168.0.145:9115

      ## SSH probe via 192.168.0.145:9312 under /ssh. The static `module`
      ## label is forwarded as the `module` URL parameter and then dropped
      ## from the stored series by metric_relabel_configs.
      - job_name: ssh
        metrics_path: /ssh
        static_configs:
          - targets:
              - vm1.wugi.info:22
            labels:
              module: default
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: 192.168.0.145:9312
          - source_labels: [module]
            target_label: __param_module
        metric_relabel_configs:
          - action: labeldrop
            regex: ^(module)$

      ## Direct scrape of the same 192.168.0.145:9312 endpoint under /metrics.
      - job_name: ssh-metrics
        metrics_path: /metrics
        static_configs:
          - targets:
              - 192.168.0.145:9312

      - job_name: lvm
        scrape_interval: 5m
        static_configs:
          - targets:
              - 192.168.0.145:9080

      ## Every series from this target gets the static label machine=guixsd.
      - job_name: crowdsec_guixsd
        scrape_interval: 10s
        static_configs:
          - targets:
              - 192.168.0.145:6060
            labels:
              machine: guixsd

      - job_name: obs
        metrics_path: /metrics
        static_configs:
          - targets:
              - 192.168.0.145:9407

      - job_name: windows
        scrape_interval: 1m
        static_configs:
          - targets:
              - windows.local:9182

      ## honor_labels keeps labels exposed by the target on conflict.
      - job_name: pushgateway
        honor_labels: true
        static_configs:
          - targets:
              - 192.168.0.145:9095

  ## Ingress for the Prometheus UI: nginx class, certificate issued through
  ## the `letsencrypt` cluster issuer, access limited to the listed CIDRs.
  ingress:
    enabled: true
    ingressClassName: nginx
    annotations:
      acme.cert-manager.io/http01-ingress-class: nginx
      cert-manager.io/cluster-issuer: letsencrypt
      nginx.ingress.kubernetes.io/whitelist-source-range: '10.0.0.0/8,192.168.0.0/16'
    hosts:
      - prometheus.home.wugi.info
    tls:
      - secretName: prometheus-general-tls
        hosts:
          - prometheus.home.wugi.info
## Alertmanager ingress: nginx class, certificate issued through the
## `letsencrypt` cluster issuer, access limited to the listed CIDRs.
alertmanager:
  ingress:
    enabled: true
    ingressClassName: nginx
    annotations:
      acme.cert-manager.io/http01-ingress-class: nginx
      cert-manager.io/cluster-issuer: letsencrypt
      nginx.ingress.kubernetes.io/whitelist-source-range: '10.0.0.0/8,192.168.0.0/16'
    hosts:
      - alertmanager.home.wugi.info
    tls:
      - secretName: alertmanager-general-tls
        hosts:
          - alertmanager.home.wugi.info
## Provide custom recording or alerting rules to be deployed into the cluster.
##
## Each top-level key below holds one rule group. Every `*Absent` alert uses
## absent(), whose result only carries labels taken from equality matchers in
## the selector, so templates on those alerts must not reference other labels.
additionalPrometheusRulesMap:
  bird:
    groups:
      - name: bird-exporter
        rules:
          ## Fires when no BGP import-count series exists at all for 1m.
          - alert: BirdAbsent
            expr: >-
              absent(sum by (instance,import_filter,proto)
              (bird_protocol_prefix_import_count{proto="BGP"}))
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: Absent bird imports
              description: Absent bird imports.
          ## Fires as soon as a BGP session reports zero imported prefixes.
          - alert: BirdNoImports
            expr: >-
              sum by (instance,import_filter,proto)
              (bird_protocol_prefix_import_count{proto="BGP"})
              == 0
            labels:
              severity: critical
            annotations:
              summary: Bird No Imports
              details: '{{ $value }} prefixes imported totally'
              description: >-
                All {{ $labels.proto }} sessions are unused! External connectivity
                affected

  exim:
    groups:
      - name: exim-exporter
        rules:
          - alert: EximAbsent
            expr: absent(exim_queue)
            for: 1m
            labels:
              severity: critical
            annotations:
              summary: Absent exim queue
              description: Absent exim queue.
          ## Queue has stayed non-empty for a full hour.
          - alert: EximQueue
            expr: exim_queue != 0
            for: 1h
            labels:
              severity: warning
            annotations:
              summary: Exim non-empty queue
              description: '{{ $value }} messages in exim queue.'

  ssh:
    groups:
      - name: ssh
        rules:
          - alert: SshFailure
            expr: ssh_success != 1
            labels:
              severity: warning
            annotations:
              summary: SSH connection failure
              description: SSH connection failure.

  lvm:
    groups:
      - name: lvm
        rules:
          ## Free share of thinpool2 (100 - used percent) dropped below 10.
          - alert: LvmLvDataAlmostOutOfSpace
            expr: 100 - lvm_lv_data_percent{lv_name="thinpool2"} < 10
            labels:
              severity: critical
            annotations:
              summary: Logical Thin Volume has less than 10% space left.
              description: >-
                Logical Thin Volume {{ $labels.lv_name }} at {{ $labels.instance
                }} has only {{ printf "%.2f" $value }}% available space left.
          ## absent() output has no `instance` label, only `lv_name` from the
          ## equality matcher, so the description names the volume instead of
          ## rendering an empty instance.
          - alert: LvmLvDataPercentAbsent
            expr: absent(lvm_lv_data_percent{lv_name="thinpool2"})
            for: 10m
            labels:
              severity: critical
            annotations:
              summary: Absent metrics for Logical Thin Volume.
              description: >-
                Absent metrics for Logical Thin Volume {{ $labels.lv_name }}.

  smartctl:
    groups:
      - name: smartctl-exporter
        rules:
          - alert: DiskAbsentReallocatedSectors
            expr: >-
              absent(smartctl_device_attribute{attribute_id="5",
              attribute_value_type="raw"})
            for: 20m
            labels:
              severity: critical
            annotations:
              summary: Absent smartctl reallocated sectors counts
              description: Absent smartctl reallocated sectors counts.
          ## deriv() needs at least two samples in its range. The smartctl job
          ## is scraped every 10m, so a 15m window frequently held only one
          ## sample and the alert flapped; 30m always spans at least two.
          ## NOTE(review): $value here is the per-second slope, not a sector
          ## count; consider rewording the description.
          - alert: DiskReallocatedSectors
            expr: >-
              deriv(smartctl_device_attribute{attribute_id="5",attribute_value_type="raw"}[30m])
              > 0
            labels:
              severity: critical
            annotations:
              summary: Reallocated sectors detected.
              description: >-
                Disk {{ $labels.model_name }} has {{ $value }} reallocated
                sectors.

  windows:
    groups:
      - name: windows-exporter
        rules:
          - alert: WindowsCollectorFail
            expr: windows_exporter_collector_success != 1
            for: 1m
            labels:
              severity: warning
            annotations:
              summary: >-
                Windows exporter collector {{ $labels.collector }} failed (instance
                {{ $labels.instance }})
          ## Free/size ratio expressed as a percentage, below 5 for an hour.
          - alert: WindowsFilesystemAlmostOutOfSpace
            expr: >-
              windows_logical_disk_free_bytes / windows_logical_disk_size_bytes *
              100 < 5
            for: 1h
            labels:
              severity: warning
            annotations:
              summary: Filesystem has less than 5% space left.
              description: >-
                Filesystem on {{ $labels.volume }} at {{ $labels.instance }}
                has only {{ printf "%.2f" $value }}% available space left.
## Grafana ingress: nginx class, certificate issued through the
## `letsencrypt` cluster issuer, access limited to the listed CIDRs.
grafana:
  ingress:
    enabled: true
    ingressClassName: nginx
    annotations:
      acme.cert-manager.io/http01-ingress-class: nginx
      cert-manager.io/cluster-issuer: letsencrypt
      nginx.ingress.kubernetes.io/whitelist-source-range: '10.0.0.0/8,192.168.0.0/16'
    hosts:
      - grafana.home.wugi.info
    tls:
      - secretName: grafana-general-tls
        hosts:
          - grafana.home.wugi.info
## Node exporter scheduling: a node is eligible only if both expressions
## hold, i.e. it is not labelled schedulable="false" for this exporter and
## its OS label is not "windows".
prometheus-node-exporter:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: prometheus-node-exporter.cluster.local/schedulable
                operator: NotIn
                ## Quoted so the value stays a string, not a boolean.
                values: ['false']
              - key: kubernetes.io/os
                operator: NotIn
                values: ['windows']
## kube-controller-manager scraping through a fixed endpoint address.
kubeControllerManager:
  endpoints:
    - 192.168.154.1
  service:
    ## NOTE(review): 10252 was historically the plain-HTTP port of the
    ## controller manager; confirm it serves HTTPS in this cluster.
    port: 10252
    targetPort: 10252
  serviceMonitor:
    ## Real booleans instead of the quoted strings 'true': a quoted string
    ## is truthy in Go templates whatever it says, so a later edit to
    ## 'false' would not have disabled either flag.
    https: true
    insecureSkipVerify: true
## kube-scheduler scraping through a fixed endpoint address.
kubeScheduler:
  endpoints:
    - 192.168.154.1
  service:
    ## NOTE(review): 10251 was historically the plain-HTTP port of the
    ## scheduler; confirm it serves HTTPS in this cluster.
    port: 10251
    targetPort: 10251
  serviceMonitor:
    ## Real booleans instead of the quoted strings 'true': a quoted string
    ## is truthy in Go templates whatever it says, so a later edit to
    ## 'false' would not have disabled either flag.
    https: true
    insecureSkipVerify: true
## etcd scraping through a fixed endpoint address.
kubeEtcd:
  endpoints:
    - 192.168.154.1
|