# values.yaml (11 KB)
  # Prometheus server: container resources, extra scrape jobs and ingress.
  prometheus:
    prometheusSpec:
      resources:
        limits:
          cpu: 1
          memory: 512Mi
        requests:
          cpu: 1
          memory: 256Mi

      # Scrape jobs appended to the generated Prometheus configuration.
      # Several jobs below share one relabelling pattern: the listed target is
      # copied into the `target` URL parameter and the `instance` label, and
      # the scrape itself is redirected to the address given in `replacement`.
      additionalScrapeConfigs:
        - job_name: tp-link
          scrape_interval: 10s
          static_configs:
            - targets:
                - 192.168.0.1
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
            - source_labels: [__param_target]
              target_label: instance
            - replacement: 192.168.0.145:9101
              target_label: __address__

        - job_name: smartctl
          scrape_interval: 10m
          static_configs:
            - targets:
                - 192.168.0.145:9633

        - job_name: bird
          scrape_interval: 1m
          static_configs:
            - targets:
                - vm1.wugi.info:9324

        - job_name: exim
          scrape_interval: 1m
          static_configs:
            - targets:
                - vm1.wugi.info:9636

        - job_name: node
          scrape_interval: 5s
          static_configs:
            - targets:
                - vm1.wugi.info:9100
                - vm2.wugi.info:9100
                - notebook.wugi.info:9100

        - job_name: dnsmasq
          honor_labels: true
          static_configs:
            - targets:
                - 192.168.0.145:9153

        # Probe jobs served by 192.168.0.145:9115; `params.module` selects
        # the probe module for each job.
        - job_name: http
          scrape_interval: 30s
          metrics_path: /probe
          params:
            module: [http_2xx]
          static_configs:
            - targets:
                - https://wugi.info/
                - https://guix.wugi.info/
                - https://blog.wugi.info/
                - https://peertube.home.wugi.info/
                - http://ci.guix.gnu.org.intr
                - http://ci.guix.gnu.org.wugi.info
                - https://ci.guix.gnu.org
                - https://bordeaux.guix.gnu.org
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
            - source_labels: [__param_target]
              target_label: instance
            - replacement: 192.168.0.145:9115
              target_label: __address__

        - job_name: icmp
          scrape_interval: 30s
          metrics_path: /probe
          params:
            module: [icmp]
          static_configs:
            - targets:
                - 192.168.0.1
                - 81.95.28.27
                - 78.108.80.230
                - 78.108.87.250
                - 78.108.91.250
                - 172.16.103.1
                - ci.guix.gnu.org
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
            - source_labels: [__param_target]
              target_label: instance
            - replacement: 192.168.0.145:9115
              target_label: __address__

        - job_name: smtps
          scrape_interval: 30s
          metrics_path: /probe
          params:
            module: [smtp_starttls]
          static_configs:
            - targets:
                - smtp.wugi.info:25
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
            - source_labels: [__param_target]
              target_label: instance
            - replacement: 192.168.0.145:9115
              target_label: __address__

        - job_name: imaps
          scrape_interval: 30s
          metrics_path: /probe
          params:
            module: [imap_starttls]
          static_configs:
            - targets:
                - imap.wugi.info:143
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
            - source_labels: [__param_target]
              target_label: instance
            - replacement: 192.168.0.145:9115
              target_label: __address__

        - job_name: dns
          scrape_interval: 10m
          metrics_path: /probe
          params:
            module: [dns_udp_mjru_wugi_info]
          static_configs:
            - targets:
                - 8.8.8.8
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
            - source_labels: [__param_target]
              target_label: instance
            - replacement: 192.168.0.145:9115
              target_label: __address__

        # SSH probe: the static `module` label is forwarded as the `module`
        # URL parameter and then dropped from the scraped metrics.
        - job_name: ssh
          metrics_path: /ssh
          static_configs:
            - targets:
                - vm1.wugi.info:22
              labels:
                module: default
          relabel_configs:
            - source_labels: [__address__]
              target_label: __param_target
            - source_labels: [__param_target]
              target_label: instance
            - replacement: 192.168.0.145:9312
              target_label: __address__
            - source_labels: [module]
              target_label: __param_module
          metric_relabel_configs:
            - action: labeldrop
              regex: ^(module)$

        - job_name: ssh-metrics
          metrics_path: /metrics
          static_configs:
            - targets:
                - 192.168.0.145:9312

        - job_name: lvm
          scrape_interval: 5m
          static_configs:
            - targets:
                - 192.168.0.145:9080

        - job_name: crowdsec_guixsd
          scrape_interval: 10s
          static_configs:
            - targets:
                - 192.168.0.145:6060
              labels:
                machine: guixsd

        - job_name: obs
          metrics_path: /metrics
          static_configs:
            - targets:
                - 192.168.0.145:9407

        - job_name: windows
          scrape_interval: 1m
          static_configs:
            - targets:
                - windows.local:9182

        - job_name: pushgateway
          honor_labels: true
          static_configs:
            - targets:
                - 192.168.0.145:9095

    # Ingress through the nginx class, limited to the listed source ranges,
    # with a certificate requested from the `letsencrypt` cluster issuer.
    ingress:
      enabled: true
      ingressClassName: nginx
      annotations:
        acme.cert-manager.io/http01-ingress-class: nginx
        cert-manager.io/cluster-issuer: letsencrypt
        nginx.ingress.kubernetes.io/whitelist-source-range: '10.0.0.0/8,192.168.0.0/16'
      hosts:
        - prometheus.home.wugi.info
      tls:
        - secretName: prometheus-general-tls
          hosts:
            - prometheus.home.wugi.info
  # Alertmanager: ingress through the nginx class, limited to the listed
  # source ranges, with a certificate from the `letsencrypt` cluster issuer.
  alertmanager:
    ingress:
      enabled: true
      ingressClassName: nginx
      annotations:
        acme.cert-manager.io/http01-ingress-class: nginx
        cert-manager.io/cluster-issuer: letsencrypt
        nginx.ingress.kubernetes.io/whitelist-source-range: '10.0.0.0/8,192.168.0.0/16'
      hosts:
        - alertmanager.home.wugi.info
      tls:
        - secretName: alertmanager-general-tls
          hosts:
            - alertmanager.home.wugi.info
  ## Provide custom recording or alerting rules to be deployed into the cluster.
  ##
  ## One entry per exporter; each entry holds a single rule group. Most
  ## exporters pair an `absent(...)` alert (metric missing altogether) with a
  ## threshold alert on the metric's value.
  additionalPrometheusRulesMap:
    bird:
      groups:
        - name: bird-exporter
          rules:
            - alert: BirdAbsent
              expr: absent(sum by (instance,import_filter,proto) (bird_protocol_prefix_import_count{proto="BGP"}))
              for: 1m
              labels:
                severity: critical
              annotations:
                summary: Absent bird imports
                description: Absent bird imports.
            - alert: BirdNoImports
              expr: >-
                sum by (instance,import_filter,proto) (bird_protocol_prefix_import_count{proto="BGP"})
                == 0
              labels:
                severity: critical
              annotations:
                summary: Bird No Imports
                details: '{{ $value }} prefixes imported totally'
                description: >-
                  All {{ $labels.proto }} sessions are unused! External connectivity
                  affected

    exim:
      groups:
        - name: exim-exporter
          rules:
            - alert: EximAbsent
              expr: absent(exim_queue)
              for: 1m
              labels:
                severity: critical
              annotations:
                summary: Absent exim queue
                description: Absent exim queue.
            - alert: EximQueue
              expr: exim_queue != 0
              for: 1h
              labels:
                severity: warning
              annotations:
                summary: Exim non-empty queue
                description: '{{ $value }} messages in exim queue.'

    ssh:
      groups:
        - name: ssh
          rules:
            - alert: SshFailure
              expr: ssh_success != 1
              labels:
                severity: warning
              annotations:
                summary: SSH connection failure
                description: SSH connection failure.

    lvm:
      groups:
        - name: lvm
          rules:
            - alert: LvmLvDataAlmostOutOfSpace
              expr: 100 - lvm_lv_data_percent{lv_name="thinpool2"} < 10
              labels:
                severity: critical
              annotations:
                summary: Logical Thin Volume has less than 10% space left.
                description: >-
                  Logical Thin Volume {{ $labels.lv_name }} at
                  {{ $labels.instance }} has only {{ printf "%.2f" $value }}%
                  available space left.
            - alert: LvmLvDataPercentAbsent
              expr: absent(lvm_lv_data_percent{lv_name="thinpool2"})
              for: 10m
              labels:
                severity: critical
              annotations:
                summary: Absent metrics for Logical Thin Volume.
                # absent() only carries labels taken from the selector's
                # equality matchers (lv_name here), so there is no `instance`
                # label to interpolate; name the volume instead.
                description: >-
                  Absent metrics for Logical Thin Volume {{ $labels.lv_name }}.

    smartctl:
      groups:
        - name: smartctl-exporter
          rules:
            - alert: DiskAbsentReallocatedSectors
              expr: absent(smartctl_device_attribute{attribute_id="5", attribute_value_type="raw"})
              for: 20m
              labels:
                severity: critical
              annotations:
                summary: Absent smartctl reallocated sectors counts
                description: Absent smartctl reallocated sectors counts.
            - alert: DiskReallocatedSectors
              expr: >-
                deriv(smartctl_device_attribute{attribute_id="5",attribute_value_type="raw"}[15m])
                > 0
              labels:
                severity: critical
              annotations:
                summary: Reallocated sectors detected.
                # $value is the per-second slope returned by deriv(), not the
                # absolute sector count, so it is reported as a growth rate.
                description: >-
                  Reallocated sector count on disk {{ $labels.model_name }} is
                  growing ({{ $value }} sectors/s over the last 15m).

    windows:
      groups:
        - name: windows-exporter
          rules:
            - alert: WindowsCollectorFail
              expr: windows_exporter_collector_success != 1
              for: 1m
              labels:
                severity: warning
              annotations:
                summary: >-
                  Windows exporter collector {{ $labels.collector }} failed (instance
                  {{ $labels.instance }})
            - alert: WindowsFilesystemAlmostOutOfSpace
              expr: >-
                windows_logical_disk_free_bytes / windows_logical_disk_size_bytes *
                100 < 5
              for: 1h
              labels:
                severity: warning
              annotations:
                summary: Filesystem has less than 5% space left.
                description: >-
                  Filesystem on {{ $labels.volume }} at {{ $labels.instance }}
                  has only {{ printf "%.2f" $value }}% available space left.
  # Grafana: ingress through the nginx class, limited to the listed source
  # ranges, with a certificate from the `letsencrypt` cluster issuer.
  grafana:
    ingress:
      enabled: true
      ingressClassName: nginx
      annotations:
        acme.cert-manager.io/http01-ingress-class: nginx
        cert-manager.io/cluster-issuer: letsencrypt
        nginx.ingress.kubernetes.io/whitelist-source-range: '10.0.0.0/8,192.168.0.0/16'
      hosts:
        - grafana.home.wugi.info
      tls:
        - secretName: grafana-general-tls
          hosts:
            - grafana.home.wugi.info
  # node-exporter scheduling: skip nodes labelled
  # `prometheus-node-exporter.cluster.local/schedulable=false` and nodes whose
  # `kubernetes.io/os` label is `windows`. Both expressions sit in the same
  # selector term, so a node must satisfy both.
  prometheus-node-exporter:
    affinity:
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
                - key: prometheus-node-exporter.cluster.local/schedulable
                  operator: NotIn
                  values: ['false']
                - key: kubernetes.io/os
                  operator: NotIn
                  values: ['windows']
  # kube-controller-manager scrape target: a fixed endpoint IP, scraped over
  # HTTPS with certificate verification disabled.
  # NOTE(review): https is enabled while the service port is 10252; confirm
  # the component actually serves TLS on that port in this cluster.
  kubeControllerManager:
    endpoints:
      - 192.168.154.1
    service:
      port: 10252
      targetPort: 10252
    serviceMonitor:
      # Real booleans rather than the strings 'true': in Helm templates any
      # non-empty string (including 'false') is truthy, so string values
      # cannot be used to switch these options off.
      https: true
      insecureSkipVerify: true
  # kube-scheduler scrape target: a fixed endpoint IP, scraped over HTTPS
  # with certificate verification disabled.
  # NOTE(review): https is enabled while the service port is 10251; confirm
  # the component actually serves TLS on that port in this cluster.
  kubeScheduler:
    endpoints:
      - 192.168.154.1
    service:
      port: 10251
      targetPort: 10251
    serviceMonitor:
      # Real booleans rather than the strings 'true': in Helm templates any
      # non-empty string (including 'false') is truthy, so string values
      # cannot be used to switch these options off.
      https: true
      insecureSkipVerify: true
  402. kubeEtcd:
  403. endpoints:
  404. - 192.168.154.1