Merge branch 'master' of gitlab.com:domaindrivenarchitecture/c4k-common

This commit is contained in:
erik 2022-12-23 16:02:34 +01:00
commit 257927b36e
32 changed files with 928 additions and 11 deletions

View file

@ -14,7 +14,7 @@ C4Context
Container_Boundary(k3s, "K3S") { Container_Boundary(k3s, "K3S") {
Component(lb, "metallb") Component(lb, "metallb")
Component(api, "K8s API") Component(api, "K8s API")
Component(grafana-agent, "Grfana Agent") Component(prometheus, "Prometheus in proxy mode")
Container_Boundary(app, "Application") { Container_Boundary(app, "Application") {
Component(app, "App-container") Component(app, "App-container")
Component(app-backup, "backup & restore-container using restic") Component(app-backup, "backup & restore-container using restic")
@ -41,8 +41,8 @@ C4Context
Rel(app, app-file-storage, "file") Rel(app, app-file-storage, "file")
Rel(app, app-db-storage, "*dbc") Rel(app, app-db-storage, "*dbc")
Rel(grafana-agent, api, "http") Rel(prometheus, api, "http")
Rel(grafana-agent, grafana, "http") Rel(prometheus, grafana, "http")
Rel(app-backup, backup, "s3") Rel(app-backup, backup, "s3")
Rel(app-backup, app-file-storage, "file") Rel(app-backup, app-file-storage, "file")

23
doc/Monitoring.md Normal file
View file

@ -0,0 +1,23 @@
# Runtime View
```mermaid
C4Context
title Runtime
Enterprise_Boundary(b0, "Infrastructure") {
System(grafana, "Grafana Cloud", "Monitoring your apps")
Container_Boundary(srv, "Small Server") {
Container_Boundary(k3s, "K3S") {
Component(api, "K8s API")
Container(prometheus, "Prometheus in proxy mode")
Container(node-exporter, "Node-Exporter Daemon Set")
Container_Boundary(app, "Application") {
Container(app, "App-container")
}
}
}
}
Rel(prometheus, api, "rest")
Rel(prometheus, grafana, "rest")
```

View file

@ -9,6 +9,7 @@
[orchestra "2021.01.01-1"] [orchestra "2021.01.01-1"]
[expound "0.9.0"] [expound "0.9.0"]
[clj-commons/clj-yaml "0.7.108"]] [clj-commons/clj-yaml "0.7.108"]]
:target-path "target/%s/"
:source-paths ["src/main/cljc" :source-paths ["src/main/cljc"
"src/main/clj"] "src/main/clj"]
:resource-paths ["src/main/resources"] :resource-paths ["src/main/resources"]
@ -20,10 +21,21 @@
"src/test/cljc"] "src/test/cljc"]
:resource-paths ["src/test/resources"] :resource-paths ["src/test/resources"]
:dependencies [[dda/data-test "0.1.1"]]} :dependencies [[dda/data-test "0.1.1"]]}
:dev {:plugins [[lein-shell "0.5.0"]]}} :dev {:plugins [[lein-shell "0.5.0"]]}
:uberjar {:aot :all
:main dda.c4k-common.uberjar
:uberjar-name "c4k-common-standalone.jar"
:dependencies [[org.clojure/tools.cli "1.0.206"]
[ch.qos.logback/logback-classic "1.3.0-alpha4"
:exclusions [com.sun.mail/javax.mail]]
[org.slf4j/jcl-over-slf4j "2.0.0-alpha1"]]}}
:release-tasks [["test"] :release-tasks [["test"]
["vcs" "assert-committed"] ["vcs" "assert-committed"]
["change" "version" "leiningen.release/bump-version" "release"] ["change" "version" "leiningen.release/bump-version" "release"]
["vcs" "commit"] ["vcs" "commit"]
["vcs" "tag" "v" "--no-sign"] ["vcs" "tag" "v" "--no-sign"]
["change" "version" "leiningen.release/bump-version"]]) ["change" "version" "leiningen.release/bump-version"]]
:aliases {"inst" ["shell"
"sh"
"-c"
"lein uberjar && sudo install -m=755 target/uberjar/c4k-common-standalone.jar /usr/local/bin/c4k-common-standalone.jar"]})

View file

@ -1,9 +1,11 @@
(ns dda.c4k-common.uberjar (ns dda.c4k-common.uberjar
(:gen-class)
(:require (:require
[clojure.spec.alpha :as s] [clojure.spec.alpha :as s]
[clojure.string :as cs] [clojure.string :as cs]
[clojure.tools.reader.edn :as edn] [clojure.tools.reader.edn :as edn]
[dda.c4k-common.common :as cm] [dda.c4k-common.common :as cm]
[dda.c4k-common.core :as core]
[expound.alpha :as expound])) [expound.alpha :as expound]))
(defn usage [name] (defn usage [name]
@ -50,3 +52,11 @@
(when (not auth-valid?) (when (not auth-valid?)
(println (println
(expound/expound-str auth-spec? auth-edn {:print-specs? false}))))))))))) (expound/expound-str auth-spec? auth-edn {:print-specs? false})))))))))))
;; Command-line entry point of this namespace: passes the raw command-line
;; arguments to `main-common`, together with the application name and the
;; config spec, auth spec, config defaults and manifest generator taken
;; from `dda.c4k-common.core`.
(defn -main [& cmd-args]
(main-common "c4k-common"
core/config?
core/auth?
core/config-defaults
core/k8s-objects
cmd-args))

View file

@ -44,7 +44,7 @@
coll)) coll))
(defn-spec replace-all-matching-values-by-new-value cp/map-or-seq? (defn-spec replace-all-matching-values-by-new-value cp/map-or-seq?
[coll string? [coll cp/map-or-seq?
value-to-match string? value-to-match string?
value-to-replace string?] value-to-replace string?]
(clojure.walk/postwalk #(if (and (= (type value-to-match) (type %)) (clojure.walk/postwalk #(if (and (= (type value-to-match) (type %))
@ -59,7 +59,7 @@
(apply concat vs))) (apply concat vs)))
(defn generate-common [my-config my-auth config-defaults k8s-objects] (defn generate-common [my-config my-auth config-defaults k8s-objects]
(let [resulting-config (merge config-defaults my-config my-auth)] (let [resulting-config (merge config-defaults my-config)]
(cs/join (cs/join
"\n---\n" "\n---\n"
(k8s-objects resulting-config)))) (k8s-objects resulting-config my-auth))))

View file

@ -0,0 +1,21 @@
(ns dda.c4k-common.core
(:require
[clojure.spec.alpha :as s]
[dda.c4k-common.yaml :as yaml]
[dda.c4k-common.common :as cm]
[dda.c4k-common.monitoring :as monitoring]))
;; Default configuration values; empty, i.e. nothing is defaulted here.
(def config-defaults {})
;; Spec for the config map: declares neither required nor optional keys.
(def config? (s/keys :req-un []
:opt-un []))
;; Spec for the auth map: declares neither required nor optional keys.
(def auth? (s/keys :req-un []
:opt-un []))
(defn k8s-objects
  "Generates the monitoring manifests for `config` and `auth` and returns
  them as a lazy sequence of YAML strings; nil entries are dropped before
  serialisation."
  [config auth]
  (->> (monitoring/generate config auth)
       (cm/concat-vec)
       (remove nil?)
       (map yaml/to-string)))

View file

@ -0,0 +1,131 @@
(ns dda.c4k-common.monitoring
(:require
[clojure.spec.alpha :as s]
#?(:cljs [shadow.resource :as rc])
#?(:clj [orchestra.core :refer [defn-spec]]
:cljs [orchestra.core :refer-macros [defn-spec]])
[dda.c4k-common.yaml :as yaml]
[dda.c4k-common.predicate :as cp]
[dda.c4k-common.common :as cm]
[clojure.string :as str]))
(s/def ::grafana-cloud-user cp/bash-env-string?)
(s/def ::grafana-cloud-password cp/bash-env-string?)
;; This value is written to `remote_write[0].url` of the Prometheus config,
;; i.e. it is an endpoint URL (scheme, host and path) rather than a bare
;; host name, so it is specified as a plain string instead of an FQDN.
(s/def ::grafana-cloud-url string?)
(s/def ::k3s-cluster-name cp/bash-env-string?)
(s/def ::k3s-cluster-stage cp/stage?)
(s/def ::pvc-storage-class-name cp/pvc-storage-class-name?)
(s/def ::node-regex string?)
(s/def ::traefik-regex string?)
(s/def ::kube-state-regex string?)

;; Map specs backing the predicates below. They are registered once so the
;; predicates do not rebuild a spec on every call.
(s/def ::grafana-config
  (s/keys :req-un [::grafana-cloud-url ::k3s-cluster-name ::k3s-cluster-stage]))
(s/def ::grafana-auth
  (s/keys :req-un [::grafana-cloud-user ::grafana-cloud-password]))
(s/def ::grafana-provider
  (s/keys :opt-un [::pvc-storage-class-name]))
(s/def ::filter-regex
  (s/keys :req-un [::node-regex ::traefik-regex ::kube-state-regex]))

;; TODO: rename to monitoring

;; The predicates previously returned the `s/keys` spec object itself and
;; never looked at `input`; a spec object is always truthy, so every value
;; was accepted. They now actually validate `input` and return a boolean.

(defn grafana-config?
  "True when `input` is a map with a valid :grafana-cloud-url,
  :k3s-cluster-name and :k3s-cluster-stage."
  [input]
  (s/valid? ::grafana-config input))

(defn grafana-auth?
  "True when `input` is a map with a valid :grafana-cloud-user and
  :grafana-cloud-password."
  [input]
  (s/valid? ::grafana-auth input))

(defn grafana-provider?
  "True when `input` is a map whose optional :pvc-storage-class-name, if
  present, is valid."
  [input]
  (s/valid? ::grafana-provider input))

(defn filter-regex?
  "True when `input` is a map with string values for :node-regex,
  :traefik-regex and :kube-state-regex."
  [input]
  (s/valid? ::filter-regex input))
;; Regular expressions selecting the metric names that are kept, grouped by
;; the exporter that produces them. The expressions are meant to match a
;; complete metric name (the tests use `re-matches`; Prometheus anchors
;; relabel regexes on both ends).
(def metric-regex
  {:node-regex
   ;; Character classes are written without separators: inside `[...]` a
   ;; comma is a literal character, so `[r,w,i]` also matched ",".
   (str "node_cpu_sec.+|node_load[0-9]+|node_memory_Buf.*|node_memory_Mem.*|"
        "node_memory_Cached.*|node_disk_[rwi].*|node_filesystem_[sa].*|"
        "node_network_receive_bytes_total|node_network_transmit_bytes_total")
   :traefik-regex
   (str "traefik_entrypoint_.*_total|"
        "traefik_entrypoint_.*_seconds_count|"
        "traefik_router_.*_total|"
        "traefik_router_.*_seconds_count|"
        "traefik_service_.*_total|"
        "traefik_service_.*_seconds_count|"
        "traefik_tls_certs_not_after")
   :kube-state-regex
   (str "kube_pod_container_status_restarts_total|"
        "kube_pod_status_reason|kube_node_status_capacity|kube_node_status_allocatable|"
        "kube_cronjob_status_active|kube_job_status_failed")})

;; All groups of `metric-regex` combined into one alternation; this is the
;; value substituted for the FILTER_REGEX placeholder of the Prometheus
;; config template.
(def filter-regex-string
  (str/join "|" (vals metric-regex)))
;; ClojureScript only: `yaml/load-resource` method for the :monitoring
;; dispatch value. Each known resource name is mapped to `rc/inline` of the
;; same literal path; any other name throws a js/Error.
;; (`rc/inline` is shadow-cljs' resource inlining — presumably it embeds the
;; file content at compile time, which is why every path is spelled out as
;; a literal; confirm against the shadow-cljs documentation.)
;; NOTE(review): "monitoring/stateful-set.yaml", which
;; `generate-stateful-set` requests, has no entry in this table.
#?(:cljs
(defmethod yaml/load-resource :monitoring [resource-name]
(case resource-name
"monitoring/namespace.yaml" (rc/inline "monitoring/namespace.yaml")
"monitoring/prometheus/config.yaml" (rc/inline "monitoring/prometheus/config.yaml")
"monitoring/prometheus/cluster-role.yaml" (rc/inline "monitoring/prometheus/cluster-role.yaml")
"monitoring/prometheus/cluster-role-binding.yaml" (rc/inline "monitoring/prometheus/cluster-role-binding.yaml")
"monitoring/prometheus/deployment.yaml" (rc/inline "monitoring/prometheus/deployment.yaml")
"monitoring/prometheus/prometheus.yaml" (rc/inline "monitoring/prometheus/prometheus.yaml")
"monitoring/prometheus/service.yaml" (rc/inline "monitoring/prometheus/service.yaml")
"monitoring/prometheus/service-account.yaml" (rc/inline "monitoring/prometheus/service-account.yaml")
"monitoring/node-exporter/daemon-set.yaml" (rc/inline "monitoring/node-exporter/daemon-set.yaml")
"monitoring/node-exporter/service.yaml" (rc/inline "monitoring/node-exporter/service.yaml")
"monitoring/node-exporter/cluster-role-binding.yaml" (rc/inline "monitoring/node-exporter/cluster-role-binding.yaml")
"monitoring/node-exporter/cluster-role.yaml" (rc/inline "monitoring/node-exporter/cluster-role.yaml")
"monitoring/node-exporter/service-account.yaml" (rc/inline "monitoring/node-exporter/service-account.yaml")
"monitoring/kube-state-metrics/cluster-role-binding.yaml" (rc/inline "monitoring/kube-state-metrics/cluster-role-binding.yaml")
"monitoring/kube-state-metrics/cluster-role.yaml" (rc/inline "monitoring/kube-state-metrics/cluster-role.yaml")
"monitoring/kube-state-metrics/deployment.yaml" (rc/inline "monitoring/kube-state-metrics/deployment.yaml")
"monitoring/kube-state-metrics/service-account.yaml" (rc/inline "monitoring/kube-state-metrics/service-account.yaml")
"monitoring/kube-state-metrics/service.yaml" (rc/inline "monitoring/kube-state-metrics/service.yaml")
(throw (js/Error. "Undefined Resource!")))))
;; Loads the "monitoring/stateful-set.yaml" template and sets the storage
;; class name of its first volume claim template. When `config` has no
;; :pvc-storage-class-name, :manual is used.
;; NOTE(review): this template is not among the resource names handled by
;; the cljs `load-resource` method in this namespace — confirm that the
;; file exists and that this function is still in use.
(defn-spec generate-stateful-set cp/map-or-seq?
  [config grafana-provider?]
  (let [{storage-class :pvc-storage-class-name
         :or {storage-class :manual}} config
        template (yaml/from-string (yaml/load-resource "monitoring/stateful-set.yaml"))]
    (assoc-in template
              [:spec :volumeClaimTemplates 0 :spec :storageClassName]
              (name storage-class))))
;; Builds the Prometheus configuration as data: loads the
;; "monitoring/prometheus/prometheus.yaml" template, fills in the external
;; labels (cluster, stage) from `config`, the remote-write URL from `config`
;; and the remote-write basic-auth credentials from `auth`, then replaces
;; every "FILTER_REGEX" value with `filter-regex-string`.
(defn-spec generate-prometheus-config cp/map-or-seq?
  [config grafana-config?
   auth grafana-auth?]
  (let [{cluster :k3s-cluster-name
         stage :k3s-cluster-stage
         url :grafana-cloud-url} config
        {user :grafana-cloud-user
         password :grafana-cloud-password} auth
        template (yaml/from-string (yaml/load-resource "monitoring/prometheus/prometheus.yaml"))
        ;; [path value] pairs, applied in this order.
        substitutions [[[:global :external_labels :cluster] cluster]
                       [[:global :external_labels :stage] stage]
                       [[:remote_write 0 :url] url]
                       [[:remote_write 0 :basic_auth :username] user]
                       [[:remote_write 0 :basic_auth :password] password]]
        filled (reduce (fn [acc [path value]] (assoc-in acc path value))
                       template
                       substitutions)]
    (cm/replace-all-matching-values-by-new-value filled "FILTER_REGEX" filter-regex-string)))
;; Builds the manifest from "monitoring/prometheus/config.yaml" and stores
;; the generated Prometheus configuration, serialised to a YAML string,
;; under [:stringData :prometheus.yaml].
(defn-spec generate-config cp/map-or-seq?
  [config grafana-config?
   auth grafana-auth?]
  (let [secret (yaml/from-string (yaml/load-resource "monitoring/prometheus/config.yaml"))
        prometheus-yaml (yaml/to-string (generate-prometheus-config config auth))]
    (assoc-in secret [:stringData :prometheus.yaml] prometheus-yaml)))
;; Returns all monitoring manifests as a vector of parsed YAML data, in
;; apply order: namespace, Prometheus (RBAC, service, service account,
;; generated config, deployment), node-exporter and kube-state-metrics.
;; Only the Prometheus config depends on `config` and `auth`; every other
;; entry is the parsed template as is.
;;
;; The return spec is `vector?`: the previously declared `cp/map-or-seq?`
;; accepts only maps and seqs and is false for a vector, so any
;; instrumentation that checks return values rejected every call.
(defn-spec generate vector?
  [config grafana-config?
   auth grafana-auth?]
  (let [manifest (fn [resource-name]
                   (yaml/from-string (yaml/load-resource resource-name)))]
    [(manifest "monitoring/namespace.yaml")
     (manifest "monitoring/prometheus/cluster-role.yaml")
     (manifest "monitoring/prometheus/cluster-role-binding.yaml")
     (manifest "monitoring/prometheus/service.yaml")
     (manifest "monitoring/prometheus/service-account.yaml")
     (generate-config config auth)
     (manifest "monitoring/prometheus/deployment.yaml")
     (manifest "monitoring/node-exporter/service-account.yaml")
     (manifest "monitoring/node-exporter/cluster-role.yaml")
     (manifest "monitoring/node-exporter/cluster-role-binding.yaml")
     (manifest "monitoring/node-exporter/daemon-set.yaml")
     (manifest "monitoring/node-exporter/service.yaml")
     (manifest "monitoring/kube-state-metrics/cluster-role-binding.yaml")
     (manifest "monitoring/kube-state-metrics/cluster-role.yaml")
     (manifest "monitoring/kube-state-metrics/deployment.yaml")
     (manifest "monitoring/kube-state-metrics/service-account.yaml")
     (manifest "monitoring/kube-state-metrics/service.yaml")]))

View file

@ -22,6 +22,10 @@
[input] [input]
(contains? #{"prod" "staging"} input)) (contains? #{"prod" "staging"} input))
(defn stage?
  "True when `input` is one of the stage keywords :prod, :acc, :int, :test
  or :dev; false for anything else."
  [input]
  (let [stages #{:prod :acc :int :test :dev}]
    (some? (stages input))))
(defn map-or-seq? (defn map-or-seq?
[input] [input]
(or (map? input) (seq? input))) (or (map? input) (seq? input)))

View file

@ -0,0 +1,17 @@
# from https://github.com/kubernetes/kube-state-metrics/tree/main/examples/standard
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kube-state-metrics
subjects:
- kind: ServiceAccount
name: kube-state-metrics
namespace: monitoring

View file

@ -0,0 +1,128 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
rules:
- apiGroups:
- ""
resources:
- configmaps
- secrets
- nodes
- pods
- services
- serviceaccounts
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs:
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
- daemonsets
- deployments
- replicasets
verbs:
- list
- watch
- apiGroups:
- batch
resources:
- cronjobs
- jobs
verbs:
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs:
- create
- apiGroups:
- policy
resources:
- poddisruptionbudgets
verbs:
- list
- watch
- apiGroups:
- certificates.k8s.io
resources:
- certificatesigningrequests
verbs:
- list
- watch
- apiGroups:
- discovery.k8s.io
resources:
- endpointslices
verbs:
- list
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
- volumeattachments
verbs:
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
- mutatingwebhookconfigurations
- validatingwebhookconfigurations
verbs:
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- networkpolicies
- ingressclasses
- ingresses
verbs:
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- clusterrolebindings
- clusterroles
- rolebindings
- roles
verbs:
- list
- watch

View file

@ -0,0 +1,53 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: kube-state-metrics
template:
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.7.0
spec:
serviceAccountName: kube-state-metrics
automountServiceAccountToken: true
containers:
- name: kube-state-metrics
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
ports:
- containerPort: 8080
name: http-metrics
- containerPort: 8081
name: telemetry
readinessProbe:
httpGet:
path: /
port: 8081
initialDelaySeconds: 5
timeoutSeconds: 5
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsUser: 65534
nodeSelector:
kubernetes.io/os: linux

View file

@ -0,0 +1,10 @@
apiVersion: v1
automountServiceAccountToken: false
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring

View file

@ -0,0 +1,20 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: exporter
app.kubernetes.io/name: kube-state-metrics
app.kubernetes.io/version: 2.7.0
name: kube-state-metrics
namespace: monitoring
spec:
clusterIP: None
ports:
- name: http-metrics
port: 8080
targetPort: http-metrics
- name: telemetry
port: 8081
targetPort: telemetry
selector:
app.kubernetes.io/name: kube-state-metrics

View file

@ -0,0 +1,6 @@
kind: Namespace
apiVersion: v1
metadata:
name: monitoring
labels:
name: monitoring

View file

@ -0,0 +1,14 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
name: node-exporter
name: node-exporter
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: node-exporter
subjects:
- kind: ServiceAccount
name: node-exporter
namespace: monitoring

View file

@ -0,0 +1,45 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
name: node-exporter
name: node-exporter
rules:
- apiGroups:
- ""
resources:
- nodes
- nodes/proxy
- nodes/metrics
- services
- endpoints
- pods
- ingresses
- configmaps
- events
verbs:
- get
- list
- watch
- apiGroups:
- "extensions"
- "networking.k8s.io"
resources:
- ingresses/status
- ingresses
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- podsecuritypolicies
verbs:
- use
resourceNames:
- node-exporter
- nonResourceURLs:
- /metrics
verbs:
- get

View file

@ -0,0 +1,71 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app.kubernetes.io/name: node-exporter
name: node-exporter
namespace: monitoring
spec:
selector:
matchLabels:
app.kubernetes.io/name: node-exporter
template:
metadata:
labels:
app.kubernetes.io/name: node-exporter
spec:
serviceAccountName: node-exporter
containers:
- name: node-exporter
image: prom/node-exporter
imagePullPolicy: "IfNotPresent"
args:
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --no-collector.conntrack
- --no-collector.wifi
- --no-collector.hwmon
- --no-collector.infiniband
- --no-collector.filefd
- --no-collector.mdadm
- --no-collector.netclass
- --no-collector.nfs
- --no-collector.nfsd
- --no-collector.powersupplyclass
- --no-collector.pressure
- --no-collector.rapl
- --no-collector.schedstat
- --no-collector.sockstat
- --no-collector.softnet
- --no-collector.tapestats
- --no-collector.thermal_zone
- --no-collector.xfs
- --no-collector.zfs
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
- --collector.netclass.ignored-devices=^(veth.*)$
ports:
- containerPort: 9100
protocol: TCP
resources:
limits:
cpu: 250m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
volumeMounts:
- mountPath: /host/sys
mountPropagation: HostToContainer
name: sys
readOnly: true
- mountPath: /host/root
mountPropagation: HostToContainer
name: root
readOnly: true
volumes:
- hostPath:
path: /sys
name: sys
- hostPath:
path: /
name: root

View file

@ -0,0 +1,7 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
name: node-exporter
name: node-exporter
namespace: monitoring

View file

@ -0,0 +1,18 @@
kind: Service
apiVersion: v1
metadata:
name: node-exporter
namespace: monitoring
labels:
app.kubernetes.io/name: node-exporter
annotations:
prometheus.io/scrape: 'true'
prometheus.io/port: '9100'
spec:
selector:
app.kubernetes.io/name: node-exporter
ports:
- name: node-exporter-http
protocol: TCP
port: 9100
targetPort: 9100

View file

@ -0,0 +1,14 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
name: prometheus
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: monitoring

View file

@ -0,0 +1,37 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
name: prometheus
name: prometheus
rules:
- apiGroups:
- ""
resources:
- nodes
- nodes/proxy
- nodes/metrics
- services
- endpoints
- pods
- ingresses
- configmaps
- events
verbs:
- get
- list
- watch
- apiGroups:
- "extensions"
- "networking.k8s.io"
resources:
- ingresses/status
- ingresses
verbs:
- get
- list
- watch
- nonResourceURLs:
- /metrics
verbs:
- get

View file

@ -0,0 +1,8 @@
# Secret template holding the Prometheus configuration file.
# FILECONTENT is a placeholder: the generator code overwrites
# stringData."prometheus.yaml" with the rendered Prometheus configuration.
apiVersion: v1
kind: Secret
metadata:
name: prometheus-conf
namespace: monitoring
type: Opaque
stringData:
prometheus.yaml: FILECONTENT

View file

@ -0,0 +1,42 @@
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/name: prometheus
name: prometheus
namespace: monitoring
spec:
selector:
matchLabels:
app.kubernetes.io/name: prometheus
replicas: 1
template:
metadata:
labels:
app.kubernetes.io/name: prometheus
spec:
serviceAccountName: prometheus
enableServiceLinks: true
containers:
- name: prometheus
image: "quay.io/prometheus/prometheus:v2.39.1"
imagePullPolicy: "IfNotPresent"
args:
- --config.file=/etc/prometheus/prometheus.yaml
- --storage.tsdb.path=/prometheus/
- --storage.tsdb.retention.time=1d
ports:
- containerPort: 9090
volumeMounts:
- name: prometheus-config-volume
mountPath: /etc/prometheus/
readOnly: true
- name: prometheus-storage-volume
mountPath: /prometheus/
volumes:
- name: prometheus-config-volume
secret:
secretName: prometheus-conf
defaultMode: 420
- name: prometheus-storage-volume
emptyDir: {}

View file

@ -0,0 +1,64 @@
# Prometheus configuration template (appears to be the file loaded by
# generate-prometheus-config in the monitoring namespace).
# Placeholder values below are overwritten by the generator:
#   external_labels.cluster / .stage            <- k3s-cluster-name / k3s-cluster-stage
#   remote_write[0].url                         <- grafana-cloud-url
#   remote_write[0].basic_auth.username/.password <- grafana-cloud-user / -password
#   FILTER_REGEX                                <- combined metric filter regex
global:
scrape_interval: 60s
evaluation_interval: 60s
external_labels:
cluster: $CLUSTERNAME
stage: $TEST_OR_PROD
remote_write:
- url: GRAFANA_CLOUD_URL
basic_auth:
username: GRAFANA_CLOUD_USER
password: GRAFANA_CLOUD_PASSWORD
# Only series whose metric name matches the regex are forwarded.
write_relabel_configs:
- source_labels:
- __name__
regex: FILTER_REGEX
action: keep
# NOTE(review): the prometheus-conf secret template only defines the key
# prometheus.yaml, so no prometheus.rules file seems to be provided under
# /etc/prometheus/ — confirm whether this entry is still needed.
rule_files:
- /etc/prometheus/prometheus.rules
scrape_configs:
- job_name: 'kubernetes-nodes'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_endpoints_name]
regex: 'node-exporter'
action: keep
- job_name: 'traefik'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_endpoints_name]
regex: 'traefik'
action: keep
- job_name: 'kube-state-metrics'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_endpoints_name]
regex: 'kube-state-metrics'
action: keep
- job_name: "prometheus"
static_configs:
- targets: ["localhost:9090"]

View file

@ -0,0 +1,8 @@
apiVersion: v1
kind: ServiceAccount
automountServiceAccountToken: true
metadata:
labels:
name: prometheus
name: prometheus
namespace: monitoring

View file

@ -0,0 +1,18 @@
# LoadBalancer service in front of the Prometheus pods (selected by the
# app.kubernetes.io/name=prometheus label); the address comes from the
# metallb pool named "private".
apiVersion: v1
kind: Service
metadata:
name: prometheus
namespace: monitoring
labels:
app.kubernetes.io/name: prometheus
annotations:
metallb.universe.tf/address-pool: private
spec:
type: LoadBalancer
selector:
app.kubernetes.io/name: prometheus
ports:
- name: prometheus-http
protocol: TCP
# NOTE(review): the service listens on 9000 and forwards to container
# port 9090 — confirm that 9000 (rather than 9090) is intended.
port: 9000
targetPort: 9090

View file

@ -0,0 +1,16 @@
(ns dda.c4k-common.common-spec-test
(:require
#?(:clj [clojure.test :refer [deftest is are testing run-tests]]
:cljs [cljs.test :refer-macros [deftest is are testing run-tests]])
[clojure.spec.test.alpha :as st]
[dda.c4k-common.common :as cut]))
;; Expects `concat-vec` to throw for arguments that are not all vectors
;; (a string, nil, a number, a map).
;; NOTE(review): nothing in this file calls `st/instrument`, although `st`
;; is required; if these cases rely on spec checking of `concat-vec`,
;; confirm that instrumentation is switched on elsewhere before this runs.
;; NOTE(review): `Exception` is a JVM class; confirm how this .cljc-style
;; test is expected to behave under ClojureScript.
(deftest should-refuse-illegal-inputs
(is (thrown? Exception
(cut/concat-vec ["a1" "a2"] "b1")))
(is (thrown? Exception
(cut/concat-vec ["a1" "a2"] nil)))
(is (thrown? Exception
(cut/concat-vec ["a1" "a2"] 2)))
(is (thrown? Exception
(cut/concat-vec {"a1" "a2"} []))))

View file

@ -0,0 +1,24 @@
(ns dda.c4k-common.monitoring-regex-test
(:require
[clojure.test :refer [deftest is are testing run-tests]]
[data-test :refer :all]
[dda.c4k-common.monitoring :as cut]))
(defn filter-by-regex
  "Returns a vector of the strings in `collection` that `regex-str` matches
  completely (`re-matches` semantics), in their original order.

  regex-str  - regular expression source as a string
  collection - strings to test"
  [regex-str collection]
  ;; Compile the pattern once instead of once per element.
  (let [pattern (re-pattern regex-str)]
    (filterv #(re-matches pattern %) collection)))
;; For each metric group: take the expected metric names, append the
;; additional names from the input, filter with the group's regex and
;; expect exactly the expected names to remain.
(defdatatest should-filter-metrik [input expected]
  (doseq [[regex-key expected-key additional-key]
          [[:node-regex :node-metrics :additional-node-metrics]
           [:traefik-regex :traefik-metrics :additional-traefik-metrics]
           [:kube-state-regex :kube-state-metrics :additional-kube-state-metrics]]]
    (let [wanted (expected-key expected)
          candidates (into wanted (additional-key input))]
      (is (= wanted
             (filter-by-regex (regex-key cut/metric-regex) candidates))))))

View file

@ -0,0 +1,47 @@
(ns dda.c4k-common.monitoring-test
(:require
#?(:clj [clojure.test :refer [deftest is are testing run-tests]]
:cljs [cljs.test :refer-macros [deftest is are testing run-tests]])
[clojure.string :as s]
[clojure.spec.test.alpha :as st]
[dda.c4k-common.monitoring :as cut]
[dda.c4k-common.yaml :as yaml]
[clojure.string :as str]))
;; Enable argument spec checking for the functions under test.
;; `generate-prometheus-config` replaces the former `generate-agent-config`
;; entry: no function of that name exists in the monitoring namespace, so
;; instrumenting it had no effect, while `generate-prometheus-config` is
;; called directly by the tests below.
(st/instrument `cut/generate)
(st/instrument `cut/generate-stateful-set)
(st/instrument `cut/generate-prometheus-config)
(st/instrument `cut/generate-config)
;; Minimal config fixture: cluster name, stage and remote-write URL.
(def conf {:k3s-cluster-name "clustername"
:k3s-cluster-stage :test
:grafana-cloud-url "url"})
;; Auth fixture with the Grafana Cloud credentials.
;; NOTE(review): :hetzner-cloud-ro-token is not read by the monitoring
;; functions exercised here — presumably a leftover; confirm and remove.
(def auth {:grafana-cloud-user "user"
:grafana-cloud-password "password"
:hetzner-cloud-ro-token "ro-token"})
;; `generate` is expected to return one entry per manifest: 17 in total.
(deftest should-generate
  (let [manifests (cut/generate conf auth)]
    (is (= 17 (count manifests)))))
;; The credentials from `auth` must end up as basic auth of the first
;; remote-write target.
(deftest should-generate-prometheus-remote-write-auth
  (let [prometheus-config (cut/generate-prometheus-config conf auth)
        basic-auth (get-in prometheus-config [:remote_write 0 :basic_auth])]
    (is (= {:username "user"
            :password "password"}
           basic-auth))))
;; Cluster name and stage from `conf` must end up as global external labels.
(deftest should-generate-prometheus-external-labels
  (let [prometheus-config (cut/generate-prometheus-config conf auth)
        external-labels (get-in prometheus-config [:global :external_labels])]
    (is (= {:cluster "clustername"
            :stage :test}
           external-labels))))
;; The generated manifest must carry the rendered Prometheus configuration
;; as a string under [:stringData :prometheus.yaml]; only the beginning of
;; that string is checked.
(deftest should-generate-config
(is (s/starts-with?
(get-in
(cut/generate-config conf auth)
[:stringData :prometheus.yaml])
"global:\n scrape_interval:")))

View file

@ -0,0 +1,19 @@
{:input {:additional-node-metrics ["go_gc_duration_seconds"]
:additional-traefik-metrics ["traefik_config_reloads_total"]
:additional-kube-state-metrics ["kube_persistentvolume_annotations"]}
:expected
{:node-metrics
["node_cpu_seconds_total" "node_load1" "node_load5" "node_load15" "node_memory_MemTotal_bytes"
"node_memory_MemFree_bytes" "node_memory_Buffers_bytes" "node_memory_Cached_bytes"
"node_disk_read_bytes_total" "node_disk_written_bytes_total" "node_disk_io_time_seconds_total"
"node_filesystem_size_bytes" "node_filesystem_avail_bytes" "node_network_receive_bytes_total"
"node_network_transmit_bytes_total"]
:traefik-metrics
["traefik_entrypoint_requests_total" "traefik_entrypoint_requests_duration_seconds_count"
"traefik_entrypoint_requests_tls_total" "traefik_router_requests_total"
"traefik_router_requests_tls_total"
"traefik_service_requests_total" "traefik_service_requests_duration_seconds_count"
"traefik_service_requests_tls_total" "traefik_tls_certs_not_after"]
:kube-state-metrics
["kube_pod_container_status_restarts_total" "kube_pod_status_reason" "kube_node_status_allocatable"
"kube_node_status_capacity" "kube_cronjob_status_active" "kube_job_status_failed"]}}

View file

@ -0,0 +1,30 @@
node_cpu_seconds_total
node_load1
node_load5
node_load15
node_memory_MemTotal_bytes
node_memory_MemFree_bytes
node_memory_Buffers_bytes
node_memory_Cached_bytes
node_disk_read_bytes_total
node_disk_written_bytes_total
node_disk_io_time_seconds_total
node_filesystem_size_bytes
node_filesystem_avail_bytes
node_network_receive_bytes_total
node_network_transmit_bytes_total
traefik_entrypoint_requests_total
traefik_entrypoint_requests_duration_seconds_count
traefik_entrypoint_requests_tls_total
traefik_router_requests_total
traefik_router_requests_tls_total
traefik_service_requests_total
traefik_service_requests_duration_seconds_count
traefik_service_requests_tls_total
traefik_tls_certs_not_after
kube_pod_container_status_restarts_total
kube_pod_status_reason
kube_node_status_allocatable
kube_node_status_capacity
kube_cronjob_status_active
kube_job_status_failed