add grafana-agent
This commit is contained in:
parent 968c5d01d6
commit 85697f43f9

15 changed files with 539 additions and 25 deletions
@@ -11,7 +11,7 @@ internal val configDir = "/etc/prometheus/"
 internal val configFile = "prometheus.yml"
 
 
-fun Prov.configurePrometheusDocker(config: String = prometheusDefaultConfig) = task {
+fun Prov.configurePrometheusDocker(config: String = prometheusDefaultConfig()) = task {
     createDirs(configDir, sudo = true)
     createFile(configDir + configFile, config, sudo = true)
 }

@@ -51,25 +51,22 @@ fun Prov.runPrometheusDocker(nginxHost: String? = null) = task {
 }
 
 
-private const val prometheusDefaultConfig =
+private fun prometheusDefaultConfig() =
     """
     global:
       scrape_interval: 15s # By default, scrape targets every 15 seconds.
 
-      # Attach these labels to any time series or alerts when communicating with
-      # external systems (federation, remote storage, Alertmanager).
-      external_labels:
-        monitor: 'codelab-monitor'
-
     # A scrape configuration containing exactly one endpoint to scrape:
     # Here it's Prometheus itself.
     scrape_configs:
       # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
      - job_name: 'prometheus'
 
-        # Override the global default and scrape targets from this job every 5 seconds.
-        scrape_interval: 5s
-
        static_configs:
          - targets: ['localhost:9090']
 
+    remote_write:
+      - url: "<Your Metrics instance remote_write endpoint>"
+        basic_auth:
+          username: "your grafana username"
+          password: "your Grafana API key"
     """
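For orientation (not part of the commit): a minimal Kotlin sketch of how the changed entry point is used. configurePrometheusDocker and runPrometheusDocker are taken from the hunks above; the wrapper function name is hypothetical, and the remote_write placeholders in prometheusDefaultConfig() still need to be replaced with real Grafana Cloud values.

// Hypothetical wrapper, sketched against the functions shown in this diff.
fun Prov.prometheusDockerExample() = task {
    configurePrometheusDocker()   // writes /etc/prometheus/prometheus.yml from prometheusDefaultConfig()
    runPrometheusDocker()         // starts the Prometheus container; nginxHost stays null here
}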
@@ -30,10 +30,8 @@ enum class SecretSourceType() {
 
 
 @Serializable
-@Suppress("unused") // for use in other projects
 class SecretSupplier(private val source: SecretSourceType, val parameter: String) {
     fun secret(): Secret {
         return source.secret(parameter)
     }
 }
@@ -1,15 +1,16 @@
 package org.domaindrivenarchitecture.provs.server.domain.k3s
 
 import org.domaindrivenarchitecture.provs.framework.core.Prov
-import org.domaindrivenarchitecture.provs.framework.core.ProvResult
+import org.domaindrivenarchitecture.provs.server.domain.k8s_grafana_agent.GrafanaAgentConfigResolved
+import org.domaindrivenarchitecture.provs.server.domain.k8s_grafana_agent.provisionGrafanaAgent
 import org.domaindrivenarchitecture.provs.server.infrastructure.*
-import org.domaindrivenarchitecture.provs.server.infrastructure.getK3sConfig
 
 /**
  * Installs a k3s server.
  */
 fun Prov.provisionK3s(cli: K3sCliCommand) = task {
     val k3sConfig: K3sConfig = getK3sConfig(cli.configFileName)
+    val grafanaConfigResolved: GrafanaAgentConfigResolved? = findK8sGrafanaConfig(cli.configFileName)?.resolveSecret()
 
     provisionNetwork(k3sConfig)
     if (k3sConfig.reprovision && testConfigExists()) {

@@ -21,11 +22,16 @@ fun Prov.provisionK3s(cli: K3sCliCommand) = task {
     if (k3sConfig.certmanager != null) {
         provisionK3sCertManager(k3sConfig.certmanager)
     }
 
     if (k3sConfig.echo == true) {
         provisionK3sEcho(k3sConfig.fqdn, k3sConfig.certmanager?.letsencryptEndpoint)
     }
 
+    if (grafanaConfigResolved != null) {
+        provisionGrafanaAgent(grafanaConfigResolved)
+    }
+
     if (cli.applicationFileName != null) {
         provisionK3sApplication(cli.applicationFileName)
     }
-    ProvResult(true)
 }
@@ -0,0 +1,9 @@
package org.domaindrivenarchitecture.provs.server.domain.k8s_grafana_agent

import org.domaindrivenarchitecture.provs.framework.core.Prov
import org.domaindrivenarchitecture.provs.server.infrastructure.provisionGrafanaAgentForK8s


fun Prov.provisionGrafanaAgent(configResolved: GrafanaAgentConfigResolved) =
    provisionGrafanaAgentForK8s(configResolved.user, configResolved.password, configResolved.cluster)
@@ -0,0 +1,25 @@
package org.domaindrivenarchitecture.provs.server.domain.k8s_grafana_agent

import kotlinx.serialization.Serializable
import org.domaindrivenarchitecture.provs.framework.core.Secret
import org.domaindrivenarchitecture.provs.framework.ubuntu.secret.SecretSupplier

@Serializable
data class GrafanaAgentConfig(
    val user: String,
    val password: SecretSupplier,
    val cluster: String
) {
    fun resolveSecret(): GrafanaAgentConfigResolved = GrafanaAgentConfigResolved(this)
}

data class GrafanaAgentConfigResolved(val configUnresolved: GrafanaAgentConfig) {
    val user: String = configUnresolved.user
    val password: Secret = configUnresolved.password.secret()
    val cluster: String = configUnresolved.cluster
}

@Serializable
data class GrafanaAgentConfigHolder(
    val grafana: GrafanaAgentConfig
)
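A short sketch (not from the commit) of how these classes fit together: the serializable config carries the secret as a supplier, and resolveSecret() reads it once into a plain Secret. SecretSourceType.PLAIN and its import path are assumptions of this example; substitute whichever secret source the project actually provides.

import org.domaindrivenarchitecture.provs.framework.ubuntu.secret.SecretSupplier
import org.domaindrivenarchitecture.provs.framework.ubuntu.secret.SecretSourceType   // assumed package

// Hypothetical construction of a config and one-time resolution of its secret.
val config = GrafanaAgentConfig(
    user = "123456",                                                   // placeholder Grafana Cloud user id
    password = SecretSupplier(SecretSourceType.PLAIN, "my-api-key"),   // PLAIN source is an assumption
    cluster = "demo-cluster"
)
val resolved: GrafanaAgentConfigResolved = config.resolveSecret()      // password is now a Secret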
@@ -0,0 +1,64 @@
package org.domaindrivenarchitecture.provs.server.infrastructure

import org.domaindrivenarchitecture.provs.framework.core.Prov
import org.domaindrivenarchitecture.provs.framework.core.Secret
import org.domaindrivenarchitecture.provs.framework.ubuntu.filesystem.base.createFileFromResource
import org.domaindrivenarchitecture.provs.framework.ubuntu.filesystem.base.createFileFromResourceTemplate
import org.domaindrivenarchitecture.provs.server.domain.k3s.FileMode
import java.io.File


private const val grafanaResourceDir = "org/domaindrivenarchitecture/provs/server/infrastructure/grafana/"


fun Prov.provisionGrafanaAgentForK8s(user: String, password: Secret, clusterName: String) = task {
    val namespace = "monitoring"

    // Create namespace if not yet existing
    if (!chk("kubectl get namespace $namespace")) {
        cmd("kubectl create namespace $namespace")
    }

    // Deploy grafana-agent
    applyGrafanaFileFromResource(File(k3sManualManifestsDir, "grafana-agent.yaml"))

    // Deploy node-exporter
    applyGrafanaFileFromResource(File(k3sManualManifestsDir, "node-exporter-daemon-set.yaml"))

    // Deploy grafana config
    createFileFromResourceTemplate(
        k3sManualManifestsDir + "grafana-agent-config-map.yaml",
        "grafana-agent-config-map.template.yaml",
        resourcePath = grafanaResourceDir,
        posixFilePermission = "644",
        values = mapOf(
            "USERNAME" to user,
            "APIKEY" to password.plain(),
            "CLUSTERNAME" to clusterName,
        )
    )
    cmd("export NAMESPACE=$namespace && kubectl apply -n \$NAMESPACE -f grafana-agent-config-map.yaml", k3sManualManifestsDir)

    // restart grafana-agent
    cmd("kubectl -n $namespace rollout restart deployment/grafana-agent")
}


// ============================ private functions =============================

private fun Prov.createGrafanaFileFromResource(
    file: File,
    posixFilePermission: FileMode? = "644"
) = task {
    createFileFromResource(
        file.path,
        file.name,
        grafanaResourceDir,
        posixFilePermission,
        sudo = true
    )
}

private fun Prov.applyGrafanaFileFromResource(file: File, posixFilePermission: String? = "644") = task {
    createGrafanaFileFromResource(file, posixFilePermission)
    cmd("kubectl apply -f ${file.path}", sudo = true)
}
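A hedged sketch (not from the commit) of calling the new infrastructure function directly; normally it is reached via provisionGrafanaAgent with a resolved config. That Secret can be built from a plain string here is an assumption of this example.

// Hypothetical direct invocation with placeholder credentials.
fun Prov.grafanaAgentForK8sExample() = task {
    provisionGrafanaAgentForK8s(
        user = "123456",
        password = Secret("my-api-key"),   // assumption: Secret wraps a plain value exposed via plain()
        clusterName = "demo-cluster"
    )
}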
@@ -0,0 +1,30 @@
package org.domaindrivenarchitecture.provs.server.infrastructure

import org.domaindrivenarchitecture.provs.configuration.domain.ConfigFileName
import org.domaindrivenarchitecture.provs.framework.core.readFromFile
import org.domaindrivenarchitecture.provs.framework.core.toYaml
import org.domaindrivenarchitecture.provs.framework.core.yamlToType
import org.domaindrivenarchitecture.provs.server.domain.k8s_grafana_agent.GrafanaAgentConfigHolder
import org.domaindrivenarchitecture.provs.server.domain.k8s_grafana_agent.GrafanaAgentConfig
import java.io.File
import java.io.FileWriter


private const val DEFAULT_CONFIG_FILE = "server-config.yaml"


fun findK8sGrafanaConfig(fileName: ConfigFileName? = null): GrafanaAgentConfig? {
    val filePath = fileName?.fileName ?: DEFAULT_CONFIG_FILE

    // create a default config
    return if (File(filePath).exists()) {
        readFromFile(filePath).yamlToType<GrafanaAgentConfigHolder>().grafana
    } else {
        null
    }
}


@Suppress("unused")
internal fun writeConfig(config: GrafanaAgentConfigHolder, fileName: String = "grafana-config.yaml") =
    FileWriter(fileName).use { it.write(config.toYaml()) }
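For context, a sketch (not from the commit) of the round trip through the repository functions above: writeConfig serializes a holder with a top-level grafana key, and findK8sGrafanaConfig reads it back, by default from server-config.yaml. The PLAIN secret source is again an assumption of this example.

// Hypothetical round trip against DEFAULT_CONFIG_FILE.
val holder = GrafanaAgentConfigHolder(
    grafana = GrafanaAgentConfig(
        user = "123456",
        password = SecretSupplier(SecretSourceType.PLAIN, "my-api-key"),   // assumed source type
        cluster = "demo-cluster"
    )
)
writeConfig(holder, "server-config.yaml")                // write to the default lookup file
val grafana = findK8sGrafanaConfig()?.resolveSecret()    // what provisionK3s does with the cli config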
@@ -13,11 +13,12 @@ import java.io.File
 
 // ----------------------------------- directories --------------------------------
 
-private const val k3sResourceDir = "org/domaindrivenarchitecture/provs/server/infrastructure/k3s/"
+const val k3sManualManifestsDir = "/etc/rancher/k3s/manifests/"
 
-private const val k8sCredentialsDir = "/etc/kubernetes/"
 private const val k3sAutomatedManifestsDir = "/var/lib/rancher/k3s/server/manifests/"
-private const val k3sManualManifestsDir = "/etc/rancher/k3s/manifests/"
+private const val k8sCredentialsDir = "/etc/kubernetes/"
+
+private const val k3sResourceDir = "org/domaindrivenarchitecture/provs/server/infrastructure/k3s/"
 
 // ----------------------------------- files --------------------------------

@@ -146,7 +147,11 @@ fun Prov.provisionK3sApplication(applicationFileName: ApplicationFileName) = tas
 }
 
 
-// ============================ private functions =============================
+// ============================ private and internal functions =============================
 
+internal fun Prov.applyK3sFile(file: File) = task {
+    cmd("kubectl apply -f ${file.path}", sudo = true)
+}
+
 private fun Prov.createK3sFileFromResource(
     file: File,

@@ -192,10 +197,6 @@ private fun Prov.createK3sFileFromResourceTemplate(
     )
 }
 
-private fun Prov.applyK3sFile(file: File) = task {
-    cmd("kubectl apply -f ${file.path}", sudo = true)
-}
-
 private fun File.templateName(): String {
     return this.name.replace(".yaml", ".template.yaml")
 }
@@ -0,0 +1,112 @@
kind: ConfigMap
metadata:
  name: grafana-agent
  namespace: monitoring
apiVersion: v1
data:
  agent.yaml: |
    server:
      http_listen_port: 12345
    metrics:
      wal_directory: /tmp/grafana-agent-wal
      global:
        scrape_interval: 60s
        external_labels:
          cluster: $CLUSTERNAME
      configs:
      - name: integrations
        remote_write:
        - url: https://prometheus-prod-01-eu-west-0.grafana.net/api/prom/push
          basic_auth:
            username: $USERNAME
            password: $APIKEY
        scrape_configs:
        - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          job_name: integrations/kubernetes/cadvisor
          kubernetes_sd_configs:
            - role: node
          metric_relabel_configs:
          - source_labels: [__name__]
            regex: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_swap|kube_statefulset_status_replicas_ready|kube_node_spec_taint|storage_operation_duration_seconds_count|kubelet_pleg_relist_duration_seconds_count|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|kube_daemonset_status_number_available|kubelet_certificate_manager_server_ttl_seconds|container_network_receive_packets_total|namespace_workload_pod|kubelet_pod_worker_duration_seconds_count|kube_statefulset_status_replicas_updated|kube_pod_status_phase|volume_manager_total_volumes|kubelet_running_containers|storage_operation_errors_total|kube_statefulset_status_observed_generation|kubelet_node_config_error|container_fs_reads_bytes_total|kube_horizontalpodautoscaler_status_current_replicas|rest_client_request_duration_seconds_bucket|container_memory_cache|kube_daemonset_updated_number_scheduled|kube_job_spec_completions|kubelet_volume_stats_capacity_bytes|kube_daemonset_status_number_misscheduled|container_memory_rss|namespace_cpu:kube_pod_container_resource_limits:sum|kubelet_volume_stats_inodes_used|container_network_transmit_bytes_total|kubelet_runtime_operations_errors_total|container_fs_writes_total|kube_daemonset_status_current_number_scheduled|kube_node_status_capacity|container_network_receive_packets_dropped_total|storage_operation_duration_seconds_bucket|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|container_network_receive_bytes_total|kubelet_runtime_operations_total|kubelet_pod_start_duration_seconds_count|kube_horizontalpodautoscaler_status_desired_replicas|container_cpu_usage_seconds_total|up|kube_resourcequota|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_spec_max_replicas|kubelet_server_expiration_renew_errors|container_fs_writes_bytes_total|kubelet_pod_worker_duration_seconds_bucket|machine_memory_bytes|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kube_statefulset_status_current_revision|kube_deployment_status_observed_generation|kubelet_volume_stats_inodes|kubelet_volume_stats_available_bytes|kube_deployment_status_replicas_available|container_network_transmit_packets_total|kubelet_pleg_relist_duration_seconds_bucket|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|container_cpu_cfs_periods_total|process_cpu_seconds_total|kubelet_pleg_relist_interval_seconds_bucket|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_replicas|kube_deployment_spec_replicas|namespace_workload_pod:kube_pod_owner:relabel|kube_pod_owner|node_namespace_pod_container:container_memory_working_set_bytes|kubelet_node_name|kube_job_failed|container_memory_working_set_bytes|kubelet_runtime_operations_duration_seconds_bucket|namespace_memory:kube_pod_container_resource_requests:sum|kube_node_info|kubernetes_build_info|kube_statefulset_status_update_revision|kube_pod_container_resource_limits|kubelet_running_pods|kube_statefulset_replicas|kube_namespace_created|node_namespace_pod_container:container_memory_rss|node_namespace_pod_container:container_memory_cache|kube_node_status_condition|kube_pod_container_resource_requests|kubelet_running_pod_count|namespace_memory:kube_pod_container_resource_limits:sum|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_pod_container_status_waiting_reason|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_running_container_count|node_namespace_pod_container:container_memory_swap|kube_deployment_status_replicas_updated|process_resident_memory_bytes|rest_client_requests_total|kube_node_status_allocatable|kube_statefulset_metadata_generation|kube_deployment_metadata_generation|container_network_transmit_packets_dropped_total|kube_pod_info|kubelet_cgroup_manager_duration_seconds_count|container_fs_reads_total|kube_daemonset_status_desired_number_scheduled|container_cpu_cfs_throttled_periods_total|kube_job_status_succeeded|kubelet_certificate_manager_client_ttl_seconds|kube_replicaset_owner|go_goroutines|namespace_cpu:kube_pod_container_resource_requests:sum
            action: keep
          relabel_configs:
          - replacement: kubernetes.default.svc.cluster.local:443
            target_label: __address__
          - regex: (.+)
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
            source_labels:
              - __meta_kubernetes_node_name
            target_label: __metrics_path__
          scheme: https
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: false
            server_name: kubernetes
        - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          job_name: integrations/kubernetes/kubelet
          kubernetes_sd_configs:
            - role: node
          metric_relabel_configs:
          - source_labels: [__name__]
            regex: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_swap|kube_statefulset_status_replicas_ready|kube_node_spec_taint|storage_operation_duration_seconds_count|kubelet_pleg_relist_duration_seconds_count|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|kube_daemonset_status_number_available|kubelet_certificate_manager_server_ttl_seconds|container_network_receive_packets_total|namespace_workload_pod|kubelet_pod_worker_duration_seconds_count|kube_statefulset_status_replicas_updated|kube_pod_status_phase|volume_manager_total_volumes|kubelet_running_containers|storage_operation_errors_total|kube_statefulset_status_observed_generation|kubelet_node_config_error|container_fs_reads_bytes_total|kube_horizontalpodautoscaler_status_current_replicas|rest_client_request_duration_seconds_bucket|container_memory_cache|kube_daemonset_updated_number_scheduled|kube_job_spec_completions|kubelet_volume_stats_capacity_bytes|kube_daemonset_status_number_misscheduled|container_memory_rss|namespace_cpu:kube_pod_container_resource_limits:sum|kubelet_volume_stats_inodes_used|container_network_transmit_bytes_total|kubelet_runtime_operations_errors_total|container_fs_writes_total|kube_daemonset_status_current_number_scheduled|kube_node_status_capacity|container_network_receive_packets_dropped_total|storage_operation_duration_seconds_bucket|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|container_network_receive_bytes_total|kubelet_runtime_operations_total|kubelet_pod_start_duration_seconds_count|kube_horizontalpodautoscaler_status_desired_replicas|container_cpu_usage_seconds_total|up|kube_resourcequota|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_spec_max_replicas|kubelet_server_expiration_renew_errors|container_fs_writes_bytes_total|kubelet_pod_worker_duration_seconds_bucket|machine_memory_bytes|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kube_statefulset_status_current_revision|kube_deployment_status_observed_generation|kubelet_volume_stats_inodes|kubelet_volume_stats_available_bytes|kube_deployment_status_replicas_available|container_network_transmit_packets_total|kubelet_pleg_relist_duration_seconds_bucket|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|container_cpu_cfs_periods_total|process_cpu_seconds_total|kubelet_pleg_relist_interval_seconds_bucket|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_replicas|kube_deployment_spec_replicas|namespace_workload_pod:kube_pod_owner:relabel|kube_pod_owner|node_namespace_pod_container:container_memory_working_set_bytes|kubelet_node_name|kube_job_failed|container_memory_working_set_bytes|kubelet_runtime_operations_duration_seconds_bucket|namespace_memory:kube_pod_container_resource_requests:sum|kube_node_info|kubernetes_build_info|kube_statefulset_status_update_revision|kube_pod_container_resource_limits|kubelet_running_pods|kube_statefulset_replicas|kube_namespace_created|node_namespace_pod_container:container_memory_rss|node_namespace_pod_container:container_memory_cache|kube_node_status_condition|kube_pod_container_resource_requests|kubelet_running_pod_count|namespace_memory:kube_pod_container_resource_limits:sum|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_pod_container_status_waiting_reason|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_running_container_count|node_namespace_pod_container:container_memory_swap|kube_deployment_status_replicas_updated|process_resident_memory_bytes|rest_client_requests_total|kube_node_status_allocatable|kube_statefulset_metadata_generation|kube_deployment_metadata_generation|container_network_transmit_packets_dropped_total|kube_pod_info|kubelet_cgroup_manager_duration_seconds_count|container_fs_reads_total|kube_daemonset_status_desired_number_scheduled|container_cpu_cfs_throttled_periods_total|kube_job_status_succeeded|kubelet_certificate_manager_client_ttl_seconds|kube_replicaset_owner|go_goroutines|namespace_cpu:kube_pod_container_resource_requests:sum
            action: keep
          relabel_configs:
          - replacement: kubernetes.default.svc.cluster.local:443
            target_label: __address__
          - regex: (.+)
            replacement: /api/v1/nodes/${1}/proxy/metrics
            source_labels:
              - __meta_kubernetes_node_name
            target_label: __metrics_path__
          scheme: https
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: false
            server_name: kubernetes
        - job_name: integrations/kubernetes/kube-state-metrics
          kubernetes_sd_configs:
            - role: service
          metric_relabel_configs:
          - source_labels: [__name__]
            regex: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate|container_memory_swap|kube_statefulset_status_replicas_ready|kube_node_spec_taint|storage_operation_duration_seconds_count|kubelet_pleg_relist_duration_seconds_count|cluster:namespace:pod_memory:active:kube_pod_container_resource_requests|kube_daemonset_status_number_available|kubelet_certificate_manager_server_ttl_seconds|container_network_receive_packets_total|namespace_workload_pod|kubelet_pod_worker_duration_seconds_count|kube_statefulset_status_replicas_updated|kube_pod_status_phase|volume_manager_total_volumes|kubelet_running_containers|storage_operation_errors_total|kube_statefulset_status_observed_generation|kubelet_node_config_error|container_fs_reads_bytes_total|kube_horizontalpodautoscaler_status_current_replicas|rest_client_request_duration_seconds_bucket|container_memory_cache|kube_daemonset_updated_number_scheduled|kube_job_spec_completions|kubelet_volume_stats_capacity_bytes|kube_daemonset_status_number_misscheduled|container_memory_rss|namespace_cpu:kube_pod_container_resource_limits:sum|kubelet_volume_stats_inodes_used|container_network_transmit_bytes_total|kubelet_runtime_operations_errors_total|container_fs_writes_total|kube_daemonset_status_current_number_scheduled|kube_node_status_capacity|container_network_receive_packets_dropped_total|storage_operation_duration_seconds_bucket|cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests|container_network_receive_bytes_total|kubelet_runtime_operations_total|kubelet_pod_start_duration_seconds_count|kube_horizontalpodautoscaler_status_desired_replicas|container_cpu_usage_seconds_total|up|kube_resourcequota|kubelet_cgroup_manager_duration_seconds_bucket|kube_horizontalpodautoscaler_spec_max_replicas|kubelet_server_expiration_renew_errors|container_fs_writes_bytes_total|kubelet_pod_worker_duration_seconds_bucket|machine_memory_bytes|cluster:namespace:pod_memory:active:kube_pod_container_resource_limits|kube_statefulset_status_current_revision|kube_deployment_status_observed_generation|kubelet_volume_stats_inodes|kubelet_volume_stats_available_bytes|kube_deployment_status_replicas_available|container_network_transmit_packets_total|kubelet_pleg_relist_duration_seconds_bucket|cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits|container_cpu_cfs_periods_total|process_cpu_seconds_total|kubelet_pleg_relist_interval_seconds_bucket|kube_horizontalpodautoscaler_spec_min_replicas|kube_statefulset_status_replicas|kube_deployment_spec_replicas|namespace_workload_pod:kube_pod_owner:relabel|kube_pod_owner|node_namespace_pod_container:container_memory_working_set_bytes|kubelet_node_name|kube_job_failed|container_memory_working_set_bytes|kubelet_runtime_operations_duration_seconds_bucket|namespace_memory:kube_pod_container_resource_requests:sum|kube_node_info|kubernetes_build_info|kube_statefulset_status_update_revision|kube_pod_container_resource_limits|kubelet_running_pods|kube_statefulset_replicas|kube_namespace_created|node_namespace_pod_container:container_memory_rss|node_namespace_pod_container:container_memory_cache|kube_node_status_condition|kube_pod_container_resource_requests|kubelet_running_pod_count|namespace_memory:kube_pod_container_resource_limits:sum|node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile|kube_pod_container_status_waiting_reason|kubelet_certificate_manager_client_expiration_renew_errors|kubelet_running_container_count|node_namespace_pod_container:container_memory_swap|kube_deployment_status_replicas_updated|process_resident_memory_bytes|rest_client_requests_total|kube_node_status_allocatable|kube_statefulset_metadata_generation|kube_deployment_metadata_generation|container_network_transmit_packets_dropped_total|kube_pod_info|kubelet_cgroup_manager_duration_seconds_count|container_fs_reads_total|kube_daemonset_status_desired_number_scheduled|container_cpu_cfs_throttled_periods_total|kube_job_status_succeeded|kubelet_certificate_manager_client_ttl_seconds|kube_replicaset_owner|go_goroutines|namespace_cpu:kube_pod_container_resource_requests:sum
            action: keep
          relabel_configs:
          - action: keep
            regex: ksm-kube-state-metrics
            source_labels:
              - __meta_kubernetes_service_name
        - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
          job_name: 'integrations/node_exporter'
          kubernetes_sd_configs:
            - role: endpoints
          relabel_configs:
          - source_labels: [__meta_kubernetes_endpoints_name]
            regex: 'node-exporter'
            action: keep
          # relabel 'instance'
          - source_labels: [__meta_kubernetes_pod_node_name]
            action: replace
            target_label: instance

    integrations:
      prometheus_remote_write:
      - url: https://prometheus-prod-01-eu-west-0.grafana.net/api/prom/push
        basic_auth:
          username: $USERNAME
          password: $APIKEY

    # logs example
    # logs:
    #   configs:
    #     - name: integrations
    #       clients:
    #         - url: https://logs-prod-eu-west-0.grafana.net/api/prom/push
    #           basic_auth:
    #             username: 195593
    #             password: $APIKEY
    #           external_labels:
    #             cluster: cloud
    #       positions:
    #         filename: /tmp/positions.yaml
    #       target_config:
    #         sync_period: 10s
@@ -0,0 +1,83 @@
# https://raw.githubusercontent.com/grafana/agent/v0.23.0/production/kubernetes/agent-bare.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: grafana-agent
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: grafana-agent
rules:
- apiGroups:
  - ""
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs:
  - get
  - list
  - watch
- nonResourceURLs:
  - /metrics
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: grafana-agent
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: grafana-agent
subjects:
- kind: ServiceAccount
  name: grafana-agent
  namespace: monitoring
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana-agent
  namespace: monitoring
spec:
  minReadySeconds: 10
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      name: grafana-agent
  template:
    metadata:
      labels:
        name: grafana-agent
    spec:
      containers:
      - args:
        - -config.file=/etc/agent/agent.yaml
        command:
        - /bin/agent
        env:
        - name: HOSTNAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        image: grafana/agent:v0.23.0
        imagePullPolicy: IfNotPresent
        name: agent
        ports:
        - containerPort: 12345
          name: http-metrics
        volumeMounts:
        - mountPath: /etc/agent
          name: grafana-agent
      serviceAccount: grafana-agent
      volumes:
      - configMap:
          name: grafana-agent
        name: grafana-agent
@@ -0,0 +1,96 @@
# see https://devopscube.com/node-exporter-kubernetes/
# and https://www.opsramp.com/prometheus-monitoring/prometheus-node-exporter/
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/name: node-exporter
  name: node-exporter
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app.kubernetes.io/component: exporter
      app.kubernetes.io/name: node-exporter
  template:
    metadata:
      labels:
        app.kubernetes.io/component: exporter
        app.kubernetes.io/name: node-exporter
    spec:
      containers:
      - args:
        - --path.sysfs=/host/sys
        - --path.rootfs=/host/root
        - --no-collector.wifi
        - --no-collector.hwmon
        - --no-collector.infiniband
        - --no-collector.filefd
        - --no-collector.ipvs
        - --no-collector.mdadm
        - --no-collector.netclass
        - --no-collector.netstat
        - --no-collector.nfsd
        - --no-collector.nvme
        - --no-collector.powersupplyclass
        - --no-collector.pressure
        - --no-collector.rapl
        - --no-collector.schedstat
        - --no-collector.sockstat
        - --no-collector.softnet
        - --no-collector.tapestats
        - --no-collector.thermal_zone
        - --no-collector.udp_queues
        - --no-collector.xfs
        - --no-collector.zfs
        - --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
        - --collector.netclass.ignored-devices=^(veth.*)$
        name: node-exporter
        image: prom/node-exporter
        ports:
        - containerPort: 9100
          protocol: TCP
        resources:
          limits:
            cpu: 500m
            memory: 180Mi
          requests:
            cpu: 102m
            memory: 180Mi
        volumeMounts:
        - mountPath: /host/sys
          mountPropagation: HostToContainer
          name: sys
          readOnly: true
        - mountPath: /host/root
          mountPropagation: HostToContainer
          name: root
          readOnly: true
      volumes:
      - hostPath:
          path: /sys
        name: sys
      - hostPath:
          path: /
        name: root
---
kind: Service
apiVersion: v1
metadata:
  name: node-exporter
  namespace: monitoring
  labels:
    instance: primary
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/port: '9100'
spec:
  selector:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/name: node-exporter
  ports:
  - name: node-exporter
    protocol: TCP
    port: 9100
    targetPort: 9100
@@ -0,0 +1,22 @@
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
spec:
  image: quay.io/prometheus/prometheus:v2.22.1
  nodeSelector:
    kubernetes.io/os: linux
  replicas: 2
  resources:
    requests:
      memory: 400Mi
  securityContext:
    fsGroup: 2000
    runAsNonRoot: true
    runAsUser: 1000
  serviceAccountName: prometheus
  version: v2.22.1
  serviceMonitorSelector: {}
@@ -0,0 +1,42 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/metrics
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources:
  - configmaps
  verbs: ["get"]
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitoring
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
spec:
  ports:
  - name: web
    port: 9090
    targetPort: web
  selector:
    app: prometheus
  sessionAffinity: ClientIP
@@ -0,0 +1,14 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: prometheus-self
  namespace: monitoring
  labels:
    app: prometheus
spec:
  endpoints:
  - interval: 30s
    port: web
  selector:
    matchLabels:
      app: prometheus