Add TLS to node exporter (#1775)

Initial round of node-tls-sidecar
Fix image name
Added pod-tls-sidecar for node-exporter
The art of escaping Helm and Ansible
Fix configmap mount
Dance around Helm
Fix volume mount
Mount service token
Added RBAC
Fix role binding name
Switch node exporter to use TLS
Fix mount
Switch to VerifyClientCertIfGiven
diff --git a/roles/defaults/vars/main.yml b/roles/defaults/vars/main.yml
index 321cd6d..86a7f0f 100644
--- a/roles/defaults/vars/main.yml
+++ b/roles/defaults/vars/main.yml
@@ -194,6 +194,7 @@
   percona_version_service: docker.io/perconalab/version-service:production-2048c1f
   placement_db_sync: "registry.atmosphere.dev/library/placement:{{ atmosphere_release }}"
   placement: "registry.atmosphere.dev/library/placement:{{ atmosphere_release }}"
+  pod_tls_sidecar: registry.atmosphere.dev/library/pod-tls-sidecar:latest  # NOTE(review): floating tag, unlike the pinned images around it — consider pinning
   prometheus_config_reloader: quay.io/prometheus-operator/prometheus-config-reloader:v0.73.0
   prometheus_ipmi_exporter: us-docker.pkg.dev/vexxhost-infra/openstack/ipmi-exporter:1.4.0
   prometheus_memcached_exporter: quay.io/prometheus/memcached-exporter:v0.14.3
diff --git a/roles/kube_prometheus_stack/defaults/main.yml b/roles/kube_prometheus_stack/defaults/main.yml
index 0579115..ec15136 100644
--- a/roles/kube_prometheus_stack/defaults/main.yml
+++ b/roles/kube_prometheus_stack/defaults/main.yml
@@ -19,6 +19,16 @@
 kube_prometheus_stack_helm_release_namespace: monitoring
 kube_prometheus_stack_helm_values: {}
 
+kube_prometheus_stack_node_exporter_tls_template: "{{ _kube_prometheus_stack_tls_template }}"
+kube_prometheus_stack_node_exporter_config:
+  tls_server_config:
+    cert_file: /certs/tls.crt
+    key_file: /certs/tls.key
+    client_ca_file: /certs/ca.crt
+    # NOTE(mnaser): Client certificates are only verified when one is offered,
+    #               because the kubelet has no way of presenting one.
+    client_auth_type: VerifyClientCertIfGiven
+
 kube_prometheus_stack_ingress_class_name: "{{ atmosphere_ingress_class_name }}"
 kube_prometheus_stack_ingress_cluster_issuer: "{{ atmosphere_ingress_cluster_issuer }}"
 
@@ -38,6 +48,7 @@
 kube_prometheus_stack_prometheus_ingress_annotations:
   cert-manager.io/cluster-issuer: "{{ kube_prometheus_stack_ingress_cluster_issuer }}"
   cert-manager.io/common-name: "{{ kube_prometheus_stack_prometheus_host }}"
+kube_prometheus_stack_prometheus_tls_template: "{{ _kube_prometheus_stack_tls_template }}"  # rendered into the "<release>-prometheus-tls" ConfigMap
 
 kube_prometheus_stack_keycloak_server_url: "https://{{ keycloak_host }}"
 kube_prometheus_stack_keycloak_admin_realm_name: master
diff --git a/roles/kube_prometheus_stack/tasks/main.yml b/roles/kube_prometheus_stack/tasks/main.yml
index 9d90c2a..3926eab 100644
--- a/roles/kube_prometheus_stack/tasks/main.yml
+++ b/roles/kube_prometheus_stack/tasks/main.yml
@@ -257,6 +257,37 @@
   loop_control:
     label: "{{ item.id }}"
 
+- name: Create certificate issuer  # CA Certificate plus the ClusterIssuer that signs from it
+  kubernetes.core.k8s:
+    state: present
+    definition:
+      - apiVersion: cert-manager.io/v1
+        kind: Certificate
+        metadata:
+          name: kube-prometheus-stack-ca
+          namespace: cert-manager  # NOTE(review): presumably cert-manager's cluster resource namespace, where a ClusterIssuer looks up its CA secret — confirm
+        spec:
+          commonName: kube-prometheus-stack
+          duration: 87600h0m0s  # 3650 days (~10 years)
+          isCA: true
+          issuerRef:
+            group: cert-manager.io
+            kind: ClusterIssuer
+            name: self-signed  # bootstrap issuer; not created by this task, assumed to exist already
+          privateKey:
+            algorithm: ECDSA
+            size: 256
+          renewBefore: 720h0m0s  # 30 days
+          secretName: kube-prometheus-stack-ca
+
+      - apiVersion: cert-manager.io/v1
+        kind: ClusterIssuer
+        metadata:
+          name: kube-prometheus-stack
+        spec:
+          ca:
+            secretName: kube-prometheus-stack-ca  # secret produced by the Certificate above
+
 - name: Install all CRDs
   run_once: true
   changed_when: false
diff --git a/roles/kube_prometheus_stack/vars/main.yml b/roles/kube_prometheus_stack/vars/main.yml
index c7506c1..80a74cb 100644
--- a/roles/kube_prometheus_stack/vars/main.yml
+++ b/roles/kube_prometheus_stack/vars/main.yml
@@ -323,6 +323,35 @@
       secrets:
         - kube-prometheus-stack-etcd-client-cert
       containers:
+        - name: pod-tls-sidecar
+          image: "{{ atmosphere_images['pod_tls_sidecar'] }}"
+          args:
+            - --template=/config/certificate-template.yml  # key of the ConfigMap mounted at /config
+            - --ca-path=/certs/ca.crt  # all three output paths live on the shared `certs` volume
+            - --cert-path=/certs/tls.crt
+            - --key-path=/certs/tls.key
+          env:  # pod identity handed to the sidecar through downward API fieldRefs
+            - name: POD_UID
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.uid
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: POD_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: POD_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.podIP
+          volumeMounts:
+            - name: kube-prometheus-stack-prometheus-tls  # ConfigMap volume holding certificate-template.yml
+              mountPath: /config
+            - name: certs
+              mountPath: /certs
         - name: oauth2-proxy
           image: "{{ atmosphere_images['oauth2_proxy'] }}"
           envFrom:
@@ -364,6 +393,15 @@
         - name: ca-certificates
           hostPath:
             path: "{{ defaults_ca_certificates_path }}"
+        - name: certs  # mounted at /certs by pod-tls-sidecar and by the volumeMounts entry below
+          emptyDir:
+            medium: Memory  # RAM-backed (tmpfs) emptyDir
+        - name: kube-prometheus-stack-prometheus-tls
+          configMap:
+            name: "{{ kube_prometheus_stack_helm_release_name }}-prometheus-tls"  # must match the ConfigMap created under extraManifests
+      volumeMounts:
+        - name: certs
+          mountPath: /certs
     additionalServiceMonitors:
       - name: goldpinger
         jobLabel: app.kubernetes.io/instance
@@ -545,6 +583,16 @@
       registry: "{{ atmosphere_images['prometheus_node_exporter'] | vexxhost.kubernetes.docker_image('domain') }}"
       repository: "{{ atmosphere_images['prometheus_node_exporter'] | vexxhost.kubernetes.docker_image('path') }}"
       tag: "{{ atmosphere_images['prometheus_node_exporter'] | vexxhost.kubernetes.docker_image('tag') }}"
+    prometheus:
+      monitor:
+        scheme: https  # the exporter now serves TLS via --web.config.file
+        tlsConfig:  # NOTE(review): presumably resolved inside the Prometheus container, which mounts `certs` at /certs — confirm
+          caFile: /certs/ca.crt
+          certFile: /certs/tls.crt
+          keyFile: /certs/tls.key
+        relabelings: *relabelings_instance_to_node_name
+    serviceAccount:
+      automountServiceAccountToken: true  # NOTE(review): presumably so pod-tls-sidecar can call the Kubernetes API under the Role bound in extraManifests — confirm
     extraArgs:
       - --collector.diskstats.ignored-devices=^(ram|loop|nbd|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$
       - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|fuse.squashfuse_ll|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
@@ -554,7 +602,125 @@
       - --collector.processes
       - --collector.systemd
       - --collector.stat.softirq
-    prometheus:
-      monitor:
-        relabelings: *relabelings_instance_to_node_name
+      - --web.config.file=/config/node-exporter.yml  # rendered from kube_prometheus_stack_node_exporter_config
+    configmaps:  # supplies /config/node-exporter.yml to the exporter
+      - name: kube-prometheus-stack-node-exporter
+        mountPath: /config
+    sidecars:
+      - name: pod-tls-sidecar
+        image: "{{ atmosphere_images['pod_tls_sidecar'] }}"
+        args:
+          - --template=/config/certificate-template.yml  # second key of the same ConfigMap
+          - --ca-path=/certs/ca.crt  # same paths the exporter's tls_server_config reads
+          - --cert-path=/certs/tls.crt
+          - --key-path=/certs/tls.key
+        env:  # pod identity handed to the sidecar through downward API fieldRefs
+          - name: POD_UID
+            valueFrom:
+              fieldRef:
+                fieldPath: metadata.uid
+          - name: POD_NAME
+            valueFrom:
+              fieldRef:
+                fieldPath: metadata.name
+          - name: POD_NAMESPACE
+            valueFrom:
+              fieldRef:
+                fieldPath: metadata.namespace
+          - name: POD_IP
+            valueFrom:
+              fieldRef:
+                fieldPath: status.podIP
+        volumeMounts:
+          - name: kube-prometheus-stack-node-exporter  # NOTE(review): assumes the chart names the ConfigMap volume after the ConfigMap — confirm
+            mountPath: /config
+    sidecarVolumeMount:  # NOTE(review): presumably a volume shared by the exporter and its sidecars — confirm against the chart
+      - name: certs
+        mountPath: /certs
+    livenessProbe:  # probes switch to HTTPS along with the exporter endpoint
+      httpGet:
+        scheme: https
+    readinessProbe:
+      httpGet:
+        scheme: https
+    extraManifests:  # ConfigMap carrying both the web config and the certificate template
+      - |
+        apiVersion: v1
+        kind: ConfigMap
+        metadata:
+          name: kube-prometheus-stack-node-exporter
+        data:
+          node-exporter.yml: |
+            {{ kube_prometheus_stack_node_exporter_config | to_nice_yaml | indent(4) }}
+          certificate-template.yml: |
+            {{ kube_prometheus_stack_node_exporter_tls_template | to_nice_yaml | indent(4) }}
   additionalPrometheusRulesMap: "{{ lookup('vexxhost.atmosphere.jsonnet', 'jsonnet/rules.jsonnet') }}"
+  extraManifests:  # RBAC for pod-tls-sidecar plus the Prometheus certificate template
+    - apiVersion: rbac.authorization.k8s.io/v1
+      kind: Role
+      metadata:
+        name: "{{ kube_prometheus_stack_helm_release_name }}-pod-tls-sidecar"
+        namespace: "{{ kube_prometheus_stack_helm_release_namespace }}"
+      rules:
+        - apiGroups:
+            - cert-manager.io
+          resources:
+            - certificates
+          verbs:
+            - get
+            - list
+            - create
+            - watch
+        - apiGroups:
+            - ""
+          resources:
+            - secrets
+          verbs:
+            - get
+            - list
+            - patch
+            - watch
+    - apiVersion: rbac.authorization.k8s.io/v1
+      kind: RoleBinding
+      metadata:
+        name: "{{ kube_prometheus_stack_helm_release_name }}-pod-tls-sidecar"
+        namespace: "{{ kube_prometheus_stack_helm_release_namespace }}"
+      subjects:  # both workloads that run a pod-tls-sidecar container
+        - kind: ServiceAccount
+          name: "{{ kube_prometheus_stack_helm_release_name }}-prometheus-node-exporter"
+          namespace: "{{ kube_prometheus_stack_helm_release_namespace }}"
+        - kind: ServiceAccount
+          name: "{{ kube_prometheus_stack_helm_release_name }}-prometheus"
+          namespace: "{{ kube_prometheus_stack_helm_release_namespace }}"
+      roleRef:
+        apiGroup: rbac.authorization.k8s.io
+        kind: Role
+        name: "{{ kube_prometheus_stack_helm_release_name }}-pod-tls-sidecar"
+    - apiVersion: v1
+      kind: ConfigMap
+      metadata:
+        name: "{{ kube_prometheus_stack_helm_release_name }}-prometheus-tls"
+      data:
+        certificate-template.yml: |
+          {{ kube_prometheus_stack_prometheus_tls_template | to_nice_yaml }}
+
+_kube_prometheus_stack_tls_template:  # shared default behind the node exporter and Prometheus *_tls_template variables
+  apiVersion: cert-manager.io/v1
+  kind: Certificate
+  metadata:
+    name: "{{ '{{`{{ .PodInfo.Name }}`}}' ~ '-tls' }}"  # Jinja emits a backtick-quoted Go-template literal; NOTE(review): presumably Helm passes the inner braces through for pod-tls-sidecar to expand — confirm
+    namespace: "{{ '{{`{{ .PodInfo.Namespace }}`}}' }}"
+  spec:
+    commonName: "{{ '{{`{{ .Hostname }}`}}' }}"
+    dnsNames:
+      - "{{ '{{`{{ .Hostname }}`}}' }}"
+      - "{{ '{{`{{ .FQDN }}`}}' }}"
+    ipAddresses:
+      - "{{ '{{`{{ .PodInfo.IP }}`}}' }}"
+    issuerRef:
+      kind: ClusterIssuer
+      name: kube-prometheus-stack  # the ClusterIssuer created in tasks/main.yml
+    usages:  # both: the same certificate is a server cert on the exporter and a client cert for Prometheus scrapes
+      - client auth
+      - server auth
+    secretName: "{{ '{{`{{ .PodInfo.Name }}`}}' ~ '-tls' }}"