[stable/zed] Add monitoring for stuck VMs (#1133)

This is an automated cherry-pick of #1129
/assign mnaser
diff --git a/roles/defaults/vars/main.yml b/roles/defaults/vars/main.yml
index 4b82047..6d36998 100644
--- a/roles/defaults/vars/main.yml
+++ b/roles/defaults/vars/main.yml
@@ -174,7 +174,7 @@
   prometheus_memcached_exporter: quay.io/prometheus/memcached-exporter:v0.10.0
   prometheus_mysqld_exporter: quay.io/prometheus/mysqld-exporter:v0.14.0
   prometheus_node_exporter: quay.io/prometheus/node-exporter:v1.7.0
-  prometheus_openstack_database_exporter: ghcr.io/vexxhost/openstack-database-exporter:v0.2.0
+  prometheus_openstack_database_exporter: ghcr.io/vexxhost/openstack-database-exporter:v0.3.0
   prometheus_openstack_exporter: ghcr.io/openstack-exporter/openstack-exporter:1.7.0
   prometheus_operator_kube_webhook_certgen: registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20221220-controller-v1.5.1-58-g787ea74b6
   prometheus_operator: quay.io/prometheus-operator/prometheus-operator:v0.73.0
diff --git a/roles/kube_prometheus_stack/files/jsonnet/openstack.libsonnet b/roles/kube_prometheus_stack/files/jsonnet/openstack.libsonnet
index b8e6ac1..e77077e 100644
--- a/roles/kube_prometheus_stack/files/jsonnet/openstack.libsonnet
+++ b/roles/kube_prometheus_stack/files/jsonnet/openstack.libsonnet
@@ -204,6 +204,18 @@
             },
           },
           {
+            alert: 'NovaServerTaskStateStuck',
+            annotations: {
+              summary: 'Nova server stuck in task state',
+              description: 'Nova server with ID {{ $labels.id }} stuck in {{ $labels.task_state }} state for more than 1 hour',
+            },
+            expr: 'openstack_nova_server_task_state > 0',
+            'for': '1h',
+            labels: {
+              severity: 'P3',
+            },
+          },
+          {
             alert: 'NovaInstanceError',
             expr: 'openstack_nova_server_status{status="ERROR"} > 0',
             'for': '24h',
diff --git a/roles/openstack_exporter/tasks/main.yml b/roles/openstack_exporter/tasks/main.yml
index 21e1915..115445d 100644
--- a/roles/openstack_exporter/tasks/main.yml
+++ b/roles/openstack_exporter/tasks/main.yml
@@ -117,15 +117,6 @@
           selector:
             application: openstack-exporter
 
-- name: Fetch Octavia DB secret
-  run_once: true
-  no_log: true
-  kubernetes.core.k8s_info:
-    kind: Secret
-    namespace: openstack
-    name: octavia-db-user
-  register: _octavia_db_user
-
 - name: Fetch Neutron DB secret
   run_once: true
   no_log: true
@@ -135,6 +126,24 @@
     name: neutron-db-user
   register: _neutron_db_user
 
+- name: Fetch Nova DB secret
+  run_once: true
+  no_log: true
+  kubernetes.core.k8s_info:
+    kind: Secret
+    namespace: openstack
+    name: nova-db-user
+  register: _nova_db_user
+
+- name: Fetch Octavia DB secret
+  run_once: true
+  no_log: true
+  kubernetes.core.k8s_info:
+    kind: Secret
+    namespace: openstack
+    name: octavia-db-user
+  register: _octavia_db_user
+
 - name: Create "openstack-database-exporter-dsn" secret
   run_once: true
   no_log: true
@@ -151,6 +160,7 @@
           application: openstack-database-exporter
       stringData:
         NEUTRON_DSN: "{{ _neutron_db_user.resources.0.data.DB_CONNECTION | b64decode |  regex_replace('^.*//', '') | regex_replace('@(.*)/', '@tcp(\\1)/') }}"
+        NOVA_DSN: "{{ _nova_db_user.resources.0.data.DB_CONNECTION | b64decode |  regex_replace('^.*//', '') | regex_replace('@(.*)/', '@tcp(\\1)/') }}"
         OCTAVIA_DSN: "{{ _octavia_db_user.resources.0.data.DB_CONNECTION | b64decode |  regex_replace('^.*//', '') | regex_replace('@(.*)/', '@tcp(\\1)/') }}"
 
 - name: Deploy service