feat(monitoring): add kube-prometheus-stack to operator
diff --git a/.ansible-lint b/.ansible-lint
index 4906bcf..4643671 100644
--- a/.ansible-lint
+++ b/.ansible-lint
@@ -1,9 +1,9 @@
---
exclude_paths:
- .github
+ - atmosphere
- molecule
- playbooks
- - roles/kube_prometheus_stack/files/
warn_list:
- jinja[invalid]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e621fc8..ac3d06c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,5 @@
+exclude: '^atmosphere/jsonnet/vendor'
+
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
@@ -33,6 +35,14 @@
hooks:
- id: isort
+ - repo: https://github.com/google/go-jsonnet
+ rev: v0.18.0
+ hooks:
+ - id: jsonnet-format
+        files: atmosphere/jsonnet/\w+\.(jsonnet|libsonnet)$
+ - id: jsonnet-lint
+        files: atmosphere/jsonnet/\w+\.(jsonnet|libsonnet)$
+
- repo: https://github.com/igorshubovych/markdownlint-cli
rev: v0.32.2
hooks:
diff --git a/atmosphere/flows.py b/atmosphere/flows.py
index 3076306..bda7fae 100644
--- a/atmosphere/flows.py
+++ b/atmosphere/flows.py
@@ -42,10 +42,8 @@
*cert_manager.issuer_tasks_from_config(config.issuer),
# monitoring
v1.ApplyNamespaceTask(name=constants.NAMESPACE_MONITORING),
- flux.ApplyHelmRepositoryTask(
- namespace=constants.NAMESPACE_MONITORING,
- name=constants.HELM_REPOSITORY_PROMETHEUS_COMMUINTY,
- url="https://prometheus-community.github.io/helm-charts",
+ *openstack_helm.kube_prometheus_stack_tasks_from_config(
+ config.kube_prometheus_stack
),
flux.ApplyHelmRepositoryTask(
namespace=constants.NAMESPACE_MONITORING,
diff --git a/atmosphere/jsonnet/__init__.py b/atmosphere/jsonnet/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/atmosphere/jsonnet/__init__.py
diff --git a/atmosphere/jsonnet/ceph.libsonnet b/atmosphere/jsonnet/ceph.libsonnet
new file mode 100644
index 0000000..b72a889
--- /dev/null
+++ b/atmosphere/jsonnet/ceph.libsonnet
@@ -0,0 +1,32 @@
+local ceph = import 'vendor/ceph-mixin/mixin.libsonnet';
+
+local DISABLED_NODE_ALERTS = [
+ // * Dropped `CephNodeDiskspaceWarning` because we already have a
+ // few alerts like `NodeFilesystemSpaceFillingUp`, etc.
+ 'CephNodeDiskspaceWarning',
+
+ // * Dropped `CephNodeNetworkPacketDrops` due to noisy alerts with
+ // no actionable items to fix it.
+ 'CephNodeNetworkPacketDrops',
+];
+
+local disableAlerts = {
+ prometheusAlerts+:: {
+ groups: std.map(
+ function(group)
+ if group.name == 'nodes' then
+ group {
+ rules: std.filter(
+ function(rule)
+ std.setMember(rule.alert, DISABLED_NODE_ALERTS) == false,
+ group.rules
+ ),
+ }
+ else
+ group,
+ super.groups
+ ),
+ },
+};
+
+(ceph + disableAlerts)
diff --git a/atmosphere/jsonnet/jsonnetfile.json b/atmosphere/jsonnet/jsonnetfile.json
new file mode 100644
index 0000000..4e393e3
--- /dev/null
+++ b/atmosphere/jsonnet/jsonnetfile.json
@@ -0,0 +1,42 @@
+{
+ "version": 1,
+ "dependencies": [
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/ceph/ceph.git",
+ "subdir": "monitoring/ceph-mixin"
+ }
+ },
+ "version": "main"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/jsonnet-libs.git",
+ "subdir": "memcached-mixin"
+ }
+ },
+ "version": "master"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/povilasv/coredns-mixin.git",
+ "subdir": ""
+ }
+ },
+ "version": "master"
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus/mysqld_exporter.git",
+ "subdir": "mysqld-mixin"
+ }
+ },
+ "version": "main"
+ }
+ ],
+ "legacyImports": true
+}
diff --git a/atmosphere/jsonnet/jsonnetfile.lock.json b/atmosphere/jsonnet/jsonnetfile.lock.json
new file mode 100644
index 0000000..56ba27a
--- /dev/null
+++ b/atmosphere/jsonnet/jsonnetfile.lock.json
@@ -0,0 +1,66 @@
+{
+ "version": 1,
+ "dependencies": [
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/ceph/ceph.git",
+ "subdir": "monitoring/ceph-mixin"
+ }
+ },
+ "version": "189861f1b879e92afa922f561e5f4c9ddb77b00d",
+ "sum": "ZnyCIu25NBI6Q3Ru7QK1DHf7DBMEURSMQdEJXzCyIgA="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/grafonnet-lib.git",
+ "subdir": "grafonnet"
+ }
+ },
+ "version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
+ "sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/jsonnet-libs.git",
+ "subdir": "grafana-builder"
+ }
+ },
+ "version": "d73aff453c9784cd6922119f3ce33d8d355a79e1",
+ "sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/jsonnet-libs.git",
+ "subdir": "memcached-mixin"
+ }
+ },
+ "version": "d73aff453c9784cd6922119f3ce33d8d355a79e1",
+ "sum": "kl5GJvwAVAh0qIKsAFpNXm/jhHhv/8tBbpri7VCpZ2I="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/povilasv/coredns-mixin.git",
+ "subdir": ""
+ }
+ },
+ "version": "8b121c41b6c0741f1603b2d75ccd324050c4bd03",
+ "sum": "4CxwtfVX/OIS5w5+FkExG5evxiatOEMhaWChWaFc9S4="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus/mysqld_exporter.git",
+ "subdir": "mysqld-mixin"
+ }
+ },
+ "version": "503f1fa222f0afc74a1dcf4a0ef5a7c2dfa4d105",
+ "sum": "G69++5ExKgQ9niW0Owmw0orc8voP0Qll2WZJ1fHAqzE="
+ }
+ ],
+ "legacyImports": false
+}
diff --git a/atmosphere/jsonnet/legacy.libsonnet b/atmosphere/jsonnet/legacy.libsonnet
new file mode 100644
index 0000000..9eeaf42
--- /dev/null
+++ b/atmosphere/jsonnet/legacy.libsonnet
@@ -0,0 +1,491 @@
+{
+ 'ethtool-exporter': {
+ groups: [
+ {
+ name: 'rules',
+ rules: [
+ {
+ alert: 'EthernetReceiveDiscards',
+ expr: 'rate(node_net_ethtool{type="rx_discards"}[1m]) > 0',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ ],
+ },
+ ],
+ },
+ 'ipmi-exporter': {
+ groups: [
+ {
+ name: 'rules',
+ rules: [
+ {
+ alert: 'IpmiCollectorDown',
+ expr: 'ipmi_up == 0',
+ },
+ ],
+ },
+ {
+ name: 'collectors-state-warning',
+ rules: [
+ {
+ alert: 'IpmiCurrent',
+ expr: 'ipmi_current_state == 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'IpmiFanSpeed',
+ expr: 'ipmi_fan_speed_state == 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'IpmiPower',
+ expr: 'ipmi_power_state == 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'IpmiSensor',
+ expr: 'ipmi_sensor_state == 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'IpmiTemperature',
+ expr: 'ipmi_temperature_state == 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'IpmiVoltage',
+ expr: 'ipmi_voltage_state == 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ ],
+ },
+ {
+ name: 'collectors-state-critical',
+ rules: [
+ {
+ alert: 'IpmiCurrent',
+ expr: 'ipmi_current_state == 2',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'IpmiFanSpeed',
+ expr: 'ipmi_fan_speed_state == 2',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'IpmiPower',
+ expr: 'ipmi_power_state == 2',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'IpmiSensor',
+ expr: 'ipmi_sensor_state == 2',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'IpmiTemperature',
+ expr: 'ipmi_temperature_state == 2',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'IpmiVoltage',
+ expr: 'ipmi_voltage_state == 2',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ ],
+ },
+ ],
+ },
+ 'node-exporter-local': {
+ groups: [
+ {
+ name: 'node',
+ rules: [
+ {
+ alert: 'NodeHighLoadAverage',
+ expr: 'node_load5 / count(node_cpu_seconds_total{mode="system"}) without (cpu, mode) > 1.5',
+ 'for': '30m',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NodeHighMemoryUsage',
+ expr: '(node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 < 2.5',
+ 'for': '2m',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'NodeHighCpuUsage',
+ expr: "sum by(instance)(irate(node_cpu_seconds_total{mode='idle'}[5m])) < 1",
+ 'for': '2m',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NodeLowEntropy',
+ expr: 'node_entropy_available_bits < 1000',
+ 'for': '5m',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ ],
+ },
+ {
+ name: 'softnet',
+ rules: [
+ {
+ alert: 'NodeSoftNetTimesSqueezed',
+ expr: 'sum(rate(node_softnet_times_squeezed_total[1m])) by (instance) > 10',
+ 'for': '10m',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NodeSoftNetDrops',
+ expr: 'sum(rate(node_softnet_dropped_total[1m])) by (instance) != 0',
+ 'for': '1m',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ ],
+ },
+ ],
+ },
+ 'openstack-exporter': {
+ groups: [
+ {
+ name: 'cinder',
+ rules: [
+ {
+ alert: 'CinderAgentDown',
+ annotations: {
+ description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} is being reported as down.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} down',
+ },
+ expr: 'openstack_cinder_agent_state != 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'CinderAgentDown',
+ annotations: {
+ description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} is being reported as down for 5 minutes. This can affect volume operations so it must be resolved as quickly as possible.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} down',
+ },
+ expr: 'openstack_cinder_agent_state != 1',
+ 'for': '5m',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'CinderAgentDisabled',
+ annotations: {
+ description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} has been disabled for 60 minutes. This can affect volume operations so it must be resolved as quickly as possible.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} disabled',
+ },
+ expr: 'openstack_cinder_agent_state{adminState!="enabled"}',
+ 'for': '1h',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'CinderVolumeInError',
+ annotations: {
+ description: 'The volume {{ $labels.id }} has been in ERROR state for over 24 hours. It must be cleaned up or removed in order to provide a consistent customer experience.',
+            summary: '[{{ $labels.id }}] Volume in ERROR state',
+ },
+ expr: 'openstack_cinder_volume_status{status=~"error.*"}',
+ 'for': '24h',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ ],
+ },
+ {
+ name: 'neutron',
+ rules: [
+ {
+ alert: 'NeutronAgentDown',
+ annotations: {
+ description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} is being reported as down.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} down',
+ },
+ expr: 'openstack_neutron_agent_state != 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NeutronAgentDown',
+ annotations: {
+ description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} is being reported as down for 5 minutes. This can affect network operations so it must be resolved as quickly as possible.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} down',
+ },
+ expr: 'openstack_neutron_agent_state != 1',
+ 'for': '5m',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'NeutronAgentDisabled',
+ annotations: {
+ description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} has been disabled for 60 minutes. This can affect network operations so it must be resolved as quickly as possible.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} disabled',
+ },
+ expr: 'openstack_neutron_agent_state{adminState!="up"}',
+ 'for': '1h',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NeutronBindingFailedPorts',
+ annotations: {
+ description: 'The NIC {{ $labels.mac_address }} of {{ $labels.device_owner }} has binding failed port now.',
+ summary: '[{{ $labels.device_owner }}] {{ $labels.mac_address }} binding failed',
+ },
+ expr: 'openstack_neutron_port{binding_vif_type="binding_failed"} != 0',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NeutronNetworkOutOfIPs',
+ annotations: {
+ description: 'The subnet {{ $labels.subnet_name }} within {{ $labels.network_name }} is currently at {{ $value }}% utilization. If the IP addresses run out, it will impact the provisioning of new ports.',
+ summary: '[{{ $labels.network_name }}] {{ $labels.subnet_name }} running out of IPs',
+ },
+ expr: 'sum by (network_id) (openstack_neutron_network_ip_availabilities_used{project_id!=""}) / sum by (network_id) (openstack_neutron_network_ip_availabilities_total{project_id!=""}) * 100 > 80',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ ],
+ },
+ {
+ name: 'nova',
+ rules: [
+ {
+ alert: 'NovaAgentDown',
+ annotations: {
+ description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} is being reported as down.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} down',
+ },
+ expr: 'openstack_nova_agent_state != 1',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NovaAgentDown',
+ annotations: {
+            description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} is being reported as down for 5 minutes. This can affect compute operations so it must be resolved as quickly as possible.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} down',
+ },
+ expr: 'openstack_nova_agent_state != 1',
+ 'for': '5m',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'NovaAgentDisabled',
+ annotations: {
+ description: 'The service {{ $labels.exported_service }} running on {{ $labels.hostname }} has been disabled for 60 minutes. This can affect compute operations so it must be resolved as quickly as possible.',
+ summary: '[{{ $labels.hostname }}] {{ $labels.exported_service }} disabled',
+ },
+ expr: 'openstack_nova_agent_state{adminState!="enabled"}',
+ 'for': '1h',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NovaInstanceInError',
+ annotations: {
+ description: 'The instance {{ $labels.id }} has been in ERROR state for over 24 hours. It must be cleaned up or removed in order to provide a consistent customer experience.',
+ summary: '[{{ $labels.id }}] Instance in ERROR state',
+ },
+ expr: 'openstack_nova_server_status{status="ERROR"}',
+ 'for': '24h',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NovaFailureRisk',
+ annotations: {
+ description: 'The cloud capacity will be at {{ $value }} in the event of the failure of a single hypervisor which puts the cloud at risk of not being able to recover should any hypervisor failures occur. Please ensure that adequate amount of infrastructure is assigned to this deployment to prevent this.',
+ summary: '[nova] Failure risk',
+ },
+ expr: '(sum(openstack_nova_memory_available_bytes-openstack_nova_memory_used_bytes) - max(openstack_nova_memory_used_bytes)) / sum(openstack_nova_memory_available_bytes-openstack_nova_memory_used_bytes) * 100 < 0.25',
+ 'for': '6h',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'NovaCapacity',
+ annotations: {
+ description: 'The cloud capacity is currently at `{{ $value }}` which means there is a risk of running out of capacity due to the timeline required to add new nodes. Please ensure that adequate amount of infrastructure is assigned to this deployment to prevent this.',
+ summary: '[nova] Capacity risk',
+ },
+ expr: 'sum ( openstack_nova_memory_used_bytes + on(hostname) group_left(adminState) (0 * openstack_nova_agent_state{exported_service="nova-compute",adminState="enabled"}) ) / sum ( openstack_nova_memory_available_bytes + on(hostname) group_left(adminState) (0 * openstack_nova_agent_state{exported_service="nova-compute",adminState="enabled"}) ) * 100 > 75',
+ 'for': '6h',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ ],
+ },
+ ],
+ },
+ rabbitmq: {
+ groups: [
+ {
+ name: 'recording',
+ rules: [
+ {
+ record: 'rabbitmq:usage:memory',
+ labels: {
+ job: 'rabbitmq',
+ },
+ expr: 'sum without (job) ( rabbitmq_process_resident_memory_bytes ) / sum without ( container, pod, job, namespace, node, resource, uid, unit ) ( label_replace( cluster:namespace:pod_memory:active:kube_pod_container_resource_limits, "instance", "$1", "pod", "(.*)" ) )',
+ },
+ ],
+ },
+ {
+ name: 'alarms',
+ rules: [
+ {
+ alert: 'RabbitmqAlarmFreeDiskSpace',
+ expr: 'rabbitmq_alarms_free_disk_space_watermark == 1',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'RabbitmqAlarmMemoryUsedWatermark',
+ expr: 'rabbitmq_alarms_memory_used_watermark == 1',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'RabbitmqAlarmFileDescriptorLimit',
+ expr: 'rabbitmq_alarms_file_descriptor_limit == 1',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ ],
+ },
+ {
+ name: 'limits',
+ rules: [
+ {
+ alert: 'RabbitmqMemoryHigh',
+ expr: 'rabbitmq:usage:memory > 0.80',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'RabbitmqMemoryHigh',
+ expr: 'rabbitmq:usage:memory > 0.95',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'RabbitmqFileDescriptorsUsage',
+ expr: 'rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.80',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'RabbitmqFileDescriptorsUsage',
+ expr: 'rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.95',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ {
+ alert: 'RabbitmqTcpSocketsUsage',
+ expr: 'rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.80',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'RabbitmqTcpSocketsUsage',
+ expr: 'rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.95',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ ],
+ },
+ {
+ name: 'msgs',
+ rules: [
+ {
+ alert: 'RabbitmqUnackedMessages',
+ expr: 'sum(rabbitmq_queue_messages_unacked) BY (queue) > 1000',
+ 'for': '5m',
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'RabbitmqUnackedMessages',
+ expr: 'sum(rabbitmq_queue_messages_unacked) BY (queue) > 1000',
+ 'for': '1h',
+ labels: {
+ severity: 'critical',
+ },
+ },
+ ],
+ },
+ ],
+ },
+}
diff --git a/atmosphere/jsonnet/mysqld.libsonnet b/atmosphere/jsonnet/mysqld.libsonnet
new file mode 100644
index 0000000..9ddf46b
--- /dev/null
+++ b/atmosphere/jsonnet/mysqld.libsonnet
@@ -0,0 +1,54 @@
+local addAlerts = {
+ prometheusAlerts+::
+ {
+ groups+: [
+ {
+ name: 'mysqld-extras',
+
+ rules: [
+ {
+ alert: 'MysqlTooManyConnections',
+ 'for': '1m',
+ expr: |||
+ max_over_time(mysql_global_status_threads_connected[1m]) / mysql_global_variables_max_connections * 100 > 80
+ |||,
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'MysqlHighThreadsRunning',
+ 'for': '1m',
+ expr: |||
+ max_over_time(mysql_global_status_threads_running[1m]) / mysql_global_variables_max_connections * 100 > 60
+ |||,
+ labels: {
+ severity: 'warning',
+ },
+ },
+ {
+ alert: 'MysqlSlowQueries',
+ 'for': '2m',
+ expr: |||
+ increase(mysql_global_status_slow_queries[1m]) > 0
+ |||,
+ labels: {
+ severity: 'warning',
+ },
+ },
+ ],
+ },
+ ],
+ },
+};
+
+{
+ prometheusAlerts: {
+ groups:
+ (
+ std.parseYaml(importstr 'vendor/mysqld-mixin/alerts/general.yaml').groups +
+ std.parseYaml(importstr 'vendor/mysqld-mixin/alerts/galera.yaml').groups +
+ std.parseYaml(importstr 'vendor/mysqld-mixin/rules/rules.yaml').groups
+ ),
+ },
+} + addAlerts
diff --git a/atmosphere/jsonnet/rules.jsonnet b/atmosphere/jsonnet/rules.jsonnet
new file mode 100644
index 0000000..0589764
--- /dev/null
+++ b/atmosphere/jsonnet/rules.jsonnet
@@ -0,0 +1,13 @@
+local legacy = import 'legacy.libsonnet';
+
+local ceph = import 'ceph.libsonnet';
+local mysqld = import 'mysqld.libsonnet';
+local coredns = import 'vendor/coredns-mixin/mixin.libsonnet';
+local memcached = import 'vendor/memcached-mixin/mixin.libsonnet';
+
+{
+ ceph: ceph.prometheusAlerts,
+ coredns: coredns.prometheusAlerts,
+ memcached: memcached.prometheusAlerts,
+ 'percona-xtradb-pxc': mysqld.prometheusAlerts,
+} + legacy
diff --git a/atmosphere/jsonnet/vendor/ceph-mixin b/atmosphere/jsonnet/vendor/ceph-mixin
new file mode 120000
index 0000000..dbbe333
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/ceph-mixin
@@ -0,0 +1 @@
+github.com/ceph/ceph/monitoring/ceph-mixin
\ No newline at end of file
diff --git a/atmosphere/jsonnet/vendor/coredns-mixin b/atmosphere/jsonnet/vendor/coredns-mixin
new file mode 120000
index 0000000..56c22bd
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/coredns-mixin
@@ -0,0 +1 @@
+github.com/povilasv/coredns-mixin
\ No newline at end of file
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/.gitignore b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/.gitignore
new file mode 100644
index 0000000..22d0d82
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/.gitignore
@@ -0,0 +1 @@
+vendor
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/CMakeLists.txt b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/CMakeLists.txt
new file mode 100644
index 0000000..e63c740
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/CMakeLists.txt
@@ -0,0 +1,57 @@
+if(WITH_GRAFANA)
+ set(CEPH_GRAFANA_DASHBOARDS_DIR "${CMAKE_INSTALL_SYSCONFDIR}/grafana/dashboards/ceph-dashboard"
+ CACHE PATH "Location for grafana dashboards")
+ file(GLOB CEPH_GRAFANA_DASHBOARDS "dashboards_out/*.json")
+ install(FILES
+ ${CEPH_GRAFANA_DASHBOARDS}
+ DESTINATION ${CEPH_GRAFANA_DASHBOARDS_DIR})
+ if(WITH_TESTS)
+ set(CEPH_BUILD_VIRTUALENV $ENV{TMPDIR})
+ if(NOT CEPH_BUILD_VIRTUALENV)
+ include(AddCephTest)
+ set(CEPH_BUILD_VIRTUALENV ${CMAKE_BINARY_DIR})
+
+ add_test(NAME jsonnet-bundler-build
+ COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/jsonnet-bundler-build.sh ${CMAKE_CURRENT_BINARY_DIR})
+ set_property(TEST jsonnet-bundler-build PROPERTY
+ FIXTURES_SETUP jsonnet-bundler)
+ add_test(NAME jsonnet-bundler-cleanup
+ COMMAND rm -rf jsonnet-bundler ${CMAKE_CURRENT_BINARY_DIR}/jb)
+ set_property(TEST jsonnet-bundler-cleanup PROPERTY
+ FIXTURES_CLEANUP jsonnet-bundler)
+
+ add_tox_test(grafana-lint TOX_ENVS lint)
+ add_tox_test(jsonnet-lint TOX_ENVS jsonnet-lint)
+ set_property(TEST run-tox-jsonnet-lint PROPERTY
+ FIXTURES_REQUIRED venv-for-jsonnet-lint)
+ add_tox_test(jsonnet-check TOX_ENVS jsonnet-check)
+ set_property(TEST run-tox-jsonnet-check PROPERTY
+ FIXTURES_REQUIRED venv-for-jsonnet-check jsonnet-bundler)
+
+ add_tox_test(alerts-check TOX_ENVS alerts-check)
+ add_tox_test(alerts-lint TOX_ENVS alerts-lint)
+ add_tox_test(promql-query-test TOX_ENVS promql-query-test)
+ endif()
+
+ if(DEFINED PROMTOOL_EXECUTABLE)
+ set(promtool_executable_checked TRUE)
+ endif()
+
+ find_program(PROMTOOL_EXECUTABLE promtool)
+ if(PROMTOOL_EXECUTABLE)
+ execute_process(
+ COMMAND ${PROMTOOL_EXECUTABLE} test rules /dev/null
+ RESULT_VARIABLE rc
+ OUTPUT_QUIET)
+ if(NOT rc)
+ add_ceph_test(run-promtool-unittests
+ ${PROMTOOL_EXECUTABLE} test rules ${CMAKE_SOURCE_DIR}/monitoring/ceph-mixin/tests_alerts/test_alerts.yml)
+ elseif(NOT promtool_executable_checked)
+ message(WARNING "'${PROMTOOL_EXECUTABLE} test rules' does not work, "
+ "please use a newer prometheus")
+ endif()
+ elseif(NOT promtool_executable_checked)
+ message(WARNING "run-promtool-unittests is skipped due to missing promtool")
+ endif()
+ endif()
+endif()
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/Makefile b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/Makefile
new file mode 100644
index 0000000..0cb7aa5
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/Makefile
@@ -0,0 +1,24 @@
+all: fmt generate lint test
+
+fmt:
+ ./lint-jsonnet.sh -i
+
+generate: dashboards_out
+
+vendor: jsonnetfile.lock.json
+ tox -ejsonnet-bundler-install
+
+dashboards_out: vendor dashboards
+ tox -ejsonnet-fix
+
+lint:
+ tox -ejsonnet-lint
+ tox -ealerts-lint
+
+test: generate
+ tox -ejsonnet-check
+ tox -epromql-query-test
+ tox -ealerts-check
+check: test
+
+.PHONY: all fmt generate lint test check
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/README.md b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/README.md
new file mode 100644
index 0000000..4772021
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/README.md
@@ -0,0 +1,76 @@
+## Prometheus Monitoring Mixin for Ceph
+A set of Grafana dashboards and Prometheus alerts for Ceph.
+
+All the Grafana dashboards are already generated in the `dashboards_out`
+directory and alerts in the `prometheus_alerts.yml` file.
+
+You can use the Grafana dashboards and alerts with Jsonnet like any other
+prometheus mixin. You can find more resources about mixins in general on
+[monitoring.mixins.dev](https://monitoring.mixins.dev/).
+
+### Grafana dashboards for Ceph
+In `dashboards_out` you can find a collection of
+[Grafana](https://grafana.com/grafana) dashboards for Ceph Monitoring.
+
+These dashboards are based on metrics collected
+from [prometheus](https://prometheus.io/) scraping the [prometheus mgr
+plugin](http://docs.ceph.com/en/latest/mgr/prometheus/) and the
+[node_exporter (0.17.0)](https://github.com/prometheus/node_exporter).
+
+
+##### Recommended versions:
+-grafana 8.3.5
+ -grafana-piechart-panel 1.6.2
+ -grafana-status-panel 1.0.11
+
+#### Requirements
+
+- [Status Panel](https://grafana.com/plugins/vonage-status-panel) installed on
+ your Grafana instance
+- [Pie Chart Panel](https://grafana.com/grafana/plugins/grafana-piechart-panel/)
+ installed on your Grafana instance
+
+
+### Prometheus alerts
+In `prometheus_alerts.libsonnet` you'll find a set of Prometheus
+alert rules that should provide a decent set of default alerts for a
+Ceph cluster. After building them with jsonnet put this file in place according to your Prometheus
+configuration (wherever the `rules` configuration stanza points).
+
+### Multi-cluster support
+Ceph-mixin supports dashboards and alerts across multiple clusters.
+To enable this feature you need to configure the following in `config.libsonnnet`:
+
+```
+showMultiCluster: true,
+clusterLabel: '<your cluster label>',
+```
+
+##### Recommended versions:
+-prometheus v2.33.4
+
+#### SNMP
+Ceph provides a MIB (CEPH-PROMETHEUS-ALERT-MIB.txt) to support sending
+Prometheus alerts to an SNMP management platform. The translation from
+Prometheus alert to SNMP trap requires the Prometheus alert to contain an OID
+that maps to a definition within the MIB. When making changes to the Prometheus
+alert rules file, developers should include any necessary changes to the MIB.
+
+
+##### Recommended:
+-alertmanager 0.16.2
+
+### Building from Jsonnet
+
+- Install [jsonnet](https://jsonnet.org/) (at least v0.18.0)
+ - By installing the package `jsonnet` in most of the distro and
+ `golang-github-google-jsonnet` in fedora
+- Install [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler)
+
+To rebuild all the generated files, you can run `tox -egrafonnet-fix`.
+
+The jsonnet code located in this directory depends on some Jsonnet third party
+libraries. To update those libraries you can run `jb update` and then update
+the generated files using `tox -egrafonnet-fix`.
+
+##### Any upgrade or downgrade to different major versions of the recommended tools mentioned above is not supported.
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/alerts.jsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/alerts.jsonnet
new file mode 100644
index 0000000..43826ee
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/alerts.jsonnet
@@ -0,0 +1 @@
+std.manifestYamlDoc((import 'alerts.libsonnet').prometheusAlerts, indent_array_in_object=true, quote_keys=false)
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/alerts.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/alerts.libsonnet
new file mode 100644
index 0000000..c2d39e2
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/alerts.libsonnet
@@ -0,0 +1,4 @@
+{
+ prometheusAlerts+:: (import 'prometheus_alerts.libsonnet') +
+ { _config:: $._config },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/config.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/config.libsonnet
new file mode 100644
index 0000000..7ee1210
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/config.libsonnet
@@ -0,0 +1,11 @@
+{
+ _config+:: {
+ dashboardTags: ['ceph-mixin'],
+
+ clusterLabel: 'cluster',
+ showMultiCluster: false,
+
+ CephNodeNetworkPacketDropsThreshold: 0.005,
+ CephNodeNetworkPacketDropsPerSec: 10,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards.jsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards.jsonnet
new file mode 100644
index 0000000..9d913ed
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards.jsonnet
@@ -0,0 +1,6 @@
+local dashboards = (import 'mixin.libsonnet').grafanaDashboards;
+
+{
+ [name]: dashboards[name]
+ for name in std.objectFields(dashboards)
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards.libsonnet
new file mode 100644
index 0000000..5cae183
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards.libsonnet
@@ -0,0 +1,10 @@
+{
+ grafanaDashboards+::
+ (import 'dashboards/cephfs.libsonnet') +
+ (import 'dashboards/host.libsonnet') +
+ (import 'dashboards/osd.libsonnet') +
+ (import 'dashboards/pool.libsonnet') +
+ (import 'dashboards/rbd.libsonnet') +
+ (import 'dashboards/rgw.libsonnet') +
+ { _config:: $._config },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/cephfs.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/cephfs.libsonnet
new file mode 100644
index 0000000..d12d9f4
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/cephfs.libsonnet
@@ -0,0 +1,89 @@
+local g = import 'grafonnet/grafana.libsonnet';
+
+(import 'utils.libsonnet') {
+ 'cephfs-overview.json':
+ $.dashboardSchema(
+ 'MDS Performance',
+ '',
+ 'tbO9LAiZz',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags,
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.3.2'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema('mds_servers',
+ '$datasource',
+ 'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ 'MDS Server',
+ '')
+ )
+ .addPanels([
+ $.addRowSchema(false, true, 'MDS Performance') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
+ $.simpleGraphPanel(
+ {},
+ 'MDS Workload - $mds_servers',
+ '',
+ 'none',
+ 'Reads(-) / Writes (+)',
+ 0,
+ 'sum(rate(ceph_objecter_op_r{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(),
+ 'Read Ops',
+ 0,
+ 1,
+ 12,
+ 9
+ )
+ .addTarget($.addTargetSchema(
+ 'sum(rate(ceph_objecter_op_w{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(),
+ 'Write Ops'
+ ))
+ .addSeriesOverride(
+ { alias: '/.*Reads/', transform: 'negative-Y' }
+ ),
+ $.simpleGraphPanel(
+ {},
+ 'Client Request Load - $mds_servers',
+ '',
+ 'none',
+ 'Client Requests',
+ 0,
+ 'ceph_mds_server_handle_client_request{%(matchers)s, ceph_daemon=~"($mds_servers).*"}' % $.matchers(),
+ '{{ceph_daemon}}',
+ 12,
+ 1,
+ 12,
+ 9
+ ),
+ ]),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/host.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/host.libsonnet
new file mode 100644
index 0000000..3e0b31f
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/host.libsonnet
@@ -0,0 +1,723 @@
+local g = import 'grafonnet/grafana.libsonnet';
+
+(import 'utils.libsonnet') {
+ 'hosts-overview.json':
+ $.dashboardSchema(
+ 'Host Overview',
+ '',
+ 'y0KGL0iZz',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags,
+ '',
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.3.2'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addRequired(
+ type='panel', id='singlestat', name='Singlestat', version='5.0.0'
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addTemplate(
+ g.template.datasource('datasource',
+ 'prometheus',
+ 'default',
+ label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema('osd_hosts',
+ '$datasource',
+ 'label_values(ceph_disk_occupation{%(matchers)s}, exported_instance)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ null,
+ '([^.]*).*')
+ )
+ .addTemplate(
+ $.addTemplateSchema('mon_hosts',
+ '$datasource',
+ 'label_values(ceph_mon_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ null,
+ 'mon.(.*)')
+ )
+ .addTemplate(
+ $.addTemplateSchema('mds_hosts',
+ '$datasource',
+ 'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ null,
+ 'mds.(.*)')
+ )
+ .addTemplate(
+ $.addTemplateSchema('rgw_hosts',
+ '$datasource',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ null,
+ 'rgw.(.*)')
+ )
+ .addPanels([
+ $.simpleSingleStatPanel(
+ 'none',
+ 'OSD Hosts',
+ '',
+ 'current',
+ 'count(sum by (hostname) (ceph_osd_metadata{%(matchers)s}))' % $.matchers(),
+ true,
+ 'time_series',
+ 0,
+ 0,
+ 4,
+ 5
+ ),
+ $.simpleSingleStatPanel(
+ 'percentunit',
+ 'AVG CPU Busy',
+ 'Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster',
+ 'current',
+ |||
+ avg(1 - (
+ avg by(instance) (
+ rate(node_cpu_seconds_total{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval]) or
+ rate(node_cpu{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval])
+ )
+ ))
+ |||,
+ true,
+ 'time_series',
+ 4,
+ 0,
+ 4,
+ 5
+ ),
+ $.simpleSingleStatPanel(
+ 'percentunit',
+ 'AVG RAM Utilization',
+ 'Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)',
+ 'current',
+ |||
+ avg ((
+ (
+ node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
+ node_memory_MemTotal_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
+ ) - ((
+ node_memory_MemFree{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
+ node_memory_MemFree_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}) +
+ (
+ node_memory_Cached{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
+ node_memory_Cached_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
+ ) + (
+ node_memory_Buffers{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
+ node_memory_Buffers_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
+ ) + (
+ node_memory_Slab{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
+ node_memory_Slab_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
+ )
+ )
+ ) / (
+ node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
+ node_memory_MemTotal_bytes{instance=~"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*"}
+ ))
+ |||,
+ true,
+ 'time_series',
+ 8,
+ 0,
+ 4,
+ 5
+ ),
+ $.simpleSingleStatPanel(
+ 'none',
+ 'Physical IOPS',
+ 'IOPS Load at the device as reported by the OS on all OSD hosts',
+ 'current',
+ |||
+ sum ((
+ rate(node_disk_reads_completed{instance=~"($osd_hosts).*"}[$__rate_interval]) or
+ rate(node_disk_reads_completed_total{instance=~"($osd_hosts).*"}[$__rate_interval])
+ ) + (
+ rate(node_disk_writes_completed{instance=~"($osd_hosts).*"}[$__rate_interval]) or
+ rate(node_disk_writes_completed_total{instance=~"($osd_hosts).*"}[$__rate_interval])
+ ))
+ |||,
+ true,
+ 'time_series',
+ 12,
+ 0,
+ 4,
+ 5
+ ),
+ $.simpleSingleStatPanel(
+ 'percent',
+ 'AVG Disk Utilization',
+ 'Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)',
+ 'current',
+ |||
+ avg (
+ label_replace(
+ (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or
+ (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),
+ "instance", "$1", "instance", "([^.:]*).*"
+ ) * on(instance, device) group_left(ceph_daemon) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s, instance=~"($osd_hosts).*"},
+ "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^.:]*).*"
+ )
+ )
+ ||| % $.matchers(),
+ true,
+ 'time_series',
+ 16,
+ 0,
+ 4,
+ 5
+ ),
+      $.simpleSingleStatPanel(
+        'bytes',
+        'Network Load',
+        'Total send/receive network load across all hosts in the ceph cluster',
+        'current',
+        |||
+          sum (
+            (
+              rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+              rate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
+            ) unless on (device, instance)
+            label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
+          ) +
+          sum (
+            (
+              rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+              rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
+            ) unless on (device, instance)
+            label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
+          )
+        |||,
+        true,
+        'time_series',
+        20,
+        0,
+        4,
+        5
+      ),
+ $.simpleGraphPanel(
+ {},
+ 'CPU Busy - Top 10 Hosts',
+ 'Show the top 10 busiest hosts by cpu',
+ 'percent',
+ null,
+ 0,
+ |||
+ topk(10,
+ 100 * (
+ 1 - (
+ avg by(instance) (
+ rate(node_cpu_seconds_total{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval]) or
+ rate(node_cpu{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval])
+ )
+ )
+ )
+ )
+ |||,
+ '{{instance}}',
+ 0,
+ 5,
+ 12,
+ 9
+ ),
+ $.simpleGraphPanel(
+ {},
+ 'Network Load - Top 10 Hosts',
+ 'Top 10 hosts by network load',
+ 'Bps',
+ null,
+ 0,
+ |||
+ topk(10, (sum by(instance) (
+ (
+ rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+ rate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
+ ) +
+ (
+ rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+ rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
+ ) unless on (device, instance)
+ label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)"))
+ ))
+ |||,
+ '{{instance}}',
+ 12,
+ 5,
+ 12,
+ 9
+ ),
+ ]),
+ 'host-details.json':
+ $.dashboardSchema(
+ 'Host Details',
+ '',
+ 'rtOg0AiWz',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags + ['overview'],
+ ''
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.3.2'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addRequired(
+ type='panel', id='singlestat', name='Singlestat', version='5.0.0'
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1, '-- Grafana --', true, true, 'rgba(0, 211, 255, 1)', 'Annotations & Alerts', 'dashboard'
+ )
+ )
+ .addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema('ceph_hosts',
+ '$datasource',
+ 'label_values({%(clusterMatcher)s}, instance)' % $.matchers(),
+ 1,
+ false,
+ 3,
+ 'Hostname',
+ '([^.:]*).*')
+ )
+ .addPanels([
+ $.addRowSchema(false, true, '$ceph_hosts System Overview') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
+ $.simpleSingleStatPanel(
+ 'none',
+ 'OSDs',
+ '',
+ 'current',
+ "count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s, hostname='$ceph_hosts'}))" % $.matchers(),
+ null,
+ 'time_series',
+ 0,
+ 1,
+ 3,
+ 5
+ ),
+ $.simpleGraphPanel(
+ {
+ interrupt: '#447EBC',
+ steal: '#6D1F62',
+ system: '#890F02',
+ user: '#3F6833',
+ wait: '#C15C17',
+ },
+ 'CPU Utilization',
+ "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown",
+ 'percent',
+ '% Utilization',
+ null,
+ |||
+ sum by (mode) (
+ rate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[$__rate_interval]) or
+ rate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[$__rate_interval])
+ ) / (
+ scalar(
+ sum(rate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]))
+ ) * 100
+ )
+ |||,
+ '{{mode}}',
+ 3,
+ 1,
+ 6,
+ 10
+ ),
+ $.simpleGraphPanel(
+ {
+ Available: '#508642',
+ Free: '#508642',
+ Total: '#bf1b00',
+ Used: '#bf1b00',
+ total: '#bf1b00',
+ used: '#0a50a1',
+ },
+ 'RAM Usage',
+ '',
+ 'bytes',
+ 'RAM used',
+ null,
+ |||
+ node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ |||,
+ 'Free',
+ 9,
+ 1,
+ 6,
+ 10
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ |||,
+ 'total'
+ ),
+ $.addTargetSchema(
+ |||
+ (
+ node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ ) + (
+ node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ ) + (
+ node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ )
+ |||,
+ 'buffers/cache'
+ ),
+ $.addTargetSchema(
+ |||
+ (
+ node_memory_MemTotal{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_MemTotal_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ ) - (
+ (
+ node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ ) + (
+ node_memory_Cached{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_Cached_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ ) + (
+ node_memory_Buffers{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_Buffers_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ ) +
+ (
+ node_memory_Slab{instance=~"$ceph_hosts([\\\\.:].*)?"} or
+ node_memory_Slab_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"}
+ )
+ )
+ |||,
+ 'used'
+ ),
+ ]
+ )
+ .addSeriesOverride(
+ {
+ alias: 'total',
+ color: '#bf1b00',
+ fill: 0,
+ linewidth: 2,
+ stack: false,
+ }
+ ),
+ $.simpleGraphPanel(
+ {},
+ 'Network Load',
+ "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')",
+ 'decbytes',
+ 'Send (-) / Receive (+)',
+ null,
+ |||
+ sum by (device) (
+ rate(
+ node_network_receive_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval]) or
+ rate(node_network_receive_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval]
+ )
+ )
+ |||,
+ '{{device}}.rx',
+ 15,
+ 1,
+ 6,
+ 10
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ sum by (device) (
+ rate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval]) or
+ rate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval])
+ )
+ |||,
+ '{{device}}.tx'
+ ),
+ ]
+ )
+ .addSeriesOverride(
+ { alias: '/.*tx/', transform: 'negative-Y' }
+ ),
+ $.simpleGraphPanel(
+ {},
+ 'Network drop rate',
+ '',
+ 'pps',
+ 'Send (-) / Receive (+)',
+ null,
+ |||
+ rate(node_network_receive_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_network_receive_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
+ |||,
+ '{{device}}.rx',
+ 21,
+ 1,
+ 3,
+ 5
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ rate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
+ |||,
+ '{{device}}.tx'
+ ),
+ ]
+ )
+ .addSeriesOverride(
+ {
+ alias: '/.*tx/',
+ transform: 'negative-Y',
+ }
+ ),
+ $.simpleSingleStatPanel(
+ 'bytes',
+ 'Raw Capacity',
+ 'Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.',
+ 'current',
+ |||
+ sum(
+ ceph_osd_stat_bytes{%(matchers)s} and
+ on (ceph_daemon) ceph_disk_occupation{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"}
+ )
+ ||| % $.matchers(),
+ null,
+ 'time_series',
+ 0,
+ 6,
+ 3,
+ 5
+ ),
+ $.simpleGraphPanel(
+ {},
+ 'Network error rate',
+ '',
+ 'pps',
+ 'Send (-) / Receive (+)',
+ null,
+ |||
+ rate(node_network_receive_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_network_receive_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
+ |||,
+ '{{device}}.rx',
+ 21,
+ 6,
+ 3,
+ 5
+ )
+ .addTargets(
+ [$.addTargetSchema(
+ |||
+ rate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
+ |||,
+ '{{device}}.tx'
+ )]
+ )
+ .addSeriesOverride(
+ {
+ alias: '/.*tx/',
+ transform: 'negative-Y',
+ }
+ ),
+ $.addRowSchema(false,
+ true,
+ 'OSD Disk Performance Statistics') + { gridPos: { x: 0, y: 11, w: 24, h: 1 } },
+ $.simpleGraphPanel(
+ {},
+ '$ceph_hosts Disk IOPS',
+ "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by it's name and corresponding OSD id value",
+ 'ops',
+ 'Read (-) / Write (+)',
+ null,
+ |||
+ label_replace(
+ (
+ rate(node_disk_writes_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ ) * on(instance, device) group_left(ceph_daemon) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ '{{device}}({{ceph_daemon}}) writes',
+ 0,
+ 12,
+ 11,
+ 9
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ label_replace(
+ (
+ rate(node_disk_reads_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ ) * on(instance, device) group_left(ceph_daemon) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s},"device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ '{{device}}({{ceph_daemon}}) reads'
+ ),
+ ]
+ )
+ .addSeriesOverride(
+ { alias: '/.*reads/', transform: 'negative-Y' }
+ ),
+ $.simpleGraphPanel(
+ {},
+ '$ceph_hosts Throughput by Disk',
+ 'For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id',
+ 'Bps',
+ 'Read (-) / Write (+)',
+ null,
+ |||
+ label_replace(
+ (
+ rate(node_disk_bytes_written{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_disk_written_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
+ ), "instance", "$1", "instance", "([^:.]*).*") * on(instance, device)
+ group_left(ceph_daemon) label_replace(
+ label_replace(ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"),
+ "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ '{{device}}({{ceph_daemon}}) write',
+ 12,
+ 12,
+ 11,
+ 9
+ )
+ .addTargets(
+ [$.addTargetSchema(
+ |||
+ label_replace(
+ (
+ rate(node_disk_bytes_read{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_disk_read_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
+ ),
+ "instance", "$1", "instance", "([^:.]*).*") * on(instance, device)
+ group_left(ceph_daemon) label_replace(
+ label_replace(ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"),
+ "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ '{{device}}({{ceph_daemon}}) read'
+ )]
+ )
+ .addSeriesOverride(
+ { alias: '/.*read/', transform: 'negative-Y' }
+ ),
+      $.simpleGraphPanel(
+        {},
+        '$ceph_hosts Disk Latency',
+        "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with it's corresponding OSD id",
+        's',
+        '',
+        null,
+        |||
+          max by(instance, device) (label_replace(
+            (rate(node_disk_write_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])) /
+            clamp_min(rate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]), 0.001) or
+            (rate(node_disk_read_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])) /
+            clamp_min(rate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]), 0.001),
+            "instance", "$1", "instance", "([^:.]*).*"
+          )) * on(instance, device) group_left(ceph_daemon) label_replace(
+            label_replace(
+              ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"},
+              "device", "$1", "device", "/dev/(.*)"
+            ), "instance", "$1", "instance", "([^:.]*).*"
+          )
+        ||| % $.matchers(),
+        '{{device}}({{ceph_daemon}})',
+        0,
+        21,
+        11,
+        9
+      ),
+ $.simpleGraphPanel(
+ {},
+ '$ceph_hosts Disk utilization',
+ 'Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.',
+ 'percent',
+ '%Util',
+ null,
+ |||
+ label_replace(
+ (
+ (rate(node_disk_io_time_ms{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) / 10) or
+ rate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) * 100
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ ) * on(instance, device) group_left(ceph_daemon) label_replace(
+ label_replace(ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"},
+ "device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ '{{device}}({{ceph_daemon}})',
+ 12,
+ 21,
+ 11,
+ 9
+ ),
+ ]),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/osd.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/osd.libsonnet
new file mode 100644
index 0000000..129b74b
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/osd.libsonnet
@@ -0,0 +1,593 @@
+local g = import 'grafonnet/grafana.libsonnet';
+
+(import 'utils.libsonnet') {
+ 'osds-overview.json':
+ $.dashboardSchema(
+ 'OSD Overview',
+ '',
+ 'lo02I1Aiz',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags,
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.0.0'
+ )
+ .addRequired(
+ type='panel', id='grafana-piechart-panel', name='Pie Chart', version='1.3.3'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addRequired(
+ type='panel', id='table', name='Table', version='5.0.0'
+ )
+ .addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addPanels([
+ $.simpleGraphPanel(
+ { '@95%ile': '#e0752d' },
+ 'OSD Read Latencies',
+ '',
+ 'ms',
+ null,
+ '0',
+ |||
+ avg (
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval]) * 1000
+ )
+ ||| % $.matchers(),
+ 'AVG read',
+ 0,
+ 0,
+ 8,
+ 8
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ max(
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval]) * 1000
+ )
+ ||| % $.matchers(),
+ 'MAX read'
+ ),
+ $.addTargetSchema(
+ |||
+ quantile(0.95,
+ (
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval])
+ * 1000
+ )
+ )
+ ||| % $.matchers(),
+ '@95%ile'
+ ),
+ ],
+ ),
+ $.addTableSchema(
+ '$datasource',
+ "This table shows the osd's that are delivering the 10 highest read latencies within the cluster",
+ { col: 2, desc: true },
+ [
+ $.overviewStyle('OSD ID', 'ceph_daemon', 'string', 'short'),
+ $.overviewStyle('Latency (ms)', 'Value', 'number', 'none'),
+ $.overviewStyle('', '/.*/', 'hidden', 'short'),
+ ],
+ 'Highest READ Latencies',
+ 'table'
+ )
+ .addTarget(
+ $.addTargetSchema(
+ |||
+ topk(10,
+ (sort(
+ (
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval]) *
+ 1000
+ )
+ ))
+ )
+ ||| % $.matchers(),
+ '',
+ 'table',
+ 1,
+ true
+ )
+ ) + { gridPos: { x: 8, y: 0, w: 4, h: 8 } },
+ $.simpleGraphPanel(
+ {
+ '@95%ile write': '#e0752d',
+ },
+ 'OSD Write Latencies',
+ '',
+ 'ms',
+ null,
+ '0',
+ |||
+ avg(
+ rate(ceph_osd_op_w_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval])
+ * 1000
+ )
+ ||| % $.matchers(),
+ 'AVG write',
+ 12,
+ 0,
+ 8,
+ 8
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ max(
+ rate(ceph_osd_op_w_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval]) *
+ 1000
+ )
+ ||| % $.matchers(), 'MAX write'
+ ),
+ $.addTargetSchema(
+ |||
+ quantile(0.95, (
+ rate(ceph_osd_op_w_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval]) *
+ 1000
+ ))
+ ||| % $.matchers(), '@95%ile write'
+ ),
+ ],
+ ),
+ $.addTableSchema(
+ '$datasource',
+ "This table shows the osd's that are delivering the 10 highest write latencies within the cluster",
+ { col: 2, desc: true },
+ [
+ $.overviewStyle(
+ 'OSD ID', 'ceph_daemon', 'string', 'short'
+ ),
+ $.overviewStyle('Latency (ms)', 'Value', 'number', 'none'),
+ $.overviewStyle('', '/.*/', 'hidden', 'short'),
+ ],
+ 'Highest WRITE Latencies',
+ 'table'
+ )
+ .addTarget(
+ $.addTargetSchema(
+ |||
+ topk(10,
+ (sort(
+ (rate(ceph_osd_op_w_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval]) *
+ 1000)
+ ))
+ )
+ ||| % $.matchers(),
+ '',
+ 'table',
+ 1,
+ true
+ )
+ ) + { gridPos: { x: 20, y: 0, w: 4, h: 8 } },
+ $.simplePieChart(
+ {}, '', 'OSD Types Summary'
+ )
+ .addTarget(
+ $.addTargetSchema('count by (device_class) (ceph_osd_metadata{%(matchers)s})' % $.matchers(), '{{device_class}}')
+ ) + { gridPos: { x: 0, y: 8, w: 4, h: 8 } },
+ $.simplePieChart(
+ { 'Non-Encrypted': '#E5AC0E' }, '', 'OSD Objectstore Types'
+ )
+ .addTarget(
+ $.addTargetSchema(
+ 'count(ceph_bluefs_wal_total_bytes{%(matchers)s})' % $.matchers(), 'bluestore', 'time_series', 2
+ )
+ )
+ .addTarget(
+ $.addTargetSchema(
+ 'absent(ceph_bluefs_wal_total_bytes{%(matchers)s}) * count(ceph_osd_metadata{%(matchers)s})' % $.matchers(), 'filestore', 'time_series', 2
+ )
+ ) + { gridPos: { x: 4, y: 8, w: 4, h: 8 } },
+ $.simplePieChart(
+ {}, 'The pie chart shows the various OSD sizes used within the cluster', 'OSD Size Summary'
+ )
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} < 1099511627776)' % $.matchers(), '<1TB', 'time_series', 2
+ ))
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 1099511627776 < 2199023255552)' % $.matchers(), '<2TB', 'time_series', 2
+ ))
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 2199023255552 < 3298534883328)' % $.matchers(), '<3TB', 'time_series', 2
+ ))
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 3298534883328 < 4398046511104)' % $.matchers(), '<4TB', 'time_series', 2
+ ))
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 4398046511104 < 6597069766656)' % $.matchers(), '<6TB', 'time_series', 2
+ ))
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 6597069766656 < 8796093022208)' % $.matchers(), '<8TB', 'time_series', 2
+ ))
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 8796093022208 < 10995116277760)' % $.matchers(), '<10TB', 'time_series', 2
+ ))
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 10995116277760 < 13194139533312)' % $.matchers(), '<12TB', 'time_series', 2
+ ))
+ .addTarget($.addTargetSchema(
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 13194139533312)' % $.matchers(), '<12TB+', 'time_series', 2
+ )) + { gridPos: { x: 8, y: 8, w: 4, h: 8 } },
+ g.graphPanel.new(bars=true,
+ datasource='$datasource',
+ title='Distribution of PGs per OSD',
+ x_axis_buckets=20,
+ x_axis_mode='histogram',
+ x_axis_values=['total'],
+ formatY1='short',
+ formatY2='short',
+ labelY1='# of OSDs',
+ min='0',
+ nullPointMode='null')
+ .addTarget($.addTargetSchema(
+ 'ceph_osd_numpg{%(matchers)s}' % $.matchers(), 'PGs per OSD', 'time_series', 1, true
+ )) + { gridPos: { x: 12, y: 8, w: 8, h: 8 } },
+ $.gaugeSingleStatPanel(
+ 'percentunit',
+ 'OSD onode Hits Ratio',
+ 'This gauge panel shows onode Hits ratio to help determine if increasing RAM per OSD could help improve the performance of the cluster',
+ 'current',
+ true,
+ 1,
+ true,
+ false,
+ '.75',
+ |||
+ sum(ceph_bluestore_onode_hits{%(matchers)s}) / (
+ sum(ceph_bluestore_onode_hits{%(matchers)s}) +
+ sum(ceph_bluestore_onode_misses{%(matchers)s})
+ )
+ ||| % $.matchers(),
+ 'time_series',
+ 20,
+ 8,
+ 4,
+ 8
+ ),
+ $.addRowSchema(false,
+ true,
+ 'R/W Profile') + { gridPos: { x: 0, y: 16, w: 24, h: 1 } },
+ $.simpleGraphPanel(
+ {},
+ 'Read/Write Profile',
+ 'Show the read/write workload profile overtime',
+ 'short',
+ null,
+ null,
+ 'round(sum(rate(ceph_pool_rd{%(matchers)s}[$__rate_interval])))' % $.matchers(),
+ 'Reads',
+ 0,
+ 17,
+ 24,
+ 8
+ )
+ .addTargets([$.addTargetSchema(
+ 'round(sum(rate(ceph_pool_wr{%(matchers)s}[$__rate_interval])))' % $.matchers(), 'Writes'
+ )]),
+ ]),
+ 'osd-device-details.json':
+ local OsdDeviceDetailsPanel(title,
+ description,
+ formatY1,
+ labelY1,
+ expr1,
+ expr2,
+ legendFormat1,
+ legendFormat2,
+ x,
+ y,
+ w,
+ h) =
+ $.graphPanelSchema({},
+ title,
+ description,
+ 'null',
+ false,
+ formatY1,
+ 'short',
+ labelY1,
+ null,
+ null,
+ 1,
+ '$datasource')
+ .addTargets(
+ [
+ $.addTargetSchema(expr1,
+ legendFormat1),
+ $.addTargetSchema(expr2, legendFormat2),
+ ]
+ ) + { gridPos: { x: x, y: y, w: w, h: h } };
+
+ $.dashboardSchema(
+ 'OSD device details',
+ '',
+ 'CrAHE0iZz',
+ 'now-3h',
+ '30s',
+ 16,
+ $._config.dashboardTags,
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.3.2'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addTemplate(
+ g.template.datasource('datasource',
+ 'prometheus',
+ 'default',
+ label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema('osd',
+ '$datasource',
+ 'label_values(ceph_osd_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 1,
+ false,
+ 1,
+ 'OSD',
+ '(.*)')
+ )
+ .addPanels([
+ $.addRowSchema(
+ false, true, 'OSD Performance'
+ ) + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
+ OsdDeviceDetailsPanel(
+ '$osd Latency',
+ '',
+ 's',
+ 'Read (-) / Write (+)',
+ |||
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval])
+ ||| % $.matchers(),
+ |||
+ rate(ceph_osd_op_w_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval])
+ ||| % $.matchers(),
+ 'read',
+ 'write',
+ 0,
+ 1,
+ 6,
+ 9
+ )
+ .addSeriesOverride(
+ {
+ alias: 'read',
+ transform: 'negative-Y',
+ }
+ ),
+ OsdDeviceDetailsPanel(
+ '$osd R/W IOPS',
+ '',
+ 'short',
+ 'Read (-) / Write (+)',
+ 'rate(ceph_osd_op_r{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_osd_op_w{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
+ 'Reads',
+ 'Writes',
+ 6,
+ 1,
+ 6,
+ 9
+ )
+ .addSeriesOverride(
+ { alias: 'Reads', transform: 'negative-Y' }
+ ),
+ OsdDeviceDetailsPanel(
+ '$osd R/W Bytes',
+ '',
+ 'bytes',
+ 'Read (-) / Write (+)',
+ 'rate(ceph_osd_op_r_out_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_osd_op_w_in_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
+ 'Read Bytes',
+ 'Write Bytes',
+ 12,
+ 1,
+ 6,
+ 9
+ )
+ .addSeriesOverride({ alias: 'Read Bytes', transform: 'negative-Y' }),
+ $.addRowSchema(
+ false, true, 'Physical Device Performance'
+ ) + { gridPos: { x: 0, y: 10, w: 24, h: 1 } },
+ OsdDeviceDetailsPanel(
+ 'Physical Device Latency for $osd',
+ '',
+ 's',
+ 'Read (-) / Write (+)',
+ |||
+ (
+ label_replace(
+ rate(node_disk_read_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) /
+ rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]),
+ "instance", "$1", "instance", "([^:.]*).*"
+ ) and on (instance, device) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ )
+ ||| % $.matchers(),
+ |||
+ (
+ label_replace(
+ rate(node_disk_write_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) /
+ rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]),
+ "instance", "$1", "instance", "([^:.]*).*") and on (instance, device)
+ label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ )
+ ||| % $.matchers(),
+ '{{instance}}/{{device}} Reads',
+ '{{instance}}/{{device}} Writes',
+ 0,
+ 11,
+ 6,
+ 9
+ )
+ .addSeriesOverride(
+ { alias: '/.*Reads/', transform: 'negative-Y' }
+ ),
+ OsdDeviceDetailsPanel(
+ 'Physical Device R/W IOPS for $osd',
+ '',
+ 'short',
+ 'Read (-) / Write (+)',
+ |||
+ label_replace(
+ rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]),
+ "instance", "$1", "instance", "([^:.]*).*"
+ ) and on (instance, device) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ |||
+ label_replace(
+ rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]),
+ "instance", "$1", "instance", "([^:.]*).*"
+ ) and on (instance, device) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ '{{device}} on {{instance}} Writes',
+ '{{device}} on {{instance}} Reads',
+ 6,
+ 11,
+ 6,
+ 9
+ )
+ .addSeriesOverride(
+ { alias: '/.*Reads/', transform: 'negative-Y' }
+ ),
+ OsdDeviceDetailsPanel(
+ 'Physical Device R/W Bytes for $osd',
+ '',
+ 'Bps',
+ 'Read (-) / Write (+)',
+ |||
+ label_replace(
+ rate(node_disk_read_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
+ ) and on (instance, device) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ |||
+ label_replace(
+ rate(node_disk_written_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
+ ) and on (instance, device) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ '{{instance}} {{device}} Reads',
+ '{{instance}} {{device}} Writes',
+ 12,
+ 11,
+ 6,
+ 9
+ )
+ .addSeriesOverride(
+ { alias: '/.*Reads/', transform: 'negative-Y' }
+ ),
+ $.graphPanelSchema(
+ {},
+ 'Physical Device Util% for $osd',
+ '',
+ 'null',
+ false,
+ 'percentunit',
+ 'short',
+ null,
+ null,
+ null,
+ 1,
+ '$datasource'
+ )
+ .addTarget($.addTargetSchema(
+ |||
+ label_replace(
+ rate(node_disk_io_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]),
+ "instance", "$1", "instance", "([^:.]*).*"
+ ) and on (instance, device) label_replace(
+ label_replace(
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
+ ), "instance", "$1", "instance", "([^:.]*).*"
+ )
+ ||| % $.matchers(),
+ '{{device}} on {{instance}}'
+ )) + { gridPos: { x: 18, y: 11, w: 6, h: 9 } },
+ ]),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/pool.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/pool.libsonnet
new file mode 100644
index 0000000..6444335
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/pool.libsonnet
@@ -0,0 +1,552 @@
+local g = import 'grafonnet/grafana.libsonnet';
+
+(import 'utils.libsonnet') {
+ 'pool-overview.json':
+ $.dashboardSchema(
+ 'Ceph Pools Overview',
+ '',
+ 'z99hzWtmk',
+ 'now-1h',
+ '30s',
+ 22,
+ $._config.dashboardTags,
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ g.template.custom(label='TopK',
+ name='topk',
+ current='15',
+ query='15')
+ )
+ .addPanels([
+ $.simpleSingleStatPanel(
+ 'none',
+ 'Pools',
+ '',
+ 'avg',
+ 'count(ceph_pool_metadata{%(matchers)s})' % $.matchers(),
+ true,
+ 'table',
+ 0,
+ 0,
+ 3,
+ 3
+ ),
+ $.simpleSingleStatPanel(
+ 'none',
+ 'Pools with Compression',
+ 'Count of the pools that have compression enabled',
+ 'current',
+ 'count(ceph_pool_metadata{%(matchers)s, compression_mode!="none"})' % $.matchers(),
+ null,
+ '',
+ 3,
+ 0,
+ 3,
+ 3
+ ),
+ $.simpleSingleStatPanel(
+ 'bytes',
+ 'Total Raw Capacity',
+ 'Total raw capacity available to the cluster',
+ 'current',
+ 'sum(ceph_osd_stat_bytes{%(matchers)s})' % $.matchers(),
+ null,
+ '',
+ 6,
+ 0,
+ 3,
+ 3
+ ),
+ $.simpleSingleStatPanel(
+ 'bytes',
+ 'Raw Capacity Consumed',
+ 'Total raw capacity consumed by user data and associated overheads (metadata + redundancy)',
+ 'current',
+ 'sum(ceph_pool_bytes_used{%(matchers)s})' % $.matchers(),
+ true,
+ '',
+ 9,
+ 0,
+ 3,
+ 3
+ ),
+ $.simpleSingleStatPanel(
+ 'bytes',
+ 'Logical Stored ',
+ 'Total of client data stored in the cluster',
+ 'current',
+ 'sum(ceph_pool_stored{%(matchers)s})' % $.matchers(),
+ true,
+ '',
+ 12,
+ 0,
+ 3,
+ 3
+ ),
+ $.simpleSingleStatPanel(
+ 'bytes',
+ 'Compression Savings',
+ 'A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression',
+ 'current',
+ |||
+ sum(
+ ceph_pool_compress_under_bytes{%(matchers)s} -
+ ceph_pool_compress_bytes_used{%(matchers)s}
+ )
+ ||| % $.matchers(),
+ null,
+ '',
+ 15,
+ 0,
+ 3,
+ 3
+ ),
+ $.simpleSingleStatPanel(
+ 'percent',
+ 'Compression Eligibility',
+ 'Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data',
+ 'current',
+ |||
+ (
+ sum(ceph_pool_compress_under_bytes{%(matchers)s} > 0) /
+ sum(ceph_pool_stored_raw{%(matchers)s} and ceph_pool_compress_under_bytes{%(matchers)s} > 0)
+ ) * 100
+ ||| % $.matchers(),
+ null,
+ 'table',
+ 18,
+ 0,
+ 3,
+ 3
+ ),
+ $.simpleSingleStatPanel(
+ 'none',
+ 'Compression Factor',
+ 'This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)',
+ 'current',
+ |||
+ sum(
+ ceph_pool_compress_under_bytes{%(matchers)s} > 0)
+ / sum(ceph_pool_compress_bytes_used{%(matchers)s} > 0
+ )
+ ||| % $.matchers(),
+ null,
+ '',
+ 21,
+ 0,
+ 3,
+ 3
+ ),
+ $.addTableSchema(
+ '$datasource',
+ '',
+ { col: 5, desc: true },
+ [
+ $.overviewStyle('', 'Time', 'hidden', 'short'),
+ $.overviewStyle('', 'instance', 'hidden', 'short'),
+ $.overviewStyle('', 'job', 'hidden', 'short'),
+ $.overviewStyle('Pool Name', 'name', 'string', 'short'),
+ $.overviewStyle('Pool ID', 'pool_id', 'hidden', 'none'),
+ $.overviewStyle('Compression Factor', 'Value #A', 'number', 'none'),
+ $.overviewStyle('% Used', 'Value #D', 'number', 'percentunit', 'value', ['70', '85']),
+ $.overviewStyle('Usable Free', 'Value #B', 'number', 'bytes'),
+ $.overviewStyle('Compression Eligibility', 'Value #C', 'number', 'percent'),
+ $.overviewStyle('Compression Savings', 'Value #E', 'number', 'bytes'),
+ $.overviewStyle('Growth (5d)', 'Value #F', 'number', 'bytes', 'value', ['0', '0']),
+ $.overviewStyle('IOPS', 'Value #G', 'number', 'none'),
+ $.overviewStyle('Bandwidth', 'Value #H', 'number', 'Bps'),
+ $.overviewStyle('', '__name__', 'hidden', 'short'),
+ $.overviewStyle('', 'type', 'hidden', 'short'),
+ $.overviewStyle('', 'compression_mode', 'hidden', 'short'),
+ $.overviewStyle('Type', 'description', 'string', 'short'),
+ $.overviewStyle('Stored', 'Value #J', 'number', 'bytes'),
+ $.overviewStyle('', 'Value #I', 'hidden', 'short'),
+ $.overviewStyle('Compression', 'Value #K', 'string', 'short', null, [], [{ text: 'ON', value: '1' }]),
+ ],
+ 'Pool Overview',
+ 'table'
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ (
+ ceph_pool_compress_under_bytes{%(matchers)s} /
+ ceph_pool_compress_bytes_used{%(matchers)s} > 0
+ ) and on(pool_id) (
+ (
+ (ceph_pool_compress_under_bytes{%(matchers)s} > 0) /
+ ceph_pool_stored_raw{%(matchers)s}
+ ) * 100 > 0.5
+ )
+ ||| % $.matchers(),
+ 'A',
+ 'table',
+ 1,
+ true
+ ),
+ $.addTargetSchema(
+ |||
+ ceph_pool_max_avail{%(matchers)s} *
+ on(pool_id) group_left(name) ceph_pool_metadata{%(matchers)s}
+ ||| % $.matchers(),
+ 'B',
+ 'table',
+ 1,
+ true
+ ),
+ $.addTargetSchema(
+ |||
+ (
+ (ceph_pool_compress_under_bytes{%(matchers)s} > 0) /
+ ceph_pool_stored_raw{%(matchers)s}
+ ) * 100
+ ||| % $.matchers(),
+ 'C',
+ 'table',
+ 1,
+ true
+ ),
+ $.addTargetSchema(
+ |||
+ ceph_pool_percent_used{%(matchers)s} *
+ on(pool_id) group_left(name) ceph_pool_metadata{%(matchers)s}
+ ||| % $.matchers(),
+ 'D',
+ 'table',
+ 1,
+ true
+ ),
+ $.addTargetSchema(
+ |||
+ ceph_pool_compress_under_bytes{%(matchers)s} -
+ ceph_pool_compress_bytes_used{%(matchers)s} > 0
+ ||| % $.matchers(),
+ 'E',
+ 'table',
+ 1,
+ true
+ ),
+ $.addTargetSchema(
+ 'delta(ceph_pool_stored{%(matchers)s}[5d])' % $.matchers(), 'F', 'table', 1, true
+ ),
+ $.addTargetSchema(
+ |||
+ rate(ceph_pool_rd{%(matchers)s}[$__rate_interval])
+ + rate(ceph_pool_wr{%(matchers)s}[$__rate_interval])
+ ||| % $.matchers(),
+ 'G',
+ 'table',
+ 1,
+ true
+ ),
+ $.addTargetSchema(
+ |||
+ rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval])
+ ||| % $.matchers(),
+ 'H',
+ 'table',
+ 1,
+ true
+ ),
+ $.addTargetSchema(
+ 'ceph_pool_metadata{%(matchers)s}' % $.matchers(), 'I', 'table', 1, true
+ ),
+ $.addTargetSchema(
+ 'ceph_pool_stored{%(matchers)s} * on(pool_id) group_left ceph_pool_metadata{%(matchers)s}' % $.matchers(),
+ 'J',
+ 'table',
+ 1,
+ true
+ ),
+ $.addTargetSchema(
+ 'ceph_pool_metadata{%(matchers)s, compression_mode!="none"}' % $.matchers(), 'K', 'table', 1, true
+ ),
+ $.addTargetSchema('', 'L', '', '', null),
+ ]
+ ) + { gridPos: { x: 0, y: 3, w: 24, h: 6 } },
+ $.simpleGraphPanel(
+ {},
+ 'Top $topk Client IOPS by Pool',
+ 'This chart shows the sum of read and write IOPS from all clients by pool',
+ 'short',
+ 'IOPS',
+ 0,
+ |||
+ topk($topk,
+ round(
+ (
+ rate(ceph_pool_rd{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_pool_wr{%(matchers)s}[$__rate_interval])
+ ), 1
+ ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s})
+ ||| % $.matchers(),
+ '{{name}} ',
+ 0,
+ 9,
+ 12,
+ 8
+ )
+ .addTarget(
+ $.addTargetSchema(
+ |||
+ topk($topk,
+ rate(ceph_pool_wr{%(matchers)s}[$__rate_interval]) +
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s}
+ )
+ ||| % $.matchers(),
+ '{{name}} - write'
+ )
+ ),
+ $.simpleGraphPanel(
+ {},
+ 'Top $topk Client Bandwidth by Pool',
+ 'The chart shows the sum of read and write bytes from all clients, by pool',
+ 'Bps',
+ 'Throughput',
+ 0,
+ |||
+ topk($topk,
+ (
+ rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval])
+ ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s}
+ )
+ ||| % $.matchers(),
+ '{{name}}',
+ 12,
+ 9,
+ 12,
+ 8
+ ),
+ $.simpleGraphPanel(
+ {},
+ 'Pool Capacity Usage (RAW)',
+ 'Historical view of capacity usage, to help identify growth and trends in pool consumption',
+ 'bytes',
+ 'Capacity Used',
+ 0,
+ 'ceph_pool_bytes_used{%(matchers)s} * on(pool_id) group_right ceph_pool_metadata{%(matchers)s}' % $.matchers(),
+ '{{name}}',
+ 0,
+ 17,
+ 24,
+ 7
+ ),
+ ]),
+ 'pool-detail.json':
+ $.dashboardSchema(
+ 'Ceph Pool Details',
+ '',
+ '-xyV8KCiz',
+ 'now-1h',
+ '30s',
+ 22,
+ $._config.dashboardTags,
+ ''
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.3.2'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addRequired(
+ type='panel', id='singlestat', name='Singlestat', version='5.0.0'
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema('pool_name',
+ '$datasource',
+ 'label_values(ceph_pool_metadata{%(matchers)s}, name)' % $.matchers(),
+ 1,
+ false,
+ 1,
+ 'Pool Name',
+ '')
+ )
+ .addPanels([
+ $.gaugeSingleStatPanel(
+ 'percentunit',
+ 'Capacity used',
+ '',
+ 'current',
+ true,
+ 1,
+ true,
+ true,
+ '.7,.8',
+ |||
+ (ceph_pool_stored{%(matchers)s} / (ceph_pool_stored{%(matchers)s} + ceph_pool_max_avail{%(matchers)s})) *
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % $.matchers(),
+ 'time_series',
+ 0,
+ 0,
+ 7,
+ 7
+ ),
+ $.gaugeSingleStatPanel(
+ 's',
+ 'Time till full',
+ 'Time till pool is full assuming the average fill rate of the last 6 hours',
+ false,
+ 100,
+ false,
+ false,
+ '',
+ 'current',
+ |||
+ (ceph_pool_max_avail{%(matchers)s} / deriv(ceph_pool_stored{%(matchers)s}[6h])) *
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} > 0
+ ||| % $.matchers(),
+ 'time_series',
+ 7,
+ 0,
+ 5,
+ 7
+ ),
+ $.simpleGraphPanel(
+ {
+ read_op_per_sec:
+ '#3F6833',
+ write_op_per_sec: '#E5AC0E',
+ },
+ '$pool_name Object Ingress/Egress',
+ '',
+ 'ops',
+ 'Objects out(-) / in(+) ',
+ null,
+ |||
+ deriv(ceph_pool_objects{%(matchers)s}[1m]) *
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % $.matchers(),
+ 'Objects per second',
+ 12,
+ 0,
+ 12,
+ 7
+ ),
+ $.simpleGraphPanel(
+ {
+ read_op_per_sec: '#3F6833',
+ write_op_per_sec: '#E5AC0E',
+ },
+ '$pool_name Client IOPS',
+ '',
+ 'iops',
+ 'Read (-) / Write (+)',
+ null,
+ |||
+ rate(ceph_pool_rd{%(matchers)s}[$__rate_interval]) *
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % $.matchers(),
+ 'reads',
+ 0,
+ 7,
+ 12,
+ 7
+ )
+ .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
+ .addTarget(
+ $.addTargetSchema(
+ |||
+ rate(ceph_pool_wr{%(matchers)s}[$__rate_interval]) *
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % $.matchers(),
+ 'writes'
+ )
+ ),
+ $.simpleGraphPanel(
+ {
+ read_op_per_sec: '#3F6833',
+ write_op_per_sec: '#E5AC0E',
+ },
+ '$pool_name Client Throughput',
+ '',
+ 'Bps',
+ 'Read (-) / Write (+)',
+ null,
+ |||
+ rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) +
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % $.matchers(),
+ 'reads',
+ 12,
+ 7,
+ 12,
+ 7
+ )
+ .addSeriesOverride({ alias: 'reads', transform: 'negative-Y' })
+ .addTarget(
+ $.addTargetSchema(
+ |||
+ rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval]) +
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % $.matchers(),
+ 'writes'
+ )
+ ),
+ $.simpleGraphPanel(
+ {
+ read_op_per_sec: '#3F6833',
+ write_op_per_sec: '#E5AC0E',
+ },
+ '$pool_name Objects',
+ '',
+ 'short',
+ 'Objects',
+ null,
+ |||
+ ceph_pool_objects{%(matchers)s} *
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % $.matchers(),
+ 'Number of Objects',
+ 0,
+ 14,
+ 12,
+ 7
+ ),
+ ]),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/rbd.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/rbd.libsonnet
new file mode 100644
index 0000000..0eca5a8
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/rbd.libsonnet
@@ -0,0 +1,337 @@
+local g = import 'grafonnet/grafana.libsonnet';
+local u = import 'utils.libsonnet';
+
+(import 'utils.libsonnet') {
+ 'rbd-details.json':
+ local RbdDetailsPanel(title, formatY1, expr1, expr2, x, y, w, h) =
+ $.graphPanelSchema({},
+ title,
+ '',
+ 'null as zero',
+ false,
+ formatY1,
+ formatY1,
+ null,
+ null,
+ 0,
+ 1,
+ '$datasource')
+ .addTargets(
+ [
+ $.addTargetSchema(expr1,
+ '{{pool}} Write'),
+ $.addTargetSchema(expr2, '{{pool}} Read'),
+ ]
+ ) + { gridPos: { x: x, y: y, w: w, h: h } };
+
+ $.dashboardSchema(
+ 'RBD Details',
+ 'Detailed Performance of RBD Images (IOPS/Throughput/Latency)',
+ 'YhCYGcuZz',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags,
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.3.3'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema('pool',
+ '$datasource',
+ 'label_values(pool)',
+ 1,
+ false,
+ 0,
+ '',
+ '')
+ )
+ .addTemplate(
+ $.addTemplateSchema('image',
+ '$datasource',
+ 'label_values(image)',
+ 1,
+ false,
+ 0,
+ '',
+ '')
+ )
+ .addPanels([
+ RbdDetailsPanel(
+ 'IOPS',
+ 'iops',
+ 'rate(ceph_rbd_write_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers()
+ ,
+ 'rate(ceph_rbd_read_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
+ 0,
+ 0,
+ 8,
+ 9
+ ),
+ RbdDetailsPanel(
+ 'Throughput',
+ 'Bps',
+ 'rate(ceph_rbd_write_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_rbd_read_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
+ 8,
+ 0,
+ 8,
+ 9
+ ),
+ RbdDetailsPanel(
+ 'Average Latency',
+ 'ns',
+ |||
+ rate(ceph_rbd_write_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) /
+ rate(ceph_rbd_write_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])
+ ||| % $.matchers(),
+ |||
+ rate(ceph_rbd_read_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) /
+ rate(ceph_rbd_read_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])
+ ||| % $.matchers(),
+ 16,
+ 0,
+ 8,
+ 9
+ ),
+ ]),
+ 'rbd-overview.json':
+ local RbdOverviewPanel(title,
+ formatY1,
+ expr1,
+ expr2,
+ legendFormat1,
+ legendFormat2,
+ x,
+ y,
+ w,
+ h) =
+ $.graphPanelSchema({},
+ title,
+ '',
+ 'null',
+ false,
+ formatY1,
+ 'short',
+ null,
+ null,
+ 0,
+ 1,
+ '$datasource')
+ .addTargets(
+ [
+ $.addTargetSchema(expr1,
+ legendFormat1),
+ $.addTargetSchema(expr2,
+ legendFormat2),
+ ]
+ ) + { gridPos: { x: x, y: y, w: w, h: h } };
+
+ $.dashboardSchema(
+ 'RBD Overview',
+ '',
+ '41FrpeUiz',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags + ['overview'],
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.4.2'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addRequired(
+ type='datasource', id='prometheus', name='Prometheus', version='5.0.0'
+ )
+ .addRequired(
+ type='panel', id='table', name='Table', version='5.0.0'
+ )
+ .addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addPanels([
+ RbdOverviewPanel(
+ 'IOPS',
+ 'short',
+ 'round(sum(rate(ceph_rbd_write_ops{%(matchers)s}[$__rate_interval])))' % $.matchers(),
+ 'round(sum(rate(ceph_rbd_read_ops{%(matchers)s}[$__rate_interval])))' % $.matchers(),
+ 'Writes',
+ 'Reads',
+ 0,
+ 0,
+ 8,
+ 7
+ ),
+ RbdOverviewPanel(
+ 'Throughput',
+ 'Bps',
+ 'round(sum(rate(ceph_rbd_write_bytes{%(matchers)s}[$__rate_interval])))' % $.matchers(),
+ 'round(sum(rate(ceph_rbd_read_bytes{%(matchers)s}[$__rate_interval])))' % $.matchers(),
+ 'Write',
+ 'Read',
+ 8,
+ 0,
+ 8,
+ 7
+ ),
+ RbdOverviewPanel(
+ 'Average Latency',
+ 'ns',
+ |||
+ round(
+ sum(rate(ceph_rbd_write_latency_sum{%(matchers)s}[$__rate_interval])) /
+ sum(rate(ceph_rbd_write_latency_count{%(matchers)s}[$__rate_interval]))
+ )
+ ||| % $.matchers(),
+ |||
+ round(
+ sum(rate(ceph_rbd_read_latency_sum{%(matchers)s}[$__rate_interval])) /
+ sum(rate(ceph_rbd_read_latency_count{%(matchers)s}[$__rate_interval]))
+ )
+ ||| % $.matchers(),
+ 'Write',
+ 'Read',
+ 16,
+ 0,
+ 8,
+ 7
+ ),
+ $.addTableSchema(
+ '$datasource',
+ '',
+ { col: 3, desc: true },
+ [
+ $.overviewStyle('Pool', 'pool', 'string', 'short'),
+ $.overviewStyle('Image', 'image', 'string', 'short'),
+ $.overviewStyle('IOPS', 'Value', 'number', 'iops'),
+ $.overviewStyle('', '/.*/', 'hidden', 'short'),
+ ],
+ 'Highest IOPS',
+ 'table'
+ )
+ .addTarget(
+ $.addTargetSchema(
+ |||
+ topk(10,
+ (
+ sort((
+ rate(ceph_rbd_write_ops{%(matchers)s}[$__rate_interval]) +
+ on (image, pool, namespace) rate(ceph_rbd_read_ops{%(matchers)s}[$__rate_interval])
+ ))
+ )
+ )
+ ||| % $.matchers(),
+ '',
+ 'table',
+ 1,
+ true
+ )
+ ) + { gridPos: { x: 0, y: 7, w: 8, h: 7 } },
+ $.addTableSchema(
+ '$datasource',
+ '',
+ { col: 3, desc: true },
+ [
+ $.overviewStyle('Pool', 'pool', 'string', 'short'),
+ $.overviewStyle('Image', 'image', 'string', 'short'),
+ $.overviewStyle('Throughput', 'Value', 'number', 'Bps'),
+ $.overviewStyle('', '/.*/', 'hidden', 'short'),
+ ],
+ 'Highest Throughput',
+ 'table'
+ )
+ .addTarget(
+ $.addTargetSchema(
+ |||
+ topk(10,
+ sort(
+ sum(
+ rate(ceph_rbd_read_bytes{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_rbd_write_bytes{%(matchers)s}[$__rate_interval])
+ ) by (pool, image, namespace)
+ )
+ )
+ ||| % $.matchers(),
+ '',
+ 'table',
+ 1,
+ true
+ )
+ ) + { gridPos: { x: 8, y: 7, w: 8, h: 7 } },
+ $.addTableSchema(
+ '$datasource',
+ '',
+ { col: 3, desc: true },
+ [
+ $.overviewStyle('Pool', 'pool', 'string', 'short'),
+ $.overviewStyle('Image', 'image', 'string', 'short'),
+ $.overviewStyle('Latency', 'Value', 'number', 'ns'),
+ $.overviewStyle('', '/.*/', 'hidden', 'short'),
+ ],
+ 'Highest Latency',
+ 'table'
+ )
+ .addTarget(
+ $.addTargetSchema(
+ |||
+ topk(10,
+ sum(
+ rate(ceph_rbd_write_latency_sum{%(matchers)s}[$__rate_interval]) /
+ clamp_min(rate(ceph_rbd_write_latency_count{%(matchers)s}[$__rate_interval]), 1) +
+ rate(ceph_rbd_read_latency_sum{%(matchers)s}[$__rate_interval]) /
+ clamp_min(rate(ceph_rbd_read_latency_count{%(matchers)s}[$__rate_interval]), 1)
+ ) by (pool, image, namespace)
+ )
+ ||| % $.matchers(),
+ '',
+ 'table',
+ 1,
+ true
+ )
+ ) + { gridPos: { x: 16, y: 7, w: 8, h: 7 } },
+ ]),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/rgw.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/rgw.libsonnet
new file mode 100644
index 0000000..892480d
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/rgw.libsonnet
@@ -0,0 +1,872 @@
+local g = import 'grafonnet/grafana.libsonnet';
+local u = import 'utils.libsonnet';
+
+(import 'utils.libsonnet') {
+ 'radosgw-sync-overview.json':
+ local RgwSyncOverviewPanel(title, formatY1, labelY1, rgwMetric, x, y, w, h) =
+ $.graphPanelSchema({},
+ title,
+ '',
+ 'null as zero',
+ true,
+ formatY1,
+ 'short',
+ labelY1,
+ null,
+ 0,
+ 1,
+ '$datasource')
+ .addTargets(
+ [
+ $.addTargetSchema(
+ 'sum by (source_zone) (rate(%(rgwMetric)s{%(matchers)s}[$__rate_interval]))'
+ % ($.matchers() + { rgwMetric: rgwMetric }),
+ '{{source_zone}}'
+ ),
+ ]
+ ) + { gridPos: { x: x, y: y, w: w, h: h } };
+
+ $.dashboardSchema(
+ 'RGW Sync Overview',
+ '',
+ 'rgw-sync-overview',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags + ['overview'],
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.0.0'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema(
+ 'rgw_servers',
+ '$datasource',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ '',
+ 'RGW Server'
+ )
+ )
+ .addPanels([
+ RgwSyncOverviewPanel(
+ 'Replication (throughput) from Source Zone',
+ 'Bps',
+ null,
+ 'ceph_data_sync_from_zone_fetch_bytes_sum',
+ 0,
+ 0,
+ 8,
+ 7
+ ),
+ RgwSyncOverviewPanel(
+ 'Replication (objects) from Source Zone',
+ 'short',
+ 'Objects/s',
+ 'ceph_data_sync_from_zone_fetch_bytes_count',
+ 8,
+ 0,
+ 8,
+ 7
+ ),
+ RgwSyncOverviewPanel(
+ 'Polling Request Latency from Source Zone',
+ 'ms',
+ null,
+ 'ceph_data_sync_from_zone_poll_latency_sum',
+ 16,
+ 0,
+ 8,
+ 7
+ ),
+ RgwSyncOverviewPanel(
+ 'Unsuccessful Object Replications from Source Zone',
+ 'short',
+ 'Count/s',
+ 'ceph_data_sync_from_zone_fetch_errors',
+ 0,
+ 7,
+ 8,
+ 7
+ ),
+ ]),
+ 'radosgw-overview.json':
+ local RgwOverviewPanel(
+ title,
+ description,
+ formatY1,
+ formatY2,
+ expr1,
+ legendFormat1,
+ x,
+ y,
+ w,
+ h,
+ datasource='$datasource',
+ legend_alignAsTable=false,
+ legend_avg=false,
+ legend_min=false,
+ legend_max=false,
+ legend_current=false,
+ legend_values=false
+ ) =
+ $.graphPanelSchema(
+ {},
+ title,
+ description,
+ 'null',
+ false,
+ formatY1,
+ formatY2,
+ null,
+ null,
+ 0,
+ 1,
+ datasource,
+ legend_alignAsTable,
+ legend_avg,
+ legend_min,
+ legend_max,
+ legend_current,
+ legend_values
+ )
+ .addTargets(
+ [$.addTargetSchema(expr1, legendFormat1)]
+ ) + { gridPos: { x: x, y: y, w: w, h: h } };
+
+ $.dashboardSchema(
+ 'RGW Overview',
+ '',
+ 'WAkugZpiz',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags + ['overview'],
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.0.0'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addTemplate(
+ g.template.datasource('datasource',
+ 'prometheus',
+ 'default',
+ label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema(
+ 'rgw_servers',
+ '$datasource',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ '',
+ 'RGW Server'
+ )
+ )
+ .addTemplate(
+ $.addTemplateSchema(
+ 'code',
+ '$datasource',
+ 'label_values(haproxy_server_http_responses_total{job=~"$job_haproxy", instance=~"$ingress_service"}, code)',
+ 1,
+ true,
+ 1,
+ 'HTTP Code',
+ ''
+ )
+ )
+ .addTemplate(
+ $.addTemplateSchema(
+ 'job_haproxy',
+ '$datasource',
+ 'label_values(haproxy_server_status, job)',
+ 1,
+ true,
+ 1,
+ 'job haproxy',
+ '(.*)',
+ multi=true,
+ allValues='.+',
+ ),
+ )
+ .addTemplate(
+ $.addTemplateSchema(
+ 'ingress_service',
+ '$datasource',
+ 'label_values(haproxy_server_status{job=~"$job_haproxy"}, instance)',
+ 1,
+ true,
+ 1,
+ 'Ingress Service',
+ ''
+ )
+ )
+ .addPanels([
+ $.addRowSchema(false,
+ true,
+ 'RGW Overview - All Gateways') +
+ {
+ gridPos: { x: 0, y: 0, w: 24, h: 1 },
+ },
+ RgwOverviewPanel(
+ 'Average GET/PUT Latencies by RGW Instance',
+ '',
+ 's',
+ 'short',
+ |||
+ label_replace(
+ rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
+ )
+ ||| % $.matchers(),
+ 'GET {{rgw_host}}',
+ 0,
+ 1,
+ 8,
+ 7
+ ).addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ label_replace(
+ rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
+ )
+ ||| % $.matchers(),
+ 'PUT {{rgw_host}}'
+ ),
+ ]
+ ),
+ RgwOverviewPanel(
+ 'Total Requests/sec by RGW Instance',
+ '',
+ 'none',
+ 'short',
+ |||
+ sum by (rgw_host) (
+ label_replace(
+ rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
+ )
+ )
+ ||| % $.matchers(),
+ '{{rgw_host}}',
+ 8,
+ 1,
+ 7,
+ 7
+ ),
+ RgwOverviewPanel(
+ 'GET Latencies by RGW Instance',
+ 'Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts',
+ 's',
+ 'short',
+ |||
+ label_replace(
+ rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
+ )
+ ||| % $.matchers(),
+ '{{rgw_host}}',
+ 15,
+ 1,
+ 6,
+ 7
+ ),
+ RgwOverviewPanel(
+ 'Bandwidth Consumed by Type',
+ 'Total bytes transferred in/out of all radosgw instances within the cluster',
+ 'bytes',
+ 'short',
+ 'sum(rate(ceph_rgw_get_b{%(matchers)s}[$__rate_interval]))' % $.matchers(),
+ 'GETs',
+ 0,
+ 8,
+ 8,
+ 6
+ ).addTargets(
+ [$.addTargetSchema('sum(rate(ceph_rgw_put_b{%(matchers)s}[$__rate_interval]))' % $.matchers(),
+ 'PUTs')]
+ ),
+ RgwOverviewPanel(
+ 'Bandwidth by RGW Instance',
+ 'Total bytes transferred in/out through get/put operations, by radosgw instance',
+ 'bytes',
+ 'short',
+ |||
+ label_replace(sum by (instance_id) (
+ rate(ceph_rgw_get_b{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_rgw_put_b{%(matchers)s}[$__rate_interval])) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
+ )
+ ||| % $.matchers(),
+ '{{rgw_host}}',
+ 8,
+ 8,
+ 7,
+ 6
+ ),
+ RgwOverviewPanel(
+ 'PUT Latencies by RGW Instance',
+ 'Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts',
+ 's',
+ 'short',
+ |||
+ label_replace(
+ rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+ "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
+ )
+ ||| % $.matchers(),
+ '{{rgw_host}}',
+ 15,
+ 8,
+ 6,
+ 6
+ ),
+ $.addRowSchema(
+ false, true, 'RGW Overview - HAProxy Metrics'
+ ) + { gridPos: { x: 0, y: 12, w: 9, h: 12 } },
+ RgwOverviewPanel(
+ 'Total responses by HTTP code',
+ '',
+ 'short',
+ 'short',
+ |||
+ sum(
+ rate(
+ haproxy_frontend_http_responses_total{code=~"$code", job=~"$job_haproxy", instance=~"$ingress_service", proxy=~"frontend"}[$__rate_interval]
+ )
+ ) by (code)
+ |||,
+ 'Frontend {{ code }}',
+ 0,
+ 12,
+ 5,
+ 12,
+ '$datasource',
+ true,
+ true,
+ true,
+ true,
+ true,
+ true
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_backend_http_responses_total{code=~"$code", job=~"$job_haproxy", instance=~"$ingress_service", proxy=~"backend"}[$__rate_interval]
+ )
+ ) by (code)
+ |||, 'Backend {{ code }}'
+ ),
+ ]
+ )
+ .addSeriesOverride([
+ {
+ alias: '/.*Back.*/',
+ transform: 'negative-Y',
+ },
+ { alias: '/.*1.*/' },
+ { alias: '/.*2.*/' },
+ { alias: '/.*3.*/' },
+ { alias: '/.*4.*/' },
+ { alias: '/.*5.*/' },
+ { alias: '/.*other.*/' },
+ ]),
+ RgwOverviewPanel(
+ 'Total requests / responses',
+ '',
+ 'short',
+ 'short',
+ |||
+ sum(
+ rate(
+ haproxy_frontend_http_requests_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||,
+ 'Requests',
+ 5,
+ 12,
+ 5,
+ 12,
+ '$datasource',
+ true,
+ true,
+ true,
+ true,
+ true,
+ true
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_backend_response_errors_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||, 'Response errors', 'time_series', 2
+ ),
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_frontend_request_errors_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||, 'Requests errors'
+ ),
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_backend_redispatch_warnings_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||, 'Backend redispatch', 'time_series', 2
+ ),
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_backend_retry_warnings_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||, 'Backend retry', 'time_series', 2
+ ),
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_frontend_requests_denied_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||, 'Request denied', 'time_series', 2
+ ),
+ $.addTargetSchema(
+ |||
+ sum(
+ haproxy_backend_current_queue{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}
+ ) by (instance)
+ |||, 'Backend Queued', 'time_series', 2
+ ),
+ ]
+ )
+ .addSeriesOverride([
+ {
+ alias: '/.*Response.*/',
+ transform: 'negative-Y',
+ },
+ {
+ alias: '/.*Backend.*/',
+ transform: 'negative-Y',
+ },
+ ]),
+ RgwOverviewPanel(
+ 'Total number of connections',
+ '',
+ 'short',
+ 'short',
+ |||
+ sum(
+ rate(
+ haproxy_frontend_connections_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||,
+ 'Front',
+ 10,
+ 12,
+ 5,
+ 12,
+ '$datasource',
+ true,
+ true,
+ true,
+ true,
+ true,
+ true
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_backend_connection_attempts_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||, 'Back'
+ ),
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_backend_connection_errors_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ )
+ ) by (instance)
+ |||, 'Back errors'
+ ),
+ ]
+ )
+ .addSeriesOverride([
+ {
+ alias: '/.*Back.*/',
+ transform: 'negative-Y',
+ },
+ ]),
+ RgwOverviewPanel(
+ 'Current total of incoming / outgoing bytes',
+ '',
+ 'short',
+ 'short',
+ |||
+ sum(
+ rate(
+ haproxy_frontend_bytes_in_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ ) * 8
+ ) by (instance)
+ |||,
+ 'IN Front',
+ 15,
+ 12,
+ 6,
+ 12,
+ '$datasource',
+ true,
+ true,
+ true,
+ true,
+ true,
+ true
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_frontend_bytes_out_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ ) * 8
+ ) by (instance)
+ |||, 'OUT Front', 'time_series', 2
+ ),
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_backend_bytes_in_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ ) * 8
+ ) by (instance)
+ |||, 'IN Back', 'time_series', 2
+ ),
+ $.addTargetSchema(
+ |||
+ sum(
+ rate(
+ haproxy_backend_bytes_out_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
+ ) * 8
+ ) by (instance)
+ |||, 'OUT Back', 'time_series', 2
+ ),
+ ]
+ )
+ .addSeriesOverride([
+ {
+ alias: '/.*OUT.*/',
+ transform: 'negative-Y',
+ },
+ ]),
+ ]),
+ 'radosgw-detail.json':
+ local RgwDetailsPanel(aliasColors,
+ title,
+ description,
+ formatY1,
+ formatY2,
+ expr1,
+ expr2,
+ legendFormat1,
+ legendFormat2,
+ x,
+ y,
+ w,
+ h) =
+ $.graphPanelSchema(aliasColors,
+ title,
+ description,
+ 'null',
+ false,
+ formatY1,
+ formatY2,
+ null,
+ null,
+ 0,
+ 1,
+ '$datasource')
+ .addTargets(
+ [$.addTargetSchema(expr1, legendFormat1), $.addTargetSchema(expr2, legendFormat2)]
+ ) + { gridPos: { x: x, y: y, w: w, h: h } };
+
+ $.dashboardSchema(
+ 'RGW Instance Detail',
+ '',
+ 'x5ARzZtmk',
+ 'now-1h',
+ '30s',
+ 16,
+ $._config.dashboardTags + ['overview'],
+ ''
+ )
+ .addAnnotation(
+ $.addAnnotationSchema(
+ 1,
+ '-- Grafana --',
+ true,
+ true,
+ 'rgba(0, 211, 255, 1)',
+ 'Annotations & Alerts',
+ 'dashboard'
+ )
+ )
+ .addRequired(
+ type='grafana', id='grafana', name='Grafana', version='5.0.0'
+ )
+ .addRequired(
+ type='panel',
+ id='grafana-piechart-panel',
+ name='Pie Chart',
+ version='1.3.3'
+ )
+ .addRequired(
+ type='panel', id='graph', name='Graph', version='5.0.0'
+ )
+ .addTemplate(
+ g.template.datasource('datasource',
+ 'prometheus',
+ 'default',
+ label='Data Source')
+ )
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+ .addTemplate(
+ $.addJobTemplate()
+ )
+ .addTemplate(
+ $.addTemplateSchema('rgw_servers',
+ '$datasource',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ '',
+ '')
+ )
+ .addPanels([
+ $.addRowSchema(false, true, 'RGW Host Detail : $rgw_servers') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
+ RgwDetailsPanel(
+ {},
+ '$rgw_servers GET/PUT Latencies',
+ '',
+ 's',
+ 'short',
+ |||
+ sum by (instance_id) (
+ rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval])
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ |||
+ sum by (instance_id) (
+ rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval])
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'GET {{ceph_daemon}}',
+ 'PUT {{ceph_daemon}}',
+ 0,
+ 1,
+ 6,
+ 8
+ ),
+ RgwDetailsPanel(
+ {},
+ 'Bandwidth by HTTP Operation',
+ '',
+ 'bytes',
+ 'short',
+ |||
+ rate(ceph_rgw_get_b{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ |||
+ rate(ceph_rgw_put_b{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon)
+ ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'GETs {{ceph_daemon}}',
+ 'PUTs {{ceph_daemon}}',
+ 6,
+ 1,
+ 7,
+ 8
+ ),
+ RgwDetailsPanel(
+ {
+ GETs: '#7eb26d',
+ Other: '#447ebc',
+ PUTs: '#eab839',
+ Requests: '#3f2b5b',
+ 'Requests Failed': '#bf1b00',
+ },
+ 'HTTP Request Breakdown',
+ '',
+ 'short',
+ 'short',
+ |||
+ rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s,ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ |||
+ rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'Requests Failed {{ceph_daemon}}',
+ 'GETs {{ceph_daemon}}',
+ 13,
+ 1,
+ 7,
+ 8
+ )
+ .addTargets(
+ [
+ $.addTargetSchema(
+ |||
+ rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'PUTs {{ceph_daemon}}'
+ ),
+ $.addTargetSchema(
+ |||
+ (
+ rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) -
+ (
+ rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_rgw_put{%(matchers)s}[$__rate_interval])
+ )
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'Other {{ceph_daemon}}'
+ ),
+ ]
+ ),
+ $.simplePieChart(
+ {
+ GETs: '#7eb26d',
+ 'Other (HEAD,POST,DELETE)': '#447ebc',
+ PUTs: '#eab839',
+ Requests: '#3f2b5b',
+ Failures: '#bf1b00',
+ }, '', 'Workload Breakdown'
+ )
+ .addTarget($.addTargetSchema(
+ |||
+ rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'Failures {{ceph_daemon}}'
+ ))
+ .addTarget($.addTargetSchema(
+ |||
+ rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'GETs {{ceph_daemon}}'
+ ))
+ .addTarget($.addTargetSchema(
+ |||
+ rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'PUTs {{ceph_daemon}}'
+ ))
+ .addTarget($.addTargetSchema(
+ |||
+ (
+ rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) -
+ (
+ rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_rgw_put{%(matchers)s}[$__rate_interval])
+ )
+ ) * on (instance_id) group_left (ceph_daemon)
+ ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % $.matchers(),
+ 'Other (DELETE,LIST) {{ceph_daemon}}'
+ )) + { gridPos: { x: 20, y: 1, w: 4, h: 8 } },
+ ]),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/utils.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/utils.libsonnet
new file mode 100644
index 0000000..a7774c7
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards/utils.libsonnet
@@ -0,0 +1,333 @@
+local g = import 'grafonnet/grafana.libsonnet';
+
+{
+ _config:: error 'must provide _config',
+
+ dashboardSchema(title,
+ description,
+ uid,
+ time_from,
+ refresh,
+ schemaVersion,
+ tags,
+ timezone)::
+ g.dashboard.new(title=title,
+ description=description,
+ uid=uid,
+ time_from=time_from,
+ refresh=refresh,
+ schemaVersion=schemaVersion,
+ tags=tags,
+ timezone=timezone),
+
+ graphPanelSchema(aliasColors,
+ title,
+ description,
+ nullPointMode,
+ stack,
+ formatY1,
+ formatY2,
+ labelY1,
+ labelY2,
+ min,
+ fill,
+ datasource,
+ legend_alignAsTable=false,
+ legend_avg=false,
+ legend_min=false,
+ legend_max=false,
+ legend_current=false,
+ legend_values=false)::
+ g.graphPanel.new(aliasColors=aliasColors,
+ title=title,
+ description=description,
+ nullPointMode=nullPointMode,
+ stack=stack,
+ formatY1=formatY1,
+ formatY2=formatY2,
+ labelY1=labelY1,
+ labelY2=labelY2,
+ min=min,
+ fill=fill,
+ datasource=datasource,
+ legend_alignAsTable=legend_alignAsTable,
+ legend_avg=legend_avg,
+ legend_min=legend_min,
+ legend_max=legend_max,
+ legend_current=legend_current,
+ legend_values=legend_values),
+
+
+ addTargetSchema(expr, legendFormat='', format='time_series', intervalFactor=1, instant=null)::
+ g.prometheus.target(expr=expr,
+ legendFormat=legendFormat,
+ format=format,
+ intervalFactor=intervalFactor,
+ instant=instant),
+
+ addTemplateSchema(name,
+ datasource,
+ query,
+ refresh,
+ includeAll,
+ sort,
+ label,
+ regex,
+ hide='',
+ multi=false,
+ allValues=null)::
+ g.template.new(name=name,
+ datasource=datasource,
+ query=query,
+ refresh=refresh,
+ includeAll=includeAll,
+ sort=sort,
+ label=label,
+ regex=regex,
+ hide=hide,
+ multi=multi,
+ allValues=allValues),
+
+ addAnnotationSchema(builtIn,
+ datasource,
+ enable,
+ hide,
+ iconColor,
+ name,
+ type)::
+ g.annotation.datasource(builtIn=builtIn,
+ datasource=datasource,
+ enable=enable,
+ hide=hide,
+ iconColor=iconColor,
+ name=name,
+ type=type),
+
+ addRowSchema(collapse, showTitle, title)::
+ g.row.new(collapse=collapse, showTitle=showTitle, title=title),
+
+ addSingleStatSchema(colors,
+ datasource,
+ format,
+ title,
+ description,
+ valueName,
+ colorValue,
+ gaugeMaxValue,
+ gaugeShow,
+ sparklineShow,
+ thresholds)::
+ g.singlestat.new(colors=colors,
+ datasource=datasource,
+ format=format,
+ title=title,
+ description=description,
+ valueName=valueName,
+ colorValue=colorValue,
+ gaugeMaxValue=gaugeMaxValue,
+ gaugeShow=gaugeShow,
+ sparklineShow=sparklineShow,
+ thresholds=thresholds),
+
+ addPieChartSchema(aliasColors,
+ datasource,
+ description,
+ legendType,
+ pieType,
+ title,
+ valueName)::
+ g.pieChartPanel.new(aliasColors=aliasColors,
+ datasource=datasource,
+ description=description,
+ legendType=legendType,
+ pieType=pieType,
+ title=title,
+ valueName=valueName),
+
+ addTableSchema(datasource, description, sort, styles, title, transform)::
+ g.tablePanel.new(datasource=datasource,
+ description=description,
+ sort=sort,
+ styles=styles,
+ title=title,
+ transform=transform),
+
+ addStyle(alias,
+ colorMode,
+ colors,
+ dateFormat,
+ decimals,
+ mappingType,
+ pattern,
+ thresholds,
+ type,
+ unit,
+ valueMaps)::
+ {
+ alias: alias,
+ colorMode: colorMode,
+ colors: colors,
+ dateFormat: dateFormat,
+ decimals: decimals,
+ mappingType: mappingType,
+ pattern: pattern,
+ thresholds: thresholds,
+ type: type,
+ unit: unit,
+ valueMaps: valueMaps,
+ },
+
+ matchers()::
+ local jobMatcher = 'job=~"$job"';
+ local clusterMatcher = '%s=~"$cluster"' % $._config.clusterLabel;
+ {
+ // Common labels
+ jobMatcher: jobMatcher,
+ clusterMatcher: (if $._config.showMultiCluster then clusterMatcher else ''),
+ matchers: jobMatcher +
+ (if $._config.showMultiCluster then ', ' + clusterMatcher else ''),
+ },
+
+ addClusterTemplate()::
+ $.addTemplateSchema(
+ 'cluster',
+ '$datasource',
+ 'label_values(ceph_osd_metadata, %s)' % $._config.clusterLabel,
+ 1,
+ true,
+ 1,
+ 'cluster',
+ '(.*)',
+ if !$._config.showMultiCluster then 'variable' else '',
+ multi=true,
+ allValues='.+',
+ ),
+
+ addJobTemplate()::
+ $.addTemplateSchema(
+ 'job',
+ '$datasource',
+ 'label_values(ceph_osd_metadata{%(clusterMatcher)s}, job)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ 'job',
+ '(.*)',
+ multi=true,
+ allValues='.+',
+ ),
+
+ overviewStyle(alias,
+ pattern,
+ type,
+ unit,
+ colorMode=null,
+ thresholds=[],
+ valueMaps=[])::
+ $.addStyle(alias,
+ colorMode,
+ [
+ 'rgba(245, 54, 54, 0.9)',
+ 'rgba(237, 129, 40, 0.89)',
+ 'rgba(50, 172, 45, 0.97)',
+ ],
+ 'YYYY-MM-DD HH:mm:ss',
+ 2,
+ 1,
+ pattern,
+ thresholds,
+ type,
+ unit,
+ valueMaps),
+
+ simpleGraphPanel(alias,
+ title,
+ description,
+ formatY1,
+ labelY1,
+ min,
+ expr,
+ legendFormat,
+ x,
+ y,
+ w,
+ h)::
+ $.graphPanelSchema(alias,
+ title,
+ description,
+ 'null',
+ false,
+ formatY1,
+ 'short',
+ labelY1,
+ null,
+ min,
+ 1,
+ '$datasource')
+ .addTargets(
+ [$.addTargetSchema(expr, legendFormat)]
+ ) + { gridPos: { x: x, y: y, w: w, h: h } },
+
+ simpleSingleStatPanel(format,
+ title,
+ description,
+ valueName,
+ expr,
+ instant,
+ targetFormat,
+ x,
+ y,
+ w,
+ h)::
+ $.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'],
+ '$datasource',
+ format,
+ title,
+ description,
+ valueName,
+ false,
+ 100,
+ false,
+ false,
+ '')
+ .addTarget($.addTargetSchema(expr, '', targetFormat, 1, instant)) + {
+ gridPos: { x: x, y: y, w: w, h: h },
+ },
+ gaugeSingleStatPanel(format,
+ title,
+ description,
+ valueName,
+ colorValue,
+ gaugeMaxValue,
+ gaugeShow,
+ sparkLineShow,
+ thresholds,
+ expr,
+ targetFormat,
+ x,
+ y,
+ w,
+ h)::
+ $.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'],
+ '$datasource',
+ format,
+ title,
+ description,
+ valueName,
+ colorValue,
+ gaugeMaxValue,
+ gaugeShow,
+ sparkLineShow,
+ thresholds)
+ .addTarget($.addTargetSchema(expr, '', targetFormat)) + { gridPos: { x:
+ x, y: y, w: w, h: h } },
+
+ simplePieChart(alias, description, title)::
+ $.addPieChartSchema(alias,
+ '$datasource',
+ description,
+ 'Under graph',
+ 'pie',
+ title,
+ 'current'),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/.lint b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/.lint
new file mode 100644
index 0000000..6352e85
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/.lint
@@ -0,0 +1,5 @@
+exclusions:
+ template-instance-rule:
+ reason: "Instance template not needed because of ceph-mgr leader election."
+ target-instance-rule:
+ reason: "Instance matcher not needed because of ceph-mgr leader election."
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/ceph-cluster.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/ceph-cluster.json
new file mode 100644
index 0000000..6988a62
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/ceph-cluster.json
@@ -0,0 +1,1244 @@
+{
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "heatmap",
+ "name": "Heatmap",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "vonage-status-panel",
+ "name": "Status Panel",
+ "version": "1.0.8"
+ }
+ ],
+ "annotations": {
+ "list": []
+ },
+ "description": "Ceph cluster overview",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1525415495309,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 128, 45, 0.9)",
+ "rgba(237, 129, 40, 0.9)",
+ "rgb(255, 0, 0)"
+ ],
+ "datasource": "$datasource",
+ "editable": false,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 2,
+ "x": 0,
+ "y": 0
+ },
+ "hideTimeOverride": true,
+ "id": 21,
+ "interval": "1m",
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "ceph_health_status",
+ "format": "time_series",
+ "instant": true,
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "timeFrom": null,
+ "title": "Health Status",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "OK",
+ "value": "0"
+ },
+ {
+ "op": "=",
+ "text": "WARN",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "ERR",
+ "value": "2"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "colorMode": "Panel",
+ "colors": {
+ "crit": "rgb(255, 0, 0)",
+ "disable": "rgba(128, 128, 128, 0.9)",
+ "ok": "rgba(50, 128, 45, 0.9)",
+ "warn": "rgba(237, 129, 40, 0.9)"
+ },
+ "cornerRadius": 0,
+ "datasource": "$datasource",
+ "displayName": "",
+ "flipCard": false,
+ "flipTime": 5,
+ "fontFormat": "Regular",
+ "gridPos": {
+ "h": 3,
+ "w": 2,
+ "x": 2,
+ "y": 0
+ },
+ "id": 43,
+ "isAutoScrollOnOverflow": false,
+ "isGrayOnNoData": false,
+ "isHideAlertsOnDisable": false,
+ "isIgnoreOKColors": false,
+ "links": [],
+ "targets": [
+ {
+ "aggregation": "Last",
+ "alias": "All",
+ "decimals": 2,
+ "displayAliasType": "Always",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "count(ceph_osd_metadata)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "All",
+ "refId": "A",
+ "units": "none",
+ "valueHandler": "Number Threshold"
+ },
+ {
+ "aggregation": "Last",
+ "alias": "In",
+ "decimals": 2,
+ "displayAliasType": "Always",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "sum(ceph_osd_in)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "In",
+ "refId": "B",
+ "units": "none",
+ "valueHandler": "Number Threshold"
+ },
+ {
+ "aggregation": "Last",
+ "alias": "Out",
+ "decimals": 2,
+ "displayAliasType": "Warning / Critical",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "sum(ceph_osd_in == bool 0)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "Out",
+ "refId": "C",
+ "units": "none",
+ "valueHandler": "Number Threshold",
+ "warn": 1
+ },
+ {
+ "aggregation": "Last",
+ "alias": "Up",
+ "decimals": 2,
+ "displayAliasType": "Always",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "sum(ceph_osd_up)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Up",
+ "refId": "D",
+ "units": "none",
+ "valueHandler": "Number Threshold"
+ },
+ {
+ "aggregation": "Last",
+ "alias": "Down",
+ "crit": 2,
+ "decimals": 2,
+ "displayAliasType": "Warning / Critical",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "sum(ceph_osd_up == bool 0)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Down",
+ "refId": "E",
+ "units": "none",
+ "valueHandler": "Number Threshold",
+ "warn": 1
+ }
+ ],
+ "title": "OSDs",
+ "type": "vonage-status-panel"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "decimals": 2,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 47,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(ceph_osd_stat_bytes_used)/sum(ceph_osd_stat_bytes)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Used",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0.7,0.8",
+ "title": "Capacity used",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 8,
+ "y": 0
+ },
+ "id": 53,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Active",
+ "color": "#508642",
+ "fill": 1,
+ "stack": "A"
+ },
+ {
+ "alias": "Total",
+ "color": "#f9e2d2"
+ },
+ {
+ "alias": "Degraded",
+ "color": "#eab839"
+ },
+ {
+ "alias": "Undersized",
+ "color": "#f9934e"
+ },
+ {
+ "alias": "Inconsistent",
+ "color": "#e24d42"
+ },
+ {
+ "alias": "Down",
+ "color": "#bf1b00"
+ },
+ {
+ "alias": "Inactive",
+ "color": "#bf1b00",
+ "fill": 4,
+ "linewidth": 0,
+ "stack": "A"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(ceph_pg_total)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Total",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(ceph_pg_active)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Active",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(ceph_pg_total - ceph_pg_active)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Inactive",
+ "refId": "G"
+ },
+ {
+ "expr": "sum(ceph_pg_undersized)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Undersized",
+ "refId": "F"
+ },
+ {
+ "expr": "sum(ceph_pg_degraded)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Degraded",
+ "refId": "C"
+ },
+ {
+ "expr": "sum(ceph_pg_inconsistent)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Inconsistent",
+ "refId": "D"
+ },
+ {
+ "expr": "sum(ceph_pg_down)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Down",
+ "refId": "E"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PG States",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 16,
+ "y": 0
+ },
+ "id": 66,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Avg Apply Latency",
+ "color": "#7eb26d"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "quantile(0.95, ceph_osd_apply_latency_ms)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Apply Latency P_95",
+ "refId": "A"
+ },
+ {
+ "expr": "quantile(0.95, ceph_osd_commit_latency_ms)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Commit Latency P_95",
+ "refId": "B"
+ },
+ {
+ "expr": "avg(ceph_osd_apply_latency_ms)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Avg Apply Latency",
+ "refId": "C"
+ },
+ {
+ "expr": "avg(ceph_osd_commit_latency_ms)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Avg Commit Latency",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "clusterName": "",
+ "colorMode": "Panel",
+ "colors": {
+ "crit": "rgba(245, 54, 54, 0.9)",
+ "disable": "rgba(128, 128, 128, 0.9)",
+ "ok": "rgba(50, 128, 45, 0.9)",
+ "warn": "rgba(237, 129, 40, 0.9)"
+ },
+ "cornerRadius": 1,
+ "datasource": "$datasource",
+ "displayName": "",
+ "flipCard": false,
+ "flipTime": 5,
+ "fontFormat": "Regular",
+ "gridPos": {
+ "h": 3,
+ "w": 2,
+ "x": 0,
+ "y": 3
+ },
+ "id": 41,
+ "isAutoScrollOnOverflow": false,
+ "isGrayOnNoData": false,
+ "isHideAlertsOnDisable": false,
+ "isIgnoreOKColors": false,
+ "links": [],
+ "targets": [
+ {
+ "aggregation": "Last",
+ "alias": "In Quorum",
+ "decimals": 2,
+ "displayAliasType": "Always",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "sum(ceph_mon_quorum_status)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "In Quorum",
+ "refId": "A",
+ "units": "none",
+ "valueHandler": "Text Only"
+ },
+ {
+ "aggregation": "Last",
+ "alias": "Total",
+ "crit": 1,
+ "decimals": 2,
+ "displayAliasType": "Always",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "count(ceph_mon_quorum_status)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Total",
+ "refId": "B",
+ "units": "none",
+ "valueHandler": "Text Only",
+ "warn": 2
+ },
+ {
+ "aggregation": "Last",
+ "alias": "MONs out of Quorum",
+ "crit": 1.6,
+ "decimals": 2,
+ "displayAliasType": "Warning / Critical",
+ "displayType": "Annotation",
+ "displayValueWithAlias": "Never",
+ "expr": "count(ceph_mon_quorum_status) / sum(ceph_mon_quorum_status)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "MONs out of Quorum",
+ "refId": "C",
+ "units": "none",
+ "valueHandler": "Number Threshold",
+ "warn": 1.1
+ }
+ ],
+ "title": "Monitors",
+ "type": "vonage-status-panel"
+ },
+ {
+ "colorMode": "Panel",
+ "colors": {
+ "crit": "rgba(245, 54, 54, 0.9)",
+ "disable": "rgba(128, 128, 128, 0.9)",
+ "ok": "rgba(50, 128, 45, 0.9)",
+ "warn": "rgba(237, 129, 40, 0.9)"
+ },
+ "cornerRadius": 0,
+ "datasource": "$datasource",
+ "displayName": "",
+ "flipCard": false,
+ "flipTime": 5,
+ "fontFormat": "Regular",
+ "gridPos": {
+ "h": 3,
+ "w": 2,
+ "x": 2,
+ "y": 3
+ },
+ "id": 68,
+ "isAutoScrollOnOverflow": false,
+ "isGrayOnNoData": false,
+ "isHideAlertsOnDisable": false,
+ "isIgnoreOKColors": false,
+ "links": [],
+ "targets": [
+ {
+ "aggregation": "Last",
+ "alias": "Active",
+ "decimals": 2,
+ "displayAliasType": "Always",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "count(ceph_mgr_status == 1) or vector(0)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Active",
+ "refId": "A",
+ "units": "none",
+ "valueHandler": "Number Threshold"
+ },
+ {
+ "aggregation": "Last",
+ "alias": "Standby",
+ "decimals": 2,
+ "displayAliasType": "Always",
+ "displayType": "Regular",
+ "displayValueWithAlias": "When Alias Displayed",
+ "expr": "count(ceph_mgr_status == 0) or vector(0)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Standby",
+ "refId": "B",
+ "units": "none",
+ "valueHandler": "Number Threshold"
+ }
+ ],
+ "title": "MGRs",
+ "type": "vonage-status-panel"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 6
+ },
+ "id": 45,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 0.5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Reads",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(ceph_osd_op_w_in_bytes[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(ceph_osd_op_r_out_bytes[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cluster I/O",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 6
+ },
+ "id": 62,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(deriv(ceph_pool_stored[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "In-/Egress",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": " Egress (-) / Ingress (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": 1
+ },
+ "color": {
+ "cardColor": "rgb(0, 254, 255)",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateBlues",
+ "exponent": 0.5,
+ "min": null,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 0,
+ "y": 15
+ },
+ "heatmap": {},
+ "highlightCards": true,
+ "id": 55,
+ "legend": {
+ "show": true
+ },
+ "links": [],
+ "span": 12,
+ "targets": [
+ {
+ "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Util (%)",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "timeFrom": null,
+ "title": "OSD Capacity Utilization",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": "",
+ "yAxis": {
+ "decimals": 2,
+ "format": "percentunit",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": 1
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateBlues",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 6,
+ "y": 15
+ },
+ "heatmap": {},
+ "highlightCards": true,
+ "id": 59,
+ "legend": {
+ "show": true
+ },
+ "links": [],
+ "targets": [
+ {
+ "expr": "ceph_osd_numpg",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "#PGs",
+ "refId": "A"
+ }
+ ],
+ "title": "PGs per OSD",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": "",
+ "yAxis": {
+ "decimals": null,
+ "format": "none",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 15
+ },
+ "id": 64,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(ceph_osd_recovery_ops[1m]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Op/s",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Recovery Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": "Recovery Ops/s",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph",
+ "cluster"
+ ],
+ "templating": {
+ "list": [
+ {
+ "hide": 0,
+ "label": null,
+ "name": "datasource",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "auto": true,
+ "auto_count": 10,
+ "auto_min": "1m",
+ "current": {
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ "datasource": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": "Interval",
+ "multi": false,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-6h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph - Cluster",
+ "version": 13
+ }
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json
new file mode 100644
index 0000000..3e7aeef
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json
@@ -0,0 +1,362 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.3.2"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "MDS Performance",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 1
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*Reads/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Read Ops",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Write Ops",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "MDS Workload - $mds_servers",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "Reads(-) / Writes (+)",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 1
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{ceph_daemon}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Client Request Load - $mds_servers",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "Client Requests",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "MDS Server",
+ "multi": false,
+ "name": "mds_servers",
+ "options": [ ],
+ "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "MDS Performance",
+ "uid": "tbO9LAiZz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/host-details.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/host-details.json
new file mode 100644
index 0000000..93c51f0
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/host-details.json
@@ -0,0 +1,1243 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.3.2"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ },
+ {
+ "id": "singlestat",
+ "name": "Singlestat",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "$ceph_hosts System Overview",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 3,
+ "x": 0,
+ "y": 1
+ },
+ "id": 3,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", hostname='$ceph_hosts'}))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "OSDs",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {
+ "interrupt": "#447EBC",
+ "steal": "#6D1F62",
+ "system": "#890F02",
+ "user": "#3F6833",
+ "wait": "#C15C17"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 6,
+ "x": 3,
+ "y": 1
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (mode) (\n rate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[$__rate_interval]) or\n rate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[$__rate_interval])\n) / (\n scalar(\n sum(rate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]))\n ) * 100\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{mode}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Utilization",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "% Utilization",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "Available": "#508642",
+ "Free": "#508642",
+ "Total": "#bf1b00",
+ "Used": "#bf1b00",
+ "total": "#bf1b00",
+ "used": "#0a50a1"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 6,
+ "x": 9,
+ "y": 1
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "total",
+ "color": "#bf1b00",
+ "fill": 0,
+ "linewidth": 2,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Free",
+ "refId": "A"
+ },
+ {
+ "expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "total",
+ "refId": "B"
+ },
+ {
+ "expr": "(\n node_memory_Cached{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) + (\n node_memory_Buffers{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) + (\n node_memory_Slab{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "buffers/cache",
+ "refId": "C"
+ },
+ {
+ "expr": "(\n node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) - (\n (\n node_memory_MemFree{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) + (\n node_memory_Cached{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) + (\n node_memory_Buffers{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) +\n (\n node_memory_Slab{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n )\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "used",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "RAM Usage",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "RAM used",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 6,
+ "x": 15,
+ "y": 1
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*tx/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (device) (\n rate(\n node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]\n )\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}.rx",
+ "refId": "A"
+ },
+ {
+ "expr": "sum by (device) (\n rate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval])\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}.tx",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Load",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": "Send (-) / Receive (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 5,
+ "w": 3,
+ "x": 21,
+ "y": 1
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*tx/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}.rx",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}.tx",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network drop rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "Send (-) / Receive (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 3,
+ "x": 0,
+ "y": 6
+ },
+ "id": 8,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Raw Capacity",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 5,
+ "w": 3,
+ "x": 21,
+ "y": 6
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*tx/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}.rx",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}.tx",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network error rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "Send (-) / Receive (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 11
+ },
+ "id": 10,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OSD Disk Performance Statistics",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+      "description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by its name and corresponding OSD id value",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 11,
+ "x": 0,
+ "y": 12
+ },
+ "id": 11,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*reads/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}({{ceph_daemon}}) writes",
+ "refId": "A"
+ },
+ {
+ "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}({{ceph_daemon}}) reads",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$ceph_hosts Disk IOPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 11,
+ "x": 12,
+ "y": 12
+ },
+ "id": 12,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*read/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}({{ceph_daemon}}) write",
+ "refId": "A"
+ },
+ {
+ "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}({{ceph_daemon}}) read",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$ceph_hosts Throughput by Disk",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+      "description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with its corresponding OSD id",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 11,
+ "x": 0,
+ "y": 21
+ },
+ "id": 13,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "max by(instance, device) (label_replace(\n (rate(node_disk_write_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])) /\n clamp_min(rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]), 0.001) or\n (rate(node_disk_read_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])) /\n clamp_min(rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]), 0.001),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}({{ceph_daemon}})",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$ceph_hosts Disk Latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 11,
+ "x": 12,
+ "y": 21
+ },
+ "id": 14,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}}({{ceph_daemon}})",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$ceph_hosts Disk utilization",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "%Util",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin",
+ "overview"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Hostname",
+ "multi": false,
+ "name": "ceph_hosts",
+ "options": [ ],
+ "query": "label_values({}, instance)",
+ "refresh": 1,
+ "regex": "([^.:]*).*",
+ "sort": 3,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Host Details",
+ "uid": "rtOg0AiWz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/hosts-overview.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/hosts-overview.json
new file mode 100644
index 0000000..f1cd4c4
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/hosts-overview.json
@@ -0,0 +1,894 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.3.2"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ },
+ {
+ "id": "singlestat",
+ "name": "Singlestat",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\"}))",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "OSD Hosts",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 3,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n))\n",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "AVG CPU Busy",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 4,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n) / (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "AVG RAM Utilization",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "IOPS Load at the device as reported by the OS on all OSD hosts",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 12,
+ "y": 0
+ },
+ "id": 5,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum ((\n rate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n) + (\n rate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n))\n",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Physical IOPS",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)",
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 16,
+ "y": 0
+ },
+ "id": 6,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "AVG Disk Utilization",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Total send/receive network load across all hosts in the ceph cluster",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 5,
+ "w": 4,
+ "x": 20,
+ "y": 0
+ },
+ "id": 7,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+          "expr": "sum (\n  (\n    rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n    rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n  ) unless on (device, instance)\n  label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n  (\n    rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n    rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n  ) unless on (device, instance)\n  label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Network Load",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Show the top 10 busiest hosts by cpu",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 5
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n )\n )\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Busy - Top 10 Hosts",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Top 10 hosts by network load",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 5
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Load - Top 10 Hosts",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "osd_hosts",
+ "options": [ ],
+ "query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)",
+ "refresh": 1,
+ "regex": "([^.]*).*",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "mon_hosts",
+ "options": [ ],
+ "query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)",
+ "refresh": 1,
+ "regex": "mon.(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "mds_hosts",
+ "options": [ ],
+ "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
+ "refresh": 1,
+ "regex": "mds.(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "rgw_hosts",
+ "options": [ ],
+ "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
+ "refresh": 1,
+ "regex": "rgw.(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Host Overview",
+ "uid": "y0KGL0iZz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/osd-device-details.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/osd-device-details.json
new file mode 100644
index 0000000..384516f
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/osd-device-details.json
@@ -0,0 +1,871 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.3.2"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "OSD Performance",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 0,
+ "y": 1
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "read",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "read",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "write",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$osd Latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 6,
+ "y": 1
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "Reads",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$osd R/W IOPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 12,
+ "y": 1
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "Read Bytes",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Read Bytes",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Write Bytes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$osd R/W Bytes",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 10
+ },
+ "id": 6,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "Physical Device Performance",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 0,
+ "y": 11
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*Reads/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}/{{device}} Reads",
+ "refId": "A"
+ },
+ {
+ "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}/{{device}} Writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Physical Device Latency for $osd",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 6,
+ "y": 11
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*Reads/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} on {{instance}} Writes",
+ "refId": "A"
+ },
+ {
+ "expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} on {{instance}} Reads",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Physical Device R/W IOPS for $osd",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 12,
+ "y": 11
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "/.*Reads/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}} {{device}} Reads",
+ "refId": "A"
+ },
+ {
+ "expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}} {{device}} Writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Physical Device R/W Bytes for $osd",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 6,
+ "x": 18,
+ "y": 11
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device}} on {{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Physical Device Util% for $osd",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "OSD",
+ "multi": false,
+ "name": "osd",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-3h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "OSD device details",
+ "uid": "CrAHE0iZz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/osds-overview.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/osds-overview.json
new file mode 100644
index 0000000..5ea8955
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/osds-overview.json
@@ -0,0 +1,963 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.0.0"
+ },
+ {
+ "id": "grafana-piechart-panel",
+ "name": "Pie Chart",
+ "type": "panel",
+ "version": "1.3.3"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ },
+ {
+ "id": "table",
+ "name": "Table",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "aliasColors": {
+ "@95%ile": "#e0752d"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 8,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "AVG read",
+ "refId": "A"
+ },
+ {
+ "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "MAX read",
+ "refId": "B"
+ },
+ {
+ "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n )\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "@95%ile",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD Read Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ }
+ ]
+ },
+ {
+ "columns": [ ],
+ "datasource": "$datasource",
+ "description": "This table shows the osd's that are delivering the 10 highest read latencies within the cluster",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 3,
+ "links": [ ],
+ "sort": {
+ "col": 2,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "OSD ID",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "ceph_daemon",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Latency (ms)",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "none",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "/.*/",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Highest READ Latencies",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "aliasColors": {
+ "@95%ile write": "#e0752d"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 8,
+ "x": 12,
+ "y": 0
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "AVG write",
+ "refId": "A"
+ },
+ {
+ "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "MAX write",
+ "refId": "B"
+ },
+ {
+ "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n))\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "@95%ile write",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "OSD Write Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ }
+ ]
+ },
+ {
+ "columns": [ ],
+ "datasource": "$datasource",
+ "description": "This table shows the osd's that are delivering the 10 highest write latencies within the cluster",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 0
+ },
+ "id": 5,
+ "links": [ ],
+ "sort": {
+ "col": 2,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "OSD ID",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "ceph_daemon",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Latency (ms)",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "none",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "/.*/",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000)\n ))\n)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Highest WRITE Latencies",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "aliasColors": { },
+ "datasource": "$datasource",
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 0,
+ "y": 8
+ },
+ "id": 6,
+ "legend": {
+ "percentage": true,
+ "show": true,
+ "values": true
+ },
+ "legendType": "Under graph",
+ "pieType": "pie",
+ "targets": [
+ {
+ "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{device_class}}",
+ "refId": "A"
+ }
+ ],
+ "title": "OSD Types Summary",
+ "type": "grafana-piechart-panel",
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {
+ "Non-Encrypted": "#E5AC0E"
+ },
+ "datasource": "$datasource",
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 4,
+ "y": 8
+ },
+ "id": 7,
+ "legend": {
+ "percentage": true,
+ "show": true,
+ "values": true
+ },
+ "legendType": "Under graph",
+ "pieType": "pie",
+ "targets": [
+ {
+ "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "bluestore",
+ "refId": "A"
+ },
+ {
+ "expr": "absent(ceph_bluefs_wal_total_bytes{job=~\"$job\"}) * count(ceph_osd_metadata{job=~\"$job\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "filestore",
+ "refId": "B"
+ }
+ ],
+ "title": "OSD Objectstore Types",
+ "type": "grafana-piechart-panel",
+ "valueName": "current"
+ },
+ {
+ "aliasColors": { },
+ "datasource": "$datasource",
+ "description": "The pie chart shows the various OSD sizes used within the cluster",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 8,
+ "y": 8
+ },
+ "id": 8,
+ "legend": {
+ "percentage": true,
+ "show": true,
+ "values": true
+ },
+ "legendType": "Under graph",
+ "pieType": "pie",
+ "targets": [
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<1TB",
+ "refId": "A"
+ },
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<2TB",
+ "refId": "B"
+ },
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<3TB",
+ "refId": "C"
+ },
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<4TB",
+ "refId": "D"
+ },
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<6TB",
+ "refId": "E"
+ },
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<8TB",
+ "refId": "F"
+ },
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 10995116277760)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<10TB",
+ "refId": "G"
+ },
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<12TB",
+ "refId": "H"
+ },
+ {
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "<12TB+",
+ "refId": "I"
+ }
+ ],
+ "title": "OSD Size Summary",
+ "type": "grafana-piechart-panel",
+ "valueName": "current"
+ },
+ {
+ "aliasColors": { },
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 8,
+ "x": 12,
+ "y": 8
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_osd_numpg{job=~\"$job\"}",
+ "format": "time_series",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "PGs per OSD",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Distribution of PGs per OSD",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": 20,
+ "mode": "histogram",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "# of OSDs",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ }
+ ]
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "This gauge panel shows onode Hits ratio to help determine if increasing RAM per OSD could help improve the performance of the cluster",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 8
+ },
+ "id": 10,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\"})\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": ".75",
+ "title": "OSD onode Hits Ratio",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 16
+ },
+ "id": 11,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "R/W Profile",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Show the read/write workload profile overtime",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 17
+ },
+ "id": 12,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "A"
+ },
+ {
+ "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Read/Write Profile",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "OSD Overview",
+ "uid": "lo02I1Aiz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/pool-detail.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/pool-detail.json
new file mode 100644
index 0000000..dc8b415
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/pool-detail.json
@@ -0,0 +1,708 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.3.2"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ },
+ {
+ "id": "singlestat",
+ "name": "Singlestat",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 7,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": ".7,.8",
+ "title": "Capacity used",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Time till pool is full assuming the average fill rate of the last 6 hours",
+ "format": "s",
+ "gauge": {
+ "maxValue": false,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 5,
+ "x": 7,
+ "y": 0
+ },
+ "id": 3,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "current",
+ "title": "Time till full",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {
+ "read_op_per_sec": "#3F6833",
+ "write_op_per_sec": "#E5AC0E"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "deriv(ceph_pool_objects{job=~\"$job\"}[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Objects per second",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$pool_name Object Ingress/Egress",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": "Objects out(-) / in(+) ",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "read_op_per_sec": "#3F6833",
+ "write_op_per_sec": "#E5AC0E"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 7
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "reads",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "reads",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$pool_name Client IOPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "iops",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "read_op_per_sec": "#3F6833",
+ "write_op_per_sec": "#E5AC0E"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 7
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "alias": "reads",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "reads",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "writes",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$pool_name Client Throughput",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "Read (-) / Write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "read_op_per_sec": "#3F6833",
+ "write_op_per_sec": "#E5AC0E"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 14
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Number of Objects",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$pool_name Objects",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Objects",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 22,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Pool Name",
+ "multi": false,
+ "name": "pool_name",
+ "options": [ ],
+ "query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph Pool Details",
+ "uid": "-xyV8KCiz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/pool-overview.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/pool-overview.json
new file mode 100644
index 0000000..7f042aa
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/pool-overview.json
@@ -0,0 +1,1542 @@
+{
+ "__inputs": [ ],
+ "__requires": [ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 3,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(ceph_pool_metadata{job=~\"$job\"})",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Pools",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Count of the pools that have compression enabled",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 3,
+ "x": 3,
+ "y": 0
+ },
+ "id": 3,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"})",
+ "format": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Pools with Compression",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Total raw capacity available to the cluster",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 3,
+ "x": 6,
+ "y": 0
+ },
+ "id": 4,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"})",
+ "format": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Total Raw Capacity",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Total raw capacity consumed by user data and associated overheads (metadata + redundancy)",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 3,
+ "x": 9,
+ "y": 0
+ },
+ "id": 5,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(ceph_pool_bytes_used{job=~\"$job\"})",
+ "format": "",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Raw Capacity Consumed",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Total of client data stored in the cluster",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 3,
+ "x": 12,
+ "y": 0
+ },
+ "id": 6,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(ceph_pool_stored{job=~\"$job\"})",
+ "format": "",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Logical Stored",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 3,
+ "x": 15,
+ "y": 0
+ },
+ "id": 7,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"}\n)\n",
+ "format": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Compression Savings",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data",
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 3,
+ "x": 18,
+ "y": 0
+ },
+ "id": 8,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\"} and ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n) * 100\n",
+ "format": "table",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Compression Eligibility",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "$datasource",
+ "description": "This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 3,
+ "x": 21,
+ "y": 0
+ },
+ "id": 9,
+ "interval": null,
+ "links": [ ],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n)\n",
+ "format": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Compression Factor",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "columns": [ ],
+ "datasource": "$datasource",
+ "description": "",
+ "gridPos": {
+ "h": 6,
+ "w": 24,
+ "x": 0,
+ "y": 3
+ },
+ "id": 10,
+ "links": [ ],
+ "sort": {
+ "col": 5,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Time",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "instance",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "job",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Pool Name",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "name",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Pool ID",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "pool_id",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "none",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Compression Factor",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #A",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "none",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "% Used",
+ "colorMode": "value",
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #D",
+ "thresholds": [
+ "70",
+ "85"
+ ],
+ "type": "number",
+ "unit": "percentunit",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Usable Free",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #B",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "bytes",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Compression Eligibility",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #C",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "percent",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Compression Savings",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #E",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "bytes",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Growth (5d)",
+ "colorMode": "value",
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #F",
+ "thresholds": [
+ "0",
+ "0"
+ ],
+ "type": "number",
+ "unit": "bytes",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "IOPS",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #G",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "none",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Bandwidth",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #H",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "Bps",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "__name__",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "type",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "compression_mode",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Type",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "description",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Stored",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #J",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "bytes",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #I",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Compression",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value #K",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [
+ {
+ "text": "ON",
+ "value": "1"
+ }
+ ]
+ }
+ ],
+ "targets": [
+ {
+ "expr": "(\n ceph_pool_compress_under_bytes{job=~\"$job\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n ) * 100 > 0.5\n)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "A",
+ "refId": "A"
+ },
+ {
+ "expr": "ceph_pool_max_avail{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "B",
+ "refId": "B"
+ },
+ {
+ "expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n) * 100\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "C",
+ "refId": "C"
+ },
+ {
+ "expr": "ceph_pool_percent_used{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "D",
+ "refId": "D"
+ },
+ {
+ "expr": "ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "E",
+ "refId": "E"
+ },
+ {
+ "expr": "delta(ceph_pool_stored{job=~\"$job\"}[5d])",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "F",
+ "refId": "F"
+ },
+ {
+ "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "G",
+ "refId": "G"
+ },
+ {
+ "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "H",
+ "refId": "H"
+ },
+ {
+ "expr": "ceph_pool_metadata{job=~\"$job\"}",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "I",
+ "refId": "I"
+ },
+ {
+ "expr": "ceph_pool_stored{job=~\"$job\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\"}",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "J",
+ "refId": "J"
+ },
+ {
+ "expr": "ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"}",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "K",
+ "refId": "K"
+ },
+ {
+ "expr": "",
+ "format": "",
+ "intervalFactor": "",
+ "legendFormat": "L",
+ "refId": "L"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pool Overview",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "This chart shows the sum of read and write IOPS from all clients by pool",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 9
+ },
+ "id": 11,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"})\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{name}} ",
+ "refId": "A"
+ },
+ {
+ "expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{name}} - write",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Top $topk Client IOPS by Pool",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "IOPS",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "The chart shows the sum of read and write bytes from all clients, by pool",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 9
+ },
+ "id": 12,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\"}\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{name}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Top $topk Client Bandwidth by Pool",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "Throughput",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Historical view of capacity usage, to help identify growth and trends in pool consumption",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 17
+ },
+ "id": 13,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "ceph_pool_bytes_used{job=~\"$job\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{name}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Pool Capacity Usage (RAW)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "Capacity Used",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 22,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "text": "15",
+ "value": "15"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "TopK",
+ "multi": false,
+ "name": "topk",
+ "options": [
+ {
+ "text": "15",
+ "value": "15"
+ }
+ ],
+ "query": "15",
+ "refresh": 0,
+ "type": "custom"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Ceph Pools Overview",
+ "uid": "z99hzWtmk",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json
new file mode 100644
index 0000000..a0f8f35
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json
@@ -0,0 +1,542 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.0.0"
+ },
+ {
+ "id": "grafana-piechart-panel",
+ "name": "Pie Chart",
+ "type": "panel",
+ "version": "1.3.3"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "RGW Host Detail : $rgw_servers",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 0,
+ "y": 1
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GET {{ceph_daemon}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUT {{ceph_daemon}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "$rgw_servers GET/PUT Latencies",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 6,
+ "y": 1
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GETs {{ceph_daemon}}",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUTs {{ceph_daemon}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Bandwidth by HTTP Operation",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "GETs": "#7eb26d",
+ "Other": "#447ebc",
+ "PUTs": "#eab839",
+ "Requests": "#3f2b5b",
+ "Requests Failed": "#bf1b00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 7,
+ "x": 13,
+ "y": 1
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Requests Failed {{ceph_daemon}}",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GETs {{ceph_daemon}}",
+ "refId": "B"
+ },
+ {
+ "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUTs {{ceph_daemon}}",
+ "refId": "C"
+ },
+ {
+ "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Other {{ceph_daemon}}",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "HTTP Request Breakdown",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "Failures": "#bf1b00",
+ "GETs": "#7eb26d",
+ "Other (HEAD,POST,DELETE)": "#447ebc",
+ "PUTs": "#eab839",
+ "Requests": "#3f2b5b"
+ },
+ "datasource": "$datasource",
+ "description": "",
+ "gridPos": {
+ "h": 8,
+ "w": 4,
+ "x": 20,
+ "y": 1
+ },
+ "id": 6,
+ "legend": {
+ "percentage": true,
+ "show": true,
+ "values": true
+ },
+ "legendType": "Under graph",
+ "pieType": "pie",
+ "targets": [
+ {
+ "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Failures {{ceph_daemon}}",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GETs {{ceph_daemon}}",
+ "refId": "B"
+ },
+ {
+ "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUTs {{ceph_daemon}}",
+ "refId": "C"
+ },
+ {
+ "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}",
+ "refId": "D"
+ }
+ ],
+ "title": "Workload Breakdown",
+ "type": "grafana-piechart-panel",
+ "valueName": "current"
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin",
+ "overview"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "",
+ "multi": false,
+ "name": "rgw_servers",
+ "options": [ ],
+ "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "RGW Instance Detail",
+ "uid": "x5ARzZtmk",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json
new file mode 100644
index 0000000..77d69e4
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json
@@ -0,0 +1,1266 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.0.0"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "RGW Overview - All Gateways",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 1
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GET {{rgw_host}}",
+ "refId": "A"
+ },
+ {
+ "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUT {{rgw_host}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Average GET/PUT Latencies by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 7,
+ "x": 8,
+ "y": 1
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{rgw_host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total Requests/sec by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 6,
+ "x": 15,
+ "y": 1
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{rgw_host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "GET Latencies by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Total bytes transferred in/out of all radosgw instances within the cluster",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 0,
+ "y": 8
+ },
+ "id": 6,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "GETs",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "PUTs",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Bandwidth Consumed by Type",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Total bytes transferred in/out through get/put operations, by radosgw instance",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 7,
+ "x": 8,
+ "y": 8
+ },
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{rgw_host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Bandwidth by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 6,
+ "w": 6,
+ "x": 15,
+ "y": 8
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{rgw_host}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "PUT Latencies by RGW Instance",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "collapse": false,
+ "collapsed": false,
+ "gridPos": {
+ "h": 12,
+ "w": 9,
+ "x": 0,
+ "y": 12
+ },
+ "id": 9,
+ "panels": [ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": true,
+ "title": "RGW Overview - HAProxy Metrics",
+ "titleSize": "h6",
+ "type": "row"
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 5,
+ "x": 0,
+ "y": 12
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ [
+ {
+ "alias": "/.*Back.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*1.*/"
+ },
+ {
+ "alias": "/.*2.*/"
+ },
+ {
+ "alias": "/.*3.*/"
+ },
+ {
+ "alias": "/.*4.*/"
+ },
+ {
+ "alias": "/.*5.*/"
+ },
+ {
+ "alias": "/.*other.*/"
+ }
+ ]
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(\n rate(\n haproxy_frontend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"frontend\"}[$__rate_interval]\n )\n) by (code)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Frontend {{ code }}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_backend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"backend\"}[$__rate_interval]\n )\n) by (code)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Backend {{ code }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total responses by HTTP code",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 5,
+ "x": 5,
+ "y": 12
+ },
+ "id": 11,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ [
+ {
+ "alias": "/.*Response.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*Backend.*/",
+ "transform": "negative-Y"
+ }
+ ]
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(\n rate(\n haproxy_frontend_http_requests_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Requests",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_backend_response_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Response errors",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_frontend_request_errors_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Requests errors",
+ "refId": "C"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_backend_redispatch_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Backend redispatch",
+ "refId": "D"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_backend_retry_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Backend retry",
+ "refId": "E"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_frontend_requests_denied_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Request denied",
+ "refId": "F"
+ },
+ {
+ "expr": "sum(\n haproxy_backend_current_queue{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Backend Queued",
+ "refId": "G"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total requests / responses",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 5,
+ "x": 10,
+ "y": 12
+ },
+ "id": 12,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ [
+ {
+ "alias": "/.*Back.*/",
+ "transform": "negative-Y"
+ }
+ ]
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(\n rate(\n haproxy_frontend_connections_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Front",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_backend_connection_attempts_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Back",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_backend_connection_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Back errors",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total number of connections",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 6,
+ "x": 15,
+ "y": 12
+ },
+ "id": 13,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ [
+ {
+ "alias": "/.*OUT.*/",
+ "transform": "negative-Y"
+ }
+ ]
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(\n rate(\n haproxy_frontend_bytes_in_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "IN Front",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_frontend_bytes_out_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "OUT Front",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_backend_bytes_in_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "IN Back",
+ "refId": "C"
+ },
+ {
+ "expr": "sum(\n rate(\n haproxy_backend_bytes_out_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "OUT Back",
+ "refId": "D"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Current total of incoming / outgoing bytes",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin",
+ "overview"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "",
+ "multi": false,
+ "name": "rgw_servers",
+ "options": [ ],
+ "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
+ "refresh": 1,
+ "regex": "RGW Server",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "HTTP Code",
+ "multi": false,
+ "name": "code",
+ "options": [ ],
+ "query": "label_values(haproxy_server_http_responses_total{job=~\"$job_haproxy\", instance=~\"$ingress_service\"}, code)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job haproxy",
+ "multi": true,
+ "name": "job_haproxy",
+ "options": [ ],
+ "query": "label_values(haproxy_server_status, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Ingress Service",
+ "multi": false,
+ "name": "ingress_service",
+ "options": [ ],
+ "query": "label_values(haproxy_server_status{job=~\"$job_haproxy\"}, instance)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "RGW Overview",
+ "uid": "WAkugZpiz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json
new file mode 100644
index 0000000..e0c3037
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json
@@ -0,0 +1,504 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.0.0"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{source_zone}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Replication (throughput) from Source Zone",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 0
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{source_zone}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Replication (objects) from Source Zone",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Objects/s",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 0
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{source_zone}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Polling Request Latency from Source Zone",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 7
+ },
+ "id": 5,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{source_zone}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Unsuccessful Object Replications from Source Zone",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Count/s",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin",
+ "overview"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "",
+ "multi": false,
+ "name": "rgw_servers",
+ "options": [ ],
+ "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
+ "refresh": 1,
+ "regex": "RGW Server",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "RGW Sync Overview",
+ "uid": "rgw-sync-overview",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/rbd-details.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/rbd-details.json
new file mode 100644
index 0000000..f64de31
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/rbd-details.json
@@ -0,0 +1,458 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.3.3"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Detailed Performance of RBD Images (IOPS/Throughput/Latency)",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 8,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{pool}} Write",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{pool}} Read",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "iops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "iops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 8,
+ "x": 8,
+ "y": 0
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{pool}} Write",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{pool}} Read",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Throughput",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 8,
+ "x": 16,
+ "y": 0
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{pool}} Write",
+ "refId": "A"
+ },
+ {
+ "expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{pool}} Read",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Average Latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "ns",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "ns",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "",
+ "multi": false,
+ "name": "pool",
+ "options": [ ],
+ "query": "label_values(pool)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "",
+ "multi": false,
+ "name": "image",
+ "options": [ ],
+ "query": "label_values(image)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "RBD Details",
+ "uid": "YhCYGcuZz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/rbd-overview.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/rbd-overview.json
new file mode 100644
index 0000000..e017280
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/dashboards_out/rbd-overview.json
@@ -0,0 +1,737 @@
+{
+ "__inputs": [ ],
+ "__requires": [
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "5.4.2"
+ },
+ {
+ "id": "graph",
+ "name": "Graph",
+ "type": "panel",
+ "version": "5.0.0"
+ },
+ {
+ "id": "prometheus",
+ "name": "Prometheus",
+ "type": "datasource",
+ "version": "5.0.0"
+ },
+ {
+ "id": "table",
+ "name": "Table",
+ "type": "panel",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "showIn": 0,
+ "tags": [ ],
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": false,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [ ],
+ "panels": [
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Writes",
+ "refId": "A"
+ },
+ {
+ "expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Reads",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 0
+ },
+ "id": 3,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Write",
+ "refId": "A"
+ },
+ {
+ "expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Read",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Throughput",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": { },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "description": "",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 0
+ },
+ "id": 4,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [ ],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Write",
+ "refId": "A"
+ },
+ {
+ "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "Read",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Average Latency",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [ ]
+ },
+ "yaxes": [
+ {
+ "format": "ns",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ]
+ },
+ {
+ "columns": [ ],
+ "datasource": "$datasource",
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 7
+ },
+ "id": 5,
+ "links": [ ],
+ "sort": {
+ "col": 3,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "Pool",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "pool",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Image",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "image",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "IOPS",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "iops",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "/.*/",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Highest IOPS",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "columns": [ ],
+ "datasource": "$datasource",
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 7
+ },
+ "id": 6,
+ "links": [ ],
+ "sort": {
+ "col": 3,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "Pool",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "pool",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Image",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "image",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Throughput",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "Bps",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "/.*/",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Highest Throughput",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "columns": [ ],
+ "datasource": "$datasource",
+ "description": "",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 7
+ },
+ "id": 7,
+ "links": [ ],
+ "sort": {
+ "col": 3,
+ "desc": true
+ },
+ "styles": [
+ {
+ "alias": "Pool",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "pool",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Image",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "image",
+ "thresholds": [ ],
+ "type": "string",
+ "unit": "short",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "Latency",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "Value",
+ "thresholds": [ ],
+ "type": "number",
+ "unit": "ns",
+ "valueMaps": [ ]
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "mappingType": 1,
+ "pattern": "/.*/",
+ "thresholds": [ ],
+ "type": "hidden",
+ "unit": "short",
+ "valueMaps": [ ]
+ }
+ ],
+ "targets": [
+ {
+ "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Highest Latency",
+ "transform": "table",
+ "type": "table"
+ }
+ ],
+ "refresh": "30s",
+ "rows": [ ],
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "ceph-mixin",
+ "overview"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "RBD Overview",
+ "uid": "41FrpeUiz",
+ "version": 0
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnet-bundler-build.sh b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnet-bundler-build.sh
new file mode 100755
index 0000000..d713cff
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnet-bundler-build.sh
@@ -0,0 +1,8 @@
+#!/bin/sh -ex
+
+JSONNET_VERSION="v0.4.0"
+OUTPUT_DIR=${1:-$(pwd)}
+
+git clone -b ${JSONNET_VERSION} --depth 1 https://github.com/jsonnet-bundler/jsonnet-bundler
+make -C jsonnet-bundler build
+mv jsonnet-bundler/_output/jb ${OUTPUT_DIR}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnetfile.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnetfile.json
new file mode 100644
index 0000000..93f3316
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnetfile.json
@@ -0,0 +1,15 @@
+{
+ "version": 1,
+ "dependencies": [
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/grafonnet-lib.git",
+ "subdir": "grafonnet"
+ }
+ },
+ "version": "master"
+ }
+ ],
+ "legacyImports": true
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnetfile.lock.json b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnetfile.lock.json
new file mode 100644
index 0000000..3c9d38d
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/jsonnetfile.lock.json
@@ -0,0 +1,16 @@
+{
+ "version": 1,
+ "dependencies": [
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/grafonnet-lib.git",
+ "subdir": "grafonnet"
+ }
+ },
+ "version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
+ "sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
+ }
+ ],
+ "legacyImports": false
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/lint-jsonnet.sh b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/lint-jsonnet.sh
new file mode 100755
index 0000000..6f77162
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/lint-jsonnet.sh
@@ -0,0 +1,5 @@
+#!/bin/sh -e
+
+JSONNETS_FILES=$(find . -name 'vendor' -prune -o \
+ -name '*.jsonnet' -print -o -name '*.libsonnet' -print)
+jsonnetfmt "$@" ${JSONNETS_FILES}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/mixin.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/mixin.libsonnet
new file mode 100644
index 0000000..3c983a3
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/mixin.libsonnet
@@ -0,0 +1,3 @@
+(import 'config.libsonnet') +
+(import 'dashboards.libsonnet') +
+(import 'alerts.libsonnet')
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/prometheus_alerts.libsonnet b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/prometheus_alerts.libsonnet
new file mode 100644
index 0000000..bed89a8
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/prometheus_alerts.libsonnet
@@ -0,0 +1,718 @@
+{
+ _config:: error 'must provide _config',
+
+ MultiClusterQuery()::
+ if $._config.showMultiCluster
+ then 'cluster,'
+ else '',
+
+ MultiClusterSummary()::
+ if $._config.showMultiCluster
+ then ' on cluster {{ $labels.cluster }}'
+ else '',
+
+ groups+: [
+ {
+ name: 'cluster health',
+ rules: [
+ {
+ alert: 'CephHealthError',
+ 'for': '5m',
+ expr: 'ceph_health_status == 2',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.2.1' },
+ annotations: {
+ summary: 'Ceph is in the ERROR state%(cluster)s' % $.MultiClusterSummary(),
+ description: "The cluster state has been HEALTH_ERROR for more than 5 minutes%(cluster)s. Please check 'ceph health detail' for more information." % $.MultiClusterSummary(),
+ },
+ },
+ {
+ alert: 'CephHealthWarning',
+ 'for': '15m',
+ expr: 'ceph_health_status == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ summary: 'Ceph is in the WARNING state%(cluster)s' % $.MultiClusterSummary(),
+ description: "The cluster state has been HEALTH_WARN for more than 15 minutes%(cluster)s. Please check 'ceph health detail' for more information." % $.MultiClusterSummary(),
+ },
+ },
+ ],
+ },
+ {
+ name: 'mon',
+ rules: [
+ {
+ alert: 'CephMonDownQuorumAtRisk',
+ 'for': '30s',
+ expr: |||
+ (
+ (ceph_health_detail{name="MON_DOWN"} == 1) * on() (
+ count(ceph_mon_quorum_status == 1) == bool (floor(count(ceph_mon_metadata) / 2) + 1)
+ )
+ ) == 1
+ |||,
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.3.1' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down',
+ summary: 'Monitor quorum is at risk%(cluster)s' % $.MultiClusterSummary(),
+ description: '{{ $min := query "floor(count(ceph_mon_metadata) / 2) + 1" | first | value }}Quorum requires a majority of monitors (x {{ $min }}) to be active. Without quorum the cluster will become inoperable, affecting all services and connected clients. The following monitors are down: {{- range query "(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}',
+ },
+ },
+ {
+ alert: 'CephMonDown',
+ 'for': '30s',
+ expr: |||
+ count(ceph_mon_quorum_status == 0) <= (count(ceph_mon_metadata) - floor(count(ceph_mon_metadata) / 2) + 1)
+ |||,
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down',
+ summary: 'One or more monitors down%(cluster)s' % $.MultiClusterSummary(),
+ description: |||
+ {{ $down := query "count(ceph_mon_quorum_status == 0)" | first | value }}{{ $s := "" }}{{ if gt $down 1.0 }}{{ $s = "s" }}{{ end }}You have {{ $down }} monitor{{ $s }} down. Quorum is still intact, but the loss of an additional monitor will make your cluster inoperable. The following monitors are down: {{- range query "(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}
+ |||,
+ },
+ },
+ {
+ alert: 'CephMonDiskspaceCritical',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="MON_DISK_CRIT"} == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.3.2' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-crit',
+ summary: 'Filesystem space on at least one monitor is critically low%(cluster)s' % $.MultiClusterSummary(),
+ description: "The free space available to a monitor's store is critically low. You should increase the space available to the monitor(s). The default directory is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and /var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook. Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files. Also check any other directories under /var/lib/rook and other directories on the same filesystem, often /var/log and /var/tmp are culprits. Your monitor hosts are; {{- range query \"ceph_mon_metadata\"}} - {{ .Labels.hostname }} {{- end }}",
+ },
+ },
+ {
+ alert: 'CephMonDiskspaceLow',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name="MON_DISK_LOW"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-low',
+ summary: 'Drive space on at least one monitor is approaching full%(cluster)s' % $.MultiClusterSummary(),
+ description: "The space available to a monitor's store is approaching full (>70% is the default). You should increase the space available to the monitor(s). The default directory is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and /var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook. Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files. Also check any other directories under /var/lib/rook and other directories on the same filesystem, often /var/log and /var/tmp are culprits. Your monitor hosts are; {{- range query \"ceph_mon_metadata\"}} - {{ .Labels.hostname }} {{- end }}",
+ },
+ },
+ {
+ alert: 'CephMonClockSkew',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="MON_CLOCK_SKEW"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-clock-skew',
+ summary: 'Clock skew detected among monitors%(cluster)s' % $.MultiClusterSummary(),
+ description: "Ceph monitors rely on closely synchronized time to maintain quorum and cluster consistency. This event indicates that the time on at least one mon has drifted too far from the lead mon. Review cluster status with ceph -s. This will show which monitors are affected. Check the time sync status on each monitor host with 'ceph time-sync-status' and the state and peers of your ntpd or chrony daemon.",
+ },
+ },
+ ],
+ },
+ {
+ name: 'osd',
+ rules: [
+ {
+ alert: 'CephOSDDownHigh',
+ expr: 'count(ceph_osd_up == 0) / count(ceph_osd_up) * 100 >= 10',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.4.1' },
+ annotations: {
+ summary: 'More than 10%% of OSDs are down%(cluster)s' % $.MultiClusterSummary(),
+ description: '{{ $value | humanize }}% or {{ with query "count(ceph_osd_up == 0)" }}{{ . | first | value }}{{ end }} of {{ with query "count(ceph_osd_up)" }}{{ . | first | value }}{{ end }} OSDs are down (>= 10%). The following OSDs are down: {{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}',
+ },
+ },
+ {
+ alert: 'CephOSDHostDown',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name="OSD_HOST_DOWN"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.4.8' },
+ annotations: {
+ summary: 'An OSD host is offline%(cluster)s' % $.MultiClusterSummary(),
+ description: 'The following OSDs are down: {{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0" }} - {{ .Labels.hostname }} : {{ .Labels.ceph_daemon }} {{- end }}',
+ },
+ },
+ {
+ alert: 'CephOSDDown',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name="OSD_DOWN"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.4.2' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-down',
+ summary: 'An OSD has been marked down%(cluster)s' % $.MultiClusterSummary(),
+ description: |||
+ {{ $num := query "count(ceph_osd_up == 0)" | first | value }}{{ $s := "" }}{{ if gt $num 1.0 }}{{ $s = "s" }}{{ end }}{{ $num }} OSD{{ $s }} down for over 5mins. The following OSD{{ $s }} {{ if eq $s "" }}is{{ else }}are{{ end }} down: {{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0"}} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}
+ |||,
+ },
+ },
+ {
+ alert: 'CephOSDNearFull',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name="OSD_NEARFULL"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.4.3' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-nearfull',
+ summary: 'OSD(s) running low on free space (NEARFULL)%(cluster)s' % $.MultiClusterSummary(),
+ description: "One or more OSDs have reached the NEARFULL threshold. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data.",
+ },
+ },
+ {
+ alert: 'CephOSDFull',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="OSD_FULL"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.4.6' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-full',
+ summary: 'OSD full, writes blocked%(cluster)s' % $.MultiClusterSummary(),
+ description: "An OSD has reached the FULL threshold. Writes to pools that share the affected OSD will be blocked. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data.",
+ },
+ },
+ {
+ alert: 'CephOSDBackfillFull',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="OSD_BACKFILLFULL"} > 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-backfillfull',
+ summary: 'OSD(s) too full for backfill operations%(cluster)s' % $.MultiClusterSummary(),
+ description: "An OSD has reached the BACKFILL FULL threshold. This will prevent rebalance operations from completing. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data.",
+ },
+ },
+ {
+ alert: 'CephOSDTooManyRepairs',
+ 'for': '30s',
+ expr: 'ceph_health_detail{name="OSD_TOO_MANY_REPAIRS"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-too-many-repairs',
+ summary: 'OSD reports a high number of read errors%(cluster)s' % $.MultiClusterSummary(),
+ description: 'Reads from an OSD have used a secondary PG to return data to the client, indicating a potential failing drive.',
+ },
+ },
+ {
+ alert: 'CephOSDTimeoutsPublicNetwork',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="OSD_SLOW_PING_TIME_FRONT"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ summary: 'Network issues delaying OSD heartbeats (public network)%(cluster)s' % $.MultiClusterSummary(),
+ description: "OSD heartbeats on the cluster's 'public' network (frontend) are running slow. Investigate the network for latency or loss issues. Use 'ceph health detail' to show the affected OSDs.",
+ },
+ },
+ {
+ alert: 'CephOSDTimeoutsClusterNetwork',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="OSD_SLOW_PING_TIME_BACK"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ summary: 'Network issues delaying OSD heartbeats (cluster network)%(cluster)s' % $.MultiClusterSummary(),
+ description: "OSD heartbeats on the cluster's 'cluster' network (backend) are slow. Investigate the network for latency issues on this subnet. Use 'ceph health detail' to show the affected OSDs.",
+ },
+ },
+ {
+ alert: 'CephOSDInternalDiskSizeMismatch',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="BLUESTORE_DISK_SIZE_MISMATCH"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-disk-size-mismatch',
+ summary: 'OSD size inconsistency error%(cluster)s' % $.MultiClusterSummary(),
+ description: 'One or more OSDs have an internal inconsistency between metadata and the size of the device. This could lead to the OSD(s) crashing in future. You should redeploy the affected OSDs.',
+ },
+ },
+ {
+ alert: 'CephDeviceFailurePredicted',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="DEVICE_HEALTH"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#id2',
+ summary: 'Device(s) predicted to fail soon%(cluster)s' % $.MultiClusterSummary(),
+ description: "The device health module has determined that one or more devices will fail soon. To review device status use 'ceph device ls'. To show a specific device use 'ceph device info <dev id>'. Mark the OSD out so that data may migrate to other OSDs. Once the OSD has drained, destroy the OSD, replace the device, and redeploy the OSD.",
+ },
+ },
+ {
+ alert: 'CephDeviceFailurePredictionTooHigh',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="DEVICE_HEALTH_TOOMANY"} == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.4.7' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-toomany',
+ summary: 'Too many devices are predicted to fail, unable to resolve%(cluster)s' % $.MultiClusterSummary(),
+ description: 'The device health module has determined that devices predicted to fail can not be remediated automatically, since too many OSDs would be removed from the cluster to ensure performance and availabililty. Prevent data integrity issues by adding new OSDs so that data may be relocated.',
+ },
+ },
+ {
+ alert: 'CephDeviceFailureRelocationIncomplete',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="DEVICE_HEALTH_IN_USE"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-in-use',
+ summary: 'Device failure is predicted, but unable to relocate data%(cluster)s' % $.MultiClusterSummary(),
+ description: 'The device health module has determined that one or more devices will fail soon, but the normal process of relocating the data on the device to other OSDs in the cluster is blocked. \nEnsure that the cluster has available free space. It may be necessary to add capacity to the cluster to allow data from the failing device to successfully migrate, or to enable the balancer.',
+ },
+ },
+ {
+ alert: 'CephOSDFlapping',
+ expr: '(rate(ceph_osd_up[5m]) * on(%(cluster)sceph_daemon) group_left(hostname) ceph_osd_metadata) * 60 > 1' % $.MultiClusterQuery(),
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.4.4' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/troubleshooting/troubleshooting-osd#flapping-osds',
+ summary: 'Network issues are causing OSDs to flap (mark each other down)%(cluster)s' % $.MultiClusterSummary(),
+ description: 'OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} was marked down and back up {{ $value | humanize }} times once a minute for 5 minutes. This may indicate a network issue (latency, packet loss, MTU mismatch) on the cluster network, or the public network if no cluster network is deployed. Check the network stats on the listed host(s).',
+ },
+ },
+ {
+ alert: 'CephOSDReadErrors',
+ 'for': '30s',
+ expr: 'ceph_health_detail{name="BLUESTORE_SPURIOUS_READ_ERRORS"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-spurious-read-errors',
+ summary: 'Device read errors detected%(cluster)s' % $.MultiClusterSummary(),
+ description: 'An OSD has encountered read errors, but the OSD has recovered by retrying the reads. This may indicate an issue with hardware or the kernel.',
+ },
+ },
+ {
+ alert: 'CephPGImbalance',
+ 'for': '5m',
+ expr: |||
+ abs(
+ ((ceph_osd_numpg > 0) - on (%(cluster)sjob) group_left avg(ceph_osd_numpg > 0) by (%(cluster)sjob)) /
+ on (job) group_left avg(ceph_osd_numpg > 0) by (job)
+ ) * on (%(cluster)sceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
+ ||| % [$.MultiClusterQuery(), $.MultiClusterQuery(), $.MultiClusterQuery()],
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.4.5' },
+ annotations: {
+ summary: 'PGs are not balanced across OSDs%(cluster)s' % $.MultiClusterSummary(),
+ description: 'OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} deviates by more than 30% from average PG count.',
+ },
+ },
+ ],
+ },
+ {
+ name: 'mds',
+ rules: [
+ {
+ alert: 'CephFilesystemDamaged',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="MDS_DAMAGE"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.5.1' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages',
+ summary: 'CephFS filesystem is damaged%(cluster)s.' % $.MultiClusterSummary(),
+ description: 'Filesystem metadata has been corrupted. Data may be inaccessible. Analyze metrics from the MDS daemon admin socket, or escalate to support.',
+ },
+ },
+ {
+ alert: 'CephFilesystemOffline',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="MDS_ALL_DOWN"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.5.3' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-all-down',
+ summary: 'CephFS filesystem is offline%(cluster)s' % $.MultiClusterSummary(),
+ description: 'All MDS ranks are unavailable. The MDS daemons managing metadata are down, rendering the filesystem offline.',
+ },
+ },
+ {
+ alert: 'CephFilesystemDegraded',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="FS_DEGRADED"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.5.4' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-degraded',
+ summary: 'CephFS filesystem is degraded%(cluster)s' % $.MultiClusterSummary(),
+ description: 'One or more metadata daemons (MDS ranks) are failed or in a damaged state. At best the filesystem is partially available, at worst the filesystem is completely unusable.',
+ },
+ },
+ {
+ alert: 'CephFilesystemMDSRanksLow',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="MDS_UP_LESS_THAN_MAX"} > 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-up-less-than-max',
+ summary: 'Ceph MDS daemon count is lower than configured%(cluster)s' % $.MultiClusterSummary(),
+ description: "The filesystem's 'max_mds' setting defines the number of MDS ranks in the filesystem. The current number of active MDS daemons is less than this value.",
+ },
+ },
+ {
+ alert: 'CephFilesystemInsufficientStandby',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="MDS_INSUFFICIENT_STANDBY"} > 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-insufficient-standby',
+ summary: 'Ceph filesystem standby daemons too few%(cluster)s' % $.MultiClusterSummary(),
+ description: 'The minimum number of standby daemons required by standby_count_wanted is less than the current number of standby daemons. Adjust the standby count or increase the number of MDS daemons.',
+ },
+ },
+ {
+ alert: 'CephFilesystemFailureNoStandby',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="FS_WITH_FAILED_MDS"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.5.5' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-with-failed-mds',
+ summary: 'MDS daemon failed, no further standby available%(cluster)s' % $.MultiClusterSummary(),
+ description: 'An MDS daemon has failed, leaving only one active rank and no available standby. Investigate the cause of the failure or add a standby MDS.',
+ },
+ },
+ {
+ alert: 'CephFilesystemReadOnly',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="MDS_HEALTH_READ_ONLY"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.5.2' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages',
+ summary: 'CephFS filesystem in read only mode due to write error(s)%(cluster)s' % $.MultiClusterSummary(),
+ description: 'The filesystem has switched to READ ONLY due to an unexpected error when writing to the metadata pool. Either analyze the output from the MDS daemon admin socket, or escalate to support.',
+ },
+ },
+ ],
+ },
+ {
+ name: 'mgr',
+ rules: [
+ {
+ alert: 'CephMgrModuleCrash',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name="RECENT_MGR_MODULE_CRASH"} == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.6.1' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#recent-mgr-module-crash',
+ summary: 'A manager module has recently crashed%(cluster)s' % $.MultiClusterSummary(),
+ description: "One or more mgr modules have crashed and have yet to be acknowledged by an administrator. A crashed module may impact functionality within the cluster. Use the 'ceph crash' command to determine which module has failed, and archive it to acknowledge the failure.",
+ },
+ },
+ {
+ alert: 'CephMgrPrometheusModuleInactive',
+ 'for': '1m',
+ expr: 'up{job="ceph"} == 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.6.2' },
+ annotations: {
+ summary: 'The mgr/prometheus module is not available%(cluster)s' % $.MultiClusterSummary(),
+ description: "The mgr/prometheus module at {{ $labels.instance }} is unreachable. This could mean that the module has been disabled or the mgr daemon itself is down. Without the mgr/prometheus module metrics and alerts will no longer function. Open a shell to an admin node or toolbox pod and use 'ceph -s' to determine whether the mgr is active. If the mgr is not active, restart it, otherwise you can determine module status with 'ceph mgr module ls'. If it is not listed as enabled, enable it with 'ceph mgr module enable prometheus'.",
+ },
+ },
+ ],
+ },
+ {
+ name: 'pgs',
+ rules: [
+ {
+ alert: 'CephPGsInactive',
+ 'for': '5m',
+ expr: 'ceph_pool_metadata * on(%(cluster)spool_id,instance) group_left() (ceph_pg_total - ceph_pg_active) > 0' % $.MultiClusterQuery(),
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.7.1' },
+ annotations: {
+ summary: 'One or more placement groups are inactive%(cluster)s' % $.MultiClusterSummary(),
+ description: '{{ $value }} PGs have been inactive for more than 5 minutes in pool {{ $labels.name }}. Inactive placement groups are not able to serve read/write requests.',
+ },
+ },
+ {
+ alert: 'CephPGsUnclean',
+ 'for': '15m',
+ expr: 'ceph_pool_metadata * on(%(cluster)spool_id,instance) group_left() (ceph_pg_total - ceph_pg_clean) > 0' % $.MultiClusterQuery(),
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.7.2' },
+ annotations: {
+ summary: 'One or more placement groups are marked unclean%(cluster)s' % $.MultiClusterSummary(),
+ description: '{{ $value }} PGs have been unclean for more than 15 minutes in pool {{ $labels.name }}. Unclean PGs have not recovered from a previous failure.',
+ },
+ },
+ {
+ alert: 'CephPGsDamaged',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name=~"PG_DAMAGED|OSD_SCRUB_ERRORS"} == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.7.4' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-damaged',
+ summary: 'Placement group damaged, manual intervention needed%(cluster)s' % $.MultiClusterSummary(),
+ description: "During data consistency checks (scrub), at least one PG has been flagged as being damaged or inconsistent. Check to see which PG is affected, and attempt a manual repair if necessary. To list problematic placement groups, use 'rados list-inconsistent-pg <pool>'. To repair PGs use the 'ceph pg repair <pg_num>' command.",
+ },
+ },
+ {
+ alert: 'CephPGRecoveryAtRisk',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="PG_RECOVERY_FULL"} == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.7.5' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-recovery-full',
+ summary: 'OSDs are too full for recovery%(cluster)s' % $.MultiClusterSummary(),
+ description: "Data redundancy is at risk since one or more OSDs are at or above the 'full' threshold. Add more capacity to the cluster, restore down/out OSDs, or delete unwanted data.",
+ },
+ },
+ {
+ alert: 'CephPGUnavilableBlockingIO',
+ 'for': '1m',
+ expr: '((ceph_health_detail{name="PG_AVAILABILITY"} == 1) - scalar(ceph_health_detail{name="OSD_DOWN"})) == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.7.3' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-availability',
+ summary: 'PG is unavailable%(cluster)s, blocking I/O' % $.MultiClusterSummary(),
+ description: "Data availability is reduced, impacting the cluster's ability to service I/O. One or more placement groups (PGs) are in a state that blocks I/O.",
+ },
+ },
+ {
+ alert: 'CephPGBackfillAtRisk',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="PG_BACKFILL_FULL"} == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.7.6' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-backfill-full',
+ summary: 'Backfill operations are blocked due to lack of free space%(cluster)s' % $.MultiClusterSummary(),
+ description: "Data redundancy may be at risk due to lack of free space within the cluster. One or more OSDs have reached the 'backfillfull' threshold. Add more capacity, or delete unwanted data.",
+ },
+ },
+ {
+ alert: 'CephPGNotScrubbed',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name="PG_NOT_SCRUBBED"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-scrubbed',
+ summary: 'Placement group(s) have not been scrubbed%(cluster)s' % $.MultiClusterSummary(),
+ description: "One or more PGs have not been scrubbed recently. Scrubs check metadata integrity, protecting against bit-rot. They check that metadata is consistent across data replicas. When PGs miss their scrub interval, it may indicate that the scrub window is too small, or PGs were not in a 'clean' state during the scrub window. You can manually initiate a scrub with: ceph pg scrub <pgid>",
+ },
+ },
+ {
+ alert: 'CephPGsHighPerOSD',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="TOO_MANY_PGS"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks/#too-many-pgs',
+ summary: 'Placement groups per OSD is too high%(cluster)s' % $.MultiClusterSummary(),
+ description: "The number of placement groups per OSD is too high (exceeds the mon_max_pg_per_osd setting).\n Check that the pg_autoscaler has not been disabled for any pools with 'ceph osd pool autoscale-status', and that the profile selected is appropriate. You may also adjust the target_size_ratio of a pool to guide the autoscaler based on the expected relative size of the pool ('ceph osd pool set cephfs.cephfs.meta target_size_ratio .1') or set the pg_autoscaler mode to 'warn' and adjust pg_num appropriately for one or more pools.",
+ },
+ },
+ {
+ alert: 'CephPGNotDeepScrubbed',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name="PG_NOT_DEEP_SCRUBBED"} == 1',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-deep-scrubbed',
+ summary: 'Placement group(s) have not been deep scrubbed%(cluster)s' % $.MultiClusterSummary(),
+ description: "One or more PGs have not been deep scrubbed recently. Deep scrubs protect against bit-rot. They compare data replicas to ensure consistency. When PGs miss their deep scrub interval, it may indicate that the window is too small or PGs were not in a 'clean' state during the deep-scrub window.",
+ },
+ },
+ ],
+ },
+ {
+ name: 'nodes',
+ rules: [
+ {
+ alert: 'CephNodeRootFilesystemFull',
+ 'for': '5m',
+ expr: 'node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100 < 5',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.8.1' },
+ annotations: {
+ summary: 'Root filesystem is dangerously full%(cluster)s' % $.MultiClusterSummary(),
+ description: 'Root volume is dangerously full: {{ $value | humanize }}% free.',
+ },
+ },
+ {
+ alert: 'CephNodeNetworkPacketDrops',
+ expr: |||
+ (
+ rate(node_network_receive_drop_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_drop_total{device!="lo"}[1m])
+ ) / (
+ rate(node_network_receive_packets_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_packets_total{device!="lo"}[1m])
+ ) >= %(CephNodeNetworkPacketDropsThreshold)s and (
+ rate(node_network_receive_drop_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_drop_total{device!="lo"}[1m])
+ ) >= %(CephNodeNetworkPacketDropsPerSec)s
+ ||| % $._config,
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.8.2' },
+ annotations: {
+ summary: 'One or more NICs reports packet drops%(cluster)s' % $.MultiClusterSummary(),
+ description: 'Node {{ $labels.instance }} experiences packet drop > %(CephNodeNetworkPacketDropsThreshold)s%% or > %(CephNodeNetworkPacketDropsPerSec)s packets/s on interface {{ $labels.device }}.' % { CephNodeNetworkPacketDropsThreshold: $._config.CephNodeNetworkPacketDropsThreshold * 100, CephNodeNetworkPacketDropsPerSec: $._config.CephNodeNetworkPacketDropsPerSec },
+ },
+ },
+ {
+ alert: 'CephNodeNetworkPacketErrors',
+ expr: |||
+ (
+ rate(node_network_receive_errs_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_errs_total{device!="lo"}[1m])
+ ) / (
+ rate(node_network_receive_packets_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_packets_total{device!="lo"}[1m])
+ ) >= 0.0001 or (
+ rate(node_network_receive_errs_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_errs_total{device!="lo"}[1m])
+ ) >= 10
+ |||,
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.8.3' },
+ annotations: {
+ summary: 'One or more NICs reports packet errors%(cluster)s' % $.MultiClusterSummary(),
+ description: 'Node {{ $labels.instance }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ $labels.device }}.',
+ },
+ },
+ {
+ alert: 'CephNodeDiskspaceWarning',
+ expr: 'predict_linear(node_filesystem_free_bytes{device=~"/.*"}[2d], 3600 * 24 * 5) *on(instance) group_left(nodename) node_uname_info < 0',
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.8.4' },
+ annotations: {
+ summary: 'Host filesystem free space is getting low%(cluster)s' % $.MultiClusterSummary(),
+ description: 'Mountpoint {{ $labels.mountpoint }} on {{ $labels.nodename }} will be full in less than 5 days based on the 48 hour trailing fill rate.',
+ },
+ },
+ {
+ alert: 'CephNodeInconsistentMTU',
+ expr: 'node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) )',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ summary: 'MTU settings across Ceph hosts are inconsistent%(cluster)s' % $.MultiClusterSummary(),
+ description: 'Node {{ $labels.instance }} has a different MTU size ({{ $value }}) than the median of devices named {{ $labels.device }}.',
+ },
+ },
+ ],
+ },
+ {
+ name: 'pools',
+ rules: [
+ {
+ alert: 'CephPoolGrowthWarning',
+ expr: '(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(%(cluster)spool_id) group_right ceph_pool_metadata) >= 95' % $.MultiClusterQuery(),
+ labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.9.2' },
+ annotations: {
+ summary: 'Pool growth rate may soon exceed capacity%(cluster)s' % $.MultiClusterSummary(),
+ description: "Pool '{{ $labels.name }}' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours.",
+ },
+ },
+ {
+ alert: 'CephPoolBackfillFull',
+ expr: 'ceph_health_detail{name="POOL_BACKFILLFULL"} > 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ summary: 'Free space in a pool is too low for recovery/backfill%(cluster)s' % $.MultiClusterSummary(),
+ description: 'A pool is approaching the near full threshold, which will prevent recovery/backfill operations from completing. Consider adding more capacity.',
+ },
+ },
+ {
+ alert: 'CephPoolFull',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="POOL_FULL"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.9.1' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#pool-full',
+ summary: 'Pool is full - writes are blocked%(cluster)s' % $.MultiClusterSummary(),
+ description: "A pool has reached its MAX quota, or OSDs supporting the pool have reached the FULL threshold. Until this is resolved, writes to the pool will be blocked. Pool Breakdown (top 5) {{- range query \"topk(5, sort_desc(ceph_pool_percent_used * on(pool_id) group_right ceph_pool_metadata))\" }} - {{ .Labels.name }} at {{ .Value }}% {{- end }} Increase the pool's quota, or add capacity to the cluster first then increase the pool's quota (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>)",
+ },
+ },
+ {
+ alert: 'CephPoolNearFull',
+ 'for': '5m',
+ expr: 'ceph_health_detail{name="POOL_NEAR_FULL"} > 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ summary: 'One or more Ceph pools are nearly full%(cluster)s' % $.MultiClusterSummary(),
+ description: "A pool has exceeded the warning (percent full) threshold, or OSDs supporting the pool have reached the NEARFULL threshold. Writes may continue, but you are at risk of the pool going read-only if more capacity isn't made available. Determine the affected pool with 'ceph df detail', looking at QUOTA BYTES and STORED. Increase the pool's quota, or add capacity to the cluster first then increase the pool's quota (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>). Also ensure that the balancer is active.",
+ },
+ },
+ ],
+ },
+ {
+ name: 'healthchecks',
+ rules: [
+ {
+ alert: 'CephSlowOps',
+ 'for': '30s',
+ expr: 'ceph_healthcheck_slow_ops > 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#slow-ops',
+ summary: 'OSD operations are slow to complete%(cluster)s' % $.MultiClusterSummary(),
+ description: '{{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)',
+ },
+ },
+ ],
+ },
+ {
+ name: 'cephadm',
+ rules: [
+ {
+ alert: 'CephadmUpgradeFailed',
+ 'for': '30s',
+ expr: 'ceph_health_detail{name="UPGRADE_EXCEPTION"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.11.2' },
+ annotations: {
+ summary: 'Ceph version upgrade has failed%(cluster)s' % $.MultiClusterSummary(),
+ description: 'The cephadm cluster upgrade process has failed. The cluster remains in an undetermined state. Please review the cephadm logs, to understand the nature of the issue',
+ },
+ },
+ {
+ alert: 'CephadmDaemonFailed',
+ 'for': '30s',
+ expr: 'ceph_health_detail{name="CEPHADM_FAILED_DAEMON"} > 0',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.11.1' },
+ annotations: {
+ summary: 'A ceph daemon managed by cephadm is down%(cluster)s' % $.MultiClusterSummary(),
+ description: "A daemon managed by cephadm is no longer active. Determine, which daemon is down with 'ceph health detail'. you may start daemons with the 'ceph orch daemon start <daemon_id>'",
+ },
+ },
+ {
+ alert: 'CephadmPaused',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="CEPHADM_PAUSED"} > 0',
+ labels: { severity: 'warning', type: 'ceph_default' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/cephadm/operations#cephadm-paused',
+ summary: 'Orchestration tasks via cephadm are PAUSED%(cluster)s' % $.MultiClusterSummary(),
+ description: "Cluster management has been paused manually. This will prevent the orchestrator from service management and reconciliation. If this is not intentional, resume cephadm operations with 'ceph orch resume'",
+ },
+ },
+ ],
+ },
+ {
+ name: 'PrometheusServer',
+ rules: [
+ {
+ alert: 'PrometheusJobMissing',
+ 'for': '30s',
+ expr: 'absent(up{job="ceph"})',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.12.1' },
+ annotations: {
+ summary: 'The scrape job for Ceph is missing from Prometheus%(cluster)s' % $.MultiClusterSummary(),
+ description: "The prometheus job that scrapes from Ceph is no longer defined, this will effectively mean you'll have no metrics or alerts for the cluster. Please review the job definitions in the prometheus.yml file of the prometheus instance.",
+ },
+ },
+ ],
+ },
+ {
+ name: 'rados',
+ rules: [
+ {
+ alert: 'CephObjectMissing',
+ 'for': '30s',
+ expr: '(ceph_health_detail{name="OBJECT_UNFOUND"} == 1) * on() (count(ceph_osd_up == 1) == bool count(ceph_osd_metadata)) == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.10.1' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks#object-unfound',
+ summary: 'Object(s) marked UNFOUND%(cluster)s' % $.MultiClusterSummary(),
+ description: 'The latest version of a RADOS object can not be found, even though all OSDs are up. I/O requests for this object from clients will block (hang). Resolving this issue may require the object to be rolled back to a prior version manually, and manually verified.',
+ },
+ },
+ ],
+ },
+ {
+ name: 'generic',
+ rules: [
+ {
+ alert: 'CephDaemonCrash',
+ 'for': '1m',
+ expr: 'ceph_health_detail{name="RECENT_CRASH"} == 1',
+ labels: { severity: 'critical', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.1.2' },
+ annotations: {
+ documentation: 'https://docs.ceph.com/en/latest/rados/operations/health-checks/#recent-crash',
+ summary: 'One or more Ceph daemons have crashed, and are pending acknowledgement%(cluster)s' % $.MultiClusterSummary(),
+ description: "One or more daemons have crashed recently, and need to be acknowledged. This notification ensures that software crashes do not go unseen. To acknowledge a crash, use the 'ceph crash archive <id>' command.",
+ },
+ },
+ ],
+ },
+ ],
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/prometheus_alerts.yml b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/prometheus_alerts.yml
new file mode 100644
index 0000000..a544d41
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/prometheus_alerts.yml
@@ -0,0 +1,635 @@
+groups:
+ - name: "cluster health"
+ rules:
+ - alert: "CephHealthError"
+ annotations:
+ description: "The cluster state has been HEALTH_ERROR for more than 5 minutes. Please check 'ceph health detail' for more information."
+ summary: "Ceph is in the ERROR state"
+ expr: "ceph_health_status == 2"
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.2.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephHealthWarning"
+ annotations:
+ description: "The cluster state has been HEALTH_WARN for more than 15 minutes. Please check 'ceph health detail' for more information."
+ summary: "Ceph is in the WARNING state"
+ expr: "ceph_health_status == 1"
+ for: "15m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - name: "mon"
+ rules:
+ - alert: "CephMonDownQuorumAtRisk"
+ annotations:
+ description: "{{ $min := query \"floor(count(ceph_mon_metadata) / 2) + 1\" | first | value }}Quorum requires a majority of monitors (x {{ $min }}) to be active. Without quorum the cluster will become inoperable, affecting all services and connected clients. The following monitors are down: {{- range query \"(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)\" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}"
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down"
+ summary: "Monitor quorum is at risk"
+ expr: |
+ (
+ (ceph_health_detail{name="MON_DOWN"} == 1) * on() (
+ count(ceph_mon_quorum_status == 1) == bool (floor(count(ceph_mon_metadata) / 2) + 1)
+ )
+ ) == 1
+ for: "30s"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.3.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephMonDown"
+ annotations:
+ description: |
+ {{ $down := query "count(ceph_mon_quorum_status == 0)" | first | value }}{{ $s := "" }}{{ if gt $down 1.0 }}{{ $s = "s" }}{{ end }}You have {{ $down }} monitor{{ $s }} down. Quorum is still intact, but the loss of an additional monitor will make your cluster inoperable. The following monitors are down: {{- range query "(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down"
+ summary: "One or more monitors down"
+ expr: |
+ count(ceph_mon_quorum_status == 0) <= (count(ceph_mon_metadata) - floor(count(ceph_mon_metadata) / 2) + 1)
+ for: "30s"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephMonDiskspaceCritical"
+ annotations:
+ description: "The free space available to a monitor's store is critically low. You should increase the space available to the monitor(s). The default directory is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and /var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook. Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files. Also check any other directories under /var/lib/rook and other directories on the same filesystem, often /var/log and /var/tmp are culprits. Your monitor hosts are; {{- range query \"ceph_mon_metadata\"}} - {{ .Labels.hostname }} {{- end }}"
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-crit"
+ summary: "Filesystem space on at least one monitor is critically low"
+ expr: "ceph_health_detail{name=\"MON_DISK_CRIT\"} == 1"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.3.2"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephMonDiskspaceLow"
+ annotations:
+ description: "The space available to a monitor's store is approaching full (>70% is the default). You should increase the space available to the monitor(s). The default directory is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and /var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook. Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files. Also check any other directories under /var/lib/rook and other directories on the same filesystem, often /var/log and /var/tmp are culprits. Your monitor hosts are; {{- range query \"ceph_mon_metadata\"}} - {{ .Labels.hostname }} {{- end }}"
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-low"
+ summary: "Drive space on at least one monitor is approaching full"
+ expr: "ceph_health_detail{name=\"MON_DISK_LOW\"} == 1"
+ for: "5m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephMonClockSkew"
+ annotations:
+ description: "Ceph monitors rely on closely synchronized time to maintain quorum and cluster consistency. This event indicates that the time on at least one mon has drifted too far from the lead mon. Review cluster status with ceph -s. This will show which monitors are affected. Check the time sync status on each monitor host with 'ceph time-sync-status' and the state and peers of your ntpd or chrony daemon."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-clock-skew"
+ summary: "Clock skew detected among monitors"
+ expr: "ceph_health_detail{name=\"MON_CLOCK_SKEW\"} == 1"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - name: "osd"
+ rules:
+ - alert: "CephOSDDownHigh"
+ annotations:
+ description: "{{ $value | humanize }}% or {{ with query \"count(ceph_osd_up == 0)\" }}{{ . | first | value }}{{ end }} of {{ with query \"count(ceph_osd_up)\" }}{{ . | first | value }}{{ end }} OSDs are down (>= 10%). The following OSDs are down: {{- range query \"(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0\" }} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}"
+ summary: "More than 10% of OSDs are down"
+ expr: "count(ceph_osd_up == 0) / count(ceph_osd_up) * 100 >= 10"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.4.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephOSDHostDown"
+ annotations:
+ description: "The following OSDs are down: {{- range query \"(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0\" }} - {{ .Labels.hostname }} : {{ .Labels.ceph_daemon }} {{- end }}"
+ summary: "An OSD host is offline"
+ expr: "ceph_health_detail{name=\"OSD_HOST_DOWN\"} == 1"
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.4.8"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDDown"
+ annotations:
+ description: |
+ {{ $num := query "count(ceph_osd_up == 0)" | first | value }}{{ $s := "" }}{{ if gt $num 1.0 }}{{ $s = "s" }}{{ end }}{{ $num }} OSD{{ $s }} down for over 5mins. The following OSD{{ $s }} {{ if eq $s "" }}is{{ else }}are{{ end }} down: {{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0"}} - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }} {{- end }}
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-down"
+ summary: "An OSD has been marked down"
+ expr: "ceph_health_detail{name=\"OSD_DOWN\"} == 1"
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.4.2"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDNearFull"
+ annotations:
+ description: "One or more OSDs have reached the NEARFULL threshold. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-nearfull"
+ summary: "OSD(s) running low on free space (NEARFULL)"
+ expr: "ceph_health_detail{name=\"OSD_NEARFULL\"} == 1"
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.4.3"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDFull"
+ annotations:
+ description: "An OSD has reached the FULL threshold. Writes to pools that share the affected OSD will be blocked. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-full"
+ summary: "OSD full, writes blocked"
+ expr: "ceph_health_detail{name=\"OSD_FULL\"} > 0"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.4.6"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephOSDBackfillFull"
+ annotations:
+ description: "An OSD has reached the BACKFILL FULL threshold. This will prevent rebalance operations from completing. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-backfillfull"
+ summary: "OSD(s) too full for backfill operations"
+ expr: "ceph_health_detail{name=\"OSD_BACKFILLFULL\"} > 0"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDTooManyRepairs"
+ annotations:
+ description: "Reads from an OSD have used a secondary PG to return data to the client, indicating a potential failing drive."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-too-many-repairs"
+ summary: "OSD reports a high number of read errors"
+ expr: "ceph_health_detail{name=\"OSD_TOO_MANY_REPAIRS\"} == 1"
+ for: "30s"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDTimeoutsPublicNetwork"
+ annotations:
+ description: "OSD heartbeats on the cluster's 'public' network (frontend) are running slow. Investigate the network for latency or loss issues. Use 'ceph health detail' to show the affected OSDs."
+ summary: "Network issues delaying OSD heartbeats (public network)"
+ expr: "ceph_health_detail{name=\"OSD_SLOW_PING_TIME_FRONT\"} == 1"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDTimeoutsClusterNetwork"
+ annotations:
+ description: "OSD heartbeats on the cluster's 'cluster' network (backend) are slow. Investigate the network for latency issues on this subnet. Use 'ceph health detail' to show the affected OSDs."
+ summary: "Network issues delaying OSD heartbeats (cluster network)"
+ expr: "ceph_health_detail{name=\"OSD_SLOW_PING_TIME_BACK\"} == 1"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDInternalDiskSizeMismatch"
+ annotations:
+ description: "One or more OSDs have an internal inconsistency between metadata and the size of the device. This could lead to the OSD(s) crashing in future. You should redeploy the affected OSDs."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-disk-size-mismatch"
+ summary: "OSD size inconsistency error"
+ expr: "ceph_health_detail{name=\"BLUESTORE_DISK_SIZE_MISMATCH\"} == 1"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephDeviceFailurePredicted"
+ annotations:
+ description: "The device health module has determined that one or more devices will fail soon. To review device status use 'ceph device ls'. To show a specific device use 'ceph device info <dev id>'. Mark the OSD out so that data may migrate to other OSDs. Once the OSD has drained, destroy the OSD, replace the device, and redeploy the OSD."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#id2"
+ summary: "Device(s) predicted to fail soon"
+ expr: "ceph_health_detail{name=\"DEVICE_HEALTH\"} == 1"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephDeviceFailurePredictionTooHigh"
+ annotations:
+ description: "The device health module has determined that devices predicted to fail can not be remediated automatically, since too many OSDs would be removed from the cluster to ensure performance and availabililty. Prevent data integrity issues by adding new OSDs so that data may be relocated."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-toomany"
+ summary: "Too many devices are predicted to fail, unable to resolve"
+ expr: "ceph_health_detail{name=\"DEVICE_HEALTH_TOOMANY\"} == 1"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.4.7"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephDeviceFailureRelocationIncomplete"
+ annotations:
+ description: "The device health module has determined that one or more devices will fail soon, but the normal process of relocating the data on the device to other OSDs in the cluster is blocked. \nEnsure that the cluster has available free space. It may be necessary to add capacity to the cluster to allow data from the failing device to successfully migrate, or to enable the balancer."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-in-use"
+ summary: "Device failure is predicted, but unable to relocate data"
+ expr: "ceph_health_detail{name=\"DEVICE_HEALTH_IN_USE\"} == 1"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDFlapping"
+ annotations:
+ description: "OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} was marked down and back up {{ $value | humanize }} times once a minute for 5 minutes. This may indicate a network issue (latency, packet loss, MTU mismatch) on the cluster network, or the public network if no cluster network is deployed. Check the network stats on the listed host(s)."
+ documentation: "https://docs.ceph.com/en/latest/rados/troubleshooting/troubleshooting-osd#flapping-osds"
+ summary: "Network issues are causing OSDs to flap (mark each other down)"
+ expr: "(rate(ceph_osd_up[5m]) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) * 60 > 1"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.4.4"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephOSDReadErrors"
+ annotations:
+ description: "An OSD has encountered read errors, but the OSD has recovered by retrying the reads. This may indicate an issue with hardware or the kernel."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-spurious-read-errors"
+ summary: "Device read errors detected"
+ expr: "ceph_health_detail{name=\"BLUESTORE_SPURIOUS_READ_ERRORS\"} == 1"
+ for: "30s"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephPGImbalance"
+ annotations:
+ description: "OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} deviates by more than 30% from average PG count."
+ summary: "PGs are not balanced across OSDs"
+ expr: |
+ abs(
+ ((ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)) /
+ on (job) group_left avg(ceph_osd_numpg > 0) by (job)
+ ) * on (ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.4.5"
+ severity: "warning"
+ type: "ceph_default"
+ - name: "mds"
+ rules:
+ - alert: "CephFilesystemDamaged"
+ annotations:
+ description: "Filesystem metadata has been corrupted. Data may be inaccessible. Analyze metrics from the MDS daemon admin socket, or escalate to support."
+ documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages"
+ summary: "CephFS filesystem is damaged."
+ expr: "ceph_health_detail{name=\"MDS_DAMAGE\"} > 0"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.5.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephFilesystemOffline"
+ annotations:
+ description: "All MDS ranks are unavailable. The MDS daemons managing metadata are down, rendering the filesystem offline."
+ documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-all-down"
+ summary: "CephFS filesystem is offline"
+ expr: "ceph_health_detail{name=\"MDS_ALL_DOWN\"} > 0"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.5.3"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephFilesystemDegraded"
+ annotations:
+ description: "One or more metadata daemons (MDS ranks) are failed or in a damaged state. At best the filesystem is partially available, at worst the filesystem is completely unusable."
+ documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-degraded"
+ summary: "CephFS filesystem is degraded"
+ expr: "ceph_health_detail{name=\"FS_DEGRADED\"} > 0"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.5.4"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephFilesystemMDSRanksLow"
+ annotations:
+ description: "The filesystem's 'max_mds' setting defines the number of MDS ranks in the filesystem. The current number of active MDS daemons is less than this value."
+ documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-up-less-than-max"
+ summary: "Ceph MDS daemon count is lower than configured"
+ expr: "ceph_health_detail{name=\"MDS_UP_LESS_THAN_MAX\"} > 0"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephFilesystemInsufficientStandby"
+ annotations:
+ description: "The minimum number of standby daemons required by standby_count_wanted is less than the current number of standby daemons. Adjust the standby count or increase the number of MDS daemons."
+ documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-insufficient-standby"
+ summary: "Ceph filesystem standby daemons too few"
+ expr: "ceph_health_detail{name=\"MDS_INSUFFICIENT_STANDBY\"} > 0"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephFilesystemFailureNoStandby"
+ annotations:
+ description: "An MDS daemon has failed, leaving only one active rank and no available standby. Investigate the cause of the failure or add a standby MDS."
+ documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-with-failed-mds"
+ summary: "MDS daemon failed, no further standby available"
+ expr: "ceph_health_detail{name=\"FS_WITH_FAILED_MDS\"} > 0"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.5.5"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephFilesystemReadOnly"
+ annotations:
+ description: "The filesystem has switched to READ ONLY due to an unexpected error when writing to the metadata pool. Either analyze the output from the MDS daemon admin socket, or escalate to support."
+ documentation: "https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages"
+ summary: "CephFS filesystem in read only mode due to write error(s)"
+ expr: "ceph_health_detail{name=\"MDS_HEALTH_READ_ONLY\"} > 0"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.5.2"
+ severity: "critical"
+ type: "ceph_default"
+ - name: "mgr"
+ rules:
+ - alert: "CephMgrModuleCrash"
+ annotations:
+ description: "One or more mgr modules have crashed and have yet to be acknowledged by an administrator. A crashed module may impact functionality within the cluster. Use the 'ceph crash' command to determine which module has failed, and archive it to acknowledge the failure."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#recent-mgr-module-crash"
+ summary: "A manager module has recently crashed"
+ expr: "ceph_health_detail{name=\"RECENT_MGR_MODULE_CRASH\"} == 1"
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.6.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephMgrPrometheusModuleInactive"
+ annotations:
+ description: "The mgr/prometheus module at {{ $labels.instance }} is unreachable. This could mean that the module has been disabled or the mgr daemon itself is down. Without the mgr/prometheus module metrics and alerts will no longer function. Open a shell to an admin node or toolbox pod and use 'ceph -s' to to determine whether the mgr is active. If the mgr is not active, restart it, otherwise you can determine module status with 'ceph mgr module ls'. If it is not listed as enabled, enable it with 'ceph mgr module enable prometheus'."
+ summary: "The mgr/prometheus module is not available"
+ expr: "up{job=\"ceph\"} == 0"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.6.2"
+ severity: "critical"
+ type: "ceph_default"
+ - name: "pgs"
+ rules:
+ - alert: "CephPGsInactive"
+ annotations:
+ description: "{{ $value }} PGs have been inactive for more than 5 minutes in pool {{ $labels.name }}. Inactive placement groups are not able to serve read/write requests."
+ summary: "One or more placement groups are inactive"
+ expr: "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_active) > 0"
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.7.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephPGsUnclean"
+ annotations:
+ description: "{{ $value }} PGs have been unclean for more than 15 minutes in pool {{ $labels.name }}. Unclean PGs have not recovered from a previous failure."
+ summary: "One or more placement groups are marked unclean"
+ expr: "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_clean) > 0"
+ for: "15m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.7.2"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephPGsDamaged"
+ annotations:
+ description: "During data consistency checks (scrub), at least one PG has been flagged as being damaged or inconsistent. Check to see which PG is affected, and attempt a manual repair if necessary. To list problematic placement groups, use 'rados list-inconsistent-pg <pool>'. To repair PGs use the 'ceph pg repair <pg_num>' command."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-damaged"
+ summary: "Placement group damaged, manual intervention needed"
+ expr: "ceph_health_detail{name=~\"PG_DAMAGED|OSD_SCRUB_ERRORS\"} == 1"
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.7.4"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephPGRecoveryAtRisk"
+ annotations:
+ description: "Data redundancy is at risk since one or more OSDs are at or above the 'full' threshold. Add more capacity to the cluster, restore down/out OSDs, or delete unwanted data."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-recovery-full"
+ summary: "OSDs are too full for recovery"
+ expr: "ceph_health_detail{name=\"PG_RECOVERY_FULL\"} == 1"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.7.5"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephPGUnavilableBlockingIO"
+ annotations:
+ description: "Data availability is reduced, impacting the cluster's ability to service I/O. One or more placement groups (PGs) are in a state that blocks I/O."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-availability"
+ summary: "PG is unavailable, blocking I/O"
+ expr: "((ceph_health_detail{name=\"PG_AVAILABILITY\"} == 1) - scalar(ceph_health_detail{name=\"OSD_DOWN\"})) == 1"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.7.3"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephPGBackfillAtRisk"
+ annotations:
+ description: "Data redundancy may be at risk due to lack of free space within the cluster. One or more OSDs have reached the 'backfillfull' threshold. Add more capacity, or delete unwanted data."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-backfill-full"
+ summary: "Backfill operations are blocked due to lack of free space"
+ expr: "ceph_health_detail{name=\"PG_BACKFILL_FULL\"} == 1"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.7.6"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephPGNotScrubbed"
+ annotations:
+ description: "One or more PGs have not been scrubbed recently. Scrubs check metadata integrity, protecting against bit-rot. They check that metadata is consistent across data replicas. When PGs miss their scrub interval, it may indicate that the scrub window is too small, or PGs were not in a 'clean' state during the scrub window. You can manually initiate a scrub with: ceph pg scrub <pgid>"
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-scrubbed"
+ summary: "Placement group(s) have not been scrubbed"
+ expr: "ceph_health_detail{name=\"PG_NOT_SCRUBBED\"} == 1"
+ for: "5m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephPGsHighPerOSD"
+ annotations:
+ description: "The number of placement groups per OSD is too high (exceeds the mon_max_pg_per_osd setting).\n Check that the pg_autoscaler has not been disabled for any pools with 'ceph osd pool autoscale-status', and that the profile selected is appropriate. You may also adjust the target_size_ratio of a pool to guide the autoscaler based on the expected relative size of the pool ('ceph osd pool set cephfs.cephfs.meta target_size_ratio .1') or set the pg_autoscaler mode to 'warn' and adjust pg_num appropriately for one or more pools."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks/#too-many-pgs"
+ summary: "Placement groups per OSD is too high"
+ expr: "ceph_health_detail{name=\"TOO_MANY_PGS\"} == 1"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephPGNotDeepScrubbed"
+ annotations:
+ description: "One or more PGs have not been deep scrubbed recently. Deep scrubs protect against bit-rot. They compare data replicas to ensure consistency. When PGs miss their deep scrub interval, it may indicate that the window is too small or PGs were not in a 'clean' state during the deep-scrub window."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-deep-scrubbed"
+ summary: "Placement group(s) have not been deep scrubbed"
+ expr: "ceph_health_detail{name=\"PG_NOT_DEEP_SCRUBBED\"} == 1"
+ for: "5m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - name: "nodes"
+ rules:
+ - alert: "CephNodeRootFilesystemFull"
+ annotations:
+ description: "Root volume is dangerously full: {{ $value | humanize }}% free."
+ summary: "Root filesystem is dangerously full"
+ expr: "node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"} * 100 < 5"
+ for: "5m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.8.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephNodeNetworkPacketDrops"
+ annotations:
+ description: "Node {{ $labels.instance }} experiences packet drop > 0.5% or > 10 packets/s on interface {{ $labels.device }}."
+ summary: "One or more NICs reports packet drops"
+ expr: |
+ (
+ rate(node_network_receive_drop_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_drop_total{device!="lo"}[1m])
+ ) / (
+ rate(node_network_receive_packets_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_packets_total{device!="lo"}[1m])
+ ) >= 0.0050000000000000001 and (
+ rate(node_network_receive_drop_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_drop_total{device!="lo"}[1m])
+ ) >= 10
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.8.2"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephNodeNetworkPacketErrors"
+ annotations:
+ description: "Node {{ $labels.instance }} experiences packet errors > 0.01% or > 10 packets/s on interface {{ $labels.device }}."
+ summary: "One or more NICs reports packet errors"
+ expr: |
+ (
+ rate(node_network_receive_errs_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_errs_total{device!="lo"}[1m])
+ ) / (
+ rate(node_network_receive_packets_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_packets_total{device!="lo"}[1m])
+ ) >= 0.0001 or (
+ rate(node_network_receive_errs_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_errs_total{device!="lo"}[1m])
+ ) >= 10
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.8.3"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephNodeDiskspaceWarning"
+ annotations:
+ description: "Mountpoint {{ $labels.mountpoint }} on {{ $labels.nodename }} will be full in less than 5 days based on the 48 hour trailing fill rate."
+ summary: "Host filesystem free space is getting low"
+ expr: "predict_linear(node_filesystem_free_bytes{device=~\"/.*\"}[2d], 3600 * 24 * 5) *on(instance) group_left(nodename) node_uname_info < 0"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.8.4"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephNodeInconsistentMTU"
+ annotations:
+ description: "Node {{ $labels.instance }} has a different MTU size ({{ $value }}) than the median of devices named {{ $labels.device }}."
+ summary: "MTU settings across Ceph hosts are inconsistent"
+ expr: "node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( max by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )or node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0) == scalar( min by (device) (node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) != quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!=\"lo\"} > 0)) )"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - name: "pools"
+ rules:
+ - alert: "CephPoolGrowthWarning"
+ annotations:
+ description: "Pool '{{ $labels.name }}' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours."
+ summary: "Pool growth rate may soon exceed capacity"
+ expr: "(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id) group_right ceph_pool_metadata) >= 95"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.9.2"
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephPoolBackfillFull"
+ annotations:
+ description: "A pool is approaching the near full threshold, which will prevent recovery/backfill operations from completing. Consider adding more capacity."
+ summary: "Free space in a pool is too low for recovery/backfill"
+ expr: "ceph_health_detail{name=\"POOL_BACKFILLFULL\"} > 0"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - alert: "CephPoolFull"
+ annotations:
+ description: "A pool has reached its MAX quota, or OSDs supporting the pool have reached the FULL threshold. Until this is resolved, writes to the pool will be blocked. Pool Breakdown (top 5) {{- range query \"topk(5, sort_desc(ceph_pool_percent_used * on(pool_id) group_right ceph_pool_metadata))\" }} - {{ .Labels.name }} at {{ .Value }}% {{- end }} Increase the pool's quota, or add capacity to the cluster first then increase the pool's quota (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>)"
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#pool-full"
+ summary: "Pool is full - writes are blocked"
+ expr: "ceph_health_detail{name=\"POOL_FULL\"} > 0"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.9.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephPoolNearFull"
+ annotations:
+ description: "A pool has exceeded the warning (percent full) threshold, or OSDs supporting the pool have reached the NEARFULL threshold. Writes may continue, but you are at risk of the pool going read-only if more capacity isn't made available. Determine the affected pool with 'ceph df detail', looking at QUOTA BYTES and STORED. Increase the pool's quota, or add capacity to the cluster first then increase the pool's quota (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>). Also ensure that the balancer is active."
+ summary: "One or more Ceph pools are nearly full"
+ expr: "ceph_health_detail{name=\"POOL_NEAR_FULL\"} > 0"
+ for: "5m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - name: "healthchecks"
+ rules:
+ - alert: "CephSlowOps"
+ annotations:
+ description: "{{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)"
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#slow-ops"
+ summary: "OSD operations are slow to complete"
+ expr: "ceph_healthcheck_slow_ops > 0"
+ for: "30s"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - name: "cephadm"
+ rules:
+ - alert: "CephadmUpgradeFailed"
+ annotations:
+ description: "The cephadm cluster upgrade process has failed. The cluster remains in an undetermined state. Please review the cephadm logs, to understand the nature of the issue"
+ summary: "Ceph version upgrade has failed"
+ expr: "ceph_health_detail{name=\"UPGRADE_EXCEPTION\"} > 0"
+ for: "30s"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.11.2"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephadmDaemonFailed"
+ annotations:
+ description: "A daemon managed by cephadm is no longer active. Determine, which daemon is down with 'ceph health detail'. you may start daemons with the 'ceph orch daemon start <daemon_id>'"
+ summary: "A ceph daemon manged by cephadm is down"
+ expr: "ceph_health_detail{name=\"CEPHADM_FAILED_DAEMON\"} > 0"
+ for: "30s"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.11.1"
+ severity: "critical"
+ type: "ceph_default"
+ - alert: "CephadmPaused"
+ annotations:
+ description: "Cluster management has been paused manually. This will prevent the orchestrator from service management and reconciliation. If this is not intentional, resume cephadm operations with 'ceph orch resume'"
+ documentation: "https://docs.ceph.com/en/latest/cephadm/operations#cephadm-paused"
+ summary: "Orchestration tasks via cephadm are PAUSED"
+ expr: "ceph_health_detail{name=\"CEPHADM_PAUSED\"} > 0"
+ for: "1m"
+ labels:
+ severity: "warning"
+ type: "ceph_default"
+ - name: "PrometheusServer"
+ rules:
+ - alert: "PrometheusJobMissing"
+ annotations:
+ description: "The prometheus job that scrapes from Ceph is no longer defined, this will effectively mean you'll have no metrics or alerts for the cluster. Please review the job definitions in the prometheus.yml file of the prometheus instance."
+ summary: "The scrape job for Ceph is missing from Prometheus"
+ expr: "absent(up{job=\"ceph\"})"
+ for: "30s"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.12.1"
+ severity: "critical"
+ type: "ceph_default"
+ - name: "rados"
+ rules:
+ - alert: "CephObjectMissing"
+ annotations:
+ description: "The latest version of a RADOS object can not be found, even though all OSDs are up. I/O requests for this object from clients will block (hang). Resolving this issue may require the object to be rolled back to a prior version manually, and manually verified."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#object-unfound"
+ summary: "Object(s) marked UNFOUND"
+ expr: "(ceph_health_detail{name=\"OBJECT_UNFOUND\"} == 1) * on() (count(ceph_osd_up == 1) == bool count(ceph_osd_metadata)) == 1"
+ for: "30s"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.10.1"
+ severity: "critical"
+ type: "ceph_default"
+ - name: "generic"
+ rules:
+ - alert: "CephDaemonCrash"
+ annotations:
+ description: "One or more daemons have crashed recently, and need to be acknowledged. This notification ensures that software crashes do not go unseen. To acknowledge a crash, use the 'ceph crash archive <id>' command."
+ documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks/#recent-crash"
+ summary: "One or more Ceph daemons have crashed, and are pending acknowledgement"
+ expr: "ceph_health_detail{name=\"RECENT_CRASH\"} == 1"
+ for: "1m"
+ labels:
+ oid: "1.3.6.1.4.1.50495.1.2.1.1.2"
+ severity: "critical"
+ type: "ceph_default"
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-alerts.txt b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-alerts.txt
new file mode 100644
index 0000000..e518e69
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-alerts.txt
@@ -0,0 +1,2 @@
+pyyaml==6.0
+bs4
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-grafonnet.txt b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-grafonnet.txt
new file mode 100644
index 0000000..9891d55
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-grafonnet.txt
@@ -0,0 +1 @@
+jsondiff
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-lint.txt b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-lint.txt
new file mode 100644
index 0000000..f9a3c77
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/requirements-lint.txt
@@ -0,0 +1,18 @@
+attrs==21.2.0
+behave==1.2.6
+py==1.10.0
+pyparsing==2.4.7
+PyYAML==6.0
+types-PyYAML==6.0.0
+typing-extensions==3.10.0.2
+termcolor==1.1.0
+types-termcolor==1.1.2
+dataclasses==0.6
+types-dataclasses==0.6.1
+six==1.16.0
+toml==0.10.2
+pylint==2.6.0
+isort==5.10.0
+mypy==0.910
+mypy-extensions==0.4.3
+prettytable==2.4.0
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/test-jsonnet.sh b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/test-jsonnet.sh
new file mode 100755
index 0000000..87c5338
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/test-jsonnet.sh
@@ -0,0 +1,35 @@
+#!/bin/sh -e
+
+TEMPDIR=$(mktemp -d)
+BASEDIR=$(dirname "$0")
+
+jsonnet -J vendor -m ${TEMPDIR} $BASEDIR/dashboards.jsonnet
+
+truncate -s 0 ${TEMPDIR}/json_difference.log
+for file in ${BASEDIR}/dashboards_out/*.json
+do
+ file_name="$(basename $file)"
+ for generated_file in ${TEMPDIR}/*.json
+ do
+ generated_file_name="$(basename $generated_file)"
+ if [ "$file_name" == "$generated_file_name" ]; then
+ jsondiff --indent 2 "${generated_file}" "${file}" \
+ | tee -a ${TEMPDIR}/json_difference.log
+ fi
+ done
+done
+
+jsonnet -J vendor -S alerts.jsonnet -o ${TEMPDIR}/prometheus_alerts.yml
+jsondiff --indent 2 "prometheus_alerts.yml" "${TEMPDIR}/prometheus_alerts.yml" \
+ | tee -a ${TEMPDIR}/json_difference.log
+
+err=0
+if [ $(wc -l < ${TEMPDIR}/json_difference.log) -eq 0 ]
+then
+ rm -rf ${TEMPDIR}
+ echo "Congratulations! Grafonnet Check Passed"
+else
+ rm -rf ${TEMPDIR}
+ echo "Grafonnet Check Failed, failed comparing generated file with existing"
+ exit 1
+fi
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/README.md b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/README.md
new file mode 100644
index 0000000..cf95fa6
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/README.md
@@ -0,0 +1,92 @@
+
+## Alert Rule Standards
+
+The alert rules should adhere to the following principles
+- each alert must have a unique name
+- each alert should define a common structure
+ - labels : must contain severity and type
+ - annotations : must provide description
+ - expr : must define the promql expression
+ - alert : defines the alert name
+- alerts that have a corresponding section within docs.ceph.com must include a
+ documentation field in the annotations section
+- critical alerts should declare an oid in the labels section
+- critical alerts should have a corresponding entry in the Ceph MIB
+
+
+## Testing Prometheus Rules
+Once you have updated the `ceph_default_alerts.yml` file, you should use the
+`validate_rules.py` script directly, or via `tox` to ensure the format of any update
+or change aligns to our rule structure guidelines. The validate_rules.py script will
+process the rules and look for any configuration anomalies and output a report if
+problems are detected.
+
+Here's an example run, to illustrate the format and the kinds of issues detected.
+
+```
+[paul@myhost tests]$ ./validate_rules.py
+
+Checking rule groups
+ cluster health : ..
+ mon : E.W..
+ osd : E...W......W.E..
+ mds : WW
+ mgr : WW
+ pgs : ..WWWW..
+ nodes : .EEEE
+ pools : EEEW.
+ healthchecks : .
+ cephadm : WW.
+ prometheus : W
+ rados : W
+
+Summary
+
+Rule file : ../alerts/ceph_default_alerts.yml
+Unit Test file : test_alerts.yml
+
+Rule groups processed : 12
+Rules processed : 51
+Rule errors : 10
+Rule warnings : 16
+Rule name duplicates : 0
+Unit tests missing : 4
+
+Problem Report
+
+ Group Severity Alert Name Problem Description
+ ----- -------- ---------- -------------------
+ cephadm Warning Cluster upgrade has failed critical level alert is missing an SNMP oid entry
+ cephadm Warning A daemon managed by cephadm is down critical level alert is missing an SNMP oid entry
+ mds Warning Ceph Filesystem damage detected critical level alert is missing an SNMP oid entry
+ mds Warning Ceph Filesystem switched to READ ONLY critical level alert is missing an SNMP oid entry
+ mgr Warning mgr module failure critical level alert is missing an SNMP oid entry
+ mgr Warning mgr prometheus module is not active critical level alert is missing an SNMP oid entry
+ mon Error Monitor down, quorum is at risk documentation link error: #mon-downwah not found on the page
+ mon Warning Ceph mon disk space critically low critical level alert is missing an SNMP oid entry
+ nodes Error network packets dropped invalid alert structure. Missing field: for
+ nodes Error network packet errors invalid alert structure. Missing field: for
+ nodes Error storage filling up invalid alert structure. Missing field: for
+ nodes Error MTU Mismatch invalid alert structure. Missing field: for
+ osd Error 10% OSDs down invalid alert structure. Missing field: for
+ osd Error Flapping OSD invalid alert structure. Missing field: for
+ osd Warning OSD Full critical level alert is missing an SNMP oid entry
+ osd Warning Too many devices predicted to fail critical level alert is missing an SNMP oid entry
+ pgs Warning Placement Group (PG) damaged critical level alert is missing an SNMP oid entry
+ pgs Warning Recovery at risk, cluster too full critical level alert is missing an SNMP oid entry
+ pgs Warning I/O blocked to some data critical level alert is missing an SNMP oid entry
+ pgs Warning Cluster too full, automatic data recovery impaired critical level alert is missing an SNMP oid entry
+ pools Error pool full invalid alert structure. Missing field: for
+ pools Error pool filling up (growth forecast) invalid alert structure. Missing field: for
+ pools Error Ceph pool is too full for recovery/rebalance invalid alert structure. Missing field: for
+ pools Warning Ceph pool is full - writes blocked critical level alert is missing an SNMP oid entry
+ prometheus Warning Scrape job is missing critical level alert is missing an SNMP oid entry
+ rados Warning Data not found/missing critical level alert is missing an SNMP oid entry
+
+Unit tests are incomplete. Tests missing for the following alerts;
+ - Placement Group (PG) damaged
+ - OSD Full
+ - storage filling up
+ - pool filling up (growth forecast)
+
+```
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/__init__.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/__init__.py
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/settings.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/settings.py
new file mode 100644
index 0000000..d99dfdc
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/settings.py
@@ -0,0 +1,11 @@
+import os
+
+ALERTS_FILE = '../prometheus_alerts.yml'
+UNIT_TESTS_FILE = 'test_alerts.yml'
+MIB_FILE = '../../snmp/CEPH-MIB.txt'
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+
+ALERTS_FILE = os.path.join(current_dir, ALERTS_FILE)
+UNIT_TESTS_FILE = os.path.join(current_dir, UNIT_TESTS_FILE)
+MIB_FILE = os.path.join(current_dir, MIB_FILE)
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_alerts.yml b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_alerts.yml
new file mode 100644
index 0000000..7b7e7db
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_alerts.yml
@@ -0,0 +1,1821 @@
+rule_files:
+ - ../prometheus_alerts.yml
+evaluation_interval: 5m
+tests:
+ # health error
+ - interval: 5m
+ input_series:
+ - series: 'ceph_health_status{instance="ceph:9283",job="ceph"}'
+ values: '2 2 2 2 2 2 2'
+ promql_expr_test:
+ - expr: ceph_health_status == 2
+ eval_time: 5m
+ exp_samples:
+ - labels: 'ceph_health_status{instance="ceph:9283",job="ceph"}'
+ value: 2
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephHealthError
+ - eval_time: 6m
+ alertname: CephHealthError
+ exp_alerts:
+ - exp_labels:
+ instance: ceph:9283
+ job: ceph
+ oid: 1.3.6.1.4.1.50495.1.2.1.2.1
+ type: ceph_default
+ severity: critical
+ exp_annotations:
+ summary: Ceph is in the ERROR state
+ description: The cluster state has been HEALTH_ERROR for more than 5 minutes. Please check 'ceph health detail' for more information.
+
+ # health warning
+ - interval: 5m
+ input_series:
+ - series: 'ceph_health_status{instance="ceph:9283",job="ceph"}'
+ values: '1 1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_status == 1
+ eval_time: 15m
+ exp_samples:
+ - labels: 'ceph_health_status{instance="ceph:9283",job="ceph"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 10m
+ alertname: CephHealthWarning
+ - eval_time: 20m
+ alertname: CephHealthWarning
+ exp_alerts:
+ - exp_labels:
+ instance: ceph:9283
+ job: ceph
+ type: ceph_default
+ severity: warning
+ exp_annotations:
+ summary: Ceph is in the WARNING state
+ description: The cluster state has been HEALTH_WARN for more than 15 minutes. Please check 'ceph health detail' for more information.
+
+ # 10% OSDs down
+ - interval: 1m
+ input_series:
+ - series: 'ceph_osd_up{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"}'
+ values: '1 1 1 1 1'
+ - series: 'ceph_osd_up{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"}'
+ values: '0 0 0 0 0'
+ - series: 'ceph_osd_up{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"}'
+ values: '1 1 1 1 1'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.1",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.2",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1'
+ promql_expr_test:
+ - expr: count(ceph_osd_up == 0) / count(ceph_osd_up) * 100 >= 10
+ eval_time: 1m
+ exp_samples:
+ - labels: '{}'
+ value: 3.333333333333333E+01
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDDownHigh
+ exp_alerts:
+ - exp_labels:
+ oid: 1.3.6.1.4.1.50495.1.2.1.4.1
+ type: ceph_default
+ severity: critical
+ exp_annotations:
+ summary: More than 10% of OSDs are down
+ description: "33.33% or 1 of 3 OSDs are down (>= 10%). The following OSDs are down: - osd.1 on ceph"
+
+ # flapping OSD
+ - interval: 1s
+ input_series:
+ - series: 'ceph_osd_up{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"}'
+ values: '1+1x100'
+ - series: 'ceph_osd_up{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"}'
+ values: '1+0x100'
+ - series: 'ceph_osd_up{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"}'
+ values: '1+0x100'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1 1'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.1",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1 1'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.2",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: |
+ (
+ rate(ceph_osd_up[5m])
+ * on(ceph_daemon) group_left(hostname) ceph_osd_metadata
+ ) * 60 > 1
+ eval_time: 1m
+ exp_samples:
+ - labels: '{ceph_daemon="osd.0", hostname="ceph", instance="ceph:9283",
+ job="ceph"}'
+ value: 1.2200000000000001E+01
+ alert_rule_test:
+ - eval_time: 5m
+ alertname: CephOSDFlapping
+ exp_alerts:
+ - exp_labels:
+ ceph_daemon: osd.0
+ hostname: ceph
+ instance: ceph:9283
+ job: ceph
+ oid: 1.3.6.1.4.1.50495.1.2.1.4.4
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/troubleshooting/troubleshooting-osd#flapping-osds
+ summary: Network issues are causing OSDs to flap (mark each other down)
+ description: "OSD osd.0 on ceph was marked down and back up 20.1 times once a minute for 5 minutes. This may indicate a network issue (latency, packet loss, MTU mismatch) on the cluster network, or the public network if no cluster network is deployed. Check the network stats on the listed host(s)."
+
+ # high pg count deviation
+ - interval: 1m
+ input_series:
+ - series: 'ceph_osd_numpg{ceph_daemon="osd.0",instance="ceph:9283",
+ job="ceph"}'
+ values: '100 100 100 100 100 160'
+ - series: 'ceph_osd_numpg{ceph_daemon="osd.1",instance="ceph:9283",
+ job="ceph"}'
+ values: '100 100 100 100 100 320'
+ - series: 'ceph_osd_numpg{ceph_daemon="osd.2",instance="ceph:9283",
+ job="ceph"}'
+ values: '100 100 100 100 100 160'
+ - series: 'ceph_osd_numpg{ceph_daemon="osd.3",instance="ceph:9283",
+ job="ceph"}'
+ values: '100 100 100 100 100 160'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1 1'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.1",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1 1'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.2",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1 1'
+ - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.3",
+ ceph_version="ceph version 17.0.0-189-g3558fd72
+ (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)",
+ cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0",
+ hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore",
+ public_addr="172.20.0.2"}'
+ values: '1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: |
+ abs(
+ (
+ (ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0)
+ by (job)
+ ) / on (job) group_left avg(ceph_osd_numpg > 0) by (job)
+ ) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
+
+ eval_time: 5m
+ exp_samples:
+ - labels: '{ceph_daemon="osd.1", hostname="ceph", instance="ceph:9283",
+ job="ceph"}'
+ value: 6E-01
+ alert_rule_test:
+ - eval_time: 10m
+ alertname: CephPGImbalance
+ exp_alerts:
+ - exp_labels:
+ ceph_daemon: osd.1
+ hostname: ceph
+ instance: ceph:9283
+ job: ceph
+ oid: 1.3.6.1.4.1.50495.1.2.1.4.5
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: PGs are not balanced across OSDs
+ description: "OSD osd.1 on ceph deviates by more than 30% from average PG count."
+
+ # pgs inactive
+ - interval: 1m
+ input_series:
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="device_health_metrics",pool_id="1"}'
+ values: '1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="device_health_metrics",pool_id="2"}'
+ values: '1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="device_health_metrics",pool_id="3"}'
+ values: '1 1 1 1 1 1 1 1'
+ - series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="1"}'
+ values: '1 1 1 1 1 1 1 1'
+ - series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="2"}'
+ values: '32 32 32 32 32 32 32 32'
+ - series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="3"}'
+ values: '33 32 32 32 32 33 33 32'
+ - series: 'ceph_pg_active{instance="ceph:9283",job="ceph",pool_id="1"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pg_active{instance="ceph:9283",job="ceph",pool_id="2"}'
+ values: '32 32 32 32 32 32 32 32'
+ - series: 'ceph_pg_active{instance="ceph:9283",job="ceph",pool_id="3"}'
+ values: '32 32 32 32 32 32 32 32'
+ promql_expr_test:
+ - expr: ceph_pool_metadata * on(pool_id,instance) group_left()
+ (ceph_pg_total - ceph_pg_active) > 0
+ eval_time: 5m
+ exp_samples:
+ - labels: '{instance="ceph:9283", job="ceph",
+ name="device_health_metrics",
+ pool_id="3"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 5m
+ alertname: CephPGsInactive
+ exp_alerts:
+ - exp_labels:
+ instance: ceph:9283
+ job: ceph
+ name: device_health_metrics
+ oid: 1.3.6.1.4.1.50495.1.2.1.7.1
+ pool_id: 3
+ severity: critical
+ type: ceph_default
+ exp_annotations:
+ summary: One or more placement groups are inactive
+ description: "1 PGs have been inactive for more than 5 minutes in pool device_health_metrics. Inactive placement groups are not able to serve read/write requests."
+
+ #pgs unclean
+ - interval: 1m
+ input_series:
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="device_health_metrics",pool_id="1"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="device_health_metrics",pool_id="2"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="device_health_metrics",pool_id="3"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="1"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="2"}'
+ values: '32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32
+ 32 32 32'
+ - series: 'ceph_pg_total{instance="ceph:9283",job="ceph",pool_id="3"}'
+ values: '33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33
+ 33 33'
+ - series: 'ceph_pg_clean{instance="ceph:9283",job="ceph",pool_id="1"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pg_clean{instance="ceph:9283",job="ceph",pool_id="2"}'
+ values: '32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32
+ 32 32'
+ - series: 'ceph_pg_clean{instance="ceph:9283",job="ceph",pool_id="3"}'
+ values: '32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32
+ 32 32'
+ promql_expr_test:
+ - expr: ceph_pool_metadata * on(pool_id,instance) group_left()
+ (ceph_pg_total - ceph_pg_clean) > 0
+ eval_time: 15m
+ exp_samples:
+ - labels: '{instance="ceph:9283", job="ceph",
+ name="device_health_metrics", pool_id="3"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 16m
+ alertname: CephPGsUnclean
+ exp_alerts:
+ - exp_labels:
+ instance: ceph:9283
+ job: ceph
+ name: device_health_metrics
+ oid: 1.3.6.1.4.1.50495.1.2.1.7.2
+ pool_id: 3
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: One or more placement groups are marked unclean
+ description: "1 PGs have been unclean for more than 15 minutes in pool device_health_metrics. Unclean PGs have not recovered from a previous failure."
+
+ # root volume full
+ - interval: 1m
+ input_series:
+ - series: 'node_filesystem_avail_bytes{device="/dev/mapper/fedora_localhost
+ --live-home",fstype="ext4",instance="node-exporter",job="node-exporter",
+ mountpoint="/"}'
+ values: '35336400896 35336400896 35336400896 35336400896 35336400896
+ 3525385519.104 3533640089'
+ - series: 'node_filesystem_size_bytes{device="/dev/mapper/fedora_localhost
+ --live-home",fstype="ext4",instance="node-exporter",job="node-exporter",
+ mountpoint="/"}'
+ values: '73445531648 73445531648 73445531648 73445531648 73445531648
+ 73445531648 73445531648'
+ promql_expr_test:
+ - expr: node_filesystem_avail_bytes{mountpoint="/"} /
+ node_filesystem_size_bytes{mountpoint="/"} * 100 < 5
+ eval_time: 5m
+ exp_samples:
+ - labels: '{device="/dev/mapper/fedora_localhost --live-home",
+ fstype="ext4", instance="node-exporter", job="node-exporter",
+ mountpoint="/"}'
+ value: 4.8E+00
+ alert_rule_test:
+ - eval_time: 10m
+ alertname: CephNodeRootFilesystemFull
+ exp_alerts:
+ - exp_labels:
+ device: /dev/mapper/fedora_localhost --live-home
+ fstype: ext4
+ instance: node-exporter
+ job: node-exporter
+ mountpoint: /
+ oid: 1.3.6.1.4.1.50495.1.2.1.8.1
+ severity: critical
+ type: ceph_default
+ exp_annotations:
+ summary: Root filesystem is dangerously full
+ description: "Root volume is dangerously full: 4.811% free."
+
+ # network packets dropped
+ - interval: 1m
+ input_series:
+ - series: 'node_network_receive_drop_total{device="eth0",
+ instance="node-exporter",job="node-exporter"}'
+ values: '0+600x10'
+ - series: 'node_network_transmit_drop_total{device="eth0",
+ instance="node-exporter",job="node-exporter"}'
+ values: '0+600x10'
+ - series: 'node_network_receive_packets_total{device="eth0",
+ instance="node-exporter",job="node-exporter"}'
+ values: '0+750x10'
+ - series: 'node_network_transmit_packets_total{device="eth0",
+ instance="node-exporter",job="node-exporter"}'
+ values: '0+750x10'
+ promql_expr_test:
+ - expr: |
+ (
+ rate(node_network_receive_drop_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_drop_total{device!="lo"}[1m])
+ ) / (
+ rate(node_network_receive_packets_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_packets_total{device!="lo"}[1m])
+ ) >= 0.0050000000000000001 and (
+ rate(node_network_receive_drop_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_drop_total{device!="lo"}[1m])
+ ) >= 10
+
+ eval_time: 5m
+ exp_samples:
+ - labels: '{device="eth0", instance="node-exporter",
+ job="node-exporter"}'
+ value: 8E-1
+ alert_rule_test:
+ - eval_time: 5m
+ alertname: CephNodeNetworkPacketDrops
+ exp_alerts:
+ - exp_labels:
+ device: eth0
+ instance: node-exporter
+ job: node-exporter
+ oid: 1.3.6.1.4.1.50495.1.2.1.8.2
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: One or more NICs reports packet drops
+ description: "Node node-exporter experiences packet drop > 0.5% or > 10 packets/s on interface eth0."
+
+ # network packets errors
+ - interval: 1m
+ input_series:
+ - series: 'node_network_receive_errs_total{device="eth0",
+ instance="node-exporter",job="node-exporter"}'
+ values: '0+600x10'
+ - series: 'node_network_transmit_errs_total{device="eth0",
+ instance="node-exporter",job="node-exporter"}'
+ values: '0+600x10'
+ - series: 'node_network_transmit_packets_total{device="eth0",
+ instance="node-exporter",job="node-exporter"}'
+ values: '0+750x10'
+ - series: 'node_network_receive_packets_total{device="eth0",
+ instance="node-exporter",job="node-exporter"}'
+ values: '0+750x10'
+ promql_expr_test:
+ - expr: |
+ (
+ rate(node_network_receive_errs_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_errs_total{device!="lo"}[1m])
+ ) / (
+ rate(node_network_receive_packets_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_packets_total{device!="lo"}[1m])
+ ) >= 0.0001 or (
+ rate(node_network_receive_errs_total{device!="lo"}[1m]) +
+ rate(node_network_transmit_errs_total{device!="lo"}[1m])
+ ) >= 10
+
+ eval_time: 5m
+ exp_samples:
+ - labels: '{device="eth0", instance="node-exporter",
+ job="node-exporter"}'
+ value: 8E-01
+ alert_rule_test:
+ - eval_time: 5m
+ alertname: CephNodeNetworkPacketErrors
+ exp_alerts:
+ - exp_labels:
+ device: eth0
+ instance: node-exporter
+ job: node-exporter
+ oid: 1.3.6.1.4.1.50495.1.2.1.8.3
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: One or more NICs reports packet errors
+ description: "Node node-exporter experiences packet errors > 0.01% or > 10 packets/s on interface eth0."
+
+# Node Storage disk space filling up
+ - interval: 1m
+ # 20GB = 21474836480, 256MB = 268435456
+ input_series:
+ - series: 'node_filesystem_free_bytes{device="/dev/mapper/vg-root",
+ fstype="xfs",instance="node-1",mountpoint="/rootfs"}'
+ values: '21474836480-268435456x48'
+ - series: 'node_filesystem_free_bytes{device="/dev/mapper/vg-root",
+ fstype="xfs",instance="node-2",mountpoint="/rootfs"}'
+ values: '21474836480+0x48'
+ - series: 'node_uname_info{instance="node-1", nodename="node-1.unittests.com"}'
+ values: 1+0x48
+ - series: 'node_uname_info{instance="node-2", nodename="node-2.unittests.com"}'
+ values: 1+0x48
+ promql_expr_test:
+ - expr: |
+ predict_linear(node_filesystem_free_bytes{device=~"/.*"}[2d], 3600 * 24 * 5) *
+ on(instance) group_left(nodename) node_uname_info < 0
+ eval_time: 5m
+ exp_samples:
+ - labels: '{device="/dev/mapper/vg-root",instance="node-1",fstype="xfs",
+ mountpoint="/rootfs",nodename="node-1.unittests.com"}'
+ value: -1.912602624E+12
+ alert_rule_test:
+ - eval_time: 5m
+ alertname: CephNodeDiskspaceWarning
+ exp_alerts:
+ - exp_labels:
+ severity: warning
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.8.4
+ device: /dev/mapper/vg-root
+ fstype: xfs
+ instance: node-1
+ mountpoint: /rootfs
+ nodename: node-1.unittests.com
+ exp_annotations:
+ summary: Host filesystem free space is getting low
+ description: "Mountpoint /rootfs on node-1.unittests.com will be full in less than 5 days based on the 48 hour trailing fill rate."
+ # MTU Mismatch
+ - interval: 1m
+ input_series:
+ - series: 'node_network_mtu_bytes{device="eth0",instance="node-exporter",
+ job="node-exporter"}'
+ values: '1500 1500 1500 1500 1500'
+ - series: 'node_network_mtu_bytes{device="eth1",instance="node-exporter",
+ job="node-exporter"}'
+ values: '1500 1500 1500 1500 1500'
+ - series: 'node_network_mtu_bytes{device="eth2",instance="node-exporter",
+ job="node-exporter"}'
+ values: '1500 1500 1500 1500 1500'
+ - series: 'node_network_mtu_bytes{device="eth3",instance="node-exporter",
+ job="node-exporter"}'
+ values: '1500 1500 1500 1500 1500'
+ - series: 'node_network_mtu_bytes{device="eth4",instance="node-exporter",
+ job="node-exporter"}'
+ values: '9000 9000 9000 9000 9000'
+ - series: 'node_network_mtu_bytes{device="eth4",instance="hostname1",
+ job="node-exporter"}'
+ values: '2200 2200 2200 2200 2200'
+ - series: 'node_network_mtu_bytes{device="eth4",instance="hostname2",
+ job="node-exporter"}'
+ values: '2400 2400 2400 2400 2400'
+ - series: 'node_network_up{device="eth0",instance="node-exporter",
+ job="node-exporter"}'
+ values: '0 0 0 0 0'
+ - series: 'node_network_up{device="eth1",instance="node-exporter",
+ job="node-exporter"}'
+ values: '0 0 0 0 0'
+ - series: 'node_network_up{device="eth2",instance="node-exporter",
+ job="node-exporter"}'
+ values: '1 1 1 1 1'
+ - series: 'node_network_up{device="eth3",instance="node-exporter",
+ job="node-exporter"}'
+ values: '1 1 1 1 1'
+ - series: 'node_network_up{device="eth4",instance="node-exporter",
+ job="node-exporter"}'
+ values: '1 1 1 1 1'
+ - series: 'node_network_up{device="eth4",instance="hostname1",
+ job="node-exporter"}'
+ values: '1 1 1 1 1'
+ - series: 'node_network_up{device="eth4",instance="hostname2",
+ job="node-exporter"}'
+ values: '0 0 0 0 0'
+ promql_expr_test:
+ - expr: |
+ node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
+ scalar(
+ max by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
+ quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
+ )
+ or
+ node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
+ scalar(
+ min by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
+ quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
+ )
+ eval_time: 1m
+ exp_samples:
+ - labels: '{device="eth4", instance="node-exporter", job="node-exporter"}'
+ value: 9000
+ - labels: '{device="eth4", instance="hostname1", job="node-exporter"}'
+ value: 2200
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephNodeInconsistentMTU
+ exp_alerts:
+ - exp_labels:
+ device: eth4
+ instance: hostname1
+ job: node-exporter
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: MTU settings across Ceph hosts are inconsistent
+ description: "Node hostname1 has a different MTU size (2200) than the median of devices named eth4."
+ - exp_labels:
+ device: eth4
+ instance: node-exporter
+ job: node-exporter
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: MTU settings across Ceph hosts are inconsistent
+ description: "Node node-exporter has a different MTU size (9000) than the median of devices named eth4."
+
+ # pool full, data series has 6 but using topk(5) so to ensure the
+ # results are working as expected
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="POOL_FULL"}'
+ values: '0 0 0 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_percent_used{pool_id="1"}'
+ values: '32+0x10'
+ - series: 'ceph_pool_percent_used{pool_id="2"}'
+ values: '96+0x10'
+ - series: 'ceph_pool_percent_used{pool_id="3"}'
+ values: '90+0x10'
+ - series: 'ceph_pool_percent_used{pool_id="4"}'
+ values: '72+0x10'
+ - series: 'ceph_pool_percent_used{pool_id="5"}'
+ values: '19+0x10'
+ - series: 'ceph_pool_percent_used{pool_id="6"}'
+ values: '10+0x10'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="cephfs_data",pool_id="1"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="rbd",pool_id="2"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="iscsi",pool_id="3"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="default.rgw.index",pool_id="4"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="default.rgw.log",pool_id="5"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph",
+ name="dummy",pool_id="6"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="POOL_FULL"} > 0
+ eval_time: 5m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="POOL_FULL"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPoolFull
+ - eval_time: 10m
+ alertname: CephPoolFull
+ exp_alerts:
+ - exp_labels:
+ name: POOL_FULL
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.9.1
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pool-full
+ summary: Pool is full - writes are blocked
+ description: "A pool has reached its MAX quota, or OSDs supporting the pool have reached the FULL threshold. Until this is resolved, writes to the pool will be blocked. Pool Breakdown (top 5) - rbd at 96% - iscsi at 90% - default.rgw.index at 72% - cephfs_data at 32% - default.rgw.log at 19% Increase the pool's quota, or add capacity to the cluster first then increase the pool's quota (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>)"
+ # slow OSD ops
+ - interval: 1m
+ input_series:
+ - series: 'ceph_healthcheck_slow_ops{instance="ceph:9283",job="ceph"}'
+ values: '1+0x120'
+ promql_expr_test:
+ - expr: ceph_healthcheck_slow_ops > 0
+ eval_time: 1m
+ exp_samples:
+ - labels: '{__name__="ceph_healthcheck_slow_ops", instance="ceph:9283",
+ job="ceph"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 20m
+ alertname: CephSlowOps
+ exp_alerts:
+ - exp_labels:
+ instance: ceph:9283
+ job: ceph
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#slow-ops
+ summary: OSD operations are slow to complete
+ description: "1 OSD requests are taking too long to process (osd_op_complaint_time exceeded)"
+
+# CEPHADM orchestrator alert triggers
+ - interval: 30s
+ input_series:
+ - series: 'ceph_health_detail{name="UPGRADE_EXCEPTION"}'
+ values: '1+0x40'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="UPGRADE_EXCEPTION"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="UPGRADE_EXCEPTION"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephadmUpgradeFailed
+ - eval_time: 5m
+ alertname: CephadmUpgradeFailed
+ exp_alerts:
+ - exp_labels:
+ name: UPGRADE_EXCEPTION
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.11.2
+ exp_annotations:
+ summary: Ceph version upgrade has failed
+ description: "The cephadm cluster upgrade process has failed. The cluster remains in an undetermined state. Please review the cephadm logs, to understand the nature of the issue"
+ - interval: 30s
+ input_series:
+ - series: 'ceph_health_detail{name="CEPHADM_FAILED_DAEMON"}'
+ values: '1+0x40'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="CEPHADM_FAILED_DAEMON"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="CEPHADM_FAILED_DAEMON"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephadmDaemonFailed
+ - eval_time: 5m
+ alertname: CephadmDaemonFailed
+ exp_alerts:
+ - exp_labels:
+ name: CEPHADM_FAILED_DAEMON
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.11.1
+ exp_annotations:
+ summary: A ceph daemon manged by cephadm is down
+ description: "A daemon managed by cephadm is no longer active. Determine, which daemon is down with 'ceph health detail'. you may start daemons with the 'ceph orch daemon start <daemon_id>'"
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="CEPHADM_PAUSED"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="CEPHADM_PAUSED"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="CEPHADM_PAUSED"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephadmPaused
+ - eval_time: 5m
+ alertname: CephadmPaused
+ exp_alerts:
+ - exp_labels:
+ name: CEPHADM_PAUSED
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/cephadm/operations#cephadm-paused
+ summary: Orchestration tasks via cephadm are PAUSED
+ description: "Cluster management has been paused manually. This will prevent the orchestrator from service management and reconciliation. If this is not intentional, resume cephadm operations with 'ceph orch resume'"
+# MDS
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MDS_DAMAGE"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="MDS_DAMAGE"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="MDS_DAMAGE"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephFilesystemDamaged
+ - eval_time: 5m
+ alertname: CephFilesystemDamaged
+ exp_alerts:
+ - exp_labels:
+ name: MDS_DAMAGE
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.5.1
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages
+ summary: CephFS filesystem is damaged.
+ description: "Filesystem metadata has been corrupted. Data may be inaccessible. Analyze metrics from the MDS daemon admin socket, or escalate to support."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MDS_HEALTH_READ_ONLY"}'
+ values: '1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="MDS_HEALTH_READ_ONLY"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="MDS_HEALTH_READ_ONLY"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephFilesystemReadOnly
+ - eval_time: 5m
+ alertname: CephFilesystemReadOnly
+ exp_alerts:
+ - exp_labels:
+ name: MDS_HEALTH_READ_ONLY
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.5.2
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages
+ summary: CephFS filesystem in read only mode due to write error(s)
+ description: "The filesystem has switched to READ ONLY due to an unexpected error when writing to the metadata pool. Either analyze the output from the MDS daemon admin socket, or escalate to support."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MDS_ALL_DOWN"}'
+ values: '0 0 1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="MDS_ALL_DOWN"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="MDS_ALL_DOWN"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephFilesystemOffline
+ - eval_time: 10m
+ alertname: CephFilesystemOffline
+ exp_alerts:
+ - exp_labels:
+ name: MDS_ALL_DOWN
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.5.3
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-all-down
+ summary: CephFS filesystem is offline
+ description: "All MDS ranks are unavailable. The MDS daemons managing metadata are down, rendering the filesystem offline."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="FS_DEGRADED"}'
+ values: '0 0 1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="FS_DEGRADED"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="FS_DEGRADED"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephFilesystemDegraded
+ - eval_time: 10m
+ alertname: CephFilesystemDegraded
+ exp_alerts:
+ - exp_labels:
+ name: FS_DEGRADED
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.5.4
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-degraded
+ summary: CephFS filesystem is degraded
+ description: "One or more metadata daemons (MDS ranks) are failed or in a damaged state. At best the filesystem is partially available, at worst the filesystem is completely unusable."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MDS_INSUFFICIENT_STANDBY"}'
+ values: '0 0 1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="MDS_INSUFFICIENT_STANDBY"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="MDS_INSUFFICIENT_STANDBY"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephFilesystemInsufficientStandby
+ - eval_time: 10m
+ alertname: CephFilesystemInsufficientStandby
+ exp_alerts:
+ - exp_labels:
+ name: MDS_INSUFFICIENT_STANDBY
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-insufficient-standby
+ summary: Ceph filesystem standby daemons too few
+ description: "The minimum number of standby daemons required by standby_count_wanted is less than the current number of standby daemons. Adjust the standby count or increase the number of MDS daemons."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="FS_WITH_FAILED_MDS"}'
+ values: '0 0 1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="FS_WITH_FAILED_MDS"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="FS_WITH_FAILED_MDS"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephFilesystemFailureNoStandby
+ - eval_time: 10m
+ alertname: CephFilesystemFailureNoStandby
+ exp_alerts:
+ - exp_labels:
+ name: FS_WITH_FAILED_MDS
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.5.5
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-with-failed-mds
+ summary: MDS daemon failed, no further standby available
+ description: "An MDS daemon has failed, leaving only one active rank and no available standby. Investigate the cause of the failure or add a standby MDS."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MDS_UP_LESS_THAN_MAX"}'
+ values: '0 0 1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="MDS_UP_LESS_THAN_MAX"} > 0
+ eval_time: 2m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="MDS_UP_LESS_THAN_MAX"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephFilesystemMDSRanksLow
+ - eval_time: 10m
+ alertname: CephFilesystemMDSRanksLow
+ exp_alerts:
+ - exp_labels:
+ name: MDS_UP_LESS_THAN_MAX
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-up-less-than-max
+ summary: Ceph MDS daemon count is lower than configured
+ description: "The filesystem's 'max_mds' setting defines the number of MDS ranks in the filesystem. The current number of active MDS daemons is less than this value."
+# MGR
+ - interval: 1m
+ input_series:
+ - series: 'up{job="ceph", instance="ceph-mgr:9283"}'
+ values: '1+0x2 0+0x10'
+ promql_expr_test:
+ - expr: up{job="ceph"} == 0
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="up", job="ceph", instance="ceph-mgr:9283"}'
+ value: 0
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephMgrPrometheusModuleInactive
+ - eval_time: 10m
+ alertname: CephMgrPrometheusModuleInactive
+ exp_alerts:
+ - exp_labels:
+ instance: ceph-mgr:9283
+ job: ceph
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.6.2
+ exp_annotations:
+ summary: The mgr/prometheus module is not available
+ description: "The mgr/prometheus module at ceph-mgr:9283 is unreachable. This could mean that the module has been disabled or the mgr daemon itself is down. Without the mgr/prometheus module metrics and alerts will no longer function. Open a shell to an admin node or toolbox pod and use 'ceph -s' to to determine whether the mgr is active. If the mgr is not active, restart it, otherwise you can determine module status with 'ceph mgr module ls'. If it is not listed as enabled, enable it with 'ceph mgr module enable prometheus'."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="RECENT_MGR_MODULE_CRASH"}'
+ values: '0+0x2 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="RECENT_MGR_MODULE_CRASH"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="RECENT_MGR_MODULE_CRASH"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephMgrModuleCrash
+ - eval_time: 15m
+ alertname: CephMgrModuleCrash
+ exp_alerts:
+ - exp_labels:
+ name: RECENT_MGR_MODULE_CRASH
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.6.1
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#recent-mgr-module-crash
+ summary: A manager module has recently crashed
+ description: "One or more mgr modules have crashed and have yet to be acknowledged by an administrator. A crashed module may impact functionality within the cluster. Use the 'ceph crash' command to determine which module has failed, and archive it to acknowledge the failure."
+# MON
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MON_DISK_CRIT"}'
+ values: '0+0x2 1+0x10'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.a", hostname="ceph-mon-a"}'
+ values: '1+0x13'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="MON_DISK_CRIT"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="MON_DISK_CRIT"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephMonDiskspaceCritical
+ - eval_time: 10m
+ alertname: CephMonDiskspaceCritical
+ exp_alerts:
+ - exp_labels:
+ name: "MON_DISK_CRIT"
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.3.2
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-crit
+ summary: Filesystem space on at least one monitor is critically low
+ description: "The free space available to a monitor's store is critically low. You should increase the space available to the monitor(s). The default directory is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and /var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook. Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files. Also check any other directories under /var/lib/rook and other directories on the same filesystem, often /var/log and /var/tmp are culprits. Your monitor hosts are; - ceph-mon-a"
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MON_DISK_LOW"}'
+ values: '0+0x2 1+0x10'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.a", hostname="ceph-mon-a"}'
+ values: '1+0x13'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="MON_DISK_LOW"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="MON_DISK_LOW"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephMonDiskspaceLow
+ - eval_time: 10m
+ alertname: CephMonDiskspaceLow
+ exp_alerts:
+ - exp_labels:
+ name: "MON_DISK_LOW"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-low
+ summary: Drive space on at least one monitor is approaching full
+ description: "The space available to a monitor's store is approaching full (>70% is the default). You should increase the space available to the monitor(s). The default directory is /var/lib/ceph/mon-*/data/store.db on traditional deployments, and /var/lib/rook/mon-*/data/store.db on the mon pod's worker node for Rook. Look for old, rotated versions of *.log and MANIFEST*. Do NOT touch any *.sst files. Also check any other directories under /var/lib/rook and other directories on the same filesystem, often /var/log and /var/tmp are culprits. Your monitor hosts are; - ceph-mon-a"
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MON_CLOCK_SKEW"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="MON_CLOCK_SKEW"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="MON_CLOCK_SKEW"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephMonClockSkew
+ - eval_time: 10m
+ alertname: CephMonClockSkew
+ exp_alerts:
+ - exp_labels:
+ name: "MON_CLOCK_SKEW"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-clock-skew
+ summary: Clock skew detected among monitors
+ description: "Ceph monitors rely on closely synchronized time to maintain quorum and cluster consistency. This event indicates that the time on at least one mon has drifted too far from the lead mon. Review cluster status with ceph -s. This will show which monitors are affected. Check the time sync status on each monitor host with 'ceph time-sync-status' and the state and peers of your ntpd or chrony daemon."
+
+# Check 3 mons one down, quorum at risk
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="MON_DOWN"}'
+ values: '0+0x2 1+0x12'
+ - series: 'ceph_mon_quorum_status{ceph_daemon="mon.a"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_quorum_status{ceph_daemon="mon.b"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_quorum_status{ceph_daemon="mon.c"}'
+ values: '1+0x2 0+0x12'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.a", hostname="ceph-mon-1"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.b", hostname="ceph-mon-2"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.c", hostname="ceph-mon-3"}'
+ values: '1+0x14'
+ promql_expr_test:
+ - expr: ((ceph_health_detail{name="MON_DOWN"} == 1) * on() (count(ceph_mon_quorum_status == 1) == bool (floor(count(ceph_mon_metadata) / 2) + 1))) == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephMonDownQuorumAtRisk
+ # the 1m evaluation above shouldn't fire (no exp_alerts); the 10m one should
+ - eval_time: 10m
+ alertname: CephMonDownQuorumAtRisk
+ exp_alerts:
+ - exp_labels:
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.3.1
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down
+ summary: Monitor quorum is at risk
+ description: "Quorum requires a majority of monitors (x 2) to be active. Without quorum the cluster will become inoperable, affecting all services and connected clients. The following monitors are down: - mon.c on ceph-mon-3"
+# check 5 mons, 1 down - warning only
+ - interval: 1m
+ input_series:
+ - series: 'ceph_mon_quorum_status{ceph_daemon="mon.a"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_quorum_status{ceph_daemon="mon.b"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_quorum_status{ceph_daemon="mon.c"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_quorum_status{ceph_daemon="mon.d"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_quorum_status{ceph_daemon="mon.e"}'
+ values: '1+0x2 0+0x12'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.a", hostname="ceph-mon-1"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.b", hostname="ceph-mon-2"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.c", hostname="ceph-mon-3"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.d", hostname="ceph-mon-4"}'
+ values: '1+0x14'
+ - series: 'ceph_mon_metadata{ceph_daemon="mon.e", hostname="ceph-mon-5"}'
+ values: '1+0x14'
+ promql_expr_test:
+ - expr: (count(ceph_mon_quorum_status == 0) <= (count(ceph_mon_metadata) - floor(count(ceph_mon_metadata) / 2) + 1))
+ eval_time: 3m
+ exp_samples:
+ - labels: '{}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephMonDown
+ - eval_time: 10m
+ alertname: CephMonDown
+ exp_alerts:
+ - exp_labels:
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down
+ summary: One or more monitors down
+ description: "You have 1 monitor down. Quorum is still intact, but the loss of an additional monitor will make your cluster inoperable. The following monitors are down: - mon.e on ceph-mon-5\n"
+# Device Health
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="DEVICE_HEALTH"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="DEVICE_HEALTH"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="DEVICE_HEALTH"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephDeviceFailurePredicted
+ - eval_time: 10m
+ alertname: CephDeviceFailurePredicted
+ exp_alerts:
+ - exp_labels:
+ name: "DEVICE_HEALTH"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#id2
+ summary: Device(s) predicted to fail soon
+ description: "The device health module has determined that one or more devices will fail soon. To review device status use 'ceph device ls'. To show a specific device use 'ceph device info <dev id>'. Mark the OSD out so that data may migrate to other OSDs. Once the OSD has drained, destroy the OSD, replace the device, and redeploy the OSD."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="DEVICE_HEALTH_TOOMANY"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="DEVICE_HEALTH_TOOMANY"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="DEVICE_HEALTH_TOOMANY"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephDeviceFailurePredictionTooHigh
+ - eval_time: 10m
+ alertname: CephDeviceFailurePredictionTooHigh
+ exp_alerts:
+ - exp_labels:
+ name: "DEVICE_HEALTH_TOOMANY"
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.4.7
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-toomany
+ summary: Too many devices are predicted to fail, unable to resolve
+ description: "The device health module has determined that devices predicted to fail can not be remediated automatically, since too many OSDs would be removed from the cluster to ensure performance and availabililty. Prevent data integrity issues by adding new OSDs so that data may be relocated."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="DEVICE_HEALTH_IN_USE"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="DEVICE_HEALTH_IN_USE"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="DEVICE_HEALTH_IN_USE"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephDeviceFailureRelocationIncomplete
+ - eval_time: 10m
+ alertname: CephDeviceFailureRelocationIncomplete
+ exp_alerts:
+ - exp_labels:
+ name: "DEVICE_HEALTH_IN_USE"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-in-use
+ summary: Device failure is predicted, but unable to relocate data
+ description: "The device health module has determined that one or more devices will fail soon, but the normal process of relocating the data on the device to other OSDs in the cluster is blocked. \nEnsure that the cluster has available free space. It may be necessary to add capacity to the cluster to allow data from the failing device to successfully migrate, or to enable the balancer."
+# OSD
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="OSD_HOST_DOWN"}'
+ values: '0+0x2 1+0x10'
+ - series: 'ceph_osd_up{ceph_daemon="osd.0"}'
+ values: '1+0x2 0+0x10'
+ - series: 'ceph_osd_metadata{ceph_daemon="osd.0", hostname="ceph-osd-1"}'
+ values: '1+0x12'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="OSD_HOST_DOWN"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="OSD_HOST_DOWN"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDHostDown
+ - eval_time: 10m
+ alertname: CephOSDHostDown
+ exp_alerts:
+ - exp_labels:
+ name: "OSD_HOST_DOWN"
+ severity: warning
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.4.8
+ exp_annotations:
+ summary: An OSD host is offline
+ description: "The following OSDs are down: - ceph-osd-1 : osd.0"
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="OSD_SLOW_PING_TIME_FRONT"}'
+ values: '0+0x2 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="OSD_SLOW_PING_TIME_FRONT"} == 0
+ eval_time: 1m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="OSD_SLOW_PING_TIME_FRONT"}'
+ value: 0
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDTimeoutsPublicNetwork
+ - eval_time: 10m
+ alertname: CephOSDTimeoutsPublicNetwork
+ exp_alerts:
+ - exp_labels:
+ name: "OSD_SLOW_PING_TIME_FRONT"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: Network issues delaying OSD heartbeats (public network)
+ description: "OSD heartbeats on the cluster's 'public' network (frontend) are running slow. Investigate the network for latency or loss issues. Use 'ceph health detail' to show the affected OSDs."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="OSD_SLOW_PING_TIME_BACK"}'
+ values: '0+0x2 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="OSD_SLOW_PING_TIME_BACK"} == 0
+ eval_time: 1m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="OSD_SLOW_PING_TIME_BACK"}'
+ value: 0
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDTimeoutsClusterNetwork
+ - eval_time: 10m
+ alertname: CephOSDTimeoutsClusterNetwork
+ exp_alerts:
+ - exp_labels:
+ name: "OSD_SLOW_PING_TIME_BACK"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: Network issues delaying OSD heartbeats (cluster network)
+ description: "OSD heartbeats on the cluster's 'cluster' network (backend) are slow. Investigate the network for latency issues on this subnet. Use 'ceph health detail' to show the affected OSDs."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="BLUESTORE_DISK_SIZE_MISMATCH"}'
+ values: '0+0x2 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="BLUESTORE_DISK_SIZE_MISMATCH"} == 0
+ eval_time: 1m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="BLUESTORE_DISK_SIZE_MISMATCH"}'
+ value: 0
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDInternalDiskSizeMismatch
+ - eval_time: 10m
+ alertname: CephOSDInternalDiskSizeMismatch
+ exp_alerts:
+ - exp_labels:
+ name: "BLUESTORE_DISK_SIZE_MISMATCH"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-disk-size-mismatch
+ summary: OSD size inconsistency error
+ description: "One or more OSDs have an internal inconsistency between metadata and the size of the device. This could lead to the OSD(s) crashing in future. You should redeploy the affected OSDs."
+ - interval: 30s
+ input_series:
+ - series: 'ceph_health_detail{name="BLUESTORE_SPURIOUS_READ_ERRORS"}'
+ values: '0+0x2 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="BLUESTORE_SPURIOUS_READ_ERRORS"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="BLUESTORE_SPURIOUS_READ_ERRORS"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDReadErrors
+ - eval_time: 10m
+ alertname: CephOSDReadErrors
+ exp_alerts:
+ - exp_labels:
+ name: "BLUESTORE_SPURIOUS_READ_ERRORS"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-spurious-read-errors
+ summary: Device read errors detected
+ description: "An OSD has encountered read errors, but the OSD has recovered by retrying the reads. This may indicate an issue with hardware or the kernel."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="OSD_DOWN"}'
+ values: '0+0x2 1+0x10'
+ - series: 'ceph_osd_up{ceph_daemon="osd.0"}'
+ values: '1+0x12'
+ - series: 'ceph_osd_up{ceph_daemon="osd.1"}'
+ values: '1+0x2 0+0x10'
+ - series: 'ceph_osd_up{ceph_daemon="osd.2"}'
+ values: '1+0x12'
+ - series: 'ceph_osd_metadata{ceph_daemon="osd.0", hostname="ceph-osd-1"}'
+ values: '1+0x12'
+ - series: 'ceph_osd_metadata{ceph_daemon="osd.1", hostname="ceph-osd-2"}'
+ values: '1+0x12'
+ - series: 'ceph_osd_metadata{ceph_daemon="osd.2", hostname="ceph-osd-3"}'
+ values: '1+0x12'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="OSD_DOWN"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="OSD_DOWN"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDDown
+ - eval_time: 10m
+ alertname: CephOSDDown
+ exp_alerts:
+ - exp_labels:
+ name: "OSD_DOWN"
+ severity: warning
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.4.2
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-down
+ summary: An OSD has been marked down
+ description: "1 OSD down for over 5mins. The following OSD is down: - osd.1 on ceph-osd-2\n"
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="OSD_NEARFULL"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="OSD_NEARFULL"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="OSD_NEARFULL"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDNearFull
+ - eval_time: 10m
+ alertname: CephOSDNearFull
+ exp_alerts:
+ - exp_labels:
+ name: "OSD_NEARFULL"
+ severity: warning
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.4.3
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-nearfull
+ summary: OSD(s) running low on free space (NEARFULL)
+ description: One or more OSDs have reached the NEARFULL threshold. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data.
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="OSD_FULL"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="OSD_FULL"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="OSD_FULL"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDFull
+ - eval_time: 10m
+ alertname: CephOSDFull
+ exp_alerts:
+ - exp_labels:
+ name: "OSD_FULL"
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.4.6
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-full
+ summary: OSD full, writes blocked
+ description: An OSD has reached the FULL threshold. Writes to pools that share the affected OSD will be blocked. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data.
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="OSD_BACKFILLFULL"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="OSD_BACKFILLFULL"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="OSD_BACKFILLFULL"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDBackfillFull
+ - eval_time: 10m
+ alertname: CephOSDBackfillFull
+ exp_alerts:
+ - exp_labels:
+ name: "OSD_BACKFILLFULL"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-backfillfull
+ summary: OSD(s) too full for backfill operations
+ description: "An OSD has reached the BACKFILL FULL threshold. This will prevent rebalance operations from completing. Use 'ceph health detail' and 'ceph osd df' to identify the problem. To resolve, add capacity to the affected OSD's failure domain, restore down/out OSDs, or delete unwanted data."
+ - interval: 30s
+ input_series:
+ - series: 'ceph_health_detail{name="OSD_TOO_MANY_REPAIRS"}'
+ values: '0+0x2 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="OSD_TOO_MANY_REPAIRS"} == 0
+ eval_time: 1m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="OSD_TOO_MANY_REPAIRS"}'
+ value: 0
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephOSDTooManyRepairs
+ - eval_time: 10m
+ alertname: CephOSDTooManyRepairs
+ exp_alerts:
+ - exp_labels:
+ name: "OSD_TOO_MANY_REPAIRS"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-too-many-repairs
+ summary: OSD reports a high number of read errors
+ description: Reads from an OSD have used a secondary PG to return data to the client, indicating a potential failing drive.
+# Pools
+ # trigger percent full prediction on pools 1 and 2 only
+ - interval: 12h
+ input_series:
+ - series: 'ceph_pool_percent_used{pool_id="1"}'
+ values: '70 75 80 87 92'
+ - series: 'ceph_pool_percent_used{pool_id="2"}'
+ values: '22 22 23 23 24'
+ - series: 'ceph_pool_metadata{pool_id="1",name="rbd",type="replicated"}'
+ values: '1 1 1 1 1'
+ - series: 'ceph_pool_metadata{pool_id="2",name="default.rgw.index",type="replicated"}'
+ values: '1 1 1 1 1'
+ promql_expr_test:
+ - expr: |
+ (predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id)
+ group_right ceph_pool_metadata) >= 95
+ eval_time: 36h
+ exp_samples:
+ - labels: '{name="rbd",pool_id="1",type="replicated"}'
+ value: 1.424E+02 # 142%
+ alert_rule_test:
+ - eval_time: 48h
+ alertname: CephPoolGrowthWarning
+ exp_alerts:
+ - exp_labels:
+ name: rbd
+ pool_id: 1
+ severity: warning
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.9.2
+ exp_annotations:
+ summary: Pool growth rate may soon exceed capacity
+ description: Pool 'rbd' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours.
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="POOL_BACKFILLFULL"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="POOL_BACKFILLFULL"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="POOL_BACKFILLFULL"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPoolBackfillFull
+ - eval_time: 5m
+ alertname: CephPoolBackfillFull
+ exp_alerts:
+ - exp_labels:
+ name: "POOL_BACKFILLFULL"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: Free space in a pool is too low for recovery/backfill
+ description: A pool is approaching the near full threshold, which will prevent recovery/backfill operations from completing. Consider adding more capacity.
+
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="POOL_NEAR_FULL"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="POOL_NEAR_FULL"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="POOL_NEAR_FULL"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPoolNearFull
+ - eval_time: 10m
+ alertname: CephPoolNearFull
+ exp_alerts:
+ - exp_labels:
+ name: "POOL_NEAR_FULL"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ summary: One or more Ceph pools are nearly full
+ description: "A pool has exceeded the warning (percent full) threshold, or OSDs supporting the pool have reached the NEARFULL threshold. Writes may continue, but you are at risk of the pool going read-only if more capacity isn't made available. Determine the affected pool with 'ceph df detail', looking at QUOTA BYTES and STORED. Increase the pool's quota, or add capacity to the cluster first then increase the pool's quota (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>). Also ensure that the balancer is active."
+
+# PGs
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="PG_NOT_SCRUBBED"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="PG_NOT_SCRUBBED"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="PG_NOT_SCRUBBED"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPGNotScrubbed
+ - eval_time: 10m
+ alertname: CephPGNotScrubbed
+ exp_alerts:
+ - exp_labels:
+ name: "PG_NOT_SCRUBBED"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-scrubbed
+ summary: Placement group(s) have not been scrubbed
+ description: "One or more PGs have not been scrubbed recently. Scrubs check metadata integrity, protecting against bit-rot. They check that metadata is consistent across data replicas. When PGs miss their scrub interval, it may indicate that the scrub window is too small, or PGs were not in a 'clean' state during the scrub window. You can manually initiate a scrub with: ceph pg scrub <pgid>"
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="PG_DAMAGED"}'
+ values: '0+0x4 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name=~"PG_DAMAGED|OSD_SCRUB_ERRORS"} == 1
+ eval_time: 5m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="PG_DAMAGED"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPGsDamaged
+ - eval_time: 10m
+ alertname: CephPGsDamaged
+ exp_alerts:
+ - exp_labels:
+ name: "PG_DAMAGED"
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.7.4
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-damaged
+ summary: Placement group damaged, manual intervention needed
+ description: During data consistency checks (scrub), at least one PG has been flagged as being damaged or inconsistent. Check to see which PG is affected, and attempt a manual repair if necessary. To list problematic placement groups, use 'rados list-inconsistent-pg <pool>'. To repair PGs use the 'ceph pg repair <pg_num>' command.
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="TOO_MANY_PGS"}'
+ values: '0+0x4 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="TOO_MANY_PGS"} == 1
+ eval_time: 5m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="TOO_MANY_PGS"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPGsHighPerOSD
+ - eval_time: 10m
+ alertname: CephPGsHighPerOSD
+ exp_alerts:
+ - exp_labels:
+ name: "TOO_MANY_PGS"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks/#too-many-pgs
+ summary: Placement groups per OSD is too high
+ description: "The number of placement groups per OSD is too high (exceeds the mon_max_pg_per_osd setting).\n Check that the pg_autoscaler has not been disabled for any pools with 'ceph osd pool autoscale-status', and that the profile selected is appropriate. You may also adjust the target_size_ratio of a pool to guide the autoscaler based on the expected relative size of the pool ('ceph osd pool set cephfs.cephfs.meta target_size_ratio .1') or set the pg_autoscaler mode to 'warn' and adjust pg_num appropriately for one or more pools."
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="PG_RECOVERY_FULL"}'
+ values: '0+0x2 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="PG_RECOVERY_FULL"} == 0
+ eval_time: 1m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="PG_RECOVERY_FULL"}'
+ value: 0
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPGRecoveryAtRisk
+ - eval_time: 10m
+ alertname: CephPGRecoveryAtRisk
+ exp_alerts:
+ - exp_labels:
+ name: "PG_RECOVERY_FULL"
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.7.5
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-recovery-full
+ summary: OSDs are too full for recovery
+ description: Data redundancy is at risk since one or more OSDs are at or above the 'full' threshold. Add more capacity to the cluster, restore down/out OSDs, or delete unwanted data.
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="PG_BACKFILL_FULL"}'
+ values: '0+0x2 1+0x20'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="PG_BACKFILL_FULL"} == 0
+ eval_time: 1m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="PG_BACKFILL_FULL"}'
+ value: 0
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPGBackfillAtRisk
+ - eval_time: 10m
+ alertname: CephPGBackfillAtRisk
+ exp_alerts:
+ - exp_labels:
+ name: "PG_BACKFILL_FULL"
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.7.6
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-backfill-full
+ summary: Backfill operations are blocked due to lack of free space
+ description: Data redundancy may be at risk due to lack of free space within the cluster. One or more OSDs have reached the 'backfillfull' threshold. Add more capacity, or delete unwanted data.
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="PG_AVAILABILITY"}'
+ values: '0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_health_detail{name="OSD_DOWN"}'
+ values: '0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0'
+ promql_expr_test:
+ - expr: ((ceph_health_detail{name="PG_AVAILABILITY"} == 1) - scalar(ceph_health_detail{name="OSD_DOWN"}))
+ eval_time: 1m
+ # empty set at 1m
+ exp_samples:
+ alert_rule_test:
+ # PG_AVAILABILITY and OSD_DOWN not firing .. no alert
+ - eval_time: 1m
+ alertname: CephPGUnavilableBlockingIO
+ exp_alerts:
+      # PG_AVAILABILITY firing, but OSD_DOWN is active .. no alert
+ - eval_time: 5m
+ alertname: CephPGUnavilableBlockingIO
+ exp_alerts:
+ # PG_AVAILABILITY firing, AND OSD_DOWN is not active...raise the alert
+ - eval_time: 15m
+ alertname: CephPGUnavilableBlockingIO
+ exp_alerts:
+ - exp_labels:
+ name: "PG_AVAILABILITY"
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.7.3
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-availability
+ summary: PG is unavailable, blocking I/O
+ description: Data availability is reduced, impacting the cluster's ability to service I/O. One or more placement groups (PGs) are in a state that blocks I/O.
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="PG_NOT_DEEP_SCRUBBED"}'
+ values: '0+0x2 1+0x10'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="PG_NOT_DEEP_SCRUBBED"} == 1
+ eval_time: 3m
+ exp_samples:
+ - labels: '{__name__="ceph_health_detail", name="PG_NOT_DEEP_SCRUBBED"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 1m
+ alertname: CephPGNotDeepScrubbed
+ - eval_time: 10m
+ alertname: CephPGNotDeepScrubbed
+ exp_alerts:
+ - exp_labels:
+ name: "PG_NOT_DEEP_SCRUBBED"
+ severity: warning
+ type: ceph_default
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-deep-scrubbed
+ summary: Placement group(s) have not been deep scrubbed
+ description: One or more PGs have not been deep scrubbed recently. Deep scrubs protect against bit-rot. They compare data replicas to ensure consistency. When PGs miss their deep scrub interval, it may indicate that the window is too small or PGs were not in a 'clean' state during the deep-scrub window.
+
+# Prometheus
+ - interval: 1m
+ input_series:
+ - series: 'up{job="myjob"}'
+ values: '1+0x10'
+ promql_expr_test:
+ - expr: absent(up{job="ceph"})
+ eval_time: 1m
+ exp_samples:
+ - labels: '{job="ceph"}'
+ value: 1
+ alert_rule_test:
+ - eval_time: 5m
+ alertname: PrometheusJobMissing
+ exp_alerts:
+ - exp_labels:
+ job: ceph
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.12.1
+ exp_annotations:
+ summary: The scrape job for Ceph is missing from Prometheus
+ description: The prometheus job that scrapes from Ceph is no longer defined, this will effectively mean you'll have no metrics or alerts for the cluster. Please review the job definitions in the prometheus.yml file of the prometheus instance.
+# RADOS
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="OBJECT_UNFOUND"}'
+ values: '0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_osd_up{ceph_daemon="osd.0"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_osd_up{ceph_daemon="osd.1"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_osd_up{ceph_daemon="osd.2"}'
+ values: '1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_osd_metadata{ceph_daemon="osd.0"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_osd_metadata{ceph_daemon="osd.1"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ - series: 'ceph_osd_metadata{ceph_daemon="osd.2"}'
+ values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: (ceph_health_detail{name="OBJECT_UNFOUND"} == 1) * on() (count(ceph_osd_up == 1) == bool count(ceph_osd_metadata)) == 1
+ eval_time: 1m
+ exp_samples:
+ alert_rule_test:
+ # OBJECT_UNFOUND but osd.2 is down, so don't fire
+ - eval_time: 5m
+ alertname: CephObjectMissing
+ exp_alerts:
+      # OBJECT_UNFOUND and all OSDs are online, so fire
+ - eval_time: 15m
+ alertname: CephObjectMissing
+ exp_alerts:
+ - exp_labels:
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.10.1
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#object-unfound
+ summary: Object(s) marked UNFOUND
+ description: The latest version of a RADOS object can not be found, even though all OSDs are up. I/O requests for this object from clients will block (hang). Resolving this issue may require the object to be rolled back to a prior version manually, and manually verified.
+# Generic Alerts
+ - interval: 1m
+ input_series:
+ - series: 'ceph_health_detail{name="RECENT_CRASH"}'
+ values: '0 0 0 1 1 1 1 1 1 1 1'
+ promql_expr_test:
+ - expr: ceph_health_detail{name="RECENT_CRASH"} == 1
+ eval_time: 1m
+ exp_samples:
+ alert_rule_test:
+ # not firing
+ - eval_time: 1m
+ alertname: CephDaemonCrash
+ exp_alerts:
+ # firing
+ - eval_time: 10m
+ alertname: CephDaemonCrash
+ exp_alerts:
+ - exp_labels:
+ name: RECENT_CRASH
+ severity: critical
+ type: ceph_default
+ oid: 1.3.6.1.4.1.50495.1.2.1.1.2
+ exp_annotations:
+ documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks/#recent-crash
+ summary: One or more Ceph daemons have crashed, and are pending acknowledgement
+ description: One or more daemons have crashed recently, and need to be acknowledged. This notification ensures that software crashes do not go unseen. To acknowledge a crash, use the 'ceph crash archive <id>' command.
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_syntax.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_syntax.py
new file mode 100755
index 0000000..966d768
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_syntax.py
@@ -0,0 +1,42 @@
+import pytest
+import os
+import yaml
+from .utils import promtool_available, call
+from .settings import ALERTS_FILE, UNIT_TESTS_FILE
+
+
+def load_yaml(file_name):
+ yaml_data = None
+ with open(file_name, 'r') as alert_file:
+ raw = alert_file.read()
+ try:
+ yaml_data = yaml.safe_load(raw)
+ except yaml.YAMLError as e:
+ pass
+
+ return yaml_data
+
+
+def test_alerts_present():
+ assert os.path.exists(ALERTS_FILE), f"{ALERTS_FILE} not found"
+
+
+def test_unittests_present():
+ assert os.path.exists(UNIT_TESTS_FILE), f"{UNIT_TESTS_FILE} not found"
+
+
+@pytest.mark.skipif(not os.path.exists(ALERTS_FILE), reason=f"{ALERTS_FILE} missing")
+def test_rules_format():
+ assert load_yaml(ALERTS_FILE)
+
+
+@pytest.mark.skipif(not os.path.exists(UNIT_TESTS_FILE), reason=f"{UNIT_TESTS_FILE} missing")
+def test_unittests_format():
+ assert load_yaml(UNIT_TESTS_FILE)
+
+
+@pytest.mark.skipif(not promtool_available(), reason="promtool is not installed. Unable to check syntax")
+def test_rule_syntax():
+ completion = call(f"promtool check rules {ALERTS_FILE}")
+ assert completion.returncode == 0
+ assert b"SUCCESS" in completion.stdout
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_unittests.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_unittests.py
new file mode 100644
index 0000000..4cfb2b6
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/test_unittests.py
@@ -0,0 +1,19 @@
+import pytest
+import os
+from .utils import promtool_available, call
+from .settings import ALERTS_FILE, UNIT_TESTS_FILE
+
+
+def test_alerts_present():
+ assert os.path.exists(ALERTS_FILE), f"{ALERTS_FILE} not found"
+
+
+def test_unittests_present():
+ assert os.path.exists(UNIT_TESTS_FILE), f"{UNIT_TESTS_FILE} not found"
+
+
+@pytest.mark.skipif(not promtool_available(), reason="promtool is not installed. Unable to run unit tests")
+def test_run_unittests():
+ completion = call(f"promtool test rules {UNIT_TESTS_FILE}")
+ assert completion.returncode == 0
+ assert b"SUCCESS" in completion.stdout
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/utils.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/utils.py
new file mode 100644
index 0000000..8429244
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/utils.py
@@ -0,0 +1,12 @@
+import pytest
+import shutil
+import subprocess
+
+
+def promtool_available() -> bool:
+ return shutil.which('promtool') is not None
+
+
+def call(cmd):
+ completion = subprocess.run(cmd.split(), stdout=subprocess.PIPE)
+ return completion
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/validate_rules.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/validate_rules.py
new file mode 100755
index 0000000..c24ce5c
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_alerts/validate_rules.py
@@ -0,0 +1,571 @@
+#!/usr/bin/env python3
+#
+# Check the Prometheus rules for format, and integration
+# with the unit tests. This script has the following exit
+# codes:
+# 0 .. Everything worked
+# 4 .. rule problems or missing unit tests
+# 8 .. Missing fields in YAML
+# 12 .. Invalid YAML - unable to load
+# 16 .. Missing input files
+#
+# Externals
+# snmptranslate .. used to determine the oid's in the MIB to verify the rule -> MIB is correct
+#
+
+import re
+import os
+import sys
+import yaml
+import shutil
+import string
+from bs4 import BeautifulSoup
+from typing import List, Any, Dict, Set, Optional, Tuple
+import subprocess
+
+import urllib.request
+import urllib.error
+from urllib.parse import urlparse
+
+from settings import ALERTS_FILE, MIB_FILE, UNIT_TESTS_FILE
+
+DOCLINK_NAME = 'documentation'
+
+
+def isascii(s: str) -> bool:
+ try:
+ s.encode('ascii')
+ except UnicodeEncodeError:
+ return False
+ return True
+
+
+def read_file(file_name: str) -> Tuple[str, str]:
+ try:
+ with open(file_name, 'r') as input_file:
+ raw_data = input_file.read()
+ except OSError:
+ return '', f"Unable to open {file_name}"
+
+ return raw_data, ''
+
+
+def load_yaml(file_name: str) -> Tuple[Dict[str, Any], str]:
+ data = {}
+ errs = ''
+
+ raw_data, err = read_file(file_name)
+ if not err:
+
+ try:
+ data = yaml.safe_load(raw_data)
+ except yaml.YAMLError as e:
+ errs = f"filename '{file_name} is not a valid YAML file"
+
+ return data, errs
+
+
+def run_command(command: str):
+ c = command.split()
+ completion = subprocess.run(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ return (completion.returncode,
+ completion.stdout.decode('utf-8').split('\n'),
+ completion.stderr.decode('utf-8').split('\n'))
+
+
+class HTMLCache:
+ def __init__(self) -> None:
+ self.cache: Dict[str, Tuple[int, str]] = {}
+
+ def fetch(self, url_str: str) -> None:
+ parsed = urlparse(url_str)
+ url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
+
+ if url in self.cache:
+ return self.cache[url]
+
+ req = urllib.request.Request(url)
+ try:
+ r = urllib.request.urlopen(req)
+ except urllib.error.HTTPError as e:
+ self.cache[url] = e.code, e.reason
+ return self.cache[url]
+ except urllib.error.URLError as e:
+ self.cache[url] = 400, e.reason
+ return self.cache[url]
+
+ if r.status == 200:
+ html = r.read().decode('utf-8')
+ self.cache[url] = 200, html
+ return self.cache[url]
+
+ self.cache[url] = r.status, r.reason
+ return r.status, r.reason
+
+ @property
+ def cached_pages(self) -> List[str]:
+ return self.cache.keys()
+
+ @property
+ def cached_pages_total(self) -> int:
+ return len(self.cache.keys())
+
+class PrometheusRule:
+ expected_attrs = [
+ 'alert',
+ 'expr',
+ 'labels',
+ 'annotations'
+ ]
+
+ def __init__(self, rule_group, rule_data: Dict[str, Any]):
+
+ assert 'alert' in rule_data
+ self.group: RuleGroup = rule_group
+ self.name = rule_data.get('alert')
+ self.rule = rule_data
+ self.errors: List[str] = []
+ self.warnings: List[str] = []
+ self.validate()
+
+ @property
+ def has_oid(self):
+ return True if self.rule.get('labels', {}).get('oid', '') else False
+
+ @property
+ def labels(self) -> Dict[str, str]:
+ return self.rule.get('labels', {})
+
+ @property
+ def annotations(self) -> Dict[str, str]:
+ return self.rule.get('annotations', {})
+
+ def _check_alert_name(self):
+ # this is simplistic, but works in the context of the alert name
+ if self.name[0] in string.ascii_uppercase and \
+ self.name != self.name.lower() and \
+ self.name != self.name.upper() and \
+ " " not in self.name and \
+ "_" not in self.name:
+ return
+
+ self.warnings.append("Alert name is not in CamelCase format")
+
+ def _check_structure(self):
+ rule_attrs = self.rule.keys()
+ missing_attrs = [a for a in PrometheusRule.expected_attrs if a not in rule_attrs]
+
+ if missing_attrs:
+ self.errors.append(
+ f"invalid alert structure. Missing field{'s' if len(missing_attrs) > 1 else ''}"
+ f": {','.join(missing_attrs)}")
+
+ def _check_labels(self):
+ for rqd in ['severity', 'type']:
+ if rqd not in self.labels.keys():
+ self.errors.append(f"rule is missing {rqd} label definition")
+
+ def _check_annotations(self):
+ for rqd in ['summary', 'description']:
+ if rqd not in self.annotations:
+ self.errors.append(f"rule is missing {rqd} annotation definition")
+
+ def _check_doclink(self):
+ doclink = self.annotations.get(DOCLINK_NAME, '')
+
+ if doclink:
+ url = urlparse(doclink)
+ status, content = self.group.fetch_html_page(doclink)
+ if status == 200:
+ if url.fragment:
+ soup = BeautifulSoup(content, 'html.parser')
+ if not soup.find(id=url.fragment):
+ self.errors.append(f"documentation link error: {url.fragment} anchor not found on the page")
+ else:
+ # catch all
+ self.errors.append(f"documentation link error: {status} {content}")
+
+ def _check_snmp(self):
+ oid = self.labels.get('oid', '')
+
+ if self.labels.get('severity', '') == 'critical' and not oid:
+ self.warnings.append("critical level alert is missing an SNMP oid entry")
+ if oid and not re.search('^1.3.6.1.4.1.50495.1.2.\\d+.\\d+.\\d+$', oid):
+ self.errors.append("invalid OID format provided")
+ if self.group.get_oids():
+ if oid and oid not in self.group.get_oids():
+ self.errors.append(f"rule defines an OID {oid} that is missing from the MIB file({os.path.basename(MIB_FILE)})")
+
+ def _check_ascii(self):
+ if 'oid' not in self.labels:
+ return
+
+ desc = self.annotations.get('description', '')
+ summary = self.annotations.get('summary', '')
+ if not isascii(desc):
+ self.errors.append(f"non-ascii characters found in 'description' field will cause issues in associated snmp trap.")
+ if not isascii(summary):
+ self.errors.append(f"non-ascii characters found in 'summary' field will cause issues in associated snmp trap.")
+
+ def validate(self):
+
+ self._check_alert_name()
+ self._check_structure()
+ self._check_labels()
+ self._check_annotations()
+ self._check_doclink()
+ self._check_snmp()
+ self._check_ascii()
+ char = '.'
+
+ if self.errors:
+ char = 'E'
+ self.group.update('error', self.name)
+ elif self.warnings:
+ char = 'W'
+ self.group.update('warning', self.name)
+
+ sys.stdout.write(char)
+
+
+class RuleGroup:
+
+ def __init__(self, rule_file, group_name: str, group_name_width: int):
+ self.rule_file: RuleFile = rule_file
+ self.group_name = group_name
+ self.rules: Dict[str, PrometheusRule] = {}
+ self.problems = {
+ "error": [],
+ "warning": [],
+ }
+
+ sys.stdout.write(f"\n\t{group_name:<{group_name_width}} : ")
+
+ def add_rule(self, rule_data:Dict[str, Any]):
+ alert_name = rule_data.get('alert')
+ self.rules[alert_name] = PrometheusRule(self, rule_data)
+
+ def update(self, problem_type:str, alert_name:str):
+ assert problem_type in ['error', 'warning']
+
+ self.problems[problem_type].append(alert_name)
+ self.rule_file.update(self.group_name)
+
+ def fetch_html_page(self, url):
+ return self.rule_file.fetch_html_page(url)
+
+ def get_oids(self):
+ return self.rule_file.oid_list
+
+ @property
+ def error_count(self):
+ return len(self.problems['error'])
+
+ def warning_count(self):
+ return len(self.problems['warning'])
+
+ @property
+ def count(self):
+ return len(self.rules)
+
+
+class RuleFile:
+
+ def __init__(self, parent, file_name, rules, oid_list):
+ self.parent = parent
+ self.file_name = file_name
+ self.rules: Dict[str, Any] = rules
+ self.oid_list = oid_list
+ self.problems: Set[str] = set()
+ self.group: Dict[str, RuleGroup] = {}
+ self.alert_names_seen: Set[str] = set()
+ self.duplicate_alert_names:List[str] = []
+ self.html_cache = HTMLCache()
+
+ assert 'groups' in self.rules
+ self.max_group_name_width = self.get_max_group_name()
+ self.load_groups()
+
+ def update(self, group_name):
+ self.problems.add(group_name)
+ self.parent.mark_invalid()
+
+ def fetch_html_page(self, url):
+ return self.html_cache.fetch(url)
+
+ @property
+ def group_count(self):
+ return len(self.rules['groups'])
+
+ @property
+ def rule_count(self):
+ rule_count = 0
+ for _group_name, rule_group in self.group.items():
+ rule_count += rule_group.count
+ return rule_count
+
+ @property
+ def oid_count(self):
+ oid_count = 0
+ for _group_name, rule_group in self.group.items():
+ for _rule_name, rule in rule_group.rules.items():
+ if rule.has_oid:
+ oid_count += 1
+ return oid_count
+
+ @property
+ def group_names(self):
+ return self.group.keys()
+
+ @property
+ def problem_count(self):
+ return len(self.problems)
+
+ def get_max_group_name(self):
+ group_name_list = []
+ for group in self.rules.get('groups'):
+ group_name_list.append(group['name'])
+ return max([len(g) for g in group_name_list])
+
+ def load_groups(self):
+ sys.stdout.write("\nChecking rule groups")
+ for group in self.rules.get('groups'):
+ group_name = group['name']
+ rules = group['rules']
+ self.group[group_name] = RuleGroup(self, group_name, self.max_group_name_width)
+ for rule_data in rules:
+ if 'alert' in rule_data:
+ alert_name = rule_data.get('alert')
+ if alert_name in self.alert_names_seen:
+ self.duplicate_alert_names.append(alert_name)
+ else:
+ self.alert_names_seen.add(alert_name)
+ self.group[group_name].add_rule(rule_data)
+ else:
+ # skipped recording rule
+ pass
+
+ def report(self):
+ def max_width(item_list: Set[str], min_width: int = 0) -> int:
+ return max([len(i) for i in item_list] + [min_width])
+
+ if not self.problems and not self.duplicate_alert_names:
+ print("\nNo problems detected in the rule file")
+ return
+
+ print("\nProblem Report\n")
+
+ group_width = max_width(self.problems, 5)
+ alert_names = set()
+ for g in self.problems:
+ group = self.group[g]
+ alert_names.update(group.problems.get('error', []))
+ alert_names.update(group.problems.get('warning', []))
+ alert_width = max_width(alert_names, 10)
+
+ template = " {group:<{group_width}} {severity:<8} {alert_name:<{alert_width}} {description}"
+
+ print(template.format(
+ group="Group",
+ group_width=group_width,
+ severity="Severity",
+ alert_name="Alert Name",
+ alert_width=alert_width,
+ description="Problem Description"))
+
+ print(template.format(
+ group="-----",
+ group_width=group_width,
+ severity="--------",
+ alert_name="----------",
+ alert_width=alert_width,
+ description="-------------------"))
+
+ for group_name in sorted(self.problems):
+ group = self.group[group_name]
+ rules = group.rules
+ for alert_name in group.problems.get('error', []):
+ for desc in rules[alert_name].errors:
+ print(template.format(
+ group=group_name,
+ group_width=group_width,
+ severity="Error",
+ alert_name=alert_name,
+ alert_width=alert_width,
+ description=desc))
+ for alert_name in group.problems.get('warning', []):
+ for desc in rules[alert_name].warnings:
+ print(template.format(
+ group=group_name,
+ group_width=group_width,
+ severity="Warning",
+ alert_name=alert_name,
+ alert_width=alert_width,
+ description=desc))
+ if self.duplicate_alert_names:
+ print("Duplicate alert names detected:")
+ for a in self.duplicate_alert_names:
+ print(f" - {a}")
+
+
+class UnitTests:
+ expected_attrs = [
+ 'rule_files',
+ 'tests',
+ 'evaluation_interval'
+ ]
+ def __init__(self, filename):
+ self.filename = filename
+ self.unit_test_data: Dict[str, Any] = {}
+ self.alert_names_seen: Set[str] = set()
+ self.problems: List[str] = []
+ self.load()
+
+ def load(self):
+ self.unit_test_data, errs = load_yaml(self.filename)
+ if errs:
+ print(f"\n\nError in unit tests file: {errs}")
+ sys.exit(12)
+
+ missing_attr = [a for a in UnitTests.expected_attrs if a not in self.unit_test_data.keys()]
+ if missing_attr:
+ print(f"\nMissing attributes in unit tests: {','.join(missing_attr)}")
+ sys.exit(8)
+
+ def _check_alert_names(self, alert_names: List[str]):
+ alerts_tested: Set[str] = set()
+ for t in self.unit_test_data.get('tests'):
+ test_cases = t.get('alert_rule_test', [])
+ if not test_cases:
+ continue
+ for case in test_cases:
+ alertname = case.get('alertname', '')
+ if alertname:
+ alerts_tested.add(alertname)
+
+ alerts_defined = set(alert_names)
+ self.problems = list(alerts_defined.difference(alerts_tested))
+
+ def process(self, defined_alert_names: List[str]):
+ self._check_alert_names(defined_alert_names)
+
+ def report(self) -> None:
+
+ if not self.problems:
+ print("\nNo problems detected in unit tests file")
+ return
+
+ print("\nUnit tests are incomplete. Tests missing for the following alerts;")
+ for p in self.problems:
+ print(f" - {p}")
+
+class RuleChecker:
+
+ def __init__(self, rules_filename: str = None, test_filename: str = None):
+ self.rules_filename = rules_filename or ALERTS_FILE
+ self.test_filename = test_filename or UNIT_TESTS_FILE
+ self.rule_file: Optional[RuleFile] = None
+ self.unit_tests: Optional[UnitTests] = None
+ self.rule_file_problems: bool = False
+ self.errors = {}
+ self.warnings = {}
+ self.error_count = 0
+ self.warning_count = 0
+ self.oid_count = 0
+
+ self.oid_list = self.build_oid_list()
+
+ def build_oid_list(self) -> List[str]:
+
+ cmd = shutil.which('snmptranslate')
+ if not cmd:
+ return []
+
+ rc, stdout, stderr = run_command(f"{cmd} -Pu -Tz -M ../../snmp:/usr/share/snmp/mibs -m CEPH-MIB")
+ if rc != 0:
+ return []
+
+ oid_list: List[str] = []
+ for line in stdout[:-1]:
+ _label, oid = line.replace('"', '').replace('\t', ' ').split()
+ oid_list.append(oid)
+
+ return oid_list
+
+ @property
+ def status(self):
+ if self.rule_file_problems or self.unit_tests.problems:
+ return 4
+
+ return 0
+
+ def mark_invalid(self):
+ self.rule_file_problems = True
+
+ def summarise_rule_file(self):
+ for group_name in self.rule_file.problems:
+ group = self.rule_file.group[group_name]
+ self.error_count += len(group.problems['error'])
+ self.warning_count += len(group.problems['warning'])
+
+ def ready(self):
+ errs: List[str] = []
+ ready_state = True
+ if not os.path.exists(self.rules_filename):
+ errs.append(f"rule file '{self.rules_filename}' not found")
+ ready_state = False
+
+ if not os.path.exists(self.test_filename):
+ errs.append(f"test file '{self.test_filename}' not found")
+ ready_state = False
+
+ return ready_state, errs
+
+ def run(self):
+
+ ready, errs = self.ready()
+ if not ready:
+ print("Unable to start:")
+ for e in errs:
+ print(f"- {e}")
+ sys.exit(16)
+
+ rules, errs = load_yaml(self.rules_filename)
+ if errs:
+ print(errs)
+ sys.exit(12)
+
+ self.rule_file = RuleFile(self, self.rules_filename, rules, self.oid_list)
+ self.summarise_rule_file()
+
+ self.unit_tests = UnitTests(self.test_filename)
+ self.unit_tests.process(self.rule_file.alert_names_seen)
+
+ def report(self):
+ print("\n\nSummary\n")
+ print(f"Rule file : {self.rules_filename}")
+ print(f"Unit Test file : {self.test_filename}")
+ print(f"\nRule groups processed : {self.rule_file.group_count:>3}")
+ print(f"Rules processed : {self.rule_file.rule_count:>3}")
+ print(f"SNMP OIDs declared : {self.rule_file.oid_count:>3} {'(snmptranslate missing, unable to cross check)' if not self.oid_list else ''}")
+ print(f"Rule errors : {self.error_count:>3}")
+ print(f"Rule warnings : {self.warning_count:>3}")
+ print(f"Rule name duplicates : {len(self.rule_file.duplicate_alert_names):>3}")
+ print(f"Unit tests missing : {len(self.unit_tests.problems):>3}")
+
+ self.rule_file.report()
+ self.unit_tests.report()
+
+
+def main():
+ checker = RuleChecker()
+
+ checker.run()
+ checker.report()
+ print()
+
+ sys.exit(checker.status)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/__init__.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/__init__.py
new file mode 100644
index 0000000..45147e5
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/__init__.py
@@ -0,0 +1,189 @@
+import re
+import subprocess
+import sys
+import tempfile
+from dataclasses import asdict, dataclass, field
+from typing import Any, List
+
+import yaml
+
+from .util import replace_grafana_expr_variables
+
+
+@dataclass
+class InputSeries:
+ series: str = ''
+ values: str = ''
+
+@dataclass
+class ExprSample:
+ labels: str = ''
+ value: float = -1
+
+@dataclass
+class PromqlExprTest:
+ expr: str = ''
+ eval_time: str = '1m'
+ exp_samples: List[ExprSample] = field(default_factory=list)
+
+@dataclass
+class Test:
+ interval: str = '1m'
+ input_series: List[InputSeries] = field(default_factory=list)
+ promql_expr_test: List[PromqlExprTest] = field(default_factory=list)
+
+
+@dataclass
+class TestFile:
+ evaluation_interval: str = '1m'
+ tests: List[Test] = field(default_factory=list)
+
+
+class PromqlTest:
+ """
+ Base class to provide prometheus query test capabilities. After setting up
+ the query test with its input and expected output it's expected to run promtool.
+
+ https://prometheus.io/docs/prometheus/latest/configuration/unit_testing_rules/#test-yml
+
+ The workflow of testing would be something like:
+
+ # add prometheus query to test
+ self.set_expression('bonding_slaves > 0')
+
+ # add some prometheus input series
+ self.add_series('bonding_slaves{master="bond0"}', '2')
+ self.add_series('bonding_slaves{master="bond1"}', '3')
+ self.add_series('node_network_receive_bytes{instance="127.0.0.1",
+ device="eth1"}', "10 100 230 22")
+
+ # expected output of the query
+ self.add_exp_samples('bonding_slaves{master="bond0"}', 2)
+ self.add_exp_samples('bonding_slaves{master="bond1"}', 3)
+
+ # at last, always call promtool with:
+ self.assertTrue(self.run_promtool())
+ # assertTrue means it expect promtool to succeed
+ """
+
+ def __init__(self):
+ self.test_output_file = tempfile.NamedTemporaryFile('w+')
+
+ self.test_file = TestFile()
+ self.test = Test()
+ self.promql_expr_test = PromqlExprTest()
+ self.test.promql_expr_test.append(self.promql_expr_test)
+ self.test_file.tests.append(self.test)
+
+ self.variables = {}
+
+ def __del__(self):
+ self.test_output_file.close()
+
+
+ def set_evaluation_interval(self, interval: int, unit: str = 'm') -> None:
+ """
+ Set the evaluation interval of the time series
+
+ Args:
+ interval (int): number of units.
+ unit (str): unit type: 'ms', 's', 'm', etc...
+ """
+ self.test_file.evaluation_interval = f'{interval}{unit}'
+
+ def set_interval(self, interval: int, unit: str = 'm') -> None:
+ """
+ Set the duration of the time series
+
+ Args:
+ interval (int): number of units.
+ unit (str): unit type: 'ms', 's', 'm', etc...
+ """
+ self.test.interval = f'{interval}{unit}'
+
+ def set_expression(self, expr: str) -> None:
+ """
+ Set the prometheus expression/query used to filter data.
+
+ Args:
+ expr(str): expression/query.
+ """
+ self.promql_expr_test.expr = expr
+
+ def add_series(self, series: str, values: str) -> None:
+ """
+ Add a series to the input.
+
+ Args:
+ series(str): Prometheus series.
+ Notation: '<metric name>{<label name>=<label value>, ...}'
+ values(str): Value of the series.
+ """
+ input_series = InputSeries(series=series, values=values)
+ self.test.input_series.append(input_series)
+
+ def set_eval_time(self, eval_time: int, unit: str = 'm') -> None:
+ """
+ Set the time when the expression will be evaluated
+
+ Args:
+            eval_time (int): number of units.
+ unit (str): unit type: 'ms', 's', 'm', etc...
+ """
+ self.promql_expr_test.eval_time = f'{eval_time}{unit}'
+
+ def add_exp_samples(self, sample: str, values: Any) -> None:
+ """
+ Add an expected sample/output of the query given the series/input
+
+ Args:
+ sample(str): Expected sample.
+ Notation: '<metric name>{<label name>=<label value>, ...}'
+ values(Any): Value of the sample.
+ """
+ expr_sample = ExprSample(labels=sample, value=values)
+ self.promql_expr_test.exp_samples.append(expr_sample)
+
+ def set_variable(self, variable: str, value: str):
+ """
+ If a query makes use of grafonnet variables, for example
+ '$osd_hosts', you should change this to a real value. Example:
+
+
+ > self.set_expression('bonding_slaves{master="$osd_hosts"} > 0')
+ > self.set_variable('osd_hosts', '127.0.0.1')
+ > print(self.query)
+ > bonding_slaves{master="127.0.0.1"} > 0
+
+ Args:
+ variable(str): Variable name
+ value(str): Value to replace variable with
+
+ """
+ self.variables[variable] = value
+
+ def run_promtool(self):
+ """
+ Run promtool to test the query after setting up the input, output
+ and extra parameters.
+
+ Returns:
+ bool: True if successful, False otherwise.
+ """
+
+ for variable, value in self.variables.items():
+ expr = self.promql_expr_test.expr
+ new_expr = replace_grafana_expr_variables(expr, variable, value)
+ self.set_expression(new_expr)
+
+ test_as_dict = asdict(self.test_file)
+ yaml.dump(test_as_dict, self.test_output_file)
+
+ args = f'promtool test rules {self.test_output_file.name}'.split()
+ try:
+ subprocess.run(args, check=True)
+ return True
+ except subprocess.CalledProcessError as process_error:
+ print(yaml.dump(test_as_dict))
+ print(process_error.stderr)
+ return False
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/__init__.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/__init__.py
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature
new file mode 100644
index 0000000..1a446cd
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature
@@ -0,0 +1,54 @@
+Feature: Ceph Cluster Dashboard
+
+Scenario: "Test total PG States"
+ Given the following series:
+ | metrics | values |
+ | ceph_pg_total{foo="var"} | 10 100 |
+ | ceph_pg_total{foo="bar"} | 20 200 |
+ Then Grafana panel `PG States` with legend `Total` shows:
+ | metrics | values |
+ | {} | 300 |
+
+Scenario: "Test OSDs in"
+ Given the following series:
+ | metrics | values |
+ | ceph_osd_in{ceph_daemon="osd.0"} | 1.0 |
+ | ceph_osd_in{ceph_daemon="osd.1"} | 0.0 |
+ | ceph_osd_in{ceph_daemon="osd.2"} | 1.0 |
+ When variable `instance` is `.*`
+ Then Grafana panel `OSDs` with legend `In` shows:
+ | metrics | values |
+ | {} | 2 |
+
+Scenario: "Test OSDs down"
+ Given the following series:
+ | metrics | values |
+ | ceph_osd_up{ceph_daemon="osd.0", instance="127.0.0.1"} | 0.0 |
+ | ceph_osd_up{ceph_daemon="osd.1", instance="127.0.0.1"} | 0.0 |
+ | ceph_osd_up{ceph_daemon="osd.2", instance="127.0.0.1"} | 0.0 |
+ When variable `instance` is `127.0.0.1`
+ Then Grafana panel `OSDs` with legend `Down` shows:
+ | metrics | values |
+ | {} | 3 |
+
+Scenario: "Test OSDs out"
+ Given the following series:
+ | metrics | values |
+ | ceph_osd_in{ceph_daemon="osd.0", instance="127.0.0.1"} | 0.0 |
+ | ceph_osd_in{ceph_daemon="osd.1", instance="127.0.0.1"} | 1.0 |
+ | ceph_osd_in{ceph_daemon="osd.2", instance="127.0.0.1"} | 0.0 |
+ When variable `instance` is `127.0.0.1`
+ Then Grafana panel `OSDs` with legend `Out` shows:
+ | metrics | values |
+ | {} | 2 |
+
+Scenario: "Test OSDs all"
+ Given the following series:
+ | metrics | values |
+ | ceph_osd_metadata{ceph_daemon="osd.0", instance="127.0.0.1"} | 1.0 |
+ | ceph_osd_metadata{ceph_daemon="osd.1", instance="127.0.0.1"} | 1.0 |
+ | ceph_osd_metadata{ceph_daemon="osd.2", instance="127.0.0.1"} | 1.0 |
+ When variable `instance` is `127.0.0.1`
+ Then Grafana panel `OSDs` with legend `All` shows:
+ | metrics | values |
+ | {} | 3 |
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/environment.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/environment.py
new file mode 100644
index 0000000..5dc76a0
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/environment.py
@@ -0,0 +1,135 @@
+# type: ignore[no-redef]
+# pylint: disable=E0611,W0613,E0102
+import copy
+
+from behave import given, then, when
+from prettytable import PrettyTable
+
+from tests_dashboards import PromqlTest
+from tests_dashboards.util import get_dashboards_data, resolve_time_and_unit
+
+
+class GlobalContext:
+ def __init__(self):
+ self.tested_queries_count = 0
+ self.promql_expr_test = None
+ self.data = get_dashboards_data()
+ self.query_map = self.data['queries']
+
+ def reset_promql_test(self):
+ self.promql_expr_test = PromqlTest()
+ self.promql_expr_test.variables = copy.copy(self.data['variables'])
+
+ def print_query_stats(self):
+ total = len(self.query_map)
+ table = PrettyTable()
+ table.field_names = ['Name', 'Queries', 'Tested', 'Cover']
+
+ def percent(tested, total):
+ return str(round((tested / total) * 100, 2)) + '%'
+
+ def file_name(path):
+ return path.split('/')[-1]
+
+ total = 0
+ tested = 0
+ for path, stat in self.data['stats'].items():
+ assert stat['total']
+ table.add_row([file_name(path), stat['total'], stat['tested'],
+ percent(stat['tested'], stat['total'])])
+ total += stat['total']
+ tested += stat['tested']
+
+ assert total
+ table.add_row(['Total', total, tested, percent(tested, total)])
+ print(table)
+
+
+global_context = GlobalContext()
+
+# Behave function overloading
+# ===========================
+
+
+def before_scenario(context, scenario):
+ global_context.reset_promql_test()
+
+
+def after_scenario(context, scenario):
+ assert global_context.promql_expr_test.run_promtool()
+
+
+def after_all(context):
+ global_context.print_query_stats()
+
+
+@given("the following series")
+def step_impl(context):
+ for row in context.table:
+ metric = row['metrics']
+ value = row['values']
+ global_context.promql_expr_test.add_series(metric, value)
+
+
+@when('evaluation interval is `{interval}`')
+def step_impl(context, interval):
+ interval_without_unit, unit = resolve_time_and_unit(interval)
+ if interval_without_unit is None:
+ raise ValueError(f'Invalid interval time: {interval_without_unit}. ' +
+ 'A valid time looks like "1m" where you have a number plus a unit')
+ global_context.promql_expr_test.set_evaluation_interval(interval_without_unit, unit)
+
+
+@when('interval is `{interval}`')
+def step_impl(context, interval):
+ interval_without_unit, unit = resolve_time_and_unit(interval)
+ if interval_without_unit is None:
+ raise ValueError(f'Invalid interval time: {interval_without_unit}. ' +
+ 'A valid time looks like "1m" where you have a number plus a unit')
+ global_context.promql_expr_test.set_interval(interval_without_unit, unit)
+
+
+@when('evaluation time is `{eval_time}`')
+def step_impl(context, eval_time):
+ eval_time_without_unit, unit = resolve_time_and_unit(eval_time)
+ if eval_time_without_unit is None:
+ raise ValueError(f'Invalid evalution time: {eval_time}. ' +
+ 'A valid time looks like "1m" where you have a number plus a unit')
+ global_context.promql_expr_test.set_eval_time(eval_time_without_unit, unit)
+
+
+@when('variable `{variable}` is `{value}`')
+def step_impl(context, variable, value):
+ global_context.promql_expr_test.set_variable(variable, value)
+
+
+@then('Grafana panel `{panel_name}` with legend `{legend}` shows')
+def step_impl(context, panel_name, legend):
+ """
+    This step can have an empty legend. Since 'behave' provides no way to
+    express an empty value, the literal EMPTY is used to mark the legend as empty.
+ """
+ if legend == "EMPTY":
+ legend = ''
+ query_id = panel_name + '-' + legend
+ if query_id not in global_context.query_map:
+ raise KeyError((f'Query with legend {legend} in panel "{panel_name}"'
+ 'couldn\'t be found'))
+
+ expr = global_context.query_map[query_id]['query']
+ global_context.promql_expr_test.set_expression(expr)
+ for row in context.table:
+ metric = row['metrics']
+ value = row['values']
+ global_context.promql_expr_test.add_exp_samples(metric, float(value))
+ path = global_context.query_map[query_id]['path']
+ global_context.data['stats'][path]['tested'] += 1
+
+
+@then('query `{query}` produces')
+def step_impl(context, query):
+ global_context.promql_expr_test.set_expression(query)
+ for row in context.table:
+ metric = row['metrics']
+ value = row['values']
+ global_context.promql_expr_test.add_exp_samples(metric, float(value))
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature
new file mode 100644
index 0000000..51e3c58
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature
@@ -0,0 +1,131 @@
+Feature: Host Details Dashboard
+
+Scenario: "Test OSD"
+ Given the following series:
+ | metrics | values |
+ | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.0",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
+ | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.1",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
+ | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.2",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
+ When variable `ceph_hosts` is `127.0.0.1`
+ Then Grafana panel `OSDs` with legend `EMPTY` shows:
+ | metrics | values |
+ | {} | 3 |
+
+# IOPS Panel - begin
+
+Scenario: "Test Disk IOPS - Writes - Several OSDs per device"
+ Given the following series:
+ | metrics | values |
+ | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `ceph_hosts` is `localhost`
+ Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows:
+ | metrics | values |
+ | {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
+ | {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
+
+Scenario: "Test Disk IOPS - Writes - Single OSD per device"
+ Given the following series:
+ | metrics | values |
+ | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `ceph_hosts` is `localhost`
+ Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows:
+ | metrics | values |
+ | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
+
+Scenario: "Test Disk IOPS - Reads - Several OSDs per device"
+ Given the following series:
+ | metrics | values |
+ | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `ceph_hosts` is `localhost`
+ Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows:
+ | metrics | values |
+ | {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
+ | {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
+
+Scenario: "Test Disk IOPS - Reads - Single OSD per device"
+ Given the following series:
+ | metrics | values |
+ | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `ceph_hosts` is `localhost`
+ Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows:
+ | metrics | values |
+ | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
+
+# IOPS Panel - end
+
+# Node disk bytes written/read panel - begin
+
+Scenario: "Test disk throughput - read"
+ Given the following series:
+ | metrics | values |
+ | node_disk_read_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_read_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `ceph_hosts` is `localhost`
+ Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) read` shows:
+ | metrics | values |
+ | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
+
+Scenario: "Test disk throughput - write"
+ Given the following series:
+ | metrics | values |
+ | node_disk_written_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_written_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `ceph_hosts` is `localhost`
+ Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) write` shows:
+ | metrics | values |
+ | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
+
+# Node disk bytes written/read panel - end
+
+Scenario: "Test $ceph_hosts Disk Latency panel"
+ Given the following series:
+ | metrics | values |
+ | node_disk_write_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_write_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_read_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_read_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `ceph_hosts` is `localhost`
+ Then Grafana panel `$ceph_hosts Disk Latency` with legend `{{device}}({{ceph_daemon}})` shows:
+ | metrics | values |
+ | {ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
+
+Scenario: "Test $ceph_hosts Disk utilization"
+ Given the following series:
+ | metrics | values |
+ | node_disk_io_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_io_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `ceph_hosts` is `localhost`
+ Then Grafana panel `$ceph_hosts Disk utilization` with legend `{{device}}({{ceph_daemon}})` shows:
+ | metrics | values |
+ | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 100 |
+ | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 100 |
+
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature
new file mode 100644
index 0000000..6c5ecea
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature
@@ -0,0 +1,41 @@
+Feature: Hosts Overview Dashboard
+
+Scenario: "Test network load succeeds"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ When variable `osd_hosts` is `127.0.0.1`
+ Then Grafana panel `Network Load` with legend `EMPTY` shows:
+ | metrics | values |
+ | {} | 6 |
+
+Scenario: "Test network load with bonding succeeds"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ When variable `osd_hosts` is `127.0.0.1`
+ Then Grafana panel `Network Load` with legend `EMPTY` shows:
+ | metrics | values |
+ | {} | 6 |
+
+Scenario: "Test AVG Disk Utilization"
+ Given the following series:
+ | metrics | values |
+ | node_disk_io_time_seconds_total{device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_io_time_seconds_total{device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_io_time_seconds_total{device="sdc",instance="localhost:9100"} | 10 2000 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `osd_hosts` is `localhost`
+ Then Grafana panel `AVG Disk Utilization` with legend `EMPTY` shows:
+ | metrics | values |
+ | {} | 100 |
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature
new file mode 100644
index 0000000..0d6ca8b
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature
@@ -0,0 +1,88 @@
+Feature: OSD device details
+
+Scenario: "Test Physical Device Latency for $osd - Reads"
+ Given the following series:
+ | metrics | values |
+ | node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 60 |
+ | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 60 |
+ | node_disk_read_time_seconds_total{device="sda",instance="localhost"} | 100 600 |
+ | node_disk_read_time_seconds_total{device="sdb",instance="localhost"} | 100 600 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `osd` is `osd.0`
+ Then Grafana panel `Physical Device Latency for $osd` with legend `{{instance}}/{{device}} Reads` shows:
+ | metrics | values |
+ | {device="sda",instance="localhost"} | 10 |
+
+Scenario: "Test Physical Device Latency for $osd - Writes"
+ Given the following series:
+ | metrics | values |
+ | node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 60 |
+ | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 60 |
+ | node_disk_write_time_seconds_total{device="sda",instance="localhost"} | 100 600 |
+ | node_disk_write_time_seconds_total{device="sdb",instance="localhost"} | 100 600 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `osd` is `osd.0`
+ Then Grafana panel `Physical Device Latency for $osd` with legend `{{instance}}/{{device}} Writes` shows:
+ | metrics | values |
+ | {device="sda",instance="localhost"} | 10 |
+
+Scenario: "Test Physical Device R/W IOPS for $osd - Writes"
+ Given the following series:
+ | metrics | values |
+ | node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 100 |
+ | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 100 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `osd` is `osd.0`
+ Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Writes` shows:
+ | metrics | values |
+ | {device="sda",instance="localhost"} | 1.5 |
+
+Scenario: "Test Physical Device R/W IOPS for $osd - Reads"
+ Given the following series:
+ | metrics | values |
+ | node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 100 |
+ | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 100 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `osd` is `osd.0`
+ Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Reads` shows:
+ | metrics | values |
+ | {device="sda",instance="localhost"} | 1.5 |
+
+Scenario: "Test Physical Device R/W Bytes for $osd - Reads"
+ Given the following series:
+ | metrics | values |
+ | node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 100 |
+ | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 100 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `osd` is `osd.0`
+ Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Reads` shows:
+ | metrics | values |
+ | {device="sda",instance="localhost"} | 1.5 |
+
+Scenario: "Test Physical Device R/W Bytes for $osd - Writes"
+ Given the following series:
+ | metrics | values |
+ | node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 100 |
+ | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 100 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `osd` is `osd.0`
+ Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Writes` shows:
+ | metrics | values |
+ | {device="sda",instance="localhost"} | 1.5 |
+
+Scenario: "Test Physical Device Util% for $osd"
+ Given the following series:
+ | metrics | values |
+ | node_disk_io_time_seconds_total{device="sda",instance="localhost:9100"} | 10 100 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ When variable `osd` is `osd.0`
+ Then Grafana panel `Physical Device Util% for $osd` with legend `{{device}} on {{instance}}` shows:
+ | metrics | values |
+ | {device="sda",instance="localhost"} | 1.5 |
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature
new file mode 100644
index 0000000..78d3064
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature
@@ -0,0 +1,15 @@
+Feature: OSD Overview
+
+Scenario: "Test OSD onode Hits Ratio"
+ Given the following series:
+ | metrics | values |
+ | ceph_bluestore_onode_hits{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"} | 5255 |
+ | ceph_bluestore_onode_hits{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"} | 5419 |
+ | ceph_bluestore_onode_hits{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"} | 5242 |
+ | ceph_bluestore_onode_misses{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"} | 202 |
+ | ceph_bluestore_onode_misses{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"} | 247 |
+ | ceph_bluestore_onode_misses{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"} | 234 |
+ Then Grafana panel `OSD onode Hits Ratio` with legend `EMPTY` shows:
+ | metrics | values |
+ | {} | 9.588529429483704E-01 |
+
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature
new file mode 100644
index 0000000..e0016c5
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature
@@ -0,0 +1,139 @@
+Feature: RGW Host Detail Dashboard
+
+Scenario: "Test $rgw_servers GET/PUT Latencies - GET"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_get_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 |
+ | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `$rgw_servers GET/PUT Latencies` with legend `GET {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance_id="58892247"} | 1.5 |
+
+Scenario: "Test $rgw_servers GET/PUT Latencies - PUT"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_put_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 |
+ | ceph_rgw_put_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `$rgw_servers GET/PUT Latencies` with legend `PUT {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance_id="58892247"} | 1 |
+
+Scenario: "Test Bandwidth by HTTP Operation - GET"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_get_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.1`
+ Then Grafana panel `Bandwidth by HTTP Operation` with legend `GETs {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1.5 |
+
+Scenario: "Test Bandwidth by HTTP Operation - PUT"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_put_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.1`
+ Then Grafana panel `Bandwidth by HTTP Operation` with legend `PUTs {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 7.5E-01 |
+
+Scenario: "Test HTTP Request Breakdown - Requests Failed"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 5 7 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `HTTP Request Breakdown` with legend `Requests Failed {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1E-01 |
+
+Scenario: "Test HTTP Request Breakdown - GET"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `HTTP Request Breakdown` with legend `GETs {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.1666666666666667 |
+
+Scenario: "Test HTTP Request Breakdown - PUT"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `HTTP Request Breakdown` with legend `PUTs {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 |
+
+Scenario: "Test HTTP Request Breakdown - Other"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 175 250 345 |
+ | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 |
+ | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `HTTP Request Breakdown` with legend `Other {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | .16666666666666652 |
+
+Scenario: "Test Workload Breakdown - Failures"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 5 7 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `Workload Breakdown` with legend `Failures {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1E-01 |
+
+Scenario: "Test Workload Breakdown - GETs"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `Workload Breakdown` with legend `GETs {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.1666666666666667 |
+
+Scenario: "Test Workload Breakdown - PUTs"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `Workload Breakdown` with legend `PUTs {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 |
+
+Scenario: "Test Workload Breakdown - Other"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 175 250 345 |
+ | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 |
+ | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ And variable `rgw_servers` is `rgw.foo`
+ Then Grafana panel `Workload Breakdown` with legend `Other (DELETE,LIST) {{ceph_daemon}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | .16666666666666652 |
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature
new file mode 100644
index 0000000..642e439
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature
@@ -0,0 +1,250 @@
+Feature: RGW Overview Dashboard
+
+Scenario: "Test Average GET Latencies"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_get_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 |
+ | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `GET {{rgw_host}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 |
+
+Scenario: "Test Average PUT Latencies"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_put_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 |
+ | ceph_rgw_put_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `PUT {{rgw_host}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 |
+
+Scenario: "Test Total Requests/sec by RGW Instance"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ Then Grafana panel `Total Requests/sec by RGW Instance` with legend `{{rgw_host}}` shows:
+ | metrics | values |
+ | {rgw_host="1"} | 1.5 |
+
+Scenario: "Test GET Latencies by RGW Instance"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_get_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 |
+ | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When interval is `30s`
+ Then Grafana panel `GET Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 |
+
+Scenario: "Test Bandwidth Consumed by Type- GET"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_get_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
+ When evaluation time is `1m`
+ And interval is `30s`
+ Then Grafana panel `Bandwidth Consumed by Type` with legend `GETs` shows:
+ | metrics | values |
+ | {} | 1.5 |
+
+Scenario: "Test Bandwidth Consumed by Type- PUT"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_put_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 |
+ When evaluation time is `1m`
+ And interval is `30s`
+ Then Grafana panel `Bandwidth Consumed by Type` with legend `PUTs` shows:
+ | metrics | values |
+ | {} | 7.5E-01 |
+
+Scenario: "Test Bandwidth by RGW Instance"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_get_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
+ | ceph_rgw_put_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
+ When evaluation time is `1m`
+ And interval is `30s`
+ Then Grafana panel `Bandwidth by RGW Instance` with legend `{{rgw_host}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.1", instance_id="92806566", rgw_host="1"} | 2.25 |
+
+Scenario: "Test PUT Latencies by RGW Instance"
+ Given the following series:
+ | metrics | values |
+ | ceph_rgw_put_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 |
+ | ceph_rgw_put_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ When evaluation time is `1m`
+ And interval is `30s`
+ Then Grafana panel `PUT Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
+ | metrics | values |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 |
+
+Scenario: "Test Total backend responses by HTTP code"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_http_responses_total{job="haproxy",code="200",instance="ingress.rgw.1",proxy="backend"} | 10 100 |
+ | haproxy_backend_http_responses_total{job="haproxy",code="404",instance="ingress.rgw.1",proxy="backend"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ When variable `code` is `200`
+ Then Grafana panel `Total responses by HTTP code` with legend `Backend {{ code }}` shows:
+ | metrics | values |
+ | {code="200"} | 1.5 |
+
+Scenario: "Test Total frontend responses by HTTP code"
+ Given the following series:
+ | metrics | values |
+ | haproxy_frontend_http_responses_total{job="haproxy",code="200",instance="ingress.rgw.1",proxy="frontend"} | 10 100 |
+ | haproxy_frontend_http_responses_total{job="haproxy",code="404",instance="ingress.rgw.1",proxy="frontend"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ When variable `code` is `200`
+ Then Grafana panel `Total responses by HTTP code` with legend `Frontend {{ code }}` shows:
+ | metrics | values |
+ | {code="200"} | 1.5 |
+
+Scenario: "Test Total http frontend requests by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_frontend_http_requests_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_frontend_http_requests_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total requests / responses` with legend `Requests` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total backend response errors by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_response_errors_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_backend_response_errors_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total requests / responses` with legend `Response errors` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total frontend requests errors by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_frontend_request_errors_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_frontend_request_errors_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total requests / responses` with legend `Requests errors` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total backend redispatch warnings by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_redispatch_warnings_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_backend_redispatch_warnings_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total requests / responses` with legend `Backend redispatch` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total backend retry warnings by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_retry_warnings_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_backend_retry_warnings_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total requests / responses` with legend `Backend retry` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total frontend requests denied by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_frontend_requests_denied_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_frontend_requests_denied_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total requests / responses` with legend `Request denied` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total backend current queue by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_current_queue{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_backend_current_queue{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total requests / responses` with legend `Backend Queued` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 200 |
+
+Scenario: "Test Total frontend connections by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_frontend_connections_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_frontend_connections_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total number of connections` with legend `Front` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total backend connections attempts by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_connection_attempts_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_backend_connection_attempts_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total number of connections` with legend `Back` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total backend connections error by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_connection_errors_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_backend_connection_errors_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Total number of connections` with legend `Back errors` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 3 |
+
+Scenario: "Test Total frontend bytes incoming by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_frontend_bytes_in_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_frontend_bytes_in_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Current total of incoming / outgoing bytes` with legend `IN Front` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 24 |
+
+Scenario: "Test Total frontend bytes outgoing by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_frontend_bytes_out_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_frontend_bytes_out_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Current total of incoming / outgoing bytes` with legend `OUT Front` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 24 |
+
+Scenario: "Test Total backend bytes incoming by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_bytes_in_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_backend_bytes_in_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Current total of incoming / outgoing bytes` with legend `IN Back` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 24 |
+
+Scenario: "Test Total backend bytes outgoing by instance"
+ Given the following series:
+ | metrics | values |
+ | haproxy_backend_bytes_out_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 |
+ | haproxy_backend_bytes_out_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 |
+ When variable `ingress_service` is `ingress.rgw.1`
+ Then Grafana panel `Current total of incoming / outgoing bytes` with legend `OUT Back` shows:
+ | metrics | values |
+ | {instance="ingress.rgw.1"} | 24 |
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/self.feature b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/self.feature
new file mode 100644
index 0000000..2b44ce0
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/self.feature
@@ -0,0 +1,68 @@
+Feature: Test tester
+
+Scenario: "Simple query works"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
+ | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces:
+ | metrics | values |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 100 |
+
+Scenario: "Query with evaluation time"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
+ | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ When evaluation time is `0m`
+ Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces:
+ | metrics | values |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 |
+
+Scenario: "Query with evaluation time and variable value"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
+ | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ When evaluation time is `0m`
+ And variable `osd_hosts` is `127.0.0.1`
+ Then query `node_network_transmit_bytes{instance="$osd_hosts"} > 0` produces:
+ | metrics | values |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 |
+
+Scenario: "Query with interval time"
+ Given the following series:
+ | metrics | values |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 300 |
+ | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 300 |
+ | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ When evaluation time is `2h`
+ And evaluation interval is `1h`
+ And interval is `1h`
+ And variable `osd_hosts` is `127.0.0.1`
+ Then query `node_network_transmit_bytes{instance="$osd_hosts"} > 0` produces:
+ | metrics | values |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 200 |
+ | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 200 |
\ No newline at end of file
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/steps/__init__.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/steps/__init__.py
new file mode 100644
index 0000000..0b90f46
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/features/steps/__init__.py
@@ -0,0 +1 @@
+# This file and the steps files are needed, even if empty, because of 'behave' :(
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/requirements.txt b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/requirements.txt
new file mode 100644
index 0000000..8ad130e
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/requirements.txt
@@ -0,0 +1,12 @@
+attrs==21.2.0
+behave==1.2.6
+py==1.10.0
+pyparsing==2.4.7
+PyYAML==6.0
+types-PyYAML==6.0.0
+typing-extensions==3.10.0.2
+termcolor==1.1.0
+types-termcolor==1.1.2
+dataclasses==0.6
+types-dataclasses==0.6.1
+prettytable==2.4.0
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/util.py b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/util.py
new file mode 100644
index 0000000..1fce655
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tests_dashboards/util.py
@@ -0,0 +1,109 @@
+import json
+import re
+from pathlib import Path
+from typing import Any, Dict, Tuple, Union
+
+from termcolor import cprint
+
+UNITS = ['ms', 's', 'm', 'h', 'd', 'w', 'y']
+
+
+def resolve_time_and_unit(time: str) -> Union[Tuple[int, str], Tuple[None, None]]:
+ """
+ Divide time with its unit and return a tuple like (10, 'm')
+    Return (None, None) if it's an invalid Prometheus time
+ Valid units are inside UNITS.
+ """
+ if time[-1] in UNITS:
+ return int(time[:-1]), time[-1]
+ if time[-2:] in UNITS:
+ return int(time[:-2]), time[-2:]
+ return None, None
+
+
+def get_dashboards_data() -> Dict[str, Any]:
+ data: Dict[str, Any] = {'queries': {}, 'variables': {}, 'stats': {}}
+ for file in Path(__file__).parent.parent \
+ .joinpath('dashboards_out').glob('*.json'):
+ with open(file, 'r') as f:
+ dashboard_data = json.load(f)
+ data['stats'][str(file)] = {'total': 0, 'tested': 0}
+ add_dashboard_queries(data, dashboard_data, str(file))
+ add_dashboard_variables(data, dashboard_data)
+ add_default_dashboards_variables(data)
+ return data
+
+
+def add_dashboard_queries(data: Dict[str, Any], dashboard_data: Dict[str, Any], path: str) -> None:
+ """
+ Grafana panels can have more than one target/query, in order to identify each
+ query in the panel we append the "legendFormat" of the target to the panel name.
+ format: panel_name-legendFormat
+ """
+ if 'panels' not in dashboard_data:
+ return
+ error = 0
+ for panel in dashboard_data['panels']:
+ if (
+ 'title' in panel
+ and 'targets' in panel
+ and len(panel['targets']) > 0
+ and 'expr' in panel['targets'][0]
+ ):
+ for target in panel['targets']:
+ title = panel['title']
+ legend_format = target['legendFormat'] if 'legendFormat' in target else ""
+ query_id = f'{title}-{legend_format}'
+ if query_id in data['queries']:
+ # NOTE: If two or more panels have the same name and legend it
+ # might suggest a refactoring is needed or add something else
+ # to identify each query.
+ conflict_file = Path(data['queries'][query_id]['path']).name
+ file = Path(path).name
+ cprint((f'ERROR: Query in panel "{title}" with legend "{legend_format}"'
+ f' already exists. Conflict "{conflict_file}" '
+ f'with: "{file}"'), 'red')
+ error = 1
+ data['queries'][query_id] = {'query': target['expr'], 'path': path}
+ data['stats'][path]['total'] += 1
+ if error:
+ raise ValueError('Missing legend_format in queries, please add a proper value.')
+
+
+def add_dashboard_variables(data: Dict[str, Any], dashboard_data: Dict[str, Any]) -> None:
+ if 'templating' not in dashboard_data or 'list' not in dashboard_data['templating']:
+ return
+ for variable in dashboard_data['templating']['list']:
+ if 'name' in variable:
+ data['variables'][variable['name']] = 'UNSET VARIABLE'
+
+def add_default_dashboards_variables(data: Dict[str, Any]) -> None:
+ data['variables']['job'] = 'ceph'
+ data['variables']['job_haproxy'] = 'haproxy'
+ data['variables']['__rate_interval'] = '1m'
+
+def replace_grafana_expr_variables(expr: str, variable: str, value: Any) -> str:
+ """ Replace grafana variables in expression with a value
+
+    It should match the whole word; 'osd' mustn't match the 'osd' prefix in 'osd_hosts'
+ >>> replace_grafana_expr_variables('metric{name~="$osd_hosts|$other|$osd"}', \
+ 'osd', 'replacement')
+ 'metric{name~="$osd_hosts|$other|replacement"}'
+
+ >>> replace_grafana_expr_variables('metric{name~="$osd_hosts|$other|$osd"}', \
+ 'other', 'replacement')
+ 'metric{name~="$osd_hosts|replacement|$osd"}'
+
+ It replaces words with dollar prefix
+ >>> replace_grafana_expr_variables('metric{name~="no_dollar|$other|$osd"}', \
+ 'no_dollar', 'replacement')
+ 'metric{name~="no_dollar|$other|$osd"}'
+
+ It shouldn't replace the next char after the variable (positive lookahead test).
+ >>> replace_grafana_expr_variables('metric{name~="$osd"}', \
+ 'osd', 'replacement')
+ 'metric{name~="replacement"}'
+ """
+ regex = fr'\${variable}(?=\W)'
+ new_expr = re.sub(regex, fr'{value}', expr)
+ return new_expr
diff --git a/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tox.ini b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tox.ini
new file mode 100644
index 0000000..df5bb46
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/ceph/ceph/monitoring/ceph-mixin/tox.ini
@@ -0,0 +1,75 @@
+[tox]
+envlist =
+ lint,
+ jsonnet-{check,lint,fix},
+ jsonnet-bundler-{install,update},
+ promql-query-{test,lint},
+ alerts-check
+skipsdist = true
+
+[testenv:jsonnet-bundler-{install,update}]
+whitelist_externals =
+ jb
+description =
+ install: Install the jsonnet dependencies
+ update: Update the jsonnet dependencies
+commands =
+ install: jb install
+ update: jb update
+
+[testenv:jsonnet-{check,fix,lint}]
+basepython = python3
+whitelist_externals =
+ find
+ jb
+ jsonnet
+ jsonnetfm
+ sh
+description =
+ check: Ensure that auto-generated files matches the current version
+ fix: Update generated files from jsonnet file with latest changes
+ lint: Test if jsonnet files are linted (without any update)
+deps =
+ -rrequirements-grafonnet.txt
+depends = jsonnet-bundler-install
+commands =
+ check: sh test-jsonnet.sh
+ lint: ./lint-jsonnet.sh --test
+ fix: jsonnet -J vendor -m dashboards_out dashboards.jsonnet
+
+[testenv:lint]
+description =
+ Run python linters
+deps =
+ -rrequirements-lint.txt
+setenv =
+commands =
+ pylint --rcfile=.pylintrc tests_dashboards
+ mypy tests_dashboards
+ isort tests_dashboards
+
+[testenv:promql-query-test]
+description =
+ lint: Run promtool check on grafana queries
+ test: Run promtool unit testing on grafana queries.
+deps =
+ -rrequirements-lint.txt
+depends = grafonnet-check
+setenv =
+whitelist_externals =
+ promtool
+commands =
+ behave tests_dashboards/features
+
+[testenv:alerts-{fix,check,lint}]
+deps =
+ -rrequirements-alerts.txt
+ pytest
+depends = grafonnet-check
+whitelist_externals =
+ promtool
+commands =
+ fix: jsonnet -J vendor -S alerts.jsonnet -o prometheus_alerts.yml
+ lint: promtool check rules prometheus_alerts.yml
+ test: pytest -rA tests_alerts/test_syntax.py tests_alerts/test_unittests.py
+ python3 ./tests_alerts/validate_rules.py
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/alert_condition.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/alert_condition.libsonnet
new file mode 100644
index 0000000..163d082
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/alert_condition.libsonnet
@@ -0,0 +1,47 @@
+{
+ /**
+ * Returns a new condition of alert of graph panel.
+ * Currently the only condition type that exists is a Query condition
+ * that allows to specify a query letter, time range and an aggregation function.
+ *
+ * @name alertCondition.new
+ *
+ * @param evaluatorParams Value of threshold
+ * @param evaluatorType Type of threshold
+ * @param operatorType Operator between conditions
+ * @param queryRefId The letter defines what query to execute from the Metrics tab
+   * @param queryTimeStart Beginning of time range
+ * @param queryTimeEnd End of time range
+ * @param reducerParams Params of an aggregation function
+ * @param reducerType Name of an aggregation function
+ *
+ * @return A json that represents a condition of alert
+ */
+ new(
+ evaluatorParams=[],
+ evaluatorType='gt',
+ operatorType='and',
+ queryRefId='A',
+ queryTimeEnd='now',
+ queryTimeStart='5m',
+ reducerParams=[],
+ reducerType='avg',
+ )::
+ {
+ evaluator: {
+ params: if std.type(evaluatorParams) == 'array' then evaluatorParams else [evaluatorParams],
+ type: evaluatorType,
+ },
+ operator: {
+ type: operatorType,
+ },
+ query: {
+ params: [queryRefId, queryTimeStart, queryTimeEnd],
+ },
+ reducer: {
+ params: if std.type(reducerParams) == 'array' then reducerParams else [reducerParams],
+ type: reducerType,
+ },
+ type: 'query',
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/alertlist.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/alertlist.libsonnet
new file mode 100644
index 0000000..94df360
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/alertlist.libsonnet
@@ -0,0 +1,43 @@
+{
+ /**
+ * Creates an [Alert list panel](https://grafana.com/docs/grafana/latest/panels/visualizations/alert-list-panel/)
+ *
+ * @name alertlist.new
+ *
+ * @param title (default `''`)
+ * @param span (optional)
+ * @param show (default `'current'`) Whether the panel should display the current alert state or recent alert state changes.
+ * @param limit (default `10`) Sets the maximum number of alerts to list.
+ * @param sortOrder (default `'1'`) '1': alerting, '2': no_data, '3': pending, '4': ok, '5': paused
+ * @param stateFilter (optional)
+ * @param onlyAlertsOnDashboard (optional) Shows alerts only from the dashboard the alert list is in
+ * @param transparent (optional) Whether to display the panel without a background
+ * @param description (optional)
+ * @param datasource (optional)
+ */
+ new(
+ title='',
+ span=null,
+ show='current',
+ limit=10,
+ sortOrder=1,
+ stateFilter=[],
+ onlyAlertsOnDashboard=true,
+ transparent=null,
+ description=null,
+ datasource=null,
+ )::
+ {
+ [if transparent != null then 'transparent']: transparent,
+ title: title,
+ [if span != null then 'span']: span,
+ type: 'alertlist',
+ show: show,
+ limit: limit,
+ sortOrder: sortOrder,
+ [if show != 'changes' then 'stateFilter']: stateFilter,
+ onlyAlertsOnDashboard: onlyAlertsOnDashboard,
+ [if description != null then 'description']: description,
+ datasource: datasource,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/annotation.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/annotation.libsonnet
new file mode 100644
index 0000000..955b029
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/annotation.libsonnet
@@ -0,0 +1,40 @@
+{
+ default::
+ {
+ builtIn: 1,
+ datasource: '-- Grafana --',
+ enable: true,
+ hide: true,
+ iconColor: 'rgba(0, 211, 255, 1)',
+ name: 'Annotations & Alerts',
+ type: 'dashboard',
+ },
+
+ /**
+ * @name annotation.datasource
+ */
+
+ datasource(
+ name,
+ datasource,
+ expr=null,
+ enable=true,
+ hide=false,
+ iconColor='rgba(255, 96, 96, 1)',
+ tags=[],
+ type='tags',
+ builtIn=null,
+ )::
+ {
+ datasource: datasource,
+ enable: enable,
+ [if expr != null then 'expr']: expr,
+ hide: hide,
+ iconColor: iconColor,
+ name: name,
+ showIn: 0,
+ tags: tags,
+ type: type,
+ [if builtIn != null then 'builtIn']: builtIn,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/bar_gauge_panel.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/bar_gauge_panel.libsonnet
new file mode 100644
index 0000000..313e5a0
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/bar_gauge_panel.libsonnet
@@ -0,0 +1,47 @@
+{
+ /**
+ * Create a [bar gauge panel](https://grafana.com/docs/grafana/latest/panels/visualizations/bar-gauge-panel/),
+ *
+ * @name barGaugePanel.new
+ *
+ * @param title Panel title.
+ * @param description (optional) Panel description.
+ * @param datasource (optional) Panel datasource.
+ * @param unit (optional) The unit of the data.
+   * @param thresholds (optional) An array of threshold values.
+ *
+ * @method addTarget(target) Adds a target object.
+ * @method addTargets(targets) Adds an array of targets.
+ */
+ new(
+ title,
+ description=null,
+ datasource=null,
+ unit=null,
+ thresholds=[],
+ ):: {
+ type: 'bargauge',
+ title: title,
+ [if description != null then 'description']: description,
+ datasource: datasource,
+ targets: [
+ ],
+ fieldConfig: {
+ defaults: {
+ unit: unit,
+ thresholds: {
+ mode: 'absolute',
+ steps: thresholds,
+ },
+ },
+ },
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ // automatically ref id in added targets.
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudmonitoring.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudmonitoring.libsonnet
new file mode 100644
index 0000000..49cef5e
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudmonitoring.libsonnet
@@ -0,0 +1,57 @@
+{
+ /**
+ * Creates a [Google Cloud Monitoring target](https://grafana.com/docs/grafana/latest/datasources/google-cloud-monitoring/)
+ *
+ * @name cloudmonitoring.target
+ *
+ * @param metric
+ * @param project
+ * @param filters (optional)
+ * @param groupBys (optional)
+ * @param period (default: `'cloud-monitoring-auto'`)
+ * @param crossSeriesReducer (default 'REDUCE_MAX')
+ * @param valueType (default 'INT64')
+ * @param perSeriesAligner (default 'ALIGN_DELTA')
+ * @param metricKind (default 'CUMULATIVE')
+ * @param unit (optional)
+ * @param alias (optional)
+
+ * @return Panel target
+ */
+
+ target(
+ metric,
+ project,
+ filters=[],
+ groupBys=[],
+ period='cloud-monitoring-auto',
+ crossSeriesReducer='REDUCE_MAX',
+ valueType='INT64',
+ perSeriesAligner='ALIGN_DELTA',
+ metricKind='CUMULATIVE',
+ unit=1,
+ alias=null,
+ ):: {
+ metricQuery: {
+ [if alias != null then 'aliasBy']: alias,
+ alignmentPeriod: period,
+ crossSeriesReducer: crossSeriesReducer,
+ [if filters != null then 'filters']: filters,
+ [if groupBys != null then 'groupBys']: groupBys,
+ metricKind: metricKind,
+ metricType: metric,
+ perSeriesAligner: perSeriesAligner,
+ projectName: project,
+ unit: unit,
+ valueType: valueType,
+ },
+ sloQuery: {
+ [if alias != null then 'aliasBy']: alias,
+ alignmentPeriod: period,
+ projectName: project,
+ selectorName: 'select_slo_health',
+ serviceId: '',
+ sloId: '',
+ },
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudwatch.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudwatch.libsonnet
new file mode 100644
index 0000000..f56056f
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/cloudwatch.libsonnet
@@ -0,0 +1,51 @@
+{
+ /**
+ * Creates a [CloudWatch target](https://grafana.com/docs/grafana/latest/datasources/cloudwatch/)
+ *
+ * @name cloudwatch.target
+ *
+ * @param region
+ * @param namespace
+ * @param metric
+ * @param datasource (optional)
+ * @param statistic (default: `'Average'`)
+ * @param alias (optional)
+ * @param highResolution (default: `false`)
+ * @param period (default: `'auto'`)
+ * @param dimensions (optional)
+ * @param id (optional)
+ * @param expression (optional)
+ * @param hide (optional)
+
+ * @return Panel target
+ */
+
+ target(
+ region,
+ namespace,
+ metric,
+ datasource=null,
+ statistic='Average',
+ alias=null,
+ highResolution=false,
+ period='auto',
+ dimensions={},
+ id=null,
+ expression=null,
+ hide=null
+ ):: {
+ region: region,
+ namespace: namespace,
+ metricName: metric,
+ [if datasource != null then 'datasource']: datasource,
+ statistics: [statistic],
+ [if alias != null then 'alias']: alias,
+ highResolution: highResolution,
+ period: period,
+ dimensions: dimensions,
+ [if id != null then 'id']: id,
+ [if expression != null then 'expression']: expression,
+ [if hide != null then 'hide']: hide,
+
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/dashboard.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/dashboard.libsonnet
new file mode 100644
index 0000000..1cc1bf3
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/dashboard.libsonnet
@@ -0,0 +1,181 @@
+local timepickerlib = import 'timepicker.libsonnet';
+
+{
+ /**
+ * Creates a [dashboard](https://grafana.com/docs/grafana/latest/features/dashboard/dashboards/)
+ *
+ * @name dashboard.new
+ *
+ * @param title The title of the dashboard
+ * @param editable (default: `false`) Whether the dashboard is editable via Grafana UI.
+ * @param style (default: `'dark'`) Theme of dashboard, `'dark'` or `'light'`
+ * @param tags (optional) Array of tags associated to the dashboard, e.g.`['tag1','tag2']`
+ * @param time_from (default: `'now-6h'`)
+ * @param time_to (default: `'now'`)
+ * @param timezone (default: `'browser'`) Timezone of the dashboard, `'utc'` or `'browser'`
+ * @param refresh (default: `''`) Auto-refresh interval, e.g. `'30s'`
+ * @param timepicker (optional) See timepicker API
+ * @param graphTooltip (default: `'default'`) `'default'` : no shared crosshair or tooltip (0), `'shared_crosshair'`: shared crosshair (1), `'shared_tooltip'`: shared crosshair AND shared tooltip (2)
+ * @param hideControls (default: `false`)
+ * @param schemaVersion (default: `14`) Version of the Grafana JSON schema, incremented each time an update brings changes. `26` for Grafana 7.1.5, `22` for Grafana 6.7.4, `16` for Grafana 5.4.5, `14` for Grafana 4.6.3. etc.
+ * @param uid (default: `''`) Unique dashboard identifier as a string (8-40), that can be chosen by users. Used to identify a dashboard to update when using Grafana REST API.
+ * @param description (optional)
+ *
+ * @method addTemplate(template) Add a template variable
+ * @method addTemplates(templates) Adds an array of template variables
+ * @method addAnnotation(annotation) Add an [annotation](https://grafana.com/docs/grafana/latest/dashboards/annotations/)
+ * @method addPanel(panel,gridPos) Appends a panel, with an optional grid position in grid coordinates, e.g. `gridPos={'x':0, 'y':0, 'w':12, 'h': 9}`
+ * @method addPanels(panels) Appends an array of panels
+ * @method addLink(link) Adds a [dashboard link](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
+ * @method addLinks(dashboardLink) Adds an array of [dashboard links](https://grafana.com/docs/grafana/latest/linking/dashboard-links/)
+ * @method addRequired(type, name, id, version)
+ * @method addInput(name, label, type, pluginId, pluginName, description, value)
+ * @method addRow(row) Adds a row. This is the legacy row concept from Grafana < 5, when rows were needed for layout. Rows should now be added via `addPanel`.
+ */
+ new(
+ title,
+ editable=false,
+ style='dark',
+ tags=[],
+ time_from='now-6h',
+ time_to='now',
+ timezone='browser',
+ refresh='',
+ timepicker=timepickerlib.new(),
+ graphTooltip='default',
+ hideControls=false,
+ schemaVersion=14,
+ uid='',
+ description=null,
+ ):: {
+ local it = self,
+ _annotations:: [],
+ [if uid != '' then 'uid']: uid,
+ editable: editable,
+ [if description != null then 'description']: description,
+ gnetId: null,
+ graphTooltip:
+ if graphTooltip == 'shared_tooltip' then 2
+ else if graphTooltip == 'shared_crosshair' then 1
+ else if graphTooltip == 'default' then 0
+ else graphTooltip,
+ hideControls: hideControls,
+ id: null,
+ links: [],
+ panels:: [],
+ refresh: refresh,
+ rows: [],
+ schemaVersion: schemaVersion,
+ style: style,
+ tags: tags,
+ time: {
+ from: time_from,
+ to: time_to,
+ },
+ timezone: timezone,
+ timepicker: timepicker,
+ title: title,
+ version: 0,
+ addAnnotations(annotations):: self {
+ _annotations+:: annotations,
+ },
+ addAnnotation(a):: self.addAnnotations([a]),
+ addTemplates(templates):: self {
+ templates+: templates,
+ },
+ addTemplate(t):: self.addTemplates([t]),
+ templates:: [],
+ annotations: { list: it._annotations },
+ templating: { list: it.templates },
+ _nextPanel:: 2,
+ addRow(row)::
+ self {
+ // automatically number panels in added rows.
+ // https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
+ local n = std.length(row.panels),
+ local nextPanel = super._nextPanel,
+ local panels = std.makeArray(n, function(i)
+ row.panels[i] { id: nextPanel + i }),
+
+ _nextPanel: nextPanel + n,
+ rows+: [row { panels: panels }],
+ },
+ addPanels(newpanels)::
+ self {
+ // automatically number panels in added rows.
+ // https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
+ local n = std.foldl(function(numOfPanels, p)
+ (if 'panels' in p then
+ numOfPanels + 1 + std.length(p.panels)
+ else
+ numOfPanels + 1), newpanels, 0),
+ local nextPanel = super._nextPanel,
+ local _panels = std.makeArray(
+ std.length(newpanels), function(i)
+ newpanels[i] {
+ id: nextPanel + (
+ if i == 0 then
+ 0
+ else
+ if 'panels' in _panels[i - 1] then
+ (_panels[i - 1].id - nextPanel) + 1 + std.length(_panels[i - 1].panels)
+ else
+ (_panels[i - 1].id - nextPanel) + 1
+
+ ),
+ [if 'panels' in newpanels[i] then 'panels']: std.makeArray(
+ std.length(newpanels[i].panels), function(j)
+ newpanels[i].panels[j] {
+ id: 1 + j +
+ nextPanel + (
+ if i == 0 then
+ 0
+ else
+ if 'panels' in _panels[i - 1] then
+ (_panels[i - 1].id - nextPanel) + 1 + std.length(_panels[i - 1].panels)
+ else
+ (_panels[i - 1].id - nextPanel) + 1
+
+ ),
+ }
+ ),
+ }
+ ),
+
+ _nextPanel: nextPanel + n,
+ panels+::: _panels,
+ },
+ addPanel(panel, gridPos):: self.addPanels([panel { gridPos: gridPos }]),
+ addRows(rows):: std.foldl(function(d, row) d.addRow(row), rows, self),
+ addLink(link):: self {
+ links+: [link],
+ },
+ addLinks(dashboardLinks):: std.foldl(function(d, t) d.addLink(t), dashboardLinks, self),
+ required:: [],
+ __requires: it.required,
+ addRequired(type, name, id, version):: self {
+ required+: [{ type: type, name: name, id: id, version: version }],
+ },
+ inputs:: [],
+ __inputs: it.inputs,
+ addInput(
+ name,
+ label,
+ type,
+ pluginId=null,
+ pluginName=null,
+ description='',
+ value=null,
+ ):: self {
+ inputs+: [{
+ name: name,
+ label: label,
+ type: type,
+ [if pluginId != null then 'pluginId']: pluginId,
+ [if pluginName != null then 'pluginName']: pluginName,
+ [if value != null then 'value']: value,
+ description: description,
+ }],
+ },
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/dashlist.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/dashlist.libsonnet
new file mode 100644
index 0000000..436cb02
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/dashlist.libsonnet
@@ -0,0 +1,41 @@
+{
+ /**
+ * Creates a [dashlist panel](https://grafana.com/docs/grafana/latest/panels/visualizations/dashboard-list-panel/).
+ * It requires the dashlist panel plugin in grafana, which is built-in.
+ *
+ * @name dashlist.new
+ *
+ * @param title The title of the dashlist panel.
+ * @param description (optional) Description of the panel
+ * @param query (optional) Query to search by
+ * @param tags (optional) Array of tag(s) to search by
+ * @param recent (default `true`) Displays recently viewed dashboards
+ * @param search (default `false`) Description of the panel
+ * @param starred (default `false`) Displays starred dashboards
+   * @param headings (default `true`) Chosen list selection (starred, recently viewed, search) is shown as a heading
+ * @param limit (default `10`) Set maximum items in a list
+ * @return A json that represents a dashlist panel
+ */
+ new(
+ title,
+ description=null,
+ query=null,
+ tags=[],
+ recent=true,
+ search=false,
+ starred=false,
+ headings=true,
+ limit=10,
+ ):: {
+ type: 'dashlist',
+ title: title,
+ query: if query != null then query else '',
+ tags: tags,
+ recent: recent,
+ search: search,
+ starred: starred,
+ headings: headings,
+ limit: limit,
+ [if description != null then 'description']: description,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/elasticsearch.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/elasticsearch.libsonnet
new file mode 100644
index 0000000..769e1c7
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/elasticsearch.libsonnet
@@ -0,0 +1,51 @@
+{
+ /**
+ * Creates an [Elasticsearch target](https://grafana.com/docs/grafana/latest/datasources/elasticsearch/)
+ *
+ * @name elasticsearch.target
+ *
+ * @param query
+ * @param timeField
+ * @param id (optional)
+ * @param datasource (optional)
+ * @param metrics (optional)
+ * @param bucketAggs (optional)
+ * @param alias (optional)
+ */
+ target(
+ query,
+ timeField,
+ id=null,
+ datasource=null,
+ metrics=[{
+ field: 'value',
+ id: null,
+ type: 'percentiles',
+ settings: {
+ percents: [
+ '90',
+ ],
+ },
+ }],
+ bucketAggs=[{
+ field: 'timestamp',
+ id: null,
+ type: 'date_histogram',
+ settings: {
+ interval: '1s',
+ min_doc_count: 0,
+ trimEdges: 0,
+ },
+ }],
+ alias=null,
+ ):: {
+ [if datasource != null then 'datasource']: datasource,
+ query: query,
+ id: id,
+ timeField: timeField,
+ bucketAggs: bucketAggs,
+ metrics: metrics,
+ alias: alias,
+ // TODO: generate bucket ids
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/gauge_panel.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/gauge_panel.libsonnet
new file mode 100644
index 0000000..40b3673
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/gauge_panel.libsonnet
@@ -0,0 +1,211 @@
+{
+ /**
+ * Creates a [gauge panel](https://grafana.com/docs/grafana/latest/panels/visualizations/gauge-panel/).
+ *
+ * @name gaugePanel.new
+ *
+ * @param title Panel title.
+ * @param description (optional) Panel description.
+ * @param transparent (default `false`) Whether to display the panel without a background.
+ * @param datasource (optional) Panel datasource.
+ * @param allValues (default `false`) Show all values instead of reducing to one.
+ * @param valueLimit (optional) Limit of values in all values mode.
+ * @param reducerFunction (default `'mean'`) Function to use to reduce values to when using single value.
+ * @param fields (default `''`) Fields that should be included in the panel.
+ * @param showThresholdLabels (default `false`) Render the threshold values around the gauge bar.
+ * @param showThresholdMarkers (default `true`) Render the thresholds as an outer bar.
+ * @param unit (default `'percent'`) Panel unit field option.
+ * @param min (optional) Leave empty to calculate based on all values.
+ * @param max (optional) Leave empty to calculate based on all values.
+ * @param decimals Number of decimal places to show.
+ * @param displayName Change the field or series name.
+ * @param noValue (optional) What to show when there is no value.
+ * @param thresholdsMode (default `'absolute'`) 'absolute' or 'percentage'.
+ * @param repeat (optional) Name of variable that should be used to repeat this panel.
+ * @param repeatDirection (default `'h'`) 'h' for horizontal or 'v' for vertical.
+ * @param repeatMaxPerRow (optional) Maximum panels per row in repeat mode.
+ * @param pluginVersion (default `'7'`) Plugin version the panel should be modeled for. This has been tested with the default, '7', and '6.7'.
+ *
+ * @method addTarget(target) Adds a target object.
+ * @method addTargets(targets) Adds an array of targets.
+ * @method addLink(link) Adds a [panel link](https://grafana.com/docs/grafana/latest/linking/panel-links/). Argument format: `{ title: 'Link Title', url: 'https://...', targetBlank: true }`.
+ * @method addLinks(links) Adds an array of links.
+ * @method addThreshold(step) Adds a threshold step. Argument format: `{ color: 'green', value: 0 }`.
+ * @method addThresholds(steps) Adds an array of threshold steps.
+ * @method addMapping(mapping) Adds a value mapping.
+ * @method addMappings(mappings) Adds an array of value mappings.
+ * @method addDataLink(link) Adds a data link.
+ * @method addDataLinks(links) Adds an array of data links.
+ * @param timeFrom (optional)
+ */
+ new(
+ title,
+ description=null,
+ transparent=false,
+ datasource=null,
+ allValues=false,
+ valueLimit=null,
+ reducerFunction='mean',
+ fields='',
+ showThresholdLabels=false,
+ showThresholdMarkers=true,
+ unit='percent',
+ min=0,
+ max=100,
+ decimals=null,
+ displayName=null,
+ noValue=null,
+ thresholdsMode='absolute',
+ repeat=null,
+ repeatDirection='h',
+ repeatMaxPerRow=null,
+ timeFrom=null,
+ pluginVersion='7',
+ ):: {
+
+ type: 'gauge',
+ title: title,
+ [if description != null then 'description']: description,
+ transparent: transparent,
+ datasource: datasource,
+ targets: [],
+ links: [],
+ [if repeat != null then 'repeat']: repeat,
+ [if repeat != null then 'repeatDirection']: repeatDirection,
+ [if repeat != null then 'repeatMaxPerRow']: repeatMaxPerRow,
+ [if timeFrom != null then 'timeFrom']: timeFrom,
+
+ // targets
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
+
+ // links
+ addLink(link):: self {
+ links+: [link],
+ },
+ addLinks(links):: std.foldl(function(p, l) p.addLink(l), links, self),
+
+ pluginVersion: pluginVersion,
+ } + (
+
+ if pluginVersion >= '7' then {
+ options: {
+ reduceOptions: {
+ values: allValues,
+ [if allValues && valueLimit != null then 'limit']: valueLimit,
+ calcs: [
+ reducerFunction,
+ ],
+ fields: fields,
+ },
+ showThresholdLabels: showThresholdLabels,
+ showThresholdMarkers: showThresholdMarkers,
+ },
+ fieldConfig: {
+ defaults: {
+ unit: unit,
+ [if min != null then 'min']: min,
+ [if max != null then 'max']: max,
+ [if decimals != null then 'decimals']: decimals,
+ [if displayName != null then 'displayName']: displayName,
+ [if noValue != null then 'noValue']: noValue,
+ thresholds: {
+ mode: thresholdsMode,
+ steps: [],
+ },
+ mappings: [],
+ links: [],
+ },
+ },
+
+ // thresholds
+ addThreshold(step):: self {
+ fieldConfig+: { defaults+: { thresholds+: { steps+: [step] } } },
+ },
+
+ // mappings
+ _nextMapping:: 0,
+ addMapping(mapping):: self {
+ local nextMapping = super._nextMapping,
+ _nextMapping: nextMapping + 1,
+ fieldConfig+: { defaults+: { mappings+: [mapping { id: nextMapping }] } },
+ },
+
+ // data links
+ addDataLink(link):: self {
+ fieldConfig+: { defaults+: { links+: [link] } },
+ },
+
+ // Overrides
+ addOverride(
+ matcher=null,
+ properties=null,
+ ):: self {
+ fieldConfig+: {
+ overrides+: [
+ {
+ [if matcher != null then 'matcher']: matcher,
+ [if properties != null then 'properties']: properties,
+ },
+ ],
+ },
+ },
+ addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
+ } else {
+
+ options: {
+ fieldOptions: {
+ values: allValues,
+ [if allValues && valueLimit != null then 'limit']: valueLimit,
+ calcs: [
+ reducerFunction,
+ ],
+ fields: fields,
+ defaults: {
+ unit: unit,
+ [if min != null then 'min']: min,
+ [if max != null then 'max']: max,
+ [if decimals != null then 'decimals']: decimals,
+ [if displayName != null then 'displayName']: displayName,
+ [if noValue != null then 'noValue']: noValue,
+ thresholds: {
+ mode: thresholdsMode,
+ steps: [],
+ },
+ mappings: [],
+ links: [],
+ },
+ },
+ showThresholdLabels: showThresholdLabels,
+ showThresholdMarkers: showThresholdMarkers,
+ },
+
+ // thresholds
+ addThreshold(step):: self {
+ options+: { fieldOptions+: { defaults+: { thresholds+: { steps+: [step] } } } },
+ },
+
+ // mappings
+ _nextMapping:: 0,
+ addMapping(mapping):: self {
+ local nextMapping = super._nextMapping,
+ _nextMapping: nextMapping + 1,
+ options+: { fieldOptions+: { defaults+: { mappings+: [mapping { id: nextMapping }] } } },
+ },
+
+ // data links
+ addDataLink(link):: self {
+ options+: { fieldOptions+: { defaults+: { links+: [link] } } },
+ },
+ }
+ ) + {
+ addThresholds(steps):: std.foldl(function(p, s) p.addThreshold(s), steps, self),
+ addMappings(mappings):: std.foldl(function(p, m) p.addMapping(m), mappings, self),
+ addDataLinks(links):: std.foldl(function(p, l) p.addDataLink(l), links, self),
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet
new file mode 100644
index 0000000..b94ddf3
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet
@@ -0,0 +1,32 @@
+{
+ alertlist:: import 'alertlist.libsonnet',
+ dashboard:: import 'dashboard.libsonnet',
+ template:: import 'template.libsonnet',
+ text:: import 'text.libsonnet',
+ timepicker:: import 'timepicker.libsonnet',
+ row:: import 'row.libsonnet',
+ link:: import 'link.libsonnet',
+ annotation:: import 'annotation.libsonnet',
+ graphPanel:: import 'graph_panel.libsonnet',
+ logPanel:: import 'log_panel.libsonnet',
+ tablePanel:: import 'table_panel.libsonnet',
+ singlestat:: import 'singlestat.libsonnet',
+ pieChartPanel:: import 'pie_chart_panel.libsonnet',
+ influxdb:: import 'influxdb.libsonnet',
+ prometheus:: import 'prometheus.libsonnet',
+ loki:: import 'loki.libsonnet',
+ sql:: import 'sql.libsonnet',
+ graphite:: import 'graphite.libsonnet',
+ alertCondition:: import 'alert_condition.libsonnet',
+ cloudmonitoring:: import 'cloudmonitoring.libsonnet',
+ cloudwatch:: import 'cloudwatch.libsonnet',
+ elasticsearch:: import 'elasticsearch.libsonnet',
+ heatmapPanel:: import 'heatmap_panel.libsonnet',
+ dashlist:: import 'dashlist.libsonnet',
+ pluginlist:: import 'pluginlist.libsonnet',
+ gauge:: error 'gauge is removed, migrate to gaugePanel',
+ gaugePanel:: import 'gauge_panel.libsonnet',
+ barGaugePanel:: import 'bar_gauge_panel.libsonnet',
+ statPanel:: import 'stat_panel.libsonnet',
+ transformation:: import 'transformation.libsonnet',
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/graph_panel.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/graph_panel.libsonnet
new file mode 100644
index 0000000..8727695
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/graph_panel.libsonnet
@@ -0,0 +1,313 @@
+{
+ /**
+ * Creates a [graph panel](https://grafana.com/docs/grafana/latest/panels/visualizations/graph-panel/).
+ * It requires the graph panel plugin in grafana, which is built-in.
+ *
+ * @name graphPanel.new
+ *
+ * @param title The title of the graph panel.
+ * @param description (optional) The description of the panel
+ * @param span (optional) Width of the panel
+ * @param datasource (optional) Datasource
+ * @param fill (default `1`) , integer from 0 to 10
+ * @param fillGradient (default `0`) , integer from 0 to 10
+ * @param linewidth (default `1`) Line Width, integer from 0 to 10
+ * @param decimals (optional) Override automatic decimal precision for legend and tooltip. If null, not added to the json output.
+ * @param decimalsY1 (optional) Override automatic decimal precision for the first Y axis. If null, use decimals parameter.
+ * @param decimalsY2 (optional) Override automatic decimal precision for the second Y axis. If null, use decimals parameter.
+ * @param min_span (optional) Min span
+ * @param format (default `short`) Unit of the Y axes
+ * @param formatY1 (optional) Unit of the first Y axis
+ * @param formatY2 (optional) Unit of the second Y axis
+ * @param min (optional) Min of the Y axes
+ * @param max (optional) Max of the Y axes
+ * @param maxDataPoints (optional) If the data source supports it, sets the maximum number of data points for each series returned.
+ * @param labelY1 (optional) Label of the first Y axis
+ * @param labelY2 (optional) Label of the second Y axis
+ * @param x_axis_mode (default `'time'`) X axis mode, one of [time, series, histogram]
+ * @param x_axis_values (default `'total'`) Chosen value of series, one of [avg, min, max, total, count]
+ * @param x_axis_buckets (optional) Restricts the x axis to this amount of buckets
+ * @param x_axis_min (optional) Restricts the x axis to display from this value if supplied
+ * @param x_axis_max (optional) Restricts the x axis to display up to this value if supplied
+ * @param lines (default `true`) Display lines
+ * @param points (default `false`) Display points
+ * @param pointradius (default `5`) Radius of the points, allowed values are 0.5 or [1 ... 10] with step 1
+ * @param bars (default `false`) Display bars
+ * @param staircase (default `false`) Display line as staircase
+ * @param dashes (default `false`) Display line as dashes
+ * @param stack (default `false`) Whether to stack values
+ * @param repeat (optional) Name of variable that should be used to repeat this panel.
+ * @param repeatDirection (default `'h'`) 'h' for horizontal or 'v' for vertical.
+ * @param legend_show (default `true`) Show legend
+ * @param legend_values (default `false`) Show values in legend
+ * @param legend_min (default `false`) Show min in legend
+ * @param legend_max (default `false`) Show max in legend
+ * @param legend_current (default `false`) Show current in legend
+ * @param legend_total (default `false`) Show total in legend
+ * @param legend_avg (default `false`) Show average in legend
+ * @param legend_alignAsTable (default `false`) Show legend as table
+ * @param legend_rightSide (default `false`) Show legend to the right
+ * @param legend_sideWidth (optional) Legend width
+ * @param legend_sort (optional) Sort order of legend
+ * @param legend_sortDesc (optional) Sort legend descending
+ * @param aliasColors (optional) Define color mappings for graphs
+ * @param thresholds (optional) An array of graph thresholds
+ * @param logBase1Y (default `1`) Value of logarithm base of the first Y axis
+ * @param logBase2Y (default `1`) Value of logarithm base of the second Y axis
+ * @param transparent (default `false`) Whether to display the panel without a background.
+ * @param value_type (default `'individual'`) Type of tooltip value
+ * @param shared_tooltip (default `true`) Allow to group or split tooltips on mouseover within a chart
+ * @param percentage (default `false`) show as percentages
+ * @param interval (default `null`) A lower limit for the interval.
+
+ *
+ * @method addTarget(target) Adds a target object.
+ * @method addTargets(targets) Adds an array of targets.
+ * @method addSeriesOverride(override)
+ * @method addYaxis(format,min,max,label,show,logBase,decimals) Adds a Y axis to the graph
+ * @method addAlert(alert) Adds an alert
+ * @method addLink(link) Adds a [panel link](https://grafana.com/docs/grafana/latest/linking/panel-links/)
+ * @method addLinks(links) Adds an array of links.
+ */
+ new(
+ title,
+ span=null,
+ fill=1,
+ fillGradient=0,
+ linewidth=1,
+ decimals=null,
+ decimalsY1=null,
+ decimalsY2=null,
+ description=null,
+ min_span=null,
+ format='short',
+ formatY1=null,
+ formatY2=null,
+ min=null,
+ max=null,
+ labelY1=null,
+ labelY2=null,
+ x_axis_mode='time',
+ x_axis_values='total',
+ x_axis_buckets=null,
+ x_axis_min=null,
+ x_axis_max=null,
+ lines=true,
+ datasource=null,
+ points=false,
+ pointradius=5,
+ bars=false,
+ staircase=false,
+ height=null,
+ nullPointMode='null',
+ dashes=false,
+ stack=false,
+ repeat=null,
+ repeatDirection=null,
+ sort=0,
+ show_xaxis=true,
+ legend_show=true,
+ legend_values=false,
+ legend_min=false,
+ legend_max=false,
+ legend_current=false,
+ legend_total=false,
+ legend_avg=false,
+ legend_alignAsTable=false,
+ legend_rightSide=false,
+ legend_sideWidth=null,
+ legend_hideEmpty=null,
+ legend_hideZero=null,
+ legend_sort=null,
+ legend_sortDesc=null,
+ aliasColors={},
+ thresholds=[],
+ links=[],
+ logBase1Y=1,
+ logBase2Y=1,
+ transparent=false,
+ value_type='individual',
+ shared_tooltip=true,
+ percentage=false,
+ maxDataPoints=null,
+ time_from=null,
+ time_shift=null,
+ interval=null
+ ):: {
+ title: title,
+ [if span != null then 'span']: span,
+ [if min_span != null then 'minSpan']: min_span,
+ [if decimals != null then 'decimals']: decimals,
+ type: 'graph',
+ datasource: datasource,
+ targets: [
+ ],
+ [if description != null then 'description']: description,
+ [if height != null then 'height']: height,
+ renderer: 'flot',
+ yaxes: [
+ self.yaxe(
+ if formatY1 != null then formatY1 else format,
+ min,
+ max,
+ decimals=(if decimalsY1 != null then decimalsY1 else decimals),
+ logBase=logBase1Y,
+ label=labelY1
+ ),
+ self.yaxe(
+ if formatY2 != null then formatY2 else format,
+ min,
+ max,
+ decimals=(if decimalsY2 != null then decimalsY2 else decimals),
+ logBase=logBase2Y,
+ label=labelY2
+ ),
+ ],
+ xaxis: {
+ show: show_xaxis,
+ mode: x_axis_mode,
+ name: null,
+ values: if x_axis_mode == 'series' then [x_axis_values] else [],
+ buckets: if x_axis_mode == 'histogram' then x_axis_buckets else null,
+ [if x_axis_min != null then 'min']: x_axis_min,
+ [if x_axis_max != null then 'max']: x_axis_max,
+ },
+ lines: lines,
+ fill: fill,
+ fillGradient: fillGradient,
+ linewidth: linewidth,
+ dashes: dashes,
+ dashLength: 10,
+ spaceLength: 10,
+ points: points,
+ pointradius: pointradius,
+ bars: bars,
+ stack: stack,
+ percentage: percentage,
+ [if maxDataPoints != null then 'maxDataPoints']: maxDataPoints,
+ legend: {
+ show: legend_show,
+ values: legend_values,
+ min: legend_min,
+ max: legend_max,
+ current: legend_current,
+ total: legend_total,
+ alignAsTable: legend_alignAsTable,
+ rightSide: legend_rightSide,
+ sideWidth: legend_sideWidth,
+ avg: legend_avg,
+ [if legend_hideEmpty != null then 'hideEmpty']: legend_hideEmpty,
+ [if legend_hideZero != null then 'hideZero']: legend_hideZero,
+ [if legend_sort != null then 'sort']: legend_sort,
+ [if legend_sortDesc != null then 'sortDesc']: legend_sortDesc,
+ },
+ nullPointMode: nullPointMode,
+ steppedLine: staircase,
+ tooltip: {
+ value_type: value_type,
+ shared: shared_tooltip,
+ sort: if sort == 'decreasing' then 2 else if sort == 'increasing' then 1 else sort,
+ },
+ timeFrom: time_from,
+ timeShift: time_shift,
+ [if interval != null then 'interval']: interval,
+ [if transparent == true then 'transparent']: transparent,
+ aliasColors: aliasColors,
+ repeat: repeat,
+ [if repeatDirection != null then 'repeatDirection']: repeatDirection,
+ seriesOverrides: [],
+ thresholds: thresholds,
+ links: links,
+ yaxe(
+ format='short',
+ min=null,
+ max=null,
+ label=null,
+ show=true,
+ logBase=1,
+ decimals=null,
+ ):: {
+ label: label,
+ show: show,
+ logBase: logBase,
+ min: min,
+ max: max,
+ format: format,
+ [if decimals != null then 'decimals']: decimals,
+ },
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ // automatically ref id in added targets.
+ // https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
+ addSeriesOverride(override):: self {
+ seriesOverrides+: [override],
+ },
+ resetYaxes():: self {
+ yaxes: [],
+ },
+ addYaxis(
+ format='short',
+ min=null,
+ max=null,
+ label=null,
+ show=true,
+ logBase=1,
+ decimals=null,
+ ):: self {
+ yaxes+: [self.yaxe(format, min, max, label, show, logBase, decimals)],
+ },
+ addAlert(
+ name,
+ executionErrorState='alerting',
+ forDuration='5m',
+ frequency='60s',
+ handler=1,
+ message='',
+ noDataState='no_data',
+ notifications=[],
+ alertRuleTags={},
+ ):: self {
+ local it = self,
+ _conditions:: [],
+ alert: {
+ name: name,
+ conditions: it._conditions,
+ executionErrorState: executionErrorState,
+ 'for': forDuration,
+ frequency: frequency,
+ handler: handler,
+ noDataState: noDataState,
+ notifications: notifications,
+ message: message,
+ alertRuleTags: alertRuleTags,
+ },
+ addCondition(condition):: self {
+ _conditions+: [condition],
+ },
+ addConditions(conditions):: std.foldl(function(p, c) p.addCondition(c), conditions, it),
+ },
+ addLink(link):: self {
+ links+: [link],
+ },
+ addLinks(links):: std.foldl(function(p, t) p.addLink(t), links, self),
+ addOverride(
+ matcher=null,
+ properties=null,
+ ):: self {
+ fieldConfig+: {
+ overrides+: [
+ {
+ [if matcher != null then 'matcher']: matcher,
+ [if properties != null then 'properties']: properties,
+ },
+ ],
+ },
+ },
+ addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/graphite.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/graphite.libsonnet
new file mode 100644
index 0000000..46a0113
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/graphite.libsonnet
@@ -0,0 +1,29 @@
+{
+ /**
+ * Creates a [Graphite target](https://grafana.com/docs/grafana/latest/datasources/graphite/)
+ *
+ * @name graphite.target
+ *
+ * @param target Graphite Query. Nested queries are possible by adding the query reference (refId).
+ * @param targetFull (optional) Expanding the @target. Used in nested queries.
+ * @param hide (default `false`) Disable query on graph.
+ * @param textEditor (default `false`) Enable raw query mode.
+ * @param datasource (optional) Datasource.
+
+ * @return Panel target
+ */
+ target(
+ target,
+ targetFull=null,
+ hide=false,
+ textEditor=false,
+ datasource=null,
+ ):: {
+ target: target,
+ hide: hide,
+ textEditor: textEditor,
+
+ [if targetFull != null then 'targetFull']: targetFull,
+ [if datasource != null then 'datasource']: datasource,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/heatmap_panel.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/heatmap_panel.libsonnet
new file mode 100644
index 0000000..5e9a04c
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/heatmap_panel.libsonnet
@@ -0,0 +1,150 @@
+{
+ /**
+ * Creates a [heatmap panel](https://grafana.com/docs/grafana/latest/panels/visualizations/heatmap/).
+ * Requires the heatmap panel plugin in Grafana, which is built-in.
+ *
+ * @name heatmapPanel.new
+ *
+ * @param title The title of the heatmap panel
+ * @param description (optional) Description of panel
+ * @param datasource (optional) Datasource
+ * @param min_span (optional) Min span
+ * @param span (optional) Width of the panel
+ * @param cards_cardPadding (optional) How much padding to put between bucket cards
+ * @param cards_cardRound (optional) How much rounding should be applied to the bucket card shape
+ * @param color_cardColor (default `'#b4ff00'`) Hex value of color used when color_colorScheme is 'opacity'
+ * @param color_colorScale (default `'sqrt'`) How to scale the color range, 'linear' or 'sqrt'
+ * @param color_colorScheme (default `'interpolateOranges'`) TODO: document
+ * @param color_exponent (default `0.5`) TODO: document
+ * @param color_max (optional) The value for the end of the color range
+ * @param color_min (optional) The value for the beginning of the color range
+ * @param color_mode (default `'spectrum'`) How to display difference in frequency with color
+ * @param dataFormat (default `'timeseries'`) How to format the data
+ * @param highlightCards (default `true`) TODO: document
+ * @param hideZeroBuckets (default `false`) Whether or not to hide empty buckets, default is false
+ * @param legend_show (default `false`) Show legend
+ * @param minSpan (optional) Minimum span of the panel when repeated on a template variable
+ * @param repeat (optional) Variable used to repeat the heatmap panel
+ * @param repeatDirection (optional) Which direction to repeat the panel, 'h' for horizontal and 'v' for vertically
+ * @param tooltipDecimals (optional) The number of decimal places to display in the tooltip
+ * @param tooltip_show (default `true`) Whether or not to display a tooltip when hovering over the heatmap
+ * @param tooltip_showHistogram (default `false`) Whether or not to display a histogram in the tooltip
+ * @param xAxis_show (default `true`) Whether or not to show the X axis, default true
+ * @param xBucketNumber (optional) Number of buckets for the X axis
+ * @param xBucketSize (optional) Size of X axis buckets. Number or interval(10s, 15h, etc.) Has priority over xBucketNumber
+ * @param yAxis_decimals (optional) Override automatic decimal precision for the Y axis
+ * @param yAxis_format (default `'short'`) Unit of the Y axis
+ * @param yAxis_logBase (default `1`) Only if dataFormat is 'timeseries'
+ * @param yAxis_min (optional) Only if dataFormat is 'timeseries', min of the Y axis
+ * @param yAxis_max (optional) Only if dataFormat is 'timeseries', max of the Y axis
+ * @param yAxis_show (default `true`) Whether or not to show the Y axis
+ * @param yAxis_splitFactor (optional) TODO: document
+ * @param yBucketBound (default `'auto'`) Which bound ('lower' or 'upper') of the bucket to use
+ * @param yBucketNumber (optional) Number of buckets for the Y axis
+ * @param yBucketSize (optional) Size of Y axis buckets. Has priority over yBucketNumber
+ * @param maxDataPoints (optional) The maximum data points per series. Used directly by some data sources and used in calculation of auto interval. With streaming data this value is used for the rolling buffer.
+ *
+ * @method addTarget(target) Adds a target object.
+ * @method addTargets(targets) Adds an array of targets.
+ */
+ new(
+ title,
+ datasource=null,
+ description=null,
+ cards_cardPadding=null,
+ cards_cardRound=null,
+ color_cardColor='#b4ff00',
+ color_colorScale='sqrt',
+ color_colorScheme='interpolateOranges',
+ color_exponent=0.5,
+ color_max=null,
+ color_min=null,
+ color_mode='spectrum',
+ dataFormat='timeseries',
+ highlightCards=true,
+ hideZeroBuckets=false,
+ legend_show=false,
+ minSpan=null,
+ span=null,
+ repeat=null,
+ repeatDirection=null,
+ tooltipDecimals=null,
+ tooltip_show=true,
+ tooltip_showHistogram=false,
+ xAxis_show=true,
+ xBucketNumber=null,
+ xBucketSize=null,
+ yAxis_decimals=null,
+ yAxis_format='short',
+ yAxis_logBase=1,
+ yAxis_min=null,
+ yAxis_max=null,
+ yAxis_show=true,
+ yAxis_splitFactor=null,
+ yBucketBound='auto',
+ yBucketNumber=null,
+ yBucketSize=null,
+ maxDataPoints=null,
+ ):: {
+ title: title,
+ type: 'heatmap',
+ [if description != null then 'description']: description,
+ datasource: datasource,
+ cards: {
+ cardPadding: cards_cardPadding,
+ cardRound: cards_cardRound,
+ },
+ color: {
+ mode: color_mode,
+ cardColor: color_cardColor,
+ colorScale: color_colorScale,
+ exponent: color_exponent,
+ [if color_mode == 'spectrum' then 'colorScheme']: color_colorScheme,
+ [if color_max != null then 'max']: color_max,
+ [if color_min != null then 'min']: color_min,
+ },
+ [if dataFormat != null then 'dataFormat']: dataFormat,
+ heatmap: {},
+ hideZeroBuckets: hideZeroBuckets,
+ highlightCards: highlightCards,
+ legend: {
+ show: legend_show,
+ },
+ [if minSpan != null then 'minSpan']: minSpan,
+ [if span != null then 'span']: span,
+ [if repeat != null then 'repeat']: repeat,
+ [if repeatDirection != null then 'repeatDirection']: repeatDirection,
+ tooltip: {
+ show: tooltip_show,
+ showHistogram: tooltip_showHistogram,
+ },
+ [if tooltipDecimals != null then 'tooltipDecimals']: tooltipDecimals,
+ xAxis: {
+ show: xAxis_show,
+ },
+ xBucketNumber: if dataFormat == 'timeseries' && xBucketSize != null then xBucketNumber else null,
+ xBucketSize: if dataFormat == 'timeseries' && xBucketSize != null then xBucketSize else null,
+ yAxis: {
+ decimals: yAxis_decimals,
+ [if dataFormat == 'timeseries' then 'logBase']: yAxis_logBase,
+ format: yAxis_format,
+ [if dataFormat == 'timeseries' then 'max']: yAxis_max,
+ [if dataFormat == 'timeseries' then 'min']: yAxis_min,
+ show: yAxis_show,
+ splitFactor: yAxis_splitFactor,
+ },
+ yBucketBound: yBucketBound,
+ [if dataFormat == 'timeseries' then 'yBucketNumber']: yBucketNumber,
+ [if dataFormat == 'timeseries' then 'yBucketSize']: yBucketSize,
+ [if maxDataPoints != null then 'maxDataPoints']: maxDataPoints,
+
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
+ },
+
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/influxdb.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/influxdb.libsonnet
new file mode 100644
index 0000000..dd7c4fd
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/influxdb.libsonnet
@@ -0,0 +1,104 @@
+{
+ /**
+ * Creates an [InfluxDB target](https://grafana.com/docs/grafana/latest/datasources/influxdb/)
+ *
+ * @name influxdb.target
+ *
+ * @param query Raw InfluxQL statement
+ *
+ * @param alias (optional) 'Alias By' pattern
+ * @param datasource (optional) Datasource
+ * @param hide (optional) Disable query on graph
+ *
+ * @param rawQuery (optional) Enable/disable raw query mode
+ *
+ * @param policy (default: `'default'`) Tagged query 'From' policy
+ * @param measurement (optional) Tagged query 'From' measurement
+ * @param group_time (default: `'$__interval'`) 'Group by' time condition (if set to null, do not group by time)
+ * @param group_tags (optional) 'Group by' tags list
+ * @param fill (default: `'none'`) 'Group by' missing values fill mode (works only with 'Group by time()')
+ *
+ * @param resultFormat (default: `'time_series'`) Format results as 'Time series' or 'Table'
+ *
+ * @return Panel target
+ */
+ target(
+ query=null,
+
+ alias=null,
+ datasource=null,
+ hide=null,
+
+ rawQuery=null,
+
+ policy='default',
+ measurement=null,
+
+ group_time='$__interval',
+ group_tags=[],
+ fill='none',
+
+ resultFormat='time_series',
+ ):: {
+ local it = self,
+
+ [if alias != null then 'alias']: alias,
+ [if datasource != null then 'datasource']: datasource,
+ [if hide != null then 'hide']: hide,
+
+ [if query != null then 'query']: query,
+ [if rawQuery != null then 'rawQuery']: rawQuery,
+ [if rawQuery == null && query != null then 'rawQuery']: true,
+
+ policy: policy,
+ [if measurement != null then 'measurement']: measurement,
+ tags: [],
+ select: [],
+ groupBy:
+ if group_time != null then
+ [{ type: 'time', params: [group_time] }] +
+ [{ type: 'tag', params: [tag_name] } for tag_name in group_tags] +
+ [{ type: 'fill', params: [fill] }]
+ else
+ [{ type: 'tag', params: [tag_name] } for tag_name in group_tags],
+
+ resultFormat: resultFormat,
+
+ where(key, operator, value, condition=null):: self {
+ /*
+ * Adds query tag condition ('Where' section)
+ */
+ tags:
+ if std.length(it.tags) == 0 then
+ [{ key: key, operator: operator, value: value }]
+ else
+ it.tags + [{
+ key: key,
+ operator: operator,
+ value: value,
+ condition: if condition == null then 'AND' else condition,
+ }],
+ },
+
+ selectField(value):: self {
+ /*
+ * Adds InfluxDB selection ('field(value)' part of 'Select' statement)
+ */
+ select+: [[{ params: [value], type: 'field' }]],
+ },
+
+ addConverter(type, params=[]):: self {
+ /*
+ * Appends converter (aggregation, selector, etc.) to last added selection
+ */
+ local len = std.length(it.select),
+ select:
+ if len == 1 then
+ [it.select[0] + [{ params: params, type: type }]]
+ else if len > 1 then
+ it.select[0:(len - 1)] + [it.select[len - 1] + [{ params: params, type: type }]]
+ else
+ [],
+ },
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/link.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/link.libsonnet
new file mode 100644
index 0000000..5e5ebd2
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/link.libsonnet
@@ -0,0 +1,39 @@
+{
+ /**
+ * Creates [links](https://grafana.com/docs/grafana/latest/linking/linking-overview/) to navigate to other dashboards.
+ *
+ * @param title Human-readable label for the link.
+ * @param tags Limits the linked dashboards to only the ones with the corresponding tags. Otherwise, Grafana includes links to all other dashboards.
+ * @param asDropdown (default: `true`) Whether to use a dropdown (with an optional title). If `false`, displays the dashboard links side by side across the top of dashboard.
+ * @param includeVars (default: `false`) Whether to include template variables currently used as query parameters in the link. Any matching templates in the linked dashboard are set to the values from the link
+ * @param keepTime (default: `false`) Whether to include the current dashboard time range in the link (e.g. from=now-3h&to=now)
+ * @param icon (default: `'external link'`) Icon displayed with the link.
+ * @param url (default: `''`) URL of the link
+ * @param targetBlank (default: `false`) Whether the link will open in a new window.
+ * @param type (default: `'dashboards'`)
+ *
+ * @name link.dashboards
+ */
+ dashboards(
+ title,
+ tags,
+ asDropdown=true,
+ includeVars=false,
+ keepTime=false,
+ icon='external link',
+ url='',
+ targetBlank=false,
+ type='dashboards',
+ )::
+ {
+ asDropdown: asDropdown,
+ icon: icon,
+ includeVars: includeVars,
+ keepTime: keepTime,
+ tags: tags,
+ title: title,
+ type: type,
+ url: url,
+ targetBlank: targetBlank,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/log_panel.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/log_panel.libsonnet
new file mode 100644
index 0000000..747ad5f
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/log_panel.libsonnet
@@ -0,0 +1,56 @@
+{
+ /**
+ * Creates a [log panel](https://grafana.com/docs/grafana/latest/panels/visualizations/logs-panel/).
+ * It requires the log panel plugin in grafana, which is built-in.
+ *
+ * @name logPanel.new
+ *
+ * @param title (default `''`) The title of the log panel.
+ * @param span (optional) Width of the panel
+ * @param datasource (optional) Datasource
+ * @param showLabels (default `false`) Whether to show or hide labels
+ * @param showTime (default `true`) Whether to show or hide time for each line
+ * @param wrapLogMessage (default `true`) Whether to wrap the log line to the next line
+ * @param sortOrder (default `'Descending'`) Sort logs by time (can be 'Descending' or 'Ascending')
+ *
+ * @method addTarget(target) Adds a target object
+ * @method addTargets(targets) Adds an array of targets
+ */
+ new(
+ title='',
+ datasource=null,
+ time_from=null,
+ time_shift=null,
+ showLabels=false,
+ showTime=true,
+ sortOrder='Descending',
+ wrapLogMessage=true,
+ span=12,
+ height=null,
+ ):: {
+ [if height != null then 'height']: height,
+ span: span,
+ datasource: datasource,
+ options: {
+ showLabels: showLabels,
+ showTime: showTime,
+ sortOrder: sortOrder,
+ wrapLogMessage: wrapLogMessage,
+ },
+ targets: [
+ ],
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ // automatically ref id in added targets.
+ // https://github.com/kausalco/public/blob/master/klumps/grafana.libsonnet
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
+ timeFrom: time_from,
+ timeShift: time_shift,
+ title: title,
+ type: 'logs',
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/loki.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/loki.libsonnet
new file mode 100644
index 0000000..a300f5a
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/loki.libsonnet
@@ -0,0 +1,22 @@
+{
+ /**
+ * Creates a [Loki target](https://grafana.com/docs/grafana/latest/datasources/loki/)
+ *
+ * @name loki.target
+ *
+ * @param expr
+ * @param hide (optional) Disable query on graph.
+ * @param legendFormat (optional) Defines the legend. Defaults to ''.
+ */
+ target(
+ expr,
+ hide=null,
+ legendFormat='',
+ instant=null,
+ ):: {
+ [if hide != null then 'hide']: hide,
+ expr: expr,
+ legendFormat: legendFormat,
+ [if instant != null then 'instant']: instant,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/pie_chart_panel.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/pie_chart_panel.libsonnet
new file mode 100644
index 0000000..11719e1
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/pie_chart_panel.libsonnet
@@ -0,0 +1,72 @@
+{
+ /**
+ * Creates a pie chart panel.
+ * It requires the [pie chart panel plugin in grafana](https://grafana.com/grafana/plugins/grafana-piechart-panel),
+ * which needs to be explicitly installed.
+ *
+ * @name pieChartPanel.new
+ *
+ * @param title The title of the pie chart panel.
+ * @param description (default `''`) Description of the panel
+ * @param span (optional) Width of the panel
+ * @param min_span (optional) Min span
+ * @param datasource (optional) Datasource
+ * @param aliasColors (optional) Define color mappings
+ * @param pieType (default `'pie'`) Type of pie chart (one of pie or donut)
+ * @param showLegend (default `true`) Show legend
+ * @param showLegendPercentage (default `true`) Show percentage values in the legend
+ * @param legendType (default `'Right side'`) Type of legend (one of 'Right side', 'Under graph' or 'On graph')
+ * @param valueName (default `'current'`) Type of tooltip value
+ * @param repeat (optional) Variable used to repeat the pie chart
+ * @param repeatDirection (optional) Which direction to repeat the panel, 'h' for horizontal and 'v' for vertical
+ * @param maxPerRow (optional) Number of panels to display when repeated. Used in combination with repeat.
+ * @return A json that represents a pie chart panel
+ *
+ * @method addTarget(target) Adds a target object.
+ */
+ new(
+ title,
+ description='',
+ span=null,
+ min_span=null,
+ datasource=null,
+ height=null,
+ aliasColors={},
+ pieType='pie',
+ valueName='current',
+ showLegend=true,
+ showLegendPercentage=true,
+ legendType='Right side',
+ repeat=null,
+ repeatDirection=null,
+ maxPerRow=null,
+ ):: {
+ type: 'grafana-piechart-panel',
+ [if description != null then 'description']: description,
+ pieType: pieType,
+ title: title,
+ aliasColors: aliasColors,
+ [if span != null then 'span']: span,
+ [if min_span != null then 'minSpan']: min_span,
+ [if height != null then 'height']: height,
+ [if repeat != null then 'repeat']: repeat,
+ [if repeatDirection != null then 'repeatDirection']: repeatDirection,
+ [if maxPerRow != null then 'maxPerRow']: maxPerRow,
+ valueName: valueName,
+ datasource: datasource,
+ legend: {
+ show: showLegend,
+ values: true,
+ percentage: showLegendPercentage,
+ },
+ legendType: legendType,
+ targets: [
+ ],
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/pluginlist.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/pluginlist.libsonnet
new file mode 100644
index 0000000..d3f23ab
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/pluginlist.libsonnet
@@ -0,0 +1,23 @@
+{
+ /**
+ * Returns a new pluginlist panel that can be added in a row.
+ * It requires the pluginlist panel plugin in grafana, which is built-in.
+ *
+ * @name pluginlist.new
+ *
+ * @param title The title of the pluginlist panel.
+ * @param description (optional) Description of the panel
+ * @param limit (optional) Set maximum items in a list
+ * @return A json that represents a pluginlist panel
+ */
+ new(
+ title,
+ description=null,
+ limit=null,
+ ):: {
+ type: 'pluginlist',
+ title: title,
+ [if limit != null then 'limit']: limit,
+ [if description != null then 'description']: description,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/prometheus.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/prometheus.libsonnet
new file mode 100644
index 0000000..46b75b0
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/prometheus.libsonnet
@@ -0,0 +1,38 @@
+{
+ /**
+ * Creates a [Prometheus target](https://grafana.com/docs/grafana/latest/datasources/prometheus/)
+ * to be added to panels.
+ *
+ * @name prometheus.target
+ *
+ * @param expr PromQL query to be exercised against Prometheus. Checkout [Prometheus documentation](https://prometheus.io/docs/prometheus/latest/querying/basics/).
+ * @param format (default `'time_series'`) Switch between `'table'`, `'time_series'` or `'heatmap'`. Table will only work in the Table panel. Heatmap is suitable for displaying metrics of the Histogram type on a Heatmap panel. Under the hood, it converts cumulative histograms to regular ones and sorts series by the bucket bound.
+ * @param intervalFactor (default `2`)
+ * @param legendFormat (default `''`) Controls the name of the time series, using name or pattern. For example `{{hostname}}` is replaced with the label value for the label `hostname`.
+ * @param datasource (optional) Name of the Prometheus datasource. Leave by default otherwise.
+ * @param interval (optional) Time span used to aggregate or group data points by time. By default Grafana uses an automatic interval calculated based on the width of the graph.
+ * @param instant (optional) Perform an "instant" query, to return only the latest value that Prometheus has scraped for the requested time series. Instant queries return results much faster than normal range queries. Use them to look up label sets.
+ * @param hide (optional) Set to `true` to hide the target from the panel.
+ *
+ * @return A Prometheus target to be added to panels.
+ */
+ target(
+ expr,
+ format='time_series',
+ intervalFactor=2,
+ legendFormat='',
+ datasource=null,
+ interval=null,
+ instant=null,
+ hide=null,
+ ):: {
+ [if hide != null then 'hide']: hide,
+ [if datasource != null then 'datasource']: datasource,
+ expr: expr,
+ format: format,
+ intervalFactor: intervalFactor,
+ legendFormat: legendFormat,
+ [if interval != null then 'interval']: interval,
+ [if instant != null then 'instant']: instant,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/row.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/row.libsonnet
new file mode 100644
index 0000000..b380192
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/row.libsonnet
@@ -0,0 +1,47 @@
+{
+ /**
+ * Creates a [row](https://grafana.com/docs/grafana/latest/features/dashboard/dashboards/#rows).
+ * Rows are logical dividers within a dashboard and used to group panels together.
+ *
+ * @name row.new
+ *
+ * @param title The title of the row.
+ * @param showTitle (default `true` if title is set) Whether to show the row title
+ * @param titleSize (default `'h6'`) The size of the title
+ * @param collapse (default `false`) The initial state of the row when opening the dashboard. Panels in a collapsed row are not loaded until the row is expanded.
+ * @param repeat (optional) Name of variable that should be used to repeat this row. It is recommended to use the variable in the row title as well.
+ *
+ * @method addPanels(panels) Appends an array of nested panels
+ * @method addPanel(panel,gridPos) Appends a nested panel, with an optional grid position in grid coordinates, e.g. `gridPos={'x':0, 'y':0, 'w':12, 'h': 9}`
+ */
+ new(
+ title='Dashboard Row',
+ height=null,
+ collapse=false,
+ repeat=null,
+ showTitle=null,
+ titleSize='h6'
+ ):: {
+ collapse: collapse,
+ collapsed: collapse,
+ [if height != null then 'height']: height,
+ panels: [],
+ repeat: repeat,
+ repeatIteration: null,
+ repeatRowId: null,
+ showTitle:
+ if showTitle != null then
+ showTitle
+ else
+ title != 'Dashboard Row',
+ title: title,
+ type: 'row',
+ titleSize: titleSize,
+ addPanels(panels):: self {
+ panels+: panels,
+ },
+ addPanel(panel, gridPos={}):: self {
+ panels+: [panel { gridPos: gridPos }],
+ },
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/singlestat.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/singlestat.libsonnet
new file mode 100644
index 0000000..78428d2
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/singlestat.libsonnet
@@ -0,0 +1,181 @@
+{
+ /**
+ * Creates a singlestat panel.
+ *
+ * @name singlestat.new
+ *
+ * @param title The title of the singlestat panel.
+ * @param format (default `'none'`) Unit
+ * @param description (default `''`)
+ * @param interval (optional)
+ * @param height (optional)
+ * @param datasource (optional)
+ * @param span (optional)
+ * @param min_span (optional)
+ * @param decimals (optional)
+ * @param valueName (default `'avg'`)
+ * @param valueFontSize (default `'80%'`)
+ * @param prefixFontSize (default `'50%'`)
+ * @param postfixFontSize (default `'50%'`)
+ * @param mappingType (default `1`)
+ * @param repeat (optional)
+ * @param repeatDirection (optional)
+ * @param prefix (default `''`)
+ * @param postfix (default `''`)
+ * @param colors (default `['#299c46','rgba(237, 129, 40, 0.89)','#d44a3a']`)
+ * @param colorBackground (default `false`)
+ * @param colorValue (default `false`)
+ * @param thresholds (default `''`)
+ * @param valueMaps (default `{value: 'null',op: '=',text: 'N/A'}`)
+ * @param rangeMaps (default `{from: 'null',to: 'null',text: 'N/A'}`)
+ * @param transparent (optional)
+ * @param sparklineFillColor (default `'rgba(31, 118, 189, 0.18)'`)
+ * @param sparklineFull (default `false`)
+ * @param sparklineLineColor (default `'rgb(31, 120, 193)'`)
+ * @param sparklineShow (default `false`)
+ * @param gaugeShow (default `false`)
+ * @param gaugeMinValue (default `0`)
+ * @param gaugeMaxValue (default `100`)
+ * @param gaugeThresholdMarkers (default `true`)
+ * @param gaugeThresholdLabels (default `false`)
+ * @param timeFrom (optional)
+ * @param links (optional)
+ * @param tableColumn (default `''`)
+ * @param maxPerRow (optional)
+ * @param maxDataPoints (default `100`)
+ *
+ * @method addTarget(target) Adds a target object.
+ */
+ new(
+ title,
+ format='none',
+ description='',
+ interval=null,
+ height=null,
+ datasource=null,
+ span=null,
+ min_span=null,
+ decimals=null,
+ valueName='avg',
+ valueFontSize='80%',
+ prefixFontSize='50%',
+ postfixFontSize='50%',
+ mappingType=1,
+ repeat=null,
+ repeatDirection=null,
+ prefix='',
+ postfix='',
+ colors=[
+ '#299c46',
+ 'rgba(237, 129, 40, 0.89)',
+ '#d44a3a',
+ ],
+ colorBackground=false,
+ colorValue=false,
+ thresholds='',
+ valueMaps=[
+ {
+ value: 'null',
+ op: '=',
+ text: 'N/A',
+ },
+ ],
+ rangeMaps=[
+ {
+ from: 'null',
+ to: 'null',
+ text: 'N/A',
+ },
+ ],
+ transparent=null,
+ sparklineFillColor='rgba(31, 118, 189, 0.18)',
+ sparklineFull=false,
+ sparklineLineColor='rgb(31, 120, 193)',
+ sparklineShow=false,
+ gaugeShow=false,
+ gaugeMinValue=0,
+ gaugeMaxValue=100,
+ gaugeThresholdMarkers=true,
+ gaugeThresholdLabels=false,
+ timeFrom=null,
+ links=[],
+ tableColumn='',
+ maxPerRow=null,
+ maxDataPoints=100,
+ )::
+ {
+ [if height != null then 'height']: height,
+ [if description != '' then 'description']: description,
+ [if repeat != null then 'repeat']: repeat,
+ [if repeatDirection != null then 'repeatDirection']: repeatDirection,
+ [if transparent != null then 'transparent']: transparent,
+ [if min_span != null then 'minSpan']: min_span,
+ title: title,
+ [if span != null then 'span']: span,
+ type: 'singlestat',
+ datasource: datasource,
+ targets: [
+ ],
+ links: links,
+ [if decimals != null then 'decimals']: decimals,
+ maxDataPoints: maxDataPoints,
+ interval: interval,
+ cacheTimeout: null,
+ format: format,
+ prefix: prefix,
+ postfix: postfix,
+ nullText: null,
+ valueMaps: valueMaps,
+ [if maxPerRow != null then 'maxPerRow']: maxPerRow,
+ mappingTypes: [
+ {
+ name: 'value to text',
+ value: 1,
+ },
+ {
+ name: 'range to text',
+ value: 2,
+ },
+ ],
+ rangeMaps: rangeMaps,
+ mappingType:
+ if mappingType == 'value'
+ then
+ 1
+ else if mappingType == 'range'
+ then
+ 2
+ else
+ mappingType,
+ nullPointMode: 'connected',
+ valueName: valueName,
+ prefixFontSize: prefixFontSize,
+ valueFontSize: valueFontSize,
+ postfixFontSize: postfixFontSize,
+ thresholds: thresholds,
+ [if timeFrom != null then 'timeFrom']: timeFrom,
+ colorBackground: colorBackground,
+ colorValue: colorValue,
+ colors: colors,
+ gauge: {
+ show: gaugeShow,
+ minValue: gaugeMinValue,
+ maxValue: gaugeMaxValue,
+ thresholdMarkers: gaugeThresholdMarkers,
+ thresholdLabels: gaugeThresholdLabels,
+ },
+ sparkline: {
+ fillColor: sparklineFillColor,
+ full: sparklineFull,
+ lineColor: sparklineLineColor,
+ show: sparklineShow,
+ },
+ tableColumn: tableColumn,
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/sql.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/sql.libsonnet
new file mode 100644
index 0000000..ab48543
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/sql.libsonnet
@@ -0,0 +1,23 @@
+{
+ /**
+ * Creates an SQL target.
+ *
+ * @name sql.target
+ *
+ * @param rawSql The SQL query
+ * @param datasource (optional)
+ * @param format (default `'time_series'`)
+ * @param alias (optional)
+ */
+ target(
+ rawSql,
+ datasource=null,
+ format='time_series',
+ alias=null,
+ ):: {
+ [if datasource != null then 'datasource']: datasource,
+ format: format,
+ [if alias != null then 'alias']: alias,
+ rawSql: rawSql,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/stat_panel.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/stat_panel.libsonnet
new file mode 100644
index 0000000..5d1e5e7
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/stat_panel.libsonnet
@@ -0,0 +1,222 @@
+{
+ /**
+ * Creates a [stat panel](https://grafana.com/docs/grafana/latest/panels/visualizations/stat-panel/).
+ *
+ * @name statPanel.new
+ *
+ * @param title Panel title.
+ * @param description (optional) Panel description.
+ * @param transparent (default `false`) Whether to display the panel without a background.
+ * @param datasource (optional) Panel datasource.
+ * @param allValues (default `false`) Show all values instead of reducing to one.
+ * @param valueLimit (optional) Limit of values in all values mode.
+ * @param reducerFunction (default `'mean'`) Function to use to reduce values to when using single value.
+ * @param fields (default `''`) Fields that should be included in the panel.
+ * @param orientation (default `'auto'`) Stacking direction in case of multiple series or fields.
+ * @param colorMode (default `'value'`) 'value' or 'background'.
+ * @param graphMode (default `'area'`) 'none' or 'area' to enable sparkline mode.
+ * @param textMode (default `'auto'`) Control if name and value is displayed or just name.
+ * @param justifyMode (default `'auto'`) 'auto' or 'center'.
+ * @param unit (default `'none'`) Panel unit field option.
+ * @param min (optional) Leave empty to calculate based on all values.
+ * @param max (optional) Leave empty to calculate based on all values.
+ * @param decimals (optional) Number of decimal places to show.
+ * @param displayName (optional) Change the field or series name.
+ * @param noValue (optional) What to show when there is no value.
+ * @param thresholdsMode (default `'absolute'`) 'absolute' or 'percentage'.
+ * @param timeFrom (optional) Override the relative time range.
+ * @param repeat (optional) Name of variable that should be used to repeat this panel.
+ * @param repeatDirection (default `'h'`) 'h' for horizontal or 'v' for vertical.
+ * @param maxPerRow (optional) Maximum panels per row in repeat mode.
+ * @param pluginVersion (default `'7'`) Plugin version the panel should be modeled for. This has been tested with the default, '7', and '6.7'.
+ *
+ * @method addTarget(target) Adds a target object.
+ * @method addTargets(targets) Adds an array of targets.
+ * @method addLink(link) Adds a [panel link](https://grafana.com/docs/grafana/latest/linking/panel-links/). Argument format: `{ title: 'Link Title', url: 'https://...', targetBlank: true }`.
+ * @method addLinks(links) Adds an array of links.
+ * @method addThreshold(step) Adds a [threshold](https://grafana.com/docs/grafana/latest/panels/thresholds/) step. Argument format: `{ color: 'green', value: 0 }`.
+ * @method addThresholds(steps) Adds an array of threshold steps.
+ * @method addMapping(mapping) Adds a value mapping.
+ * @method addMappings(mappings) Adds an array of value mappings.
+ * @method addDataLink(link) Adds a data link.
+ * @method addDataLinks(links) Adds an array of data links.
+ */
+ new(
+ title,
+ description=null,
+ transparent=false,
+ datasource=null,
+ allValues=false,
+ valueLimit=null,
+ reducerFunction='mean',
+ fields='',
+ orientation='auto',
+ colorMode='value',
+ graphMode='area',
+ textMode='auto',
+ justifyMode='auto',
+ unit='none',
+ min=null,
+ max=null,
+ decimals=null,
+ displayName=null,
+ noValue=null,
+ thresholdsMode='absolute',
+ timeFrom=null,
+ repeat=null,
+ repeatDirection='h',
+ maxPerRow=null,
+ pluginVersion='7',
+ ):: {
+
+ type: 'stat',
+ title: title,
+ [if description != null then 'description']: description,
+ transparent: transparent,
+ datasource: datasource,
+ targets: [],
+ links: [],
+ [if repeat != null then 'repeat']: repeat,
+ [if repeat != null then 'repeatDirection']: repeatDirection,
+ [if timeFrom != null then 'timeFrom']: timeFrom,
+ [if repeat != null then 'maxPerRow']: maxPerRow,
+
+ // targets
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
+
+ // links
+ addLink(link):: self {
+ links+: [link],
+ },
+ addLinks(links):: std.foldl(function(p, l) p.addLink(l), links, self),
+
+ pluginVersion: pluginVersion,
+ } + (
+
+ if pluginVersion >= '7' then {
+ options: {
+ reduceOptions: {
+ values: allValues,
+ [if allValues && valueLimit != null then 'limit']: valueLimit,
+ calcs: [
+ reducerFunction,
+ ],
+ fields: fields,
+ },
+ orientation: orientation,
+ colorMode: colorMode,
+ graphMode: graphMode,
+ justifyMode: justifyMode,
+ textMode: textMode,
+ },
+ fieldConfig: {
+ defaults: {
+ unit: unit,
+ [if min != null then 'min']: min,
+ [if max != null then 'max']: max,
+ [if decimals != null then 'decimals']: decimals,
+ [if displayName != null then 'displayName']: displayName,
+ [if noValue != null then 'noValue']: noValue,
+ thresholds: {
+ mode: thresholdsMode,
+ steps: [],
+ },
+ mappings: [],
+ links: [],
+ },
+ },
+
+ // thresholds
+ addThreshold(step):: self {
+ fieldConfig+: { defaults+: { thresholds+: { steps+: [step] } } },
+ },
+
+ // mappings
+ _nextMapping:: 0,
+ addMapping(mapping):: self {
+ local nextMapping = super._nextMapping,
+ _nextMapping: nextMapping + 1,
+ fieldConfig+: { defaults+: { mappings+: [mapping { id: nextMapping }] } },
+ },
+
+ // data links
+ addDataLink(link):: self {
+ fieldConfig+: { defaults+: { links+: [link] } },
+ },
+
+ // Overrides
+ addOverride(
+ matcher=null,
+ properties=null,
+ ):: self {
+ fieldConfig+: {
+ overrides+: [
+ {
+ [if matcher != null then 'matcher']: matcher,
+ [if properties != null then 'properties']: properties,
+ },
+ ],
+ },
+ },
+ addOverrides(overrides):: std.foldl(function(p, o) p.addOverride(o.matcher, o.properties), overrides, self),
+ } else {
+ options: {
+ fieldOptions: {
+ values: allValues,
+ [if allValues && valueLimit != null then 'limit']: valueLimit,
+ calcs: [
+ reducerFunction,
+ ],
+ fields: fields,
+ defaults: {
+ unit: unit,
+ [if min != null then 'min']: min,
+ [if max != null then 'max']: max,
+ [if decimals != null then 'decimals']: decimals,
+ [if displayName != null then 'displayName']: displayName,
+ [if noValue != null then 'noValue']: noValue,
+ thresholds: {
+ mode: thresholdsMode,
+ steps: [],
+ },
+ mappings: [],
+ links: [],
+ },
+ },
+ orientation: orientation,
+ colorMode: colorMode,
+ graphMode: graphMode,
+ justifyMode: justifyMode,
+ },
+
+ // thresholds
+ addThreshold(step):: self {
+ options+: { fieldOptions+: { defaults+: { thresholds+: { steps+: [step] } } } },
+ },
+
+ // mappings
+ _nextMapping:: 0,
+ addMapping(mapping):: self {
+ local nextMapping = super._nextMapping,
+ _nextMapping: nextMapping + 1,
+ options+: { fieldOptions+: { defaults+: { mappings+: [mapping { id: nextMapping }] } } },
+ },
+
+ // data links
+ addDataLink(link):: self {
+ options+: { fieldOptions+: { defaults+: { links+: [link] } } },
+ },
+ }
+
+ ) + {
+ addThresholds(steps):: std.foldl(function(p, s) p.addThreshold(s), steps, self),
+ addMappings(mappings):: std.foldl(function(p, m) p.addMapping(m), mappings, self),
+ addDataLinks(links):: std.foldl(function(p, l) p.addDataLink(l), links, self),
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/table_panel.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/table_panel.libsonnet
new file mode 100644
index 0000000..4c686b3
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/table_panel.libsonnet
@@ -0,0 +1,91 @@
+{
+ /**
+ * Creates a [table panel](https://grafana.com/docs/grafana/latest/panels/visualizations/table-panel/) that can be added in a row.
+ * It requires the table panel plugin in grafana, which is built-in.
+ *
+ * @name table.new
+ *
+ * @param title The title of the graph panel.
+ * @param description (optional) Description of the panel
+ * @param span (optional) Width of the panel
+ * @param height (optional) Height of the panel
+ * @param datasource (optional) Datasource
+ * @param min_span (optional) Min span
+ * @param styles (optional) Array of styles for the panel
+ * @param columns (optional) Array of columns for the panel
+ * @param sort (optional) Sorting instruction for the panel
+ * @param transform (optional) Allow table manipulation to present data as desired
+ * @param transparent (default: 'false') Whether to display the panel without a background
+ * @param links (optional) Array of links for the panel.
+ * @return A json that represents a table panel
+ *
+ * @method addTarget(target) Adds a target object
+ * @method addTargets(targets) Adds an array of targets
+ * @method addColumn(field, style) Adds a column
+ * @method hideColumn(field) Hides a column
+ * @method addLink(link) Adds a link
+ * @method addTransformation(transformation) Adds a transformation object
+ * @method addTransformations(transformations) Adds an array of transformations
+ */
+ new(
+ title,
+ description=null,
+ span=null,
+ min_span=null,
+ height=null,
+ datasource=null,
+ styles=[],
+ transform=null,
+ transparent=false,
+ columns=[],
+ sort=null,
+ time_from=null,
+ time_shift=null,
+ links=[],
+ ):: {
+ type: 'table',
+ title: title,
+ [if span != null then 'span']: span,
+ [if min_span != null then 'minSpan']: min_span,
+ [if height != null then 'height']: height,
+ datasource: datasource,
+ targets: [
+ ],
+ styles: styles,
+ columns: columns,
+ timeFrom: time_from,
+ timeShift: time_shift,
+ links: links,
+ [if sort != null then 'sort']: sort,
+ [if description != null then 'description']: description,
+ [if transform != null then 'transform']: transform,
+ [if transparent == true then 'transparent']: transparent,
+ _nextTarget:: 0,
+ addTarget(target):: self {
+ local nextTarget = super._nextTarget,
+ _nextTarget: nextTarget + 1,
+ targets+: [target { refId: std.char(std.codepoint('A') + nextTarget) }],
+ },
+ addTargets(targets):: std.foldl(function(p, t) p.addTarget(t), targets, self),
+ addColumn(field, style):: self {
+ local style_ = style { pattern: field },
+ local column_ = { text: field, value: field },
+ styles+: [style_],
+ columns+: [column_],
+ },
+ hideColumn(field):: self {
+ styles+: [{
+ alias: field,
+ pattern: field,
+ type: 'hidden',
+ }],
+ },
+ addLink(link):: self {
+ links+: [link],
+ },
+ addTransformation(transformation):: self {
+ transformations+: [transformation],
+ },
+ addTransformations(transformations):: std.foldl(function(p, t) p.addTransformation(t), transformations, self),
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/template.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/template.libsonnet
new file mode 100644
index 0000000..be253e1
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/template.libsonnet
@@ -0,0 +1,289 @@
+{
+ /**
+ * Creates a [template](https://grafana.com/docs/grafana/latest/variables/#templates) that can be added to a dashboard.
+ *
+ * @name template.new
+ *
+ * @param name Name of variable.
+ * @param datasource Template [datasource](https://grafana.com/docs/grafana/latest/variables/variable-types/add-data-source-variable/)
+ * @param query [Query expression](https://grafana.com/docs/grafana/latest/variables/variable-types/add-query-variable/) for the datasource.
+ * @param label (optional) Display name of the variable dropdown. If null, then the dropdown label will be the variable name.
+ * @param allValues (optional) Formatting for [multi-value variables](https://grafana.com/docs/grafana/latest/variables/formatting-multi-value-variables/#formatting-multi-value-variables)
+ * @param tagValuesQuery (default `''`) Group values into [selectable tags](https://grafana.com/docs/grafana/latest/variables/variable-value-tags/)
+ * @param current (default `null`) Can be `null`, `'all'` for all, or any other custom text value.
+ * @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
+ * @param regex (default `''`) Regex expression to filter or capture specific parts of the names returned by your data source query. To see examples, refer to [Filter variables with regex](https://grafana.com/docs/grafana/latest/variables/filter-variables-with-regex/).
+ * @param refresh (default `'never'`) `'never'`: variables queries are cached and values are not updated. This is fine if the values never change, but problematic if they are dynamic and change a lot. `'load'`: Queries the data source every time the dashboard loads. This slows down dashboard loading, because the variable query needs to be completed before dashboard can be initialized. `'time'`: Queries the data source when the dashboard time range changes. Only use this option if your variable options query contains a time range filter or is dependent on the dashboard time range.
+ * @param includeAll (default `false`) Whether all value option is available or not.
+ * @param multi (default `false`) Whether multiple values can be selected or not from variable value list.
+ * @param sort (default `0`) `0`: Without Sort, `1`: Alphabetical (asc), `2`: Alphabetical (desc), `3`: Numerical (asc), `4`: Numerical (desc).
+ *
+ * @return A [template](https://grafana.com/docs/grafana/latest/variables/#templates)
+ */
+ new(
+ name,
+ datasource,
+ query,
+ label=null,
+ allValues=null,
+ tagValuesQuery='',
+ current=null,
+ hide='',
+ regex='',
+ refresh='never',
+ includeAll=false,
+ multi=false,
+ sort=0,
+ )::
+ {
+ allValue: allValues,
+ current: $.current(current),
+ datasource: datasource,
+ includeAll: includeAll,
+ hide: $.hide(hide),
+ label: label,
+ multi: multi,
+ name: name,
+ options: [],
+ query: query,
+ refresh: $.refresh(refresh),
+ regex: regex,
+ sort: sort,
+ tagValuesQuery: tagValuesQuery,
+ tags: [],
+ tagsQuery: '',
+ type: 'query',
+ useTags: false,
+ },
+ /**
+ * Use an [interval variable](https://grafana.com/docs/grafana/latest/variables/variable-types/add-interval-variable/) to represent time spans such as '1m', '1h', '1d'. You can think of them as a dashboard-wide "group by time" command. Interval variables change how the data is grouped in the visualization. You can also use the Auto Option to return a set number of data points per time span.
+ * You can use an interval variable as a parameter to group by time (for InfluxDB), date histogram interval (for Elasticsearch), or as a summarize function parameter (for Graphite).
+ *
+ * @name template.interval
+ *
+ * @param name Variable name
+ * @param query Comma separated values without spacing of intervals available for selection. Add `'auto'` in the query to turn on the Auto Option. Ex: `'auto,5m,10m,20m'`.
+ * @param current Currently selected interval. Must be one of the values in the query. `'auto'` is allowed if defined in the query.
+ * @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
+ * @param label (optional) Display name of the variable dropdown. If null, then the dropdown label will be the variable name.
+ * @param auto_count (default `300`) Valid only if `'auto'` is defined in query. Number of times the current time range will be divided to calculate the value, similar to the Max data points query option. For example, if the current visible time range is 30 minutes, then the auto interval groups the data into 30 one-minute increments. The default value is 30 steps.
+ * @param auto_min (default `'10s'`) Valid only if `'auto'` is defined in query. The minimum threshold below which the step count intervals will not divide the time. To continue the 30 minute example, if the minimum interval is set to `'2m'`, then Grafana would group the data into 15 two-minute increments.
+ *
+ * @return A new interval variable for templating.
+ */
+ interval(
+ name,
+ query,
+ current,
+ hide='',
+ label=null,
+ auto_count=300,
+ auto_min='10s',
+ )::
+ {
+ current: $.current(current),
+ hide: $.hide(hide),
+ label: label,
+ name: name,
+ query: std.join(',', std.filter($.filterAuto, std.split(query, ','))),
+ refresh: 2,
+ type: 'interval',
+ auto: std.count(std.split(query, ','), 'auto') > 0,
+ auto_count: auto_count,
+ auto_min: auto_min,
+ },
+ hide(hide)::
+ if hide == '' then 0 else if hide == 'label' then 1 else 2,
+ current(current):: {
+ [if current != null then 'text']: current,
+ [if current != null then 'value']: if current == 'auto' then
+ '$__auto_interval'
+ else if current == 'all' then
+ '$__all'
+ else
+ current,
+ },
+ /**
+ * Data [source variables](https://grafana.com/docs/grafana/latest/variables/variable-types/add-data-source-variable/)
+ * allow you to quickly change the data source for an entire dashboard.
+ * They are useful if you have multiple instances of a data source, perhaps in different environments.
+ *
+ * @name template.datasource
+ *
+ * @param name Data source variable name. Ex: `'PROMETHEUS_DS'`.
+ * @param query Type of data source. Ex: `'prometheus'`.
+ * @param current Ex: `'Prometheus'`.
+ * @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
+ * @param label (optional) Display name of the variable dropdown. If null, then the dropdown label will be the variable name.
+ * @param regex (default `''`) Regex filter for which data source instances to choose from in the variable value drop-down list. Leave this field empty to display all instances.
+ * @param refresh (default `'load'`) `'never'`: Variables queries are cached and values are not updated. This is fine if the values never change, but problematic if they are dynamic and change a lot. `'load'`: Queries the data source every time the dashboard loads. This slows down dashboard loading, because the variable query needs to be completed before dashboard can be initialized. `'time'`: Queries the data source when the dashboard time range changes. Only use this option if your variable options query contains a time range filter or is dependent on the dashboard time range.
+ *
+ * @return A [data source variable](https://grafana.com/docs/grafana/latest/variables/variable-types/add-data-source-variable/).
+ */
+ datasource(
+ name,
+ query,
+ current,
+ hide='',
+ label=null,
+ regex='',
+ refresh='load',
+ ):: {
+ current: $.current(current),
+ hide: $.hide(hide),
+ label: label,
+ name: name,
+ options: [],
+ query: query,
+ refresh: $.refresh(refresh),
+ regex: regex,
+ type: 'datasource',
+ },
+ refresh(refresh):: if refresh == 'never'
+ then
+ 0
+ else if refresh == 'load'
+ then
+ 1
+ else if refresh == 'time'
+ then
+ 2
+ else
+ refresh,
+ filterAuto(str):: str != 'auto',
+ /**
+ * Use a [custom variable](https://grafana.com/docs/grafana/latest/variables/variable-types/add-custom-variable/)
+ * for values that do not change.
+ *
+ * @name template.custom
+ * This might be numbers, strings, or even other variables.
+ * @param name Variable name
+ * @param query Comma separated without spacing list of selectable values.
+ * @param current Selected value
+ * @param refresh (default `'never'`) `'never'`: Variables queries are cached and values are not updated. This is fine if the values never change, but problematic if they are dynamic and change a lot. `'load'`: Queries the data source every time the dashboard loads. This slows down dashboard loading, because the variable query needs to be completed before dashboard can be initialized. `'time'`: Queries the data source when the dashboard time range changes. Only use this option if your variable options query contains a time range filter or is dependent on the dashboard time range.
+ * @param label (default `''`) Display name of the variable dropdown. If you don’t enter a display name, then the dropdown label will be the variable name.
+ * @param valuelabels (default `{}`) Display names for values defined in query. For example, if `query='new,old'`, then you may display them as follows `valuelabels={new: 'nouveau', old: 'ancien'}`.
+ * @param multi (default `false`) Whether multiple values can be selected or not from variable value list.
+ * @param allValues (optional) Formatting for [multi-value variables](https://grafana.com/docs/grafana/latest/variables/formatting-multi-value-variables/#formatting-multi-value-variables)
+ * @param includeAll (default `false`) Whether all value option is available or not.
+ * @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
+ *
+ * @return A custom variable.
+ */
+ custom(
+ name,
+ query,
+ current,
+ refresh='never',
+ label='',
+ valuelabels={},
+ multi=false,
+ allValues=null,
+ includeAll=false,
+ hide='',
+ )::
+ {
+ // self has dynamic scope, so self may not be myself below.
+ // '$' can't be used neither as this object is not top-level object.
+ local custom = self,
+
+ allValue: allValues,
+ current: {
+ // Both 'all' and 'All' are accepted for consistency.
+ value: if includeAll && (current == 'All' || current == 'all') then
+ if multi then ['$__all'] else '$__all'
+ else
+ current,
+ text: if std.isArray(current) then
+ std.join(' + ', std.map(custom.valuelabel, current))
+ else
+ custom.valuelabel(current),
+ [if multi then 'selected']: true,
+ },
+ options: std.map(self.option, self.query_array(query)),
+ hide: $.hide(hide),
+ includeAll: includeAll,
+ label: label,
+ refresh: $.refresh(refresh),
+ multi: multi,
+ name: name,
+ query: query,
+ type: 'custom',
+
+ valuelabel(value):: if value in valuelabels then
+ valuelabels[value]
+ else value,
+
+ option(option):: {
+ text: custom.valuelabel(option),
+ value: if includeAll && option == 'All' then '$__all' else option,
+ [if multi then 'selected']: if multi && std.isArray(current) then
+ std.member(current, option)
+ else if multi then
+ current == option
+ else
+ null,
+ },
+ query_array(query):: std.split(
+ if includeAll then 'All,' + query else query, ','
+ ),
+ },
+ /**
+ * [Text box variables](https://grafana.com/docs/grafana/latest/variables/variable-types/add-text-box-variable/)
+ * display a free text input field with an optional default value.
+ * This is the most flexible variable, because you can enter any value.
+ * Use this type of variable if you have metrics with high cardinality or if you want to
+ * update multiple panels in a dashboard at the same time.
+ *
+ * @name template.text
+ *
+ * @param name Variable name.
+ * @param label (default `''`) Display name of the variable dropdown. If you don’t enter a display name, then the dropdown label will be the variable name.
+ *
+ * @return A text box variable.
+ */
+ text(
+ name,
+ label=''
+ )::
+ {
+ current: {
+ selected: false,
+ text: '',
+ value: '',
+ },
+ name: name,
+ label: label,
+ query: '',
+ type: 'textbox',
+ },
+ /**
+ * [Ad hoc filters](https://grafana.com/docs/grafana/latest/variables/variable-types/add-ad-hoc-filters/)
+ * allow you to add key/value filters that are automatically added to all metric queries
+ * that use the specified data source. Unlike other variables, you do not use ad hoc filters in queries.
+ * Instead, you use ad hoc filters to write filters for existing queries.
+ * Note: Ad hoc filter variables only work with InfluxDB, Prometheus, and Elasticsearch data sources.
+ *
+ * @name template.adhoc
+ *
+ * @param name Variable name.
+ * @param datasource Target data source
+ * @param label (optional) Display name of the variable dropdown. If you don’t enter a display name, then the dropdown label will be the variable name.
+ * @param hide (default `''`) `''`: the variable dropdown displays the variable Name or Label value. `'label'`: the variable dropdown only displays the selected variable value and a down arrow. Any other value: no variable dropdown is displayed on the dashboard.
+ *
+ * @return An ad hoc filter
+ */
+ adhoc(
+ name,
+ datasource,
+ label=null,
+ hide='',
+ )::
+ {
+ datasource: datasource,
+ hide: $.hide(hide),
+ label: label,
+ name: name,
+ type: 'adhoc',
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/text.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/text.libsonnet
new file mode 100644
index 0000000..18020a6
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/text.libsonnet
@@ -0,0 +1,43 @@
+{
+ /**
+ * Creates a [text panel](https://grafana.com/docs/grafana/latest/panels/visualizations/text-panel/).
+ *
+ * @name text.new
+ *
+ * @param title (default `''`) Panel title.
+ * @param description (optional) Panel description.
+ * @param datasource (optional) Panel datasource.
+ * @param span (optional)
+ * @param content (default `''`)
+ * @param mode (default `'markdown'`) Rendering of the content: 'markdown','html', ...
+ * @param transparent (optional) Whether to display the panel without a background.
+ * @param repeat (optional) Name of variable that should be used to repeat this panel.
+ * @param repeatDirection (default `'h'`) 'h' for horizontal or 'v' for vertical.
+ * @param repeatMaxPerRow (optional) Maximum panels per row in repeat mode.
+ */
+ new(
+ title='',
+ span=null,
+ mode='markdown',
+ content='',
+ transparent=null,
+ description=null,
+ datasource=null,
+ repeat=null,
+ repeatDirection=null,
+ repeatMaxPerRow=null,
+ )::
+ {
+ [if transparent != null then 'transparent']: transparent,
+ title: title,
+ [if span != null then 'span']: span,
+ type: 'text',
+ mode: mode,
+ content: content,
+ [if description != null then 'description']: description,
+ datasource: datasource,
+ [if repeat != null then 'repeat']: repeat,
+ [if repeat != null then 'repeatDirection']: repeatDirection,
+ [if repeat != null then 'maxPerRow']: repeatMaxPerRow,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/timepicker.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/timepicker.libsonnet
new file mode 100644
index 0000000..9c18bef
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/timepicker.libsonnet
@@ -0,0 +1,40 @@
+{
+ /**
+ * Creates a Timepicker
+ *
+ * @name timepicker.new
+ *
+ * @param refresh_intervals (default: `['5s','10s','30s','1m','5m','15m','30m','1h','2h','1d']`) Array of time durations
+ * @param time_options (default: `['5m','15m','1h','6h','12h','24h','2d','7d','30d']`) Array of time durations
+ */
+ new(
+ refresh_intervals=[
+ '5s',
+ '10s',
+ '30s',
+ '1m',
+ '5m',
+ '15m',
+ '30m',
+ '1h',
+ '2h',
+ '1d',
+ ],
+ time_options=[
+ '5m',
+ '15m',
+ '1h',
+ '6h',
+ '12h',
+ '24h',
+ '2d',
+ '7d',
+ '30d',
+ ],
+ nowDelay=null,
+ ):: {
+ refresh_intervals: refresh_intervals,
+ time_options: time_options,
+ [if nowDelay != null then 'nowDelay']: nowDelay,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/transformation.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/transformation.libsonnet
new file mode 100644
index 0000000..5e62ade
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/grafonnet-lib/grafonnet/transformation.libsonnet
@@ -0,0 +1,12 @@
+{
+ /**
+ * @name transformation.new
+ */
+ new(
+ id='',
+ options={}
+ ):: {
+ id: id,
+ options: options,
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
new file mode 100644
index 0000000..b25be92
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet
@@ -0,0 +1,500 @@
+{
+ dashboard(title, uid='', datasource='default', datasource_regex=''):: {
+ // Stuff that isn't materialised.
+ _nextPanel:: 1,
+ addRow(row):: self {
+ // automatically number panels in added rows.
+ local n = std.length(row.panels),
+ local nextPanel = super._nextPanel,
+ local panels = std.makeArray(n, function(i)
+ row.panels[i] { id: nextPanel + i }),
+
+ _nextPanel: nextPanel + n,
+ rows+: [row { panels: panels }],
+ },
+
+ addTemplate(name, metric_name, label_name, hide=0, allValue=null, includeAll=false):: self {
+ templating+: {
+ list+: [{
+ allValue: allValue,
+ current: {
+ text: 'prod',
+ value: 'prod',
+ },
+ datasource: '$datasource',
+ hide: hide,
+ includeAll: includeAll,
+ label: name,
+ multi: false,
+ name: name,
+ options: [],
+ query: 'label_values(%s, %s)' % [metric_name, label_name],
+ refresh: 1,
+ regex: '',
+ sort: 2,
+ tagValuesQuery: '',
+ tags: [],
+ tagsQuery: '',
+ type: 'query',
+ useTags: false,
+ }],
+ },
+ },
+
+ addMultiTemplate(name, metric_name, label_name, hide=0, allValue='.+'):: self {
+ templating+: {
+ list+: [{
+ allValue: allValue,
+ current: {
+ selected: true,
+ text: 'All',
+ value: '$__all',
+ },
+ datasource: '$datasource',
+ hide: hide,
+ includeAll: true,
+ label: name,
+ multi: true,
+ name: name,
+ options: [],
+ query: 'label_values(%s, %s)' % [metric_name, label_name],
+ refresh: 1,
+ regex: '',
+ sort: 2,
+ tagValuesQuery: '',
+ tags: [],
+ tagsQuery: '',
+ type: 'query',
+ useTags: false,
+ }],
+ },
+ },
+
+ dashboardLinkUrl(title, url):: self {
+ links+: [
+ {
+ asDropdown: false,
+ icon: 'external link',
+ includeVars: true,
+ keepTime: true,
+ tags: [],
+ targetBlank: true,
+ title: title,
+ tooltip: '',
+ type: 'link',
+ url: url,
+ },
+ ],
+ },
+
+ // Stuff that is materialised.
+ uid: uid,
+ annotations: {
+ list: [],
+ },
+ hideControls: false,
+ links: [],
+ rows: [],
+ schemaVersion: 14,
+ style: 'dark',
+ tags: [],
+ editable: true,
+ gnetId: null,
+ graphTooltip: 0,
+ templating: {
+ list: [
+ {
+ current: {
+ text: datasource,
+ value: datasource,
+ },
+ hide: 0,
+ label: 'Data Source',
+ name: 'datasource',
+ options: [],
+ query: 'prometheus',
+ refresh: 1,
+ regex: datasource_regex,
+ type: 'datasource',
+ },
+ ],
+ },
+ time: {
+ from: 'now-1h',
+ to: 'now',
+ },
+ refresh: '10s',
+ timepicker: {
+ refresh_intervals: [
+ '5s',
+ '10s',
+ '30s',
+ '1m',
+ '5m',
+ '15m',
+ '30m',
+ '1h',
+ '2h',
+ '1d',
+ ],
+ time_options: [
+ '5m',
+ '15m',
+ '1h',
+ '6h',
+ '12h',
+ '24h',
+ '2d',
+ '7d',
+ '30d',
+ ],
+ },
+ timezone: 'utc',
+ title: title,
+ version: 0,
+ },
+
+ row(title):: {
+ _panels:: [],
+ addPanel(panel):: self {
+ _panels+: [panel],
+ },
+
+ panels:
+ // Automatically distribute panels within a row.
+ local n = std.length(self._panels);
+ [
+ p { span: std.floor(12 / n) }
+ for p in self._panels
+ ],
+
+ collapse: false,
+ height: '250px',
+ repeat: null,
+ repeatIteration: null,
+ repeatRowId: null,
+ showTitle: true,
+ title: title,
+ titleSize: 'h6',
+ },
+
+ // "graph" type, now deprecated.
+ panel(title):: {
+ aliasColors: {},
+ bars: false,
+ dashLength: 10,
+ dashes: false,
+ datasource: '$datasource',
+ fill: 1,
+ legend: {
+ avg: false,
+ current: false,
+ max: false,
+ min: false,
+ show: true,
+ total: false,
+ values: false,
+ },
+ lines: true,
+ linewidth: 1,
+ links: [],
+ nullPointMode: 'null as zero',
+ percentage: false,
+ pointradius: 5,
+ points: false,
+ renderer: 'flot',
+ seriesOverrides: [],
+ spaceLength: 10,
+ span: 6,
+ stack: false,
+ steppedLine: false,
+ targets: [],
+ thresholds: [],
+ timeFrom: null,
+ timeShift: null,
+ title: title,
+ tooltip: {
+ shared: true,
+ sort: 2,
+ value_type: 'individual',
+ },
+ type: 'graph',
+ xaxis: {
+ buckets: null,
+ mode: 'time',
+ name: null,
+ show: true,
+ values: [],
+ },
+ yaxes: $.yaxes('short'),
+ },
+
+ // "timeseries" panel, introduced with Grafana 7.4 and made standard in 8.0.
+ timeseriesPanel(title):: {
+ datasource: '$datasource',
+ fieldConfig: {
+ defaults: {
+ custom: {
+ drawStyle: 'line',
+ fillOpacity: 1,
+ lineWidth: 1,
+ pointSize: 5,
+ showPoints: 'never',
+ spanNulls: false,
+ stacking: {
+ group: 'A',
+ mode: 'none',
+ },
+ },
+ thresholds: {
+ mode: 'absolute',
+ steps: [],
+ },
+ unit: 's',
+ },
+ overrides: [],
+ },
+ options: {
+ legend: {
+ showLegend: true,
+ },
+ tooltip: {
+ mode: 'single',
+ sort: 'none',
+ },
+ },
+ links: [],
+ targets: [],
+ title: title,
+ type: 'timeseries',
+ },
+
+ queryPanel(queries, legends, legendLink=null):: {
+
+ local qs =
+ if std.type(queries) == 'string'
+ then [queries]
+ else queries,
+ local ls =
+ if std.type(legends) == 'string'
+ then [legends]
+ else legends,
+
+ local qsandls = if std.length(ls) == std.length(qs)
+ then std.makeArray(std.length(qs), function(x) { q: qs[x], l: ls[x] })
+ else error 'length of queries is not equal to length of legends',
+
+ targets+: [
+ {
+ legendLink: legendLink,
+ expr: ql.q,
+ format: 'time_series',
+ intervalFactor: 2,
+ legendFormat: ql.l,
+ step: 10,
+ }
+ for ql in qsandls
+ ],
+ },
+
+ statPanel(query, format='percentunit'):: {
+ type: 'singlestat',
+ thresholds: '70,80',
+ format: format,
+ targets: [
+ {
+ expr: query,
+ format: 'time_series',
+ instant: true,
+ intervalFactor: 2,
+ refId: 'A',
+ },
+ ],
+ },
+
+ tablePanel(queries, labelStyles):: {
+ local qs =
+ if std.type(queries) == 'string'
+ then [queries]
+ else queries,
+
+ local style(labelStyle) =
+ if std.type(labelStyle) == 'string'
+ then {
+ alias: labelStyle,
+ colorMode: null,
+ colors: [],
+ dateFormat: 'YYYY-MM-DD HH:mm:ss',
+ decimals: 2,
+ thresholds: [],
+ type: 'string',
+ unit: 'short',
+ }
+ else {
+ alias: labelStyle.alias,
+ colorMode: null,
+ colors: [],
+ dateFormat: 'YYYY-MM-DD HH:mm:ss',
+ decimals: if std.objectHas(labelStyle, 'decimals') then labelStyle.decimals else 2,
+ thresholds: [],
+ type: if std.objectHas(labelStyle, 'type') then labelStyle.type else 'number',
+ unit: if std.objectHas(labelStyle, 'unit') then labelStyle.unit else 'short',
+ link: std.objectHas(labelStyle, 'link'),
+ linkTargetBlank: if std.objectHas(labelStyle, 'linkTargetBlank') then labelStyle.linkTargetBlank else false,
+ linkTooltip: if std.objectHas(labelStyle, 'linkTooltip') then labelStyle.linkTooltip else 'Drill down',
+ linkUrl: if std.objectHas(labelStyle, 'link') then labelStyle.link else '',
+ },
+
+ _styles:: {
+ // By default hide time.
+ Time: {
+ alias: 'Time',
+ dateFormat: 'YYYY-MM-DD HH:mm:ss',
+ type: 'hidden',
+ },
+ } + {
+ [label]: style(labelStyles[label])
+ for label in std.objectFields(labelStyles)
+ },
+
+ styles: [
+ self._styles[pattern] { pattern: pattern }
+ for pattern in std.objectFields(self._styles)
+ ] + [style('') + { pattern: '/.*/' }],
+
+ transform: 'table',
+ type: 'table',
+ targets: [
+ {
+ expr: qs[i],
+ format: 'table',
+ instant: true,
+ intervalFactor: 2,
+ legendFormat: '',
+ step: 10,
+ refId: std.char(65 + i),
+ }
+ for i in std.range(0, std.length(qs) - 1)
+ ],
+ },
+
+ textPanel(title, markdown):: {
+ type: 'text',
+ title: title,
+ options: {
+ content: markdown,
+ mode: 'markdown',
+ },
+ transparent: true,
+ datasource: null,
+ timeFrom: null,
+ timeShift: null,
+ fieldConfig: {
+ defaults: {
+ custom: {},
+ },
+ overrides: [],
+ },
+ },
+
+ stack:: {
+ stack: true,
+ fill: 10,
+ linewidth: 0,
+ },
+
+ yaxes(args)::
+ local format = if std.type(args) == 'string' then args else null;
+ local options = if std.type(args) == 'object' then args else {};
+ [
+ {
+ format: format,
+ label: null,
+ logBase: 1,
+ max: null,
+ min: 0,
+ show: true,
+ } + options,
+ {
+ format: 'short',
+ label: null,
+ logBase: 1,
+ max: null,
+ min: null,
+ show: false,
+ },
+ ],
+
+ qpsPanel(selector, statusLabelName='status_code'):: {
+ aliasColors: {
+ '1xx': '#EAB839',
+ '2xx': '#7EB26D',
+ '3xx': '#6ED0E0',
+ '4xx': '#EF843C',
+ '5xx': '#E24D42',
+ success: '#7EB26D',
+ 'error': '#E24D42',
+ },
+ targets: [
+ {
+ expr:
+ |||
+ sum by (status) (
+ label_replace(label_replace(rate(%s[$__rate_interval]),
+ "status", "${1}xx", "%s", "([0-9]).."),
+ "status", "${1}", "%s", "([a-z]+)"))
+ ||| % [selector, statusLabelName, statusLabelName],
+ format: 'time_series',
+ intervalFactor: 2,
+ legendFormat: '{{status}}',
+ refId: 'A',
+ step: 10,
+ },
+ ],
+ } + $.stack,
+
+ latencyPanel(metricName, selector, multiplier='1e3'):: {
+ nullPointMode: 'null as zero',
+ targets: [
+ {
+ expr: 'histogram_quantile(0.99, sum(rate(%s_bucket%s[$__rate_interval])) by (le)) * %s' % [metricName, selector, multiplier],
+ format: 'time_series',
+ intervalFactor: 2,
+ legendFormat: '99th Percentile',
+ refId: 'A',
+ step: 10,
+ },
+ {
+ expr: 'histogram_quantile(0.50, sum(rate(%s_bucket%s[$__rate_interval])) by (le)) * %s' % [metricName, selector, multiplier],
+ format: 'time_series',
+ intervalFactor: 2,
+ legendFormat: '50th Percentile',
+ refId: 'B',
+ step: 10,
+ },
+ {
+ expr: 'sum(rate(%s_sum%s[$__rate_interval])) * %s / sum(rate(%s_count%s[$__rate_interval]))' % [metricName, selector, multiplier, metricName, selector],
+ format: 'time_series',
+ intervalFactor: 2,
+ legendFormat: 'Average',
+ refId: 'C',
+ step: 10,
+ },
+ ],
+ yaxes: $.yaxes('ms'),
+ },
+
+ selector:: {
+ eq(label, value):: { label: label, op: '=', value: value },
+ neq(label, value):: { label: label, op: '!=', value: value },
+ re(label, value):: { label: label, op: '=~', value: value },
+ nre(label, value):: { label: label, op: '!~', value: value },
+ },
+
+ toPrometheusSelector(selector)::
+ local pairs = [
+ '%(label)s%(op)s"%(value)s"' % matcher
+ for matcher in selector
+ ];
+ '{%s}' % std.join(', ', pairs),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/README.md b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/README.md
new file mode 100644
index 0000000..8556caf
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/README.md
@@ -0,0 +1,28 @@
+# Memcached Monitoring Mixin
+
+<img align="right" width="200" height="136" src="dashboard.png">
+
+Grafana dashboard for operating Memcached, in the form
+of a monitoring mixin. They are easiest to use with the [prometheus-ksonnet](https://github.com/grafana/jsonnet-libs/tree/master/prometheus-ksonnet)
+package.
+
+To use this mixin, install [Tanka](https://tanka.dev/) and [Jsonnet Bundler](https://tanka.dev/install#jsonnet-bundler).
+
+Then you can install the mixin with:
+
+```
+jb install github.com/grafana/jsonnet-libs/memcached-mixin
+```
+
+To use, in your Tanka environment's `main.jsonnet` file:
+
+```js
+local prometheus = (import "prometheus-ksonnet/prometheus-ksonnet.libsonnet");
+local memcached_mixin = (import "memcached-mixin/mixin.libsonnet");
+
+prometheus + memcached_mixin {
+ _config+:: {
+ namespace: "default",
+ },
+}
+```
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/alerts.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/alerts.libsonnet
new file mode 100644
index 0000000..a806d5d
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/alerts.libsonnet
@@ -0,0 +1,56 @@
+{
+ prometheusAlerts+: {
+ groups+: [
+ {
+ name: 'memcached',
+ rules: [
+ {
+ alert: 'MemcachedDown',
+ expr: |||
+ memcached_up == 0
+ |||,
+ 'for': '15m',
+ labels: {
+ severity: 'critical',
+ },
+ annotations: {
+ message: |||
+ Memcached Instance {{ $labels.job }} / {{ $labels.instance }} is down for more than 15mins.
+ |||,
+ },
+ },
+ {
+ alert: 'MemcachedConnectionLimitApproaching',
+ expr: |||
+ (memcached_current_connections / memcached_max_connections * 100) > 80
+ |||,
+ 'for': '15m',
+ labels: {
+ severity: 'warning',
+ },
+ annotations: {
+ message: |||
+ Memcached Instance {{ $labels.job }} / {{ $labels.instance }} connection usage is at {{ printf "%0.0f" $value }}% for at least 15m.
+ |||,
+ },
+ },
+ {
+ alert: 'MemcachedConnectionLimitApproaching',
+ expr: |||
+ (memcached_current_connections / memcached_max_connections * 100) > 95
+ |||,
+ 'for': '15m',
+ labels: {
+ severity: 'critical',
+ },
+ annotations: {
+ message: |||
+ Memcached Instance {{ $labels.job }} / {{ $labels.instance }} connection usage is at {{ printf "%0.0f" $value }}% for at least 15m.
+ |||,
+ },
+ },
+ ],
+ },
+ ],
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboard.png b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboard.png
new file mode 100644
index 0000000..445dab6
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboard.png
Binary files differ
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboards.jsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboards.jsonnet
new file mode 100644
index 0000000..74e1324
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboards.jsonnet
@@ -0,0 +1,6 @@
+local mixin = import 'mixin.libsonnet';
+
+{
+ [name]: std.manifestJsonEx(mixin.grafanaDashboards[name], ' ')
+ for name in std.objectFields(mixin.grafanaDashboards)
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboards.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboards.libsonnet
new file mode 100644
index 0000000..942f6c5
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/dashboards.libsonnet
@@ -0,0 +1,110 @@
+local g = (import 'grafana-builder/grafana.libsonnet');
+
+{
+ grafanaDashboards+: {
+ 'memcached-overview.json':
+ (
+ g.dashboard('Memcached Overview') +
+ { uid: '124d5222454213f748dbfaf69b77ec48' }
+ )
+ .addMultiTemplate('cluster', 'memcached_commands_total', 'cluster')
+ .addMultiTemplate('job', 'memcached_commands_total{cluster=~"$cluster"}', 'job')
+ .addMultiTemplate('instance', 'memcached_commands_total{cluster=~"$cluster",job=~"$job"}', 'instance')
+ .addRow(
+ g.row('Hits')
+ .addPanel(
+ g.panel('Hit Rate') +
+ g.queryPanel('sum(rate(memcached_commands_total{cluster=~"$cluster", job=~"$job", instance=~"$instance", command="get", status="hit"}[$__rate_interval])) / sum(rate(memcached_commands_total{cluster=~"$cluster", job=~"$job", command="get"}[$__rate_interval]))', 'Hit Rate') +
+ { yaxes: g.yaxes('percentunit') },
+ )
+ .addPanel(
+ g.panel('Top 20 Highest Connection Usage') +
+ g.queryPanel(|||
+ topk(20,
+ max by (cluster, job, instance) (
+ memcached_current_connections{cluster=~"$cluster", job=~"$job", instance=~"$instance"} / memcached_max_connections{cluster=~"$cluster", job=~"$job", instance=~"$instance"}
+ ))
+ |||, '{{cluster }} / {{ job }} / {{ instance }}') +
+ { yaxes: g.yaxes('percentunit') },
+ )
+ )
+ .addRow(
+ g.row('Ops')
+ .addPanel(
+ g.panel('Commands') +
+ g.queryPanel('sum by(command, status) (rate(memcached_commands_total{cluster=~"$cluster", job=~"$job", instance=~"$instance"}[$__rate_interval]))', '{{command}} {{status}}')
+ )
+ .addPanel(
+ g.panel('Evictions') +
+ g.queryPanel('sum by(instance) (rate(memcached_items_evicted_total{cluster=~"$cluster", job=~"$job", instance=~"$instance"}[$__rate_interval]))', '{{instance}}')
+ )
+ .addPanel(
+ g.panel('Stored') +
+ g.queryPanel('sum by(instance) (rate(memcached_items_total{cluster=~"$cluster", job=~"$job", instance=~"$instance"}[$__rate_interval]))', '{{instance}}')
+ )
+ )
+ .addRow(
+ g.row('Memory')
+ .addPanel(
+ g.panel('Memory') +
+ g.queryPanel('sum by(instance) (memcached_current_bytes{cluster=~"$cluster", job=~"$job", instance=~"$instance"})', '{{instance}}') +
+ g.stack +
+ { yaxes: g.yaxes('bytes') },
+ // TODO add memcached_limit_bytes
+ )
+ .addPanel(
+ g.panel('Items') +
+ g.queryPanel('sum by(instance) (memcached_current_items{cluster=~"$cluster", job=~"$job", instance=~"$instance"})', '{{instance}}') +
+ g.stack,
+ )
+ )
+ .addRow(
+ g.row('Network')
+ .addPanel(
+ g.panel('Current Connections') +
+ g.queryPanel([
+ 'sum by(instance) (memcached_current_connections{cluster=~"$cluster", job=~"$job", instance=~"$instance"})',
+ // Be conservative showing the lowest setting for max connections among all selected instances.
+ 'min(memcached_max_connections{cluster=~"$cluster", job=~"$job", instance=~"$instance"})',
+ ], [
+ '{{instance}}',
+ 'Max Connections (min setting across all instances)',
+ ])
+ )
+ .addPanel(
+ g.panel('Connections / sec') +
+ g.queryPanel([
+ 'sum by(instance) (rate(memcached_connections_total{cluster=~"$cluster", job=~"$job", instance=~"$instance"}[$__rate_interval]))',
+ ], [
+ '{{instance}}',
+ ])
+ )
+ .addPanel(
+ g.panel('Reads') +
+ g.queryPanel('sum by(instance) (rate(memcached_read_bytes_total{cluster=~"$cluster", job=~"$job", instance=~"$instance"}[$__rate_interval]))', '{{instance}}') +
+ { yaxes: g.yaxes('bps') },
+ )
+ .addPanel(
+ g.panel('Writes') +
+ g.queryPanel('sum by(instance) (rate(memcached_written_bytes_total{cluster=~"$cluster", job=~"$job", instance=~"$instance"}[$__rate_interval]))', '{{instance}}') +
+ { yaxes: g.yaxes('bps') },
+ )
+ )
+ .addRow(
+ g.row('Memcached Info')
+ .addPanel(
+ g.panel('Memcached Info') +
+ g.tablePanel([
+ 'count by (job, instance, version) (memcached_version{cluster=~"$cluster", job=~"$job", instance=~"$instance"})',
+ 'max by (job, instance) (memcached_uptime_seconds{cluster=~"$cluster", job=~"$job", instance=~"$instance"})',
+ ], {
+ job: { alias: 'Job' },
+ instance: { alias: 'Instance' },
+ version: { alias: 'Version' },
+ 'Value #A': { alias: 'Count', type: 'hidden' },
+ 'Value #B': { alias: 'Uptime', type: 'number', unit: 'dtdurations' },
+ })
+ )
+ ),
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/jsonnetfile.json b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/jsonnetfile.json
new file mode 100644
index 0000000..39c0778
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/jsonnetfile.json
@@ -0,0 +1,15 @@
+{
+ "version": 1,
+ "dependencies": [
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/jsonnet-libs.git",
+ "subdir": "grafana-builder"
+ }
+ },
+ "version": "master"
+ }
+ ],
+ "legacyImports": true
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/mixin.libsonnet b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/mixin.libsonnet
new file mode 100644
index 0000000..5196d4f
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/grafana/jsonnet-libs/memcached-mixin/mixin.libsonnet
@@ -0,0 +1,5 @@
+(import 'dashboards.libsonnet') +
+(import 'alerts.libsonnet') +
+{
+ grafanaDashboardFolder: 'Memcached',
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/.circleci/config.yml b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/.circleci/config.yml
new file mode 100644
index 0000000..abe6d4d
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/.circleci/config.yml
@@ -0,0 +1,11 @@
+version: 2
+jobs:
+ build:
+ docker:
+ - image: quay.io/coreos/jsonnet-ci:release-0.38
+
+ working_directory: /go/src/github.com/povilasv/coredns-mixin
+ steps:
+ - checkout
+ - run: jb install
+ - run: make
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/.gitignore b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/.gitignore
new file mode 100644
index 0000000..940474b
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/.gitignore
@@ -0,0 +1,4 @@
+prometheus_alerts.yaml
+vendor
+jsonnetfile.lock.json
+dashboards_out
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/LICENSE b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/Makefile b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/Makefile
new file mode 100644
index 0000000..80a8175
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/Makefile
@@ -0,0 +1,35 @@
+JSONNET_ARGS := -n 2 --max-blank-lines 2 --string-style s --comment-style s
+ifneq (,$(shell which jsonnetfmt))
+ JSONNET_FMT_CMD := jsonnetfmt
+else
+ JSONNET_FMT_CMD := jsonnet
+ JSONNET_FMT_ARGS := fmt $(JSONNET_ARGS)
+endif
+JSONNET_FMT := $(JSONNET_FMT_CMD) $(JSONNET_FMT_ARGS)
+
+all: fmt prometheus_alerts.yaml dashboards_out lint test
+
+fmt:
+ find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
+ xargs -n 1 -- $(JSONNET_FMT) -i
+
+prometheus_alerts.yaml: mixin.libsonnet lib/alerts.jsonnet alerts/*.libsonnet
+ jsonnet -J vendor -S lib/alerts.jsonnet > $@
+
+dashboards_out: mixin.libsonnet lib/dashboards.jsonnet dashboards/*.libsonnet
+ @mkdir -p dashboards_out
+ jsonnet -J vendor -m dashboards_out lib/dashboards.jsonnet
+
+lint: prometheus_alerts.yaml
+ find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
+ while read f; do \
+ $(JSONNET_FMT) "$$f" | diff -u "$$f" -; \
+ done
+
+ promtool check rules prometheus_alerts.yaml
+
+clean:
+ rm -rf dashboards_out prometheus_alerts.yaml
+
+test: prometheus_alerts.yaml
+ promtool test rules tests.yaml
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/README.md b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/README.md
new file mode 100644
index 0000000..a1796b3
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/README.md
@@ -0,0 +1,44 @@
+# Prometheus Monitoring Mixin for CoreDNS
+[](https://circleci.com/gh/povilasv/coredns-mixin)
+
+A set of Grafana dashboards & Prometheus alerts for CoreDNS.
+
+## How to use
+
+This mixin is designed to be vendored into the repo with your infrastructure config.
+To do this, use [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler):
+
+## Generate config files
+
+You can manually generate the alerts, dashboards and rules files, but first you
+must install some tools:
+
+```
+$ go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
+$ brew install jsonnet
+```
+
+Then, grab the mixin and its dependencies:
+
+```
+$ git clone https://github.com/povilasv/coredns-mixin
+$ cd coredns-mixin
+$ jb install
+```
+
+Finally, build the mixin:
+
+```
+$ make prometheus_alerts.yaml
+$ make dashboards_out
+```
+
+The `prometheus_alerts.yaml` file then needs to be passed
+to your Prometheus server, and the files in `dashboards_out` need to be imported
+into your Grafana server. The exact details will depend on how you deploy your
+monitoring stack.
+
+## Background
+
+* For more information about monitoring mixins, see this [design doc](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/edit#).
+* CoreDNS Prometheus metrics plugin [docs](https://github.com/coredns/coredns/tree/master/plugin/metrics)
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/add-runbook-links.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/add-runbook-links.libsonnet
new file mode 100644
index 0000000..fd20972
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/add-runbook-links.libsonnet
@@ -0,0 +1,23 @@
+local utils = import '../lib/utils.libsonnet';
+
+local lower(x) =
+ local cp(c) = std.codepoint(c);
+ local lowerLetter(c) =
+ if cp(c) >= 65 && cp(c) < 91
+ then std.char(cp(c) + 32)
+ else c;
+ std.join('', std.map(lowerLetter, std.stringChars(x)));
+
+{
+ _config+:: {
+ corednsRunbookURLPattern: 'https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-%s',
+ },
+
+ prometheusAlerts+::
+ local addRunbookURL(rule, group) = rule {
+ [if 'alert' in rule && std.member(['coredns', 'coredns_forward'], group.name) then 'annotations']+: {
+ runbook_url: $._config.corednsRunbookURLPattern % lower(rule.alert),
+ },
+ };
+ utils.mapRuleGroups(addRunbookURL),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/alerts.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/alerts.libsonnet
new file mode 100644
index 0000000..c165dde
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/alerts.libsonnet
@@ -0,0 +1,3 @@
+(import 'coredns.libsonnet') +
+(import 'forward.libsonnet') +
+(import 'add-runbook-links.libsonnet')
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/coredns.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/coredns.libsonnet
new file mode 100644
index 0000000..80c3e69
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/coredns.libsonnet
@@ -0,0 +1,71 @@
+{
+ _config+:: {
+ corednsSelector: error 'must provide selector for coredns',
+ corednsLatencyCriticalSeconds: 4,
+ },
+ prometheusAlerts+:: {
+ groups+: [
+ {
+ name: 'coredns',
+ rules: [
+ {
+ alert: 'CoreDNSDown',
+ 'for': '15m',
+ expr: |||
+ absent(up{%(corednsSelector)s} == 1)
+ ||| % $._config,
+ labels: {
+ severity: 'critical',
+ },
+ annotations: {
+ message: 'CoreDNS has disappeared from Prometheus target discovery.',
+ },
+ },
+ {
+ alert: 'CoreDNSLatencyHigh',
+ expr: |||
+ histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{%(corednsSelector)s}[5m])) by(server, zone, le)) > %(corednsLatencyCriticalSeconds)s
+ ||| % $._config,
+ 'for': '10m',
+ labels: {
+ severity: 'critical',
+ },
+ annotations: {
+ message: 'CoreDNS has 99th percentile latency of {{ $value }} seconds for server {{ $labels.server }} zone {{ $labels.zone }} .',
+ },
+ },
+ {
+ alert: 'CoreDNSErrorsHigh',
+ expr: |||
+ sum(rate(coredns_dns_responses_total{%(corednsSelector)s,rcode="SERVFAIL"}[5m]))
+ /
+ sum(rate(coredns_dns_responses_total{%(corednsSelector)s}[5m])) > 0.03
+ ||| % $._config,
+ 'for': '10m',
+ labels: {
+ severity: 'critical',
+ },
+ annotations: {
+ message: 'CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.',
+ },
+ },
+ {
+ alert: 'CoreDNSErrorsHigh',
+ expr: |||
+ sum(rate(coredns_dns_responses_total{%(corednsSelector)s,rcode="SERVFAIL"}[5m]))
+ /
+ sum(rate(coredns_dns_responses_total{%(corednsSelector)s}[5m])) > 0.01
+ ||| % $._config,
+ 'for': '10m',
+ labels: {
+ severity: 'warning',
+ },
+ annotations: {
+ message: 'CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of requests.',
+ },
+ },
+ ],
+ },
+ ],
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/forward.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/forward.libsonnet
new file mode 100644
index 0000000..05459db
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/alerts/forward.libsonnet
@@ -0,0 +1,84 @@
+{
+ _config+:: {
+ corednsSelector: error 'must provide selector for coredns',
+ corednsForwardLatencyCriticalSeconds: 4,
+ },
+ prometheusAlerts+:: {
+ groups+: [
+ {
+ name: 'coredns_forward',
+ rules: [
+ {
+ alert: 'CoreDNSForwardLatencyHigh',
+ expr: |||
+ histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{%(corednsSelector)s}[5m])) by(to, le)) > %(corednsForwardLatencyCriticalSeconds)s
+ ||| % $._config,
+ 'for': '10m',
+ labels: {
+ severity: 'critical',
+ },
+ annotations: {
+ message: 'CoreDNS has 99th percentile latency of {{ $value }} seconds forwarding requests to {{ $labels.to }}.',
+ },
+ },
+ {
+ alert: 'CoreDNSForwardErrorsHigh',
+ expr: |||
+ sum(rate(coredns_forward_responses_total{%(corednsSelector)s,rcode="SERVFAIL"}[5m]))
+ /
+ sum(rate(coredns_forward_responses_total{%(corednsSelector)s}[5m])) > 0.03
+ ||| % $._config,
+ 'for': '10m',
+ labels: {
+ severity: 'critical',
+ },
+ annotations: {
+ message: 'CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}.',
+ },
+ },
+ {
+ alert: 'CoreDNSForwardErrorsHigh',
+ expr: |||
+ sum(rate(coredns_forward_responses_total{%(corednsSelector)s,rcode="SERVFAIL"}[5m]))
+ /
+ sum(rate(coredns_forward_responses_total{%(corednsSelector)s}[5m])) > 0.01
+ ||| % $._config,
+ 'for': '10m',
+ labels: {
+ severity: 'warning',
+ },
+ annotations: {
+ message: 'CoreDNS is returning SERVFAIL for {{ $value | humanizePercentage }} of forward requests to {{ $labels.to }}.',
+ },
+ },
+ {
+ alert: 'CoreDNSForwardHealthcheckFailureCount',
+ expr: |||
+ sum(rate(coredns_forward_healthcheck_failures_total{%(corednsSelector)s}[5m])) by (to) > 0
+ ||| % $._config,
+ 'for': '10m',
+ labels: {
+ severity: 'warning',
+ },
+ annotations: {
+ message: 'CoreDNS health checks have failed to upstream server {{ $labels.to }}.',
+ },
+ },
+ {
+ alert: 'CoreDNSForwardHealthcheckBrokenCount',
+ expr: |||
+ sum(rate(coredns_forward_healthcheck_broken_total{%(corednsSelector)s}[5m])) > 0
+ ||| % $._config,
+ 'for': '10m',
+ labels: {
+ severity: 'warning',
+ },
+ annotations: {
+ message: 'CoreDNS health checks have failed for all upstream servers.',
+ },
+ },
+ ],
+ },
+ ],
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/config.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/config.libsonnet
new file mode 100644
index 0000000..5a03a50
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/config.libsonnet
@@ -0,0 +1,20 @@
+{
+ _config+:: {
+ corednsSelector: 'job="kube-dns"',
+ instanceLabel: 'pod',
+
+ grafanaDashboardIDs: {
+ 'coredns.json': 'thael1rie7ohG6OY3eMeisahtee2iGoo1gooGhuu',
+ },
+
+ pluginNameLabel: 'name',
+ kubernetesPlugin: false,
+ grafana: {
+ dashboardNamePrefix: '',
+ dashboardTags: ['coredns-mixin'],
+
+ // The default refresh time for all dashboards, default to 10s
+ refresh: '10s',
+ },
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/dashboards/coredns.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/dashboards/coredns.libsonnet
new file mode 100644
index 0000000..4a4a84f
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/dashboards/coredns.libsonnet
@@ -0,0 +1,251 @@
+local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
+local dashboard = grafana.dashboard;
+local row = grafana.row;
+local prometheus = grafana.prometheus;
+local template = grafana.template;
+local graphPanel = grafana.graphPanel;
+local singlestat = grafana.singlestat;
+
+{
+ _config+:: {
+ corednsSelector: 'job="kube-dns"',
+ },
+
+ grafanaDashboards+:: {
+ 'coredns.json':
+ local upCount =
+ singlestat.new(
+ 'Up',
+ datasource='$datasource',
+ span=1,
+ valueName='min',
+ )
+ .addTarget(prometheus.target('sum(up{%(corednsSelector)s})' % $._config));
+
+ local panicsCount =
+ singlestat.new(
+ 'Panics',
+ datasource='$datasource',
+ span=1,
+ valueName='max',
+ )
+ .addTarget(prometheus.target('sum(coredns_panics_total{%(corednsSelector)s})' % $._config));
+
+ local rpcRate =
+ graphPanel.new(
+ 'RPC Rate',
+ datasource='$datasource',
+ span=5,
+ format='ops',
+ min=0,
+ )
+ .addTarget(prometheus.target('sum(rate(coredns_dns_responses_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (rcode)' % $._config, legendFormat='{{rcode}}'))
+ .addTarget(prometheus.target('sum(rate(coredns_forward_responses_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (rcode)' % $._config, legendFormat='forward {{rcode}}'));
+
+ local requestDuration =
+ graphPanel.new(
+ 'Request duration 99th quantile',
+ datasource='$datasource',
+ span=5,
+ format='s',
+ legend_show=true,
+ legend_values=true,
+ legend_current=true,
+ legend_alignAsTable=true,
+ legend_rightSide=true,
+ min=0,
+ )
+ .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (server, zone, le))' % $._config, legendFormat='{{server}} {{zone}}'))
+ .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (to, le))' % $._config, legendFormat='forward {{to}}'));
+
+ local typeRate =
+ graphPanel.new(
+ 'Requests (by qtype)',
+ datasource='$datasource',
+ span=4,
+ format='ops',
+ min=0,
+ )
+ .addTarget(prometheus.target('sum(rate(coredns_dns_requests_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (type)' % $._config, legendFormat='{{type}}'));
+
+ local zoneRate =
+ graphPanel.new(
+ 'Requests (by zone)',
+ datasource='$datasource',
+ span=4,
+ format='ops',
+ min=0,
+ )
+ .addTarget(prometheus.target('sum(rate(coredns_dns_requests_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (zone)' % $._config, legendFormat='{{zone}}'));
+
+ local forwardRate =
+ graphPanel.new(
+ 'Forward Requests (by to)',
+ datasource='$datasource',
+ span=4,
+ format='ops',
+ min=0,
+ )
+ .addTarget(prometheus.target('sum(rate(coredns_forward_requests_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (to)' % $._config, legendFormat='{{to}}'));
+
+ local kubernetesDuration = if $._config.kubernetesPlugin then
+ graphPanel.new(
+ 'Kubernetes DNS programming duration',
+ datasource='$datasource',
+ span=4,
+ format='seconds',
+ min=0,
+ )
+ .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(coredns_kubernetes_dns_programming_duration_seconds_bucket{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (service_kind, le))' % $._config, legendFormat='99th {{service_kind}}'))
+ .addTarget(prometheus.target('histogram_quantile(0.50, sum(rate(coredns_kubernetes_dns_programming_duration_seconds_bucket{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (service_kind, le))' % $._config, legendFormat='50th {{service_kind}}'))
+ else
+ singlestat.new(
+ 'Plugins Enabled',
+ datasource='$datasource',
+ span=2,
+ valueName='min',
+ )
+ .addTarget(prometheus.target('count(sum(coredns_plugin_enabled{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}) by (%(pluginNameLabel)s))' % $._config));
+
+ local requestSize =
+ graphPanel.new(
+ 'Request size',
+ datasource='$datasource',
+ span=if $._config.kubernetesPlugin then 4 else 5,
+ format='bytes',
+ min=0,
+ )
+ .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(coredns_dns_request_size_bytes_bucket{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (server, zone, proto, le))' % $._config, legendFormat='99th {{server}} {{zone}} {{proto}}'))
+ .addTarget(prometheus.target('histogram_quantile(0.50, sum(rate(coredns_dns_request_size_bytes_bucket{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (server, zone, proto, le))' % $._config, legendFormat='50th {{server}} {{zone}} {{proto}}'));
+
+ local responseSize =
+ graphPanel.new(
+ 'Response size',
+ datasource='$datasource',
+ span=if $._config.kubernetesPlugin then 4 else 5,
+ format='bytes',
+ min=0,
+ )
+ .addTarget(prometheus.target('histogram_quantile(0.99, sum(rate(coredns_dns_response_size_bytes_bucket{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (server, zone, proto, le))' % $._config, legendFormat='99th {{server}} {{zone}} {{proto}}'))
+ .addTarget(prometheus.target('histogram_quantile(0.50, sum(rate(coredns_dns_response_size_bytes_bucket{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (server, zone, proto, le))' % $._config, legendFormat='50th {{server}} {{zone}} {{proto}}'));
+
+ local cachePercentage =
+ singlestat.new(
+ 'Cached',
+ datasource='$datasource',
+ span=2,
+ valueName='min',
+ format='percentunit',
+ )
+ .addTarget(prometheus.target('sum(coredns_cache_hits_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}) / (sum(coredns_cache_misses_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}) + sum(coredns_cache_hits_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}))' % $._config));
+
+ local cacheRate =
+ graphPanel.new(
+ 'Cache hit Rate',
+ datasource='$datasource',
+ span=5,
+ format='ops',
+ min=0,
+ )
+ .addTarget(prometheus.target('sum(rate(coredns_cache_hits_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])) by (type)' % $._config, legendFormat='{{type}}'))
+ .addTarget(prometheus.target('sum(rate(coredns_cache_misses_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m]))' % $._config, legendFormat='misses'));
+
+ local cacheSize =
+ graphPanel.new(
+ 'Cache Size',
+ datasource='$datasource',
+ span=5,
+ format='short',
+ min=0,
+ )
+ .addTarget(prometheus.target('sum(coredns_cache_entries{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}) by (type)' % $._config, legendFormat='{{type}}'));
+
+ local memory =
+ graphPanel.new(
+ 'Memory',
+ datasource='$datasource',
+ span=4,
+ format='bytes',
+ min=0,
+ )
+ .addTarget(prometheus.target('process_resident_memory_bytes{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}' % $._config, legendFormat='{{%(instanceLabel)s}}' % $._config));
+
+ local cpu =
+ graphPanel.new(
+ 'CPU usage',
+ datasource='$datasource',
+ span=4,
+ format='short',
+ min=0,
+ )
+ .addTarget(prometheus.target('rate(process_cpu_seconds_total{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}[5m])' % $._config, legendFormat='{{%(instanceLabel)s}}' % $._config));
+
+ local goroutines =
+ graphPanel.new(
+ 'Goroutines',
+ datasource='$datasource',
+ span=4,
+ format='short',
+ min=0,
+ )
+ .addTarget(prometheus.target('go_goroutines{%(corednsSelector)s,%(instanceLabel)s=~"$instance"}' % $._config, legendFormat='{{%(instanceLabel)s}}' % $._config));
+
+
+ dashboard.new(
+ '%(dashboardNamePrefix)sCoreDNS' % $._config.grafana,
+ time_from='now-1h',
+ uid=($._config.grafanaDashboardIDs['coredns.json']),
+ tags=($._config.grafana.dashboardTags),
+ ).addTemplate(
+ {
+ current: {
+ text: 'default',
+ value: 'default',
+ },
+ hide: 0,
+ label: 'Data Source',
+ name: 'datasource',
+ options: [],
+ query: 'prometheus',
+ refresh: 1,
+ regex: '',
+ type: 'datasource',
+ },
+ ).addTemplate(
+ template.new(
+ 'instance',
+ '$datasource',
+ 'label_values(coredns_build_info{%(corednsSelector)s}, %(instanceLabel)s)' % $._config,
+ refresh='time',
+ includeAll=true,
+ sort=1,
+ )
+ ).addRow(
+ row.new()
+ .addPanel(upCount)
+ .addPanel(panicsCount)
+ .addPanel(rpcRate)
+ .addPanel(requestDuration)
+ ).addRow(
+ row.new()
+ .addPanel(typeRate)
+ .addPanel(zoneRate)
+ .addPanel(forwardRate)
+ ).addRow(
+ row.new()
+ .addPanel(cachePercentage)
+ .addPanel(cacheRate)
+ .addPanel(cacheSize)
+ ).addRow(
+ row.new()
+ .addPanel(kubernetesDuration)
+ .addPanel(requestSize)
+ .addPanel(responseSize)
+ ).addRow(
+ row.new()
+ .addPanel(memory)
+ .addPanel(cpu)
+ .addPanel(goroutines)
+ ) + { refresh: $._config.grafana.refresh },
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/dashboards/dashboards.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/dashboards/dashboards.libsonnet
new file mode 100644
index 0000000..d5636af
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/dashboards/dashboards.libsonnet
@@ -0,0 +1 @@
+(import 'coredns.libsonnet')
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/jsonnetfile.json b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/jsonnetfile.json
new file mode 100644
index 0000000..4a56eb7
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/jsonnetfile.json
@@ -0,0 +1,15 @@
+{
+ "version": 1,
+ "dependencies": [
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/grafonnet-lib",
+ "subdir": "grafonnet"
+ }
+ },
+ "version": "master"
+ }
+ ],
+ "legacyImports": true
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/alerts.jsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/alerts.jsonnet
new file mode 100644
index 0000000..d396a38
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/alerts.jsonnet
@@ -0,0 +1 @@
+std.manifestYamlDoc((import '../mixin.libsonnet').prometheusAlerts)
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/dashboards.jsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/dashboards.jsonnet
new file mode 100644
index 0000000..dadaebe
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/dashboards.jsonnet
@@ -0,0 +1,6 @@
+local dashboards = (import '../mixin.libsonnet').grafanaDashboards;
+
+{
+ [name]: dashboards[name]
+ for name in std.objectFields(dashboards)
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/utils.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/utils.libsonnet
new file mode 100644
index 0000000..12c62cf
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/lib/utils.libsonnet
@@ -0,0 +1,13 @@
+{
+ mapRuleGroups(f): {
+ groups: [
+ group {
+ rules: [
+ f(rule, group)
+ for rule in super.rules
+ ],
+ }
+ for group in super.groups
+ ],
+ },
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/mixin.libsonnet b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/mixin.libsonnet
new file mode 100644
index 0000000..119d2cd
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/mixin.libsonnet
@@ -0,0 +1,3 @@
+(import 'alerts/alerts.libsonnet') +
+(import 'dashboards/dashboards.libsonnet') +
+(import 'config.libsonnet')
diff --git a/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/tests.yaml b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/tests.yaml
new file mode 100644
index 0000000..2b1fa7e
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/povilasv/coredns-mixin/tests.yaml
@@ -0,0 +1,75 @@
+rule_files:
+ - prometheus_alerts.yaml
+
+evaluation_interval: 1m
+
+tests:
+- interval: 1m
+ input_series:
+ - series: 'coredns_dns_responses_total{instance="1.2.3.4:9153",job="kube-dns",kubernetes_name="coredns-kube-metrics",kubernetes_namespace="kube-system",pod="coredns-65b6759cb4-qgdxp",rcode="NOERROR",server="dns://:53",zone="."}'
+ values: '0 100 200 300 400 500 600 700 800 900 1000 1100 1200'
+ - series: 'coredns_dns_responses_total{instance="1.2.3.4:9153",job="kube-dns",kubernetes_name="coredns-kube-metrics",kubernetes_namespace="kube-system",pod="coredns-65b6759cb4-qgdxp",rcode="SERVFAIL",server="dns://:53",zone="."}'
+ values: '0 100 200 300 400 500 600 700 800 900 1000 1100 1200'
+ alert_rule_test:
+ - eval_time: 11m
+ alertname: CoreDNSErrorsHigh
+ exp_alerts:
+ - exp_labels:
+ severity: warning
+ exp_annotations:
+ message: "CoreDNS is returning SERVFAIL for 50% of requests."
+ runbook_url: "https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh"
+ - exp_labels:
+ severity: critical
+ exp_annotations:
+ message: "CoreDNS is returning SERVFAIL for 50% of requests."
+ runbook_url: "https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednserrorshigh"
+
+- interval: 1m
+ input_series:
+ - series: 'coredns_dns_request_duration_seconds_bucket{instance="1.2.3.4:9153",job="kube-dns",kubernetes_name="coredns-kube-metrics",kubernetes_namespace="kube-system",pod="coredns-65b6759cb4-qgdxp",le="0.001",server="dns://:53",zone="."}'
+ values: '0 1 2 3 4 5 6 7 8 9 10 11 12'
+ - series: 'coredns_dns_request_duration_seconds_bucket{instance="1.2.3.4:9153",job="kube-dns",kubernetes_name="coredns-kube-metrics",kubernetes_namespace="kube-system",pod="coredns-65b6759cb4-qgdxp",le="8.192",server="dns://:53",zone="."}'
+ values: '0 100 200 300 400 500 600 700 800 900 1000 1100 1200'
+ - series: 'coredns_dns_request_duration_seconds_bucket{instance="1.2.3.4:9153",job="kube-dns",kubernetes_name="coredns-kube-metrics",kubernetes_namespace="kube-system",pod="coredns-65b6759cb4-qgdxp",le="+Inf",server="dns://:53",zone="."}'
+ values: '0 100 200 300 400 500 600 700 800 900 1000 1100 1200'
+ alert_rule_test:
+ - eval_time: 11m
+ alertname: CoreDNSLatencyHigh
+ exp_alerts:
+ - exp_labels:
+ severity: critical
+ server: "dns://:53"
+ zone: "."
+ exp_annotations:
+ message: "CoreDNS has 99th percentile latency of 8.109262626262627 seconds for server dns://:53 zone . ."
+ runbook_url: "https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednslatencyhigh"
+
+- interval: 1m
+ input_series:
+ - series: 'coredns_forward_healthcheck_failures_total{instance="1.2.3.4:9153",job="kube-dns",kubernetes_name="coredns-kube-metrics",kubernetes_namespace="kube-system",pod="coredns-65b6759cb4-qgdxp",server="dns://:53",zone=".",to="5.6.7.8"}'
+ values: '0 1 2 3 4 5 6 7 8 9 10 11 12'
+ alert_rule_test:
+ - eval_time: 11m
+ alertname: CoreDNSForwardHealthcheckFailureCount
+ exp_alerts:
+ - exp_labels:
+ severity: warning
+ to: "5.6.7.8"
+ exp_annotations:
+ message: "CoreDNS health checks have failed to upstream server 5.6.7.8."
+ runbook_url: "https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardhealthcheckfailurecount"
+
+- interval: 1m
+ input_series:
+ - series: 'coredns_forward_healthcheck_broken_total{instance="1.2.3.4:9153",job="kube-dns",kubernetes_name="coredns-kube-metrics",kubernetes_namespace="kube-system",pod="coredns-65b6759cb4-qgdxp",server="dns://:53",zone=".",to="5.6.7.8"}'
+ values: '0 1 2 3 4 5 6 7 8 9 10 11 12'
+ alert_rule_test:
+ - eval_time: 11m
+ alertname: CoreDNSForwardHealthcheckBrokenCount
+ exp_alerts:
+ - exp_labels:
+ severity: warning
+ exp_annotations:
+ message: "CoreDNS health checks have failed for all upstream servers."
+ runbook_url: "https://github.com/povilasv/coredns-mixin/tree/master/runbook.md#alert-name-corednsforwardhealthcheckbrokencount"
diff --git a/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/.gitignore b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/.gitignore
new file mode 100644
index 0000000..97bf5f5
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/.gitignore
@@ -0,0 +1,3 @@
+/alerts.yaml
+/rules.yaml
+dashboards_out
diff --git a/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/Makefile b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/Makefile
new file mode 100644
index 0000000..f2643c2
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/Makefile
@@ -0,0 +1,23 @@
+JSONNET_FMT := jsonnetfmt -n 2 --max-blank-lines 2 --string-style s --comment-style s
+
+default: build
+
+all: fmt lint build clean
+
+fmt:
+ find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
+ xargs -n 1 -- $(JSONNET_FMT) -i
+
+lint:
+ find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
+ while read f; do \
+ $(JSONNET_FMT) "$$f" | diff -u "$$f" -; \
+ done
+
+ mixtool lint mixin.libsonnet
+
+build:
+ mixtool generate all mixin.libsonnet
+
+clean:
+ rm -rf dashboards_out alerts.yaml rules.yaml
diff --git a/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/README.md b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/README.md
new file mode 100644
index 0000000..c23605a
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/README.md
@@ -0,0 +1,23 @@
+# MySQLd Mixin
+
+The MySQLd Mixin is a set of configurable, reusable, and extensible alerts and
+dashboards based on the metrics exported by the MySQLd Exporter. The mixin creates
+recording and alerting rules for Prometheus and suitable dashboard descriptions
+for Grafana.
+
+To use them, you need to have `mixtool` and `jsonnetfmt` installed. If you
+have a working Go development environment, it's easiest to run the following:
+```bash
+$ go get github.com/monitoring-mixins/mixtool/cmd/mixtool
+$ go get github.com/google/go-jsonnet/cmd/jsonnetfmt
+```
+
+You can then build the Prometheus rules files `alerts.yaml` and
+`rules.yaml` and a directory `dashboard_out` with the JSON dashboard files
+for Grafana:
+```bash
+$ make build
+```
+
+For more advanced uses of mixins, see
+https://github.com/monitoring-mixins/docs.
diff --git a/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/alerts/galera.yaml b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/alerts/galera.yaml
new file mode 100644
index 0000000..31016b5
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/alerts/galera.yaml
@@ -0,0 +1,74 @@
+###
+# Sample prometheus rules/alerts for mysqld.
+#
+# NOTE: Please review these carefully as thresholds and behavior may not meet
+# your SLOs or labels.
+#
+###
+
+groups:
+- name: GaleraAlerts
+ rules:
+ - alert: MySQLGaleraNotReady
+ expr: mysql_global_status_wsrep_ready != 1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ description: '{{$labels.job}} on {{$labels.instance}} is not ready.'
+ summary: Galera cluster node not ready.
+ - alert: MySQLGaleraOutOfSync
+ expr: (mysql_global_status_wsrep_local_state != 4 and mysql_global_variables_wsrep_desync
+ == 0)
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ description: '{{$labels.job}} on {{$labels.instance}} is not in sync ({{$value}}
+ != 4).'
+ summary: Galera cluster node out of sync.
+ - alert: MySQLGaleraDonorFallingBehind
+ expr: (mysql_global_status_wsrep_local_state == 2 and mysql_global_status_wsrep_local_recv_queue
+ > 100)
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ description: '{{$labels.job}} on {{$labels.instance}} is a donor (hotbackup)
+ and is falling behind (queue size {{$value}}).'
+ summary: XtraDB cluster donor node falling behind.
+ - alert: MySQLReplicationNotRunning
+ expr: mysql_slave_status_slave_io_running == 0 or mysql_slave_status_slave_sql_running
+ == 0
+ for: 2m
+ labels:
+ severity: critical
+ annotations:
+ description: "Replication on {{$labels.instance}} (IO or SQL) has been down for more than 2 minutes."
+ summary: Replication is not running.
+ - alert: MySQLReplicationLag
+ expr: (instance:mysql_slave_lag_seconds > 30) and on(instance) (predict_linear(instance:mysql_slave_lag_seconds[5m],
+ 60 * 2) > 0)
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ description: "Replication on {{$labels.instance}} has fallen behind and is not recovering."
+ summary: MySQL slave replication is lagging.
+ - alert: MySQLHeartbeatLag
+ expr: (instance:mysql_heartbeat_lag_seconds > 30) and on(instance) (predict_linear(instance:mysql_heartbeat_lag_seconds[5m],
+ 60 * 2) > 0)
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ description: "The heartbeat is lagging on {{$labels.instance}} and is not recovering."
+ summary: MySQL heartbeat is lagging.
+ - alert: MySQLInnoDBLogWaits
+ expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10
+ labels:
+ severity: warning
+ annotations:
+ description: The innodb logs are waiting for disk at a rate of {{$value}} /
+ second
+ summary: MySQL innodb log writes stalling.
diff --git a/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/alerts/general.yaml b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/alerts/general.yaml
new file mode 100644
index 0000000..b4d8bf9
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/alerts/general.yaml
@@ -0,0 +1,11 @@
+groups:
+- name: MySQLdAlerts
+ rules:
+ - alert: MySQLDown
+ expr: mysql_up != 1
+ for: 5m
+ labels:
+ severity: critical
+ annotations:
+ description: 'MySQL {{$labels.job}} on {{$labels.instance}} is not up.'
+ summary: MySQL not up.
diff --git a/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/dashboards/mysql-overview.json b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/dashboards/mysql-overview.json
new file mode 100644
index 0000000..089d653
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/dashboards/mysql-overview.json
@@ -0,0 +1,3801 @@
+{
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": true,
+ "gnetId": 11323,
+ "graphTooltip": 1,
+ "id": 31,
+ "iteration": 1603186191702,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 382,
+ "panels": [],
+ "repeat": null,
+ "title": "",
+ "type": "row"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "$datasource",
+ "description": "**Uptime**\n\nThe amount of time since the last restart of the MySQL server process.",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 1,
+ "mappings": [],
+ "nullValueMode": "connected",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 300
+ },
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": 3600
+ }
+ ]
+ },
+ "unit": "s"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 8,
+ "x": 0,
+ "y": 1
+ },
+ "id": 12,
+ "interval": "1m",
+ "links": [],
+ "maxDataPoints": 100,
+ "options": {
+ "colorMode": "value",
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ },
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ }
+ },
+ "pluginVersion": "7.0.4",
+ "targets": [
+ {
+ "calculatedInterval": "10m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_status_uptime{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "metric": "",
+ "refId": "A",
+ "step": 300
+ }
+ ],
+ "title": "Uptime",
+ "type": "stat"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "$datasource",
+ "description": "**Current QPS**\n\nBased on the queries reported by MySQL's ``SHOW STATUS`` command, it is the number of statements executed by the server within the last second. This variable includes statements executed within stored programs, unlike the Questions variable. It does not count \n``COM_PING`` or ``COM_STATISTICS`` commands.",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 2,
+ "mappings": [],
+ "nullValueMode": "connected",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 35
+ },
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": 75
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 8,
+ "x": 8,
+ "y": 1
+ },
+ "id": 13,
+ "interval": "1m",
+ "links": [
+ {
+ "targetBlank": true,
+ "title": "MySQL Server Status Variables",
+ "url": "https://dev.mysql.com/doc/refman/5.7/en/server-status-variables.html#statvar_Queries"
+ }
+ ],
+ "maxDataPoints": 100,
+ "options": {
+ "colorMode": "value",
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ },
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ }
+ },
+ "pluginVersion": "7.0.4",
+ "targets": [
+ {
+ "calculatedInterval": "10m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_queries{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "title": "Current QPS",
+ "type": "stat"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "$datasource",
+ "description": "**InnoDB Buffer Pool Size**\n\nInnoDB maintains a storage area called the buffer pool for caching data and indexes in memory. Knowing how the InnoDB buffer pool works, and taking advantage of it to keep frequently accessed data in memory, is one of the most important aspects of MySQL tuning. The goal is to keep the working set in memory. In most cases, this should be between 60%-90% of available memory on a dedicated database host, but depends on many factors.",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {},
+ "decimals": 0,
+ "mappings": [],
+ "nullValueMode": "connected",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 90
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 95
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 3,
+ "w": 8,
+ "x": 16,
+ "y": 1
+ },
+ "id": 51,
+ "interval": "1m",
+ "links": [
+ {
+ "targetBlank": true,
+ "title": "Tuning the InnoDB Buffer Pool Size",
+ "url": "https://www.percona.com/blog/2015/06/02/80-ram-tune-innodb_buffer_pool_size/"
+ }
+ ],
+ "maxDataPoints": 100,
+ "options": {
+ "colorMode": "value",
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ]
+ },
+ "graphMode": "none",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "mean"
+ ],
+ "fields": "",
+ "values": false
+ }
+ },
+ "pluginVersion": "7.0.4",
+ "targets": [
+ {
+ "calculatedInterval": "10m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_variables_innodb_buffer_pool_size{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "metric": "",
+ "refId": "A",
+ "step": 300
+ }
+ ],
+ "title": "InnoDB Buffer Pool",
+ "type": "stat"
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 4
+ },
+ "id": 383,
+ "panels": [],
+ "repeat": null,
+ "title": "Connections",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 0,
+ "description": "**Max Connections** \n\nMax Connections is the maximum permitted number of simultaneous client connections. By default, this is 151. Increasing this value increases the number of file descriptors that mysqld requires. If the required number of descriptors are not available, the server reduces the value of Max Connections.\n\nmysqld actually permits Max Connections + 1 clients to connect. The extra connection is reserved for use by accounts that have the SUPER privilege, such as root.\n\nMax Used Connections is the maximum number of connections that have been in use simultaneously since the server started.\n\nConnections is the number of connection attempts (successful or not) to the MySQL server.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 5
+ },
+ "height": "250px",
+ "hiddenSeries": false,
+ "id": 92,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+ {
+ "targetBlank": true,
+ "title": "MySQL Server System Variables",
+ "url": "https://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_max_connections"
+ }
+ ],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Max Connections",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(max_over_time(mysql_global_status_threads_connected{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Connections",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(mysql_global_status_max_used_connections{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Max Used Connections",
+ "metric": "",
+ "refId": "C",
+ "step": 20,
+ "target": ""
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(mysql_global_variables_max_connections{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Max Connections",
+ "metric": "",
+ "refId": "B",
+ "step": 20,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Connections",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Active Threads**\n\nThreads Connected is the number of open connections, while Threads Running is the number of threads not sleeping.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 5
+ },
+ "hiddenSeries": false,
+ "id": 10,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Peak Threads Running",
+ "color": "#E24D42",
+ "lines": false,
+ "pointradius": 1,
+ "points": true
+ },
+ {
+ "alias": "Peak Threads Connected",
+ "color": "#1F78C1"
+ },
+ {
+ "alias": "Avg Threads Running",
+ "color": "#EAB839"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(max_over_time(mysql_global_status_threads_connected{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Peak Threads Connected",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(max_over_time(mysql_global_status_threads_running{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Peak Threads Running",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "sum(avg_over_time(mysql_global_status_threads_running{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Avg Threads Running",
+ "refId": "C",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Client Thread Activity",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": [
+ "total"
+ ]
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Threads",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 12
+ },
+ "id": 384,
+ "panels": [],
+ "repeat": null,
+ "title": "Table Locks",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Questions**\n\nThe number of statements executed by the server. This includes only statements sent to the server by clients and not statements executed within stored programs, unlike the Queries used in the QPS calculation. \n\nThis variable does not count the following commands:\n* ``COM_PING``\n* ``COM_STATISTICS``\n* ``COM_STMT_PREPARE``\n* ``COM_STMT_CLOSE``\n* ``COM_STMT_RESET``",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 13
+ },
+ "hiddenSeries": false,
+ "id": 53,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+ {
+ "targetBlank": true,
+ "title": "MySQL Queries and Questions",
+ "url": "https://www.percona.com/blog/2014/05/29/how-mysql-queries-and-questions-are-measured/"
+ }
+ ],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_questions{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Questions",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Thread Cache**\n\nThe thread_cache_size variable sets how many threads the server should cache to reuse. When a client disconnects, the client's threads are put in the cache if the cache is not full. It is autosized in MySQL 5.6.8 and above (capped to 100). Requests for threads are satisfied by reusing threads taken from the cache if possible, and only when the cache is empty is a new thread created.\n\n* *Threads_created*: The number of threads created to handle connections.\n* *Threads_cached*: The number of threads in the thread cache.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 13
+ },
+ "hiddenSeries": false,
+ "id": 11,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+ {
+ "title": "Tuning information",
+ "url": "https://dev.mysql.com/doc/refman/5.6/en/server-system-variables.html#sysvar_thread_cache_size"
+ }
+ ],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Threads Created",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(mysql_global_variables_thread_cache_size{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Thread Cache Size",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(mysql_global_status_threads_cached{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Threads Cached",
+ "metric": "",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_threads_created{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Threads Created",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Thread Cache",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 20
+ },
+ "id": 385,
+ "panels": [],
+ "repeat": null,
+ "title": "Temporary Objects",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 21
+ },
+ "hiddenSeries": false,
+ "id": 22,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_created_tmp_tables{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Created Tmp Tables",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_created_tmp_disk_tables{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Created Tmp Disk Tables",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_created_tmp_files{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Created Tmp Files",
+ "metric": "",
+ "refId": "C",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Temporary Objects",
+ "description": "MySQL Temporary Objects",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Select Types**\n\nAs with most relational databases, selecting based on indexes is more efficient than scanning an entire table's data. Here we see the counters for selects not done with indexes.\n\n* ***Select Scan*** is how many queries caused full table scans, in which all the data in the table had to be read and either discarded or returned.\n* ***Select Range*** is how many queries used a range scan, which means MySQL scanned all rows in a given range.\n* ***Select Full Join*** is the number of joins that are not joined on an index, this is usually a huge performance hit.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 21
+ },
+ "height": "250px",
+ "hiddenSeries": false,
+ "id": 311,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_select_full_join{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Select Full Join",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_select_full_range_join{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Select Full Range Join",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_select_range{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Select Range",
+ "metric": "",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_select_range_check{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Select Range Check",
+ "metric": "",
+ "refId": "D",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_select_scan{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Select Scan",
+ "metric": "",
+ "refId": "E",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Select Types",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 28
+ },
+ "id": 386,
+ "panels": [],
+ "repeat": null,
+ "title": "Sorts",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Sorts**\n\nDue to a query's structure, order, or other requirements, MySQL sorts the rows before returning them. For example, if a table is ordered 1 to 10 but you want the results reversed, MySQL then has to sort the rows to return 10 to 1.\n\nThis graph also shows when sorts had to scan a whole table or a given range of a table in order to return the results and which could not have been sorted via an index.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 29
+ },
+ "hiddenSeries": false,
+ "id": 30,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_sort_rows{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Sort Rows",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_sort_range{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Sort Range",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_sort_merge_passes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Sort Merge Passes",
+ "metric": "",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_sort_scan{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Sort Scan",
+ "metric": "",
+ "refId": "D",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Sorts",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Slow Queries**\n\nSlow queries are defined as queries being slower than the long_query_time setting. For example, if you have long_query_time set to 3, all queries that take longer than 3 seconds to complete will show on this graph.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 29
+ },
+ "hiddenSeries": false,
+ "id": 48,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_slow_queries{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Slow Queries",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Slow Queries",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 36
+ },
+ "id": 387,
+ "panels": [],
+ "repeat": null,
+ "title": "Aborted",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**Aborted Connections**\n\nWhen a given host connects to MySQL and the connection is interrupted in the middle (for example due to bad credentials), MySQL keeps that info in a system table (since 5.6 this table is exposed in performance_schema).\n\nIf the amount of failed requests without a successful connection reaches the value of max_connect_errors, mysqld assumes that something is wrong and blocks the host from further connection.\n\nTo allow connections from that host again, you need to issue the ``FLUSH HOSTS`` statement.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 37
+ },
+ "hiddenSeries": false,
+ "id": 47,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_aborted_connects{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Aborted Connects (attempts)",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_aborted_clients{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Aborted Clients (timeout)",
+ "metric": "",
+ "refId": "B",
+ "step": 20,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Aborted Connections",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**Table Locks**\n\nMySQL takes a number of different locks for varying reasons. In this graph we see how many Table level locks MySQL has requested from the storage engine. In the case of InnoDB, many times the locks could actually be row locks as it only takes table level locks in a few specific cases.\n\nIt is most useful to compare Locks Immediate and Locks Waited. If Locks waited is rising, it means you have lock contention. Otherwise, Locks Immediate rising and falling is normal activity.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 37
+ },
+ "hiddenSeries": false,
+ "id": 32,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_table_locks_immediate{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Table Locks Immediate",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_table_locks_waited{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Table Locks Waited",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Table Locks",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 44
+ },
+ "id": 388,
+ "panels": [],
+ "repeat": null,
+ "title": "Network",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Network Traffic**\n\nHere we can see how much network traffic is generated by MySQL. Outbound is network traffic sent from MySQL and Inbound is network traffic MySQL has received.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 6,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 45
+ },
+ "hiddenSeries": false,
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_bytes_received{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Inbound",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "sum(rate(mysql_global_status_bytes_sent{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Outbound",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Network Traffic",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "none",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 52
+ },
+ "id": 389,
+ "panels": [],
+ "repeat": null,
+ "title": "Memory",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 0,
+ "description": "***System Memory***: Total Memory for the system.\\\n***InnoDB Buffer Pool Data***: InnoDB maintains a storage area called the buffer pool for caching data and indexes in memory.\\\n***TokuDB Cache Size***: Similar in function to the InnoDB Buffer Pool, TokuDB will allocate 50% of the installed RAM for its own cache.\\\n***Key Buffer Size***: Index blocks for MYISAM tables are buffered and are shared by all threads. key_buffer_size is the size of the buffer used for index blocks.\\\n***Adaptive Hash Index Size***: When InnoDB notices that some index values are being accessed very frequently, it builds a hash index for them in memory on top of B-Tree indexes.\\\n ***Query Cache Size***: The query cache stores the text of a SELECT statement together with the corresponding result that was sent to the client. The query cache has huge scalability problems in that only one thread can do an operation in the query cache at the same time.\\\n***InnoDB Dictionary Size***: The data dictionary is InnoDB ‘s internal catalog of tables. InnoDB stores the data dictionary on disk, and loads entries into memory while the server is running.\\\n***InnoDB Log Buffer Size***: The MySQL InnoDB log buffer allows transactions to run without having to write the log to disk before the transactions commit.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 6,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 53
+ },
+ "hiddenSeries": false,
+ "id": 50,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+ {
+ "title": "Detailed descriptions about metrics",
+ "url": "https://www.percona.com/doc/percona-monitoring-and-management/dashboard.mysql-overview.html#mysql-internal-memory-overview"
+ }
+ ],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "System Memory",
+ "fill": 0,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(mysql_global_status_innodb_page_size{job=~\"$job\", instance=~\"$instance\"} * on (instance) mysql_global_status_buffer_pool_pages{job=~\"$job\", instance=~\"$instance\", state=\"data\"})",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "InnoDB Buffer Pool Data",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum(mysql_global_variables_innodb_log_buffer_size{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "InnoDB Log Buffer Size",
+ "refId": "D",
+ "step": 20
+ },
+ {
+ "expr": "sum(mysql_global_variables_innodb_additional_mem_pool_size{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 2,
+ "legendFormat": "InnoDB Additional Memory Pool Size",
+ "refId": "H",
+ "step": 40
+ },
+ {
+ "expr": "sum(mysql_global_status_innodb_mem_dictionary{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "InnoDB Dictionary Size",
+ "refId": "F",
+ "step": 20
+ },
+ {
+ "expr": "sum(mysql_global_variables_key_buffer_size{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Key Buffer Size",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "sum(mysql_global_variables_query_cache_size{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Query Cache Size",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "expr": "sum(mysql_global_status_innodb_mem_adaptive_hash{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Adaptive Hash Index Size",
+ "refId": "E",
+ "step": 20
+ },
+ {
+ "expr": "sum(mysql_global_variables_tokudb_cache_size{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "TokuDB Cache Size",
+ "refId": "I",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Internal Memory Overview",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 60
+ },
+ "id": 390,
+ "panels": [],
+ "repeat": null,
+ "title": "Command, Handlers, Processes",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**Top Command Counters**\n\nThe Com_{{xxx}} statement counter variables indicate the number of times each xxx statement has been executed. There is one status variable for each type of statement. For example, Com_delete and Com_update count [``DELETE``](https://dev.mysql.com/doc/refman/5.7/en/delete.html) and [``UPDATE``](https://dev.mysql.com/doc/refman/5.7/en/update.html) statements, respectively. Com_delete_multi and Com_update_multi are similar but apply to [``DELETE``](https://dev.mysql.com/doc/refman/5.7/en/delete.html) and [``UPDATE``](https://dev.mysql.com/doc/refman/5.7/en/update.html) statements that use multiple-table syntax.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 61
+ },
+ "hiddenSeries": false,
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+ {
+ "title": "Server Status Variables (Com_xxx)",
+ "url": "https://dev.mysql.com/doc/refman/5.7/en/server-status-variables.html#statvar_Com_xxx"
+ }
+ ],
+ "nullPointMode": "null as zero",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "topk(5, rate(mysql_global_status_commands_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])>0)",
+ "format": "time_series",
+ "hide": false,
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Com_{{ command }}",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Top Command Counters",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Handlers**\n\nHandler statistics are internal statistics on how MySQL is selecting, updating, inserting, and modifying rows, tables, and indexes.\n\nThis is in fact the layer between the Storage Engine and MySQL.\n\n* `read_rnd_next` is incremented when the server performs a full table scan and this is a counter you don't really want to see with a high value.\n* `read_key` is incremented when a read is done with an index.\n* `read_next` is incremented when the storage engine is asked to 'read the next index entry'. A high value means a lot of index scans are being done.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 68
+ },
+ "hiddenSeries": false,
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_handlers_total{job=~\"$job\",instance=~\"$instance\", handler!~\"commit|rollback|savepoint.*|prepare\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "{{ handler }}",
+ "metric": "",
+ "refId": "J",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Handlers",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 75
+ },
+ "hiddenSeries": false,
+ "id": 28,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_handlers_total{job=~\"$job\",instance=~\"$instance\", handler=~\"commit|rollback|savepoint.*|prepare\"}[$__rate_interval])",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "{{ handler }}",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Transaction Handlers",
+ "description": "MySQL Transaction Handlers",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 0,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 82
+ },
+ "hiddenSeries": false,
+ "id": 40,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideZero": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_info_schema_threads{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "{{ state }}",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Process States",
+ "description": "Process States",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 6,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 89
+ },
+ "hiddenSeries": false,
+ "id": 49,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideZero": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": false,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "topk(5, avg_over_time(mysql_info_schema_threads{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "{{ state }}",
+ "metric": "",
+ "refId": "A",
+ "step": 3600
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": "24h",
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Top Process States Hourly",
+ "description": "Top Process States Hourly",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 96
+ },
+ "id": 391,
+ "panels": [],
+ "repeat": null,
+ "title": "Query Cache",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Query Cache Memory**\n\nThe query cache has huge scalability problems in that only one thread can do an operation in the query cache at the same time. This serialization is true not only for SELECTs, but also for INSERT/UPDATE/DELETE.\n\nThis also means that the larger the `query_cache_size` is set to, the slower those operations become. In concurrent environments, the MySQL Query Cache quickly becomes a contention point, decreasing performance. MariaDB and AWS Aurora have done work to try and eliminate the query cache contention in their flavors of MySQL, while MySQL 8.0 has eliminated the query cache feature.\n\nThe recommended settings for most environments is to set:\n ``query_cache_type=0``\n ``query_cache_size=0``\n\nNote that while you can dynamically change these values, to completely remove the contention point you have to restart the database.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 97
+ },
+ "hiddenSeries": false,
+ "id": 46,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_status_qcache_free_memory{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Free Memory",
+ "metric": "",
+ "refId": "F",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_variables_query_cache_size{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Query Cache Size",
+ "metric": "",
+ "refId": "E",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Query Cache Memory",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Query Cache Activity**\n\nThe query cache has huge scalability problems in that only one thread can do an operation in the query cache at the same time. This serialization is true not only for SELECTs, but also for INSERT/UPDATE/DELETE.\n\nThis also means that the larger the `query_cache_size` is set to, the slower those operations become. In concurrent environments, the MySQL Query Cache quickly becomes a contention point, decreasing performance. MariaDB and AWS Aurora have done work to try and eliminate the query cache contention in their flavors of MySQL, while MySQL 8.0 has eliminated the query cache feature.\n\nThe recommended settings for most environments is to set:\n``query_cache_type=0``\n``query_cache_size=0``\n\nNote that while you can dynamically change these values, to completely remove the contention point you have to restart the database.",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 97
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 45,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_qcache_hits{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Hits",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_qcache_inserts{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Inserts",
+ "metric": "",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_qcache_not_cached{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Not Cached",
+ "metric": "",
+ "refId": "D",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_qcache_lowmem_prunes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Prunes",
+ "metric": "",
+ "refId": "F",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_status_qcache_queries_in_cache{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Queries in Cache",
+ "metric": "",
+ "refId": "E",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Query Cache Activity",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 104
+ },
+ "id": 392,
+ "panels": [],
+ "repeat": null,
+ "title": "Files and Tables",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 105
+ },
+ "hiddenSeries": false,
+ "id": 43,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_opened_files{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Openings",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL File Openings",
+ "description": "MySQL File Openings",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 105
+ },
+ "hiddenSeries": false,
+ "id": 41,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_status_open_files{job=~\"$job\", instance=~\"$instance\"}",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Open Files",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_variables_open_files_limit{job=~\"$job\", instance=~\"$instance\"}",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Open Files Limit",
+ "metric": "",
+ "refId": "D",
+ "step": 20
+ },
+ {
+ "expr": "mysql_global_status_innodb_num_open_files{job=~\"$job\", instance=~\"$instance\"}",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "InnoDB Open Files",
+ "refId": "B",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Open Files",
+ "description": "MySQL Open Files",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 112
+ },
+ "id": 393,
+ "panels": [],
+ "repeat": null,
+ "title": "Table Openings",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Table Open Cache Status**\n\nThe recommendation is to set the `table_open_cache_instances` to a loose correlation to virtual CPUs, keeping in mind that more instances means the cache is split more times. If you have a cache set to 500 but it has 10 instances, each cache will only have 50 cached.\n\nThe `table_definition_cache` and `table_open_cache` can be left as default as they are auto-sized MySQL 5.6 and above (ie: do not set them to any value).",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 113
+ },
+ "hiddenSeries": false,
+ "id": 44,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+ {
+ "title": "Server Status Variables (table_open_cache)",
+ "url": "http://dev.mysql.com/doc/refman/5.6/en/server-system-variables.html#sysvar_table_open_cache"
+ }
+ ],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Table Open Cache Hit Ratio",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "rate(mysql_global_status_opened_tables{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Openings",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "rate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Hits",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "rate(mysql_global_status_table_open_cache_misses{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Misses",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "expr": "rate(mysql_global_status_table_open_cache_overflows{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Misses due to Overflows",
+ "refId": "D",
+ "step": 20
+ },
+ {
+ "expr": "rate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])/(rate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])+rate(mysql_global_status_table_open_cache_misses{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]))",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Table Open Cache Hit Ratio",
+ "refId": "E",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Table Open Cache Status",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Open Tables**\n\nThe recommendation is to set the `table_open_cache_instances` to a loose correlation to virtual CPUs, keeping in mind that more instances means the cache is split more times. If you have a cache set to 500 but it has 10 instances, each cache will only have 50 cached.\n\nThe `table_definition_cache` and `table_open_cache` can be left as default as they are auto-sized MySQL 5.6 and above (ie: do not set them to any value).",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 113
+ },
+ "hiddenSeries": false,
+ "id": 42,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+ {
+ "title": "Server Status Variables (table_open_cache)",
+ "url": "http://dev.mysql.com/doc/refman/5.6/en/server-system-variables.html#sysvar_table_open_cache"
+ }
+ ],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_status_open_tables{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Open Tables",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_variables_table_open_cache{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Table Open Cache",
+ "metric": "",
+ "refId": "C",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Open Tables",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": "$datasource",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 120
+ },
+ "id": 394,
+ "panels": [],
+ "repeat": null,
+ "title": "MySQL Table Definition Cache",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "decimals": 2,
+ "description": "**MySQL Table Definition Cache**\n\nThe recommendation is to set the `table_open_cache_instances` to a loose correlation to virtual CPUs, keeping in mind that more instances means the cache is split more times. If you have a cache set to 500 but it has 10 instances, each cache will only have 50 cached.\n\nThe `table_definition_cache` and `table_open_cache` can be left as default as they are auto-sized MySQL 5.6 and above (ie: do not set them to any value).",
+ "editable": true,
+ "error": false,
+ "fieldConfig": {
+ "defaults": {
+ "unit": "short",
+ "custom": {}
+ },
+ "overrides": []
+ },
+ "fill": 2,
+ "fillGradient": 0,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 121
+ },
+ "hiddenSeries": false,
+ "id": 54,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "avg",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [
+ {
+ "title": "Server Status Variables (table_open_cache)",
+ "url": "http://dev.mysql.com/doc/refman/5.6/en/server-system-variables.html#sysvar_table_open_cache"
+ }
+ ],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Opened Table Definitions",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_status_open_table_definitions{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Open Table Definitions",
+ "metric": "",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "calculatedInterval": "2m",
+ "datasourceErrors": {},
+ "errors": {},
+ "expr": "mysql_global_variables_table_definition_cache{job=~\"$job\", instance=~\"$instance\"}",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Table Definitions Cache Size",
+ "metric": "",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "expr": "rate(mysql_global_status_opened_table_definitions{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
+ "format": "time_series",
+ "interval": "1m",
+ "intervalFactor": 1,
+ "legendFormat": "Opened Table Definitions",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MySQL Table Definition Cache",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "10s",
+ "schemaVersion": 25,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "prometheus",
+ "value": "prometheus"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "Data Source",
+ "multi": false,
+ "name": "datasource",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": {
+ "selected": true,
+ "text": "hosted-grafana/cloudsql-proxy-mysql-exporter",
+ "value": [
+ "hosted-grafana/cloudsql-proxy-mysql-exporter"
+ ]
+ },
+ "datasource": "$datasource",
+ "definition": "label_values(mysql_up, job)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [],
+ "query": "label_values(mysql_up, job)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": {
+ "selected": true,
+ "tags": [],
+ "text": "All",
+ "value": [
+ "$__all"
+ ]
+ },
+ "datasource": "$datasource",
+ "definition": "label_values(mysql_up, instance)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "instance",
+ "multi": true,
+ "name": "instance",
+ "options": [],
+ "query": "label_values(mysql_up, instance)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "collapse": false,
+ "enable": true,
+ "hidden": false,
+ "notice": false,
+ "now": true,
+ "refresh_intervals": [
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "status": "Stable",
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ],
+ "type": "timepicker"
+ },
+ "timezone": "",
+ "title": "MySQL",
+ "uid": "549c2bf8936f7767ea6ac47c47b00f2a",
+ "version": 1
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/mixin.libsonnet b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/mixin.libsonnet
new file mode 100644
index 0000000..9515cde
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/mixin.libsonnet
@@ -0,0 +1,17 @@
+{
+ grafanaDashboards: {
+ 'mysql-overview.json': (import 'dashboards/mysql-overview.json'),
+ },
+
+ // Helper function to ensure that we don't override other rules, by forcing
+ // the patching of the groups list, and not the overall rules object.
+ local importRules(rules) = {
+ groups+: std.native('parseYaml')(rules)[0].groups,
+ },
+
+ prometheusRules+: importRules(importstr 'rules/rules.yaml'),
+
+ prometheusAlerts+:
+ importRules(importstr 'alerts/general.yaml') +
+ importRules(importstr 'alerts/galera.yaml'),
+}
diff --git a/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/rules/rules.yaml b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/rules/rules.yaml
new file mode 100644
index 0000000..4d27add
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/github.com/prometheus/mysqld_exporter/mysqld-mixin/rules/rules.yaml
@@ -0,0 +1,15 @@
+groups:
+- name: mysqld_rules
+ rules:
+
+ # Record slave lag seconds for pre-computed timeseries that takes
+ # `mysql_slave_status_sql_delay` into account
+ - record: instance:mysql_slave_lag_seconds
+ expr: mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay
+
+ # Record slave lag via heartbeat method
+ - record: instance:mysql_heartbeat_lag_seconds
+ expr: mysql_heartbeat_now_timestamp_seconds - mysql_heartbeat_stored_timestamp_seconds
+
+ - record: job:mysql_transactions:rate5m
+ expr: sum without (command) (rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))
diff --git a/atmosphere/jsonnet/vendor/grafana-builder b/atmosphere/jsonnet/vendor/grafana-builder
new file mode 120000
index 0000000..cfa90dd
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/grafana-builder
@@ -0,0 +1 @@
+github.com/grafana/jsonnet-libs/grafana-builder
\ No newline at end of file
diff --git a/atmosphere/jsonnet/vendor/grafonnet b/atmosphere/jsonnet/vendor/grafonnet
new file mode 120000
index 0000000..fd2d163
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/grafonnet
@@ -0,0 +1 @@
+github.com/grafana/grafonnet-lib/grafonnet
\ No newline at end of file
diff --git a/atmosphere/jsonnet/vendor/memcached-mixin b/atmosphere/jsonnet/vendor/memcached-mixin
new file mode 120000
index 0000000..3fdefc5
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/memcached-mixin
@@ -0,0 +1 @@
+github.com/grafana/jsonnet-libs/memcached-mixin
\ No newline at end of file
diff --git a/atmosphere/jsonnet/vendor/mysqld-mixin b/atmosphere/jsonnet/vendor/mysqld-mixin
new file mode 120000
index 0000000..d5613a5
--- /dev/null
+++ b/atmosphere/jsonnet/vendor/mysqld-mixin
@@ -0,0 +1 @@
+github.com/prometheus/mysqld_exporter/mysqld-mixin
\ No newline at end of file
diff --git a/atmosphere/models/config.py b/atmosphere/models/config.py
index 715de97..2bbdfa7 100644
--- a/atmosphere/models/config.py
+++ b/atmosphere/models/config.py
@@ -82,6 +82,10 @@
overrides = types.DictType(types.BaseType(), default={})
+class KubePrometheusStackChartConfig(ChartConfig):
+ namespace = types.StringType(default="monitoring", required=True)
+
+
class MemcachedImagesConfig(base.Model):
memcached = types.StringType(default="docker.io/library/memcached:1.6.17")
exporter = types.StringType(default="quay.io/prometheus/memcached-exporter:v0.10.0")
@@ -98,6 +102,9 @@
class Config(base.Model):
+ kube_prometheus_stack = types.ModelType(
+ KubePrometheusStackChartConfig, default=KubePrometheusStackChartConfig()
+ )
ingress_nginx = types.ModelType(
IngressNginxChartConfig, default=IngressNginxChartConfig()
)
diff --git a/atmosphere/tasks/composite/openstack_helm.py b/atmosphere/tasks/composite/openstack_helm.py
index 9889caa..444656d 100644
--- a/atmosphere/tasks/composite/openstack_helm.py
+++ b/atmosphere/tasks/composite/openstack_helm.py
@@ -49,6 +49,35 @@
)
+def kube_prometheus_stack_tasks_from_config(
+ config: config.KubePrometheusStackChartConfig,
+):
+ if not config.enabled:
+ return []
+
+ values = mergedeep.merge(
+ {},
+ constants.HELM_RELEASE_KUBE_PROMETHEUS_STACK_VALUES,
+ config.overrides,
+ )
+
+ return [
+ flux.ApplyHelmRepositoryTask(
+ namespace=constants.NAMESPACE_MONITORING,
+ name=constants.HELM_REPOSITORY_PROMETHEUS_COMMUINTY,
+ url="https://prometheus-community.github.io/helm-charts",
+ ),
+ flux.ApplyHelmReleaseTask(
+ namespace=config.namespace,
+ name=constants.HELM_RELEASE_KUBE_PROMETHEUS_STACK_NAME,
+ repository=constants.HELM_REPOSITORY_PROMETHEUS_COMMUINTY,
+ chart=constants.HELM_RELEASE_KUBE_PROMETHEUS_STACK_NAME,
+ version=constants.HELM_RELEASE_KUBE_PROMETHEUS_STACK_VERSION,
+ values=values,
+ ),
+ ]
+
+
def ingress_nginx_tasks_from_config(config: config.IngressNginxChartConfig):
if not config.enabled:
return []
diff --git a/atmosphere/tasks/constants.py b/atmosphere/tasks/constants.py
index 3073379..621ffca 100644
--- a/atmosphere/tasks/constants.py
+++ b/atmosphere/tasks/constants.py
@@ -1,3 +1,7 @@
+import pkg_resources
+
+from atmosphere import utils
+
NODE_SELECTOR_CONTROL_PLANE = {
"openstack-control-plane": "enabled",
}
@@ -21,6 +25,294 @@
HELM_REPOSITORY_PERCONA = "percona"
HELM_REPOSITORY_PROMETHEUS_COMMUINTY = "prometheus-community"
+PROMETHEUS_MONITOR_RELABELING_SET_NODE_NAME_TO_INSTANCE = {
+ "sourceLabels": ["__meta_kubernetes_pod_node_name"],
+ "targetLabel": "instance",
+}
+PROMETHEUS_MONITOR_RELABELING_SET_POD_NAME_TO_INSTANCE = {
+ "sourceLabels": ["__meta_kubernetes_pod_name"],
+ "targetLabel": "instance",
+}
+PROMETHEUS_MONITOR_RELABELING_DROP_ALL_KUBERNETES_LABELS = {
+ "action": "labeldrop",
+ "regex": "^(container|endpoint|namespace|pod|node|service)$",
+}
+
+PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME = [
+ PROMETHEUS_MONITOR_RELABELING_SET_POD_NAME_TO_INSTANCE,
+ PROMETHEUS_MONITOR_RELABELING_DROP_ALL_KUBERNETES_LABELS,
+]
+PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME = [
+ PROMETHEUS_MONITOR_RELABELING_SET_NODE_NAME_TO_INSTANCE,
+ PROMETHEUS_MONITOR_RELABELING_DROP_ALL_KUBERNETES_LABELS,
+]
+PROMETHEUS_MONITOR_RELABELINGS_KUBELET = [
+ {"sourceLabels": ["__metrics_path__"], "targetLabel": "metrics_path"},
+ {"sourceLabels": ["node"], "targetLabel": "instance"},
+ PROMETHEUS_MONITOR_RELABELING_DROP_ALL_KUBERNETES_LABELS,
+]
+
+HELM_RELEASE_KUBE_PROMETHEUS_STACK_NAME = "kube-prometheus-stack"
+HELM_RELEASE_KUBE_PROMETHEUS_STACK_VERSION = "41.0.0"
+HELM_RELEASE_KUBE_PROMETHEUS_STACK_VALUES = {
+ "defaultRules": {
+ "disabled": {
+ # NOTE(mnaser): https://github.com/prometheus-community/helm-charts/issues/144
+ # https://github.com/openshift/cluster-monitoring-operator/issues/248
+ "etcdHighNumberOfFailedGRPCRequests": True
+ }
+ },
+ "alertmanager": {
+ "serviceMonitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME
+ },
+ "alertmanagerSpec": {
+ "storage": {
+ "volumeClaimTemplate": {
+ "spec": {
+ "storageClassName": "general",
+ "accessModes": ["ReadWriteOnce"],
+ "resources": {"requests": {"storage": "40Gi"}},
+ }
+ }
+ },
+ "nodeSelector": NODE_SELECTOR_CONTROL_PLANE,
+ },
+ },
+ "grafana": {
+ "serviceMonitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME
+ },
+ "nodeSelector": NODE_SELECTOR_CONTROL_PLANE,
+ },
+ "kubeApiServer": {
+ "serviceMonitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME
+ }
+ },
+ "kubelet": {
+ "serviceMonitor": {
+ "cAdvisorRelabelings": PROMETHEUS_MONITOR_RELABELINGS_KUBELET,
+ "probesRelabelings": PROMETHEUS_MONITOR_RELABELINGS_KUBELET,
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_KUBELET,
+ }
+ },
+ "kubeControllerManager": {
+ "serviceMonitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME,
+ }
+ },
+ "coreDns": {
+ "serviceMonitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME
+ }
+ },
+ "kubeEtcd": {
+ "serviceMonitor": {
+ "scheme": "https",
+ "serverName": "localhost",
+ "insecureSkipVerify": False,
+ "caFile": "/etc/prometheus/secrets/kube-prometheus-stack-etcd-client-cert/ca.crt",
+ "certFile": "/etc/prometheus/secrets/kube-prometheus-stack-etcd-client-cert/healthcheck-client.crt",
+ "keyFile": "/etc/prometheus/secrets/kube-prometheus-stack-etcd-client-cert/healthcheck-client.key",
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME,
+ }
+ },
+ "kubeScheduler": {
+ "service": {"port": 10259, "targetPort": 10259},
+ "serviceMonitor": {
+ "https": True,
+ "insecureSkipVerify": True,
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME,
+ },
+ },
+ "kubeProxy": {
+ "serviceMonitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME
+ }
+ },
+ "kube-state-metrics": {
+ "prometheus": {
+ "monitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME
+ }
+ },
+ "nodeSelector": NODE_SELECTOR_CONTROL_PLANE,
+ },
+ "prometheus": {
+ "serviceMonitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME
+ },
+ "prometheusSpec": {
+ "nodeSelector": NODE_SELECTOR_CONTROL_PLANE,
+ "secrets": ["kube-prometheus-stack-etcd-client-cert"],
+ },
+ "additionalServiceMonitors": [
+ {
+ "name": "ceph",
+ "jobLabel": "application",
+ "selector": {"matchLabels": {"application": "ceph"}},
+ "namespaceSelector": {"matchNames": ["openstack"]},
+ "endpoints": [
+ {
+ "port": "metrics",
+ "honorLabels": True,
+ "relabelings": [
+ {
+ "action": "replace",
+ "regex": "(.*)",
+ "replacement": "ceph",
+ "targetLabel": "cluster",
+ },
+ PROMETHEUS_MONITOR_RELABELING_DROP_ALL_KUBERNETES_LABELS,
+ ],
+ }
+ ],
+ },
+ {
+ "name": "coredns",
+ "jobLabel": "app.kubernetes.io/name",
+ "namespaceSelector": {"matchNames": ["openstack"]},
+ "selector": {
+ "matchLabels": {
+ "app.kubernetes.io/name": "coredns",
+ "app.kubernetes.io/component": "metrics",
+ }
+ },
+ "endpoints": [
+ {
+ "port": "metrics",
+ "relabelings": [
+ {
+ "sourceLabels": [
+ "__meta_kubernetes_pod_label_application"
+ ],
+ "targetLabel": "application",
+ },
+ ]
+ + PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME,
+ }
+ ],
+ },
+ {
+ "name": "memcached",
+ "jobLabel": "application",
+ "namespaceSelector": {"matchNames": ["openstack"]},
+ "selector": {
+ "matchLabels": {"application": "memcached", "component": "server"}
+ },
+ "endpoints": [
+ {
+ "port": "metrics",
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME,
+ }
+ ],
+ },
+ {
+ "name": "openstack-exporter",
+ "jobLabel": "jobLabel",
+ "namespaceSelector": {"matchNames": ["openstack"]},
+ "selector": {"matchLabels": {"application": "openstack-exporter"}},
+ "endpoints": [
+ {
+ "interval": "1m",
+ "scrapeTimeout": "30s",
+ "port": "metrics",
+ "relabelings": [
+ {
+ "action": "replace",
+ "regex": "(.*)",
+ "replacement": "default",
+ "targetLabel": "instance",
+ }
+ ],
+ }
+ ],
+ },
+ ],
+ "additionalPodMonitors": [
+ {
+ "name": "ethtool-exporter",
+ "jobLabel": "job",
+ "selector": {"matchLabels": {"application": "ethtool-exporter"}},
+ "podMetricsEndpoints": [
+ {
+ "port": "metrics",
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME,
+ }
+ ],
+ },
+ {
+ "name": "ipmi-exporter",
+ "jobLabel": "job",
+ "selector": {"matchLabels": {"application": "ipmi-exporter"}},
+ "podMetricsEndpoints": [
+ {
+ "port": "metrics",
+ "interval": "60s",
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME,
+ }
+ ],
+ },
+ {
+ "name": "percona-xtradb-pxc",
+ "jobLabel": "app.kubernetes.io/component",
+ "namespaceSelector": {"matchNames": ["openstack"]},
+ "selector": {
+ "matchLabels": {
+ "app.kubernetes.io/component": "pxc",
+ "app.kubernetes.io/instance": "percona-xtradb",
+ }
+ },
+ "podMetricsEndpoints": [
+ {
+ "port": "metrics",
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME,
+ }
+ ],
+ },
+ {
+ "name": "rabbitmq",
+ "jobLabel": "app.kubernetes.io/component",
+ "namespaceSelector": {"matchNames": ["openstack"]},
+ "selector": {
+ "matchLabels": {"app.kubernetes.io/component": "rabbitmq"}
+ },
+ "podMetricsEndpoints": [
+ {
+ "port": "prometheus",
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME,
+ }
+ ],
+ },
+ ],
+ },
+ "prometheusOperator": {
+        "admissionWebhooks": {"patch": {"nodeSelector": NODE_SELECTOR_CONTROL_PLANE}},
+ "serviceMonitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_POD_NAME
+ },
+ "nodeSelector": NODE_SELECTOR_CONTROL_PLANE,
+ },
+ "prometheus-node-exporter": {
+ "extraArgs": [
+ "--collector.diskstats.ignored-devices=^(ram|loop|nbd|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$",
+ "--collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|fuse.squashfuse_ll|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$", # noqa: E501
+ "--collector.filesystem.mount-points-exclude=^/(dev|proc|run/credentials/.+|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+|var/lib/kubelet/plugins/kubernetes.io/csi/.+|run/containerd/.+)($|/)", # noqa: E501
+ "--collector.netclass.ignored-devices=^(lxc|cilium_|qbr|qvb|qvo|tap|ovs-system|br|tbr|gre_sys).*$",
+ "--collector.netdev.device-exclude=^(lxc|cilium_|qbr|qvb|qvo|tap|ovs-system|br|tbr|gre_sys).*$",
+ ],
+ "prometheus": {
+ "monitor": {
+ "relabelings": PROMETHEUS_MONITOR_RELABELINGS_INSTANCE_TO_NODE_NAME
+ }
+ },
+ },
+ "additionalPrometheusRulesMap": utils.load_jsonnet_from_path(
+ pkg_resources.resource_filename("atmosphere.jsonnet", "rules.jsonnet")
+ ),
+}
+
HELM_RELEASE_INGRESS_NGINX_NAME = "ingress-nginx"
HELM_RELEASE_INGRESS_NGINX_VERSION = "4.0.17"
HELM_RELEASE_INGRESS_NGINX_VALUES = {
diff --git a/atmosphere/tasks/kubernetes/flux.py b/atmosphere/tasks/kubernetes/flux.py
index be3edcb..77d6f22 100644
--- a/atmosphere/tasks/kubernetes/flux.py
+++ b/atmosphere/tasks/kubernetes/flux.py
@@ -103,9 +103,11 @@
}
},
"install": {
+ "crds": "CreateReplace",
"disableWait": True,
},
"upgrade": {
+ "crds": "CreateReplace",
"disableWait": True,
},
"values": self._values,
diff --git a/atmosphere/utils.py b/atmosphere/utils.py
new file mode 100644
index 0000000..c44db96
--- /dev/null
+++ b/atmosphere/utils.py
@@ -0,0 +1,8 @@
+import json
+
+import _jsonnet
+
+
+def load_jsonnet_from_path(path: str) -> any:
+ raw = _jsonnet.evaluate_file(path)
+ return json.loads(raw)
diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json
new file mode 100644
index 0000000..2c79f2f
--- /dev/null
+++ b/jsonnetfile.lock.json
@@ -0,0 +1,66 @@
+{
+ "version": 1,
+ "dependencies": [
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/ceph/ceph.git",
+ "subdir": "monitoring/ceph-mixin"
+ }
+ },
+ "version": "296ba0641a161e6f809a03f39d85faf9d695315b",
+ "sum": "ZnyCIu25NBI6Q3Ru7QK1DHf7DBMEURSMQdEJXzCyIgA="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/grafonnet-lib.git",
+ "subdir": "grafonnet"
+ }
+ },
+ "version": "30280196507e0fe6fa978a3e0eaca3a62844f817",
+ "sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/jsonnet-libs.git",
+ "subdir": "grafana-builder"
+ }
+ },
+ "version": "d73aff453c9784cd6922119f3ce33d8d355a79e1",
+ "sum": "tDR6yT2GVfw0wTU12iZH+m01HrbIr6g/xN+/8nzNkU0="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/grafana/jsonnet-libs.git",
+ "subdir": "memcached-mixin"
+ }
+ },
+ "version": "d73aff453c9784cd6922119f3ce33d8d355a79e1",
+ "sum": "kl5GJvwAVAh0qIKsAFpNXm/jhHhv/8tBbpri7VCpZ2I="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/povilasv/coredns-mixin.git",
+ "subdir": ""
+ }
+ },
+ "version": "8b121c41b6c0741f1603b2d75ccd324050c4bd03",
+ "sum": "4CxwtfVX/OIS5w5+FkExG5evxiatOEMhaWChWaFc9S4="
+ },
+ {
+ "source": {
+ "git": {
+ "remote": "https://github.com/prometheus/mysqld_exporter.git",
+ "subdir": "mysqld-mixin"
+ }
+ },
+ "version": "503f1fa222f0afc74a1dcf4a0ef5a7c2dfa4d105",
+ "sum": "G69++5ExKgQ9niW0Owmw0orc8voP0Qll2WZJ1fHAqzE="
+ }
+ ],
+ "legacyImports": false
+}
diff --git a/poetry.lock b/poetry.lock
index dbd44c6..28c57c2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,6 +1,6 @@
[[package]]
name = "ansible-compat"
-version = "2.2.0"
+version = "2.2.4"
description = "Ansible compatibility goodies"
category = "dev"
optional = false
@@ -8,20 +8,21 @@
[package.dependencies]
jsonschema = ">=4.6.0"
+packaging = "*"
PyYAML = "*"
subprocess-tee = ">=0.3.5"
[package.extras]
-docs = ["myst-parser", "sphinx (>=4.2.0,<5.0)", "sphinx-ansible-theme", "sphinx-autobuild (>=0.7.1,<1.0)"]
-test = ["coverage", "flaky", "pip-tools", "pytest", "pytest-markdown", "pytest-mock", "pytest-plus"]
+docs = ["myst-parser", "sphinx (>=5.3.0)", "sphinx-ansible-theme", "sphinx-autobuild (>=2021.3.14)"]
+test = ["coverage", "flaky", "pip-tools", "pytest (>=7.2.0)", "pytest-mock", "pytest-plus"]
[[package]]
name = "ansible-core"
-version = "2.13.4"
+version = "2.14.0"
description = "Radically simple IT automation"
category = "dev"
optional = false
-python-versions = ">=3.8"
+python-versions = ">=3.9"
[package.dependencies]
cryptography = "*"
@@ -98,19 +99,8 @@
python-versions = "~=3.7"
[[package]]
-name = "Cerberus"
-version = "1.3.2"
-description = "Lightweight, extensible schema and data validation tool for Python dictionaries."
-category = "dev"
-optional = false
-python-versions = ">=2.7"
-
-[package.dependencies]
-setuptools = "*"
-
-[[package]]
name = "certifi"
-version = "2022.9.14"
+version = "2022.9.24"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
@@ -173,11 +163,11 @@
[[package]]
name = "colorama"
-version = "0.4.5"
+version = "0.4.6"
description = "Cross-platform colored terminal text."
category = "main"
optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
[[package]]
name = "commonmark"
@@ -209,7 +199,7 @@
[[package]]
name = "coverage"
-version = "6.4.4"
+version = "6.5.0"
description = "Code coverage measurement for Python"
category = "dev"
optional = false
@@ -223,7 +213,7 @@
[[package]]
name = "cryptography"
-version = "38.0.1"
+version = "38.0.3"
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
category = "dev"
optional = false
@@ -295,6 +285,17 @@
six = ">=1.10.0"
[[package]]
+name = "exceptiongroup"
+version = "1.0.1"
+description = "Backport of PEP 654 (exception groups)"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+
+[package.extras]
+test = ["pytest (>=6)"]
+
+[[package]]
name = "fasteners"
version = "0.18"
description = "A python package that provides useful locks"
@@ -354,14 +355,15 @@
[[package]]
name = "greenlet"
-version = "1.1.3"
+version = "2.0.1"
description = "Lightweight in-process concurrent programming"
category = "main"
optional = false
python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*"
[package.extras]
-docs = ["Sphinx"]
+docs = ["Sphinx", "docutils (<0.18)"]
+test = ["faulthandler", "objgraph", "psutil"]
[[package]]
name = "idna"
@@ -381,7 +383,7 @@
[[package]]
name = "iso8601"
-version = "1.0.2"
+version = "1.1.0"
description = "Simple module to parse ISO 8601 dates"
category = "main"
optional = false
@@ -447,8 +449,16 @@
python-versions = ">=3.7"
[[package]]
+name = "jsonnet"
+version = "0.18.0"
+description = "Python bindings for Jsonnet - The data templating language"
+category = "main"
+optional = false
+python-versions = "*"
+
+[[package]]
name = "jsonschema"
-version = "4.16.0"
+version = "4.17.0"
description = "An implementation of JSON Schema validation for Python"
category = "main"
optional = false
@@ -499,7 +509,7 @@
[[package]]
name = "mkdocs"
-version = "1.4.1"
+version = "1.4.2"
description = "Project documentation with Markdown."
category = "dev"
optional = false
@@ -523,7 +533,7 @@
[[package]]
name = "mkdocs-material"
-version = "8.5.7"
+version = "8.5.8"
description = "Documentation that simply works"
category = "dev"
optional = false
@@ -533,7 +543,7 @@
jinja2 = ">=3.0.2"
markdown = ">=3.2"
mkdocs = ">=1.4.0"
-mkdocs-material-extensions = ">=1.0.3"
+mkdocs-material-extensions = ">=1.1"
pygments = ">=2.12"
pymdown-extensions = ">=9.4"
requests = ">=2.26"
@@ -548,7 +558,7 @@
[[package]]
name = "molecule"
-version = "4.0.1"
+version = "4.0.3"
description = "Molecule aids in the development and testing of Ansible roles"
category = "dev"
optional = false
@@ -556,12 +566,12 @@
[package.dependencies]
ansible-compat = ">=2.2.0"
-cerberus = ">=1.3.1,<1.3.3 || >1.3.3,<1.3.4 || >1.3.4"
click = ">=8.0,<9"
click-help-colors = ">=0.9"
cookiecutter = ">=1.7.3"
enrich = ">=1.2.7"
Jinja2 = ">=2.11.3"
+jsonschema = ">=4.9.1"
packaging = "*"
pluggy = ">=0.7.1,<2.0"
PyYAML = ">=5.1"
@@ -570,7 +580,7 @@
[package.extras]
docker = ["molecule-docker (>=1.0.0)"]
docs = ["Sphinx (>=5.0.0,<6.0.0)", "ansible-core (>=2.12.0)", "jinja2 (<3.2.0)", "simplejson (>=3.17.2)", "sphinx-ansible-theme (>=0.8.0,<0.10.0)", "sphinx-notfound-page (>=0.7.1)"]
-lint = ["flake8 (>=3.8.4)", "pre-commit (>=2.10.1)", "yamllint"]
+lint = ["check-jsonschema (>=0.18.3)", "flake8 (>=3.8.4)", "jsonschema (>=4.16.0)", "pre-commit (>=2.10.1)", "yamllint"]
podman = ["molecule-podman (>=1.0.1)"]
test = ["ansi2html (>=1.6.0)", "coverage (>=6.2)", "filelock", "pexpect (>=4.8.0,<5)", "pytest (>=6.1.2)", "pytest-cov (>=2.10.1)", "pytest-html (>=3.0.0)", "pytest-mock (>=3.3.1)", "pytest-plus (>=0.2)", "pytest-testinfra (>=6.1.0)", "pytest-xdist (>=2.1.0)"]
windows = ["pywinrm"]
@@ -601,7 +611,7 @@
[[package]]
name = "networkx"
-version = "2.8.6"
+version = "2.8.8"
description = "Python package for creating and manipulating graphs and networks"
category = "main"
optional = false
@@ -609,10 +619,10 @@
[package.extras]
default = ["matplotlib (>=3.4)", "numpy (>=1.19)", "pandas (>=1.3)", "scipy (>=1.8)"]
-developer = ["mypy (>=0.961)", "pre-commit (>=2.20)"]
-doc = ["nb2plots (>=0.6)", "numpydoc (>=1.4)", "pillow (>=9.1)", "pydata-sphinx-theme (>=0.9)", "sphinx (>=5)", "sphinx-gallery (>=0.10)", "texext (>=0.6.6)"]
+developer = ["mypy (>=0.982)", "pre-commit (>=2.20)"]
+doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.2)", "pydata-sphinx-theme (>=0.11)", "sphinx (>=5.2)", "sphinx-gallery (>=0.11)", "texext (>=0.6.6)"]
extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.9)", "sympy (>=1.10)"]
-test = ["codecov (>=2.1)", "pytest (>=7.1)", "pytest-cov (>=3.0)"]
+test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]
[[package]]
name = "oslo.i18n"
@@ -670,7 +680,7 @@
[[package]]
name = "pbr"
-version = "5.10.0"
+version = "5.11.0"
description = "Python Build Reasonableness"
category = "main"
optional = false
@@ -690,7 +700,7 @@
[[package]]
name = "prettytable"
-version = "3.4.1"
+version = "3.5.0"
description = "A simple Python library for easily displaying tabular data in a visually appealing ASCII table format"
category = "main"
optional = false
@@ -703,14 +713,6 @@
tests = ["pytest", "pytest-cov", "pytest-lazy-fixture"]
[[package]]
-name = "py"
-version = "1.11.0"
-description = "library with cross-python path, ini-parsing, io, code, log facilities"
-category = "dev"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
-
-[[package]]
name = "pycodestyle"
version = "2.9.1"
description = "Python style guide checker"
@@ -812,7 +814,7 @@
[[package]]
name = "pyrsistent"
-version = "0.18.1"
+version = "0.19.2"
description = "Persistent/Functional/Immutable data structures"
category = "main"
optional = false
@@ -820,7 +822,7 @@
[[package]]
name = "pytest"
-version = "7.1.3"
+version = "7.2.0"
description = "pytest: simple powerful testing with Python"
category = "dev"
optional = false
@@ -829,11 +831,11 @@
[package.dependencies]
attrs = ">=19.2.0"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
iniconfig = "*"
packaging = "*"
pluggy = ">=0.12,<2.0"
-py = ">=1.8.2"
-tomli = ">=1.0.0"
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
@@ -866,7 +868,7 @@
[[package]]
name = "pytest-mock"
-version = "3.8.2"
+version = "3.10.0"
description = "Thin-wrapper around the mock package for easier use with pytest"
category = "dev"
optional = false
@@ -920,7 +922,7 @@
[[package]]
name = "pytz"
-version = "2022.2.1"
+version = "2022.6"
description = "World timezone definitions, modern and historical"
category = "main"
optional = false
@@ -979,7 +981,7 @@
[[package]]
name = "rich"
-version = "12.5.1"
+version = "12.6.0"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
category = "main"
optional = false
@@ -1001,19 +1003,6 @@
python-versions = "*"
[[package]]
-name = "setuptools"
-version = "65.3.0"
-description = "Easily download, build, install, upgrade, and uninstall Python packages"
-category = "dev"
-optional = false
-python-versions = ">=3.7"
-
-[package.extras]
-docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
-testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mock", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
-testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
-
-[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
@@ -1023,7 +1012,7 @@
[[package]]
name = "stevedore"
-version = "4.0.0"
+version = "4.1.0"
description = "Manage dynamic plugins for Python applications"
category = "main"
optional = false
@@ -1088,7 +1077,7 @@
[[package]]
name = "tenacity"
-version = "8.0.1"
+version = "8.1.0"
description = "Retry code until it succeeds"
category = "main"
optional = false
@@ -1140,7 +1129,7 @@
[[package]]
name = "typer"
-version = "0.6.1"
+version = "0.7.0"
description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
category = "dev"
optional = false
@@ -1152,12 +1141,12 @@
[package.extras]
all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"]
-doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)"]
-test = ["black (>=22.3.0,<23.0.0)", "coverage (>=5.2,<6.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<2.0.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
+doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"]
+test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<13.0.0)", "shellingham (>=1.3.0,<2.0.0)"]
[[package]]
name = "typing-extensions"
-version = "4.3.0"
+version = "4.4.0"
description = "Backported and Experimental Type Hints for Python 3.7+"
category = "dev"
optional = false
@@ -1206,16 +1195,16 @@
[metadata]
lock-version = "1.1"
python-versions = "^3.10"
-content-hash = "76e9796ebc66a84a4dc6bf888c21aadbd5859740cbda224ebcd65479f388f94e"
+content-hash = "2d7257e69169972fa19bbfba41551011c10b080b44631d54f29d4b26e9346395"
[metadata.files]
ansible-compat = [
- {file = "ansible-compat-2.2.0.tar.gz", hash = "sha256:676db8ec0449d1f07038625b8ebb8ceef5f8ad3a1af3ee82d4ed66b9b04cb6fa"},
- {file = "ansible_compat-2.2.0-py3-none-any.whl", hash = "sha256:ce69a67785ae96e8962794a47494339991a0ae242ab5dd14a76ee2137d09072e"},
+ {file = "ansible-compat-2.2.4.tar.gz", hash = "sha256:6a2c3ade5005530cdfdd8e961c784b1718f17ad480a1be5a8014bff89c9c9c2e"},
+ {file = "ansible_compat-2.2.4-py3-none-any.whl", hash = "sha256:5589582a165a44a7ab012dabd91a28a897e63daf96ffea655b7e9db38517b648"},
]
ansible-core = [
- {file = "ansible-core-2.13.4.tar.gz", hash = "sha256:78f45c2c472af60b9b4b8cbdaba5a3911079087891a9f6c6ed726327b8f21c6a"},
- {file = "ansible_core-2.13.4-py3-none-any.whl", hash = "sha256:d83947d23441df6f96ea934032a948a060bf6db4852d28ed5f9c7b92b39165b7"},
+ {file = "ansible-core-2.14.0.tar.gz", hash = "sha256:fa48b481cb623bf79bb903f223097681a0c13e1b4ec7e78e7dd7d858d36a34b2"},
+ {file = "ansible_core-2.14.0-py3-none-any.whl", hash = "sha256:b191d397c81514bd1922e00e16f0b8ec52e0bcb19b61cc4500085f5f92470cf2"},
]
arrow = [
{file = "arrow-1.2.3-py3-none-any.whl", hash = "sha256:5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2"},
@@ -1242,12 +1231,9 @@
{file = "cachetools-5.2.0-py3-none-any.whl", hash = "sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db"},
{file = "cachetools-5.2.0.tar.gz", hash = "sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757"},
]
-Cerberus = [
- {file = "Cerberus-1.3.2.tar.gz", hash = "sha256:302e6694f206dd85cb63f13fd5025b31ab6d38c99c50c6d769f8fa0b0f299589"},
-]
certifi = [
- {file = "certifi-2022.9.14-py3-none-any.whl", hash = "sha256:e232343de1ab72c2aa521b625c80f699e356830fd0e2c620b465b304b17b0516"},
- {file = "certifi-2022.9.14.tar.gz", hash = "sha256:36973885b9542e6bd01dea287b2b4b3b21236307c56324fcc3f1160f2d655ed5"},
+ {file = "certifi-2022.9.24-py3-none-any.whl", hash = "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"},
+ {file = "certifi-2022.9.24.tar.gz", hash = "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14"},
]
cffi = [
{file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"},
@@ -1332,8 +1318,8 @@
{file = "click_help_colors-0.9.1-py3-none-any.whl", hash = "sha256:25a6bd22d8abbc72c18a416a1cf21ab65b6120bee48e9637829666cbad22d51d"},
]
colorama = [
- {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
- {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
+ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
commonmark = [
{file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"},
@@ -1344,84 +1330,84 @@
{file = "cookiecutter-2.1.1.tar.gz", hash = "sha256:f3982be8d9c53dac1261864013fdec7f83afd2e42ede6f6dd069c5e149c540d5"},
]
coverage = [
- {file = "coverage-6.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7b4da9bafad21ea45a714d3ea6f3e1679099e420c8741c74905b92ee9bfa7cc"},
- {file = "coverage-6.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fde17bc42e0716c94bf19d92e4c9f5a00c5feb401f5bc01101fdf2a8b7cacf60"},
- {file = "coverage-6.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdbb0d89923c80dbd435b9cf8bba0ff55585a3cdb28cbec65f376c041472c60d"},
- {file = "coverage-6.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67f9346aeebea54e845d29b487eb38ec95f2ecf3558a3cffb26ee3f0dcc3e760"},
- {file = "coverage-6.4.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42c499c14efd858b98c4e03595bf914089b98400d30789511577aa44607a1b74"},
- {file = "coverage-6.4.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c35cca192ba700979d20ac43024a82b9b32a60da2f983bec6c0f5b84aead635c"},
- {file = "coverage-6.4.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9cc4f107009bca5a81caef2fca843dbec4215c05e917a59dec0c8db5cff1d2aa"},
- {file = "coverage-6.4.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5f444627b3664b80d078c05fe6a850dd711beeb90d26731f11d492dcbadb6973"},
- {file = "coverage-6.4.4-cp310-cp310-win32.whl", hash = "sha256:66e6df3ac4659a435677d8cd40e8eb1ac7219345d27c41145991ee9bf4b806a0"},
- {file = "coverage-6.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:35ef1f8d8a7a275aa7410d2f2c60fa6443f4a64fae9be671ec0696a68525b875"},
- {file = "coverage-6.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c1328d0c2f194ffda30a45f11058c02410e679456276bfa0bbe0b0ee87225fac"},
- {file = "coverage-6.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61b993f3998ee384935ee423c3d40894e93277f12482f6e777642a0141f55782"},
- {file = "coverage-6.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d5dd4b8e9cd0deb60e6fcc7b0647cbc1da6c33b9e786f9c79721fd303994832f"},
- {file = "coverage-6.4.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7026f5afe0d1a933685d8f2169d7c2d2e624f6255fb584ca99ccca8c0e966fd7"},
- {file = "coverage-6.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9c7b9b498eb0c0d48b4c2abc0e10c2d78912203f972e0e63e3c9dc21f15abdaa"},
- {file = "coverage-6.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ee2b2fb6eb4ace35805f434e0f6409444e1466a47f620d1d5763a22600f0f892"},
- {file = "coverage-6.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ab066f5ab67059d1f1000b5e1aa8bbd75b6ed1fc0014559aea41a9eb66fc2ce0"},
- {file = "coverage-6.4.4-cp311-cp311-win32.whl", hash = "sha256:9d6e1f3185cbfd3d91ac77ea065d85d5215d3dfa45b191d14ddfcd952fa53796"},
- {file = "coverage-6.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:e3d3c4cc38b2882f9a15bafd30aec079582b819bec1b8afdbde8f7797008108a"},
- {file = "coverage-6.4.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a095aa0a996ea08b10580908e88fbaf81ecf798e923bbe64fb98d1807db3d68a"},
- {file = "coverage-6.4.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef6f44409ab02e202b31a05dd6666797f9de2aa2b4b3534e9d450e42dea5e817"},
- {file = "coverage-6.4.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b7101938584d67e6f45f0015b60e24a95bf8dea19836b1709a80342e01b472f"},
- {file = "coverage-6.4.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a32ec68d721c3d714d9b105c7acf8e0f8a4f4734c811eda75ff3718570b5e3"},
- {file = "coverage-6.4.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6a864733b22d3081749450466ac80698fe39c91cb6849b2ef8752fd7482011f3"},
- {file = "coverage-6.4.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:08002f9251f51afdcc5e3adf5d5d66bb490ae893d9e21359b085f0e03390a820"},
- {file = "coverage-6.4.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a3b2752de32c455f2521a51bd3ffb53c5b3ae92736afde67ce83477f5c1dd928"},
- {file = "coverage-6.4.4-cp37-cp37m-win32.whl", hash = "sha256:f855b39e4f75abd0dfbcf74a82e84ae3fc260d523fcb3532786bcbbcb158322c"},
- {file = "coverage-6.4.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ee6ae6bbcac0786807295e9687169fba80cb0617852b2fa118a99667e8e6815d"},
- {file = "coverage-6.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:564cd0f5b5470094df06fab676c6d77547abfdcb09b6c29c8a97c41ad03b103c"},
- {file = "coverage-6.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cbbb0e4cd8ddcd5ef47641cfac97d8473ab6b132dd9a46bacb18872828031685"},
- {file = "coverage-6.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6113e4df2fa73b80f77663445be6d567913fb3b82a86ceb64e44ae0e4b695de1"},
- {file = "coverage-6.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8d032bfc562a52318ae05047a6eb801ff31ccee172dc0d2504614e911d8fa83e"},
- {file = "coverage-6.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e431e305a1f3126477abe9a184624a85308da8edf8486a863601d58419d26ffa"},
- {file = "coverage-6.4.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cf2afe83a53f77aec067033199797832617890e15bed42f4a1a93ea24794ae3e"},
- {file = "coverage-6.4.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:783bc7c4ee524039ca13b6d9b4186a67f8e63d91342c713e88c1865a38d0892a"},
- {file = "coverage-6.4.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ff934ced84054b9018665ca3967fc48e1ac99e811f6cc99ea65978e1d384454b"},
- {file = "coverage-6.4.4-cp38-cp38-win32.whl", hash = "sha256:e1fabd473566fce2cf18ea41171d92814e4ef1495e04471786cbc943b89a3781"},
- {file = "coverage-6.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:4179502f210ebed3ccfe2f78bf8e2d59e50b297b598b100d6c6e3341053066a2"},
- {file = "coverage-6.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:98c0b9e9b572893cdb0a00e66cf961a238f8d870d4e1dc8e679eb8bdc2eb1b86"},
- {file = "coverage-6.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fc600f6ec19b273da1d85817eda339fb46ce9eef3e89f220055d8696e0a06908"},
- {file = "coverage-6.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a98d6bf6d4ca5c07a600c7b4e0c5350cd483c85c736c522b786be90ea5bac4f"},
- {file = "coverage-6.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01778769097dbd705a24e221f42be885c544bb91251747a8a3efdec6eb4788f2"},
- {file = "coverage-6.4.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dfa0b97eb904255e2ab24166071b27408f1f69c8fbda58e9c0972804851e0558"},
- {file = "coverage-6.4.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:fcbe3d9a53e013f8ab88734d7e517eb2cd06b7e689bedf22c0eb68db5e4a0a19"},
- {file = "coverage-6.4.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:15e38d853ee224e92ccc9a851457fb1e1f12d7a5df5ae44544ce7863691c7a0d"},
- {file = "coverage-6.4.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6913dddee2deff8ab2512639c5168c3e80b3ebb0f818fed22048ee46f735351a"},
- {file = "coverage-6.4.4-cp39-cp39-win32.whl", hash = "sha256:354df19fefd03b9a13132fa6643527ef7905712109d9c1c1903f2133d3a4e145"},
- {file = "coverage-6.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:1238b08f3576201ebf41f7c20bf59baa0d05da941b123c6656e42cdb668e9827"},
- {file = "coverage-6.4.4-pp36.pp37.pp38-none-any.whl", hash = "sha256:f67cf9f406cf0d2f08a3515ce2db5b82625a7257f88aad87904674def6ddaec1"},
- {file = "coverage-6.4.4.tar.gz", hash = "sha256:e16c45b726acb780e1e6f88b286d3c10b3914ab03438f32117c4aa52d7f30d58"},
+ {file = "coverage-6.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef8674b0ee8cc11e2d574e3e2998aea5df5ab242e012286824ea3c6970580e53"},
+ {file = "coverage-6.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:784f53ebc9f3fd0e2a3f6a78b2be1bd1f5575d7863e10c6e12504f240fd06660"},
+ {file = "coverage-6.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4a5be1748d538a710f87542f22c2cad22f80545a847ad91ce45e77417293eb4"},
+ {file = "coverage-6.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83516205e254a0cb77d2d7bb3632ee019d93d9f4005de31dca0a8c3667d5bc04"},
+ {file = "coverage-6.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af4fffaffc4067232253715065e30c5a7ec6faac36f8fc8d6f64263b15f74db0"},
+ {file = "coverage-6.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:97117225cdd992a9c2a5515db1f66b59db634f59d0679ca1fa3fe8da32749cae"},
+ {file = "coverage-6.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a1170fa54185845505fbfa672f1c1ab175446c887cce8212c44149581cf2d466"},
+ {file = "coverage-6.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:11b990d520ea75e7ee8dcab5bc908072aaada194a794db9f6d7d5cfd19661e5a"},
+ {file = "coverage-6.5.0-cp310-cp310-win32.whl", hash = "sha256:5dbec3b9095749390c09ab7c89d314727f18800060d8d24e87f01fb9cfb40b32"},
+ {file = "coverage-6.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:59f53f1dc5b656cafb1badd0feb428c1e7bc19b867479ff72f7a9dd9b479f10e"},
+ {file = "coverage-6.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4a5375e28c5191ac38cca59b38edd33ef4cc914732c916f2929029b4bfb50795"},
+ {file = "coverage-6.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4ed2820d919351f4167e52425e096af41bfabacb1857186c1ea32ff9983ed75"},
+ {file = "coverage-6.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33a7da4376d5977fbf0a8ed91c4dffaaa8dbf0ddbf4c8eea500a2486d8bc4d7b"},
+ {file = "coverage-6.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8fb6cf131ac4070c9c5a3e21de0f7dc5a0fbe8bc77c9456ced896c12fcdad91"},
+ {file = "coverage-6.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a6b7d95969b8845250586f269e81e5dfdd8ff828ddeb8567a4a2eaa7313460c4"},
+ {file = "coverage-6.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1ef221513e6f68b69ee9e159506d583d31aa3567e0ae84eaad9d6ec1107dddaa"},
+ {file = "coverage-6.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cca4435eebea7962a52bdb216dec27215d0df64cf27fc1dd538415f5d2b9da6b"},
+ {file = "coverage-6.5.0-cp311-cp311-win32.whl", hash = "sha256:98e8a10b7a314f454d9eff4216a9a94d143a7ee65018dd12442e898ee2310578"},
+ {file = "coverage-6.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:bc8ef5e043a2af066fa8cbfc6e708d58017024dc4345a1f9757b329a249f041b"},
+ {file = "coverage-6.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4433b90fae13f86fafff0b326453dd42fc9a639a0d9e4eec4d366436d1a41b6d"},
+ {file = "coverage-6.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4f05d88d9a80ad3cac6244d36dd89a3c00abc16371769f1340101d3cb899fc3"},
+ {file = "coverage-6.5.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:94e2565443291bd778421856bc975d351738963071e9b8839ca1fc08b42d4bef"},
+ {file = "coverage-6.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:027018943386e7b942fa832372ebc120155fd970837489896099f5cfa2890f79"},
+ {file = "coverage-6.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:255758a1e3b61db372ec2736c8e2a1fdfaf563977eedbdf131de003ca5779b7d"},
+ {file = "coverage-6.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:851cf4ff24062c6aec510a454b2584f6e998cada52d4cb58c5e233d07172e50c"},
+ {file = "coverage-6.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:12adf310e4aafddc58afdb04d686795f33f4d7a6fa67a7a9d4ce7d6ae24d949f"},
+ {file = "coverage-6.5.0-cp37-cp37m-win32.whl", hash = "sha256:b5604380f3415ba69de87a289a2b56687faa4fe04dbee0754bfcae433489316b"},
+ {file = "coverage-6.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:4a8dbc1f0fbb2ae3de73eb0bdbb914180c7abfbf258e90b311dcd4f585d44bd2"},
+ {file = "coverage-6.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d900bb429fdfd7f511f868cedd03a6bbb142f3f9118c09b99ef8dc9bf9643c3c"},
+ {file = "coverage-6.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2198ea6fc548de52adc826f62cb18554caedfb1d26548c1b7c88d8f7faa8f6ba"},
+ {file = "coverage-6.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c4459b3de97b75e3bd6b7d4b7f0db13f17f504f3d13e2a7c623786289dd670e"},
+ {file = "coverage-6.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20c8ac5386253717e5ccc827caad43ed66fea0efe255727b1053a8154d952398"},
+ {file = "coverage-6.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b07130585d54fe8dff3d97b93b0e20290de974dc8177c320aeaf23459219c0b"},
+ {file = "coverage-6.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dbdb91cd8c048c2b09eb17713b0c12a54fbd587d79adcebad543bc0cd9a3410b"},
+ {file = "coverage-6.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:de3001a203182842a4630e7b8d1a2c7c07ec1b45d3084a83d5d227a3806f530f"},
+ {file = "coverage-6.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e07f4a4a9b41583d6eabec04f8b68076ab3cd44c20bd29332c6572dda36f372e"},
+ {file = "coverage-6.5.0-cp38-cp38-win32.whl", hash = "sha256:6d4817234349a80dbf03640cec6109cd90cba068330703fa65ddf56b60223a6d"},
+ {file = "coverage-6.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:7ccf362abd726b0410bf8911c31fbf97f09f8f1061f8c1cf03dfc4b6372848f6"},
+ {file = "coverage-6.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:633713d70ad6bfc49b34ead4060531658dc6dfc9b3eb7d8a716d5873377ab745"},
+ {file = "coverage-6.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:95203854f974e07af96358c0b261f1048d8e1083f2de9b1c565e1be4a3a48cfc"},
+ {file = "coverage-6.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9023e237f4c02ff739581ef35969c3739445fb059b060ca51771e69101efffe"},
+ {file = "coverage-6.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:265de0fa6778d07de30bcf4d9dc471c3dc4314a23a3c6603d356a3c9abc2dfcf"},
+ {file = "coverage-6.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f830ed581b45b82451a40faabb89c84e1a998124ee4212d440e9c6cf70083e5"},
+ {file = "coverage-6.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7b6be138d61e458e18d8e6ddcddd36dd96215edfe5f1168de0b1b32635839b62"},
+ {file = "coverage-6.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:42eafe6778551cf006a7c43153af1211c3aaab658d4d66fa5fcc021613d02518"},
+ {file = "coverage-6.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:723e8130d4ecc8f56e9a611e73b31219595baa3bb252d539206f7bbbab6ffc1f"},
+ {file = "coverage-6.5.0-cp39-cp39-win32.whl", hash = "sha256:d9ecf0829c6a62b9b573c7bb6d4dcd6ba8b6f80be9ba4fc7ed50bf4ac9aecd72"},
+ {file = "coverage-6.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc2af30ed0d5ae0b1abdb4ebdce598eafd5b35397d4d75deb341a614d333d987"},
+ {file = "coverage-6.5.0-pp36.pp37.pp38-none-any.whl", hash = "sha256:1431986dac3923c5945271f169f59c45b8802a114c8f548d611f2015133df77a"},
+ {file = "coverage-6.5.0.tar.gz", hash = "sha256:f642e90754ee3e06b0e7e51bce3379590e76b7f76b708e1a71ff043f87025c84"},
]
cryptography = [
- {file = "cryptography-38.0.1-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:10d1f29d6292fc95acb597bacefd5b9e812099d75a6469004fd38ba5471a977f"},
- {file = "cryptography-38.0.1-cp36-abi3-macosx_10_10_x86_64.whl", hash = "sha256:3fc26e22840b77326a764ceb5f02ca2d342305fba08f002a8c1f139540cdfaad"},
- {file = "cryptography-38.0.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:3b72c360427889b40f36dc214630e688c2fe03e16c162ef0aa41da7ab1455153"},
- {file = "cryptography-38.0.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:194044c6b89a2f9f169df475cc167f6157eb9151cc69af8a2a163481d45cc407"},
- {file = "cryptography-38.0.1-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca9f6784ea96b55ff41708b92c3f6aeaebde4c560308e5fbbd3173fbc466e94e"},
- {file = "cryptography-38.0.1-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:16fa61e7481f4b77ef53991075de29fc5bacb582a1244046d2e8b4bb72ef66d0"},
- {file = "cryptography-38.0.1-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d4ef6cc305394ed669d4d9eebf10d3a101059bdcf2669c366ec1d14e4fb227bd"},
- {file = "cryptography-38.0.1-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3261725c0ef84e7592597606f6583385fed2a5ec3909f43bc475ade9729a41d6"},
- {file = "cryptography-38.0.1-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:0297ffc478bdd237f5ca3a7dc96fc0d315670bfa099c04dc3a4a2172008a405a"},
- {file = "cryptography-38.0.1-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:89ed49784ba88c221756ff4d4755dbc03b3c8d2c5103f6d6b4f83a0fb1e85294"},
- {file = "cryptography-38.0.1-cp36-abi3-win32.whl", hash = "sha256:ac7e48f7e7261207d750fa7e55eac2d45f720027d5703cd9007e9b37bbb59ac0"},
- {file = "cryptography-38.0.1-cp36-abi3-win_amd64.whl", hash = "sha256:ad7353f6ddf285aeadfaf79e5a6829110106ff8189391704c1d8801aa0bae45a"},
- {file = "cryptography-38.0.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:896dd3a66959d3a5ddcfc140a53391f69ff1e8f25d93f0e2e7830c6de90ceb9d"},
- {file = "cryptography-38.0.1-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:d3971e2749a723e9084dd507584e2a2761f78ad2c638aa31e80bc7a15c9db4f9"},
- {file = "cryptography-38.0.1-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:79473cf8a5cbc471979bd9378c9f425384980fcf2ab6534b18ed7d0d9843987d"},
- {file = "cryptography-38.0.1-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:d9e69ae01f99abe6ad646947bba8941e896cb3aa805be2597a0400e0764b5818"},
- {file = "cryptography-38.0.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5067ee7f2bce36b11d0e334abcd1ccf8c541fc0bbdaf57cdd511fdee53e879b6"},
- {file = "cryptography-38.0.1-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:3e3a2599e640927089f932295a9a247fc40a5bdf69b0484532f530471a382750"},
- {file = "cryptography-38.0.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2e5856248a416767322c8668ef1845ad46ee62629266f84a8f007a317141013"},
- {file = "cryptography-38.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:64760ba5331e3f1794d0bcaabc0d0c39e8c60bf67d09c93dc0e54189dfd7cfe5"},
- {file = "cryptography-38.0.1-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b6c9b706316d7b5a137c35e14f4103e2115b088c412140fdbd5f87c73284df61"},
- {file = "cryptography-38.0.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0163a849b6f315bf52815e238bc2b2346604413fa7c1601eea84bcddb5fb9ac"},
- {file = "cryptography-38.0.1-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:d1a5bd52d684e49a36582193e0b89ff267704cd4025abefb9e26803adeb3e5fb"},
- {file = "cryptography-38.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:765fa194a0f3372d83005ab83ab35d7c5526c4e22951e46059b8ac678b44fa5a"},
- {file = "cryptography-38.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:52e7bee800ec869b4031093875279f1ff2ed12c1e2f74923e8f49c916afd1d3b"},
- {file = "cryptography-38.0.1.tar.gz", hash = "sha256:1db3d807a14931fa317f96435695d9ec386be7b84b618cc61cfa5d08b0ae33d7"},
+ {file = "cryptography-38.0.3-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:984fe150f350a3c91e84de405fe49e688aa6092b3525f407a18b9646f6612320"},
+ {file = "cryptography-38.0.3-cp36-abi3-macosx_10_10_x86_64.whl", hash = "sha256:ed7b00096790213e09eb11c97cc6e2b757f15f3d2f85833cd2d3ec3fe37c1722"},
+ {file = "cryptography-38.0.3-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bbf203f1a814007ce24bd4d51362991d5cb90ba0c177a9c08825f2cc304d871f"},
+ {file = "cryptography-38.0.3-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:554bec92ee7d1e9d10ded2f7e92a5d70c1f74ba9524947c0ba0c850c7b011828"},
+ {file = "cryptography-38.0.3-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1b52c9e5f8aa2b802d48bd693190341fae201ea51c7a167d69fc48b60e8a959"},
+ {file = "cryptography-38.0.3-cp36-abi3-manylinux_2_24_x86_64.whl", hash = "sha256:728f2694fa743a996d7784a6194da430f197d5c58e2f4e278612b359f455e4a2"},
+ {file = "cryptography-38.0.3-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dfb4f4dd568de1b6af9f4cda334adf7d72cf5bc052516e1b2608b683375dd95c"},
+ {file = "cryptography-38.0.3-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5419a127426084933076132d317911e3c6eb77568a1ce23c3ac1e12d111e61e0"},
+ {file = "cryptography-38.0.3-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:9b24bcff7853ed18a63cfb0c2b008936a9554af24af2fb146e16d8e1aed75748"},
+ {file = "cryptography-38.0.3-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:25c1d1f19729fb09d42e06b4bf9895212292cb27bb50229f5aa64d039ab29146"},
+ {file = "cryptography-38.0.3-cp36-abi3-win32.whl", hash = "sha256:7f836217000342d448e1c9a342e9163149e45d5b5eca76a30e84503a5a96cab0"},
+ {file = "cryptography-38.0.3-cp36-abi3-win_amd64.whl", hash = "sha256:c46837ea467ed1efea562bbeb543994c2d1f6e800785bd5a2c98bc096f5cb220"},
+ {file = "cryptography-38.0.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06fc3cc7b6f6cca87bd56ec80a580c88f1da5306f505876a71c8cfa7050257dd"},
+ {file = "cryptography-38.0.3-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:65535bc550b70bd6271984d9863a37741352b4aad6fb1b3344a54e6950249b55"},
+ {file = "cryptography-38.0.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:5e89468fbd2fcd733b5899333bc54d0d06c80e04cd23d8c6f3e0542358c6060b"},
+ {file = "cryptography-38.0.3-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:6ab9516b85bebe7aa83f309bacc5f44a61eeb90d0b4ec125d2d003ce41932d36"},
+ {file = "cryptography-38.0.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:068147f32fa662c81aebab95c74679b401b12b57494872886eb5c1139250ec5d"},
+ {file = "cryptography-38.0.3-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:402852a0aea73833d982cabb6d0c3bb582c15483d29fb7085ef2c42bfa7e38d7"},
+ {file = "cryptography-38.0.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b1b35d9d3a65542ed2e9d90115dfd16bbc027b3f07ee3304fc83580f26e43249"},
+ {file = "cryptography-38.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:6addc3b6d593cd980989261dc1cce38263c76954d758c3c94de51f1e010c9a50"},
+ {file = "cryptography-38.0.3-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:be243c7e2bfcf6cc4cb350c0d5cdf15ca6383bbcb2a8ef51d3c9411a9d4386f0"},
+ {file = "cryptography-38.0.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78cf5eefac2b52c10398a42765bfa981ce2372cbc0457e6bf9658f41ec3c41d8"},
+ {file = "cryptography-38.0.3-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:4e269dcd9b102c5a3d72be3c45d8ce20377b8076a43cbed6f660a1afe365e436"},
+ {file = "cryptography-38.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8d41a46251bf0634e21fac50ffd643216ccecfaf3701a063257fe0b2be1b6548"},
+ {file = "cryptography-38.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:785e4056b5a8b28f05a533fab69febf5004458e20dad7e2e13a3120d8ecec75a"},
+ {file = "cryptography-38.0.3.tar.gz", hash = "sha256:bfbe6ee19615b07a98b1d2287d6a6073f734735b49ee45b11324d85efc4d5cbd"},
]
debtcollector = [
{file = "debtcollector-2.5.0-py3-none-any.whl", hash = "sha256:1393a527d2c72f143ffa6a629e9c33face6642634eece475b48cab7b04ba61f3"},
@@ -1439,6 +1425,10 @@
{file = "eventlet-0.33.1-py2.py3-none-any.whl", hash = "sha256:a085922698e5029f820cf311a648ac324d73cec0e4792877609d978a4b5bbf31"},
{file = "eventlet-0.33.1.tar.gz", hash = "sha256:afbe17f06a58491e9aebd7a4a03e70b0b63fd4cf76d8307bae07f280479b1515"},
]
+exceptiongroup = [
+ {file = "exceptiongroup-1.0.1-py3-none-any.whl", hash = "sha256:4d6c0aa6dd825810941c792f53d7b8d71da26f5e5f84f20f9508e8f2d33b140a"},
+ {file = "exceptiongroup-1.0.1.tar.gz", hash = "sha256:73866f7f842ede6cb1daa42c4af078e2035e5f7607f0e2c762cc51bb31bbe7b2"},
+]
fasteners = [
{file = "fasteners-0.18-py3-none-any.whl", hash = "sha256:1d4caf5f8db57b0e4107d94fd5a1d02510a450dced6ca77d1839064c1bacf20c"},
{file = "fasteners-0.18.tar.gz", hash = "sha256:cb7c13ef91e0c7e4fe4af38ecaf6b904ec3f5ce0dda06d34924b6b74b869d953"},
@@ -1460,60 +1450,63 @@
{file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"},
]
greenlet = [
- {file = "greenlet-1.1.3-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:8c287ae7ac921dfde88b1c125bd9590b7ec3c900c2d3db5197f1286e144e712b"},
- {file = "greenlet-1.1.3-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:870a48007872d12e95a996fca3c03a64290d3ea2e61076aa35d3b253cf34cd32"},
- {file = "greenlet-1.1.3-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:7c5227963409551ae4a6938beb70d56bf1918c554a287d3da6853526212fbe0a"},
- {file = "greenlet-1.1.3-cp27-cp27m-win32.whl", hash = "sha256:9fae214f6c43cd47f7bef98c56919b9222481e833be2915f6857a1e9e8a15318"},
- {file = "greenlet-1.1.3-cp27-cp27m-win_amd64.whl", hash = "sha256:de431765bd5fe62119e0bc6bc6e7b17ac53017ae1782acf88fcf6b7eae475a49"},
- {file = "greenlet-1.1.3-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:510c3b15587afce9800198b4b142202b323bf4b4b5f9d6c79cb9a35e5e3c30d2"},
- {file = "greenlet-1.1.3-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:9951dcbd37850da32b2cb6e391f621c1ee456191c6ae5528af4a34afe357c30e"},
- {file = "greenlet-1.1.3-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:07c58e169bbe1e87b8bbf15a5c1b779a7616df9fd3e61cadc9d691740015b4f8"},
- {file = "greenlet-1.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df02fdec0c533301497acb0bc0f27f479a3a63dcdc3a099ae33a902857f07477"},
- {file = "greenlet-1.1.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c88e134d51d5e82315a7c32b914a58751b7353eb5268dbd02eabf020b4c4700"},
- {file = "greenlet-1.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b41d19c0cfe5c259fe6c539fd75051cd39a5d33d05482f885faf43f7f5e7d26"},
- {file = "greenlet-1.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:6f5d4b2280ceea76c55c893827961ed0a6eadd5a584a7c4e6e6dd7bc10dfdd96"},
- {file = "greenlet-1.1.3-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:184416e481295832350a4bf731ba619a92f5689bf5d0fa4341e98b98b1265bd7"},
- {file = "greenlet-1.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd0404d154084a371e6d2bafc787201612a1359c2dee688ae334f9118aa0bf47"},
- {file = "greenlet-1.1.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a43bbfa9b6cfdfaeefbd91038dde65ea2c421dc387ed171613df340650874f2"},
- {file = "greenlet-1.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce5b64dfe8d0cca407d88b0ee619d80d4215a2612c1af8c98a92180e7109f4b5"},
- {file = "greenlet-1.1.3-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:903fa5716b8fbb21019268b44f73f3748c41d1a30d71b4a49c84b642c2fed5fa"},
- {file = "greenlet-1.1.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:0118817c9341ef2b0f75f5af79ac377e4da6ff637e5ee4ac91802c0e379dadb4"},
- {file = "greenlet-1.1.3-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:466ce0928e33421ee84ae04c4ac6f253a3a3e6b8d600a79bd43fd4403e0a7a76"},
- {file = "greenlet-1.1.3-cp35-cp35m-win32.whl", hash = "sha256:65ad1a7a463a2a6f863661329a944a5802c7129f7ad33583dcc11069c17e622c"},
- {file = "greenlet-1.1.3-cp35-cp35m-win_amd64.whl", hash = "sha256:7532a46505470be30cbf1dbadb20379fb481244f1ca54207d7df3bf0bbab6a20"},
- {file = "greenlet-1.1.3-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:caff52cb5cd7626872d9696aee5b794abe172804beb7db52eed1fd5824b63910"},
- {file = "greenlet-1.1.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:db41f3845eb579b544c962864cce2c2a0257fe30f0f1e18e51b1e8cbb4e0ac6d"},
- {file = "greenlet-1.1.3-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:e8533f5111704d75de3139bf0b8136d3a6c1642c55c067866fa0a51c2155ee33"},
- {file = "greenlet-1.1.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9537e4baf0db67f382eb29255a03154fcd4984638303ff9baaa738b10371fa57"},
- {file = "greenlet-1.1.3-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8bfd36f368efe0ab2a6aa3db7f14598aac454b06849fb633b762ddbede1db90"},
- {file = "greenlet-1.1.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0877a9a2129a2c56a2eae2da016743db7d9d6a05d5e1c198f1b7808c602a30e"},
- {file = "greenlet-1.1.3-cp36-cp36m-win32.whl", hash = "sha256:88b04e12c9b041a1e0bcb886fec709c488192638a9a7a3677513ac6ba81d8e79"},
- {file = "greenlet-1.1.3-cp36-cp36m-win_amd64.whl", hash = "sha256:4f166b4aca8d7d489e82d74627a7069ab34211ef5ebb57c300ec4b9337b60fc0"},
- {file = "greenlet-1.1.3-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:cd16a89efe3a003029c87ff19e9fba635864e064da646bc749fc1908a4af18f3"},
- {file = "greenlet-1.1.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5b756e6730ea59b2745072e28ad27f4c837084688e6a6b3633c8b1e509e6ae0e"},
- {file = "greenlet-1.1.3-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:9b2f7d0408ddeb8ea1fd43d3db79a8cefaccadd2a812f021333b338ed6b10aba"},
- {file = "greenlet-1.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44b4817c34c9272c65550b788913620f1fdc80362b209bc9d7dd2f40d8793080"},
- {file = "greenlet-1.1.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d58a5a71c4c37354f9e0c24c9c8321f0185f6945ef027460b809f4bb474bfe41"},
- {file = "greenlet-1.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1dd51d2650e70c6c4af37f454737bf4a11e568945b27f74b471e8e2a9fd21268"},
- {file = "greenlet-1.1.3-cp37-cp37m-win32.whl", hash = "sha256:048d2bed76c2aa6de7af500ae0ea51dd2267aec0e0f2a436981159053d0bc7cc"},
- {file = "greenlet-1.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:77e41db75f9958f2083e03e9dd39da12247b3430c92267df3af77c83d8ff9eed"},
- {file = "greenlet-1.1.3-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:1626185d938d7381631e48e6f7713e8d4b964be246073e1a1d15c2f061ac9f08"},
- {file = "greenlet-1.1.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:1ec2779774d8e42ed0440cf8bc55540175187e8e934f2be25199bf4ed948cd9e"},
- {file = "greenlet-1.1.3-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:f2f908239b7098799b8845e5936c2ccb91d8c2323be02e82f8dcb4a80dcf4a25"},
- {file = "greenlet-1.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b181e9aa6cb2f5ec0cacc8cee6e5a3093416c841ba32c185c30c160487f0380"},
- {file = "greenlet-1.1.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2cf45e339cabea16c07586306a31cfcc5a3b5e1626d365714d283732afed6809"},
- {file = "greenlet-1.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6200a11f003ec26815f7e3d2ded01b43a3810be3528dd760d2f1fa777490c3cd"},
- {file = "greenlet-1.1.3-cp38-cp38-win32.whl", hash = "sha256:db5b25265010a1b3dca6a174a443a0ed4c4ab12d5e2883a11c97d6e6d59b12f9"},
- {file = "greenlet-1.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:095a980288fe05adf3d002fbb180c99bdcf0f930e220aa66fcd56e7914a38202"},
- {file = "greenlet-1.1.3-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:cbc1eb55342cbac8f7ec159088d54e2cfdd5ddf61c87b8bbe682d113789331b2"},
- {file = "greenlet-1.1.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:694ffa7144fa5cc526c8f4512665003a39fa09ef00d19bbca5c8d3406db72fbe"},
- {file = "greenlet-1.1.3-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:aa741c1a8a8cc25eb3a3a01a62bdb5095a773d8c6a86470bde7f607a447e7905"},
- {file = "greenlet-1.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3a669f11289a8995d24fbfc0e63f8289dd03c9aaa0cc8f1eab31d18ca61a382"},
- {file = "greenlet-1.1.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76a53bfa10b367ee734b95988bd82a9a5f0038a25030f9f23bbbc005010ca600"},
- {file = "greenlet-1.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fb0aa7f6996879551fd67461d5d3ab0c3c0245da98be90c89fcb7a18d437403"},
- {file = "greenlet-1.1.3-cp39-cp39-win32.whl", hash = "sha256:5fbe1ab72b998ca77ceabbae63a9b2e2dc2d963f4299b9b278252ddba142d3f1"},
- {file = "greenlet-1.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:ffe73f9e7aea404722058405ff24041e59d31ca23d1da0895af48050a07b6932"},
- {file = "greenlet-1.1.3.tar.gz", hash = "sha256:bcb6c6dd1d6be6d38d6db283747d07fda089ff8c559a835236560a4410340455"},
+ {file = "greenlet-2.0.1-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:9ed358312e63bf683b9ef22c8e442ef6c5c02973f0c2a939ec1d7b50c974015c"},
+ {file = "greenlet-2.0.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4f09b0010e55bec3239278f642a8a506b91034f03a4fb28289a7d448a67f1515"},
+ {file = "greenlet-2.0.1-cp27-cp27m-win32.whl", hash = "sha256:1407fe45246632d0ffb7a3f4a520ba4e6051fc2cbd61ba1f806900c27f47706a"},
+ {file = "greenlet-2.0.1-cp27-cp27m-win_amd64.whl", hash = "sha256:3001d00eba6bbf084ae60ec7f4bb8ed375748f53aeaefaf2a37d9f0370558524"},
+ {file = "greenlet-2.0.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d566b82e92ff2e09dd6342df7e0eb4ff6275a3f08db284888dcd98134dbd4243"},
+ {file = "greenlet-2.0.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:0722c9be0797f544a3ed212569ca3fe3d9d1a1b13942d10dd6f0e8601e484d26"},
+ {file = "greenlet-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d37990425b4687ade27810e3b1a1c37825d242ebc275066cfee8cb6b8829ccd"},
+ {file = "greenlet-2.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be35822f35f99dcc48152c9839d0171a06186f2d71ef76dc57fa556cc9bf6b45"},
+ {file = "greenlet-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c140e7eb5ce47249668056edf3b7e9900c6a2e22fb0eaf0513f18a1b2c14e1da"},
+ {file = "greenlet-2.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d21681f09e297a5adaa73060737e3aa1279a13ecdcfcc6ef66c292cb25125b2d"},
+ {file = "greenlet-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fb412b7db83fe56847df9c47b6fe3f13911b06339c2aa02dcc09dce8bbf582cd"},
+ {file = "greenlet-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6a08799e9e88052221adca55741bf106ec7ea0710bca635c208b751f0d5b617"},
+ {file = "greenlet-2.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e112e03d37987d7b90c1e98ba5e1b59e1645226d78d73282f45b326f7bddcb9"},
+ {file = "greenlet-2.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56961cfca7da2fdd178f95ca407fa330c64f33289e1804b592a77d5593d9bd94"},
+ {file = "greenlet-2.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13ba6e8e326e2116c954074c994da14954982ba2795aebb881c07ac5d093a58a"},
+ {file = "greenlet-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bf633a50cc93ed17e494015897361010fc08700d92676c87931d3ea464123ce"},
+ {file = "greenlet-2.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:9f2c221eecb7ead00b8e3ddb913c67f75cba078fd1d326053225a3f59d850d72"},
+ {file = "greenlet-2.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:13ebf93c343dd8bd010cd98e617cb4c1c1f352a0cf2524c82d3814154116aa82"},
+ {file = "greenlet-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:6f61d71bbc9b4a3de768371b210d906726535d6ca43506737682caa754b956cd"},
+ {file = "greenlet-2.0.1-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:2d0bac0385d2b43a7bd1d651621a4e0f1380abc63d6fb1012213a401cbd5bf8f"},
+ {file = "greenlet-2.0.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:f6327b6907b4cb72f650a5b7b1be23a2aab395017aa6f1adb13069d66360eb3f"},
+ {file = "greenlet-2.0.1-cp35-cp35m-win32.whl", hash = "sha256:81b0ea3715bf6a848d6f7149d25bf018fd24554a4be01fcbbe3fdc78e890b955"},
+ {file = "greenlet-2.0.1-cp35-cp35m-win_amd64.whl", hash = "sha256:38255a3f1e8942573b067510f9611fc9e38196077b0c8eb7a8c795e105f9ce77"},
+ {file = "greenlet-2.0.1-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:04957dc96669be041e0c260964cfef4c77287f07c40452e61abe19d647505581"},
+ {file = "greenlet-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:4aeaebcd91d9fee9aa768c1b39cb12214b30bf36d2b7370505a9f2165fedd8d9"},
+ {file = "greenlet-2.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:974a39bdb8c90a85982cdb78a103a32e0b1be986d411303064b28a80611f6e51"},
+ {file = "greenlet-2.0.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dca09dedf1bd8684767bc736cc20c97c29bc0c04c413e3276e0962cd7aeb148"},
+ {file = "greenlet-2.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c0757db9bd08470ff8277791795e70d0bf035a011a528ee9a5ce9454b6cba2"},
+ {file = "greenlet-2.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:5067920de254f1a2dee8d3d9d7e4e03718e8fd2d2d9db962c8c9fa781ae82a39"},
+ {file = "greenlet-2.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:5a8e05057fab2a365c81abc696cb753da7549d20266e8511eb6c9d9f72fe3e92"},
+ {file = "greenlet-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:3d75b8d013086b08e801fbbb896f7d5c9e6ccd44f13a9241d2bf7c0df9eda928"},
+ {file = "greenlet-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:097e3dae69321e9100202fc62977f687454cd0ea147d0fd5a766e57450c569fd"},
+ {file = "greenlet-2.0.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:cb242fc2cda5a307a7698c93173d3627a2a90d00507bccf5bc228851e8304963"},
+ {file = "greenlet-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:72b00a8e7c25dcea5946692a2485b1a0c0661ed93ecfedfa9b6687bd89a24ef5"},
+ {file = "greenlet-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5b0ff9878333823226d270417f24f4d06f235cb3e54d1103b71ea537a6a86ce"},
+ {file = "greenlet-2.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be9e0fb2ada7e5124f5282d6381903183ecc73ea019568d6d63d33f25b2a9000"},
+ {file = "greenlet-2.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b493db84d124805865adc587532ebad30efa68f79ad68f11b336e0a51ec86c2"},
+ {file = "greenlet-2.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a20d33124935d27b80e6fdacbd34205732660e0a1d35d8b10b3328179a2b51a1"},
+ {file = "greenlet-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:ea688d11707d30e212e0110a1aac7f7f3f542a259235d396f88be68b649e47d1"},
+ {file = "greenlet-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:afe07421c969e259e9403c3bb658968702bc3b78ec0b6fde3ae1e73440529c23"},
+ {file = "greenlet-2.0.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:cd4ccc364cf75d1422e66e247e52a93da6a9b73cefa8cad696f3cbbb75af179d"},
+ {file = "greenlet-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:4c8b1c43e75c42a6cafcc71defa9e01ead39ae80bd733a2608b297412beede68"},
+ {file = "greenlet-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:659f167f419a4609bc0516fb18ea69ed39dbb25594934bd2dd4d0401660e8a1e"},
+ {file = "greenlet-2.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:356e4519d4dfa766d50ecc498544b44c0249b6de66426041d7f8b751de4d6b48"},
+ {file = "greenlet-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:811e1d37d60b47cb8126e0a929b58c046251f28117cb16fcd371eed61f66b764"},
+ {file = "greenlet-2.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0109af1138afbfb8ae647e31a2b1ab030f58b21dd8528c27beaeb0093b7938a9"},
+ {file = "greenlet-2.0.1-cp38-cp38-win32.whl", hash = "sha256:88c8d517e78acdf7df8a2134a3c4b964415b575d2840a2746ddb1cc6175f8608"},
+ {file = "greenlet-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d6ee1aa7ab36475035eb48c01efae87d37936a8173fc4d7b10bb02c2d75dd8f6"},
+ {file = "greenlet-2.0.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b1992ba9d4780d9af9726bbcef6a1db12d9ab1ccc35e5773685a24b7fb2758eb"},
+ {file = "greenlet-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:b5e83e4de81dcc9425598d9469a624826a0b1211380ac444c7c791d4a2137c19"},
+ {file = "greenlet-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:505138d4fa69462447a562a7c2ef723c6025ba12ac04478bc1ce2fcc279a2db5"},
+ {file = "greenlet-2.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cce1e90dd302f45716a7715517c6aa0468af0bf38e814ad4eab58e88fc09f7f7"},
+ {file = "greenlet-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e9744c657d896c7b580455e739899e492a4a452e2dd4d2b3e459f6b244a638d"},
+ {file = "greenlet-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:41b825d65f31e394b523c84db84f9383a2f7eefc13d987f308f4663794d2687e"},
+ {file = "greenlet-2.0.1-cp39-cp39-win32.whl", hash = "sha256:db38f80540083ea33bdab614a9d28bcec4b54daa5aff1668d7827a9fc769ae0a"},
+ {file = "greenlet-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b23d2a46d53210b498e5b701a1913697671988f4bf8e10f935433f6e7c332fb6"},
+ {file = "greenlet-2.0.1.tar.gz", hash = "sha256:42e602564460da0e8ee67cb6d7236363ee5e131aa15943b6670e44e5c2ed0f67"},
]
idna = [
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
@@ -1524,8 +1517,8 @@
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
]
iso8601 = [
- {file = "iso8601-1.0.2-py3-none-any.whl", hash = "sha256:d7bc01b1c2a43b259570bb307f057abc578786ea734ba2b87b836c5efc5bd443"},
- {file = "iso8601-1.0.2.tar.gz", hash = "sha256:27f503220e6845d9db954fb212b95b0362d8b7e6c1b2326a87061c3de93594b1"},
+ {file = "iso8601-1.1.0-py3-none-any.whl", hash = "sha256:8400e90141bf792bce2634df533dc57e3bee19ea120a87bebcd3da89a58ad73f"},
+ {file = "iso8601-1.1.0.tar.gz", hash = "sha256:32811e7b81deee2063ea6d2e94f8819a86d1f3811e49d23623a41fa832bef03f"},
]
isort = [
{file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"},
@@ -1547,9 +1540,12 @@
{file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"},
{file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"},
]
+jsonnet = [
+ {file = "jsonnet-0.18.0.tar.gz", hash = "sha256:4ccd13427e9097b6b7d6d38f78f638a55ab8b452a257639e8e9af2178ec235d4"},
+]
jsonschema = [
- {file = "jsonschema-4.16.0-py3-none-any.whl", hash = "sha256:9e74b8f9738d6a946d70705dc692b74b5429cd0960d58e79ffecfc43b2221eb9"},
- {file = "jsonschema-4.16.0.tar.gz", hash = "sha256:165059f076eff6971bae5b742fc029a7b4ef3f9bcf04c14e4776a7605de14b23"},
+ {file = "jsonschema-4.17.0-py3-none-any.whl", hash = "sha256:f660066c3966db7d6daeaea8a75e0b68237a48e51cf49882087757bb59916248"},
+ {file = "jsonschema-4.17.0.tar.gz", hash = "sha256:5bfcf2bca16a087ade17e02b282d34af7ccd749ef76241e7f9bd7c0cb8a9424d"},
]
Markdown = [
{file = "Markdown-3.3.7-py3-none-any.whl", hash = "sha256:f5da449a6e1c989a4cea2631aa8ee67caa5a2ef855d551c88f9e309f4634c621"},
@@ -1606,20 +1602,20 @@
{file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"},
]
mkdocs = [
- {file = "mkdocs-1.4.1-py3-none-any.whl", hash = "sha256:2b7845c2775396214cd408753e4cfb01af3cfed36acc141a84bce2ceec9d705d"},
- {file = "mkdocs-1.4.1.tar.gz", hash = "sha256:07ed90be4062e4ef732bbac2623097b9dca35c67b562c38cfd0bfbc7151758c1"},
+ {file = "mkdocs-1.4.2-py3-none-any.whl", hash = "sha256:c8856a832c1e56702577023cd64cc5f84948280c1c0fcc6af4cd39006ea6aa8c"},
+ {file = "mkdocs-1.4.2.tar.gz", hash = "sha256:8947af423a6d0facf41ea1195b8e1e8c85ad94ac95ae307fe11232e0424b11c5"},
]
mkdocs-material = [
- {file = "mkdocs_material-8.5.7-py3-none-any.whl", hash = "sha256:07fc70dfa325a8019b99a124751c43e4c1c2a739ed1b0b82c00f823f31c9a1e2"},
- {file = "mkdocs_material-8.5.7.tar.gz", hash = "sha256:ff4c7851b2e5f9a6cfa0a8b247e973ebae753b9836a53bd68742827541ab73e5"},
+ {file = "mkdocs_material-8.5.8-py3-none-any.whl", hash = "sha256:7ff092299e3a63cef99cd87e4a6cc7e7d9ec31fd190d766fd147c35572e6d593"},
+ {file = "mkdocs_material-8.5.8.tar.gz", hash = "sha256:61396251819cf7f547f70a09ce6a7edb2ff5d32e47b9199769020b2d20a83d44"},
]
mkdocs-material-extensions = [
{file = "mkdocs_material_extensions-1.1-py3-none-any.whl", hash = "sha256:bcc2e5fc70c0ec50e59703ee6e639d87c7e664c0c441c014ea84461a90f1e902"},
{file = "mkdocs_material_extensions-1.1.tar.gz", hash = "sha256:96ca979dae66d65c2099eefe189b49d5ac62f76afb59c38e069ffc7cf3c131ec"},
]
molecule = [
- {file = "molecule-4.0.1-py3-none-any.whl", hash = "sha256:dcf829a70fd987ad21d28d44820b05d7fc09250dc19a79572ca10669e27bd8be"},
- {file = "molecule-4.0.1.tar.gz", hash = "sha256:ee10e5463bc9940f9afd9b85812c63eacefea889a99c21274f4054ebe248c3b9"},
+ {file = "molecule-4.0.3-py3-none-any.whl", hash = "sha256:3ddf33a8b3a3fb33b75cf1c31a410834eda42dbc23d6161fbdb7b0e72009f4ce"},
+ {file = "molecule-4.0.3.tar.gz", hash = "sha256:b5a78a77f29f1deecf768dfbffafc21b419d9520123468191438ec4c72eaef69"},
]
msgpack = [
{file = "msgpack-1.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4ab251d229d10498e9a2f3b1e68ef64cb393394ec477e3370c457f9430ce9250"},
@@ -1712,8 +1708,8 @@
{file = "netifaces-0.11.0.tar.gz", hash = "sha256:043a79146eb2907edf439899f262b3dfe41717d34124298ed281139a8b93ca32"},
]
networkx = [
- {file = "networkx-2.8.6-py3-none-any.whl", hash = "sha256:2a30822761f34d56b9a370d96a4bf4827a535f5591a4078a453425caeba0c5bb"},
- {file = "networkx-2.8.6.tar.gz", hash = "sha256:bd2b7730300860cbd2dafe8e5af89ff5c9a65c3975b352799d87a6238b4301a6"},
+ {file = "networkx-2.8.8-py3-none-any.whl", hash = "sha256:e435dfa75b1d7195c7b8378c3859f0445cd88c6b0375c181ed66823a9ceb7524"},
+ {file = "networkx-2.8.8.tar.gz", hash = "sha256:230d388117af870fce5647a3c52401fcf753e94720e6ea6b4197a5355648885e"},
]
"oslo.i18n" = [
{file = "oslo.i18n-5.1.0-py3-none-any.whl", hash = "sha256:75086cfd898819638ca741159f677e2073a78ca86a9c9be8d38b46800cdf2dc9"},
@@ -1732,20 +1728,16 @@
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
]
pbr = [
- {file = "pbr-5.10.0-py2.py3-none-any.whl", hash = "sha256:da3e18aac0a3c003e9eea1a81bd23e5a3a75d745670dcf736317b7d966887fdf"},
- {file = "pbr-5.10.0.tar.gz", hash = "sha256:cfcc4ff8e698256fc17ea3ff796478b050852585aa5bae79ecd05b2ab7b39b9a"},
+ {file = "pbr-5.11.0-py2.py3-none-any.whl", hash = "sha256:db2317ff07c84c4c63648c9064a79fe9d9f5c7ce85a9099d4b6258b3db83225a"},
+ {file = "pbr-5.11.0.tar.gz", hash = "sha256:b97bc6695b2aff02144133c2e7399d5885223d42b7912ffaec2ca3898e673bfe"},
]
pluggy = [
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
]
prettytable = [
- {file = "prettytable-3.4.1-py3-none-any.whl", hash = "sha256:0d23ff81e165077d93367e1379d97893c7a51541483d25bad45b9647660ef06f"},
- {file = "prettytable-3.4.1.tar.gz", hash = "sha256:7d7dd84d0b206f2daac4471a72f299d6907f34516064feb2838e333a4e2567bd"},
-]
-py = [
- {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
- {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
+ {file = "prettytable-3.5.0-py3-none-any.whl", hash = "sha256:fe391c3b545800028edf5dbb6a5360893feb398367fcc1cf8d7a5b29ce5c59a1"},
+ {file = "prettytable-3.5.0.tar.gz", hash = "sha256:52f682ba4efe29dccb38ff0fe5bac8a23007d0780ff92a8b85af64bc4fc74d72"},
]
pycodestyle = [
{file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"},
@@ -1818,31 +1810,32 @@
{file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
]
pyrsistent = [
- {file = "pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1"},
- {file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d45866ececf4a5fff8742c25722da6d4c9e180daa7b405dc0a2a2790d668c26"},
- {file = "pyrsistent-0.18.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4ed6784ceac462a7d6fcb7e9b663e93b9a6fb373b7f43594f9ff68875788e01e"},
- {file = "pyrsistent-0.18.1-cp310-cp310-win32.whl", hash = "sha256:e4f3149fd5eb9b285d6bfb54d2e5173f6a116fe19172686797c056672689daf6"},
- {file = "pyrsistent-0.18.1-cp310-cp310-win_amd64.whl", hash = "sha256:636ce2dc235046ccd3d8c56a7ad54e99d5c1cd0ef07d9ae847306c91d11b5fec"},
- {file = "pyrsistent-0.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e92a52c166426efbe0d1ec1332ee9119b6d32fc1f0bbfd55d5c1088070e7fc1b"},
- {file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7a096646eab884bf8bed965bad63ea327e0d0c38989fc83c5ea7b8a87037bfc"},
- {file = "pyrsistent-0.18.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cdfd2c361b8a8e5d9499b9082b501c452ade8bbf42aef97ea04854f4a3f43b22"},
- {file = "pyrsistent-0.18.1-cp37-cp37m-win32.whl", hash = "sha256:7ec335fc998faa4febe75cc5268a9eac0478b3f681602c1f27befaf2a1abe1d8"},
- {file = "pyrsistent-0.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:6455fc599df93d1f60e1c5c4fe471499f08d190d57eca040c0ea182301321286"},
- {file = "pyrsistent-0.18.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fd8da6d0124efa2f67d86fa70c851022f87c98e205f0594e1fae044e7119a5a6"},
- {file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bfe2388663fd18bd8ce7db2c91c7400bf3e1a9e8bd7d63bf7e77d39051b85ec"},
- {file = "pyrsistent-0.18.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e3e1fcc45199df76053026a51cc59ab2ea3fc7c094c6627e93b7b44cdae2c8c"},
- {file = "pyrsistent-0.18.1-cp38-cp38-win32.whl", hash = "sha256:b568f35ad53a7b07ed9b1b2bae09eb15cdd671a5ba5d2c66caee40dbf91c68ca"},
- {file = "pyrsistent-0.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1b96547410f76078eaf66d282ddca2e4baae8964364abb4f4dcdde855cd123a"},
- {file = "pyrsistent-0.18.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f87cc2863ef33c709e237d4b5f4502a62a00fab450c9e020892e8e2ede5847f5"},
- {file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bc66318fb7ee012071b2792024564973ecc80e9522842eb4e17743604b5e045"},
- {file = "pyrsistent-0.18.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:914474c9f1d93080338ace89cb2acee74f4f666fb0424896fcfb8d86058bf17c"},
- {file = "pyrsistent-0.18.1-cp39-cp39-win32.whl", hash = "sha256:1b34eedd6812bf4d33814fca1b66005805d3640ce53140ab8bbb1e2651b0d9bc"},
- {file = "pyrsistent-0.18.1-cp39-cp39-win_amd64.whl", hash = "sha256:e24a828f57e0c337c8d8bb9f6b12f09dfdf0273da25fda9e314f0b684b415a07"},
- {file = "pyrsistent-0.18.1.tar.gz", hash = "sha256:d4d61f8b993a7255ba714df3aca52700f8125289f84f704cf80916517c46eb96"},
+ {file = "pyrsistent-0.19.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d6982b5a0237e1b7d876b60265564648a69b14017f3b5f908c5be2de3f9abb7a"},
+ {file = "pyrsistent-0.19.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:187d5730b0507d9285a96fca9716310d572e5464cadd19f22b63a6976254d77a"},
+ {file = "pyrsistent-0.19.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:055ab45d5911d7cae397dc418808d8802fb95262751872c841c170b0dbf51eed"},
+ {file = "pyrsistent-0.19.2-cp310-cp310-win32.whl", hash = "sha256:456cb30ca8bff00596519f2c53e42c245c09e1a4543945703acd4312949bfd41"},
+ {file = "pyrsistent-0.19.2-cp310-cp310-win_amd64.whl", hash = "sha256:b39725209e06759217d1ac5fcdb510e98670af9e37223985f330b611f62e7425"},
+ {file = "pyrsistent-0.19.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2aede922a488861de0ad00c7630a6e2d57e8023e4be72d9d7147a9fcd2d30712"},
+ {file = "pyrsistent-0.19.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879b4c2f4d41585c42df4d7654ddffff1239dc4065bc88b745f0341828b83e78"},
+ {file = "pyrsistent-0.19.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c43bec251bbd10e3cb58ced80609c5c1eb238da9ca78b964aea410fb820d00d6"},
+ {file = "pyrsistent-0.19.2-cp37-cp37m-win32.whl", hash = "sha256:d690b18ac4b3e3cab73b0b7aa7dbe65978a172ff94970ff98d82f2031f8971c2"},
+ {file = "pyrsistent-0.19.2-cp37-cp37m-win_amd64.whl", hash = "sha256:3ba4134a3ff0fc7ad225b6b457d1309f4698108fb6b35532d015dca8f5abed73"},
+ {file = "pyrsistent-0.19.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a178209e2df710e3f142cbd05313ba0c5ebed0a55d78d9945ac7a4e09d923308"},
+ {file = "pyrsistent-0.19.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e371b844cec09d8dc424d940e54bba8f67a03ebea20ff7b7b0d56f526c71d584"},
+ {file = "pyrsistent-0.19.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:111156137b2e71f3a9936baf27cb322e8024dac3dc54ec7fb9f0bcf3249e68bb"},
+ {file = "pyrsistent-0.19.2-cp38-cp38-win32.whl", hash = "sha256:e5d8f84d81e3729c3b506657dddfe46e8ba9c330bf1858ee33108f8bb2adb38a"},
+ {file = "pyrsistent-0.19.2-cp38-cp38-win_amd64.whl", hash = "sha256:9cd3e9978d12b5d99cbdc727a3022da0430ad007dacf33d0bf554b96427f33ab"},
+ {file = "pyrsistent-0.19.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f1258f4e6c42ad0b20f9cfcc3ada5bd6b83374516cd01c0960e3cb75fdca6770"},
+ {file = "pyrsistent-0.19.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21455e2b16000440e896ab99e8304617151981ed40c29e9507ef1c2e4314ee95"},
+ {file = "pyrsistent-0.19.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfd880614c6237243ff53a0539f1cb26987a6dc8ac6e66e0c5a40617296a045e"},
+ {file = "pyrsistent-0.19.2-cp39-cp39-win32.whl", hash = "sha256:71d332b0320642b3261e9fee47ab9e65872c2bd90260e5d225dabeed93cbd42b"},
+ {file = "pyrsistent-0.19.2-cp39-cp39-win_amd64.whl", hash = "sha256:dec3eac7549869365fe263831f576c8457f6c833937c68542d08fde73457d291"},
+ {file = "pyrsistent-0.19.2-py3-none-any.whl", hash = "sha256:ea6b79a02a28550c98b6ca9c35b9f492beaa54d7c5c9e9949555893c8a9234d0"},
+ {file = "pyrsistent-0.19.2.tar.gz", hash = "sha256:bfa0351be89c9fcbcb8c9879b826f4353be10f58f8a677efab0c017bf7137ec2"},
]
pytest = [
- {file = "pytest-7.1.3-py3-none-any.whl", hash = "sha256:1377bda3466d70b55e3f5cecfa55bb7cfcf219c7964629b967c37cf0bda818b7"},
- {file = "pytest-7.1.3.tar.gz", hash = "sha256:4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"},
+ {file = "pytest-7.2.0-py3-none-any.whl", hash = "sha256:892f933d339f068883b6fd5a459f03d85bfcb355e4981e146d2c7616c21fef71"},
+ {file = "pytest-7.2.0.tar.gz", hash = "sha256:c4014eb40e10f11f355ad4e3c2fb2c6c6d1919c73f3b5a433de4708202cade59"},
]
pytest-cov = [
{file = "pytest-cov-3.0.0.tar.gz", hash = "sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"},
@@ -1853,8 +1846,8 @@
{file = "pytest_kind-22.9.0-py3-none-any.whl", hash = "sha256:9a9b693400a60822b10f3419e4f6862b5e85410af4a6780ebf0aea5878e15b26"},
]
pytest-mock = [
- {file = "pytest-mock-3.8.2.tar.gz", hash = "sha256:77f03f4554392558700295e05aed0b1096a20d4a60a4f3ddcde58b0c31c8fca2"},
- {file = "pytest_mock-3.8.2-py3-none-any.whl", hash = "sha256:8a9e226d6c0ef09fcf20c94eb3405c388af438a90f3e39687f84166da82d5948"},
+ {file = "pytest-mock-3.10.0.tar.gz", hash = "sha256:fbbdb085ef7c252a326fd8cdcac0aa3b1333d8811f131bdcc701002e1be7ed4f"},
+ {file = "pytest_mock-3.10.0-py3-none-any.whl", hash = "sha256:f4c973eeae0282963eb293eb173ce91b091a79c1334455acfac9ddee8a1c784b"},
]
python-dateutil = [
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
@@ -1869,8 +1862,8 @@
{file = "python_slugify-6.1.2-py2.py3-none-any.whl", hash = "sha256:7b2c274c308b62f4269a9ba701aa69a797e9bca41aeee5b3a9e79e36b6656927"},
]
pytz = [
- {file = "pytz-2022.2.1-py2.py3-none-any.whl", hash = "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197"},
- {file = "pytz-2022.2.1.tar.gz", hash = "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5"},
+ {file = "pytz-2022.6-py2.py3-none-any.whl", hash = "sha256:222439474e9c98fced559f1709d89e6c9cbf8d79c794ff3eb9f8800064291427"},
+ {file = "pytz-2022.6.tar.gz", hash = "sha256:e89512406b793ca39f5971bc999cc538ce125c0e51c27941bef4568b460095e2"},
]
PyYAML = [
{file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
@@ -1927,24 +1920,20 @@
{file = "resolvelib-0.8.1.tar.gz", hash = "sha256:c6ea56732e9fb6fca1b2acc2ccc68a0b6b8c566d8f3e78e0443310ede61dbd37"},
]
rich = [
- {file = "rich-12.5.1-py3-none-any.whl", hash = "sha256:2eb4e6894cde1e017976d2975ac210ef515d7548bc595ba20e195fb9628acdeb"},
- {file = "rich-12.5.1.tar.gz", hash = "sha256:63a5c5ce3673d3d5fbbf23cd87e11ab84b6b451436f1b7f19ec54b6bc36ed7ca"},
+ {file = "rich-12.6.0-py3-none-any.whl", hash = "sha256:a4eb26484f2c82589bd9a17c73d32a010b1e29d89f1604cd9bf3a2097b81bb5e"},
+ {file = "rich-12.6.0.tar.gz", hash = "sha256:ba3a3775974105c221d31141f2c116f4fd65c5ceb0698657a11e9f295ec93fd0"},
]
schematics = [
{file = "schematics-2.1.1-py2.py3-none-any.whl", hash = "sha256:be2d451bfb86789975e5ec0864aec569b63cea9010f0d24cbbd992a4e564c647"},
{file = "schematics-2.1.1.tar.gz", hash = "sha256:34c87f51a25063bb498ae1cc201891b134cfcb329baf9e9f4f3ae869b767560f"},
]
-setuptools = [
- {file = "setuptools-65.3.0-py3-none-any.whl", hash = "sha256:2e24e0bec025f035a2e72cdd1961119f557d78ad331bb00ff82efb2ab8da8e82"},
- {file = "setuptools-65.3.0.tar.gz", hash = "sha256:7732871f4f7fa58fb6bdcaeadb0161b2bd046c85905dbaa066bdcbcc81953b57"},
-]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
stevedore = [
- {file = "stevedore-4.0.0-py3-none-any.whl", hash = "sha256:87e4d27fe96d0d7e4fc24f0cbe3463baae4ec51e81d95fbe60d2474636e0c7d8"},
- {file = "stevedore-4.0.0.tar.gz", hash = "sha256:f82cc99a1ff552310d19c379827c2c64dd9f85a38bcd5559db2470161867b786"},
+ {file = "stevedore-4.1.0-py3-none-any.whl", hash = "sha256:3b1cbd592a87315f000d05164941ee5e164899f8fc0ce9a00bb0f321f40ef93e"},
+ {file = "stevedore-4.1.0.tar.gz", hash = "sha256:02518a8f0d6d29be8a445b7f2ac63753ff29e8f2a2faa01777568d5500d777a6"},
]
structlog = [
{file = "structlog-22.1.0-py3-none-any.whl", hash = "sha256:760d37b8839bd4fe1747bed7b80f7f4de160078405f4b6a1db9270ccbfce6c30"},
@@ -1959,8 +1948,8 @@
{file = "taskflow-5.0.0.tar.gz", hash = "sha256:bbfa2b91fd973b363c819fb4409713fa7538e561d20afccd098b2409a9487284"},
]
tenacity = [
- {file = "tenacity-8.0.1-py3-none-any.whl", hash = "sha256:f78f4ea81b0fabc06728c11dc2a8c01277bfc5181b321a4770471902e3eb844a"},
- {file = "tenacity-8.0.1.tar.gz", hash = "sha256:43242a20e3e73291a28bcbcacfd6e000b02d3857a9a9fff56b297a27afdc932f"},
+ {file = "tenacity-8.1.0-py3-none-any.whl", hash = "sha256:35525cd47f82830069f0d6b73f7eb83bc5b73ee2fff0437952cedf98b27653ac"},
+ {file = "tenacity-8.1.0.tar.gz", hash = "sha256:e48c437fdf9340f5666b92cd7990e96bc5fc955e1298baf4a907e3972067a445"},
]
text-unidecode = [
{file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"},
@@ -1979,12 +1968,12 @@
{file = "tqdm-4.64.1.tar.gz", hash = "sha256:5f4f682a004951c1b450bc753c710e9280c5746ce6ffedee253ddbcbf54cf1e4"},
]
typer = [
- {file = "typer-0.6.1-py3-none-any.whl", hash = "sha256:54b19e5df18654070a82f8c2aa1da456a4ac16a2a83e6dcd9f170e291c56338e"},
- {file = "typer-0.6.1.tar.gz", hash = "sha256:2d5720a5e63f73eaf31edaa15f6ab87f35f0690f8ca233017d7d23d743a91d73"},
+ {file = "typer-0.7.0-py3-none-any.whl", hash = "sha256:b5e704f4e48ec263de1c0b3a2387cd405a13767d2f907f44c1a08cbad96f606d"},
+ {file = "typer-0.7.0.tar.gz", hash = "sha256:ff797846578a9f2a201b53442aedeb543319466870fbe1c701eab66dd7681165"},
]
typing-extensions = [
- {file = "typing_extensions-4.3.0-py3-none-any.whl", hash = "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02"},
- {file = "typing_extensions-4.3.0.tar.gz", hash = "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6"},
+ {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"},
+ {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"},
]
urllib3 = [
{file = "urllib3-1.26.12-py2.py3-none-any.whl", hash = "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997"},
diff --git a/pyproject.toml b/pyproject.toml
index 868d23d..0b769e2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@
tomli = "^2.0.1"
tomli-w = "^1.0.0"
jmespath = "^1.0.1"
+jsonnet = "^0.18.0"
[tool.poetry.group.dev.dependencies]
pytest = "^7.1.3"
diff --git a/roles/atmosphere/defaults/main.yml b/roles/atmosphere/defaults/main.yml
index c7e4953..6ee4d21 100644
--- a/roles/atmosphere/defaults/main.yml
+++ b/roles/atmosphere/defaults/main.yml
@@ -1,6 +1,8 @@
atmosphere_image: quay.io/vexxhost/atmosphere:0.5.0 # x-release-please-version
atmosphere_config:
+ kube_prometheus_stack:
+ overrides: "{{ kube_prometheus_stack_values | default({}) }}"
memcached:
secret_key: "{{ openstack_helm_endpoints_memcached_secret_key }}"
overrides: "{{ openstack_helm_infra_memcached_values | default({}) }}"
diff --git a/roles/kube_prometheus_stack/defaults/main.yml b/roles/kube_prometheus_stack/defaults/main.yml
deleted file mode 100644
index 3d0fef7..0000000
--- a/roles/kube_prometheus_stack/defaults/main.yml
+++ /dev/null
@@ -1,19 +0,0 @@
----
-# .. vim: foldmarker=[[[,]]]:foldmethod=marker
-
-# .. Copyright (C) 2022 VEXXHOST, Inc.
-# .. SPDX-License-Identifier: Apache-2.0
-
-# Default variables
-# =================
-
-# .. contents:: Sections
-# :local:
-
-
-# .. envvar:: kube_prometheus_stack_values [[[
-#
-# Overrides for Helm chart values
-kube_prometheus_stack_values: {}
-
- # ]]]
diff --git a/roles/kube_prometheus_stack/files/prometheus_alerts.yml b/roles/kube_prometheus_stack/files/prometheus_alerts.yml
deleted file mode 100644
index 067f533..0000000
--- a/roles/kube_prometheus_stack/files/prometheus_alerts.yml
+++ /dev/null
@@ -1,870 +0,0 @@
-# NOTE(mnaser): Imported from upstream ceph/ceph, with the following changes:
-#
-# * Dropped `CephNodeNetworkPacketDrops` due to noisy alerts with
-# no actionable items to fix it.
-# * Dropped `CephNodeDiskspaceWarning` because we already have a
-# few alerts like `NodeFilesystemSpaceFillingUp`, etc.
-#
-# https://raw.githubusercontent.com/ceph/ceph/v16.2.9/monitoring/ceph-mixin/prometheus_alerts.yml
-
-groups:
- - name: cluster health
- rules:
- - alert: CephHealthError
- expr: ceph_health_status == 2
- for: 5m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.2.1
- annotations:
- summary: Cluster is in an ERROR state
- description: >
- Ceph in HEALTH_ERROR state for more than 5 minutes.
- Please check "ceph health detail" for more information.
-
- - alert: CephHealthWarning
- expr: ceph_health_status == 1
- for: 15m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- summary: Cluster is in a WARNING state
- description: >
- Ceph has been in HEALTH_WARN for more than 15 minutes.
- Please check "ceph health detail" for more information.
-
- - name: mon
- rules:
- - alert: CephMonDownQuorumAtRisk
- expr: ((ceph_health_detail{name="MON_DOWN"} == 1) * on() (count(ceph_mon_quorum_status == 1) == bool (floor(count(ceph_mon_metadata) / 2) + 1))) == 1
- for: 30s
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.3.1
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down
- summary: Monitor quorum is at risk
- description: |
- {{ $min := query "floor(count(ceph_mon_metadata) / 2) +1" | first | value }}Quorum requires a majority of monitors (x {{ $min }}) to be active
- Without quorum the cluster will become inoperable, affecting all connected clients and services.
-
- The following monitors are down:
- {{- range query "(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)" }}
- - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }}
- {{- end }}
- - alert: CephMonDown
- expr: (count(ceph_mon_quorum_status == 0) <= (count(ceph_mon_metadata) - floor(count(ceph_mon_metadata) / 2) + 1))
- for: 30s
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-down
- summary: One of more ceph monitors are down
- description: |
- {{ $down := query "count(ceph_mon_quorum_status == 0)" | first | value }}{{ $s := "" }}{{ if gt $down 1.0 }}{{ $s = "s" }}{{ end }}You have {{ $down }} monitor{{ $s }} down.
- Quorum is still intact, but the loss of further monitors will make your cluster inoperable.
-
- The following monitors are down:
- {{- range query "(ceph_mon_quorum_status == 0) + on(ceph_daemon) group_left(hostname) (ceph_mon_metadata * 0)" }}
- - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }}
- {{- end }}
- - alert: CephMonDiskspaceCritical
- expr: ceph_health_detail{name="MON_DISK_CRIT"} == 1
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.3.2
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-crit
- summary: Disk space on at least one monitor is critically low
- description: |
- The free space available to a monitor's store is critically low (<5% by default).
- You should increase the space available to the monitor(s). The
- default location for the store sits under /var/lib/ceph. Your monitor hosts are;
- {{- range query "ceph_mon_metadata"}}
- - {{ .Labels.hostname }}
- {{- end }}
-
- - alert: CephMonDiskspaceLow
- expr: ceph_health_detail{name="MON_DISK_LOW"} == 1
- for: 5m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-disk-low
- summary: Disk space on at least one monitor is approaching full
- description: |
- The space available to a monitor's store is approaching full (>70% is the default).
- You should increase the space available to the monitor store. The
- default location for the store sits under /var/lib/ceph. Your monitor hosts are;
- {{- range query "ceph_mon_metadata"}}
- - {{ .Labels.hostname }}
- {{- end }}
-
- - alert: CephMonClockSkew
- expr: ceph_health_detail{name="MON_CLOCK_SKEW"} == 1
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#mon-clock-skew
- summary: Clock skew across the Monitor hosts detected
- description: |
- The ceph monitors rely on a consistent time reference to maintain
- quorum and cluster consistency. This event indicates that at least
- one of your mons is not sync'd correctly.
-
- Review the cluster status with ceph -s. This will show which monitors
- are affected. Check the time sync status on each monitor host.
-
- - name: osd
- rules:
- - alert: CephOSDDownHigh
- expr: count(ceph_osd_up == 0) / count(ceph_osd_up) * 100 >= 10
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.4.1
- annotations:
- summary: More than 10% of OSDs are down
- description: |
- {{ $value | humanize }}% or {{ with query "count(ceph_osd_up == 0)" }}{{ . | first | value }}{{ end }} of {{ with query "count(ceph_osd_up)" }}{{ . | first | value }}{{ end }} OSDs are down (>= 10%).
-
- The following OSDs are down:
- {{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0" }}
- - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }}
- {{- end }}
- - alert: CephOSDHostDown
- expr: ceph_health_detail{name="OSD_HOST_DOWN"} == 1
- for: 5m
- labels:
- severity: warning
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.4.8
- annotations:
- summary: An OSD host is offline
- description: |
- The following OSDs are down:
- {{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0" }}
- - {{ .Labels.hostname }} : {{ .Labels.ceph_daemon }}
- {{- end }}
- - alert: CephOSDDown
- expr: ceph_health_detail{name="OSD_DOWN"} == 1
- for: 5m
- labels:
- severity: warning
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.4.2
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-down
- summary: An OSD has been marked down/unavailable
- description: |
- {{ $num := query "count(ceph_osd_up == 0)" | first | value }}{{ $s := "" }}{{ if gt $num 1.0 }}{{ $s = "s" }}{{ end }}{{ $num }} OSD{{ $s }} down for over 5mins.
-
- The following OSD{{ $s }} {{ if eq $s "" }}is{{ else }}are{{ end }} down:
- {{- range query "(ceph_osd_up * on(ceph_daemon) group_left(hostname) ceph_osd_metadata) == 0"}}
- - {{ .Labels.ceph_daemon }} on {{ .Labels.hostname }}
- {{- end }}
-
- - alert: CephOSDNearFull
- expr: ceph_health_detail{name="OSD_NEARFULL"} == 1
- for: 5m
- labels:
- severity: warning
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.4.3
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-nearfull
- summary: OSD(s) running low on free space (NEARFULL)
- description: |
- One or more OSDs have reached their NEARFULL threshold
-
- Use 'ceph health detail' to identify which OSDs have reached this threshold.
- To resolve, either add capacity to the cluster, or delete unwanted data
- - alert: CephOSDFull
- expr: ceph_health_detail{name="OSD_FULL"} > 0
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.4.6
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-full
- summary: OSD(s) is full, writes blocked
- description: |
- An OSD has reached it's full threshold. Writes from all pools that share the
- affected OSD will be blocked.
-
- To resolve, either add capacity to the cluster, or delete unwanted data
- - alert: CephOSDBackfillFull
- expr: ceph_health_detail{name="OSD_BACKFILLFULL"} > 0
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-backfillfull
- summary: OSD(s) too full for backfill operations
- description: |
- An OSD has reached it's BACKFILL FULL threshold. This will prevent rebalance operations
- completing for some pools. Check the current capacity utilisation with 'ceph df'
-
- To resolve, either add capacity to the cluster, or delete unwanted data
- - alert: CephOSDTooManyRepairs
- expr: ceph_health_detail{name="OSD_TOO_MANY_REPAIRS"} == 1
- for: 30s
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#osd-too-many-repairs
- summary: OSD has hit a high number of read errors
- description: |
- Reads from an OSD have used a secondary PG to return data to the client, indicating
- a potential failing disk.
- - alert: CephOSDTimeoutsPublicNetwork
- expr: ceph_health_detail{name="OSD_SLOW_PING_TIME_FRONT"} == 1
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- summary: Network issues delaying OSD heartbeats (public network)
- description: |
- OSD heartbeats on the cluster's 'public' network (frontend) are running slow. Investigate the network
- for any latency issues on this subnet. Use 'ceph health detail' to show the affected OSDs.
- - alert: CephOSDTimeoutsClusterNetwork
- expr: ceph_health_detail{name="OSD_SLOW_PING_TIME_BACK"} == 1
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- summary: Network issues delaying OSD heartbeats (cluster network)
- description: |
- OSD heartbeats on the cluster's 'cluster' network (backend) are running slow. Investigate the network
- for any latency issues on this subnet. Use 'ceph health detail' to show the affected OSDs.
- - alert: CephOSDInternalDiskSizeMismatch
- expr: ceph_health_detail{name="BLUESTORE_DISK_SIZE_MISMATCH"} == 1
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-disk-size-mismatch
- summary: OSD size inconsistency error
- description: |
- One or more OSDs have an internal inconsistency between the size of the physical device and it's metadata.
- This could lead to the OSD(s) crashing in future. You should redeploy the effected OSDs.
- - alert: CephDeviceFailurePredicted
- expr: ceph_health_detail{name="DEVICE_HEALTH"} == 1
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#id2
- summary: Device(s) have been predicted to fail soon
- description: |
- The device health module has determined that one or more devices will fail
- soon. To review the device states use 'ceph device ls'. To show a specific
- device use 'ceph device info <dev id>'.
-
- Mark the OSD as out (so data may migrate to other OSDs in the cluster). Once
- the osd is empty remove and replace the OSD.
- - alert: CephDeviceFailurePredictionTooHigh
- expr: ceph_health_detail{name="DEVICE_HEALTH_TOOMANY"} == 1
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.4.7
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-toomany
- summary: Too many devices have been predicted to fail, unable to resolve
- description: |
- The device health module has determined that the number of devices predicted to
- fail can not be remediated automatically, since it would take too many osd's out of
- the cluster, impacting performance and potentially availabililty. You should add new
- OSDs to the cluster to allow data to be relocated to avoid the data integrity issues.
- - alert: CephDeviceFailureRelocationIncomplete
- expr: ceph_health_detail{name="DEVICE_HEALTH_IN_USE"} == 1
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#device-health-in-use
- summary: A device failure is predicted, but unable to relocate data
- description: |
- The device health module has determined that one or more devices will fail
- soon, but the normal process of relocating the data on the device to other
- OSDs in the cluster is blocked.
-
- Check the the cluster has available freespace. It may be necessary to add
- more disks to the cluster to allow the data from the failing device to
- successfully migrate.
-
- - alert: CephOSDFlapping
- expr: |
- (
- rate(ceph_osd_up[5m])
- * on(ceph_daemon) group_left(hostname) ceph_osd_metadata
- ) * 60 > 1
- labels:
- severity: warning
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.4.4
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/troubleshooting/troubleshooting-osd#flapping-osds
- summary: Network issues are causing OSD's to flap (mark each other out)
- description: >
- OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} was
- marked down and back up at {{ $value | humanize }} times once a
- minute for 5 minutes. This could indicate a network issue (latency,
- packet drop, disruption) on the clusters "cluster network". Check the
- network environment on the listed host(s).
-
- - alert: CephOSDReadErrors
- expr: ceph_health_detail{name="BLUESTORE_SPURIOUS_READ_ERRORS"} == 1
- for: 30s
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#bluestore-spurious-read-errors
- summary: Device read errors detected
- description: >
- An OSD has encountered read errors, but the OSD has recovered by retrying
- the reads. This may indicate an issue with the Hardware or Kernel.
- # alert on high deviation from average PG count
- - alert: CephPGImbalance
- expr: |
- abs(
- (
- (ceph_osd_numpg > 0) - on (job) group_left avg(ceph_osd_numpg > 0) by (job)
- ) / on (job) group_left avg(ceph_osd_numpg > 0) by (job)
- ) * on(ceph_daemon) group_left(hostname) ceph_osd_metadata > 0.30
- for: 5m
- labels:
- severity: warning
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.4.5
- annotations:
- summary: PG allocations are not balanced across devices
- description: >
- OSD {{ $labels.ceph_daemon }} on {{ $labels.hostname }} deviates
- by more than 30% from average PG count.
- # alert on high commit latency...but how high is too high
-
- - name: mds
- rules:
- - alert: CephFilesystemDamaged
- expr: ceph_health_detail{name="MDS_DAMAGE"} > 0
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.5.1
- annotations:
- documentation: https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages
- summary: Ceph filesystem is damaged.
- description: >
- The filesystems metadata has been corrupted. Data access
- may be blocked.
-
- Either analyse the output from the mds daemon admin socket, or
- escalate to support
- - alert: CephFilesystemOffline
- expr: ceph_health_detail{name="MDS_ALL_DOWN"} > 0
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.5.3
- annotations:
- documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-all-down
- summary: Ceph filesystem is offline
- description: >
- All MDS ranks are unavailable. The ceph daemons providing the metadata
- for the Ceph filesystem are all down, rendering the filesystem offline.
- - alert: CephFilesystemDegraded
- expr: ceph_health_detail{name="FS_DEGRADED"} > 0
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.5.4
- annotations:
- documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-degraded
- summary: Ceph filesystem is degraded
- description: >
- One or more metadata daemons (MDS ranks) are failed or in a
- damaged state. At best the filesystem is partially available,
- worst case is the filesystem is completely unusable.
- - alert: CephFilesystemMDSRanksLow
- expr: ceph_health_detail{name="MDS_UP_LESS_THAN_MAX"} > 0
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-up-less-than-max
- summary: Ceph MDS daemon count is lower than configured
- description: >
- The filesystem's "max_mds" setting defined the number of MDS ranks in
- the filesystem. The current number of active MDS daemons is less than
- this setting.
- - alert: CephFilesystemInsufficientStandby
- expr: ceph_health_detail{name="MDS_INSUFFICIENT_STANDBY"} > 0
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#mds-insufficient-standby
- summary: Ceph filesystem standby daemons too low
- description: >
- The minimum number of standby daemons determined by standby_count_wanted
- is less than the actual number of standby daemons. Adjust the standby count
- or increase the number of mds daemons within the filesystem.
- - alert: CephFilesystemFailureNoStandby
- expr: ceph_health_detail{name="FS_WITH_FAILED_MDS"} > 0
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.5.5
- annotations:
- documentation: https://docs.ceph.com/en/latest/cephfs/health-messages/#fs-with-failed-mds
- summary: Ceph MDS daemon failed, no further standby available
- description: >
- An MDS daemon has failed, leaving only one active rank without
- further standby. Investigate the cause of the failure or add a
- standby daemon
- - alert: CephFilesystemReadOnly
- expr: ceph_health_detail{name="MDS_HEALTH_READ_ONLY"} > 0
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.5.2
- annotations:
- documentation: https://docs.ceph.com/en/latest/cephfs/health-messages#cephfs-health-messages
- summary: Ceph filesystem in read only mode, due to write error(s)
- description: >
- The filesystem has switched to READ ONLY due to an unexpected
- write error, when writing to the metadata pool
-
- Either analyse the output from the mds daemon admin socket, or
- escalate to support
-
- - name: mgr
- rules:
- - alert: CephMgrModuleCrash
- expr: ceph_health_detail{name="RECENT_MGR_MODULE_CRASH"} == 1
- for: 5m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.6.1
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#recent-mgr-module-crash
- summary: A mgr module has recently crashed
- description: >
- One or more mgr modules have crashed and are yet to be acknowledged by the administrator. A
- crashed module may impact functionality within the cluster. Use the 'ceph crash' commands to
- investigate which module has failed, and archive it to acknowledge the failure.
- - alert: CephMgrPrometheusModuleInactive
- expr: up{job="ceph"} == 0
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.6.2
- annotations:
- summary: Ceph's mgr/prometheus module is not available
- description: >
- The mgr/prometheus module at {{ $labels.instance }} is unreachable. This
- could mean that the module has been disabled or the mgr itself is down.
-
- Without the mgr/prometheus module metrics and alerts will no longer
- function. Open a shell to ceph and use 'ceph -s' to to determine whether the
- mgr is active. If the mgr is not active, restart it, otherwise you can check
- the mgr/prometheus module is loaded with 'ceph mgr module ls' and if it's
- not listed as enabled, enable it with 'ceph mgr module enable prometheus'
-
- - name: pgs
- rules:
- - alert: CephPGsInactive
- expr: ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_active) > 0
- for: 5m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.7.1
- annotations:
- summary: One or more Placement Groups are inactive
- description: >
- {{ $value }} PGs have been inactive for more than 5 minutes in pool {{ $labels.name }}.
- Inactive placement groups aren't able to serve read/write
- requests.
- - alert: CephPGsUnclean
- expr: ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_clean) > 0
- for: 15m
- labels:
- severity: warning
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.7.2
- annotations:
- summary: One or more platcment groups are marked unclean
- description: >
- {{ $value }} PGs haven't been clean for more than 15 minutes in pool {{ $labels.name }}.
- Unclean PGs haven't been able to completely recover from a previous failure.
- - alert: CephPGsDamaged
- expr: ceph_health_detail{name=~"PG_DAMAGED|OSD_SCRUB_ERRORS"} == 1
- for: 5m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.7.4
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-damaged
- summary: Placement group damaged, manual intervention needed
- description: >
- During data consistency checks (scrub), at least one PG has been flagged as being
- damaged or inconsistent.
-
- Check to see which PG is affected, and attempt a manual repair if necessary. To list
- problematic placement groups, use 'rados list-inconsistent-pg <pool>'. To repair PGs use
- the 'ceph pg repair <pg_num>' command.
- - alert: CephPGRecoveryAtRisk
- expr: ceph_health_detail{name="PG_RECOVERY_FULL"} == 1
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.7.5
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-recovery-full
- summary: OSDs are too full for automatic recovery
- description: >
- Data redundancy may be reduced, or is at risk, since one or more OSDs are at or above their
- 'full' threshold. Add more capacity to the cluster, or delete unwanted data.
- - alert: CephPGUnavilableBlockingIO
- # PG_AVAILABILITY, but an OSD is not in a DOWN state
- expr: ((ceph_health_detail{name="PG_AVAILABILITY"} == 1) - scalar(ceph_health_detail{name="OSD_DOWN"})) == 1
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.7.3
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-availability
- summary: Placement group is unavailable, blocking some I/O
- description: >
- Data availability is reduced impacting the clusters ability to service I/O to some data. One or
- more placement groups (PGs) are in a state that blocks IO.
- - alert: CephPGBackfillAtRisk
- expr: ceph_health_detail{name="PG_BACKFILL_FULL"} == 1
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.7.6
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-backfill-full
- summary: Backfill operations are blocked, due to lack of freespace
- description: >
- Data redundancy may be at risk due to lack of free space within the cluster. One or more OSDs
- have breached their 'backfillfull' threshold. Add more capacity, or delete unwanted data.
- - alert: CephPGNotScrubbed
- expr: ceph_health_detail{name="PG_NOT_SCRUBBED"} == 1
- for: 5m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-scrubbed
- summary: Placement group(s) have not been scrubbed
- description: |
- One or more PGs have not been scrubbed recently. The scrub process is a data integrity
- feature, protectng against bit-rot. It checks that objects and their metadata (size and
- attributes) match across object replicas. When PGs miss their scrub window, it may
- indicate the scrub window is too small, or PGs were not in a 'clean' state during the
- scrub window.
-
- You can manually initiate a scrub with: ceph pg scrub <pgid>
- - alert: CephPGsHighPerOSD
- expr: ceph_health_detail{name="TOO_MANY_PGS"} == 1
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks/#too-many-pgs
- summary: Placement groups per OSD is too high
- description: |
- The number of placement groups per OSD is too high (exceeds the mon_max_pg_per_osd setting).
-
- Check that the pg_autoscaler hasn't been disabled for any of the pools, with 'ceph osd pool autoscale-status'
- and that the profile selected is appropriate. You may also adjust the target_size_ratio of a pool to guide
- the autoscaler based on the expected relative size of the pool
- (i.e. 'ceph osd pool set cephfs.cephfs.meta target_size_ratio .1')
- - alert: CephPGNotDeepScrubbed
- expr: ceph_health_detail{name="PG_NOT_DEEP_SCRUBBED"} == 1
- for: 5m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pg-not-deep-scrubbed
- summary: Placement group(s) have not been deep scrubbed
- description: |
- One or more PGs have not been deep scrubbed recently. Deep scrub is a data integrity
- feature, protectng against bit-rot. It compares the contents of objects and their
- replicas for inconsistency. When PGs miss their deep scrub window, it may indicate
- that the window is too small or PGs were not in a 'clean' state during the deep-scrub
- window.
-
- You can manually initiate a deep scrub with: ceph pg deep-scrub <pgid>
-
- - name: nodes
- rules:
- - alert: CephNodeRootFilesystemFull
- expr: node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100 < 5
- for: 5m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.8.1
- annotations:
- summary: Root filesystem is dangerously full
- description: >
- Root volume (OSD and MON store) is dangerously full: {{ $value | humanize }}% free.
-
- - alert: CephNodeNetworkPacketErrors
- expr: |
- (
- increase(node_network_receive_errs_total{device!="lo"}[1m]) +
- increase(node_network_transmit_errs_total{device!="lo"}[1m])
- ) / (
- increase(node_network_receive_packets_total{device!="lo"}[1m]) +
- increase(node_network_transmit_packets_total{device!="lo"}[1m])
- ) >= 0.0001 or (
- increase(node_network_receive_errs_total{device!="lo"}[1m]) +
- increase(node_network_transmit_errs_total{device!="lo"}[1m])
- ) >= 10
- labels:
- severity: warning
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.8.3
- annotations:
- summary: One or more Nics is seeing packet errors
- description: >
- Node {{ $labels.instance }} experiences packet errors > 0.01% or
- > 10 packets/s on interface {{ $labels.device }}.
-
- - alert: CephNodeInconsistentMTU
- expr: |
- node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
- scalar(
- max by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
- quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
- )
- or
- node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
- scalar(
- min by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
- quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
- )
- labels:
- severity: warning
- type: ceph_default
- annotations:
- summary: MTU settings across Ceph hosts are inconsistent
- description: >
- Node {{ $labels.instance }} has a different MTU size ({{ $value }})
- than the median of devices named {{ $labels.device }}.
-
- - name: pools
- rules:
- - alert: CephPoolGrowthWarning
- expr: |
- (predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id)
- group_right ceph_pool_metadata) >= 95
- labels:
- severity: warning
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.9.2
- annotations:
- summary: Pool growth rate may soon exceed it's capacity
- description: >
- Pool '{{ $labels.name }}' will be full in less than 5 days
- assuming the average fill-up rate of the past 48 hours.
- - alert: CephPoolBackfillFull
- expr: ceph_health_detail{name="POOL_BACKFILLFULL"} > 0
- labels:
- severity: warning
- type: ceph_default
- annotations:
- summary: Freespace in a pool is too low for recovery/rebalance
- description: >
- A pool is approaching it's near full threshold, which will
- prevent rebalance operations from completing. You should
- consider adding more capacity to the pool.
-
- - alert: CephPoolFull
- expr: ceph_health_detail{name="POOL_FULL"} > 0
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.9.1
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#pool-full
- summary: Pool is full - writes are blocked
- description: |
- A pool has reached it's MAX quota, or the OSDs supporting the pool
- have reached their FULL threshold. Until this is resolved, writes to
- the pool will be blocked.
- Pool Breakdown (top 5)
- {{- range query "topk(5, sort_desc(ceph_pool_percent_used * on(pool_id) group_right ceph_pool_metadata))" }}
- - {{ .Labels.name }} at {{ .Value }}%
- {{- end }}
- Either increase the pools quota, or add capacity to the cluster first
- then increase it's quota (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>)
- - alert: CephPoolNearFull
- expr: ceph_health_detail{name="POOL_NEAR_FULL"} > 0
- for: 5m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- summary: One or more Ceph pools are getting full
- description: |
- A pool has exceeeded it warning (percent full) threshold, or the OSDs
- supporting the pool have reached their NEARFULL thresholds. Writes may
- continue, but you are at risk of the pool going read only if more capacity
- isn't made available.
-
- Determine the affected pool with 'ceph df detail', for example looking
- at QUOTA BYTES and STORED. Either increase the pools quota, or add
- capacity to the cluster first then increase it's quota
- (e.g. ceph osd pool set quota <pool_name> max_bytes <bytes>)
- - name: healthchecks
- rules:
- - alert: CephSlowOps
- expr: ceph_healthcheck_slow_ops > 0
- for: 30s
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#slow-ops
- summary: MON/OSD operations are slow to complete
- description: >
- {{ $value }} OSD requests are taking too long to process (osd_op_complaint_time exceeded)
-# cephadm alerts
- - name: cephadm
- rules:
- - alert: CephadmUpgradeFailed
- expr: ceph_health_detail{name="UPGRADE_EXCEPTION"} > 0
- for: 30s
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.11.2
- annotations:
- summary: Ceph version upgrade has failed
- description: >
- The cephadm cluster upgrade process has failed. The cluster remains in
- an undetermined state.
-
- Please review the cephadm logs, to understand the nature of the issue
- - alert: CephadmDaemonFailed
- expr: ceph_health_detail{name="CEPHADM_FAILED_DAEMON"} > 0
- for: 30s
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.11.1
- annotations:
- summary: A ceph daemon manged by cephadm is down
- description: >
- A daemon managed by cephadm is no longer active. Determine, which
- daemon is down with 'ceph health detail'. you may start daemons with
- the 'ceph orch daemon start <daemon_id>'
- - alert: CephadmPaused
- expr: ceph_health_detail{name="CEPHADM_PAUSED"} > 0
- for: 1m
- labels:
- severity: warning
- type: ceph_default
- annotations:
- documentation: https://docs.ceph.com/en/latest/cephadm/operations#cephadm-paused
- summary: Orchestration tasks via cephadm are PAUSED
- description: >
- Cluster management has been paused manually. This will prevent the
- orchestrator from service management and reconciliation. If this is
- not intentional, resume cephadm operations with 'ceph orch resume'
-
-# prometheus alerts
- - name: PrometheusServer
- rules:
- - alert: PrometheusJobMissing
- expr: absent(up{job="ceph"})
- for: 30s
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.12.1
- annotations:
- summary: The scrape job for Ceph is missing from Prometheus
- description: |
- The prometheus job that scrapes from Ceph is no longer defined, this
- will effectively mean you'll have no metrics or alerts for the cluster.
-
- Please review the job definitions in the prometheus.yml file of the prometheus
- instance.
-# Object related events
- - name: rados
- rules:
- - alert: CephObjectMissing
- expr: (ceph_health_detail{name="OBJECT_UNFOUND"} == 1) * on() (count(ceph_osd_up == 1) == bool count(ceph_osd_metadata)) == 1
- for: 30s
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.10.1
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks#object-unfound
- summary: Object(s) has been marked UNFOUND
- description: |
- A version of a RADOS object can not be found, even though all OSDs are up. I/O
- requests for this object from clients will block (hang). Resolving this issue may
- require the object to be rolled back to a prior version manually, and manually verified.
-# Generic
- - name: generic
- rules:
- - alert: CephDaemonCrash
- expr: ceph_health_detail{name="RECENT_CRASH"} == 1
- for: 1m
- labels:
- severity: critical
- type: ceph_default
- oid: 1.3.6.1.4.1.50495.1.2.1.1.2
- annotations:
- documentation: https://docs.ceph.com/en/latest/rados/operations/health-checks/#recent-crash
- summary: One or more Ceph daemons have crashed, and are pending acknowledgement
- description: |
- One or more daemons have crashed recently, and need to be acknowledged. This notification
- ensures that software crashes don't go unseen. To acknowledge a crash, use the
- 'ceph crash archive <id>' command.
diff --git a/roles/kube_prometheus_stack/tasks/main.yml b/roles/kube_prometheus_stack/tasks/main.yml
index f6e3d71..6eebd3f 100644
--- a/roles/kube_prometheus_stack/tasks/main.yml
+++ b/roles/kube_prometheus_stack/tasks/main.yml
@@ -27,15 +27,6 @@
src: /etc/kubernetes/pki/etcd/healthcheck-client.key
register: _etcd_healthcheck_client_key
-- name: Create namespace
- kubernetes.core.k8s:
- state: present
- definition:
- apiVersion: v1
- kind: Namespace
- metadata:
- name: monitoring
-
- name: Create Secret with "etcd" TLS certificates
kubernetes.core.k8s:
state: present
@@ -49,27 +40,10 @@
ca.crt: "{{ _etcd_ca_crt.content }}"
healthcheck-client.crt: "{{ _etcd_healthcheck_client_crt.content }}"
healthcheck-client.key: "{{ _etcd_healthcheck_client_key.content }}"
-
-- name: Deploy Helm chart
- kubernetes.core.k8s:
- state: present
- definition:
- - apiVersion: helm.toolkit.fluxcd.io/v2beta1
- kind: HelmRelease
- metadata:
- name: kube-prometheus-stack
- namespace: monitoring
- spec:
- interval: 60s
- chart:
- spec:
- chart: kube-prometheus-stack
- version: 36.2.0
- sourceRef:
- kind: HelmRepository
- name: prometheus-community
- install:
- crds: CreateReplace
- upgrade:
- crds: CreateReplace
- values: "{{ _kube_prometheus_stack_values | combine(kube_prometheus_stack_values, recursive=True) }}"
+ # NOTE(mnaser): Since we haven't moved to the operator pattern yet, we need to
+ # keep retrying a few times as the namespace might not be
+ # created yet.
+ retries: 60
+ delay: 5
+ register: _result
+ until: _result is not failed
diff --git a/roles/kube_prometheus_stack/vars/main.yml b/roles/kube_prometheus_stack/vars/main.yml
deleted file mode 100644
index ba047b7..0000000
--- a/roles/kube_prometheus_stack/vars/main.yml
+++ /dev/null
@@ -1,847 +0,0 @@
-# Copyright (c) 2022 VEXXHOST, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License. You may obtain
-# a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations
-# under the License.
-
-_kube_prometheus_stack_values:
- defaultRules:
- disabled:
- # NOTE(mnaser): https://github.com/prometheus-community/helm-charts/issues/144
- # https://github.com/openshift/cluster-monitoring-operator/issues/248
- etcdHighNumberOfFailedGRPCRequests: true
- alertmanager:
- serviceMonitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- alertmanagerSpec:
- storage:
- volumeClaimTemplate:
- spec:
- storageClassName: general
- accessModes: ["ReadWriteOnce"]
- resources:
- requests:
- storage: 40Gi
- nodeSelector:
- openstack-control-plane: enabled
- grafana:
- serviceMonitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- nodeSelector:
- openstack-control-plane: enabled
- kubeApiServer:
- serviceMonitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_node_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- kubelet:
- serviceMonitor:
- cAdvisorRelabelings:
- - sourceLabels: [__metrics_path__]
- targetLabel: metrics_path
- - sourceLabels: ["node"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|node|service)$"
- probesRelabelings:
- - sourceLabels: [__metrics_path__]
- targetLabel: metrics_path
- - sourceLabels: ["node"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|node|service)$"
- relabelings:
- - sourceLabels: [__metrics_path__]
- targetLabel: metrics_path
- - sourceLabels: ["node"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|node|service)$"
- kubeControllerManager:
- serviceMonitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_node_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- coreDns:
- serviceMonitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - regex: "^(container|endpoint|namespace|pod|service)$"
- action: "labeldrop"
- kubeEtcd:
- serviceMonitor:
- scheme: https
- serverName: localhost
- insecureSkipVerify: false
- caFile: /etc/prometheus/secrets/kube-prometheus-stack-etcd-client-cert/ca.crt
- certFile: /etc/prometheus/secrets/kube-prometheus-stack-etcd-client-cert/healthcheck-client.crt
- keyFile: /etc/prometheus/secrets/kube-prometheus-stack-etcd-client-cert/healthcheck-client.key
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_node_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- kubeScheduler:
- service:
- port: 10259
- targetPort: 10259
- serviceMonitor:
- https: true
- insecureSkipVerify: true
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_node_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- kubeProxy:
- serviceMonitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_node_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- kube-state-metrics:
- prometheus:
- monitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- nodeSelector:
- openstack-control-plane: enabled
- prometheus:
- serviceMonitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- prometheusSpec:
- nodeSelector:
- openstack-control-plane: enabled
- secrets:
- - kube-prometheus-stack-etcd-client-cert
- additionalServiceMonitors:
- - name: ceph
- jobLabel: application
- selector:
- matchLabels:
- application: ceph
- namespaceSelector:
- matchNames:
- - openstack
- endpoints:
- - port: metrics
- honorLabels: true
- relabelings:
- - action: replace
- regex: (.*)
- replacement: ceph
- targetLabel: cluster
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- - name: coredns
- jobLabel: app.kubernetes.io/name
- namespaceSelector:
- matchNames:
- - openstack
- selector:
- matchLabels:
- app.kubernetes.io/name: coredns
- app.kubernetes.io/component: metrics
- endpoints:
- - port: "metrics"
- path: "/metrics"
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_label_application"]
- targetLabel: "application"
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- - name: memcached
- jobLabel: application
- namespaceSelector:
- matchNames:
- - openstack
- selector:
- matchLabels:
- application: memcached
- component: server
- endpoints:
- - port: "metrics"
- path: "/metrics"
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- - name: openstack-exporter
- jobLabel: jobLabel
- namespaceSelector:
- matchNames:
- - openstack
- selector:
- matchLabels:
- application: openstack-exporter
- endpoints:
- - interval: 1m
- scrapeTimeout: 30s
- port: metrics
- relabelings:
- - action: replace
- regex: (.*)
- replacement: default
- targetLabel: instance
- additionalPodMonitors:
- - name: ethtool-exporter
- jobLabel: job
- selector:
- matchLabels:
- application: ethtool-exporter
- podMetricsEndpoints:
- - port: metrics
- path: /metrics
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_node_name"]
- targetLabel: instance
- - action: labeldrop
- regex: ^(container|endpoint|namespace|pod)$
- - name: ipmi-exporter
- jobLabel: job
- selector:
- matchLabels:
- application: ipmi-exporter
- podMetricsEndpoints:
- - port: metrics
- path: /metrics
- interval: 60s
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_node_name"]
- targetLabel: instance
- - action: labeldrop
- regex: ^(container|endpoint|namespace|pod)$
- - name: percona-xtradb-pxc
- jobLabel: app.kubernetes.io/component
- namespaceSelector:
- matchNames:
- - openstack
- selector:
- matchLabels:
- app.kubernetes.io/component: pxc
- app.kubernetes.io/instance: percona-xtradb
- podMetricsEndpoints:
- - port: metrics
- path: /metrics
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- - name: rabbitmq
- jobLabel: app.kubernetes.io/component
- namespaceSelector:
- matchNames:
- - openstack
- selector:
- matchLabels:
- app.kubernetes.io/component: rabbitmq
- podMetricsEndpoints:
- - port: prometheus
- path: /metrics
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- prometheusOperator:
- admissionWebhooks:
- patch:
- nodeSelector:
- openstack-control-plane: enabled
- serviceMonitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- nodeSelector:
- openstack-control-plane: enabled
- prometheus-node-exporter:
- extraArgs:
- - --collector.diskstats.ignored-devices=^(ram|loop|nbd|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$
- - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|fuse.squashfuse_ll|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
- - --collector.filesystem.mount-points-exclude=^/(dev|proc|run/credentials/.+|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+|var/lib/kubelet/plugins/kubernetes.io/csi/.+|run/containerd/.+)($|/)
- - --collector.netclass.ignored-devices=^(lxc|cilium_|qbr|qvb|qvo|tap|ovs-system|br|tbr|gre_sys).*$
- - --collector.netdev.device-exclude=^(lxc|cilium_|qbr|qvb|qvo|tap|ovs-system|br|tbr|gre_sys).*$
- prometheus:
- monitor:
- relabelings:
- - sourceLabels: ["__meta_kubernetes_pod_node_name"]
- targetLabel: "instance"
- - action: "labeldrop"
- regex: "^(container|endpoint|namespace|pod|service)$"
- additionalPrometheusRulesMap:
- ceph: "{{ lookup('ansible.builtin.file', 'prometheus_alerts.yml') | from_yaml }}"
- coredns:
- groups:
- - name: coredns
- rules:
- - alert: CoreDNSDown
- expr: absent(up{job="coredns"} == 1)
- for: 15m
- labels:
- severity: critical
- - alert: CoreDNSLatencyHigh
- expr: histogram_quantile(0.99, sum(rate(coredns_dns_request_duration_seconds_bucket{job="coredns"}[5m])) by(server, zone, le)) > 4
- for: 10m
- labels:
- severity: critical
- - alert: CoreDNSErrorsHigh
- expr:
- sum(rate(coredns_dns_responses_total{job="coredns",rcode="SERVFAIL"}[5m]))
- /
- sum(rate(coredns_dns_responses_total{job="coredns"}[5m])) > 0.01
- for: 10m
- labels:
- severity: warning
- - alert: CoreDNSErrorsHigh
- expr:
- sum(rate(coredns_dns_responses_total{job="coredns",rcode="SERVFAIL"}[5m]))
- /
- sum(rate(coredns_dns_responses_total{job="coredns"}[5m])) > 0.03
- for: 10m
- labels:
- severity: critical
- - name: coredns_forward
- rules:
- - alert: CoreDNSForwardLatencyHigh
- expr: histogram_quantile(0.99, sum(rate(coredns_forward_request_duration_seconds_bucket{job="coredns"}[5m])) by(to, le)) > 4
- for: 10m
- labels:
- severity: critical
- - alert: CoreDNSForwardErrorsHigh
- expr:
- sum(rate(coredns_forward_responses_total{job="coredns",rcode="SERVFAIL"}[5m]))
- /
- sum(rate(coredns_forward_responses_total{job="coredns"}[5m])) > 0.01
- for: 10m
- labels:
- severity: warning
- - alert: CoreDNSForwardErrorsHigh
- expr:
- sum(rate(coredns_forward_responses_total{job="coredns",rcode="SERVFAIL"}[5m]))
- /
- sum(rate(coredns_forward_responses_total{job="coredns"}[5m])) > 0.03
- for: 10m
- labels:
- severity: critical
- - alert: CoreDNSForwardHealthcheckFailureCount
- expr: sum(rate(coredns_forward_healthcheck_failures_total{job="coredns"}[5m])) by (to) > 0
- for: 2m
- labels:
- severity: warning
- - alert: CoreDNSForwardHealthcheckBrokenCount
- expr: sum(rate(coredns_forward_healthcheck_broken_total{job="coredns"}[5m])) > 0
- for: 2m
- labels:
- severity: critical
- ethtool-exporter:
- groups:
- - name: rules
- rules:
- - alert: EthernetReceiveDiscards
- expr: rate(node_net_ethtool{type="rx_discards"}[1m]) > 0
- labels:
- severity: warning
- ipmi-exporter:
- groups:
- - name: rules
- rules:
- - alert: IpmiCollectorDown
- expr: ipmi_up == 0
- - name: collectors-state-warning
- rules:
- - alert: IpmiCurrent
- expr: ipmi_current_state == 1
- labels:
- severity: warning
- - alert: IpmiFanSpeed
- expr: ipmi_fan_speed_state == 1
- labels:
- severity: warning
- - alert: IpmiPower
- expr: ipmi_power_state == 1
- labels:
- severity: warning
- - alert: IpmiSensor
- expr: ipmi_sensor_state == 1
- labels:
- severity: warning
- - alert: IpmiTemperature
- expr: ipmi_temperature_state == 1
- labels:
- severity: warning
- - alert: IpmiVoltage
- expr: ipmi_voltage_state == 1
- labels:
- severity: warning
- - name: collectors-state-critical
- rules:
- - alert: IpmiCurrent
- expr: ipmi_current_state == 2
- labels:
- severity: critical
- - alert: IpmiFanSpeed
- expr: ipmi_fan_speed_state == 2
- labels:
- severity: critical
- - alert: IpmiPower
- expr: ipmi_power_state == 2
- labels:
- severity: critical
- - alert: IpmiSensor
- expr: ipmi_sensor_state == 2
- labels:
- severity: critical
- - alert: IpmiTemperature
- expr: ipmi_temperature_state == 2
- labels:
- severity: critical
- - alert: IpmiVoltage
- expr: ipmi_voltage_state == 2
- labels:
- severity: critical
- memcached:
- groups:
- - name: memcached
- rules:
- - alert: MemcachedDown
- expr: memcached_up == 0
- for: 5m
- labels:
- severity: critical
- - alert: MemcachedConnectionLimitApproaching
- expr: (memcached_current_connections / memcached_max_connections * 100) > 80
- for: 5m
- labels:
- severity: warning
- - alert: MemcachedConnectionLimitApproaching
- expr: (memcached_current_connections / memcached_max_connections * 100) > 95
- for: 5m
- labels:
- severity: critical
- node-exporter-local:
- groups:
- - name: node
- rules:
- - alert: NodeHighLoadAverage
- expr: node_load5 / count(node_cpu_seconds_total{mode="system"}) without (cpu, mode) > 1.5
- for: 30m
- labels:
- severity: warning
- - alert: NodeHighMemoryUsage
- expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 < 2.5
- for: 2m
- labels:
- severity: critical
- - alert: NodeHighCpuUsage
- expr: sum by(instance)(irate(node_cpu_seconds_total{mode='idle'}[5m])) < 1
- for: 2m
- labels:
- severity: warning
- - alert: NodeLowEntropy
- expr: node_entropy_available_bits < 1000
- for: 5m
- labels:
- severity: warning
- - name: softnet
- rules:
- - alert: NodeSoftNetTimesSqueezed
- expr: sum(rate(node_softnet_times_squeezed_total[1m])) by (instance) > 10
- for: 10m
- labels:
- severity: warning
- - alert: NodeSoftNetDrops
- expr: sum(rate(node_softnet_dropped_total[1m])) by (instance) != 0
- for: 1m
- labels:
- severity: critical
- openstack-exporter:
- groups:
- - name: cinder
- rules:
- - alert: CinderAgentDown
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running
- on {{ $labels.hostname }} is being reported as down.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- down{% endraw %}'
- expr: |
- openstack_cinder_agent_state != 1
- labels:
- severity: warning
- - alert: CinderAgentDown
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running on
- {{ $labels.hostname }} is being reported as down for 5 minutes.
- This can affect volume operations so it must be resolved as
- quickly as possible.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- down{% endraw %}'
- expr: |
- openstack_cinder_agent_state != 1
- for: 5m
- labels:
- severity: critical
- - alert: CinderAgentDisabled
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running on {{ $labels.hostname }}
- has been disabled for 60 minutes. This can affect volume operations so it must be
- resolved as quickly as possible.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- disabled{% endraw %}'
- expr: |
- openstack_cinder_agent_state{adminState!="enabled"}
- for: 1h
- labels:
- severity: warning
- - alert: CinderVolumeInError
- annotations:
- description: |
- '{% raw %}The volume {{ $labels.id }} has been in ERROR state for over 24 hours.
- It must be cleaned up or removed in order to provide a consistent customer
- experience.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.id }}] Volume in ERROR state{% endraw %}'
- expr: |
- openstack_cinder_volume_status{status=~"error.*"}
- for: 24h
- labels:
- severity: warning
- - name: neutron
- rules:
- - alert: NeutronAgentDown
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running on {{ $labels.hostname }}
- is being reported as down.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- down{% endraw %}'
- expr: |
- openstack_neutron_agent_state != 1
- labels:
- severity: warning
- - alert: NeutronAgentDown
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running on {{ $labels.hostname }}
- is being reported as down for 5 minutes. This can affect network operations so it
- must be resolved as quickly as possible.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- down{% endraw %}'
- expr: |
- openstack_neutron_agent_state != 1
- for: 5m
- labels:
- severity: critical
- - alert: NeutronAgentDisabled
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running on {{ $labels.hostname }}
- has been disabled for 60 minutes. This can affect network operations so it must be
- resolved as quickly as possible.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- disabled{% endraw %}'
- expr: |
- openstack_neutron_agent_state{adminState!="up"}
- for: 1h
- labels:
- severity: warning
- - alert: NeutronBindingFailedPorts
- annotations:
- description: |
- '{% raw %}The NIC {{ $labels.mac_address }} of {{ $labels.device_owner }}
- has binding failed port now.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.device_owner }}] {{ $labels.mac_address }}
- binding failed{% endraw %}'
- expr: |
- openstack_neutron_port{binding_vif_type="binding_failed"} != 0
- labels:
- severity: warning
- - alert: NeutronNetworkOutOfIPs
- annotations:
- description: |
- '{% raw %}The subnet {{ $labels.subnet_name }} within {{ $labels.network_name }}
- is currently at {{ $value }}% utilization. If the IP addresses run out, it will
- impact the provisioning of new ports.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.network_name }}] {{ $labels.subnet_name }}
- running out of IPs{% endraw %}'
- expr: |
- sum by (network_id) (openstack_neutron_network_ip_availabilities_used{project_id!=""}) / sum by (network_id)
- (openstack_neutron_network_ip_availabilities_total{project_id!=""}) * 100 > 80
- labels:
- severity: warning
- - name: nova
- rules:
- - alert: NovaAgentDown
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running on {{ $labels.hostname }}
- is being reported as down.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- down{% endraw %}'
- expr: |
- openstack_nova_agent_state != 1
- labels:
- severity: warning
- - alert: NovaAgentDown
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running on {{ $labels.hostname }} is
- being reported as down. This can affect compute operations so it must be resolved as
- quickly as possible.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- down{% endraw %}'
- expr: |
- openstack_nova_agent_state != 1
- for: 5m
- labels:
- severity: critical
- - alert: NovaAgentDisabled
- annotations:
- description: |
- '{% raw %}The service {{ $labels.exported_service }} running on {{ $labels.hostname }} has been
- disabled for 60 minutes. This can affect compute operations so it must be resolved as quickly
- as possible.{% endraw %}'
- summary: |
- '{% raw %}[{{ $labels.hostname }}] {{ $labels.exported_service }}
- disabled{% endraw %}'
- expr: |
- openstack_nova_agent_state{adminState!="enabled"}
- for: 1h
- labels:
- severity: warning
- - alert: NovaInstanceInError
- annotations:
- description: |
- '{% raw %}The instance {{ $labels.id }} has been in ERROR state for over 24 hours.
- It must be cleaned up or removed in order to provide a consistent customer
- experience.{% endraw %}'
- summary: "{% raw %}[{{ $labels.id }}] Instance in ERROR state{% endraw %}"
- expr: |
- openstack_nova_server_status{status="ERROR"}
- for: 24h
- labels:
- severity: warning
- - alert: NovaFailureRisk
- annotations:
- description: |
- '{% raw %}The cloud capacity will be at {{ $value }} in the event of the failure of
- a single hypervisor which puts the cloud at risk of not being able to recover should
- any hypervisor failures occur. Please ensure that adequate amount of infrastructure
- is assigned to this deployment to prevent this.{% endraw %}'
- summary: "{% raw %}[nova] Failure risk{% endraw %}"
- expr: |
- (sum(openstack_nova_memory_available_bytes-openstack_nova_memory_used_bytes) - max(openstack_nova_memory_used_bytes))
- / sum(openstack_nova_memory_available_bytes-openstack_nova_memory_used_bytes) * 100 < 0.25
- for: 6h
- labels:
- severity: warning
- - alert: NovaCapacity
- annotations:
- description: |
- '{% raw %}The cloud capacity is currently at `{{ $value }}` which means there is a
- risk of running out of capacity due to the timeline required to add new nodes.
- Please ensure that adequate amount of infrastructure is assigned to this deployment
- to prevent this.{% endraw %}'
- summary: "{% raw %}[nova] Capacity risk{% endraw %}"
- expr: |
- sum (
- openstack_nova_memory_used_bytes
- + on(hostname) group_left(adminState)
- (0 * openstack_nova_agent_state{exported_service="nova-compute",adminState="enabled"})
- ) / sum (
- openstack_nova_memory_available_bytes
- + on(hostname) group_left(adminState)
- (0 * openstack_nova_agent_state{exported_service="nova-compute",adminState="enabled"})
- ) * 100 > 75
- for: 6h
- labels:
- severity: warning
- percona-xtradb-pxc:
- groups:
- # TODO: basic rules
- - name: general
- rules:
- - alert: MySQLDown
- expr: mysql_up != 1
- for: 5m
- labels:
- severity: critical
- - alert: MysqlTooManyConnections
- expr: max_over_time(mysql_global_status_threads_connected[1m]) / mysql_global_variables_max_connections * 100 > 80
- for: 2m
- labels:
- severity: warning
- - alert: MysqlHighThreadsRunning
- expr: max_over_time(mysql_global_status_threads_running[1m]) / mysql_global_variables_max_connections * 100 > 60
- for: 2m
- labels:
- severity: warning
- - alert: MysqlSlowQueries
- expr: increase(mysql_global_status_slow_queries[1m]) > 0
- for: 2m
- labels:
- severity: warning
- - name: galera
- rules:
- - alert: MySQLGaleraNotReady
- expr: mysql_global_status_wsrep_ready != 1
- for: 5m
- labels:
- severity: critical
- - alert: MySQLGaleraOutOfSync
- expr: mysql_global_status_wsrep_local_state != 4 and mysql_global_variables_wsrep_desync == 0
- for: 5m
- labels:
- severity: critical
- - alert: MySQLGaleraDonorFallingBehind
- expr: mysql_global_status_wsrep_local_state == 2 and mysql_global_status_wsrep_local_recv_queue > 100
- for: 5m
- labels:
- severity: warning
- - alert: MySQLReplicationNotRunning
- expr: mysql_slave_status_slave_io_running == 0 or mysql_slave_status_slave_sql_running == 0
- for: 2m
- labels:
- severity: critical
- - alert: MySQLReplicationLag
- expr: (instance:mysql_slave_lag_seconds > 30) and on(instance) (predict_linear(instance:mysql_slave_lag_seconds[5m], 60 * 2) > 0)
- for: 1m
- labels:
- severity: critical
- - alert: MySQLHeartbeatLag
- expr: (instance:mysql_heartbeat_lag_seconds > 30) and on(instance) (predict_linear(instance:mysql_heartbeat_lag_seconds[5m], 60 * 2) > 0)
- for: 1m
- labels:
- severity: critical
- - alert: MySQLInnoDBLogWaits
- expr: rate(mysql_global_status_innodb_log_waits[15m]) > 10
- labels:
- severity: warning
- rabbitmq:
- groups:
- - name: recording
- rules:
- - record: rabbitmq:usage:memory
- labels:
- job: rabbitmq
- expr: |
- sum without (job) (
- rabbitmq_process_resident_memory_bytes
- ) / sum without (
- container,
- pod,
- job,
- namespace,
- node,
- resource,
- uid,
- unit
- ) (
- label_replace(
- cluster:namespace:pod_memory:active:kube_pod_container_resource_limits,
- "instance",
- "$1",
- "pod",
- "(.*)"
- )
- )
- - name: alarms
- rules:
- - alert: RabbitmqAlarmFreeDiskSpace
- expr: rabbitmq_alarms_free_disk_space_watermark == 1
- labels:
- severity: critical
- - alert: RabbitmqAlarmMemoryUsedWatermark
- expr: rabbitmq_alarms_memory_used_watermark == 1
- labels:
- severity: critical
- - alert: RabbitmqAlarmFileDescriptorLimit
- expr: rabbitmq_alarms_file_descriptor_limit == 1
- labels:
- severity: critical
- - name: limits
- rules:
- - alert: RabbitmqMemoryHigh
- expr: rabbitmq:usage:memory > 0.80
- labels:
- severity: warning
- - alert: RabbitmqMemoryHigh
- expr: rabbitmq:usage:memory > 0.95
- labels:
- severity: critical
- - alert: RabbitmqFileDescriptorsUsage
- expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.80
- labels:
- severity: warning
- - alert: RabbitmqFileDescriptorsUsage
- expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.95
- labels:
- severity: critical
- - alert: RabbitmqTcpSocketsUsage
- expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.80
- labels:
- severity: warning
- - alert: RabbitmqTcpSocketsUsage
- expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.95
- labels:
- severity: critical
- - name: msgs
- rules:
- - alert: RabbitmqUnackedMessages
- expr: sum(rabbitmq_queue_messages_unacked) BY (queue) > 1000
- for: 5m
- labels:
- severity: warning
- - alert: RabbitmqUnackedMessages
- expr: sum(rabbitmq_queue_messages_unacked) BY (queue) > 1000
- for: 1h
- labels:
- severity: critical