fix: add lpfc role (#381)
* fix: enable volume_use_multipath by default
* fix: add lpfc tuning role
diff --git a/playbooks/openstack.yml b/playbooks/openstack.yml
index fea1ad8..9dfbf5d 100644
--- a/playbooks/openstack.yml
+++ b/playbooks/openstack.yml
@@ -110,6 +110,15 @@
tags:
- placement
+- name: Configure operating system
+  hosts: controllers:computes
+  become: true
+  gather_facts: false
+  roles:
+    - role: lpfc  # only acts on hosts where /sys/module/lpfc exists
+      tags:
+        - lpfc
+
- name: Deploy Open vSwitch
hosts: controllers:computes
become: true
diff --git a/roles/lpfc/README.md b/roles/lpfc/README.md
new file mode 100644
index 0000000..3b2523b
--- /dev/null
+++ b/roles/lpfc/README.md
@@ -0,0 +1,4 @@
+# Emulex LightPulse Fibre Channel (LPFC) driver
+
+This role tunes the Emulex LightPulse Fibre Channel (LPFC) driver for usage
+with OpenStack.
diff --git a/roles/lpfc/defaults/main.yml b/roles/lpfc/defaults/main.yml
new file mode 100644
index 0000000..04a17ef
--- /dev/null
+++ b/roles/lpfc/defaults/main.yml
@@ -0,0 +1,25 @@
+# Copyright (c) 2023 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Maximum number of FCP commands that can be queued to a single LUN
+lpfc_lun_queue_depth: 128
+
+# Maximum scatter-gather segment count
+lpfc_sg_seg_cnt: 256
+
+# Maximum allowed LUN ID
+lpfc_max_luns: 65535
+
+# FC4 protocol support to enable - FCP / NVMe (FCP only = 1, both = 3)
+lpfc_enable_fc4_type: 3
diff --git a/roles/lpfc/tasks/main.yml b/roles/lpfc/tasks/main.yml
new file mode 100644
index 0000000..91a0131
--- /dev/null
+++ b/roles/lpfc/tasks/main.yml
@@ -0,0 +1,60 @@
+# Copyright (c) 2023 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+- name: Detect if the "lpfc" module is loaded
+  ansible.builtin.stat:
+    path: /sys/module/lpfc
+  register: _lpfc_module
+
+- name: Install the configuration file and reboot if necessary
+  when: _lpfc_module.stat.exists  # skip everything when the driver is not loaded
+  block:
+    - name: Install the configuration file
+      ansible.builtin.template:
+        src: lpfc.conf.j2
+        dest: /etc/modprobe.d/lpfc.conf
+        owner: root
+        group: root
+        mode: "0644"
+      register: _lpfc_module_conf
+
+    - name: Get the values for the module parameters
+      ansible.builtin.slurp:
+        src: /sys/module/lpfc/parameters/{{ item }}
+      register: _lpfc_module_parameters
+      loop:  # each entry is both the sysfs parameter name and the role variable name
+        - lpfc_lun_queue_depth
+        - lpfc_sg_seg_cnt
+        - lpfc_max_luns
+        - lpfc_enable_fc4_type
+
+    - name: Reboot the system if the module parameters are not set correctly
+      ansible.builtin.assert:
+        quiet: true
+        that:  # cast both sides: overrides given as "-e key=value" arrive as strings
+          - item.content | b64decode | trim | int == lookup('vars', item.item) | int
+        fail_msg: >-
+          The module parameter {{ item.item }} is not set correctly, expected:
+          {{ lookup('vars', item.item) }}, got: {{ item.content | b64decode | trim }}
+      loop: "{{ _lpfc_module_parameters.results }}"
+      loop_control:
+        label: "{{ item.item }}"
+      register: _lpfc_module_parameters_assert
+      failed_when: false  # a mismatch must not abort the play ...
+      changed_when: _lpfc_module_parameters_assert.failed  # ... it is reported as "changed" instead
+
+    - name: Reboot the system if the configuration file has changed
+      ansible.builtin.include_role:
+        name: reboot
+      when: _lpfc_module_conf.changed or _lpfc_module_parameters_assert.changed  # new file or stale live values
diff --git a/roles/lpfc/templates/lpfc.conf.j2 b/roles/lpfc/templates/lpfc.conf.j2
new file mode 100644
index 0000000..2b795e0
--- /dev/null
+++ b/roles/lpfc/templates/lpfc.conf.j2
@@ -0,0 +1,3 @@
+# {{ ansible_managed }}
+
+options lpfc lpfc_lun_queue_depth={{ lpfc_lun_queue_depth }} lpfc_sg_seg_cnt={{ lpfc_sg_seg_cnt }} lpfc_max_luns={{ lpfc_max_luns }} lpfc_enable_fc4_type={{ lpfc_enable_fc4_type }}
diff --git a/roles/nova/vars/main.yml b/roles/nova/vars/main.yml
index 820b12c..712242a 100644
--- a/roles/nova/vars/main.yml
+++ b/roles/nova/vars/main.yml
@@ -80,6 +80,8 @@
max_instances_per_host: 200
glance:
enable_rbd_download: true
+ libvirt:
+ volume_use_multipath: true
neutron:
metadata_proxy_shared_secret: "{{ openstack_helm_endpoints['compute_metadata']['secret'] }}"
oslo_messaging_notifications:
diff --git a/roles/reboot/README.md b/roles/reboot/README.md
new file mode 100644
index 0000000..735fd9e
--- /dev/null
+++ b/roles/reboot/README.md
@@ -0,0 +1,7 @@
+# `reboot`
+
+This is an operational role that runs a set of checks to verify that the
+server is safe to reboot and, if the checks pass, reboots it. Creating a
+silence to prevent alarms from firing during the reboot, and removing it once
+the server is back online, is planned but not yet implemented.
+
diff --git a/roles/reboot/meta/main.yml b/roles/reboot/meta/main.yml
new file mode 100644
index 0000000..322fe27
--- /dev/null
+++ b/roles/reboot/meta/main.yml
@@ -0,0 +1,29 @@
+# Copyright (c) 2022 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+galaxy_info:  # Ansible Galaxy metadata for the "reboot" role
+  author: VEXXHOST, Inc.
+  description: Verify that a host is safe to reboot and reboot it
+  license: Apache-2.0
+  min_ansible_version: 5.5.0
+  standalone: false
+  platforms:
+    - name: EL
+      versions:
+        - "8"
+        - "9"
+    - name: Ubuntu
+      versions:
+        - focal
+        - jammy
diff --git a/roles/reboot/tasks/main.yml b/roles/reboot/tasks/main.yml
new file mode 100644
index 0000000..efcd969
--- /dev/null
+++ b/roles/reboot/tasks/main.yml
@@ -0,0 +1,53 @@
+# Copyright (c) 2023 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+- name: Install package for "community.general.pids"
+  ansible.builtin.package:
+    name: python3-psutil
+
+# NOTE(mnaser): These are all processes that we do not currently have a way to
+#               safely evacuate *yet*. We should remove these as we add ways
+#               to evacuate them.
+- name: Get process IDs for critical services
+  community.general.pids:
+    pattern: "{{ item }}"
+  loop:
+    - ceph-mgr
+    - ceph-mon
+    - ceph-osd
+    - mysqld
+    - neutron-l3-agent
+    - qemu
+    - rabbitmq-server
+    - radosgw
+  register: _reboots_pids
+
+- name: Assert that none of these processes are running
+  ansible.builtin.assert:
+    quiet: true
+    that:
+      - item.pids | length == 0  # any matching PID blocks the reboot
+    fail_msg: >-
+      The following processes are running: {{ item.item }} ({{ item.pids | join(', ') }})
+  loop: "{{ _reboots_pids.results }}"
+  loop_control:
+    label: "{{ item.item }}"
+
+- name: Create a silence and reboot the host
+  block:
+    # TODO(mnaser): Create a silence inside AlertManager
+    - name: Reboot the host
+      ansible.builtin.reboot:
+  always: []  # placeholder until the silence cleanup below exists
+    # TODO(mnaser): Remove the silence from AlertManager