fix: add lpfc role (#381)

* fix: enable volume_use_multipath by default

* fix: add lpfc tuning role
diff --git a/playbooks/openstack.yml b/playbooks/openstack.yml
index fea1ad8..9dfbf5d 100644
--- a/playbooks/openstack.yml
+++ b/playbooks/openstack.yml
@@ -110,6 +110,15 @@
       tags:
         - placement
 
+- name: Configure operating system
+  hosts: controllers:computes
+  gather_facts: false  # fact gathering is disabled for this play
+  become: true
+  roles:
+    - role: lpfc  # tunes the lpfc driver; its work is gated on /sys/module/lpfc
+      tags:
+        - lpfc
+
 - name: Deploy Open vSwitch
   hosts: controllers:computes
   become: true
diff --git a/roles/lpfc/README.md b/roles/lpfc/README.md
new file mode 100644
index 0000000..3b2523b
--- /dev/null
+++ b/roles/lpfc/README.md
@@ -0,0 +1,4 @@
+# Emulex LightPulse Fibre Channel (LPFC) driver
+
+This role tunes the Emulex LightPulse Fibre Channel (LPFC) driver for use
+with OpenStack.
diff --git a/roles/lpfc/defaults/main.yml b/roles/lpfc/defaults/main.yml
new file mode 100644
index 0000000..04a17ef
--- /dev/null
+++ b/roles/lpfc/defaults/main.yml
@@ -0,0 +1,25 @@
+# Copyright (c) 2023 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# Max number of FCP commands that can be queued to a specific LUN
+lpfc_lun_queue_depth: 128
+
+# Max scatter-gather segment count
+lpfc_sg_seg_cnt: 256
+
+# Maximum allowed LUN ID
+lpfc_max_luns: 65535
+
+# FC4 protocol support to enable - FCP / NVMe (1 = FCP, 3 = both)
+lpfc_enable_fc4_type: 3
diff --git a/roles/lpfc/tasks/main.yml b/roles/lpfc/tasks/main.yml
new file mode 100644
index 0000000..91a0131
--- /dev/null
+++ b/roles/lpfc/tasks/main.yml
@@ -0,0 +1,60 @@
+# Copyright (c) 2023 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+- name: Detect if the "lpfc" module is loaded
+  ansible.builtin.stat:
+    path: /sys/module/lpfc  # existence of this path is used as the "loaded" signal
+  register: _lpfc_module  # .stat.exists gates the configuration block that follows
+
+- name: Install the configuration file and reboot if necessary
+  when: _lpfc_module.stat.exists  # nothing to tune on hosts without the driver
+  block:
+    - name: Install the configuration file
+      ansible.builtin.template:
+        src: lpfc.conf.j2
+        dest: /etc/modprobe.d/lpfc.conf
+        owner: root
+        group: root
+        mode: "0644"
+      register: _lpfc_module_conf  # .changed is one of the two reboot triggers
+
+    - name: Get the values for the module parameters
+      ansible.builtin.slurp:  # content comes back base64-encoded
+        src: /sys/module/lpfc/parameters/{{ item }}
+      register: _lpfc_module_parameters
+      loop:  # each entry names both the sysfs file and the role variable
+        - lpfc_lun_queue_depth
+        - lpfc_sg_seg_cnt
+        - lpfc_max_luns
+        - lpfc_enable_fc4_type
+
+    - name: Check if the module parameters are set correctly
+      ansible.builtin.assert:
+        quiet: true
+        that:
+          - item.content | b64decode | trim | int == lookup('vars', item.item) | int
+        fail_msg: >-
+          The module parameter {{ item.item }} is not set correctly, expected:
+          {{ lookup('vars', item.item) }}, got: {{ item.content | b64decode | trim }}
+      loop: "{{ _lpfc_module_parameters.results }}"
+      loop_control:
+        label: "{{ item.item }}"
+      register: _lpfc_module_parameters_assert
+      failed_when: false  # a mismatch must not fail the play ...
+      changed_when: _lpfc_module_parameters_assert.failed  # ... it is reported as "changed"
+
+    - name: Reboot the system if the configuration file or module parameters changed
+      ansible.builtin.include_role:
+        name: reboot
+      when: _lpfc_module_conf.changed or _lpfc_module_parameters_assert.changed
diff --git a/roles/lpfc/templates/lpfc.conf.j2 b/roles/lpfc/templates/lpfc.conf.j2
new file mode 100644
index 0000000..2b795e0
--- /dev/null
+++ b/roles/lpfc/templates/lpfc.conf.j2
@@ -0,0 +1,3 @@
+{# Installed as /etc/modprobe.d/lpfc.conf; values come from the role's lpfc_* variables #}# {{ ansible_managed }}
+
+options lpfc lpfc_lun_queue_depth={{ lpfc_lun_queue_depth }} lpfc_sg_seg_cnt={{ lpfc_sg_seg_cnt }} lpfc_max_luns={{ lpfc_max_luns }} lpfc_enable_fc4_type={{ lpfc_enable_fc4_type }}
diff --git a/roles/nova/vars/main.yml b/roles/nova/vars/main.yml
index 820b12c..712242a 100644
--- a/roles/nova/vars/main.yml
+++ b/roles/nova/vars/main.yml
@@ -80,6 +80,8 @@
         max_instances_per_host: 200
       glance:
         enable_rbd_download: true
+      libvirt:
+        volume_use_multipath: true
       neutron:
         metadata_proxy_shared_secret: "{{ openstack_helm_endpoints['compute_metadata']['secret'] }}"
       oslo_messaging_notifications:
diff --git a/roles/reboot/README.md b/roles/reboot/README.md
new file mode 100644
index 0000000..735fd9e
--- /dev/null
+++ b/roles/reboot/README.md
@@ -0,0 +1,7 @@
+# `reboot`
+
+This is an operational role that will run a set of checks to verify that the
+server is safe to reboot. If the checks pass, a silence will be created to
+prevent alarms from firing during the reboot. Once that's done, the reboot
+will start and the silence will be removed once the server is back online.
+
diff --git a/roles/reboot/meta/main.yml b/roles/reboot/meta/main.yml
new file mode 100644
index 0000000..322fe27
--- /dev/null
+++ b/roles/reboot/meta/main.yml
@@ -0,0 +1,29 @@
+# Copyright (c) 2022 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+galaxy_info:  # role metadata for the "reboot" role
+  author: VEXXHOST, Inc.
+  description: Safely reboot a host
+  license: Apache-2.0
+  min_ansible_version: 5.5.0
+  standalone: false
+  platforms:  # distributions and releases this role declares support for
+    - name: EL
+      versions:
+        - "8"
+        - "9"
+    - name: Ubuntu
+      versions:
+        - focal
+        - jammy
diff --git a/roles/reboot/tasks/main.yml b/roles/reboot/tasks/main.yml
new file mode 100644
index 0000000..efcd969
--- /dev/null
+++ b/roles/reboot/tasks/main.yml
@@ -0,0 +1,53 @@
+# Copyright (c) 2023 VEXXHOST, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+- name: Install package for "community.general.pids"
+  ansible.builtin.package:
+    name: python3-psutil  # Python dependency of the PID lookups below
+
+# NOTE(mnaser): There is currently no way to safely evacuate any of these
+#               processes *yet*.  Entries should be removed as we add ways
+#               to evacuate them.
+- name: Get process IDs for critical services
+  register: _reboots_pids
+  community.general.pids:
+    pattern: "{{ item }}"
+  loop:
+    - ceph-mgr
+    - ceph-mon
+    - ceph-osd
+    - mysqld
+    - neutron-l3-agent
+    - qemu
+    - rabbitmq-server
+    - radosgw
+
+- name: Assert that none of these processes are running
+  loop_control:
+    label: "{{ item.item }}"
+  loop: "{{ _reboots_pids.results }}"
+  ansible.builtin.assert:
+    that:
+      - (item.pids | length) == 0
+    quiet: true
+    fail_msg: >-
+      The following processes are running: {{ item.item }} ({{ item.pids | join(', ') }})
+
+- name: Create a silence and reboot the host
+  block:
+    # TODO(mnaser): Create a silence inside AlertManager
+    - name: Reboot the host
+      ansible.builtin.reboot:  # called with the module's default arguments
+  always: []  # empty placeholder until the silence cleanup exists
+    # TODO(mnaser): Remove the silence from AlertManager