#!/bin/bash

{{/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}

# Abort on the first failing command (-e) and trace every command as it is
# executed (-x).
set -ex

# Block until a regular file exists at the given path, polling once per second.
#
# Arguments:
#   $1 - path of the file to wait for
# Returns:
#   0 once the file exists. An empty path is skipped with a warning instead
#   of being polled forever.
wait_for_file() {
  local file=$1

  if [ -z "$file" ]; then
    echo "WARNING: wait_for_file called without a path; nothing to wait for" 1>&2
    return 0
  fi

  # Quoted so that a path containing whitespace or glob characters is tested
  # literally instead of breaking the test expression.
  while [ ! -f "$file" ]; do
    sleep 1
  done
}

# Block until every certificate and key file listed below is present.
# Template-rendered paths are quoted so a value containing whitespace is
# passed to wait_for_file as a single argument.

# CA certificates
wait_for_file "{{ .Values.conf.libvirt.ca_file }}"
wait_for_file /etc/pki/qemu/ca-cert.pem

# Certificates
wait_for_file "{{ .Values.conf.libvirt.cert_file }}"
wait_for_file /etc/pki/libvirt/clientcert.pem
wait_for_file /etc/pki/qemu/server-cert.pem
wait_for_file /etc/pki/qemu/client-cert.pem

# Private keys
wait_for_file "{{ .Values.conf.libvirt.key_file }}"
wait_for_file /etc/pki/libvirt/private/clientkey.pem
wait_for_file /etc/pki/qemu/server-key.pem
wait_for_file /etc/pki/qemu/client-key.pem

# VNC certificates and key
wait_for_file /etc/pki/libvirt-vnc/ca-cert.pem
wait_for_file /etc/pki/libvirt-vnc/server-cert.pem
wait_for_file /etc/pki/libvirt-vnc/server-key.pem

# TODO: We disable cgroup functionality for cgroup v2, we should fix this in the future
# Detect the cgroup version from the filesystem type reported for
# /sys/fs/cgroup/: "cgroup2fs" selects v2, anything else is treated as v1.
# The pipeline is used directly as the condition instead of executing the
# (empty) output of a command substitution.
if stat -fc %T /sys/fs/cgroup/ | grep -q cgroup2fs; then
  CGROUP_VERSION=v2
else
  CGROUP_VERSION=v1
fi

# Kill any process whose command name is exactly "libvirtd" before this
# script starts its own (the warning below reports it as running on the host).
if [ -n "$(cat /proc/*/comm 2>/dev/null | grep -w libvirtd)" ]; then
  # Tracing is suspended while scanning /proc to keep the log readable; it is
  # re-enabled only around the actions taken for a match.
  set +x
  for proc in /proc/*/comm; do
    # Processes may exit while we iterate, so unreadable entries are skipped.
    proc_comm=$(cat "$proc" 2>/dev/null) || continue
    if [ "$proc_comm" = "libvirtd" ]; then
      set -x
      # /proc/<pid>/comm -> <pid>
      libvirtpid=${proc#/proc/}
      libvirtpid=${libvirtpid%/comm}
      echo "WARNING: libvirtd daemon already running on host" 1>&2
      echo "$(grep State "/proc/${libvirtpid}/status" 2>/dev/null)" 1>&2
      # Best effort: the process may already be gone by the time we signal it.
      kill -9 "$libvirtpid" || true
      set +x
    fi
  done
  set -x
fi

# Remove any pid file left behind by a previous libvirtd; the secret setup
# further down treats the presence of this file as "libvirtd has started".
rm -f /var/run/libvirtd.pid

# When the KVM character device is present, restrict it to root and the kvm
# group (rw for owner and group, nothing for others).
if [[ -c /dev/kvm ]]; then
  chmod 660 /dev/kvm
  chown root:kvm /dev/kvm
fi

if [ "$CGROUP_VERSION" != "v2" ]; then
  #Setup Cgroups to use when breaking out of Kubernetes defined groups
  # CGROUPS collects, as a comma-terminated list, each of the controllers
  # below that has a directory under /sys/fs/cgroup.
  CGROUPS=""
  for CGROUP in cpu rdma hugetlb; do
    if [ -d "/sys/fs/cgroup/${CGROUP}" ]; then
      CGROUPS+="${CGROUP},"
    fi
  done
  # ${CGROUPS%,} drops the trailing comma before handing the list to cgcreate.
  cgcreate -g "${CGROUPS%,}:/osh-libvirt"
fi

# We assume that if hugepage count > 0, then hugepages should be exposed to libvirt/qemu
hp_count="$(grep HugePages_Total /proc/meminfo | tr -cd '[:digit:]')"
# The leading 0 keeps the numeric test well-formed when no count was read.
if [ 0"$hp_count" -gt 0 ]; then

  echo "INFO: Detected hugepage count of '$hp_count'. Enabling hugepage settings for libvirt/qemu."

  # Enable KVM hugepages for QEMU
  if grep -q KVM_HUGEPAGES=0 /etc/default/qemu-kvm; then
    sed -i 's/.*KVM_HUGEPAGES=0.*/KVM_HUGEPAGES=1/g' /etc/default/qemu-kvm
  else
    echo KVM_HUGEPAGES=1 >> /etc/default/qemu-kvm
  fi

  # Ensure that the hugepage mount location is available/mapped inside the
  # container. This assumes use of the default ubuntu dev-hugepages.mount
  # systemd unit which mounts hugepages at this location.
  if [ ! -d /dev/hugepages ]; then
    echo "ERROR: Hugepages configured in kernel, but libvirtd container cannot access /dev/hugepages"
    exit 1
  fi

  if [ "$CGROUP_VERSION" != "v2" ]; then
    # Kubernetes 1.10.x introduced cgroup changes that caused the container's
    # hugepage byte limit quota to zero out. This workaround sets that pod limit
    # back to the total number of hugepage bytes available to the baremetal host.
    if [ -d /sys/fs/cgroup/hugetlb ]; then
      # The failure branch runs in the current shell so that "exit 1" aborts
      # the script itself rather than only leaving a subshell.
      limits="$(ls /sys/fs/cgroup/hugetlb/{{ .Values.conf.kubernetes.cgroup }}/hugetlb.*.limit_in_bytes)" || {
        echo "ERROR: Failed to locate any hugetable limits. Did you set the correct cgroup in your values used for this chart?"
        exit 1
      }

      # Directory of this process's hugetlb cgroup, taken from /proc/self/cgroup
      # with its last path component removed.
      pod_cgroup="$(dirname "$(awk -F: '($2~/hugetlb/){print $3}' /proc/self/cgroup)")"

      # $limits is intentionally unquoted: it holds one path per word.
      for limit in $limits; do
        target="/sys/fs/cgroup/hugetlb/${pod_cgroup}/$(basename "$limit")"
        # Ensure the write target for the hugepage limit for the pod exists
        if [ ! -f "$target" ]; then
          echo "ERROR: Could not find write target for hugepage limit: $target"
        fi

        # Write hugetable limit for pod
        echo "$(cat "$limit")" > "$target"
      done
    fi

    # Determine OS default hugepage size to use for the hugepage write test
    default_hp_kb="$(grep Hugepagesize /proc/meminfo | tr -cd '[:digit:]')"

    # Attempt to write to the hugepage mount to ensure it is operational, but only
    # if we have at least 1 free page.
    num_free_pages="$(cat "/sys/kernel/mm/hugepages/hugepages-${default_hp_kb}kB/free_hugepages" | tr -cd '[:digit:]')"
    echo "INFO: '$num_free_pages' free hugepages of size ${default_hp_kb}kB"
    if [ 0"$num_free_pages" -gt 0 ]; then
      if ! { fallocate -o0 -l "$default_hp_kb" /dev/hugepages/foo && rm /dev/hugepages/foo; }; then
        echo "ERROR: fallocate failed test at /dev/hugepages with size ${default_hp_kb}kB"
        # -f: the test file may never have been created.
        rm -f /dev/hugepages/foo
        exit 1
      fi
    fi
  fi
fi

# When a ceph secret UUID is configured, start a temporary libvirtd in the
# background, define the secret(s) through virsh, then stop that libvirtd
# again before the final instance is launched.
if [ -n "${LIBVIRT_CEPH_CINDER_SECRET_UUID}" ] || [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then
  if [ "$CGROUP_VERSION" != "v2" ]; then
    #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
    cgexec -g "${CGROUPS%,}:/osh-libvirt" systemd-run --scope --slice=system libvirtd --listen &
  else
    systemd-run --scope --slice=system libvirtd --listen &
  fi

  tmpsecret=$(mktemp --suffix .xml)
  if [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then
    tmpsecret2=$(mktemp --suffix .xml)
  fi

  # Remove the temporary secret definition files. Registered on EXIT and also
  # called explicitly once the secrets are defined; safe to run twice.
  cleanup() {
    rm -f "${tmpsecret}"
    if [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then
      rm -f "${tmpsecret2}"
    fi
  }
  trap cleanup EXIT

  # Wait for libvirtd to come up (pid file present).
  # The counter uses arithmetic expansion rather than "let": "let" returns a
  # non-zero status when the result is 0, which under "set -e" would abort the
  # script before the error message below could be printed.
  TIMEOUT=60
  while [[ ! -f /var/run/libvirtd.pid ]]; do
    if [[ ${TIMEOUT} -gt 0 ]]; then
      TIMEOUT=$((TIMEOUT - 1))
      sleep 1
    else
      echo "ERROR: libvirt did not start in time (pid file missing)"
      exit 1
    fi
  done

  # The pid file can appear before the socket does; wait for the socket as
  # well, since it is needed for virsh.
  TIMEOUT=10
  while [[ ! -e /var/run/libvirt/libvirt-sock ]]; do
    if [[ ${TIMEOUT} -gt 0 ]]; then
      TIMEOUT=$((TIMEOUT - 1))
      sleep 1
    else
      echo "ERROR: libvirt did not start in time (socket missing)"
      exit 1
    fi
  done

  # Define a ceph-usage libvirt secret and set its value.
  # Arguments:
  #   $1 - ceph client user name (used in the secret's usage name)
  #   $2 - UUID of the libvirt secret
  #   $3 - base64 key passed to "virsh secret-set-value"
  # NOTE(review): with "set -x" active the key appears in the trace output and
  # on the virsh command line; consider suppressing tracing around these calls.
  create_virsh_libvirt_secret() {
    local sec_user=$1
    local sec_uuid=$2
    local sec_ceph_keyring=$3
    cat > "${tmpsecret}" <<EOF
<secret ephemeral='no' private='no'>
  <uuid>${sec_uuid}</uuid>
  <usage type='ceph'>
    <name>client.${sec_user}. secret</name>
  </usage>
</secret>
EOF
    virsh secret-define --file "${tmpsecret}"
    virsh secret-set-value --secret "${sec_uuid}" --base64 "${sec_ceph_keyring}"
  }

  # Fall back to the key stored in the user's keyring file when no key was
  # supplied through the environment.
  if [ -z "${CEPH_CINDER_KEYRING}" ] && [ -n "${CEPH_CINDER_USER}" ] ; then
    CEPH_CINDER_KEYRING=$(awk '/key/{print $3}' "/etc/ceph/ceph.client.${CEPH_CINDER_USER}.keyring")
  fi
  # Arguments are quoted so that an empty value cannot shift the later ones
  # into the wrong position.
  if [ -n "${CEPH_CINDER_USER}" ] ; then
    create_virsh_libvirt_secret "${CEPH_CINDER_USER}" "${LIBVIRT_CEPH_CINDER_SECRET_UUID}" "${CEPH_CINDER_KEYRING}"
  fi

  if [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then
    EXTERNAL_CEPH_CINDER_KEYRING=$(cat /tmp/external-ceph-client-keyring)
    create_virsh_libvirt_secret "${EXTERNAL_CEPH_CINDER_USER}" "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" "${EXTERNAL_CEPH_CINDER_KEYRING}"
  fi

  cleanup

  # stop libvirtd; we needed it up to create secrets
  LIBVIRTD_PID=$(cat /var/run/libvirtd.pid)
  kill "$LIBVIRTD_PID"
  # Block until that process has actually exited.
  tail --pid="$LIBVIRTD_PID" -f /dev/null

fi

# Launch libvirtd in the foreground as the final step of the script.
if [ "$CGROUP_VERSION" != "v2" ]; then
  #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
  cgexec -g "${CGROUPS%,}:/osh-libvirt" systemd-run --scope --slice=system libvirtd --listen
else
  systemd-run --scope --slice=system libvirtd --listen
fi