gpu_setup.sh 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. #!/bin/bash
  2. set -o errexit
  3. pushd $(dirname $(readlink -f "$BASH_SOURCE")) > /dev/null
  4. CUR_DIR=$(pwd)
  5. ROOT_DIR=$(cd .. && pwd)
  6. popd > /dev/null
  7. export LC_CTYPE="en_US.UTF-8"
  8. ################################################
  9. . $CUR_DIR/functions
  10. is_intel_cpu() {
  11. grep -m 1 'model name' /proc/cpuinfo | grep -qi intel
  12. return $?
  13. }
  14. PCIIDS_FILE=${PCIIDS_FILE:-$CUR_DIR/pci.ids}
  15. VFIO_PCI_OVERRIDE_TOOL=/usr/bin/vfio-pci-override.sh
  16. declare -A NEW_KERNEL_PARAMS=(
  17. [crashkernel]=auto
  18. [iommu]=pt
  19. [vfio_iommu_type1.allow_unsafe_interrupts]=1
  20. [rdblacklist]=nouveau
  21. [nouveau.modeset]=0
  22. [mgag200.modeset]=0
  23. )
  24. if is_intel_cpu; then
  25. info "enable intel_iommu=on"
  26. NEW_KERNEL_PARAMS[intel_iommu]=on
  27. fi
  28. OLD_KERNEL_PARAMS_FILE="/tmp/ocboot_gpusetup_old_kernel_params_file.txt"
  29. export LC_CTYPE="en_US.UTF-8"
  30. ################################################
  31. ensure_file_exist() {
  32. local file="$1"
  33. if [ ! -e "$file" ]; then
  34. error_exit "$file not exists"
  35. fi
  36. }
  37. ensure_file_writable() {
  38. local file="$1"
  39. ensure_file_exist "$file"
  40. if [ ! -w "$file" ]; then
  41. error_exit "$file not writable"
  42. fi
  43. }
  44. ensure_file_readable() {
  45. local file="$1"
  46. ensure_file_exist "$file"
  47. if [ ! -r "$file" ]; then
  48. error_exit "$file not readable"
  49. fi
  50. }
  51. # refresh_pciids() {
  52. # info "Refresh PCI ids..."
  53. # local args=""
  54. # if [ -z "$PCIIDS_FILE" ]; then
  55. # info "pci.ids file not provided, fetch from upstream..."
  56. # elif [ ! -e "$PCIIDS_FILE" ]; then
  57. # error "pciids file $PCIIDS_FILE not exists, fetch from upstream"
  58. # else
  59. # args="-s $PCIIDS_FILE"
  60. # fi
  61. # $CUR_DIR/update_pciids.sh $args
  62. # }
  63. _fill_old_kernel_params() {
  64. rm -rf $OLD_KERNEL_PARAMS_FILE && touch $OLD_KERNEL_PARAMS_FILE
  65. local cmdline_param=$*
  66. for param in $cmdline_param; do
  67. local key
  68. local val
  69. key=${param%=*}
  70. val=${param#*=}
  71. if [[ "$key" == "$val" ]]; then
  72. echo "$key" >> $OLD_KERNEL_PARAMS_FILE
  73. else
  74. echo "$key=$val" >> $OLD_KERNEL_PARAMS_FILE
  75. fi
  76. done
  77. }
  78. _merge_new_kernel_params() {
  79. local new_tmp_val
  80. for key in "${!NEW_KERNEL_PARAMS[@]}"; do
  81. new_tmp_val="${NEW_KERNEL_PARAMS[$key]}"
  82. if grep -q "^$key=" $OLD_KERNEL_PARAMS_FILE; then
  83. sed -i "s|^$key=.*|$key=$new_tmp_val|g" $OLD_KERNEL_PARAMS_FILE
  84. else
  85. echo "$key=$new_tmp_val" >> $OLD_KERNEL_PARAMS_FILE
  86. fi
  87. done
  88. }
  89. _generate_kernel_cmdline() {
  90. cat $OLD_KERNEL_PARAMS_FILE | tr '\n' ' '
  91. }
  92. mk_grub2(){
  93. if [ -d /sys/firmware/efi ]; then
  94. mkdir -p /boot/efi/EFI/centos
  95. grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
  96. else
  97. grub2-mkconfig -o /boot/grub2/grub.cfg
  98. fi
  99. }
  100. mk_grub2_openeuler(){
  101. if [ -d /sys/firmware/efi ]; then
  102. mkdir -p /boot/efi/EFI/openEuler
  103. grub2-mkconfig -o /boot/efi/EFI/openEuler/grub.cfg
  104. else
  105. grub2-mkconfig -o /boot/grub2/grub.cfg
  106. fi
  107. }
  108. mk_grub_legacy(){
  109. update-grub
  110. }
  111. mk_grub(){
  112. local distro=${1}
  113. if [[ "${distro}" == "centos" ]]; then
  114. mk_grub2
  115. elif [[ "${distro}" == "debian" ]] || [[ "${distro}" == "ubuntu" ]]; then
  116. mk_grub_legacy
  117. elif [[ "${distro}" == "openeuler" ]]; then
  118. mk_grub2_openeuler
  119. else
  120. error_exit "unsupport distro ${distro}!"
  121. fi
  122. }
  123. grub_setup() {
  124. info "Configure grub option..."
  125. local grub_cfg="/etc/default/grub"
  126. local cmdline_param
  127. local idx
  128. local distro=${1}
  129. ensure_file_writable "$grub_cfg"
  130. cmdline_param=$(grep GRUB_CMDLINE_LINUX $grub_cfg | cut -d'"' -f2)
  131. _fill_old_kernel_params $cmdline_param
  132. _merge_new_kernel_params
  133. cmdline_param=$(_generate_kernel_cmdline)
  134. sed -i "s|GRUB_CMDLINE_LINUX=.*|GRUB_CMDLINE_LINUX=\"$cmdline_param\"|g" $grub_cfg
  135. # 删掉 rd.lvm.lv(含)之后,空格之前的所有字符
  136. # 以便解决重启后因未加载 lvm 驱动而卡住的问题
  137. # sed -i -e 's#rd.lvm.lv=[^ ]*##gi' $grub_cfg
  138. # 替换成带有 yn 后缀的内核,只对 centos
  139. if [[ "${distro}" != "centos" ]]; then
  140. mk_grub ${distro}
  141. return
  142. fi
  143. for i in {1..3}; do
  144. idx=$(awk -F\' '$1=="menuentry " {print i++ " : " $2}' $(find /etc/ -name 'grub2*cfg' -exec test -e {} \; -print |head -1 ) |grep -P '\.yn\d{8}\.'|awk '{print $1}' |head -1)
  145. if [ "$idx" -gt "0" ]; then
  146. break
  147. fi
  148. mk_grub ${distro}
  149. done
  150. if grep -q '^GRUB_DEFAULT' $grub_cfg; then
  151. sudo sed -i -e "s#^GRUB_DEFAULT=.*#GRUB_DEFAULT=$idx#" $grub_cfg
  152. else
  153. local tmp_conf=$(mktemp)
  154. cp -fv $conf $tmp_conf
  155. echo "GRUB_DEFAULT=$idx" >> $tmp_conf
  156. sudo mv $tmp_conf $grub_cfg
  157. fi
  158. mk_grub ${distro}
  159. }
  160. vfio_override_script_setup() {
  161. local vfio_override_file="/usr/bin/vfio-pci-override.sh"
  162. info "Add script: ${UCYAN}$vfio_override_file"
  163. cat <<EOF >"$vfio_override_file"
  164. #!/bin/sh
  165. CODE_VGA=0x030000
  166. CODE_3D=0x030200
  167. for i in \$(/usr/bin/find /sys/devices/pci* -name class); do
  168. CLS_CODE=\$(cat "\$i")
  169. if [ \$CLS_CODE == \$CODE_VGA ] || [ \$CLS_CODE == \$CODE_3D ]; then
  170. GPU="\${i%/class}"
  171. BOOT_VGA="\$GPU/boot_vga"
  172. if [ -f "\$BOOT_VGA" ]; then
  173. if [ \$(cat "\$BOOT_VGA") -eq 1 ]; then
  174. continue
  175. fi
  176. fi
  177. AUDIO="\$(echo "\$GPU" | sed -e "s/0$/1/")"
  178. echo "vfio-pci" > "\$GPU/driver_override"
  179. if [ -d "\$AUDIO" ]; then
  180. echo "vfio-pci" > "\$AUDIO/driver_override"
  181. fi
  182. fi
  183. done
  184. modprobe -i vfio-pci
  185. EOF
  186. chmod a+x "$vfio_override_file"
  187. }
  188. modules_setup() {
  189. info "Configure kernel modules..."
  190. local vfio_load_file="/etc/modules-load.d/vfio.conf"
  191. local vfio_conf_file="/etc/modprobe.d/vfio.conf"
  192. local mod_blacklist_file="/etc/modprobe.d/blacklist-gpu.conf"
  193. local kvm_conf_file="/etc/modprobe.d/kvm.conf"
  194. cat <<EOF >"$vfio_load_file"
  195. vfio
  196. vfio_iommu_type1
  197. vfio_pci
  198. EOF
  199. cat <<EOF >"$vfio_conf_file"
  200. install vfio-pci $VFIO_PCI_OVERRIDE_TOOL
  201. EOF
  202. cat <<EOF >"$mod_blacklist_file"
  203. blacklist nouveau
  204. blacklist nvidia
  205. blacklist nvidia_drm
  206. EOF
  207. local kvm_options="options kvm ignore_msrs=1"
  208. if grep -xq "$kvm_options" "$kvm_conf_file"; then
  209. return
  210. else
  211. cat <<EOF >>"$kvm_conf_file"
  212. options kvm ignore_msrs=1
  213. EOF
  214. fi
  215. modprobe vfio
  216. modprobe vfio_pci
  217. }
  218. get_distro() {
  219. distro=($(awk '/^ID=/' /etc/*-release | awk -F'=' '{ print tolower($2) }' | tr -d \"))
  220. echo "${distro[@]}"
  221. }
  222. function findStringInArray() {
  223. local search="$1"
  224. shift
  225. for element in "$@"; do
  226. if [[ "$element" == "$search" ]]; then
  227. return 0
  228. fi
  229. done
  230. return 1
  231. }
  232. env_check() {
  233. if [[ $EUID -ne 0 ]]; then
  234. error_exit "You need sudo or root to run this script."
  235. fi
  236. local supported_distros=("centos" "debian" "openeuler" "ubuntu")
  237. local distros=($(get_distro))
  238. local found_supported_distro=false
  239. local unsupported_distros=()
  240. for distro in "${distros[@]}"; do
  241. if findStringInArray "${distro}" "${supported_distros[@]}"; then
  242. found_supported_distro=true
  243. echo "${distro}"
  244. break
  245. else
  246. unsupported_distros+=("${distro}")
  247. fi
  248. done
  249. if [[ $found_supported_distro == false ]]; then
  250. error_exit "The following Linux distributions are not supported: ${unsupported_distros[*]}, only support ${supported_distros[*]}"
  251. fi
  252. }
  253. dracut_ramfs() {
  254. info "Use dracut rebuild initramfs..."
  255. local yn_kernel=$(ls /boot/vmlinuz-* | grep yn | sort -r | head -n 1)
  256. if [ -z "$yn_kernel" ]; then
  257. warn "Dracut ramfs not found cloud customize kernel, skip it"
  258. return
  259. fi
  260. local dracut_vfio_file="/etc/dracut.conf.d/vfio.conf"
  261. cat <<EOF >"$dracut_vfio_file"
  262. add_drivers+=" vfio vfio_iommu_type1 vfio_pci"
  263. EOF
  264. local kernel_release=$(basename $yn_kernel | sed 's/vmlinuz-//g')
  265. dracut -f --kver $kernel_release --install find --install $VFIO_PCI_OVERRIDE_TOOL
  266. }
  267. main() {
  268. distro=$(env_check)
  269. grub_setup ${distro}
  270. vfio_override_script_setup
  271. modules_setup
  272. # refresh_pciids # has been replaced by "Update pciids" task in playbook.
  273. dracut_ramfs
  274. info "All done, ${UCYAN}REBOOT to make it work"
  275. }
  276. main