# install-cuda.yml
# Ansible tasks that install the NVIDIA CUDA toolkit from a runfile installer
# when nvcc is not already present under /usr/local/cuda.
  # Probe for an existing CUDA toolkit by asking nvcc for its version.
  # The result is registered as `cuda_check`; its return code is non-zero when
  # nvcc is missing or cannot run.
  - name: Check if CUDA is already installed
    # No shell features are needed here, so run the binary directly.
    command: /usr/local/cuda/bin/nvcc --version
    register: cuda_check
    # Read-only probe: never report "changed", and run it in check mode as well
    # so that cuda_check.rc is available to the tasks that consume it.
    changed_when: false
    check_mode: false
    # A missing nvcc is an expected outcome, not a failure of the play.
    failed_when: false
  # Convert the nvcc probe result into the boolean fact `cuda_installed`.
  # If the probe did not produce a return code (for example because it was
  # skipped), default to a non-zero rc so the toolkit counts as "not installed"
  # instead of aborting on an undefined attribute.
  - name: Set CUDA installation flag
    set_fact:
      cuda_installed: "{{ (cuda_check.rc | default(1)) == 0 }}"
  # Install the CUDA toolkit from a runfile installer located on the controller.
  #
  # Inputs:
  #   cuda_installer_path - path of the installer file on the controller.
  #   cuda_installed      - boolean fact; the whole block is skipped when true.
  # Effects on the remote host:
  #   - copies the installer to /opt/nvidia/<basename> (directory must be
  #     writable by the connecting user, since the copy runs without become),
  #   - creates /opt/nvidia/tmp and uses it as the installer's temp directory,
  #   - runs the installer with `--silent --toolkit`, logging to
  #     /tmp/cuda_install.log,
  #   - fails if /usr/local/cuda/bin/nvcc does not exist afterwards.
  - block:
      # Fail early with a readable message instead of an undefined-variable
      # error from the first task that dereferences the path.
      - name: Ensure the CUDA installer path was provided
        assert:
          that:
            - cuda_installer_path is defined
            - cuda_installer_path | length > 0
          fail_msg: >-
            cuda_installer_path must be set to the path of the CUDA installer
            on the controller.

      - name: Extract CUDA installer filename
        set_fact:
          cuda_installer: "{{ cuda_installer_path | basename }}"

      # synchronize drives rsync, which must exist on the receiving side too.
      - name: Ensure rsync is installed on remote host
        package:
          name: rsync
          state: present
        become: true

      - name: Copy CUDA installer to remote host using synchronize
        ansible.builtin.synchronize:
          src: "{{ cuda_installer_path }}"
          dest: "/opt/nvidia/{{ cuda_installer }}"
          mode: push
        delegate_to: localhost
        become: false

      # Set the executable bit once, right after the copy, so that every later
      # task that runs the installer (including the --help probe) can execute it.
      - name: Make CUDA installer executable
        file:
          path: "/opt/nvidia/{{ cuda_installer }}"
          mode: a+x
        become: true

      # Diagnostic only: the help text is shown by the next task and a failure
      # here must not stop the installation.
      - name: Check CUDA installer help
        shell: |
          cd /opt/nvidia
          ./{{ cuda_installer | quote }} --help
        become: true
        args:
          executable: /bin/bash
        register: cuda_help
        changed_when: false
        failed_when: false

      - name: Display CUDA installer help
        debug:
          var: cuda_help.stdout_lines

      - name: Create temporary directory for CUDA extraction
        file:
          path: /opt/nvidia/tmp
          state: directory
          mode: '0755'
        become: true

      # Diagnostic only: records the detected file type and size of the
      # installer; it does not compare against a checksum.
      - name: Verify CUDA installer file integrity
        shell: |
          cd /opt/nvidia
          file ./{{ cuda_installer | quote }}
          ls -lh ./{{ cuda_installer | quote }}
        become: true
        register: cuda_file_check
        changed_when: false

      - name: Display CUDA installer file info
        debug:
          var: cuda_file_check.stdout_lines

      # Run the installer detached from any terminal (setsid, stdin from
      # /dev/null) and capture all output in a log file. The log is printed on
      # both success and failure so it ends up in the registered result, and the
      # installer's own exit code is propagated on failure.
      - name: Install CUDA
        shell: |
          cd /opt/nvidia
          export TMPDIR=/opt/nvidia/tmp
          export TEMP=/opt/nvidia/tmp
          export TMP=/opt/nvidia/tmp
          setsid ./{{ cuda_installer | quote }} --silent --toolkit < /dev/null > /tmp/cuda_install.log 2>&1 || {
            EXIT_CODE=$?
            cat /tmp/cuda_install.log
            exit $EXIT_CODE
          }
          cat /tmp/cuda_install.log
        become: true
        args:
          executable: /bin/bash
        register: cuda_install_result
        environment:
          DEBIAN_FRONTEND: noninteractive
          TERM: dumb
          TMPDIR: /opt/nvidia/tmp
          TEMP: /opt/nvidia/tmp
          TMP: /opt/nvidia/tmp

      # Post-install check: the task fails (non-zero exit) when nvcc is absent.
      - name: Check CUDA installation result
        shell: |
          if [ -f /usr/local/cuda/bin/nvcc ]; then
            /usr/local/cuda/bin/nvcc --version
            exit 0
          else
            echo "CUDA installation failed: nvcc not found"
            exit 1
          fi
        become: true
        args:
          executable: /bin/bash
        register: cuda_verify_result
        changed_when: false

      - name: Display CUDA installer output
        debug:
          var: cuda_install_result.stdout_lines

      - name: Display installed nvcc version
        debug:
          var: cuda_verify_result.stdout_lines
    # Skip the whole block when the earlier probe found a working nvcc.
    when: not (cuda_installed | default(false) | bool)