service.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. #!/usr/bin/env python3
  2. # encoding: utf-8
  3. from __future__ import unicode_literals
  4. from .ocboot import KEY_DISK_PATHS, KEY_ENABLE_CONTAINERD, KEY_HOST_NETWORKS, KEY_IMAGE_REPOSITORY, KEY_K8S_CONTROLPLANE_HOST, ClickhouseConfig, NodeConfig, Config, get_ansible_global_vars_by_cluster, KEY_PRIMARY_MASTER_NODE_IP
  5. from .cmd import run_ansible_playbook
  6. from .ansible import get_inventory_config
  7. from .parser import help_d, inject_add_hostagent_options, inject_primary_node_options, inject_ssh_options
  8. from .parser import inject_add_nodes_options
  9. from .parser import inject_auto_backup_options
  10. from .parser import inject_ssh_hosts_options
  11. from . import utils
  12. from . import ansible
  13. from . import consts
  14. from .cluster import construct_cluster
  15. from .ocboot import WorkerConfig, Config
  16. from .ocboot import get_ansible_global_vars
  17. from .ocboot import KEY_ONECLOUD_VERSION
  18. from .ssh import SSHClient
  19. from .color import RB as Red
  20. class BaseService(object):
  21. def __init__(self, subparsers, action, help_text):
  22. self.action = action
  23. parser = subparsers.add_parser(
  24. action, help=help_text)
  25. self.inject_options(parser)
  26. self._set_parser_defaults(parser)
  27. def inject_options(self, parser):
  28. pass
  29. def _set_parser_defaults(self, parser):
  30. parser.set_defaults(func=self.do_action)
  31. def do_action(self, args):
  32. pass
  33. class Service(BaseService):
  34. def __init__(self, subparsers, action):
  35. super(Service, self).__init__(subparsers,
  36. action, "%s services of hosts" % action)
  37. def inject_options(self, parser):
  38. inject_ssh_hosts_options(parser)
  39. def do_action(self, args):
  40. config = NodesConfig(args.target_node_hosts,
  41. args.ssh_user,
  42. args.ssh_private_file,
  43. args.ssh_port)
  44. return config.run(self.action)
  45. class NodesConfig(object):
  46. def __init__(self, target_nodes, ssh_user, ssh_private_file, ssh_port):
  47. target_nodes = list(set(target_nodes))
  48. conf = [{'hostname': target_node, 'user': ssh_user, 'port': ssh_port}
  49. for target_node in target_nodes]
  50. conf_dict = {
  51. 'hosts': conf,
  52. }
  53. self.nodes_config = NodeConfig(Config(conf_dict))
  54. def run(self, action, vars=None):
  55. inventory_content = get_inventory_config(self.nodes_config)
  56. yaml_content = utils.to_yaml(inventory_content)
  57. filepath = '/tmp/cluster_%s_node_services_inventory.yml' % action
  58. with open(filepath, 'w') as f:
  59. f.write(yaml_content)
  60. # start run upgrade playbook
  61. run_ansible_playbook(
  62. filepath,
  63. './onecloud/%s-services.yml' % action,
  64. vars=vars,
  65. )
  66. class PrimaryMasterService(BaseService):
  67. def __init__(self, subparsers, action):
  68. super().__init__(subparsers, action, f"{action} service of primary_master_host")
  69. def inject_options(self, parser):
  70. inject_primary_node_options(parser)
  71. inject_ssh_options(parser)
  72. def get_ansible_vars(self, args, cluster, primary_master_host):
  73. vars = get_ansible_global_vars(
  74. cluster.get_current_version(),
  75. cluster.is_using_k3s())
  76. vars[KEY_K8S_CONTROLPLANE_HOST] = primary_master_host
  77. return vars
  78. def do_action(self, args):
  79. cluster = construct_cluster(
  80. args.primary_master_host,
  81. args.ssh_user,
  82. args.ssh_private_file,
  83. args.ssh_port)
  84. vars = self.get_ansible_vars(args, cluster, args.primary_master_host)
  85. config = NodesConfig(
  86. [args.primary_master_host],
  87. args.ssh_user,
  88. args.ssh_private_file,
  89. args.ssh_port,
  90. )
  91. return config.run(self.action, vars=vars)
  92. class AddNodeBaseService(BaseService):
  93. def __init__(self, subparsers, action, help_text):
  94. super(AddNodeBaseService, self).__init__(subparsers, action, help_text)
  95. def inject_options(self, parser):
  96. inject_add_nodes_options(parser)
  97. class AddNodeService(AddNodeBaseService):
  98. def __init__(self, subparsers, action, help_text):
  99. super(AddNodeService, self).__init__(subparsers, action, help_text)
  100. def inject_options(self, parser):
  101. super(AddNodeService, self).inject_options(parser)
  102. inject_add_hostagent_options(parser)
  103. def do_action(self, args):
  104. cluster = construct_cluster(
  105. args.primary_master_host,
  106. args.ssh_user,
  107. args.ssh_private_file,
  108. args.ssh_port)
  109. config = AddNodesConfig(cluster,
  110. args.target_node_hosts,
  111. args.ssh_user,
  112. args.ssh_private_file,
  113. args.ssh_port,
  114. args.ssh_node_port,
  115. args.enable_host_on_vm)
  116. return config.run()
  117. class AddLBAgentService(AddNodeBaseService):
  118. def __init__(self, subparsers, action, help_text):
  119. super(AddLBAgentService, self).__init__(subparsers, action, help_text)
  120. def inject_options(self, parser):
  121. super(AddLBAgentService, self).inject_options(parser)
  122. def do_action(self, args):
  123. cluster = construct_cluster(
  124. args.primary_master_host,
  125. args.ssh_user,
  126. args.ssh_private_file,
  127. args.ssh_port)
  128. if args.ip_type == '':
  129. if utils.is_ipv4(args.primary_master_host):
  130. args.ip_type = consts.IP_TYPE_IPV4
  131. elif utils.is_ipv6(args.primary_master_host):
  132. args.ip_type = consts.IP_TYPE_IPV6
  133. else:
  134. raise ValueError("ip type is not set and cannot be determined from primary master host")
  135. # 处理双栈配置
  136. kwargs = {
  137. 'ip_dual_conf': getattr(args, 'ip_dual_conf', None),
  138. 'ip_type': args.ip_type,
  139. 'offline_data_path': args.offline_data_path,
  140. }
  141. # 如果是双栈配置,需要处理IPv4和IPv6地址
  142. if args.ip_type == consts.IP_TYPE_DUAL_STACK and hasattr(args, 'ip_dual_conf') and args.ip_dual_conf:
  143. # 确定哪个是IPv4,哪个是IPv6
  144. if utils.is_ipv4(args.target_node_hosts[0]):
  145. # 主IP是IPv4,ip_dual_conf是IPv6
  146. kwargs['node_ip_v4'] = args.target_node_hosts[0]
  147. kwargs['node_ip_v6'] = args.ip_dual_conf
  148. else:
  149. # 主IP是IPv6,ip_dual_conf是IPv4
  150. kwargs['node_ip_v4'] = args.ip_dual_conf
  151. kwargs['node_ip_v6'] = args.target_node_hosts[0]
  152. config = AddNodesConfig(cluster,
  153. args.target_node_hosts,
  154. args.ssh_user,
  155. args.ssh_private_file,
  156. args.ssh_port,
  157. args.ssh_node_port,
  158. enable_host_on_vm=False, enable_lbagent=True, **kwargs)
  159. return config.run()
class AddNodesConfig(object):
    # Builds the ansible inventory and vars needed to join new worker or
    # lbagent nodes to an existing cluster, after validating that none of
    # the target nodes already belongs to it.

    def __init__(self, cluster, target_nodes, ssh_user, ssh_private_file,
                 controlplane_ssh_port, ssh_port,
                 enable_host_on_vm=False,
                 enable_lbagent=False,
                 **kwargs):
        """Validate *target_nodes* against *cluster* and prepare the worker config.

        cluster: existing cluster handle (see construct_cluster).
        target_nodes: host addresses to join; duplicates are dropped.
        controlplane_ssh_port: SSH port of the control-plane node.
        ssh_port: SSH port used to reach each target node.
        enable_host_on_vm: run the host agent on a VM-based node.
        enable_lbagent: deploy the lbagent instead of the host agent
            (the two are mutually exclusive).
        kwargs: optional extras read below — runtime, host_networks,
            disk_paths, offline_data_path, ip_type, ip_dual_conf,
            node_ip_v4/node_ip_v6, enable_ai_env, gpu_device_virtual_number,
            nvidia_driver_installer_path, cuda_installer_path.

        Raises Exception when a target node already exists in the cluster
        (matched either by IP or by SSH-reported hostname).
        """
        target_nodes = list(set(target_nodes))
        target_hostnames = [node.get_hostname() for node in cluster.k8s_nodes]
        self.enable_containerd = kwargs.get('runtime') == 'containerd'
        for target_node in target_nodes:
            # check IP:
            node = cluster.find_node_by_ip_or_hostname(target_node)
            if node is not None:
                raise Exception(Red("Node %s(%s) already exists in cluster (By IP Check). " % (
                    node.get_hostname(), node.get_ip())))
            # check Hostname: SSH to the candidate node and compare its
            # reported hostname against the cluster's known node hostnames.
            cli = SSHClient(
                target_node,
                ssh_user,
                ssh_private_file,
                ssh_port
            )
            target_hostname = cli.get_hostname()
            if target_hostname in target_hostnames:
                raise Exception(Red(f"Node {target_hostname}[{target_node}] already exists in cluster (By Hostname Check). "))
        self.current_version = cluster.get_current_version()
        controlplane_host = cluster.get_cluster_controlplane_host()
        primary_master_node_ip = cluster.get_primary_master_node_ip()
        nodes_conf = [{'hostname': target_node, 'user': ssh_user,
                       'port': ssh_port} for target_node in target_nodes]
        as_host = True
        if enable_lbagent:
            # can't enable lbagent and host at same time
            as_host = False
        # NOTE(review): "woker" is a pre-existing typo in this local name,
        # kept as-is to avoid gratuitous churn.
        woker_config_dict = {
            KEY_ONECLOUD_VERSION: self.current_version,
            'hosts': nodes_conf,
            'controlplane_host': controlplane_host,
            'as_controller': False,
            'as_host': as_host,
            'as_host_on_vm': enable_host_on_vm,
            'controlplane_ssh_port': controlplane_ssh_port,
            'enable_lbagent': enable_lbagent,
            KEY_ENABLE_CONTAINERD: self.enable_containerd,
            KEY_HOST_NETWORKS: kwargs.get(KEY_HOST_NETWORKS, None),
            KEY_DISK_PATHS: kwargs.get(KEY_DISK_PATHS, None),
            KEY_PRIMARY_MASTER_NODE_IP: primary_master_node_ip,
        }
        (repo, is_insecure) = cluster.get_repository()
        if is_insecure:
            # Mirror the cluster's insecure-registry setting on the new nodes.
            woker_config_dict['insecure_registries'] = [repo]
        self.worker_config = WorkerConfig(Config(woker_config_dict))
        self.image_repository = cluster.get_image_repository()
        self.is_using_k3s = cluster.is_using_k3s()
        # Optional settings forwarded to get_vars() below.
        self.offline_data_path = kwargs.get('offline_data_path', None)
        self.ip_type = kwargs.get('ip_type', None)
        self.ip_dual_conf = kwargs.get("ip_dual_conf", None)
        self.node_ip_v4 = kwargs.get("node_ip_v4", None)
        self.node_ip_v6 = kwargs.get("node_ip_v6", None)
        self.enable_ai_env = kwargs.get('enable_ai_env', False)
        self.gpu_device_virtual_number = kwargs.get('gpu_device_virtual_number', 2)
        self.nvidia_driver_installer_path = kwargs.get('nvidia_driver_installer_path')
        self.cuda_installer_path = kwargs.get('cuda_installer_path')
        utils.pr_green(f"Get current cluster: {controlplane_host}, primary_master_node_ip: {primary_master_node_ip}, version: {self.current_version}, is_using_k3s: {self.is_using_k3s}")

    def run(self):
        """Write the worker inventory and run the add-node playbook.

        Returns the playbook's exit code.
        """
        inventory_content = ansible.get_inventory_config(self.worker_config)
        yaml_content = utils.to_yaml(inventory_content)
        filepath = './cluster_add_node_inventory.yml'
        with open(filepath, 'w') as f:
            f.write(yaml_content)
        # start run upgrade playbook
        return_code = run_ansible_playbook(
            filepath,
            './onecloud/add-node.yml',
            vars=self.get_vars(),
        )
        return return_code

    def get_vars(self):
        """Assemble the extra ansible vars for the add-node playbook."""
        vars = get_ansible_global_vars(self.current_version, self.is_using_k3s)
        vars[KEY_IMAGE_REPOSITORY] = self.image_repository
        if self.offline_data_path:
            # Offline/ISO install: pull everything from the local registry,
            # overriding the repository chosen above.
            vars['offline_data_path'] = self.offline_data_path
            vars['iso_install_mode'] = True
            vars['docker_insecure_registries'] = ['private-registry.onecloud:5000']
            vars[KEY_IMAGE_REPOSITORY] = 'private-registry.onecloud:5000/yunion'
        if self.ip_type:
            vars['ip_type'] = self.ip_type
        if self.node_ip_v4:
            vars['node_ip_v4'] = self.node_ip_v4
        if self.node_ip_v6:
            vars['node_ip_v6'] = self.node_ip_v6
        if self.enable_ai_env:
            # GPU/AI node: virtualize the GPU and optionally ship installers.
            vars['enable_ai_env'] = True
            vars['gpu_device_virtual_number'] = self.gpu_device_virtual_number
            if self.nvidia_driver_installer_path:
                vars['nvidia_driver_installer_path'] = self.nvidia_driver_installer_path
            if self.cuda_installer_path:
                vars['cuda_installer_path'] = self.cuda_installer_path
        return vars
  259. class AutoBackupConfig(NodesConfig):
  260. def __init__(self, target_nodes, ssh_user, ssh_private_file, ssh_port,
  261. backup_path,
  262. light_backup,
  263. max_backups,
  264. max_disck_percentage):
  265. target_nodes = list(set(target_nodes))
  266. conf = [{'hostname': target_node, 'user': ssh_user, 'port': ssh_port}
  267. for target_node in target_nodes]
  268. conf_dict = {
  269. 'hosts': conf,
  270. }
  271. self.vars = {
  272. 'backup_path': backup_path,
  273. 'light_backup': 'true' if light_backup else 'false',
  274. 'max_backups': max_backups,
  275. 'max_disck_percentage': max_disck_percentage,
  276. }
  277. self.nodes_config = NodeConfig(Config(conf_dict))
  278. def run(self, action):
  279. inventory_content = get_inventory_config(self.nodes_config)
  280. yaml_content = utils.to_yaml(inventory_content)
  281. filepath = '/tmp/cluster_%s_node_services_inventory.yml' % action
  282. with open(filepath, 'w') as f:
  283. f.write(yaml_content)
  284. # start run upgrade playbook
  285. run_ansible_playbook(
  286. filepath,
  287. './onecloud/%s-services.yml' % action,
  288. vars=self.vars,
  289. )
  290. class AutoBackupService(Service):
  291. def __init__(self, subparsers, action):
  292. super(AutoBackupService, self).__init__(subparsers, action)
  293. def inject_options(self, parser):
  294. super(AutoBackupService, self).inject_options(parser)
  295. inject_auto_backup_options(parser)
  296. def do_action(self, args):
  297. config = AutoBackupConfig(args.target_node_hosts,
  298. args.ssh_user,
  299. args.ssh_private_file,
  300. args.ssh_port,
  301. args.backup_path,
  302. args.light,
  303. args.max_backups,
  304. args.max_disk_percentage,
  305. )
  306. return config.run(self.action)
  307. class ClickhouseServiceConfig(object):
  308. def __init__(self, cluster, primary_host,
  309. ch_pwd, ch_port, offline_data_path,
  310. ssh_user='root', ssh_port=22):
  311. cfg = {
  312. 'host': primary_host,
  313. 'user': ssh_user,
  314. 'port': ssh_port,
  315. 'ch_password': ch_pwd,
  316. 'ch_port': ch_port,
  317. }
  318. self.cluster = cluster
  319. self.primary_host = primary_host
  320. self.ch_config = ClickhouseConfig(Config(cfg))
  321. self.offline_data_path = offline_data_path
  322. def run(self):
  323. inventory_content = ansible.get_inventory_config(self.ch_config)
  324. yaml_content = utils.to_yaml(inventory_content)
  325. filepath = './cluster_clickhouse_inventory.yml'
  326. with open(filepath, 'w') as f:
  327. f.write(yaml_content)
  328. # start run upgrade playbook
  329. return_code = run_ansible_playbook(
  330. filepath,
  331. './onecloud/clickhouse-services.yml',
  332. vars=self.get_vars(),
  333. )
  334. def get_vars(self):
  335. vars = self.ch_config.ansible_vars()
  336. vars['offline_data_path'] = self.offline_data_path
  337. vars[KEY_K8S_CONTROLPLANE_HOST] = self.primary_host
  338. global_vars = get_ansible_global_vars_by_cluster(self.cluster)
  339. vars.update(global_vars)
  340. return vars
  341. class ClickhouseService(BaseService):
  342. def __init__(self, subparsers, action):
  343. super().__init__(subparsers, action, 'deploy clickhouse')
  344. def inject_options(self, parser):
  345. inject_primary_node_options(parser)
  346. inject_ssh_options(parser)
  347. parser.add_argument("offline_data_path",
  348. metavar='OFFLINE_DATA_PATH',
  349. help="offline ISO mount point, e.g., /mnt")
  350. parser.add_argument("--ch-password", dest="ch_password",
  351. help=help_d("clickhouse password"))
  352. parser.add_argument("--ch-port", dest="ch_port",
  353. default=9000, type=int,
  354. help=help_d("clickhouse port"))
  355. def do_action(self, args):
  356. # config =
  357. if args.ch_password is None:
  358. args.ch_password = utils.generage_random_string(12)
  359. print(f'generated clickhouse password: {args.ch_password}')
  360. cluster = construct_cluster(
  361. args.primary_master_host,
  362. args.ssh_user,
  363. args.ssh_private_file,
  364. args.ssh_port)
  365. print(f'found cluster {cluster.get_current_version()}')
  366. config = ClickhouseServiceConfig(cluster,
  367. args.primary_master_host, args.ch_password, args.ch_port, args.offline_data_path, args.ssh_user, args.ssh_port)
  368. return config.run()