health_check.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package autoscaling
  15. import (
  16. "context"
  17. "time"
  18. "yunion.io/x/jsonutils"
  19. "yunion.io/x/log"
  20. "yunion.io/x/pkg/util/sets"
  21. "yunion.io/x/sqlchemy"
  22. apis "yunion.io/x/onecloud/pkg/apis/compute"
  23. "yunion.io/x/onecloud/pkg/cloudcommon/db"
  24. "yunion.io/x/onecloud/pkg/compute/models"
  25. "yunion.io/x/onecloud/pkg/mcclient"
  26. "yunion.io/x/onecloud/pkg/mcclient/auth"
  27. "yunion.io/x/onecloud/pkg/mcclient/modules/compute"
  28. )
  29. var UnhealthStatus = []string{
  30. apis.VM_UNKNOWN, apis.VM_SCHEDULE_FAILED, apis.VM_NETWORK_FAILED, apis.VM_DEVICE_FAILED, apis.VM_DISK_FAILED,
  31. apis.VM_DEPLOY_FAILED, apis.VM_READY, apis.VM_START_FAILED,
  32. }
  33. type sUnnormalGuest struct {
  34. Id string `json:"id"`
  35. Status string `json:"status"`
  36. ScalngGroupId string `json:"scaling_group_id"`
  37. CreateCompleteTime time.Time `json:"create_complete_time"`
  38. }
  39. func (asc *SASController) HealthCheckSql() *sqlchemy.SQuery {
  40. now := time.Now()
  41. sgSubQ := models.ScalingGroupManager.Query("id").IsTrue("enabled").LT("next_check_time", now).SubQuery()
  42. sggQ := models.ScalingGroupGuestManager.Query("guest_id", "scaling_group_id", "updated_at").Equals("guest_status", apis.SG_GUEST_STATUS_READY)
  43. sggSubQ := sggQ.Join(sgSubQ, sqlchemy.Equals(sgSubQ.Field("id"), sggQ.Field("scaling_group_id"))).SubQuery()
  44. q := models.GuestManager.Query("id", "status").In("status", UnhealthStatus)
  45. q = q.Join(sggSubQ, sqlchemy.Equals(q.Field("id"), sggSubQ.Field("guest_id")))
  46. q = q.AppendField(sggSubQ.Field("scaling_group_id"), sggSubQ.Field("updated_at", "create_complete_time"))
  47. return q
  48. }
  49. func (asc *SASController) CheckInstanceHealth(ctx context.Context, userCred mcclient.TokenCredential, isStart bool) {
  50. // Fetch all unhealth status instace
  51. unnormalGuests := make([]sUnnormalGuest, 0, 5)
  52. scalingGroupIdSet := sets.NewString()
  53. rows, err := asc.HealthCheckSql().Rows()
  54. if err != nil {
  55. log.Errorf("GuestManager's SQuery.Rows: %s", err.Error())
  56. return
  57. }
  58. for rows.Next() {
  59. var ug sUnnormalGuest
  60. rows.Scan(&ug.Id, &ug.Status, &ug.ScalngGroupId, &ug.CreateCompleteTime)
  61. scalingGroupIdSet.Insert(ug.ScalngGroupId)
  62. unnormalGuests = append(unnormalGuests, ug)
  63. }
  64. rows.Close()
  65. // fetch all ScalingGroup
  66. scalingGroups := make([]models.SScalingGroup, 0, scalingGroupIdSet.Len())
  67. q := models.ScalingGroupManager.Query().In("id", scalingGroupIdSet.UnsortedList())
  68. err = db.FetchModelObjects(models.ScalingGroupManager, q, &scalingGroups)
  69. if err != nil {
  70. log.Errorf("unable to fetch ScalingGroup")
  71. return
  72. }
  73. scalingGroupMap := make(map[string]*models.SScalingGroup, len(scalingGroups))
  74. for i := range scalingGroups {
  75. scalingGroupMap[scalingGroups[i].GetId()] = &scalingGroups[i]
  76. }
  77. // update NextCheckTime for ScalingGroup
  78. now := time.Now()
  79. for i := range scalingGroups {
  80. sg := &scalingGroups[i]
  81. _, err := db.Update(sg, func() error {
  82. sg.NextCheckTime = now.Add(time.Duration(sg.HealthCheckCycle) * time.Second)
  83. return nil
  84. })
  85. if err != nil {
  86. log.Errorf("unable to update NextCheckTime for ScalingGroup '%s'", sg.GetId())
  87. }
  88. }
  89. // request to detach
  90. readyGuestList := make([]string, 0, 5)
  91. readyGuestMap := make(map[string]string, 5)
  92. removeParams := jsonutils.NewDict()
  93. removeParams.Set("delete_server", jsonutils.JSONTrue)
  94. removeParams.Set("auto", jsonutils.JSONTrue)
  95. session := auth.GetSession(ctx, userCred, "")
  96. for i := range unnormalGuests {
  97. ug := unnormalGuests[i]
  98. if ug.CreateCompleteTime.Add(time.Duration(scalingGroupMap[ug.ScalngGroupId].HealthCheckGov) * time.Second).After(now) {
  99. continue
  100. }
  101. if ug.Status == apis.VM_READY {
  102. readyGuestList = append(readyGuestList, ug.Id)
  103. readyGuestMap[ug.Id] = ug.ScalngGroupId
  104. continue
  105. }
  106. removeParams.Set("scaling_group", jsonutils.NewString(ug.ScalngGroupId))
  107. _, err := compute.Servers.PerformAction(session, ug.Id, "detach-scaling-group", removeParams)
  108. if err != nil {
  109. log.Errorf("Request Detach Scaling Group failed: %s", err.Error())
  110. }
  111. }
  112. // check NextCheckTime for ScalngGroup
  113. if len(readyGuestList) > 0 {
  114. go func() {
  115. time.Sleep(2 * time.Minute)
  116. q := models.GuestManager.Query("id").In("id", readyGuestList).Equals("status", apis.VM_READY)
  117. rows, err := q.Rows()
  118. if err != nil {
  119. log.Errorf("GuestManager's SQuery.Rows: %s", err.Error())
  120. }
  121. removeGuestList := make([]string, 0, len(readyGuestList)/2)
  122. for rows.Next() {
  123. var g string
  124. rows.Scan(&g)
  125. removeGuestList = append(removeGuestList, g)
  126. }
  127. rows.Close()
  128. for _, id := range removeGuestList {
  129. removeParams.Set("scaling_group", jsonutils.NewString(readyGuestMap[id]))
  130. _, err := compute.Servers.PerformAction(session, id, "detach-scaling-group", removeParams)
  131. if err != nil {
  132. log.Errorf("Request Detach Scaling Group failed: %s", err.Error())
  133. }
  134. }
  135. }()
  136. }
  137. }