prober.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. /*
  15. Copyright 2015 The Kubernetes Authors.
  16. Licensed under the Apache License, Version 2.0 (the "License");
  17. you may not use this file except in compliance with the License.
  18. You may obtain a copy of the License at
  19. http://www.apache.org/licenses/LICENSE-2.0
  20. Unless required by applicable law or agreed to in writing, software
  21. distributed under the License is distributed on an "AS IS" BASIS,
  22. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  23. See the License for the specific language governing permissions and
  24. limitations under the License.
  25. */
  26. package prober
  27. import (
  28. "fmt"
  29. "io"
  30. "strings"
  31. "time"
  32. "yunion.io/x/log"
  33. "yunion.io/x/pkg/errors"
  34. "yunion.io/x/onecloud/pkg/apis"
  35. hostapi "yunion.io/x/onecloud/pkg/apis/host"
  36. "yunion.io/x/onecloud/pkg/hostman/container/prober/results"
  37. "yunion.io/x/onecloud/pkg/hostman/guestman/container"
  38. "yunion.io/x/onecloud/pkg/util/exec"
  39. "yunion.io/x/onecloud/pkg/util/probe"
  40. execprobe "yunion.io/x/onecloud/pkg/util/probe/exec"
  41. tcpprobe "yunion.io/x/onecloud/pkg/util/probe/tcp"
  42. )
  // maxProbeRetries is the number of attempts handed to runProbeWithRetries
  // for a single probe before its last error is reported.
  const maxProbeRetries = 3
  44. // Prober helps to check the liveness of a container.
  45. type prober struct {
  46. exec execprobe.Prober
  47. tcp tcpprobe.Prober
  48. runner container.CommandRunner
  49. }
  50. func newProber(runner container.CommandRunner) *prober {
  51. return &prober{
  52. exec: execprobe.New(),
  53. tcp: tcpprobe.New(),
  54. runner: runner,
  55. }
  56. }
  57. // probe probes the container.
  58. func (pb *prober) probe(probeType apis.ContainerProbeType, pod IPod, container *hostapi.ContainerDesc) (results.ProbeResult, error) {
  59. var probeSpec *apis.ContainerProbe
  60. switch probeType {
  61. //case apis.ContainerProbeTypeLiveness:
  62. // probeSpec = container.Spec.LivenessProbe
  63. case apis.ContainerProbeTypeStartup:
  64. probeSpec = container.Spec.StartupProbe
  65. default:
  66. err := errors.Errorf("unknown probe type: %q", probeType)
  67. return results.NewFailure(err.Error()), err
  68. }
  69. ctrName := fmt.Sprintf("%s:%s", pod.GetDesc().Name, container.Name)
  70. if probeSpec == nil {
  71. log.Warningf("%s probe for %s is nil", probeType, ctrName)
  72. return results.NewSuccess("probe is not defined"), nil
  73. }
  74. result, output, err := pb.runProbeWithRetries(probeType, probeSpec, pod, container, maxProbeRetries)
  75. var msg string
  76. if err != nil || (result != probe.Success && result != probe.Warning) {
  77. // Probe failed in one way or another
  78. if err != nil {
  79. msg = fmt.Sprintf("%s probe for %q errored: %v", probeType, ctrName, err)
  80. log.Errorf("%s", msg)
  81. } else {
  82. // result != probe.Success
  83. msg = fmt.Sprintf("%s probe for %q failed (%v): %s", probeType, ctrName, result, output)
  84. log.Debugf("%s", msg)
  85. }
  86. return results.NewFailure(msg), err
  87. }
  88. if result == probe.Warning {
  89. msg = fmt.Sprintf("%s probe for %q succeeded with a warning: %s", probeType, ctrName, output)
  90. log.Warningf("%s", msg)
  91. } else {
  92. msg = fmt.Sprintf("%s probe for %q succeeded", probeType, ctrName)
  93. //log.Debugf(msg)
  94. }
  95. return results.NewSuccess(msg), nil
  96. }
  97. // runProbeWithRetries tries to probe the container in a finite loop, it returns the last result
  98. // if it never succeeds.
  99. func (pb *prober) runProbeWithRetries(probeType apis.ContainerProbeType, p *apis.ContainerProbe, pod IPod, container *hostapi.ContainerDesc, retries int) (probe.Result, string, error) {
  100. var err error
  101. var result probe.Result
  102. var output string
  103. for i := 0; i < retries; i++ {
  104. result, output, err = pb.runProbe(probeType, p, pod, container)
  105. if err == nil {
  106. return result, output, nil
  107. }
  108. }
  109. return result, output, err
  110. }
  111. func (pb *prober) runProbe(probeType apis.ContainerProbeType, p *apis.ContainerProbe, pod IPod, container *hostapi.ContainerDesc) (probe.Result, string, error) {
  112. timeout := time.Duration(p.TimeoutSeconds) * time.Second
  113. if p.Exec != nil {
  114. // log.Debugf("Exec-Probe Pod: %v, Container: %v, Command: %v", pod.GetDesc().Name, container.Name, p.Exec.Command)
  115. return pb.exec.Probe(pb.newExecInContainer(pod, container, p.Exec.Command, timeout), strings.Join(p.Exec.Command, " "))
  116. }
  117. if p.TCPSocket != nil {
  118. port := p.TCPSocket.Port
  119. host := p.TCPSocket.Host
  120. if host == "" {
  121. for _, nic := range pod.GetDesc().Nics {
  122. if nic.Ip != "" {
  123. host = nic.Ip
  124. break
  125. }
  126. }
  127. if host == "" {
  128. return probe.Unknown, "", errors.Errorf("not found guest ip")
  129. }
  130. }
  131. // log.Debugf("TCP-Probe Host: %v, Port: %v, Timeout: %v", host, port, timeout)
  132. return pb.tcp.Probe(host, port, timeout)
  133. }
  134. errMsg := fmt.Sprintf("Failed to find probe builder for pod %v, container: %v", pod.GetName(), container.Name)
  135. log.Warningf("%s", errMsg)
  136. return probe.Unknown, "", errors.Error(errMsg)
  137. }
  // execInContainer wraps an in-container command execution so that it can be
  // handed to the exec prober as a command object.
  type execInContainer struct {
  	// run executes the command in the container. The combined stdout and
  	// stderr output is always returned, along with an error if one occurred.
  	run func() ([]byte, error)
  	// writer, when set through SetStdout or SetStderr, receives the output
  	// produced by Start.
  	writer io.Writer
  	// cmd is the command line reported by Command.
  	cmd []string
  }
  145. func (pb *prober) newExecInContainer(pod IPod, container *hostapi.ContainerDesc, cmd []string, timeout time.Duration) exec.Cmd {
  146. return &execInContainer{
  147. cmd: cmd,
  148. run: func() ([]byte, error) {
  149. return pb.runner.RunInContainer(pod.GetId(), container.Id, cmd, timeout)
  150. },
  151. }
  152. }
  153. func (eic *execInContainer) Command() []string {
  154. return eic.cmd
  155. }
  156. func (eic *execInContainer) Run() error {
  157. return nil
  158. }
  159. func (eic *execInContainer) CombinedOutput() ([]byte, error) {
  160. return eic.run()
  161. }
  162. func (eic *execInContainer) Output() ([]byte, error) {
  163. return nil, fmt.Errorf("unimplemented")
  164. }
  165. func (eic *execInContainer) SetDir(dir string) {
  166. //unimplemented
  167. }
  168. func (eic *execInContainer) SetStdin(in io.Reader) {
  169. //unimplemented
  170. }
  171. func (eic *execInContainer) SetStdout(out io.Writer) {
  172. eic.writer = out
  173. }
  174. func (eic *execInContainer) SetStderr(out io.Writer) {
  175. eic.writer = out
  176. }
  177. func (eic *execInContainer) SetEnv(env []string) {
  178. //unimplemented
  179. }
  180. func (eic *execInContainer) Stop() {
  181. //unimplemented
  182. }
  183. func (eic *execInContainer) Start() error {
  184. data, err := eic.run()
  185. if eic.writer != nil {
  186. eic.writer.Write(data)
  187. }
  188. return err
  189. }
  190. func (eic *execInContainer) Wait() error {
  191. return nil
  192. }
  193. func (eic *execInContainer) StdoutPipe() (io.ReadCloser, error) {
  194. return nil, fmt.Errorf("unimplemented")
  195. }
  196. func (eic *execInContainer) StderrPipe() (io.ReadCloser, error) {
  197. return nil, fmt.Errorf("unimplemented")
  198. }