pod_logrotate.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package guestman
  15. import (
  16. "context"
  17. "os"
  18. "path/filepath"
  19. "strconv"
  20. "sync"
  21. "time"
  22. "github.com/docker/go-units"
  23. runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
  24. "yunion.io/x/log"
  25. "yunion.io/x/pkg/errors"
  26. "yunion.io/x/onecloud/pkg/hostman/options"
  27. )
  28. const (
  29. containerLogRotateInterval = 10 * time.Minute
  30. )
  31. var (
  32. containerLogRotateMu sync.Mutex
  33. )
  34. // RunContainerLogRotate runs log rotation for all running pod containers once.
  35. // It is safe to call concurrently; only one run executes at a time.
  36. func RunContainerLogRotate(ctx context.Context, manager *SGuestManager, maxSizeBytes int64, maxFiles int) {
  37. if maxSizeBytes <= 0 || maxFiles <= 0 {
  38. return
  39. }
  40. if !containerLogRotateMu.TryLock() {
  41. return
  42. }
  43. defer containerLogRotateMu.Unlock()
  44. cri := manager.host.GetCRI()
  45. if cri == nil {
  46. return
  47. }
  48. runtimeClient := cri.GetRuntimeClient()
  49. if runtimeClient == nil {
  50. return
  51. }
  52. manager.Servers.Range(func(_id, value interface{}) bool {
  53. select {
  54. case <-ctx.Done():
  55. return false
  56. default:
  57. }
  58. pod, ok := value.(PodInstance)
  59. if !ok {
  60. return true
  61. }
  62. if !pod.IsRunning() {
  63. return true
  64. }
  65. logDir := pod.GetPodLogDir()
  66. for ctrId, criId := range pod.ListContainerCriIds() {
  67. if criId == "" {
  68. continue
  69. }
  70. logPath := filepath.Join(logDir, pod.GetContainerLogPath(ctrId))
  71. if err := rotateContainerLog(ctx, logPath, criId, maxSizeBytes, maxFiles, runtimeClient); err != nil {
  72. log.Warningf("rotate container log %s (cri %s): %v", logPath, criId, err)
  73. }
  74. }
  75. return true
  76. })
  77. }
  78. // rotateContainerLog rotates the container log file at logPath if it exceeds maxSizeBytes,
  79. // keeps up to maxFiles (current + rotated), then calls ReopenContainerLog for the container.
  80. func rotateContainerLog(ctx context.Context, logPath, criId string, maxSizeBytes int64, maxFiles int, runtimeClient runtimeapi.RuntimeServiceClient) error {
  81. dir := filepath.Dir(logPath)
  82. base := filepath.Base(logPath)
  83. // Always try to cleanup stale rotated logs, even if we don't rotate this time.
  84. cleanupRotatedLogs(dir, base, maxFiles)
  85. info, err := os.Stat(logPath)
  86. if err != nil {
  87. if os.IsNotExist(err) {
  88. return nil
  89. }
  90. return err
  91. }
  92. if !info.Mode().IsRegular() {
  93. return nil
  94. }
  95. if info.Size() < maxSizeBytes {
  96. return nil
  97. }
  98. // Rename from high to low so we don't overwrite: .(n-1)->.n, ..., .1->.2, then main->.1
  99. for i := maxFiles - 1; i >= 2; i-- {
  100. src := filepath.Join(dir, base+"."+strconv.Itoa(i-1))
  101. dst := filepath.Join(dir, base+"."+strconv.Itoa(i))
  102. if _, err := os.Stat(src); err != nil {
  103. if os.IsNotExist(err) {
  104. continue
  105. }
  106. return err
  107. }
  108. if err := os.Rename(src, dst); err != nil {
  109. log.Warningf("rename %s -> %s: %v", src, dst, err)
  110. }
  111. }
  112. // Then rotate current log to .1
  113. dst1 := filepath.Join(dir, base+".1")
  114. if err := os.Rename(logPath, dst1); err != nil {
  115. return errors.Wrapf(err, "rename %s -> %s", logPath, dst1)
  116. }
  117. // Cleanup again after shift.
  118. cleanupRotatedLogs(dir, base, maxFiles)
  119. _, err = runtimeClient.ReopenContainerLog(ctx, &runtimeapi.ReopenContainerLogRequest{
  120. ContainerId: criId,
  121. })
  122. if err != nil {
  123. // If runtime failed to reopen the log, try best to rename back so containerd keeps writing to logPath.
  124. if _, statErr := os.Stat(logPath); os.IsNotExist(statErr) {
  125. if rbErr := os.Rename(dst1, logPath); rbErr != nil && !os.IsNotExist(rbErr) {
  126. log.Warningf("reopen log failed, rename back %s -> %s: %v", dst1, logPath, rbErr)
  127. }
  128. }
  129. return errors.Wrap(err, "ReopenContainerLog")
  130. }
  131. return nil
  132. }
  133. func cleanupRotatedLogs(dir, base string, maxFiles int) {
  134. // Keep only .1 .. .(maxFiles-1). Remove .maxFiles and above.
  135. if maxFiles <= 0 {
  136. return
  137. }
  138. // Stop after some consecutive not-exist to avoid infinite loop.
  139. miss := 0
  140. for i := maxFiles; i < maxFiles+100; i++ {
  141. p := filepath.Join(dir, base+"."+strconv.Itoa(i))
  142. if err := os.Remove(p); err != nil {
  143. if os.IsNotExist(err) {
  144. miss++
  145. if miss >= 20 {
  146. return
  147. }
  148. continue
  149. }
  150. log.Errorf("remove old container log %s: %v", p, err)
  151. continue
  152. }
  153. log.Infof("remove old container log %s", p)
  154. miss = 0
  155. }
  156. }
  157. // StartContainerLogRotateLoop starts a goroutine that periodically runs container log rotation
  158. // when options are enabled. Call from guestman after manager and host are ready.
  159. func StartContainerLogRotateLoop(manager *SGuestManager) {
  160. maxSizeStr := options.HostOptions.ContainerLogMaxSize
  161. maxFiles := options.HostOptions.ContainerLogMaxFiles
  162. if maxSizeStr == "" || maxFiles <= 0 {
  163. return
  164. }
  165. maxSizeBytes, err := units.FromHumanSize(maxSizeStr)
  166. if err != nil {
  167. log.Warningf("parse ContainerLogMaxSize %q: %v, disable container log rotate", maxSizeStr, err)
  168. return
  169. }
  170. if maxSizeBytes <= 0 {
  171. return
  172. }
  173. go func() {
  174. ticker := time.NewTicker(containerLogRotateInterval)
  175. defer ticker.Stop()
  176. for range ticker.C {
  177. ctx, cancel := context.WithTimeout(context.Background(), 2*containerLogRotateInterval)
  178. RunContainerLogRotate(ctx, manager, maxSizeBytes, maxFiles)
  179. cancel()
  180. }
  181. }()
  182. log.Infof("container log rotate started: maxSize=%s, maxFiles=%d", maxSizeStr, maxFiles)
  183. }