| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200 |
- // Copyright 2019 Yunion
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package guestman
- import (
- "context"
- "os"
- "path/filepath"
- "strconv"
- "sync"
- "time"
- "github.com/docker/go-units"
- runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
- "yunion.io/x/log"
- "yunion.io/x/pkg/errors"
- "yunion.io/x/onecloud/pkg/hostman/options"
- )
// containerLogRotateInterval is the period of the ticker that drives the
// background container log rotation loop.
const containerLogRotateInterval = 10 * time.Minute

// containerLogRotateMu guards a rotation run. RunContainerLogRotate acquires
// it with TryLock, so a run that overlaps another one is skipped, not queued.
var containerLogRotateMu sync.Mutex
- // RunContainerLogRotate runs log rotation for all running pod containers once.
- // It is safe to call concurrently; only one run executes at a time.
- func RunContainerLogRotate(ctx context.Context, manager *SGuestManager, maxSizeBytes int64, maxFiles int) {
- if maxSizeBytes <= 0 || maxFiles <= 0 {
- return
- }
- if !containerLogRotateMu.TryLock() {
- return
- }
- defer containerLogRotateMu.Unlock()
- cri := manager.host.GetCRI()
- if cri == nil {
- return
- }
- runtimeClient := cri.GetRuntimeClient()
- if runtimeClient == nil {
- return
- }
- manager.Servers.Range(func(_id, value interface{}) bool {
- select {
- case <-ctx.Done():
- return false
- default:
- }
- pod, ok := value.(PodInstance)
- if !ok {
- return true
- }
- if !pod.IsRunning() {
- return true
- }
- logDir := pod.GetPodLogDir()
- for ctrId, criId := range pod.ListContainerCriIds() {
- if criId == "" {
- continue
- }
- logPath := filepath.Join(logDir, pod.GetContainerLogPath(ctrId))
- if err := rotateContainerLog(ctx, logPath, criId, maxSizeBytes, maxFiles, runtimeClient); err != nil {
- log.Warningf("rotate container log %s (cri %s): %v", logPath, criId, err)
- }
- }
- return true
- })
- }
- // rotateContainerLog rotates the container log file at logPath if it exceeds maxSizeBytes,
- // keeps up to maxFiles (current + rotated), then calls ReopenContainerLog for the container.
- func rotateContainerLog(ctx context.Context, logPath, criId string, maxSizeBytes int64, maxFiles int, runtimeClient runtimeapi.RuntimeServiceClient) error {
- dir := filepath.Dir(logPath)
- base := filepath.Base(logPath)
- // Always try to cleanup stale rotated logs, even if we don't rotate this time.
- cleanupRotatedLogs(dir, base, maxFiles)
- info, err := os.Stat(logPath)
- if err != nil {
- if os.IsNotExist(err) {
- return nil
- }
- return err
- }
- if !info.Mode().IsRegular() {
- return nil
- }
- if info.Size() < maxSizeBytes {
- return nil
- }
- // Rename from high to low so we don't overwrite: .(n-1)->.n, ..., .1->.2, then main->.1
- for i := maxFiles - 1; i >= 2; i-- {
- src := filepath.Join(dir, base+"."+strconv.Itoa(i-1))
- dst := filepath.Join(dir, base+"."+strconv.Itoa(i))
- if _, err := os.Stat(src); err != nil {
- if os.IsNotExist(err) {
- continue
- }
- return err
- }
- if err := os.Rename(src, dst); err != nil {
- log.Warningf("rename %s -> %s: %v", src, dst, err)
- }
- }
- // Then rotate current log to .1
- dst1 := filepath.Join(dir, base+".1")
- if err := os.Rename(logPath, dst1); err != nil {
- return errors.Wrapf(err, "rename %s -> %s", logPath, dst1)
- }
- // Cleanup again after shift.
- cleanupRotatedLogs(dir, base, maxFiles)
- _, err = runtimeClient.ReopenContainerLog(ctx, &runtimeapi.ReopenContainerLogRequest{
- ContainerId: criId,
- })
- if err != nil {
- // If runtime failed to reopen the log, try best to rename back so containerd keeps writing to logPath.
- if _, statErr := os.Stat(logPath); os.IsNotExist(statErr) {
- if rbErr := os.Rename(dst1, logPath); rbErr != nil && !os.IsNotExist(rbErr) {
- log.Warningf("reopen log failed, rename back %s -> %s: %v", dst1, logPath, rbErr)
- }
- }
- return errors.Wrap(err, "ReopenContainerLog")
- }
- return nil
- }
- func cleanupRotatedLogs(dir, base string, maxFiles int) {
- // Keep only .1 .. .(maxFiles-1). Remove .maxFiles and above.
- if maxFiles <= 0 {
- return
- }
- // Stop after some consecutive not-exist to avoid infinite loop.
- miss := 0
- for i := maxFiles; i < maxFiles+100; i++ {
- p := filepath.Join(dir, base+"."+strconv.Itoa(i))
- if err := os.Remove(p); err != nil {
- if os.IsNotExist(err) {
- miss++
- if miss >= 20 {
- return
- }
- continue
- }
- log.Errorf("remove old container log %s: %v", p, err)
- continue
- }
- log.Infof("remove old container log %s", p)
- miss = 0
- }
- }
- // StartContainerLogRotateLoop starts a goroutine that periodically runs container log rotation
- // when options are enabled. Call from guestman after manager and host are ready.
- func StartContainerLogRotateLoop(manager *SGuestManager) {
- maxSizeStr := options.HostOptions.ContainerLogMaxSize
- maxFiles := options.HostOptions.ContainerLogMaxFiles
- if maxSizeStr == "" || maxFiles <= 0 {
- return
- }
- maxSizeBytes, err := units.FromHumanSize(maxSizeStr)
- if err != nil {
- log.Warningf("parse ContainerLogMaxSize %q: %v, disable container log rotate", maxSizeStr, err)
- return
- }
- if maxSizeBytes <= 0 {
- return
- }
- go func() {
- ticker := time.NewTicker(containerLogRotateInterval)
- defer ticker.Stop()
- for range ticker.C {
- ctx, cancel := context.WithTimeout(context.Background(), 2*containerLogRotateInterval)
- RunContainerLogRotate(ctx, manager, maxSizeBytes, maxFiles)
- cancel()
- }
- }()
- log.Infof("container log rotate started: maxSize=%s, maxFiles=%d", maxSizeStr, maxFiles)
- }
|