| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
- //go:build linux
- // +build linux
- // Copyright 2021 Google Inc. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- // Collector of resctrl for a container.
- package resctrl
- import (
- "fmt"
- "os"
- "path/filepath"
- "strings"
- "sync"
- "time"
- "k8s.io/klog/v2"
- info "github.com/google/cadvisor/info/v1"
- )
- const noInterval = 0
- type collector struct {
- id string
- interval time.Duration
- getContainerPids func() ([]string, error)
- resctrlPath string
- running bool
- destroyed bool
- numberOfNUMANodes int
- vendorID string
- mu sync.Mutex
- inHostNamespace bool
- }
- func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int, vendorID string, inHostNamespace bool) *collector {
- return &collector{id: id, interval: interval, getContainerPids: getContainerPids, numberOfNUMANodes: numberOfNUMANodes,
- vendorID: vendorID, mu: sync.Mutex{}, inHostNamespace: inHostNamespace}
- }
- func (c *collector) setup() error {
- var err error
- c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace)
- if c.interval != noInterval {
- if err != nil {
- klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err)
- } else {
- c.running = true
- }
- go func() {
- for {
- time.Sleep(c.interval)
- c.mu.Lock()
- if c.destroyed {
- break
- }
- klog.V(5).Infof("Trying to check %q containers control group.", c.id)
- if c.running {
- err = c.checkMonitoringGroup()
- if err != nil {
- c.running = false
- klog.Errorf("Failed to check %q resctrl collector control group: %s \n Trying again in next intervals.", c.id, err)
- }
- } else {
- c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace)
- if err != nil {
- c.running = false
- klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err)
- }
- }
- c.mu.Unlock()
- }
- }()
- } else {
- // There is no interval set, if setup fail, stop.
- if err != nil {
- return fmt.Errorf("failed to setup container %q resctrl collector: %w", c.id, err)
- }
- c.running = true
- }
- return nil
- }
- func (c *collector) checkMonitoringGroup() error {
- newPath, err := prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace)
- if err != nil {
- return fmt.Errorf("couldn't obtain mon_group path: %v", err)
- }
- // Check if container moved between control groups.
- if newPath != c.resctrlPath {
- err = c.clear()
- if err != nil {
- return fmt.Errorf("couldn't clear previous monitoring group: %w", err)
- }
- c.resctrlPath = newPath
- }
- return nil
- }
- func (c *collector) UpdateStats(stats *info.ContainerStats) error {
- c.mu.Lock()
- defer c.mu.Unlock()
- if c.running {
- stats.Resctrl = info.ResctrlStats{}
- resctrlStats, err := getIntelRDTStatsFrom(c.resctrlPath, c.vendorID)
- if err != nil {
- return err
- }
- stats.Resctrl.MemoryBandwidth = make([]info.MemoryBandwidthStats, 0, c.numberOfNUMANodes)
- stats.Resctrl.Cache = make([]info.CacheStats, 0, c.numberOfNUMANodes)
- for _, numaNodeStats := range *resctrlStats.MBMStats {
- stats.Resctrl.MemoryBandwidth = append(stats.Resctrl.MemoryBandwidth,
- info.MemoryBandwidthStats{
- TotalBytes: numaNodeStats.MBMTotalBytes,
- LocalBytes: numaNodeStats.MBMLocalBytes,
- })
- }
- for _, numaNodeStats := range *resctrlStats.CMTStats {
- stats.Resctrl.Cache = append(stats.Resctrl.Cache,
- info.CacheStats{LLCOccupancy: numaNodeStats.LLCOccupancy})
- }
- }
- return nil
- }
- func (c *collector) Destroy() {
- c.mu.Lock()
- defer c.mu.Unlock()
- c.running = false
- err := c.clear()
- if err != nil {
- klog.Errorf("trying to destroy %q resctrl collector but: %v", c.id, err)
- }
- c.destroyed = true
- }
- func (c *collector) clear() error {
- // Not allowed to remove root or undefined resctrl directory.
- if c.id != rootContainer && c.resctrlPath != "" {
- // Remove only own prepared mon group.
- if strings.HasPrefix(filepath.Base(c.resctrlPath), monGroupPrefix) {
- err := os.RemoveAll(c.resctrlPath)
- if err != nil {
- return fmt.Errorf("couldn't clear mon_group: %v", err)
- }
- }
- }
- return nil
- }
|