container.go 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
  14. package v1
  15. import (
  16. "reflect"
  17. "time"
  18. )
  19. type CpuSpec struct {
  20. Limit uint64 `json:"limit"`
  21. MaxLimit uint64 `json:"max_limit"`
  22. Mask string `json:"mask,omitempty"`
  23. Quota uint64 `json:"quota,omitempty"`
  24. Period uint64 `json:"period,omitempty"`
  25. }
  26. type MemorySpec struct {
  27. // The amount of memory requested. Default is unlimited (-1).
  28. // Units: bytes.
  29. Limit uint64 `json:"limit,omitempty"`
  30. // The amount of guaranteed memory. Default is 0.
  31. // Units: bytes.
  32. Reservation uint64 `json:"reservation,omitempty"`
  33. // The amount of swap space requested. Default is unlimited (-1).
  34. // Units: bytes.
  35. SwapLimit uint64 `json:"swap_limit,omitempty"`
  36. }
  37. type ProcessSpec struct {
  38. Limit uint64 `json:"limit,omitempty"`
  39. }
  40. type ContainerSpec struct {
  41. // Time at which the container was created.
  42. CreationTime time.Time `json:"creation_time,omitempty"`
  43. // Metadata labels associated with this container.
  44. Labels map[string]string `json:"labels,omitempty"`
  45. // Metadata envs associated with this container. Only whitelisted envs are added.
  46. Envs map[string]string `json:"envs,omitempty"`
  47. HasCpu bool `json:"has_cpu"`
  48. Cpu CpuSpec `json:"cpu,omitempty"`
  49. HasMemory bool `json:"has_memory"`
  50. Memory MemorySpec `json:"memory,omitempty"`
  51. HasHugetlb bool `json:"has_hugetlb"`
  52. HasNetwork bool `json:"has_network"`
  53. HasProcesses bool `json:"has_processes"`
  54. Processes ProcessSpec `json:"processes,omitempty"`
  55. HasFilesystem bool `json:"has_filesystem"`
  56. // HasDiskIo when true, indicates that DiskIo stats will be available.
  57. HasDiskIo bool `json:"has_diskio"`
  58. HasCustomMetrics bool `json:"has_custom_metrics"`
  59. CustomMetrics []MetricSpec `json:"custom_metrics,omitempty"`
  60. // Image name used for this container.
  61. Image string `json:"image,omitempty"`
  62. }
  63. // Container reference contains enough information to uniquely identify a container
  64. type ContainerReference struct {
  65. // The container id
  66. Id string `json:"id,omitempty"`
  67. // The absolute name of the container. This is unique on the machine.
  68. Name string `json:"name"`
  69. // Other names by which the container is known within a certain namespace.
  70. // This is unique within that namespace.
  71. Aliases []string `json:"aliases,omitempty"`
  72. // Namespace under which the aliases of a container are unique.
  73. // An example of a namespace is "docker" for Docker containers.
  74. Namespace string `json:"namespace,omitempty"`
  75. }
  76. // Sorts by container name.
  77. type ContainerReferenceSlice []ContainerReference
  78. func (s ContainerReferenceSlice) Len() int { return len(s) }
  79. func (s ContainerReferenceSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  80. func (s ContainerReferenceSlice) Less(i, j int) bool { return s[i].Name < s[j].Name }
  81. // ContainerInfoRequest is used when users check a container info from the REST API.
  82. // It specifies how much data users want to get about a container
  83. type ContainerInfoRequest struct {
  84. // Max number of stats to return. Specify -1 for all stats currently available.
  85. // Default: 60
  86. NumStats int `json:"num_stats,omitempty"`
  87. // Start time for which to query information.
  88. // If omitted, the beginning of time is assumed.
  89. Start time.Time `json:"start,omitempty"`
  90. // End time for which to query information.
  91. // If omitted, current time is assumed.
  92. End time.Time `json:"end,omitempty"`
  93. }
  94. // Returns a ContainerInfoRequest with all default values specified.
  95. func DefaultContainerInfoRequest() ContainerInfoRequest {
  96. return ContainerInfoRequest{
  97. NumStats: 60,
  98. }
  99. }
  100. func (r *ContainerInfoRequest) Equals(other ContainerInfoRequest) bool {
  101. return r.NumStats == other.NumStats &&
  102. r.Start.Equal(other.Start) &&
  103. r.End.Equal(other.End)
  104. }
  105. type ContainerInfo struct {
  106. ContainerReference
  107. // The direct subcontainers of the current container.
  108. Subcontainers []ContainerReference `json:"subcontainers,omitempty"`
  109. // The isolation used in the container.
  110. Spec ContainerSpec `json:"spec,omitempty"`
  111. // Historical statistics gathered from the container.
  112. Stats []*ContainerStats `json:"stats,omitempty"`
  113. }
  114. // TODO(vmarmol): Refactor to not need this equality comparison.
  115. // ContainerInfo may be (un)marshaled by json or other en/decoder. In that
  116. // case, the Timestamp field in each stats/sample may not be precisely
  117. // en/decoded. This will lead to small but acceptable differences between a
  118. // ContainerInfo and its encode-then-decode version. Eq() is used to compare
  119. // two ContainerInfo accepting small difference (<10ms) of Time fields.
  120. func (ci *ContainerInfo) Eq(b *ContainerInfo) bool {
  121. // If both ci and b are nil, then Eq() returns true
  122. if ci == nil {
  123. return b == nil
  124. }
  125. if b == nil {
  126. return ci == nil
  127. }
  128. // For fields other than time.Time, we will compare them precisely.
  129. // This would require that any slice should have same order.
  130. if !reflect.DeepEqual(ci.ContainerReference, b.ContainerReference) {
  131. return false
  132. }
  133. if !reflect.DeepEqual(ci.Subcontainers, b.Subcontainers) {
  134. return false
  135. }
  136. if !ci.Spec.Eq(&b.Spec) {
  137. return false
  138. }
  139. for i, expectedStats := range b.Stats {
  140. selfStats := ci.Stats[i]
  141. if !expectedStats.Eq(selfStats) {
  142. return false
  143. }
  144. }
  145. return true
  146. }
  147. func (s *ContainerSpec) Eq(b *ContainerSpec) bool {
  148. // Creation within 1s of each other.
  149. diff := s.CreationTime.Sub(b.CreationTime)
  150. if (diff > time.Second) || (diff < -time.Second) {
  151. return false
  152. }
  153. if s.HasCpu != b.HasCpu {
  154. return false
  155. }
  156. if !reflect.DeepEqual(s.Cpu, b.Cpu) {
  157. return false
  158. }
  159. if s.HasMemory != b.HasMemory {
  160. return false
  161. }
  162. if !reflect.DeepEqual(s.Memory, b.Memory) {
  163. return false
  164. }
  165. if s.HasHugetlb != b.HasHugetlb {
  166. return false
  167. }
  168. if s.HasNetwork != b.HasNetwork {
  169. return false
  170. }
  171. if s.HasProcesses != b.HasProcesses {
  172. return false
  173. }
  174. if s.HasFilesystem != b.HasFilesystem {
  175. return false
  176. }
  177. if s.HasDiskIo != b.HasDiskIo {
  178. return false
  179. }
  180. if s.HasCustomMetrics != b.HasCustomMetrics {
  181. return false
  182. }
  183. if s.Image != b.Image {
  184. return false
  185. }
  186. return true
  187. }
  188. func (ci *ContainerInfo) StatsAfter(ref time.Time) []*ContainerStats {
  189. n := len(ci.Stats) + 1
  190. for i, s := range ci.Stats {
  191. if s.Timestamp.After(ref) {
  192. n = i
  193. break
  194. }
  195. }
  196. if n > len(ci.Stats) {
  197. return nil
  198. }
  199. return ci.Stats[n:]
  200. }
  201. func (ci *ContainerInfo) StatsStartTime() time.Time {
  202. var ret time.Time
  203. for _, s := range ci.Stats {
  204. if s.Timestamp.Before(ret) || ret.IsZero() {
  205. ret = s.Timestamp
  206. }
  207. }
  208. return ret
  209. }
  210. func (ci *ContainerInfo) StatsEndTime() time.Time {
  211. var ret time.Time
  212. for i := len(ci.Stats) - 1; i >= 0; i-- {
  213. s := ci.Stats[i]
  214. if s.Timestamp.After(ret) {
  215. ret = s.Timestamp
  216. }
  217. }
  218. return ret
  219. }
  220. // This mirrors kernel internal structure.
  221. type LoadStats struct {
  222. // Number of sleeping tasks.
  223. NrSleeping uint64 `json:"nr_sleeping"`
  224. // Number of running tasks.
  225. NrRunning uint64 `json:"nr_running"`
  226. // Number of tasks in stopped state
  227. NrStopped uint64 `json:"nr_stopped"`
  228. // Number of tasks in uninterruptible state
  229. NrUninterruptible uint64 `json:"nr_uninterruptible"`
  230. // Number of tasks waiting on IO
  231. NrIoWait uint64 `json:"nr_io_wait"`
  232. }
  233. // CPU usage time statistics.
  234. type CpuUsage struct {
  235. // Total CPU usage.
  236. // Unit: nanoseconds.
  237. Total uint64 `json:"total"`
  238. // Per CPU/core usage of the container.
  239. // Unit: nanoseconds.
  240. PerCpu []uint64 `json:"per_cpu_usage,omitempty"`
  241. // Time spent in user space.
  242. // Unit: nanoseconds.
  243. User uint64 `json:"user"`
  244. // Time spent in kernel space.
  245. // Unit: nanoseconds.
  246. System uint64 `json:"system"`
  247. }
  248. // Cpu Completely Fair Scheduler statistics.
  249. type CpuCFS struct {
  250. // Total number of elapsed enforcement intervals.
  251. Periods uint64 `json:"periods"`
  252. // Total number of times tasks in the cgroup have been throttled.
  253. ThrottledPeriods uint64 `json:"throttled_periods"`
  254. // Total time duration for which tasks in the cgroup have been throttled.
  255. // Unit: nanoseconds.
  256. ThrottledTime uint64 `json:"throttled_time"`
  257. }
  258. // Cpu Aggregated scheduler statistics
  259. type CpuSchedstat struct {
  260. // https://www.kernel.org/doc/Documentation/scheduler/sched-stats.txt
  261. // time spent on the cpu
  262. RunTime uint64 `json:"run_time"`
  263. // time spent waiting on a runqueue
  264. RunqueueTime uint64 `json:"runqueue_time"`
  265. // # of timeslices run on this cpu
  266. RunPeriods uint64 `json:"run_periods"`
  267. }
  268. // All CPU usage metrics are cumulative from the creation of the container
  269. type CpuStats struct {
  270. Usage CpuUsage `json:"usage"`
  271. CFS CpuCFS `json:"cfs"`
  272. Schedstat CpuSchedstat `json:"schedstat"`
  273. // Smoothed average of number of runnable threads x 1000.
  274. // We multiply by thousand to avoid using floats, but preserving precision.
  275. // Load is smoothed over the last 10 seconds. Instantaneous value can be read
  276. // from LoadStats.NrRunning.
  277. LoadAverage int32 `json:"load_average"`
  278. }
  279. type PerDiskStats struct {
  280. Device string `json:"device"`
  281. Major uint64 `json:"major"`
  282. Minor uint64 `json:"minor"`
  283. Stats map[string]uint64 `json:"stats"`
  284. }
  285. type DiskIoStats struct {
  286. IoServiceBytes []PerDiskStats `json:"io_service_bytes,omitempty"`
  287. IoServiced []PerDiskStats `json:"io_serviced,omitempty"`
  288. IoQueued []PerDiskStats `json:"io_queued,omitempty"`
  289. Sectors []PerDiskStats `json:"sectors,omitempty"`
  290. IoServiceTime []PerDiskStats `json:"io_service_time,omitempty"`
  291. IoWaitTime []PerDiskStats `json:"io_wait_time,omitempty"`
  292. IoMerged []PerDiskStats `json:"io_merged,omitempty"`
  293. IoTime []PerDiskStats `json:"io_time,omitempty"`
  294. }
  295. type HugetlbStats struct {
  296. // current res_counter usage for hugetlb
  297. Usage uint64 `json:"usage,omitempty"`
  298. // maximum usage ever recorded.
  299. MaxUsage uint64 `json:"max_usage,omitempty"`
  300. // number of times hugetlb usage allocation failure.
  301. Failcnt uint64 `json:"failcnt"`
  302. }
  303. type MemoryStats struct {
  304. // Current memory usage, this includes all memory regardless of when it was
  305. // accessed.
  306. // Units: Bytes.
  307. Usage uint64 `json:"usage"`
  308. // Maximum memory usage recorded.
  309. // Units: Bytes.
  310. MaxUsage uint64 `json:"max_usage"`
  311. // Number of bytes of page cache memory.
  312. // Units: Bytes.
  313. Cache uint64 `json:"cache"`
  314. // The amount of anonymous and swap cache memory (includes transparent
  315. // hugepages).
  316. // Units: Bytes.
  317. RSS uint64 `json:"rss"`
  318. // The amount of swap currently used by the processes in this cgroup
  319. // Units: Bytes.
  320. Swap uint64 `json:"swap"`
  321. // The amount of memory used for mapped files (includes tmpfs/shmem)
  322. MappedFile uint64 `json:"mapped_file"`
  323. // The amount of working set memory, this includes recently accessed memory,
  324. // dirty memory, and kernel memory. Working set is <= "usage".
  325. // Units: Bytes.
  326. WorkingSet uint64 `json:"working_set"`
  327. Failcnt uint64 `json:"failcnt"`
  328. ContainerData MemoryStatsMemoryData `json:"container_data,omitempty"`
  329. HierarchicalData MemoryStatsMemoryData `json:"hierarchical_data,omitempty"`
  330. }
  331. type CPUSetStats struct {
  332. MemoryMigrate uint64 `json:"memory_migrate"`
  333. }
  334. type MemoryNumaStats struct {
  335. File map[uint8]uint64 `json:"file,omitempty"`
  336. Anon map[uint8]uint64 `json:"anon,omitempty"`
  337. Unevictable map[uint8]uint64 `json:"unevictable,omitempty"`
  338. }
  339. type MemoryStatsMemoryData struct {
  340. Pgfault uint64 `json:"pgfault"`
  341. Pgmajfault uint64 `json:"pgmajfault"`
  342. NumaStats MemoryNumaStats `json:"numa_stats,omitempty"`
  343. }
  344. type InterfaceStats struct {
  345. // The name of the interface.
  346. Name string `json:"name"`
  347. // Cumulative count of bytes received.
  348. RxBytes uint64 `json:"rx_bytes"`
  349. // Cumulative count of packets received.
  350. RxPackets uint64 `json:"rx_packets"`
  351. // Cumulative count of receive errors encountered.
  352. RxErrors uint64 `json:"rx_errors"`
  353. // Cumulative count of packets dropped while receiving.
  354. RxDropped uint64 `json:"rx_dropped"`
  355. // Cumulative count of bytes transmitted.
  356. TxBytes uint64 `json:"tx_bytes"`
  357. // Cumulative count of packets transmitted.
  358. TxPackets uint64 `json:"tx_packets"`
  359. // Cumulative count of transmit errors encountered.
  360. TxErrors uint64 `json:"tx_errors"`
  361. // Cumulative count of packets dropped while transmitting.
  362. TxDropped uint64 `json:"tx_dropped"`
  363. }
  364. type NetworkStats struct {
  365. InterfaceStats `json:",inline"`
  366. Interfaces []InterfaceStats `json:"interfaces,omitempty"`
  367. // TCP connection stats (Established, Listen...)
  368. Tcp TcpStat `json:"tcp"`
  369. // TCP6 connection stats (Established, Listen...)
  370. Tcp6 TcpStat `json:"tcp6"`
  371. // UDP connection stats
  372. Udp UdpStat `json:"udp"`
  373. // UDP6 connection stats
  374. Udp6 UdpStat `json:"udp6"`
  375. // TCP advanced stats
  376. TcpAdvanced TcpAdvancedStat `json:"tcp_advanced"`
  377. }
  378. type TcpStat struct {
  379. // Count of TCP connections in state "Established"
  380. Established uint64
  381. // Count of TCP connections in state "Syn_Sent"
  382. SynSent uint64
  383. // Count of TCP connections in state "Syn_Recv"
  384. SynRecv uint64
  385. // Count of TCP connections in state "Fin_Wait1"
  386. FinWait1 uint64
  387. // Count of TCP connections in state "Fin_Wait2"
  388. FinWait2 uint64
  389. // Count of TCP connections in state "Time_Wait
  390. TimeWait uint64
  391. // Count of TCP connections in state "Close"
  392. Close uint64
  393. // Count of TCP connections in state "Close_Wait"
  394. CloseWait uint64
  395. // Count of TCP connections in state "Listen_Ack"
  396. LastAck uint64
  397. // Count of TCP connections in state "Listen"
  398. Listen uint64
  399. // Count of TCP connections in state "Closing"
  400. Closing uint64
  401. }
  402. type TcpAdvancedStat struct {
  403. // The algorithm used to determine the timeout value used for
  404. // retransmitting unacknowledged octets, ref: RFC2698, default 1
  405. RtoAlgorithm uint64
  406. // The minimum value permitted by a TCP implementation for the
  407. // retransmission timeout, measured in milliseconds, default 200ms
  408. RtoMin uint64
  409. // The maximum value permitted by a TCP implementation for the
  410. // retransmission timeout, measured in milliseconds, default 120s
  411. RtoMax uint64
  412. // The limit on the total number of TCP connections the entity
  413. // can support., default -1, i.e. infinity
  414. MaxConn int64
  415. // The number of times TCP connections have made a direct
  416. // transition to the SYN-SENT state from the CLOSED state.
  417. ActiveOpens uint64
  418. // The number of times TCP connections have made a direct
  419. // transition to the SYN-RCVD state from the LISTEN state.
  420. PassiveOpens uint64
  421. // The number of times TCP connections have made a direct
  422. // transition to the CLOSED state from either the SYN-SENT
  423. // state or the SYN-RCVD state, plus the number of times TCP
  424. // connections have made a direct transition to the LISTEN
  425. // state from the SYN-RCVD state.
  426. AttemptFails uint64
  427. // The number of times TCP connections have made a direct
  428. // transition to the CLOSED state from either the ESTABLISHED
  429. // state or the CLOSE-WAIT state.
  430. EstabResets uint64
  431. // The number of TCP connections for which the current state
  432. // is either ESTABLISHED or CLOSE- WAIT.
  433. CurrEstab uint64
  434. // The total number of segments received, including those
  435. // received in error.
  436. InSegs uint64
  437. // The total number of segments sent, including those on
  438. // current connections but excluding those containing only
  439. // retransmitted octets.
  440. OutSegs uint64
  441. // The total number of segments retransmitted - that is, the
  442. // number of TCP segments transmitted containing one or more
  443. // previously transmitted octets.
  444. RetransSegs uint64
  445. // The total number of segments received in error (e.g., bad
  446. // TCP checksums).
  447. InErrs uint64
  448. // The number of TCP segments sent containing the RST flag.
  449. OutRsts uint64
  450. // The number of IP Packets with checksum errors
  451. InCsumErrors uint64
  452. // The number of resets received for embryonic SYN_RECV sockets
  453. EmbryonicRsts uint64
  454. // The number of SYN cookies sent
  455. SyncookiesSent uint64
  456. // The number of SYN cookies received
  457. SyncookiesRecv uint64
  458. // The number of invalid SYN cookies received
  459. SyncookiesFailed uint64
  460. // The number of packets pruned from receive queue because of socket buffer overrun
  461. PruneCalled uint64
  462. // The number of packets pruned from receive queue
  463. RcvPruned uint64
  464. // The number of packets dropped from out-of-order queue because of socket buffer overrun
  465. OfoPruned uint64
  466. // The number of ICMP packets dropped because they were out-of-window
  467. OutOfWindowIcmps uint64
  468. // The number of ICMP packets dropped because socket was locked
  469. LockDroppedIcmps uint64
  470. // The number of TCP sockets finished time wait in fast timer
  471. TW uint64
  472. // The number of time wait sockets recycled by time stamp
  473. TWRecycled uint64
  474. // The number of TCP sockets finished time wait in slow timer
  475. TWKilled uint64
  476. // counter, if no more mem for TIME-WAIT struct, +1
  477. TCPTimeWaitOverflow uint64
  478. // The number of RTO timer first timeout times
  479. TCPTimeouts uint64
  480. // The number of fake timeouts detected by F-RTO
  481. TCPSpuriousRTOs uint64
  482. // The number of send Tail Loss Probe (TLP) times by Probe Timeout(PTO)
  483. TCPLossProbes uint64
  484. // The number of recovery times by TLP
  485. TCPLossProbeRecovery uint64
  486. // The number of RTO failed times when in Recovery state, and remote end has no sack
  487. TCPRenoRecoveryFail uint64
  488. // The number of RTO failed times when in Recovery state, and remote end has sack
  489. TCPSackRecoveryFail uint64
  490. // The number of RTO failed times when in TCP_CA_Disorder state, and remote end has no sack
  491. TCPRenoFailures uint64
  492. // The number of RTO failed times when in TCP_CA_Disorder state, and remote end has sack
  493. TCPSackFailures uint64
  494. // The number of RTO failed times when in TCP_CA_Loss state,
  495. TCPLossFailures uint64
  496. // The number of delayed acks sent
  497. DelayedACKs uint64
  498. // The number of delayed acks further delayed because of locked socket
  499. DelayedACKLocked uint64
  500. // The number of quick ack mode was activated times
  501. DelayedACKLost uint64
  502. // The number of times the listen queue of a socket overflowed
  503. ListenOverflows uint64
  504. // The number of SYNs to LISTEN sockets dropped
  505. ListenDrops uint64
  506. // The number of packet headers predicted
  507. TCPHPHits uint64
  508. // The number of acknowledgments not containing data payload received
  509. TCPPureAcks uint64
  510. // The number of predicted acknowledgments
  511. TCPHPAcks uint64
  512. // The number of times recovered from packet loss due to fast retransmit
  513. TCPRenoRecovery uint64
  514. // The number of SACK retransmits failed
  515. TCPSackRecovery uint64
  516. // The number of bad SACK blocks received
  517. TCPSACKReneging uint64
  518. // The number of detected reordering times using FACK
  519. TCPFACKReorder uint64
  520. // The number of detected reordering times using SACK
  521. TCPSACKReorder uint64
  522. // The number of detected reordering times using Reno
  523. TCPRenoReorder uint64
  524. // The number of detected reordering times using time stamp
  525. TCPTSReorder uint64
  526. // The number of congestion windows fully recovered without slow start
  527. TCPFullUndo uint64
  528. // The number of congestion windows partially recovered using Hoe heuristic
  529. TCPPartialUndo uint64
  530. // The number of congestion windows recovered without slow start by DSACK
  531. TCPDSACKUndo uint64
  532. // The number of congestion windows recovered without slow start after partial ack
  533. TCPLossUndo uint64
  534. // The number of fast retransmits
  535. TCPFastRetrans uint64
  536. // The number of retransmits in slow start
  537. TCPSlowStartRetrans uint64
  538. // The number of retransmits lost
  539. TCPLostRetransmit uint64
  540. // The number of retransmits failed, including FastRetrans, SlowStartRetrans
  541. TCPRetransFail uint64
  542. // he number of packets collapsed in receive queue due to low socket buffer
  543. TCPRcvCollapsed uint64
  544. // The number of DSACKs sent for old packets
  545. TCPDSACKOldSent uint64
  546. // The number of DSACKs sent for out of order packets
  547. TCPDSACKOfoSent uint64
  548. // The number of DSACKs received
  549. TCPDSACKRecv uint64
  550. // The number of DSACKs for out of order packets received
  551. TCPDSACKOfoRecv uint64
  552. // The number of connections reset due to unexpected data
  553. TCPAbortOnData uint64
  554. // The number of connections reset due to early user close
  555. TCPAbortOnClose uint64
  556. // The number of connections aborted due to memory pressure
  557. TCPAbortOnMemory uint64
  558. // The number of connections aborted due to timeout
  559. TCPAbortOnTimeout uint64
  560. // The number of connections aborted after user close in linger timeout
  561. TCPAbortOnLinger uint64
  562. // The number of times unable to send RST due to no memory
  563. TCPAbortFailed uint64
  564. // The number of TCP ran low on memory times
  565. TCPMemoryPressures uint64
  566. // The number of TCP cumulative duration of
  567. // memory pressure events, by ms
  568. TCPMemoryPressuresChrono uint64
  569. // The number of SACKs discard
  570. TCPSACKDiscard uint64
  571. // The number of DSACKs ignore old
  572. TCPDSACKIgnoredOld uint64
  573. // The number of DSACKs ignore no undo
  574. TCPDSACKIgnoredNoUndo uint64
  575. // The number of MD5 not found
  576. TCPMD5NotFound uint64
  577. // The number of MD5 unexpected
  578. TCPMD5Unexpected uint64
  579. // The number of MD5 failed
  580. TCPMD5Failure uint64
  581. // The number of Sack shifted
  582. TCPSackShifted uint64
  583. // The number of Sack merged
  584. TCPSackMerged uint64
  585. // The number of Sack shift fall back
  586. TCPSackShiftFallback uint64
  587. // The number of Backlog drop
  588. TCPBacklogDrop uint64
  589. // The number of PFmemalloc drop
  590. PFMemallocDrop uint64
  591. // The number of memalloc drop
  592. TCPMinTTLDrop uint64
  593. // The number of DeferAccept drop
  594. TCPDeferAcceptDrop uint64
  595. // The number of IP reverse path filter
  596. IPReversePathFilter uint64
  597. // The number of request full do cookies
  598. TCPReqQFullDoCookies uint64
  599. // The number of request full drop
  600. TCPReqQFullDrop uint64
  601. // number of successful outbound TFO connections
  602. TCPFastOpenActive uint64
  603. // number of SYN-ACK packets received that did not acknowledge data
  604. // sent in the SYN packet and caused a retransmissions without SYN data.
  605. TCPFastOpenActiveFail uint64
  606. // number of successful inbound TFO connections
  607. TCPFastOpenPassive uint64
  608. // number of inbound SYN packets with TFO cookie that was invalid
  609. TCPFastOpenPassiveFail uint64
  610. // number of inbound SYN packets that will have TFO disabled because
  611. // the socket has exceeded the max queue length
  612. TCPFastOpenListenOverflow uint64
  613. // number of inbound SYN packets requesting TFO with TFO set but no cookie
  614. TCPFastOpenCookieReqd uint64
  615. // number of SYN and SYN/ACK retransmits to break down retransmissions
  616. // into SYN, fast-retransmits, timeout retransmits, etc.
  617. TCPSynRetrans uint64
  618. // number of outgoing packets with original data
  619. // (excluding retransmission but including data-in-SYN).
  620. TCPOrigDataSent uint64
  621. // The number of active connections rejected because of time stamp
  622. PAWSActive uint64
  623. // The number of packetes rejected in established connections because of timestamp
  624. PAWSEstab uint64
  625. }
  626. type UdpStat struct {
  627. // Count of UDP sockets in state "Listen"
  628. Listen uint64
  629. // Count of UDP packets dropped by the IP stack
  630. Dropped uint64
  631. // Count of packets Queued for Receieve
  632. RxQueued uint64
  633. // Count of packets Queued for Transmit
  634. TxQueued uint64
  635. }
  636. type FsStats struct {
  637. // The block device name associated with the filesystem.
  638. Device string `json:"device,omitempty"`
  639. // Type of the filesytem.
  640. Type string `json:"type"`
  641. // Number of bytes that can be consumed by the container on this filesystem.
  642. Limit uint64 `json:"capacity"`
  643. // Number of bytes that is consumed by the container on this filesystem.
  644. Usage uint64 `json:"usage"`
  645. // Base Usage that is consumed by the container's writable layer.
  646. // This field is only applicable for docker container's as of now.
  647. BaseUsage uint64 `json:"base_usage"`
  648. // Number of bytes available for non-root user.
  649. Available uint64 `json:"available"`
  650. // HasInodes when true, indicates that Inodes info will be available.
  651. HasInodes bool `json:"has_inodes"`
  652. // Number of Inodes
  653. Inodes uint64 `json:"inodes"`
  654. // Number of available Inodes
  655. InodesFree uint64 `json:"inodes_free"`
  656. // Number of reads completed
  657. // This is the total number of reads completed successfully.
  658. ReadsCompleted uint64 `json:"reads_completed"`
  659. // Number of reads merged
  660. // Reads and writes which are adjacent to each other may be merged for
  661. // efficiency. Thus two 4K reads may become one 8K read before it is
  662. // ultimately handed to the disk, and so it will be counted (and queued)
  663. // as only one I/O. This field lets you know how often this was done.
  664. ReadsMerged uint64 `json:"reads_merged"`
  665. // Number of sectors read
  666. // This is the total number of sectors read successfully.
  667. SectorsRead uint64 `json:"sectors_read"`
  668. // Number of milliseconds spent reading
  669. // This is the total number of milliseconds spent by all reads (as
  670. // measured from __make_request() to end_that_request_last()).
  671. ReadTime uint64 `json:"read_time"`
  672. // Number of writes completed
  673. // This is the total number of writes completed successfully.
  674. WritesCompleted uint64 `json:"writes_completed"`
  675. // Number of writes merged
  676. // See the description of reads merged.
  677. WritesMerged uint64 `json:"writes_merged"`
  678. // Number of sectors written
  679. // This is the total number of sectors written successfully.
  680. SectorsWritten uint64 `json:"sectors_written"`
  681. // Number of milliseconds spent writing
  682. // This is the total number of milliseconds spent by all writes (as
  683. // measured from __make_request() to end_that_request_last()).
  684. WriteTime uint64 `json:"write_time"`
  685. // Number of I/Os currently in progress
  686. // The only field that should go to zero. Incremented as requests are
  687. // given to appropriate struct request_queue and decremented as they finish.
  688. IoInProgress uint64 `json:"io_in_progress"`
  689. // Number of milliseconds spent doing I/Os
  690. // This field increases so long as field 9 is nonzero.
  691. IoTime uint64 `json:"io_time"`
  692. // weighted number of milliseconds spent doing I/Os
  693. // This field is incremented at each I/O start, I/O completion, I/O
  694. // merge, or read of these stats by the number of I/Os in progress
  695. // (field 9) times the number of milliseconds spent doing I/O since the
  696. // last update of this field. This can provide an easy measure of both
  697. // I/O completion time and the backlog that may be accumulating.
  698. WeightedIoTime uint64 `json:"weighted_io_time"`
  699. }
  700. type AcceleratorStats struct {
  701. // Make of the accelerator (nvidia, amd, google etc.)
  702. Make string `json:"make"`
  703. // Model of the accelerator (tesla-p100, tesla-k80 etc.)
  704. Model string `json:"model"`
  705. // ID of the accelerator.
  706. ID string `json:"id"`
  707. // Total accelerator memory.
  708. // unit: bytes
  709. MemoryTotal uint64 `json:"memory_total"`
  710. // Total accelerator memory allocated.
  711. // unit: bytes
  712. MemoryUsed uint64 `json:"memory_used"`
  713. // Percent of time over the past sample period during which
  714. // the accelerator was actively processing.
  715. DutyCycle uint64 `json:"duty_cycle"`
  716. }
  717. // PerfStat represents value of a single monitored perf event.
  718. type PerfStat struct {
  719. PerfValue
  720. // CPU that perf event was measured on.
  721. Cpu int `json:"cpu"`
  722. }
  723. type PerfValue struct {
  724. // Indicates scaling ratio for an event: time_running/time_enabled
  725. // (amount of time that event was being measured divided by
  726. // amount of time that event was enabled for).
  727. // value 1.0 indicates that no multiplexing occurred. Value close
  728. // to 0 indicates that event was measured for short time and event's
  729. // value might be inaccurate.
  730. // See: https://lwn.net/Articles/324756/
  731. ScalingRatio float64 `json:"scaling_ratio"`
  732. // Value represents value of perf event retrieved from OS. It is
  733. // normalized against ScalingRatio and takes multiplexing into
  734. // consideration.
  735. Value uint64 `json:"value"`
  736. // Name is human readable name of an event.
  737. Name string `json:"name"`
  738. }
  739. // MemoryBandwidthStats corresponds to MBM (Memory Bandwidth Monitoring).
  740. // See: https://01.org/cache-monitoring-technology
  741. // See: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
  742. type MemoryBandwidthStats struct {
  743. // The 'mbm_total_bytes'.
  744. TotalBytes uint64 `json:"mbm_total_bytes,omitempty"`
  745. // The 'mbm_local_bytes'.
  746. LocalBytes uint64 `json:"mbm_local_bytes,omitempty"`
  747. }
  748. // CacheStats corresponds to CMT (Cache Monitoring Technology).
  749. // See: https://01.org/cache-monitoring-technology
  750. // See: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
  751. type CacheStats struct {
  752. // The 'llc_occupancy'.
  753. LLCOccupancy uint64 `json:"llc_occupancy,omitempty"`
  754. }
  755. // ResctrlStats corresponds to statistics from Resource Control.
  756. type ResctrlStats struct {
  757. // Each NUMA Node statistics corresponds to one element in the array.
  758. MemoryBandwidth []MemoryBandwidthStats `json:"memory_bandwidth,omitempty"`
  759. Cache []CacheStats `json:"cache,omitempty"`
  760. }
  761. // PerfUncoreStat represents value of a single monitored perf uncore event.
  762. type PerfUncoreStat struct {
  763. PerfValue
  764. // Socket that perf event was measured on.
  765. Socket int `json:"socket"`
  766. // PMU is Performance Monitoring Unit which collected these stats.
  767. PMU string `json:"pmu"`
  768. }
  769. type UlimitSpec struct {
  770. Name string `json:"name"`
  771. SoftLimit int64 `json:"soft_limit"`
  772. HardLimit int64 `json:"hard_limit"`
  773. }
  774. type ProcessStats struct {
  775. // Number of processes
  776. ProcessCount uint64 `json:"process_count"`
  777. // Number of open file descriptors
  778. FdCount uint64 `json:"fd_count"`
  779. // Number of sockets
  780. SocketCount uint64 `json:"socket_count"`
  781. // Number of threads currently in container
  782. ThreadsCurrent uint64 `json:"threads_current,omitempty"`
  783. // Maxium number of threads allowed in container
  784. ThreadsMax uint64 `json:"threads_max,omitempty"`
  785. // Ulimits for the top-level container process
  786. Ulimits []UlimitSpec `json:"ulimits,omitempty"`
  787. }
  788. type ContainerStats struct {
  789. // The time of this stat point.
  790. Timestamp time.Time `json:"timestamp"`
  791. Cpu CpuStats `json:"cpu,omitempty"`
  792. DiskIo DiskIoStats `json:"diskio,omitempty"`
  793. Memory MemoryStats `json:"memory,omitempty"`
  794. Hugetlb map[string]HugetlbStats `json:"hugetlb,omitempty"`
  795. Network NetworkStats `json:"network,omitempty"`
  796. // Filesystem statistics
  797. Filesystem []FsStats `json:"filesystem,omitempty"`
  798. // Task load stats
  799. TaskStats LoadStats `json:"task_stats,omitempty"`
  800. // Metrics for Accelerators. Each Accelerator corresponds to one element in the array.
  801. Accelerators []AcceleratorStats `json:"accelerators,omitempty"`
  802. // ProcessStats for Containers
  803. Processes ProcessStats `json:"processes,omitempty"`
  804. // Custom metrics from all collectors
  805. CustomMetrics map[string][]MetricVal `json:"custom_metrics,omitempty"`
  806. // Statistics originating from perf events
  807. PerfStats []PerfStat `json:"perf_stats,omitempty"`
  808. // Statistics originating from perf uncore events.
  809. // Applies only for root container.
  810. PerfUncoreStats []PerfUncoreStat `json:"perf_uncore_stats,omitempty"`
  811. // Referenced memory
  812. ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
  813. // Resource Control (resctrl) statistics
  814. Resctrl ResctrlStats `json:"resctrl,omitempty"`
  815. CpuSet CPUSetStats `json:"cpuset,omitempty"`
  816. OOMEvents uint64 `json:"oom_events,omitempty"`
  817. }
  818. func timeEq(t1, t2 time.Time, tolerance time.Duration) bool {
  819. // t1 should not be later than t2
  820. if t1.After(t2) {
  821. t1, t2 = t2, t1
  822. }
  823. diff := t2.Sub(t1)
  824. return diff <= tolerance
  825. }
  826. const (
  827. // 10ms, i.e. 0.01s
  828. timePrecision time.Duration = 10 * time.Millisecond
  829. )
  830. // This function is useful because we do not require precise time
  831. // representation.
  832. func (a *ContainerStats) Eq(b *ContainerStats) bool {
  833. if !timeEq(a.Timestamp, b.Timestamp, timePrecision) {
  834. return false
  835. }
  836. return a.StatsEq(b)
  837. }
  838. // Checks equality of the stats values.
  839. func (a *ContainerStats) StatsEq(b *ContainerStats) bool {
  840. // TODO(vmarmol): Consider using this through reflection.
  841. if !reflect.DeepEqual(a.Cpu, b.Cpu) {
  842. return false
  843. }
  844. if !reflect.DeepEqual(a.Memory, b.Memory) {
  845. return false
  846. }
  847. if !reflect.DeepEqual(a.Hugetlb, b.Hugetlb) {
  848. return false
  849. }
  850. if !reflect.DeepEqual(a.DiskIo, b.DiskIo) {
  851. return false
  852. }
  853. if !reflect.DeepEqual(a.Network, b.Network) {
  854. return false
  855. }
  856. if !reflect.DeepEqual(a.Processes, b.Processes) {
  857. return false
  858. }
  859. if !reflect.DeepEqual(a.Filesystem, b.Filesystem) {
  860. return false
  861. }
  862. if !reflect.DeepEqual(a.TaskStats, b.TaskStats) {
  863. return false
  864. }
  865. if !reflect.DeepEqual(a.Accelerators, b.Accelerators) {
  866. return false
  867. }
  868. if !reflect.DeepEqual(a.CustomMetrics, b.CustomMetrics) {
  869. return false
  870. }
  871. return true
  872. }
  873. // Event contains information general to events such as the time at which they
  874. // occurred, their specific type, and the actual event. Event types are
  875. // differentiated by the EventType field of Event.
  876. type Event struct {
  877. // the absolute container name for which the event occurred
  878. ContainerName string `json:"container_name"`
  879. // the time at which the event occurred
  880. Timestamp time.Time `json:"timestamp"`
  881. // the type of event. EventType is an enumerated type
  882. EventType EventType `json:"event_type"`
  883. // the original event object and all of its extraneous data, ex. an
  884. // OomInstance
  885. EventData EventData `json:"event_data,omitempty"`
  886. }
  887. // EventType is an enumerated type which lists the categories under which
  888. // events may fall. The Event field EventType is populated by this enum.
  889. type EventType string
  890. const (
  891. EventOom EventType = "oom"
  892. EventOomKill EventType = "oomKill"
  893. EventContainerCreation EventType = "containerCreation"
  894. EventContainerDeletion EventType = "containerDeletion"
  895. )
  896. // Extra information about an event. Only one type will be set.
  897. type EventData struct {
  898. // Information about an OOM kill event.
  899. OomKill *OomKillEventData `json:"oom,omitempty"`
  900. }
  901. // Information related to an OOM kill instance
  902. type OomKillEventData struct {
  903. // process id of the killed process
  904. Pid int `json:"pid"`
  905. // The name of the killed process
  906. ProcessName string `json:"process_name"`
  907. }