nl_linux.go 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900
  1. // Package nl has low level primitives for making Netlink calls.
  2. package nl
  3. import (
  4. "bytes"
  5. "encoding/binary"
  6. "fmt"
  7. "net"
  8. "runtime"
  9. "sync"
  10. "sync/atomic"
  11. "syscall"
  12. "unsafe"
  13. "github.com/vishvananda/netns"
  14. "golang.org/x/sys/unix"
  15. )
  16. const (
  17. // Family type definitions
  18. FAMILY_ALL = unix.AF_UNSPEC
  19. FAMILY_V4 = unix.AF_INET
  20. FAMILY_V6 = unix.AF_INET6
  21. FAMILY_MPLS = unix.AF_MPLS
  22. // Arbitrary set value (greater than default 4k) to allow receiving
  23. // from kernel more verbose messages e.g. for statistics,
  24. // tc rules or filters, or other more memory requiring data.
  25. RECEIVE_BUFFER_SIZE = 65536
  26. // Kernel netlink pid
  27. PidKernel uint32 = 0
  28. SizeofCnMsgOp = 0x18
  29. )
  30. // SupportedNlFamilies contains the list of netlink families this netlink package supports
  31. var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETLINK_NETFILTER}
  32. var nextSeqNr uint32
  33. // Default netlink socket timeout, 60s
  34. var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0}
  35. // ErrorMessageReporting is the default error message reporting configuration for the new netlink sockets
  36. var EnableErrorMessageReporting bool = false
  37. // GetIPFamily returns the family type of a net.IP.
  38. func GetIPFamily(ip net.IP) int {
  39. if len(ip) <= net.IPv4len {
  40. return FAMILY_V4
  41. }
  42. if ip.To4() != nil {
  43. return FAMILY_V4
  44. }
  45. return FAMILY_V6
  46. }
  // nativeEndian caches the byte order detected by NativeEndian.
  var nativeEndian binary.ByteOrder

  // NativeEndian gets native endianness for the system.
  //
  // The result is detected on first use by inspecting the first byte of a
  // known 32-bit value in memory, and is cached for later calls.
  func NativeEndian() binary.ByteOrder {
  	if nativeEndian != nil {
  		return nativeEndian
  	}
  	probe := uint32(0x01020304)
  	switch firstByte := *(*byte)(unsafe.Pointer(&probe)); firstByte {
  	case 0x01:
  		// Most significant byte is stored first.
  		nativeEndian = binary.BigEndian
  	default:
  		nativeEndian = binary.LittleEndian
  	}
  	return nativeEndian
  }
  60. // Byte swap a 16 bit value if we aren't big endian
  61. func Swap16(i uint16) uint16 {
  62. if NativeEndian() == binary.BigEndian {
  63. return i
  64. }
  65. return (i&0xff00)>>8 | (i&0xff)<<8
  66. }
  67. // Byte swap a 32 bit value if aren't big endian
  68. func Swap32(i uint32) uint32 {
  69. if NativeEndian() == binary.BigEndian {
  70. return i
  71. }
  72. return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24
  73. }
  // Attribute types that may follow a netlink error message. Execute uses
  // NLMSGERR_ATTR_MSG to annotate returned errors.
  const (
  	NLMSGERR_ATTR_UNUSED = iota // 0
  	NLMSGERR_ATTR_MSG           // 1
  	NLMSGERR_ATTR_OFFS          // 2
  	NLMSGERR_ATTR_COOKIE        // 3
  	NLMSGERR_ATTR_POLICY        // 4
  )
  // NetlinkRequestData is a piece of payload that can be attached to a
  // NetlinkRequest or nested inside an RtAttr.
  type NetlinkRequestData interface {
  	// Serialize returns the wire encoding of the payload.
  	Serialize() []byte
  	// Len reports the length of the payload in bytes.
  	Len() int
  }
  // Operations sent to the proc connector to start or stop event delivery.
  //
  // Both values are spelled out explicitly: in a Go const block an omitted
  // expression repeats the previous one, which would make
  // PROC_CN_MCAST_IGNORE equal to PROC_CN_MCAST_LISTEN (1) instead of 2.
  const (
  	PROC_CN_MCAST_LISTEN = 1
  	PROC_CN_MCAST_IGNORE = 2
  )
  // CbID identifies a connector callback by index and value.
  type CbID struct {
  	Idx, Val uint32
  }

  // CnMsg is the header of a connector message.
  type CnMsg struct {
  	ID       CbID
  	Seq, Ack uint32
  	Length   uint16
  	Flags    uint16
  }

  // CnMsgOp is a connector message header followed by a 32-bit operation.
  // It is serialized by reinterpreting the struct memory, so field order
  // and sizes must not change.
  type CnMsgOp struct {
  	CnMsg
  	// here we differ from the C header
  	Op uint32
  }
  105. func NewCnMsg(idx, val, op uint32) *CnMsgOp {
  106. var cm CnMsgOp
  107. cm.ID.Idx = idx
  108. cm.ID.Val = val
  109. cm.Ack = 0
  110. cm.Seq = 1
  111. cm.Length = uint16(binary.Size(op))
  112. cm.Op = op
  113. return &cm
  114. }
  115. func (msg *CnMsgOp) Serialize() []byte {
  116. return (*(*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg)))[:]
  117. }
  118. func DeserializeCnMsgOp(b []byte) *CnMsgOp {
  119. return (*CnMsgOp)(unsafe.Pointer(&b[0:SizeofCnMsgOp][0]))
  120. }
  121. func (msg *CnMsgOp) Len() int {
  122. return SizeofCnMsgOp
  123. }
  124. // IfInfomsg is related to links, but it is used for list requests as well
  125. type IfInfomsg struct {
  126. unix.IfInfomsg
  127. }
  128. // Create an IfInfomsg with family specified
  129. func NewIfInfomsg(family int) *IfInfomsg {
  130. return &IfInfomsg{
  131. IfInfomsg: unix.IfInfomsg{
  132. Family: uint8(family),
  133. },
  134. }
  135. }
  136. func DeserializeIfInfomsg(b []byte) *IfInfomsg {
  137. return (*IfInfomsg)(unsafe.Pointer(&b[0:unix.SizeofIfInfomsg][0]))
  138. }
  139. func (msg *IfInfomsg) Serialize() []byte {
  140. return (*(*[unix.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:]
  141. }
  142. func (msg *IfInfomsg) Len() int {
  143. return unix.SizeofIfInfomsg
  144. }
  145. func (msg *IfInfomsg) EncapType() string {
  146. switch msg.Type {
  147. case 0:
  148. return "generic"
  149. case unix.ARPHRD_ETHER:
  150. return "ether"
  151. case unix.ARPHRD_EETHER:
  152. return "eether"
  153. case unix.ARPHRD_AX25:
  154. return "ax25"
  155. case unix.ARPHRD_PRONET:
  156. return "pronet"
  157. case unix.ARPHRD_CHAOS:
  158. return "chaos"
  159. case unix.ARPHRD_IEEE802:
  160. return "ieee802"
  161. case unix.ARPHRD_ARCNET:
  162. return "arcnet"
  163. case unix.ARPHRD_APPLETLK:
  164. return "atalk"
  165. case unix.ARPHRD_DLCI:
  166. return "dlci"
  167. case unix.ARPHRD_ATM:
  168. return "atm"
  169. case unix.ARPHRD_METRICOM:
  170. return "metricom"
  171. case unix.ARPHRD_IEEE1394:
  172. return "ieee1394"
  173. case unix.ARPHRD_INFINIBAND:
  174. return "infiniband"
  175. case unix.ARPHRD_SLIP:
  176. return "slip"
  177. case unix.ARPHRD_CSLIP:
  178. return "cslip"
  179. case unix.ARPHRD_SLIP6:
  180. return "slip6"
  181. case unix.ARPHRD_CSLIP6:
  182. return "cslip6"
  183. case unix.ARPHRD_RSRVD:
  184. return "rsrvd"
  185. case unix.ARPHRD_ADAPT:
  186. return "adapt"
  187. case unix.ARPHRD_ROSE:
  188. return "rose"
  189. case unix.ARPHRD_X25:
  190. return "x25"
  191. case unix.ARPHRD_HWX25:
  192. return "hwx25"
  193. case unix.ARPHRD_PPP:
  194. return "ppp"
  195. case unix.ARPHRD_HDLC:
  196. return "hdlc"
  197. case unix.ARPHRD_LAPB:
  198. return "lapb"
  199. case unix.ARPHRD_DDCMP:
  200. return "ddcmp"
  201. case unix.ARPHRD_RAWHDLC:
  202. return "rawhdlc"
  203. case unix.ARPHRD_TUNNEL:
  204. return "ipip"
  205. case unix.ARPHRD_TUNNEL6:
  206. return "tunnel6"
  207. case unix.ARPHRD_FRAD:
  208. return "frad"
  209. case unix.ARPHRD_SKIP:
  210. return "skip"
  211. case unix.ARPHRD_LOOPBACK:
  212. return "loopback"
  213. case unix.ARPHRD_LOCALTLK:
  214. return "ltalk"
  215. case unix.ARPHRD_FDDI:
  216. return "fddi"
  217. case unix.ARPHRD_BIF:
  218. return "bif"
  219. case unix.ARPHRD_SIT:
  220. return "sit"
  221. case unix.ARPHRD_IPDDP:
  222. return "ip/ddp"
  223. case unix.ARPHRD_IPGRE:
  224. return "gre"
  225. case unix.ARPHRD_PIMREG:
  226. return "pimreg"
  227. case unix.ARPHRD_HIPPI:
  228. return "hippi"
  229. case unix.ARPHRD_ASH:
  230. return "ash"
  231. case unix.ARPHRD_ECONET:
  232. return "econet"
  233. case unix.ARPHRD_IRDA:
  234. return "irda"
  235. case unix.ARPHRD_FCPP:
  236. return "fcpp"
  237. case unix.ARPHRD_FCAL:
  238. return "fcal"
  239. case unix.ARPHRD_FCPL:
  240. return "fcpl"
  241. case unix.ARPHRD_FCFABRIC:
  242. return "fcfb0"
  243. case unix.ARPHRD_FCFABRIC + 1:
  244. return "fcfb1"
  245. case unix.ARPHRD_FCFABRIC + 2:
  246. return "fcfb2"
  247. case unix.ARPHRD_FCFABRIC + 3:
  248. return "fcfb3"
  249. case unix.ARPHRD_FCFABRIC + 4:
  250. return "fcfb4"
  251. case unix.ARPHRD_FCFABRIC + 5:
  252. return "fcfb5"
  253. case unix.ARPHRD_FCFABRIC + 6:
  254. return "fcfb6"
  255. case unix.ARPHRD_FCFABRIC + 7:
  256. return "fcfb7"
  257. case unix.ARPHRD_FCFABRIC + 8:
  258. return "fcfb8"
  259. case unix.ARPHRD_FCFABRIC + 9:
  260. return "fcfb9"
  261. case unix.ARPHRD_FCFABRIC + 10:
  262. return "fcfb10"
  263. case unix.ARPHRD_FCFABRIC + 11:
  264. return "fcfb11"
  265. case unix.ARPHRD_FCFABRIC + 12:
  266. return "fcfb12"
  267. case unix.ARPHRD_IEEE802_TR:
  268. return "tr"
  269. case unix.ARPHRD_IEEE80211:
  270. return "ieee802.11"
  271. case unix.ARPHRD_IEEE80211_PRISM:
  272. return "ieee802.11/prism"
  273. case unix.ARPHRD_IEEE80211_RADIOTAP:
  274. return "ieee802.11/radiotap"
  275. case unix.ARPHRD_IEEE802154:
  276. return "ieee802.15.4"
  277. case 65534:
  278. return "none"
  279. case 65535:
  280. return "void"
  281. }
  282. return fmt.Sprintf("unknown%d", msg.Type)
  283. }
  // nlmAlignOf rounds the length of a netlink message up to the next
  // multiple of syscall.NLMSG_ALIGNTO so that it is aligned properly.
  // Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license.
  func nlmAlignOf(msglen int) int {
  	const mask = syscall.NLMSG_ALIGNTO - 1
  	return (msglen + mask) &^ mask
  }
  289. func rtaAlignOf(attrlen int) int {
  290. return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1)
  291. }
  292. func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
  293. msg := NewIfInfomsg(family)
  294. parent.children = append(parent.children, msg)
  295. return msg
  296. }
  // Uint32Attribute is a netlink attribute whose payload is a single
  // 32-bit unsigned integer.
  type Uint32Attribute struct {
  	Type  uint16 // attribute type, including any flag bits
  	Value uint32 // payload
  }
  301. func (a *Uint32Attribute) Serialize() []byte {
  302. native := NativeEndian()
  303. buf := make([]byte, rtaAlignOf(8))
  304. native.PutUint16(buf[0:2], 8)
  305. native.PutUint16(buf[2:4], a.Type)
  306. if a.Type&NLA_F_NET_BYTEORDER != 0 {
  307. binary.BigEndian.PutUint32(buf[4:], a.Value)
  308. } else {
  309. native.PutUint32(buf[4:], a.Value)
  310. }
  311. return buf
  312. }
  313. func (a *Uint32Attribute) Len() int {
  314. return 8
  315. }
  316. // Extend RtAttr to handle data and children
  317. type RtAttr struct {
  318. unix.RtAttr
  319. Data []byte
  320. children []NetlinkRequestData
  321. }
  322. // Create a new Extended RtAttr object
  323. func NewRtAttr(attrType int, data []byte) *RtAttr {
  324. return &RtAttr{
  325. RtAttr: unix.RtAttr{
  326. Type: uint16(attrType),
  327. },
  328. children: []NetlinkRequestData{},
  329. Data: data,
  330. }
  331. }
  332. // NewRtAttrChild adds an RtAttr as a child to the parent and returns the new attribute
  333. //
  334. // Deprecated: Use AddRtAttr() on the parent object
  335. func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
  336. return parent.AddRtAttr(attrType, data)
  337. }
  338. // AddRtAttr adds an RtAttr as a child and returns the new attribute
  339. func (a *RtAttr) AddRtAttr(attrType int, data []byte) *RtAttr {
  340. attr := NewRtAttr(attrType, data)
  341. a.children = append(a.children, attr)
  342. return attr
  343. }
  344. // AddChild adds an existing NetlinkRequestData as a child.
  345. func (a *RtAttr) AddChild(attr NetlinkRequestData) {
  346. a.children = append(a.children, attr)
  347. }
  348. func (a *RtAttr) Len() int {
  349. if len(a.children) == 0 {
  350. return (unix.SizeofRtAttr + len(a.Data))
  351. }
  352. l := 0
  353. for _, child := range a.children {
  354. l += rtaAlignOf(child.Len())
  355. }
  356. l += unix.SizeofRtAttr
  357. return rtaAlignOf(l + len(a.Data))
  358. }
  359. // Serialize the RtAttr into a byte array
  360. // This can't just unsafe.cast because it must iterate through children.
  361. func (a *RtAttr) Serialize() []byte {
  362. native := NativeEndian()
  363. length := a.Len()
  364. buf := make([]byte, rtaAlignOf(length))
  365. next := 4
  366. if a.Data != nil {
  367. copy(buf[next:], a.Data)
  368. next += rtaAlignOf(len(a.Data))
  369. }
  370. if len(a.children) > 0 {
  371. for _, child := range a.children {
  372. childBuf := child.Serialize()
  373. copy(buf[next:], childBuf)
  374. next += rtaAlignOf(len(childBuf))
  375. }
  376. }
  377. if l := uint16(length); l != 0 {
  378. native.PutUint16(buf[0:2], l)
  379. }
  380. native.PutUint16(buf[2:4], a.Type)
  381. return buf
  382. }
  383. type NetlinkRequest struct {
  384. unix.NlMsghdr
  385. Data []NetlinkRequestData
  386. RawData []byte
  387. Sockets map[int]*SocketHandle
  388. }
  389. // Serialize the Netlink Request into a byte array
  390. func (req *NetlinkRequest) Serialize() []byte {
  391. length := unix.SizeofNlMsghdr
  392. dataBytes := make([][]byte, len(req.Data))
  393. for i, data := range req.Data {
  394. dataBytes[i] = data.Serialize()
  395. length = length + len(dataBytes[i])
  396. }
  397. length += len(req.RawData)
  398. req.Len = uint32(length)
  399. b := make([]byte, length)
  400. hdr := (*(*[unix.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
  401. next := unix.SizeofNlMsghdr
  402. copy(b[0:next], hdr)
  403. for _, data := range dataBytes {
  404. for _, dataByte := range data {
  405. b[next] = dataByte
  406. next = next + 1
  407. }
  408. }
  409. // Add the raw data if any
  410. if len(req.RawData) > 0 {
  411. copy(b[next:length], req.RawData)
  412. }
  413. return b
  414. }
  415. func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
  416. req.Data = append(req.Data, data)
  417. }
  418. // AddRawData adds raw bytes to the end of the NetlinkRequest object during serialization
  419. func (req *NetlinkRequest) AddRawData(data []byte) {
  420. req.RawData = append(req.RawData, data...)
  421. }
  422. // Execute the request against a the given sockType.
  423. // Returns a list of netlink messages in serialized format, optionally filtered
  424. // by resType.
  425. func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
  426. var (
  427. s *NetlinkSocket
  428. err error
  429. )
  430. if req.Sockets != nil {
  431. if sh, ok := req.Sockets[sockType]; ok {
  432. s = sh.Socket
  433. req.Seq = atomic.AddUint32(&sh.Seq, 1)
  434. }
  435. }
  436. sharedSocket := s != nil
  437. if s == nil {
  438. s, err = getNetlinkSocket(sockType)
  439. if err != nil {
  440. return nil, err
  441. }
  442. if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil {
  443. return nil, err
  444. }
  445. if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil {
  446. return nil, err
  447. }
  448. if EnableErrorMessageReporting {
  449. if err := s.SetExtAck(true); err != nil {
  450. return nil, err
  451. }
  452. }
  453. defer s.Close()
  454. } else {
  455. s.Lock()
  456. defer s.Unlock()
  457. }
  458. if err := s.Send(req); err != nil {
  459. return nil, err
  460. }
  461. pid, err := s.GetPid()
  462. if err != nil {
  463. return nil, err
  464. }
  465. var res [][]byte
  466. done:
  467. for {
  468. msgs, from, err := s.Receive()
  469. if err != nil {
  470. return nil, err
  471. }
  472. if from.Pid != PidKernel {
  473. return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel)
  474. }
  475. for _, m := range msgs {
  476. if m.Header.Seq != req.Seq {
  477. if sharedSocket {
  478. continue
  479. }
  480. return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq)
  481. }
  482. if m.Header.Pid != pid {
  483. continue
  484. }
  485. if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR {
  486. native := NativeEndian()
  487. errno := int32(native.Uint32(m.Data[0:4]))
  488. if errno == 0 {
  489. break done
  490. }
  491. var err error
  492. err = syscall.Errno(-errno)
  493. unreadData := m.Data[4:]
  494. if m.Header.Flags|unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr {
  495. // Skip the echoed request message.
  496. echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0]))
  497. unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):]
  498. // Annotate `err` using nlmsgerr attributes.
  499. for len(unreadData) >= syscall.SizeofRtAttr {
  500. attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0]))
  501. attrData := unreadData[syscall.SizeofRtAttr:attr.Len]
  502. switch attr.Type {
  503. case NLMSGERR_ATTR_MSG:
  504. err = fmt.Errorf("%w: %s", err, string(attrData))
  505. default:
  506. // TODO: handle other NLMSGERR_ATTR types
  507. }
  508. unreadData = unreadData[rtaAlignOf(int(attr.Len)):]
  509. }
  510. }
  511. return nil, err
  512. }
  513. if resType != 0 && m.Header.Type != resType {
  514. continue
  515. }
  516. res = append(res, m.Data)
  517. if m.Header.Flags&unix.NLM_F_MULTI == 0 {
  518. break done
  519. }
  520. }
  521. }
  522. return res, nil
  523. }
  524. // Create a new netlink request from proto and flags
  525. // Note the Len value will be inaccurate once data is added until
  526. // the message is serialized
  527. func NewNetlinkRequest(proto, flags int) *NetlinkRequest {
  528. return &NetlinkRequest{
  529. NlMsghdr: unix.NlMsghdr{
  530. Len: uint32(unix.SizeofNlMsghdr),
  531. Type: uint16(proto),
  532. Flags: unix.NLM_F_REQUEST | uint16(flags),
  533. Seq: atomic.AddUint32(&nextSeqNr, 1),
  534. },
  535. }
  536. }
  537. type NetlinkSocket struct {
  538. fd int32
  539. lsa unix.SockaddrNetlink
  540. sync.Mutex
  541. }
  542. func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
  543. fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, protocol)
  544. if err != nil {
  545. return nil, err
  546. }
  547. s := &NetlinkSocket{
  548. fd: int32(fd),
  549. }
  550. s.lsa.Family = unix.AF_NETLINK
  551. if err := unix.Bind(fd, &s.lsa); err != nil {
  552. unix.Close(fd)
  553. return nil, err
  554. }
  555. return s, nil
  556. }
  557. // GetNetlinkSocketAt opens a netlink socket in the network namespace newNs
  558. // and positions the thread back into the network namespace specified by curNs,
  559. // when done. If curNs is close, the function derives the current namespace and
  560. // moves back into it when done. If newNs is close, the socket will be opened
  561. // in the current network namespace.
  562. func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSocket, error) {
  563. c, err := executeInNetns(newNs, curNs)
  564. if err != nil {
  565. return nil, err
  566. }
  567. defer c()
  568. return getNetlinkSocket(protocol)
  569. }
  570. // executeInNetns sets execution of the code following this call to the
  571. // network namespace newNs, then moves the thread back to curNs if open,
  572. // otherwise to the current netns at the time the function was invoked
  573. // In case of success, the caller is expected to execute the returned function
  574. // at the end of the code that needs to be executed in the network namespace.
  575. // Example:
  576. // func jobAt(...) error {
  577. // d, err := executeInNetns(...)
  578. // if err != nil { return err}
  579. // defer d()
  580. // < code which needs to be executed in specific netns>
  581. // }
  582. // TODO: his function probably belongs to netns pkg.
  583. func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
  584. var (
  585. err error
  586. moveBack func(netns.NsHandle) error
  587. closeNs func() error
  588. unlockThd func()
  589. )
  590. restore := func() {
  591. // order matters
  592. if moveBack != nil {
  593. moveBack(curNs)
  594. }
  595. if closeNs != nil {
  596. closeNs()
  597. }
  598. if unlockThd != nil {
  599. unlockThd()
  600. }
  601. }
  602. if newNs.IsOpen() {
  603. runtime.LockOSThread()
  604. unlockThd = runtime.UnlockOSThread
  605. if !curNs.IsOpen() {
  606. if curNs, err = netns.Get(); err != nil {
  607. restore()
  608. return nil, fmt.Errorf("could not get current namespace while creating netlink socket: %v", err)
  609. }
  610. closeNs = curNs.Close
  611. }
  612. if err := netns.Set(newNs); err != nil {
  613. restore()
  614. return nil, fmt.Errorf("failed to set into network namespace %d while creating netlink socket: %v", newNs, err)
  615. }
  616. moveBack = netns.Set
  617. }
  618. return restore, nil
  619. }
  620. // Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE)
  621. // and subscribe it to multicast groups passed in variable argument list.
  622. // Returns the netlink socket on which Receive() method can be called
  623. // to retrieve the messages from the kernel.
  624. func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) {
  625. fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, protocol)
  626. if err != nil {
  627. return nil, err
  628. }
  629. s := &NetlinkSocket{
  630. fd: int32(fd),
  631. }
  632. s.lsa.Family = unix.AF_NETLINK
  633. for _, g := range groups {
  634. s.lsa.Groups |= (1 << (g - 1))
  635. }
  636. if err := unix.Bind(fd, &s.lsa); err != nil {
  637. unix.Close(fd)
  638. return nil, err
  639. }
  640. return s, nil
  641. }
  642. // SubscribeAt works like Subscribe plus let's the caller choose the network
  643. // namespace in which the socket would be opened (newNs). Then control goes back
  644. // to curNs if open, otherwise to the netns at the time this function was called.
  645. func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*NetlinkSocket, error) {
  646. c, err := executeInNetns(newNs, curNs)
  647. if err != nil {
  648. return nil, err
  649. }
  650. defer c()
  651. return Subscribe(protocol, groups...)
  652. }
  653. func (s *NetlinkSocket) Close() {
  654. fd := int(atomic.SwapInt32(&s.fd, -1))
  655. unix.Close(fd)
  656. }
  657. func (s *NetlinkSocket) GetFd() int {
  658. return int(atomic.LoadInt32(&s.fd))
  659. }
  660. func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
  661. fd := int(atomic.LoadInt32(&s.fd))
  662. if fd < 0 {
  663. return fmt.Errorf("Send called on a closed socket")
  664. }
  665. if err := unix.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil {
  666. return err
  667. }
  668. return nil
  669. }
  670. func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetlink, error) {
  671. fd := int(atomic.LoadInt32(&s.fd))
  672. if fd < 0 {
  673. return nil, nil, fmt.Errorf("Receive called on a closed socket")
  674. }
  675. var fromAddr *unix.SockaddrNetlink
  676. var rb [RECEIVE_BUFFER_SIZE]byte
  677. nr, from, err := unix.Recvfrom(fd, rb[:], 0)
  678. if err != nil {
  679. return nil, nil, err
  680. }
  681. fromAddr, ok := from.(*unix.SockaddrNetlink)
  682. if !ok {
  683. return nil, nil, fmt.Errorf("Error converting to netlink sockaddr")
  684. }
  685. if nr < unix.NLMSG_HDRLEN {
  686. return nil, nil, fmt.Errorf("Got short response from netlink")
  687. }
  688. rb2 := make([]byte, nr)
  689. copy(rb2, rb[:nr])
  690. nl, err := syscall.ParseNetlinkMessage(rb2)
  691. if err != nil {
  692. return nil, nil, err
  693. }
  694. return nl, fromAddr, nil
  695. }
  696. // SetSendTimeout allows to set a send timeout on the socket
  697. func (s *NetlinkSocket) SetSendTimeout(timeout *unix.Timeval) error {
  698. // Set a send timeout of SOCKET_SEND_TIMEOUT, this will allow the Send to periodically unblock and avoid that a routine
  699. // remains stuck on a send on a closed fd
  700. return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_SNDTIMEO, timeout)
  701. }
  702. // SetReceiveTimeout allows to set a receive timeout on the socket
  703. func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error {
  704. // Set a read timeout of SOCKET_READ_TIMEOUT, this will allow the Read to periodically unblock and avoid that a routine
  705. // remains stuck on a recvmsg on a closed fd
  706. return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout)
  707. }
  708. // SetExtAck requests error messages to be reported on the socket
  709. func (s *NetlinkSocket) SetExtAck(enable bool) error {
  710. var enableN int
  711. if enable {
  712. enableN = 1
  713. }
  714. return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN)
  715. }
  716. func (s *NetlinkSocket) GetPid() (uint32, error) {
  717. fd := int(atomic.LoadInt32(&s.fd))
  718. lsa, err := unix.Getsockname(fd)
  719. if err != nil {
  720. return 0, err
  721. }
  722. switch v := lsa.(type) {
  723. case *unix.SockaddrNetlink:
  724. return v.Pid, nil
  725. }
  726. return 0, fmt.Errorf("Wrong socket type")
  727. }
  // ZeroTerminated returns the bytes of s followed by a single NUL byte.
  func ZeroTerminated(s string) []byte {
  	// make zero-fills the buffer, so the final byte is already the NUL.
  	buf := make([]byte, len(s)+1)
  	copy(buf, s)
  	return buf
  }
  // NonZeroTerminated returns a copy of the bytes of s with no terminator
  // appended.
  func NonZeroTerminated(s string) []byte {
  	buf := make([]byte, len(s))
  	copy(buf, s)
  	return buf
  }
  // BytesToString converts a NUL-terminated byte buffer to a string,
  // returning the bytes before the first NUL. When b contains no NUL byte
  // (including when it is nil or empty) the whole buffer is converted
  // instead of panicking on an invalid slice bound.
  func BytesToString(b []byte) string {
  	n := bytes.IndexByte(b, 0)
  	if n < 0 {
  		return string(b)
  	}
  	return string(b[:n])
  }
  // Uint8Attr encodes v as a one-byte attribute payload.
  func Uint8Attr(v uint8) []byte {
  	buf := make([]byte, 1)
  	buf[0] = v
  	return buf
  }
  750. func Uint16Attr(v uint16) []byte {
  751. native := NativeEndian()
  752. bytes := make([]byte, 2)
  753. native.PutUint16(bytes, v)
  754. return bytes
  755. }
  756. func Uint32Attr(v uint32) []byte {
  757. native := NativeEndian()
  758. bytes := make([]byte, 4)
  759. native.PutUint32(bytes, v)
  760. return bytes
  761. }
  762. func Uint64Attr(v uint64) []byte {
  763. native := NativeEndian()
  764. bytes := make([]byte, 8)
  765. native.PutUint64(bytes, v)
  766. return bytes
  767. }
  768. func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) {
  769. var attrs []syscall.NetlinkRouteAttr
  770. for len(b) >= unix.SizeofRtAttr {
  771. a, vbuf, alen, err := netlinkRouteAttrAndValue(b)
  772. if err != nil {
  773. return nil, err
  774. }
  775. ra := syscall.NetlinkRouteAttr{Attr: syscall.RtAttr(*a), Value: vbuf[:int(a.Len)-unix.SizeofRtAttr]}
  776. attrs = append(attrs, ra)
  777. b = b[alen:]
  778. }
  779. return attrs, nil
  780. }
  781. func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) {
  782. a := (*unix.RtAttr)(unsafe.Pointer(&b[0]))
  783. if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) {
  784. return nil, nil, 0, unix.EINVAL
  785. }
  786. return a, b[unix.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
  787. }
  788. // SocketHandle contains the netlink socket and the associated
  789. // sequence counter for a specific netlink family
  790. type SocketHandle struct {
  791. Seq uint32
  792. Socket *NetlinkSocket
  793. }
  794. // Close closes the netlink socket
  795. func (sh *SocketHandle) Close() {
  796. if sh.Socket != nil {
  797. sh.Socket.Close()
  798. }
  799. }