vp9_packet.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. package codecs
  2. import (
  3. "github.com/pion/randutil"
  4. )
  5. // Use global random generator to properly seed by crypto grade random.
  6. var globalMathRandomGenerator = randutil.NewMathRandomGenerator() // nolint:gochecknoglobals
  // VP9Payloader payloads VP9 packets.
  //
  // The zero value is usable: the first call to Payload chooses the initial
  // picture ID and marks the payloader as initialized.
  type VP9Payloader struct {
  	// pictureID is the picture ID written into the packets of the next frame.
  	// Payload keeps it within 15 bits.
  	pictureID uint16
  	// initialized reports whether the initial picture ID has been chosen.
  	initialized bool

  	// InitialPictureIDFn is a function that returns random initial picture ID.
  	// When it is nil, Payload installs a default backed by the package-level
  	// random generator. The returned value is masked to 15 bits.
  	InitialPictureIDFn func() uint16
  }
  // Sizes and limits shared by the VP9 payloader and depacketizer.
  const (
  	// vp9HeaderSize is the length of the payload descriptor written by Payload:
  	// one flags byte followed by a two-byte (15 bit) picture ID, flexible mode.
  	vp9HeaderSize = 3
  	// maxSpatialLayers is the exclusive upper bound for the spatial layer ID
  	// accepted while parsing layer indices.
  	maxSpatialLayers = 5
  	// maxVP9RefPics is the maximum number of P_DIFF reference indices accepted
  	// in a single packet.
  	maxVP9RefPics = 3
  )
  19. // Payload fragments an VP9 packet across one or more byte arrays
  20. func (p *VP9Payloader) Payload(mtu uint16, payload []byte) [][]byte {
  21. /*
  22. * https://www.ietf.org/id/draft-ietf-payload-vp9-13.txt
  23. *
  24. * Flexible mode (F=1)
  25. * 0 1 2 3 4 5 6 7
  26. * +-+-+-+-+-+-+-+-+
  27. * |I|P|L|F|B|E|V|Z| (REQUIRED)
  28. * +-+-+-+-+-+-+-+-+
  29. * I: |M| PICTURE ID | (REQUIRED)
  30. * +-+-+-+-+-+-+-+-+
  31. * M: | EXTENDED PID | (RECOMMENDED)
  32. * +-+-+-+-+-+-+-+-+
  33. * L: | TID |U| SID |D| (CONDITIONALLY RECOMMENDED)
  34. * +-+-+-+-+-+-+-+-+ -\
  35. * P,F: | P_DIFF |N| (CONDITIONALLY REQUIRED) - up to 3 times
  36. * +-+-+-+-+-+-+-+-+ -/
  37. * V: | SS |
  38. * | .. |
  39. * +-+-+-+-+-+-+-+-+
  40. *
  41. * Non-flexible mode (F=0)
  42. * 0 1 2 3 4 5 6 7
  43. * +-+-+-+-+-+-+-+-+
  44. * |I|P|L|F|B|E|V|Z| (REQUIRED)
  45. * +-+-+-+-+-+-+-+-+
  46. * I: |M| PICTURE ID | (RECOMMENDED)
  47. * +-+-+-+-+-+-+-+-+
  48. * M: | EXTENDED PID | (RECOMMENDED)
  49. * +-+-+-+-+-+-+-+-+
  50. * L: | TID |U| SID |D| (CONDITIONALLY RECOMMENDED)
  51. * +-+-+-+-+-+-+-+-+
  52. * | TL0PICIDX | (CONDITIONALLY REQUIRED)
  53. * +-+-+-+-+-+-+-+-+
  54. * V: | SS |
  55. * | .. |
  56. * +-+-+-+-+-+-+-+-+
  57. */
  58. if !p.initialized {
  59. if p.InitialPictureIDFn == nil {
  60. p.InitialPictureIDFn = func() uint16 {
  61. return uint16(globalMathRandomGenerator.Intn(0x7FFF))
  62. }
  63. }
  64. p.pictureID = p.InitialPictureIDFn() & 0x7FFF
  65. p.initialized = true
  66. }
  67. if payload == nil {
  68. return [][]byte{}
  69. }
  70. maxFragmentSize := int(mtu) - vp9HeaderSize
  71. payloadDataRemaining := len(payload)
  72. payloadDataIndex := 0
  73. if min(maxFragmentSize, payloadDataRemaining) <= 0 {
  74. return [][]byte{}
  75. }
  76. var payloads [][]byte
  77. for payloadDataRemaining > 0 {
  78. currentFragmentSize := min(maxFragmentSize, payloadDataRemaining)
  79. out := make([]byte, vp9HeaderSize+currentFragmentSize)
  80. out[0] = 0x90 // F=1 I=1
  81. if payloadDataIndex == 0 {
  82. out[0] |= 0x08 // B=1
  83. }
  84. if payloadDataRemaining == currentFragmentSize {
  85. out[0] |= 0x04 // E=1
  86. }
  87. out[1] = byte(p.pictureID>>8) | 0x80
  88. out[2] = byte(p.pictureID)
  89. copy(out[vp9HeaderSize:], payload[payloadDataIndex:payloadDataIndex+currentFragmentSize])
  90. payloads = append(payloads, out)
  91. payloadDataRemaining -= currentFragmentSize
  92. payloadDataIndex += currentFragmentSize
  93. }
  94. p.pictureID++
  95. if p.pictureID >= 0x8000 {
  96. p.pictureID = 0
  97. }
  98. return payloads
  99. }
  100. // VP9Packet represents the VP9 header that is stored in the payload of an RTP Packet
  101. type VP9Packet struct {
  102. // Required header
  103. I bool // PictureID is present
  104. P bool // Inter-picture predicted frame
  105. L bool // Layer indices is present
  106. F bool // Flexible mode
  107. B bool // Start of a frame
  108. E bool // End of a frame
  109. V bool // Scalability structure (SS) data present
  110. Z bool // Not a reference frame for upper spatial layers
  111. // Recommended headers
  112. PictureID uint16 // 7 or 16 bits, picture ID
  113. // Conditionally recommended headers
  114. TID uint8 // Temporal layer ID
  115. U bool // Switching up point
  116. SID uint8 // Spatial layer ID
  117. D bool // Inter-layer dependency used
  118. // Conditionally required headers
  119. PDiff []uint8 // Reference index (F=1)
  120. TL0PICIDX uint8 // Temporal layer zero index (F=0)
  121. // Scalability structure headers
  122. NS uint8 // N_S + 1 indicates the number of spatial layers present in the VP9 stream
  123. Y bool // Each spatial layer's frame resolution present
  124. G bool // PG description present flag.
  125. NG uint8 // N_G indicates the number of pictures in a Picture Group (PG)
  126. Width []uint16
  127. Height []uint16
  128. PGTID []uint8 // Temporal layer ID of pictures in a Picture Group
  129. PGU []bool // Switching up point of pictures in a Picture Group
  130. PGPDiff [][]uint8 // Reference indecies of pictures in a Picture Group
  131. Payload []byte
  132. videoDepacketizer
  133. }
  134. // Unmarshal parses the passed byte slice and stores the result in the VP9Packet this method is called upon
  135. func (p *VP9Packet) Unmarshal(packet []byte) ([]byte, error) {
  136. if packet == nil {
  137. return nil, errNilPacket
  138. }
  139. if len(packet) < 1 {
  140. return nil, errShortPacket
  141. }
  142. p.I = packet[0]&0x80 != 0
  143. p.P = packet[0]&0x40 != 0
  144. p.L = packet[0]&0x20 != 0
  145. p.F = packet[0]&0x10 != 0
  146. p.B = packet[0]&0x08 != 0
  147. p.E = packet[0]&0x04 != 0
  148. p.V = packet[0]&0x02 != 0
  149. p.Z = packet[0]&0x01 != 0
  150. pos := 1
  151. var err error
  152. if p.I {
  153. pos, err = p.parsePictureID(packet, pos)
  154. if err != nil {
  155. return nil, err
  156. }
  157. }
  158. if p.L {
  159. pos, err = p.parseLayerInfo(packet, pos)
  160. if err != nil {
  161. return nil, err
  162. }
  163. }
  164. if p.F && p.P {
  165. pos, err = p.parseRefIndices(packet, pos)
  166. if err != nil {
  167. return nil, err
  168. }
  169. }
  170. if p.V {
  171. pos, err = p.parseSSData(packet, pos)
  172. if err != nil {
  173. return nil, err
  174. }
  175. }
  176. p.Payload = packet[pos:]
  177. return p.Payload, nil
  178. }
  179. // Picture ID:
  180. //
  181. // +-+-+-+-+-+-+-+-+
  182. // I: |M| PICTURE ID | M:0 => picture id is 7 bits.
  183. // +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits.
  184. // M: | EXTENDED PID |
  185. // +-+-+-+-+-+-+-+-+
  186. //
  187. func (p *VP9Packet) parsePictureID(packet []byte, pos int) (int, error) {
  188. if len(packet) <= pos {
  189. return pos, errShortPacket
  190. }
  191. p.PictureID = uint16(packet[pos] & 0x7F)
  192. if packet[pos]&0x80 != 0 {
  193. pos++
  194. if len(packet) <= pos {
  195. return pos, errShortPacket
  196. }
  197. p.PictureID = p.PictureID<<8 | uint16(packet[pos])
  198. }
  199. pos++
  200. return pos, nil
  201. }
  202. func (p *VP9Packet) parseLayerInfo(packet []byte, pos int) (int, error) {
  203. pos, err := p.parseLayerInfoCommon(packet, pos)
  204. if err != nil {
  205. return pos, err
  206. }
  207. if p.F {
  208. return pos, nil
  209. }
  210. return p.parseLayerInfoNonFlexibleMode(packet, pos)
  211. }
  212. // Layer indices (flexible mode):
  213. //
  214. // +-+-+-+-+-+-+-+-+
  215. // L: | T |U| S |D|
  216. // +-+-+-+-+-+-+-+-+
  217. //
  218. func (p *VP9Packet) parseLayerInfoCommon(packet []byte, pos int) (int, error) {
  219. if len(packet) <= pos {
  220. return pos, errShortPacket
  221. }
  222. p.TID = packet[pos] >> 5
  223. p.U = packet[pos]&0x10 != 0
  224. p.SID = (packet[pos] >> 1) & 0x7
  225. p.D = packet[pos]&0x01 != 0
  226. if p.SID >= maxSpatialLayers {
  227. return pos, errTooManySpatialLayers
  228. }
  229. pos++
  230. return pos, nil
  231. }
  232. // Layer indices (non-flexible mode):
  233. //
  234. // +-+-+-+-+-+-+-+-+
  235. // L: | T |U| S |D|
  236. // +-+-+-+-+-+-+-+-+
  237. // | TL0PICIDX |
  238. // +-+-+-+-+-+-+-+-+
  239. //
  240. func (p *VP9Packet) parseLayerInfoNonFlexibleMode(packet []byte, pos int) (int, error) {
  241. if len(packet) <= pos {
  242. return pos, errShortPacket
  243. }
  244. p.TL0PICIDX = packet[pos]
  245. pos++
  246. return pos, nil
  247. }
  248. // Reference indices:
  249. //
  250. // +-+-+-+-+-+-+-+-+ P=1,F=1: At least one reference index
  251. // P,F: | P_DIFF |N| up to 3 times has to be specified.
  252. // +-+-+-+-+-+-+-+-+ N=1: An additional P_DIFF follows
  253. // current P_DIFF.
  254. //
  255. func (p *VP9Packet) parseRefIndices(packet []byte, pos int) (int, error) {
  256. for {
  257. if len(packet) <= pos {
  258. return pos, errShortPacket
  259. }
  260. p.PDiff = append(p.PDiff, packet[pos]>>1)
  261. if packet[pos]&0x01 == 0 {
  262. break
  263. }
  264. if len(p.PDiff) >= maxVP9RefPics {
  265. return pos, errTooManyPDiff
  266. }
  267. pos++
  268. }
  269. pos++
  270. return pos, nil
  271. }
  272. // Scalability structure (SS):
  273. //
  274. // +-+-+-+-+-+-+-+-+
  275. // V: | N_S |Y|G|-|-|-|
  276. // +-+-+-+-+-+-+-+-+ -|
  277. // Y: | WIDTH | (OPTIONAL) .
  278. // + + .
  279. // | | (OPTIONAL) .
  280. // +-+-+-+-+-+-+-+-+ . N_S + 1 times
  281. // | HEIGHT | (OPTIONAL) .
  282. // + + .
  283. // | | (OPTIONAL) .
  284. // +-+-+-+-+-+-+-+-+ -|
  285. // G: | N_G | (OPTIONAL)
  286. // +-+-+-+-+-+-+-+-+ -|
  287. // N_G: | T |U| R |-|-| (OPTIONAL) .
  288. // +-+-+-+-+-+-+-+-+ -| . N_G times
  289. // | P_DIFF | (OPTIONAL) . R times .
  290. // +-+-+-+-+-+-+-+-+ -| -|
  291. //
  292. func (p *VP9Packet) parseSSData(packet []byte, pos int) (int, error) {
  293. if len(packet) <= pos {
  294. return pos, errShortPacket
  295. }
  296. p.NS = packet[pos] >> 5
  297. p.Y = packet[pos]&0x10 != 0
  298. p.G = (packet[pos]>>1)&0x7 != 0
  299. pos++
  300. NS := p.NS + 1
  301. p.NG = 0
  302. if p.Y {
  303. p.Width = make([]uint16, NS)
  304. p.Height = make([]uint16, NS)
  305. for i := 0; i < int(NS); i++ {
  306. p.Width[i] = uint16(packet[pos])<<8 | uint16(packet[pos+1])
  307. pos += 2
  308. p.Height[i] = uint16(packet[pos])<<8 | uint16(packet[pos+1])
  309. pos += 2
  310. }
  311. }
  312. if p.G {
  313. p.NG = packet[pos]
  314. pos++
  315. }
  316. for i := 0; i < int(p.NG); i++ {
  317. p.PGTID = append(p.PGTID, packet[pos]>>5)
  318. p.PGU = append(p.PGU, packet[pos]&0x10 != 0)
  319. R := (packet[pos] >> 2) & 0x3
  320. pos++
  321. p.PGPDiff = append(p.PGPDiff, []uint8{})
  322. for j := 0; j < int(R); j++ {
  323. p.PGPDiff[i] = append(p.PGPDiff[i], packet[pos])
  324. pos++
  325. }
  326. }
  327. return pos, nil
  328. }
  // VP9PartitionHeadChecker is obsolete. It is an empty struct; the partition
  // head check in this file is the IsPartitionHead method of VP9Packet.
  type VP9PartitionHeadChecker struct{}
  331. // IsPartitionHead checks whether if this is a head of the VP9 partition
  332. func (*VP9Packet) IsPartitionHead(payload []byte) bool {
  333. if len(payload) < 1 {
  334. return false
  335. }
  336. return (payload[0] & 0x08) != 0
  337. }