opus_in_isobmff.html 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813
  1. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
  2. <html>
  3. <head>
  4. <link rel="stylesheet" type="text/css" href="opus_in_isobmff.css"/>
  5. <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  6. <title>Encapsulation of Opus in ISO Base Media File Format</title>
  7. </head>
  8. <body bgcolor="#333333" text="#60B0C0">
  9. <b><u>Encapsulation of Opus in ISO Base Media File Format</u></b><br>
  10. <font size="2">last updated: October 1, 2014</font><br>
  11. <br>
  12. <div class="normal_link pre frame_box">
  13. Encapsulation of Opus in ISO Base Media File Format
  14. Version 0.5.3 (incomplete)
  15. Table of Contents
  16. <a href="#1">1</a> Scope
  17. <a href="#2">2</a> Normative References
  18. <a href="#3">3</a> Terms and Definitions
  19. <a href="#4">4</a> Design Rules of Encapsulation
  20. <a href="#4.1">4.1</a> File Type Identification
  21. <a href="#4.2">4.2</a> Basic Structure
  22. <a href="#4.2.1">4.2.1</a> Initial Movie
  23. <a href="#4.2.2">4.2.2</a> Movie Fragments
  24. <a href="#4.3">4.3</a> Byte Order
  25. <a href="#4.4">4.4</a> Definition of Opus sample
  26. <a href="#4.4.1">4.4.1</a> Opus sample
  27. <a href="#4.4.2">4.4.2</a> Duration of Opus sample
  28. <a href="#4.4.3">4.4.3</a> Sub-sample
  29. <a href="#4.5">4.5</a> Random Access
  30. <a href="#4.5.1">4.5.1</a> Random Access Point
  31. <a href="#4.5.2">4.5.2</a> Pre-roll
  32. <a href="#4.6">4.6</a> Trimming of Actual Duration
  33. <a href="#4.7">4.7</a> Channel Layout
  34. <a href="#4.8">4.8</a> Additional Requirements, Restrictions, Recommendations and Definitions for Boxes
  35. <a href="#4.8.1">4.8.1</a> File Type Box
  36. <a href="#4.8.2">4.8.2</a> Segment Type Box
  37. <a href="#4.8.3">4.8.3</a> Movie Header Box
  38. <a href="#4.8.4">4.8.4</a> Track Header Box
  39. <a href="#4.8.5">4.8.5</a> Edit Box
  40. <a href="#4.8.6">4.8.6</a> Edit List Box
  41. <a href="#4.8.7">4.8.7</a> Media Header Box
  42. <a href="#4.8.8">4.8.8</a> Handler Reference Box
  43. <a href="#4.8.9">4.8.9</a> Sound Media Header Box
  44. <a href="#4.8.10">4.8.10</a> Sample Table Box
  45. <a href="#4.8.11">4.8.11</a> OpusSampleEntry
  46. <a href="#4.8.12">4.8.12</a> Opus Specific Box
  47. <a href="#4.8.13">4.8.13</a> Sample Group Description Box
  48. <a href="#4.8.14">4.8.14</a> Sample to Group Box
  49. <a href="#4.8.15">4.8.15</a> Track Extends Box
  50. <a href="#4.8.16">4.8.16</a> Track Fragment Box
  51. <a href="#4.8.17">4.8.17</a> Track Fragment Header Box
  52. <a href="#4.8.18">4.8.18</a> Track Fragment Run Box
  53. <a href="#4.9">4.9</a> Example of Encapsulation
  54. <a href="#5">5</a> Author's Address
  55. <a name="1"></a>
  56. 1 Scope
  57. This specification specifies the fundamental way of encapsulating Opus coded bitstreams in the ISO Base Media
  58. File Format and its derivatives.
  59. <a name="2"></a>
  60. 2 Normative References
  61. [1] ISO/IEC 14496-12:2012 Corrected version
  62. Information technology — Coding of audio-visual objects — Part 12: ISO base media file format
  63. [2] ISO/IEC 14496-12:2012/Amd.1:2013
  64. Information technology — Coding of audio-visual objects — Part 12: ISO base media file format
  65. AMENDMENT 1: Various enhancements including support for large metadata
  66. [3] RFC 6716
  67. Definition of the Opus Audio Codec
  68. [4] draft-ietf-codec-oggopus-04
  69. Ogg Encapsulation for the Opus Audio Codec
  70. <a name="3"></a>
  71. 3 Terms and Definitions
  72. 3.1 active track
  73. enabled track from the non-alternate group or selected track from alternate group
  74. TODO: For alternate group, how about handling of disabled tracks?
  75. Some implementations treat disabled tracks in alternate group as a non-default track.
  76. Under such implementations, the selected track behaves as an enabled track.
  77. Should we define the implementation in this specification?
  78. Or leave it as implementation-defined?
  79. 3.2 actual duration
  80. duration constructed from valid samples
  81. 3.3 edit
  82. entry in the Edit List Box
  83. 3.4 padded samples
  84. PCM samples after decoding Opus sample(s) which are not valid samples
  85. An Opus bitstream always contains them partially at the beginning and may contain them in part at the end, as
  86. long as they have not yet been physically removed from the beginning and/or the end.
  87. 3.5 priming samples
  88. padded samples at the beginning of the Opus bitstream
  89. 3.6 sample-accurate
  90. for any PCM sample, a timestamp exactly matching its sampling timestamp is present in the media timeline.
  91. 3.7 valid samples
  92. PCM samples after decoding Opus sample(s) corresponding to input PCM samples
  93. <a name="4"></a>
  94. 4 Design Rules of Encapsulation
  95. 4.1 File Type Identification<a name="4.1"></a>
  96. This specification does not define any brand to declare files are conformant to this specification.
  97. TODO: Should we define such brands, e.g. 'Opus'? If we define the brand(s), we can utilize files conformant to
  98. this specification for the storage of Opus coded bitstreams without other derived file formats.
  99. It is not preferable to encapsulate Opus bitstreams with only the brands of the ISO Base Media File
  100. Format, though files conformant to this specification are compatible with certain versions of the ISO
  101. Base Media File Format. See ISO/IEC 14496-12 [1] E.1 Introduction.
  102. If you desire that this file format be an alternative file format to Ogg Opus, I recommend that you define such brand(s).
  103. <a name="4.2"></a>
  104. 4.2 Basic Structure
  105. 4.2.1 Initial Movie<a name="4.2.1"></a>
  106. This subclause specifies a basic structure of the Movie Box as follows:
  107. +----+----+----+----+----+----+----+----+------------------------------+
  108. |moov| | | | | | | | Movie Box |
  109. +----+----+----+----+----+----+----+----+------------------------------+
  110. | |mvhd| | | | | | | Movie Header Box |
  111. +----+----+----+----+----+----+----+----+------------------------------+
  112. | |trak| | | | | | | Track Box |
  113. +----+----+----+----+----+----+----+----+------------------------------+
  114. | | |tkhd| | | | | | Track Header Box |
  115. +----+----+----+----+----+----+----+----+------------------------------+
  116. | | |edts| | | | | | Edit Box |
  117. +----+----+----+----+----+----+----+----+------------------------------+
  118. | | | |elst| | | | | Edit List Box |
  119. +----+----+----+----+----+----+----+----+------------------------------+
  120. | | |mdia| | | | | | Media Box |
  121. +----+----+----+----+----+----+----+----+------------------------------+
  122. | | | |mdhd| | | | | Media Header Box |
  123. +----+----+----+----+----+----+----+----+------------------------------+
  124. | | | |hdlr| | | | | Handler Reference Box |
  125. +----+----+----+----+----+----+----+----+------------------------------+
  126. | | | |minf| | | | | Media Information Box |
  127. +----+----+----+----+----+----+----+----+------------------------------+
  128. | | | | |smhd| | | | Sound Media Header Box |
  129. +----+----+----+----+----+----+----+----+------------------------------+
  130. | | | | |dinf| | | | Data Information Box |
  131. +----+----+----+----+----+----+----+----+------------------------------+
  132. | | | | | |dref| | | Data Reference Box |
  133. +----+----+----+----+----+----+----+----+------------------------------+
  134. | | | | | | |url | | DataEntryUrlBox |
  135. +----+----+----+----+----+----+ or +----+------------------------------+
  136. | | | | | | |urn | | DataEntryUrnBox |
  137. +----+----+----+----+----+----+----+----+------------------------------+
  138. | | | | |stbl| | | | Sample Table Box |
  139. +----+----+----+----+----+----+----+----+------------------------------+
  140. | | | | | |stsd| | | Sample Description Box |
  141. +----+----+----+----+----+----+----+----+------------------------------+
  142. | | | | | | |Opus| | OpusSampleEntry |
  143. +----+----+----+----+----+----+----+----+------------------------------+
  144. | | | | | | | |dOps| Opus Specific Box |
  145. +----+----+----+----+----+----+----+----+------------------------------+
  146. | | | | | |stts| | | Decoding Time to Sample Box |
  147. +----+----+----+----+----+----+----+----+------------------------------+
  148. | | | | | |stsc| | | Sample To Chunk Box |
  149. +----+----+----+----+----+----+----+----+------------------------------+
  150. | | | | | |stsz| | | Sample Size Box |
  151. +----+----+----+----+----+ or +----+----+------------------------------+
  152. | | | | | |stz2| | | Compact Sample Size Box |
  153. +----+----+----+----+----+----+----+----+------------------------------+
  154. | | | | | |stco| | | Chunk Offset Box |
  155. +----+----+----+----+----+ or +----+----+------------------------------+
  156. | | | | | |co64| | | Chunk Large Offset Box |
  157. +----+----+----+----+----+----+----+----+------------------------------+
  158. | | | | | |sgpd| | | Sample Group Description Box |
  159. +----+----+----+----+----+----+----+----+------------------------------+
  160. | | | | | |sbgp| | | Sample to Group Box |
  161. +----+----+----+----+----+----+----+----+------------------------------+
  162. | |mvex|* | | | | | | Movie Extends Box |
  163. +----+----+----+----+----+----+----+----+------------------------------+
  164. | | |trex|* | | | | | Track Extends Box |
  165. +----+----+----+----+----+----+----+----+------------------------------+
  166. Figure 1 - Basic structure of Movie Box
  167. It is strongly recommended that the order of boxes should follow the above structure.
  168. Boxes marked with an asterisk (*) may be present.
  169. For some boxes listed above, the additional requirements, restrictions, recommendations and definitions
  170. are specified in 4.8 Additional Requirements, Restrictions, Recommendations and Definitions for Boxes in
  171. this specification. For the others, the definition is as is defined in ISO/IEC 14496-12 [1].
  172. 4.2.2 Movie Fragments<a name="4.2.2"></a>
  173. This subclause specifies a basic structure of the Movie Fragment Box as follows:
  174. +----+----+----+----+----+----+----+----+------------------------------+
  175. |moof| | | | | | | | Movie Fragment Box |
  176. +----+----+----+----+----+----+----+----+------------------------------+
  177. | |mfhd| | | | | | | Movie Fragment Header Box |
  178. +----+----+----+----+----+----+----+----+------------------------------+
  179. | |traf| | | | | | | Track Fragment Box |
  180. +----+----+----+----+----+----+----+----+------------------------------+
  181. | | |tfhd| | | | | | Track Fragment Header Box |
  182. +----+----+----+----+----+----+----+----+------------------------------+
  183. | | |trun| | | | | | Track Fragment Run Box |
  184. +----+----+----+----+----+----+----+----+------------------------------+
  185. | | |sgpd|* | | | | | Sample Group Description Box |
  186. +----+----+----+----+----+----+----+----+------------------------------+
  187. | | |sbgp|* | | | | | Sample to Group Box |
  188. +----+----+----+----+----+----+----+----+------------------------------+
  189. Figure 2 - Basic structure of Movie Fragment Box
  190. It is strongly recommended that the Movie Fragment Header Box and the Track Fragment Header Box be
  191. placed first in their container.
  192. Boxes marked with an asterisk (*) may be present.
  193. For some boxes listed above, the additional requirements, restrictions, recommendations and definitions
  194. are specified in 4.8 Additional Requirements, Restrictions, Recommendations and Definitions for Boxes in
  195. this specification. For the others, the definition is as is defined in ISO/IEC 14496-12 [1].
  196. <a name="4.3"></a>
  197. 4.3 Byte Order
  198. The fields in the boxes are stored as big-endian format.
  199. All Opus samples are processed byte-by-byte. Therefore, the endianness has nothing to do with any Opus sample.
  200. <a name="4.4"></a>
  201. 4.4 Definition of Opus sample
  202. 4.4.1 Opus sample<a name="4.4.1"></a>
  203. An Opus sample is exactly one Opus packet for each of the different Opus bitstreams. To support more than
  204. two channels, an Opus sample can contain frames from multiple Opus bitstreams, but all Opus packets shall
  205. share the same total of frame sizes in a single Opus sample. The way to pack an Opus packet from
  206. each of the Opus bitstreams into a single Opus sample follows Appendix B of RFC 6716 [3].
  207. In this specification, 'sample' means 'Opus sample' except for 'padded samples', 'priming samples', 'valid
  208. samples' and 'sample-accurate', i.e. 'sample' is 'sample' in the term defined in ISO/IEC 14496-12 [1].
  209. +-----------------------------------------+-------------------------------------+
  210. | Opus packet 0 (self-delimiting framing) | Opus packet 1 (undelimited framing) |
  211. +-----------------------------------------+-------------------------------------+
  212. |<---------------------------- the size of Opus sample ------------------------>|
  213. Figure 3 - Example structure of an Opus sample containing two Opus bitstreams
  214. 4.4.2 Duration of Opus sample<a name="4.4.2"></a>
  215. The duration of Opus sample is given by multiplying the total of frame sizes for a single Opus bitstream
  216. expressed in seconds by the value of the timescale field in the Media Header Box.
  217. Let's say an Opus sample consists of two Opus bitstreams, where the frame size of one bitstream is 40 milli-
  218. seconds and the frame size of another is 60 milliseconds, and the timescale field in the Media Header Box
  219. is set to 48000, then the duration of that Opus sample shall be 120 milliseconds since three 40 millisecond
  220. frames and two 60 millisecond frames shall be contained because of the maximum duration of Opus packet, 120
  221. milliseconds, and 5760 in the timescale indicated in the Media Header Box.
  222. To indicate the valid samples excluding the padded samples at the end of Opus bitstream, the duration of
  223. the last Opus sample of an Opus bitstream is given by multiplying the number of the valid samples by the
  224. value produced by dividing the value of the timescale field in the Media Header Box by 48000.
  225. 4.4.3 Sub-sample<a name="4.4.3"></a>
  226. The structure of the last Opus packet in an Opus sample is different from the others in the same Opus sample,
  227. and the others are invalid Opus packets as an Opus sample because of self-delimiting framing. To avoid
  228. complexities, sub-sample is not defined for Opus sample in this specification.
  229. <a name="4.5"></a>
  230. 4.5 Random Access
  231. 4.5.1 Random Access Point<a name="4.5.1"></a>
  232. All Opus samples can be independently decoded i.e. every Opus sample is a sync sample. Therefore, the Sync
  233. Sample Box shall not be present as long as there are no samples other than Opus samples in the same track.
  234. 4.5.2 Pre-roll<a name="4.5.2"></a>
  235. Opus requires at least 80 milliseconds of pre-roll after each random access.
  236. Pre-roll is indicated by the roll_distance field in AudioRollRecoveryEntry. AudioPreRollEntry shall not be
  237. used since every Opus sample is a sync sample in Opus bitstream. Note that roll_distance is expressed in
  238. sample units in terms of the ISO Base Media File Format, and always takes negative values.
  239. For the requirement of AudioRollRecoveryEntry, the compatible_brands field in the File Type Box and/or
  240. the Segment Type Box shall contain at least one brand which requires support for roll groups. See also
  241. 4.8.1 File Type Box and 4.8.2 Segment Type Box in this specification.
  242. <a name="4.6"></a>
  243. 4.6 Trimming of Actual Duration
  244. Due to the priming samples (or the padding at the beginning) derived from the pre-roll for the startup and the
  245. padded samples at the end, we need to trim the media to get the actual duration. An edit in the Edit List Box can
  246. achieve this demand.
  247. For sample-accurate trimming, proper timescale should be set to the timescale field in the Movie Header Box
  248. and the Media Header Box inside Track Box(es) for Opus bitstream.
  249. The Edit List Box is applied to whole movie including all movie fragments. Therefore, it is impossible to tell
  250. the actual duration in the case producing movie fragments on the fly such as live-streaming. In such cases,
  251. the duration of the last Opus sample may be helpful.
  252. TODO: Should we define a new box which indicates the last Opus samples?
  253. Since this specification allows multiple sample descriptions, i.e. allows concatenation of multiple Opus
  254. bitstreams in a track, each Opus bitstream may contain some padded samples.
  255. Without such a box, we cannot know in container level whether an Opus sample is the last Opus sample in
  256. an Opus bitstream or not. Is this preferable?
  257. See also 4.8.6 Edit List Box in this specification.
  258. <a name="4.7"></a>
  259. 4.7 Channel Layout
  260. By the application of alternate_group in the Track Header Box, whole audio channels in all active tracks from
  261. non-alternate group and/or different alternate group from each other are composited into the presentation. If
  262. an Opus sample consists of multiple Opus bitstreams, it can be split into individual Opus bitstreams and
  263. reconstructed into new Opus samples as long as every Opus bitstream has the same total duration in each Opus
  264. sample. This nature can be utilized to encapsulate a single Opus bitstream in each track without breaking the
  265. original channel layout.
  266. As an example, let's say there is a following track:
  267. OutputChannelCount = 6;
  268. StreamCount = 4;
  269. CoupledCount = 2;
  270. ChannelMapping = {0, 4, 1, 2, 3, 5}; // front left, front center, front right, rear left, rear right, LFE
  271. You extract the four Opus bitstreams from this track and you encapsulate two of the four into a track and the
  272. others into another track. The former track is as follows.
  273. OutputChannelCount = 6;
  274. StreamCount = 2;
  275. CoupledCount = 2;
  276. ChannelMapping = {0, 255, 1, 2, 3, 255}; // front left, front center, front right, rear left, rear right, LFE
  277. And the latter track is as follows.
  278. OutputChannelCount = 6;
  279. StreamCount = 2;
  280. CoupledCount = 0;
  281. ChannelMapping = {255, 0, 255, 255, 255, 1}; // front left, front center, front right, rear left, rear right, LFE
  282. In addition, the value of the alternate_group field in the both tracks is set to 0. As the result, the player
  283. may play as if channels with 255 are not present, and play the presentation constructed from the both tracks
  284. in the same channel layout as the one of the original track. Keep in mind that the way of the composition, i.e.
  285. the mixing for playback, is not defined here, and maybe different results could occur except for the channel
  286. layout of the original, depending on an implementation or the definition of a derived file format.
  287. Note that some derived file formats may specify the restriction to ignore alternate grouping. In the context of
  288. such file formats, this application is not available. This unavailability does not mean incompatibilities among
  289. file formats unless the restriction to the value of the alternate_group field is specified and brings about
  290. any conflict among their definitions.
  291. TODO: The future amendments of ISO/IEC 14496-12 [1] will add further supports of channel layouts and it may be
  292. able to exclude certain channels from the already mapped channels to remove pure silent channels. The
  293. channel mapping defined in the Opus Specific Box should be designed as processed before the extensions,
  294. and the extensions should be placed after the Opus Specific Box.
  295. <a name="4.8"></a>
  296. 4.8 Additional Requirements, Restrictions, Recommendations and Definitions for Boxes
  297. 4.8.1 File Type Box<a name="4.8.1"></a>
  298. For any track containing Opus bitstreams, the following requirements are applied.
  299. + compatible_brands:
  300. The compatible_brands fields shall contain at least one brand which requires support for the structural
  301. boxes listed at 4.2 Basic Structure, and the additional requirements, restrictions, recommendations and
  302. definitions specified in 4.8 Additional Requirements, Restrictions, Recommendations and Definitions for
  303. Boxes in this specification.
  304. As an example, the minimal support of the encapsulation of Opus bitstreams in ISO Base Media file format
  305. requires the 'iso2' brand since support of roll groups is required.
  306. 4.8.2 Segment Type Box<a name="4.8.2"></a>
  307. The same requirements are applied as specified at 4.8.1 File Type Box in this specification.
  308. 4.8.3 Movie Header Box<a name="4.8.3"></a>
  309. If any track contains Opus bitstreams, the following recommendation is applied.
  310. + timescale:
  311. The timescale field should be set to the same value as the timescale field in the Media Header Box
  312. inside Track Box(es) for Opus bitstream if no tracks for bitstreams other than Opus bitstream are present.
  313. 4.8.4 Track Header Box<a name="4.8.4"></a>
  314. For any track containing Opus bitstreams, the following requirements are applied.
  315. + layer:
  316. The layer field shall be set to 0.
  317. + matrix:
  318. The matrix field shall be set to { 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 }.
  319. + width:
  320. The width field shall be set to 0.
  321. + height:
  322. The height field shall be set to 0.
  323. 4.8.5 Edit Box<a name="4.8.5"></a>
  324. For any track containing Opus bitstreams, exactly one Edit Box shall be present.
  325. 4.8.6 Edit List Box<a name="4.8.6"></a>
  326. For any track containing Opus bitstreams, exactly one Edit List Box shall be present. In addition, for
  327. non-empty edits, the following recommendations are applied.
  328. + segment_duration:
  329. The segment_duration field can be used to indicate the actual duration of Opus bitstream.
  330. When the value of the timescale field in the Movie Header Box is equal to 48000, the segment_duration
  331. field shall be set to the number of the valid samples to indicate the actual duration.
  332. When enabling movie fragments, the segment_duration field may be set to 0. The value 0 represents
  333. implicit duration equal to the sum of the duration of all samples. This would be helpful for excluding
  334. the padded samples from the presentation timeline when producing movie fragments on the fly.
  335. + media_time:
  336. The media_time field can be used to remove the priming samples of Opus bitstreams.
  337. When the value of the timescale field in the Media Header Box is equal to 48000, the media_time field
  338. shall be set to the number of the priming samples to remove the priming samples.
  339. + media_rate:
  340. If the segment_duration field is used to indicate the actual duration, the media_rate field shall be
  341. set to 1.
  342. 4.8.7 Media Header Box<a name="4.8.7"></a>
  343. For any track containing Opus bitstreams, the following recommendation is applied.
  344. + timescale:
  345. The timescale field should be set to 48000 to access sample-accurately.
  346. 4.8.8 Handler Reference Box<a name="4.8.8"></a>
  347. For any track containing Opus bitstreams, the following requirement is applied.
  348. + handler_type:
  349. The handler_type field shall be set to 'soun'.
  350. 4.8.9 Sound Media Header Box<a name="4.8.9"></a>
  351. For any track containing Opus bitstreams, the Sound Media Header Box shall be present.
  352. 4.8.10 Sample Table Box<a name="4.8.10"></a>
  353. For any track containing Opus bitstreams, at least one Sample Group Description Box and at least one
  354. Sample to Group Boxes shall be present, and the Sync Sample Box shall not be present as long as there are
  355. no samples other than Opus samples in the same track.
  356. 4.8.11 OpusSampleEntry<a name="4.8.11"></a>
  357. For any track containing Opus bitstreams, at least one OpusSampleEntry shall be present.
  358. The syntax and semantics of the OpusSampleEntry is shown as follows.
  359. class OpusSampleEntry() extends AudioSampleEntry ('Opus'){
  360. OpusSpecificBox();
  361. }
  362. + channelcount:
  363. The channelcount field shall be set to the sum of the total number of Opus bitstreams and the number
  364. of Opus bitstreams producing two channels. This value is identical to (M+N), where M is the value of
  365. the *Coupled Stream Count* field and N is the value of the *Stream Count* field in the *Channel Mapping
  366. Table* in the identification header defined in Ogg Opus [4].
  367. + samplesize:
  368. The samplesize field shall be set to 16.
  369. + samplerate:
  370. The samplerate field shall be set to 48000&lt;&lt;16.
  371. 4.8.12 Opus Specific Box<a name="4.8.12"></a>
  372. Exactly one Opus Specific Box shall be present in each OpusSampleEntry.
  373. The Opus Specific Box contains the version field and this specification defines version 0 of this box.
  374. If incompatible changes occurred in the fields after the version field within the OpusSpecificBox in the
  375. future versions of this specification, another version will be defined.
  376. The syntax and semantics of the Opus Specific Box is shown as follows.
  377. class ChannelMappingTable (unsigned int(8) OutputChannelCount){
  378. unsigned int(8) StreamCount;
  379. unsigned int(8) CoupledCount;
  380. unsigned int(8 * OutputChannelCount) ChannelMapping;
  381. }
  382. aligned(8) class OpusSpecificBox extends FullBox('dOps', version, dflags){
  383. unsigned int(8) OutputChannelCount;
  384. if (dflags & 0x000001) {
  385. unsigned int(16) PreSkip;
  386. }
  387. if (dflags & 0x000002) {
  388. unsigned int(32) InputSampleRate;
  389. }
  390. if (dflags & 0x000004) {
  391. signed int(16) OutputGain;
  392. }
  393. unsigned int(8) ChannelMappingFamily;
  394. if (ChannelMappingFamily != 0) {
  395. ChannelMappingTable(OutputChannelCount);
  396. }
  397. }
  398. + version:
  399. The version field shall be set to 0.
  400. In the future versions of this specification, this field may be set to other values. And without support
  401. of those values, the reader shall not read the fields after this within the OpusSpecificBox.
  402. + flags:
  403. The following flags are defined in the dflags:
  404. 0x000001 pre-skip-present:
  405. This flag indicates the presence of the PreSkip field.
  406. 0x000002 input-sample-rate-present:
  407. This flag indicates the presence of the InputSampleRate field.
  408. 0x000004 output-gain-present:
  409. This flag indicates the presence of the OutputGain field.
  410. + OutputChannelCount:
  411. The OutputChannelCount field shall be set to the same value as the *Output Channel Count* field in the
  412. identification header defined in Ogg Opus [4].
  413. + PreSkip:
  414. The PreSkip field shall be set to the same value as the *Pre-skip* field in the identification header
  415. defined in Ogg Opus [4]. Note that the value is stored as big-endian format.
  416. The PreSkip field can be absent after removing Opus samples containing more PCM samples
  417. than the number of the priming samples.
  418. The PreSkip field is not used for removing the priming samples at the whole playback at all since it is
  419. informative only, and that task falls on the Edit List Box.
  420. + InputSampleRate:
  421. The InputSampleRate field shall be set to the same value as the *Input Sample Rate* field in the
  422. identification header defined in Ogg Opus [4]. Note that the value is stored as big-endian format.
  423. If the InputSampleRate field is absent, process as if it is set to 0, which indicates "unspecified".
  424. + OutputGain:
  425. The OutputGain field shall be set to the same value as the *Output Gain* field in the identification
  426. header defined in Ogg Opus [4]. Note that the value is stored as 8.8 fixed-point and big-endian format.
  427. If the OutputGain field is absent, process as if it is set to 0.
  428. + ChannelMappingFamily:
  429. The ChannelMappingFamily field shall be set to the same value as the *Channel Mapping Family* field in
  430. the identification header defined in Ogg Opus [4].
  431. + StreamCount:
  432. The StreamCount field shall be set to the same value as the *Stream Count* field in the identification
  433. header defined in Ogg Opus [4].
  434. + CoupledCount:
  435. The CoupledCount field shall be set to the same value as the *Coupled Stream Count* field in the identification
  436. header defined in Ogg Opus [4].
  437. + ChannelMapping:
  438. The ChannelMapping field shall be set to the same octet string as *Channel Mapping* field in the identi-
  439. fication header defined in Ogg Opus [4].
  440. 4.8.13 Sample Group Description Box<a name="4.8.13"></a>
  441. For any track containing Opus bitstreams, at least one Sample Group Description Box shall be present and have
  442. the grouping_type field set to 'roll'. In addition, the following requirements and restriction are applied.
  443. + version:
  444. The version field shall be set to 1 if the grouping_type field set to 'roll'.
  445. + default_length
  446. The default_length field shall be set to 2 if the grouping_type field set to 'roll'.
  447. + roll_distance:
  448. The roll_distance field in any AudioRollRecoveryEntry shall not be set to zero or a positive value for
  449. any Opus sample.
  450. See also 4.5.2 Pre-roll.
  451. 4.8.14 Sample to Group Box<a name="4.8.14"></a>
  452. For any track containing Opus bitstreams, at least one Sample to Group Box shall be present and have the
  453. grouping_type field set to 'roll'. In addition, the following requirement is applied.
  454. + group_description_index:
  455. The group_description_index fields shall not be set to 0 if the grouping_type field is set to 'roll'.
  456. 4.8.15 Track Extends Box<a name="4.8.15"></a>
  457. For any track containing Opus bitstreams, the following requirement is applied.
  458. + default_sample_flags:
  459. The sample_is_non_sync_sample field shall be set to 0.
  460. 4.8.16 Track Fragment Box<a name="4.8.16"></a>
  461. For any track containing Opus bitstreams, if any sample is contained in a track fragment, the Sample to
  462. Group Box with the grouping_type field set to 'roll' shall be present for that track fragment.
  463. 4.8.17 Track Fragment Header Box<a name="4.8.17"></a>
  464. For any track containing Opus bitstreams, the following requirement is applied.
  465. + default_sample_flags:
  466. The sample_is_non_sync_sample field shall be set to 0.
  467. 4.8.18 Track Fragment Run Box<a name="4.8.18"></a>
  468. For any track containing Opus bitstreams, the following requirements are applied.
  469. + first_sample_flags:
  470. The sample_is_non_sync_sample field shall be set to 0.
  471. + sample_flags:
  472. The sample_is_non_sync_sample field shall be set to 0.
  473. <a name="4.9"></a>
  474. 4.9 Example of Encapsulation
  475. [File]
  476. size = 10349
  477. [ftyp: File Type Box]
  478. position = 0
  479. size = 24
  480. major_brand = mp42 : MP4 version 2
  481. minor_version = 0
  482. compatible_brands
  483. brand[0] = mp42 : MP4 version 2
  484. brand[1] = iso2 : ISO Base Media file format version 2
  485. [free: Free Space Box]
  486. position = 24
  487. size = 8
  488. [mdat: Media Data Box]
  489. position = 32
  490. size = 9551
  491. [moov: Movie Box]
  492. position = 9583
  493. size = 766
  494. [mvhd: Movie Header Box]
  495. position = 9591
  496. size = 108
  497. version = 0
  498. flags = 0x000000
  499. creation_time = UTC 2014/09/23, 15:23:21
  500. modification_time = UTC 2014/09/23, 15:23:21
  501. timescale = 48000
  502. duration = 33600 (00:00:00.700)
  503. rate = 1.000000
  504. volume = 1.000000
  505. reserved = 0x0000
  506. reserved = 0x00000000
  507. reserved = 0x00000000
  508. transformation matrix
  509. | a, b, u | | 1.000000, 0.000000, 0.000000 |
  510. | c, d, v | = | 0.000000, 1.000000, 0.000000 |
  511. | x, y, w | | 0.000000, 0.000000, 1.000000 |
  512. pre_defined = 0x00000000
  513. pre_defined = 0x00000000
  514. pre_defined = 0x00000000
  515. pre_defined = 0x00000000
  516. pre_defined = 0x00000000
  517. pre_defined = 0x00000000
  518. next_track_ID = 2
  519. [iods: Object Descriptor Box]
  520. position = 9699
  521. size = 33
  522. version = 0
  523. flags = 0x000000
  524. [tag = 0x10: MP4_IOD]
  525. expandableClassSize = 16
  526. ObjectDescriptorID = 1
  527. URL_Flag = 0
  528. includeInlineProfileLevelFlag = 0
  529. reserved = 0xf
  530. ODProfileLevelIndication = 0xff
  531. sceneProfileLevelIndication = 0xff
  532. audioProfileLevelIndication = 0xfe
  533. visualProfileLevelIndication = 0xff
  534. graphicsProfileLevelIndication = 0xff
  535. [tag = 0x0e: ES_ID_Inc]
  536. expandableClassSize = 4
  537. Track_ID = 1
  538. [trak: Track Box]
  539. position = 9732
  540. size = 617
  541. [tkhd: Track Header Box]
  542. position = 9740
  543. size = 92
  544. version = 0
  545. flags = 0x000007
  546. Track enabled
  547. Track in movie
  548. Track in preview
  549. creation_time = UTC 2014/09/23, 15:23:21
  550. modification_time = UTC 2014/09/23, 15:23:21
  551. track_ID = 1
  552. reserved = 0x00000000
  553. duration = 33600 (00:00:00.700)
  554. reserved = 0x00000000
  555. reserved = 0x00000000
  556. layer = 0
  557. alternate_group = 0
  558. volume = 1.000000
  559. reserved = 0x0000
  560. transformation matrix
  561. | a, b, u | | 1.000000, 0.000000, 0.000000 |
  562. | c, d, v | = | 0.000000, 1.000000, 0.000000 |
  563. | x, y, w | | 0.000000, 0.000000, 1.000000 |
  564. width = 0.000000
  565. height = 0.000000
  566. [edts: Edit Box]
  567. position = 9832
  568. size = 36
  569. [elst: Edit List Box]
  570. position = 9840
  571. size = 28
  572. version = 0
  573. flags = 0x000000
  574. entry_count = 1
  575. entry[0]
  576. segment_duration = 33600
  577. media_time = 3840
  578. media_rate = 1.000000
  579. [mdia: Media Box]
  580. position = 9868
  581. size = 481
  582. [mdhd: Media Header Box]
  583. position = 9876
  584. size = 32
  585. version = 0
  586. flags = 0x000000
  587. creation_time = UTC 2014/09/23, 15:23:21
  588. modification_time = UTC 2014/09/23, 15:23:21
  589. timescale = 48000
  590. duration = 38400 (00:00:00.800)
  591. language = und
  592. pre_defined = 0x0000
  593. [hdlr: Handler Reference Box]
  594. position = 9908
  595. size = 51
  596. version = 0
  597. flags = 0x000000
  598. pre_defined = 0x00000000
  599. handler_type = soun
  600. reserved = 0x00000000
  601. reserved = 0x00000000
  602. reserved = 0x00000000
  603. name = Xiph Audio Handler
  604. [minf: Media Information Box]
  605. position = 9959
  606. size = 390
  607. [smhd: Sound Media Header Box]
  608. position = 9967
  609. size = 16
  610. version = 0
  611. flags = 0x000000
  612. balance = 0.000000
  613. reserved = 0x0000
  614. [dinf: Data Information Box]
  615. position = 9983
  616. size = 36
  617. [dref: Data Reference Box]
  618. position = 9991
  619. size = 28
  620. version = 0
  621. flags = 0x000000
  622. entry_count = 1
  623. [url : Data Entry Url Box]
  624. position = 10007
  625. size = 12
  626. version = 0
  627. flags = 0x000001
  628. location = in the same file
  629. [stbl: Sample Table Box]
  630. position = 10019
  631. size = 330
  632. [stsd: Sample Description Box]
  633. position = 10027
  634. size = 72
  635. version = 0
  636. flags = 0x000000
  637. entry_count = 1
  638. [Opus: Audio Description]
  639. position = 10043
  640. size = 56
  641. reserved = 0x000000000000
  642. data_reference_index = 1
  643. reserved = 0x0000
  644. reserved = 0x0000
  645. reserved = 0x00000000
  646. channelcount = 2
  647. samplesize = 16
  648. pre_defined = 0
  649. reserved = 0
  650. samplerate = 48000.000000
  651. [dOps: Opus Specific Box]
  652. position = 10079
  653. size = 20
  654. version = 0
  655. flags = 0x000006
  656. OutputChannelCount = 2
  657. InputSampleRate = 44100
  658. OutputGain = 0.000000
  659. ChannelMappingFamily = 0
  660. [stts: Decoding Time to Sample Box]
  661. position = 10099
  662. size = 24
  663. version = 0
  664. flags = 0x000000
  665. entry_count = 1
  666. entry[0]
  667. sample_count = 10
  668. sample_delta = 3840
  669. [stsc: Sample To Chunk Box]
  670. position = 10123
  671. size = 40
  672. version = 0
  673. flags = 0x000000
  674. entry_count = 2
  675. entry[0]
  676. first_chunk = 1
  677. samples_per_chunk = 4
  678. sample_description_index = 1
  679. entry[1]
  680. first_chunk = 3
  681. samples_per_chunk = 2
  682. sample_description_index = 1
  683. [stsz: Sample Size Box]
  684. position = 10163
  685. size = 60
  686. version = 0
  687. flags = 0x000000
  688. sample_size = 0 (variable)
  689. sample_count = 10
  690. entry_size[0] = 780
  691. entry_size[1] = 920
  692. entry_size[2] = 963
  693. entry_size[3] = 988
  694. entry_size[4] = 1024
  695. entry_size[5] = 951
  696. entry_size[6] = 933
  697. entry_size[7] = 1017
  698. entry_size[8] = 992
  699. entry_size[9] = 975
  700. [stco: Chunk Offset Box]
  701. position = 10223
  702. size = 28
  703. version = 0
  704. flags = 0x000000
  705. entry_count = 3
  706. chunk_offset[0] = 40
  707. chunk_offset[1] = 3691
  708. chunk_offset[2] = 7616
  709. [sgpd: Sample Group Description Box]
  710. position = 10251
  711. size = 30
  712. version = 1
  713. flags = 0x000000
  714. grouping_type = roll
  715. default_length = 2 (constant)
  716. entry_count = 3
  717. roll_distance[0] = -1
  718. roll_distance[1] = -2
  719. roll_distance[2] = -3
  720. [sbgp: Sample to Group Box]
  721. position = 10281
  722. size = 68
  723. version = 0
  724. flags = 0x000000
  725. grouping_type = roll
  726. entry_count = 6
  727. entry[0]
  728. sample_count = 2
  729. group_description_index = 1
  730. entry[1]
  731. sample_count = 1
  732. group_description_index = 2
  733. entry[2]
  734. sample_count = 1
  735. group_description_index = 3
  736. entry[3]
  737. sample_count = 1
  738. group_description_index = 2
  739. entry[4]
  740. sample_count = 3
  741. group_description_index = 3
  742. entry[5]
  743. sample_count = 2
  744. group_description_index = 1
  745. <a name="5"></a>
  746. 5 Authors' Address
  747. Yusuke Nakamura
  748. Email: muken.the.vfrmaniac |at| gmail.com
  749. </div>
  750. </body>
  751. </html>